Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 51 additions & 0 deletions .github/workflows/pkgdown.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
on:
push:
branches: [main, master]
pull_request:
release:
types: [published]
workflow_dispatch:

name: pkgdown.yaml

permissions: read-all

jobs:
pkgdown:
runs-on: ubuntu-latest
# Only restrict concurrency for non-PR jobs
concurrency:
group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }}
env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
permissions:
contents: write
steps:
- uses: actions/checkout@v4

- uses: r-lib/actions/setup-pandoc@v2

- uses: r-lib/actions/setup-r@v2
with:
use-public-rspm: true

- uses: r-lib/actions/setup-r-dependencies@v2
with:
extra-packages: |
github::SebKrantz/pkgdown
local::.
needs: website

- name: Build site
run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE)
shell: Rscript {0}

- name: Deploy to GitHub pages 🚀
if: github.event_name != 'pull_request'
uses: JamesIves/github-pages-deploy-action@v4.5.0
with:
clean: false
branch: gh-pages
folder: docs
6 changes: 3 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Package: dfms
Version: 0.2.2
Version: 0.3.0
Title: Dynamic Factor Models
Authors@R: c(person("Sebastian", "Krantz", role = c("aut", "cre"), email = "sebastian.krantz@graduateinstitute.ch"),
person("Rytis", "Bagdziunas", role = "aut"))
Expand All @@ -15,8 +15,8 @@ Description: Efficient estimation of Dynamic Factor Models using the Expectation
of factors are also provided - following Bai and Ng (2002) <doi:10.1111/1468-0262.00273>.
URL: https://sebkrantz.github.io/dfms/
BugReports: https://github.com/SebKrantz/dfms/issues
Depends: R (>= 3.3.0)
Imports: Rcpp (>= 1.0.1), collapse (>= 1.8.0)
Depends: R (>= 3.5.0)
Imports: Rcpp (>= 1.0.1), collapse (>= 2.0.0)
LinkingTo: Rcpp, RcppArmadillo
Suggests: xts, vars, magrittr, testthat (>= 3.0.0), knitr, rmarkdown, covr
License: GPL-3
Expand Down
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ importFrom(collapse,"%=%")
importFrom(collapse,"%r*%")
importFrom(collapse,TRA.matrix)
importFrom(collapse,ckmatch)
importFrom(collapse,flag)
importFrom(collapse,fmedian)
importFrom(collapse,fmedian.default)
importFrom(collapse,fmin.matrix)
Expand All @@ -54,6 +55,8 @@ importFrom(collapse,setop)
importFrom(collapse,t_list)
importFrom(collapse,unattrib)
importFrom(collapse,unlist2d)
importFrom(collapse,vec)
importFrom(collapse,whichNA)
importFrom(collapse,whichv)
importFrom(grDevices,rainbow)
importFrom(graphics,abline)
Expand Down
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# dfms 0.3.0

* Added argument `quarterly.vars`, enabling mixed-frequency estimation with monthly and quarterly data following Banbura and Modugno (2014). The data matrix should contain the quarterly variables at the end of the matrix (after the monthly ones).

# dfms 0.2.2

* Replace Armadillo `inv_sympd()` by Armadillo `inv()` in C++ Kalman Filter to improve numerical robustness at a minor performance cost.
Expand Down
50 changes: 39 additions & 11 deletions R/DFM.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
.EM_DGR <- quote(EMstepDGR(X, A, C, Q, R, F_0, P_0, cpX, n, r, sr, TT, rQi, rRi))
.EM_BM <- quote(EMstepBMOPT(X, A, C, Q, R, F_0, P_0, XW0, W, n, r, sr, TT, dgind, dnkron, dnkron_ind, rQi, rRi))
.EM_BM_idio <- quote(EMstepBMidio(X, A, C, Q, R, F_0, P_0, XW0, W, dgind, dnkron, dnkron_ind, r, p, n, sr, TT, rQi, rRi))
.EMstepBMMQ <- quote(EMstepBMMQ(X, A, C, Q, R, F_0, P_0, XW0, NW, dgind, dnkron, dnkron_ind, r, p, R_mat, n, nq, sr, TT, rQi, rRi))
.KFS <- quote(SKFS(X, A, C, Q, R, F_0, P_0))


Expand All @@ -15,6 +16,7 @@
#' @param p integer. number of lags in factor VAR.
#' @param \dots (optional) arguments to \code{\link{tsnarmimp}}.
#' @param idio.ar1 logical. Model observation errors as AR(1) processes: \eqn{e_t = \rho e_{t-1} + v_t}{e(t) = rho e(t-1) + v(t)}. \emph{Note} that this substantially increases computation time, and is generaly not needed if \code{n} is large (>30). See theoretical vignette for details.
#' @param quarterly.vars character. Names of quarterly variables in \code{X} (if any). Monthly variables should be to the left of the quarterly variables in the data matrix and quarterly observations should be provided every 3rd period.
#' @param rQ character. restrictions on the state (transition) covariance matrix (Q).
#' @param rR character. restrictions on the observation (measurement) covariance matrix (R).
#' @param em.method character. The implementation of the Expectation Maximization Algorithm used. The options are:
Expand Down Expand Up @@ -115,7 +117,7 @@
#' Stock, J. H., & Watson, M. W. (2016). Dynamic Factor Models, Factor-Augmented Vector Autoregressions, and Structural Vector Autoregressions in Macroeconomics. \emph{Handbook of Macroeconomics, 2}, 415–525. https://doi.org/10.1016/bs.hesmac.2016.04.002
#'
#' @useDynLib dfms, .registration = TRUE
#' @importFrom collapse fscale qsu fvar fmedian fmedian.default qM unattrib na_omit %=% %-=% %+=% %/=% %*=% %r*% whichv setColnames setDimnames
#' @importFrom collapse fscale qsu fvar flag fmedian fmedian.default qM unattrib na_omit %=% %-=% %+=% %/=% %*=% %r*% whichv whichNA vec setColnames setDimnames
#' @importFrom grDevices rainbow
#' @importFrom graphics abline legend lines par
#' @importFrom stats filter residuals rnorm spline ts.plot
Expand Down Expand Up @@ -203,7 +205,8 @@
#' @export

DFM <- function(X, r, p = 1L, ...,
idio.ar1 = FALSE, # quarterly.vars = c(), blocks = c(),
idio.ar1 = FALSE,
quarterly.vars = NULL, # blocks = c(),
rQ = c("none", "diagonal", "identity"),
rR = c("diagonal", "identity", "none"),
em.method = c("auto", "DGR", "BM", "none"),
Expand All @@ -230,6 +233,7 @@ DFM <- function(X, r, p = 1L, ...,
if(!is.logical(idio.ar1) || is.na(idio.ar1)) stop("idio.ar1 needs to be logical")
if(!is.logical(pos.corr) || is.na(pos.corr)) stop("pos.corr needs to be logical")
if(!is.logical(check.increased) || is.na(check.increased)) stop("check.increased needs to be logical")
if(!is.null(quarterly.vars) && !is.character(quarterly.vars)) stop("quarterly.vars needs to be a character vector with the names of quarterly variables in X")

rp <- r * p
sr <- seq_len(r)
Expand All @@ -242,6 +246,11 @@ DFM <- function(X, r, p = 1L, ...,
Xnam <- dimnames(X)[[2L]]
dimnames(X) <- NULL
n <- ncol(X)
if(MFl <- !is.null(quarterly.vars)) {
qind <- ckmatch(quarterly.vars, Xnam)
nq <- length(qind)
if(!identical(sort(qind), (n-nq+1):n)) stop("Pleae order your data such that quarterly variables are to the right (after) the monthly variables")
}

# Missing values
anymiss <- anyNA(X)
Expand All @@ -261,8 +270,8 @@ DFM <- function(X, r, p = 1L, ...,

# This is because after removing missing rows, the data could be complete, e.g. differencing data with diff.xts() of collapse::fdiff() just gives a NA row
if(anymiss && length(rm.rows)) anymiss <- any(W)
BMl <- switch(tolower(em.method[1L]), auto = anymiss, dgr = FALSE, bm = TRUE, none = NA, stop("Unknown EM option:", em.method[1L]))
if(idio.ar1 && !is.na(BMl)) BMl <- TRUE
BMl <- switch(tolower(em.method[1L]), auto = anymiss || idio.ar1 || MFl, dgr = FALSE || idio.ar1 || MFl,
bm = TRUE, none = NA, stop("Unknown EM option:", em.method[1L]))

# Run PCA to get initial factor estimates:
# v <- svd(X_imp, nu = 0L, nv = min(as.integer(r), n, TT))$v # Not faster than eigen...
Expand All @@ -276,8 +285,13 @@ DFM <- function(X, r, p = 1L, ...,
F_pc <- X_imp %*% v

## Initial System Matrices
init <- if(idio.ar1) init_cond_idio_ar1(X, F_pc, v, n, r, p, BMl, rRi, rQi, anymiss, tol) else
init_cond(X, F_pc, v, n, r, p, BMl, rRi, rQi)
if(MFl) {
init <- if(idio.ar1) stop("Not yet implemented") else
init_cond_MQ(X, X_imp, F_pc, v, n, r, p, TT, nq, rRi, rQi)
} else {
init <- if(idio.ar1) init_cond_idio_ar1(X, F_pc, v, n, r, p, BMl, rRi, rQi, anymiss, tol) else
init_cond(X, F_pc, v, n, r, p, BMl, rRi, rQi)
}
A <- init$A; C <- init$C; Q <- init$Q; R <- init$R; F_0 <- init$F_0; P_0 <- init$P_0

## Run standartized data through Kalman filter and smoother once
Expand All @@ -300,6 +314,7 @@ DFM <- function(X, r, p = 1L, ...,
P_2s = setDimnames(kfs_res$P_smooth[sr, sr,, drop = FALSE], list(fnam, fnam, NULL)),
anyNA = anymiss, # || length(rm.rows), # This is for internal missing values only
rm.rows = rm.rows,
quarterly.vars = quarterly.vars,
em.method = if(is.na(BMl)) "none" else if(BMl) "BM" else "DGR",
call = match.call())

Expand Down Expand Up @@ -335,11 +350,22 @@ DFM <- function(X, r, p = 1L, ...,
converged <- FALSE

if(BMl) {
expr <- if(idio.ar1) .EM_BM_idio else .EM_BM
dnkron <- matrix(1, r, r) %x% diag(n) # Used to be inside EMstep, taken out to speed up the algorithm
dnkron_ind <- whichv(dnkron, 1)
XW0 <- X_imp
if(MFl) {
nm <- n - nq
rpC <- r * max(p, 5L)
rpC1nq <- (rpC+1L):(rpC+nq)
NW <- !W
R_mat <- kronecker(Rcon, diag(r)) # TODO: autsource this
expr <- if(idio.ar1) stop("not implemented yet") else .EMstepBMMQ
dnkron <- matrix(1, r, r) %x% diag(nm) # Used to be inside EMstep, taken out to speed up the algorithm
dnkron_ind <- whichv(dnkron, 1)
} else {
expr <- if(idio.ar1) .EM_BM_idio else .EM_BM
dnkron <- matrix(1, r, r) %x% diag(n) # Used to be inside EMstep, taken out to speed up the algorithm
dnkron_ind <- whichv(dnkron, 1)
}
dgind <- 0:(n-1) * n + 1:n
XW0 <- X_imp
if(anymiss) XW0[W] <- 0 else W <- is.na(X) # TODO: think about this...
} else {
expr <- .EM_DGR
Expand Down Expand Up @@ -379,7 +405,9 @@ DFM <- function(X, r, p = 1L, ...,
A = setDimnames(em_res$A[sr, seq_len(rp), drop = FALSE], lagnam(fnam, p)),
C = setDimnames(em_res$C[, sr, drop = FALSE], list(Xnam, fnam)),
Q = setDimnames(em_res$Q[sr, sr, drop = FALSE], list(unam, unam)),
R = setDimnames(if(idio.ar1) em_res$Q[-seq_len(rp), -seq_len(rp), drop = FALSE] else em_res$R, list(Xnam, Xnam)),
R = setDimnames(if(MFl && idio.ar1) stop("not implemented yet") else if(MFl)
`[<-`(em_res$R, (nm+1):n, (nm+1):n, value = em_res$Q[rpC1nq, rpC1nq]) else if(idio.ar1)
em_res$Q[-seq_len(rp), -seq_len(rp), drop = FALSE] else em_res$R, list(Xnam, Xnam)),
e = if(idio.ar1) setColnames(kfs_res$F_smooth[, -seq_len(rp), drop = FALSE], Xnam) else NULL,
rho = if(idio.ar1) setNames(diag(em_res$A)[-seq_len(rp)], Xnam) else NULL,
loglik = if(num_iter == max.iter) loglik_all else loglik_all[seq_len(num_iter)],
Expand Down
144 changes: 144 additions & 0 deletions R/EMBM_MQ.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@

# Define the inputs of the function
# X: a matrix with n rows and TT columns representing the observed data at each time t.
# A: a transition matrix with size r*p+r*r.
# C: an observation matrix with size n*r.
# Q: a covariance matrix for the state equation disturbances with size r*p+r*r.
# R: a covariance matrix for the observation disturbances with size n*n.
# Z_0: the initial value of the state variable.
# V_0: the initial value of the variance-covariance matrix of the state variable.
# r: order of the state autoregression (AR) process.
# p: number of lags in the state AR process.
# i_idio: a vector of indicators specifying which variables are idiosyncratic noise variables.'

# Z_0 = F_0; V_0 = P_0
EMstepBMMQ = function(X, A, C, Q, R, Z_0, V_0, XW0, NW, dgind, dnkron, dnkron_ind, r, p, R_mat, n, nq, sr, TT, rQi, rRi) {

# Define dimensions of the input arguments
rC = 5L # ncol(Rcon)
pC = max(p, rC)
rp = r * p
rpC = r * pC
nm = n - nq
rC = rC * r # Note here replacing rC!!

snm = seq_len(nm)
srp = seq_len(rp)
srpC = seq_len(rpC)
rpC1nq = (rpC+1L):(rpC+nq)

# Compute expected sufficient statistics for a single Kalman filter sequence
# Run the Kalman filter with the current parameter estimates
kfs_res = SKFS(X, A, C, Q, R, Z_0, V_0, TRUE)

# Extract values from the Kalman Filter output
Zsmooth = kfs_res$F_smooth
Vsmooth = kfs_res$P_smooth
VVsmooth = kfs_res$PPm_smooth
Zsmooth0 = kfs_res$F_smooth_0
Vsmooth0 = kfs_res$P_smooth_0
loglik = kfs_res$loglik

# Perform algebraic operations involving E(Z_t), E(Z_{t-1}), Y_t, Y_{t-1} to update A_new, Q_new, C_new, and R_new

# Normal Expectations to get Z(t+1) and A(t+1).
tmp = rbind(Zsmooth0[srpC], Zsmooth[-TT, srpC, drop = FALSE])
tmp2 = sum3(Vsmooth[srpC, srpC, -TT, drop = FALSE])
EZZ = crossprod(Zsmooth[, srpC, drop = FALSE]) %+=% (tmp2 + Vsmooth[srpC, srpC, TT]) # E(Z'Z)
EZZ_BB = crossprod(tmp) %+=% (tmp2 + Vsmooth0[srpC, srpC]) # E(Z(-1)'Z(-1))
EZZ_FB = crossprod(Zsmooth[, srpC, drop = FALSE], tmp) %+=% sum3(VVsmooth[srpC, srpC,, drop = FALSE]) # E(Z'Z(-1))

# Update matrices A and Q
A_new = A
Q_new = Q

# System matrices
A_new[sr, srp] = EZZ_FB[sr, srp, drop = FALSE] %*% ainv(EZZ_BB[srp, srp, drop = FALSE])
if(rQi) {
Qsr = (EZZ[sr, sr] - tcrossprod(A_new[sr, srp, drop = FALSE], EZZ_FB[sr, srp, drop = FALSE])) / TT
Q_new[sr, sr] = if(rQi == 2L) Qsr else diag(diag(Qsr))
} else Q_new[sr, sr] = diag(r)
Q_new[rpC1nq, rpC1nq] = diag(diag(crossprod(Zsmooth[, rpC1nq, drop = FALSE]) + sum3(Vsmooth[rpC1nq, rpC1nq,, drop = FALSE])) / TT)

# E(X'X) & E(X'Z)
# Estimate matrix C using maximum likelihood approach
denom = numeric(nm*r^2)
nom = matrix(0, nm, r)

# nanYt = diagm(nanY[1:nM,t] .== 0)
# denom += kron(Zsmooth[1:r,t+1] * Zsmooth[1:r,t+1]' + Vsmooth[1:r,1:r,t+1], nanYt)
# nom += y[1:nM,t]*Zsmooth[1:r,t+1]';
for (t in 1:TT) {
tmp = Zsmooth[t, sr]
nom %+=% tcrossprod(XW0[t, snm], tmp)
tmp2 = tcrossprod(tmp) + Vsmooth[sr, sr, t]
dim(tmp2) = NULL
denom %+=% tcrossprod(tmp2, NW[t, snm])
}

dim(denom) = c(r, r, nm)
dnkron[dnkron_ind] = aperm.default(denom, c(1L, 3L, 2L))
# Solve for vec_C and reshape into C_new
C_new = C
C_new[1:nm, sr] = solve.default(dnkron, unattrib(nom)) # ainv() -> slower...

# Now updating the quarterly observation matrix C
for (i in (nm+1):n) {
denom = numeric(rC^2)
nom = numeric(rC)

for (t in 1:TT) {
if(NW[t, i]) denom %+=% (crossprod(Zsmooth[t, 1:rC, drop = FALSE]) + Vsmooth[1:rC, 1:rC , t])
nom %+=% (XW0[t, i] * Zsmooth[t, 1:rC])
}
dim(denom) = c(rC, rC)
denom_inv = ainv(denom)
C_i = denom_inv %*% nom # Is this a scalar? -> yes, otherwise the replacement with C_new doesn't make sense
tmp = tcrossprod(denom_inv, R_mat)
C_i_constr = C_i - tmp %*% ainv(R_mat %*% tmp) %*% R_mat %*% C_i
C_new[i, 1:rC] = C_i_constr
}

if(rRi) {
R_new = matrix(0, n, n)
Rdg = R[dgind]
for (t in 1:TT) {
nnanYt = NW[t, ]
tmp = C_new * nnanYt
R[dgind] = Rdg * !nnanYt # If R is not diagonal
tmp2 = tmp %*% tcrossprod(Vsmooth[,, t], tmp)
tmp2 %+=% tcrossprod(XW0[t, ] - tmp %*% Zsmooth[t, ])
tmp2 %+=% R # If R is not diagonal
# tmp2[dgind] = tmp2[dgind] + (Rdg * !nnanYt) # If R is diagonal...
R_new %+=% tmp2
}
if(rRi == 2L) { # Unrestricted
R_new %/=% TT
R_new[R_new < 1e-7] = 1e-7
R_new[(nm+1):n, ] = 0
R_new[, (nm+1):n] = 0
} else { # Diagonal
RR = R_new[dgind] / TT
RR[RR < 1e-7] = 1e-7 # RR(RR<1e-2) = 1e-2;
R_new %*=% 0
R_new[dgind[snm]] = RR[snm] # TODO: set quarterly obs to a small number?
}
} else {
R_new = diag(n)
diag(R_new)[(nm+1):n] = 0
}

# Set initial conditions
V_0_new = V_0
V_0_new[srpC, srpC] = Vsmooth0[srpC, srpC]
diag(V_0_new)[-srpC] = diag(Vsmooth0)[-srpC]

return(list(A = A_new,
C = C_new,
Q = Q_new,
R = R_new,
F_0 = drop(Zsmooth0),
P_0 = V_0_new,
loglik = kfs_res$loglik))

}
Loading
Loading