SebKrantz · SebKrantz · Mar 17, 2025 · Jun 30, 2023 · Jun 30, 2023 · Jul 2, 2023
diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml
@@ -0,0 +1,51 @@
+# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
+# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
+on:
+  push:
+    branches: [main, master]
+  pull_request:
+  release:
+    types: [published]
+  workflow_dispatch:
+
+name: pkgdown.yaml
+
+permissions: read-all
+
+jobs:
+  pkgdown:
+    runs-on: ubuntu-latest
+    # Only restrict concurrency for non-PR jobs
+    concurrency:
+      group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }}
+    env:
+      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+    permissions:
+      contents: write
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: r-lib/actions/setup-pandoc@v2
+
+      - uses: r-lib/actions/setup-r@v2
+        with:
+          use-public-rspm: true
+
+      - uses: r-lib/actions/setup-r-dependencies@v2
+        with:
+          extra-packages: |
+            github::SebKrantz/pkgdown
+            local::.
+          needs: website
+
+      - name: Build site
+        run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE)
+        shell: Rscript {0}
+
+      - name: Deploy to GitHub pages 🚀
+        if: github.event_name != 'pull_request'
+        uses: JamesIves/github-pages-deploy-action@v4.5.0
+        with:
+          clean: false
+          branch: gh-pages
+          folder: docs
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,5 +1,5 @@
 Package: dfms
-Version: 0.2.2
+Version: 0.3.0
 Title: Dynamic Factor Models
 Authors@R: c(person("Sebastian", "Krantz", role = c("aut", "cre"), email = "sebastian.krantz@graduateinstitute.ch"),
              person("Rytis", "Bagdziunas", role = "aut"))
@@ -15,8 +15,8 @@ Description: Efficient estimation of Dynamic Factor Models using the Expectation
   of factors are also provided - following Bai and Ng (2002) <doi:10.1111/1468-0262.00273>.
 URL: https://sebkrantz.github.io/dfms/
 BugReports: https://github.com/SebKrantz/dfms/issues
-Depends: R (>= 3.3.0)
-Imports: Rcpp (>= 1.0.1), collapse (>= 1.8.0)
+Depends: R (>= 3.5.0)
+Imports: Rcpp (>= 1.0.1), collapse (>= 2.0.0)
 LinkingTo: Rcpp, RcppArmadillo
 Suggests: xts, vars, magrittr, testthat (>= 3.0.0), knitr, rmarkdown, covr
 License: GPL-3

diff --git a/NAMESPACE b/NAMESPACE
@@ -33,6 +33,7 @@ importFrom(collapse,"%=%")
 importFrom(collapse,"%r*%")
 importFrom(collapse,TRA.matrix)
 importFrom(collapse,ckmatch)
+importFrom(collapse,flag)
 importFrom(collapse,fmedian)
 importFrom(collapse,fmedian.default)
 importFrom(collapse,fmin.matrix)
@@ -54,6 +55,8 @@ importFrom(collapse,setop)
 importFrom(collapse,t_list)
 importFrom(collapse,unattrib)
 importFrom(collapse,unlist2d)
+importFrom(collapse,vec)
+importFrom(collapse,whichNA)
 importFrom(collapse,whichv)
 importFrom(grDevices,rainbow)
 importFrom(graphics,abline)

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,7 @@
+# dfms 0.3.0
+
+* Added argument `quarterly.vars`, enabling mixed-frequency estimation with monthly and quarterly data following Banbura and Modugno (2014). The data matrix should contain the quarterly variables at the end of the matrix (after the monthly ones).
+
 # dfms 0.2.2
 
 * Replace Armadillo `inv_sympd()` by Armadillo `inv()` in C++ Kalman Filter to improve numerical robustness at a minor performance cost.

diff --git a/R/DFM.R b/R/DFM.R
@@ -2,6 +2,7 @@
 .EM_DGR <- quote(EMstepDGR(X, A, C, Q, R, F_0, P_0, cpX, n, r, sr, TT, rQi, rRi))
 .EM_BM <- quote(EMstepBMOPT(X, A, C, Q, R, F_0, P_0, XW0, W, n, r, sr, TT, dgind, dnkron, dnkron_ind, rQi, rRi))
 .EM_BM_idio <- quote(EMstepBMidio(X, A, C, Q, R, F_0, P_0, XW0, W, dgind, dnkron, dnkron_ind, r, p, n, sr, TT, rQi, rRi))
+.EMstepBMMQ <- quote(EMstepBMMQ(X, A, C, Q, R, F_0, P_0, XW0, NW, dgind, dnkron, dnkron_ind, r, p, R_mat, n, nq, sr, TT, rQi, rRi))
 .KFS <- quote(SKFS(X, A, C, Q, R, F_0, P_0))
 
 
@@ -15,6 +16,7 @@
 #' @param p integer. number of lags in factor VAR.
 #' @param \dots (optional) arguments to \code{\link{tsnarmimp}}.
 #' @param idio.ar1 logical. Model observation errors as AR(1) processes: \eqn{e_t = \rho e_{t-1} + v_t}{e(t) = rho e(t-1) + v(t)}. \emph{Note} that this substantially increases computation time, and is generaly not needed if \code{n} is large (>30). See theoretical vignette for details.
+#' @param quarterly.vars character. Names of quarterly variables in \code{X} (if any). Monthly variables should be to the left of the quarterly variables in the data matrix and quarterly observations should be provided every 3rd period.
 #' @param rQ character. restrictions on the state (transition) covariance matrix (Q).
 #' @param rR character. restrictions on the observation (measurement) covariance matrix (R).
 #' @param em.method character. The implementation of the Expectation Maximization Algorithm used. The options are:
@@ -115,7 +117,7 @@
 #' Stock, J. H., & Watson, M. W. (2016). Dynamic Factor Models, Factor-Augmented Vector Autoregressions, and Structural Vector Autoregressions in Macroeconomics. \emph{Handbook of Macroeconomics, 2}, 415–525. https://doi.org/10.1016/bs.hesmac.2016.04.002
 #'
 #' @useDynLib dfms, .registration = TRUE
-#' @importFrom collapse fscale qsu fvar fmedian fmedian.default qM unattrib na_omit %=% %-=% %+=% %/=% %*=% %r*% whichv setColnames setDimnames
+#' @importFrom collapse fscale qsu fvar flag fmedian fmedian.default qM unattrib na_omit %=% %-=% %+=% %/=% %*=% %r*% whichv whichNA vec setColnames setDimnames
 #' @importFrom grDevices rainbow
 #' @importFrom graphics abline legend lines par
 #' @importFrom stats filter residuals rnorm spline ts.plot
@@ -203,7 +205,8 @@
 #' @export
 
 DFM <- function(X, r, p = 1L, ...,
-                idio.ar1 = FALSE, # quarterly.vars = c(), blocks = c(),
+                idio.ar1 = FALSE,
+                quarterly.vars = NULL, # blocks = c(),
                 rQ = c("none", "diagonal", "identity"),
                 rR = c("diagonal", "identity", "none"),
                 em.method = c("auto", "DGR", "BM", "none"),
@@ -230,6 +233,7 @@ DFM <- function(X, r, p = 1L, ...,
   if(!is.logical(idio.ar1) || is.na(idio.ar1)) stop("idio.ar1 needs to be logical")
   if(!is.logical(pos.corr) || is.na(pos.corr)) stop("pos.corr needs to be logical")
   if(!is.logical(check.increased) || is.na(check.increased)) stop("check.increased needs to be logical")
+  if(!is.null(quarterly.vars) && !is.character(quarterly.vars)) stop("quarterly.vars needs to be a character vector with the names of quarterly variables in X")
 
   rp <- r * p
   sr <- seq_len(r)
@@ -242,6 +246,11 @@ DFM <- function(X, r, p = 1L, ...,
   Xnam <- dimnames(X)[[2L]]
   dimnames(X) <- NULL
   n <- ncol(X)
+  if(MFl <- !is.null(quarterly.vars)) {
+    qind <- ckmatch(quarterly.vars, Xnam)
+    nq <- length(qind)
+    if(!identical(sort(qind), (n-nq+1):n)) stop("Pleae order your data such that quarterly variables are to the right (after) the monthly variables")
+  }
 
   # Missing values
   anymiss <- anyNA(X)
@@ -261,8 +270,8 @@ DFM <- function(X, r, p = 1L, ...,
 
   # This is because after removing missing rows, the data could be complete, e.g. differencing data with diff.xts() of collapse::fdiff() just gives a NA row
   if(anymiss && length(rm.rows)) anymiss <- any(W)
-  BMl <- switch(tolower(em.method[1L]), auto = anymiss, dgr = FALSE, bm = TRUE, none = NA, stop("Unknown EM option:", em.method[1L]))
-  if(idio.ar1 && !is.na(BMl)) BMl <- TRUE
+  BMl <- switch(tolower(em.method[1L]), auto = anymiss || idio.ar1 || MFl, dgr = FALSE || idio.ar1 || MFl,
+                bm = TRUE, none = NA, stop("Unknown EM option:", em.method[1L]))
 
   # Run PCA to get initial factor estimates:
   # v <- svd(X_imp, nu = 0L, nv = min(as.integer(r), n, TT))$v # Not faster than eigen...
@@ -276,8 +285,13 @@ DFM <- function(X, r, p = 1L, ...,
   F_pc <- X_imp %*% v
 
   ## Initial System Matrices
-  init <- if(idio.ar1) init_cond_idio_ar1(X, F_pc, v, n, r, p, BMl, rRi, rQi, anymiss, tol) else
-                                init_cond(X, F_pc, v, n, r, p, BMl, rRi, rQi)
+  if(MFl) {
+    init <- if(idio.ar1) stop("Not yet implemented") else
+      init_cond_MQ(X, X_imp, F_pc, v, n, r, p, TT, nq, rRi, rQi)
+  } else {
+    init <- if(idio.ar1) init_cond_idio_ar1(X, F_pc, v, n, r, p, BMl, rRi, rQi, anymiss, tol) else
+      init_cond(X, F_pc, v, n, r, p, BMl, rRi, rQi)
+  }
   A <- init$A; C <- init$C; Q <- init$Q; R <- init$R; F_0 <- init$F_0; P_0 <- init$P_0
 
   ## Run standartized data through Kalman filter and smoother once
@@ -300,6 +314,7 @@ DFM <- function(X, r, p = 1L, ...,
                        P_2s = setDimnames(kfs_res$P_smooth[sr, sr,, drop = FALSE], list(fnam, fnam, NULL)),
                        anyNA = anymiss, # || length(rm.rows), # This is for internal missing values only
                        rm.rows = rm.rows,
+                       quarterly.vars = quarterly.vars,
                        em.method = if(is.na(BMl)) "none" else if(BMl) "BM" else "DGR",
                        call = match.call())
 
@@ -335,11 +350,22 @@ DFM <- function(X, r, p = 1L, ...,
   converged <- FALSE
 
   if(BMl) {
-    expr <- if(idio.ar1) .EM_BM_idio else .EM_BM
-    dnkron <- matrix(1, r, r) %x% diag(n) # Used to be inside EMstep, taken out to speed up the algorithm
-    dnkron_ind <- whichv(dnkron, 1)
-    XW0 <- X_imp
+    if(MFl) {
+      nm <- n - nq
+      rpC <- r * max(p, 5L)
+      rpC1nq <- (rpC+1L):(rpC+nq)
+      NW <- !W
+      R_mat <- kronecker(Rcon, diag(r)) # TODO: autsource this
+      expr <- if(idio.ar1) stop("not implemented yet") else .EMstepBMMQ
+      dnkron <- matrix(1, r, r) %x% diag(nm) # Used to be inside EMstep, taken out to speed up the algorithm
+      dnkron_ind <- whichv(dnkron, 1)
+    } else {
+      expr <- if(idio.ar1) .EM_BM_idio else .EM_BM
+      dnkron <- matrix(1, r, r) %x% diag(n) # Used to be inside EMstep, taken out to speed up the algorithm
+      dnkron_ind <- whichv(dnkron, 1)
+    }
     dgind <- 0:(n-1) * n + 1:n
+    XW0 <- X_imp
     if(anymiss) XW0[W] <- 0 else W <- is.na(X) # TODO: think about this...
   } else {
     expr <- .EM_DGR
@@ -379,7 +405,9 @@ DFM <- function(X, r, p = 1L, ...,
                     A = setDimnames(em_res$A[sr, seq_len(rp), drop = FALSE], lagnam(fnam, p)),
                     C = setDimnames(em_res$C[, sr, drop = FALSE], list(Xnam, fnam)),
                     Q = setDimnames(em_res$Q[sr, sr, drop = FALSE], list(unam, unam)),
-                    R = setDimnames(if(idio.ar1) em_res$Q[-seq_len(rp), -seq_len(rp), drop = FALSE] else em_res$R, list(Xnam, Xnam)),
+                    R = setDimnames(if(MFl && idio.ar1) stop("not implemented yet") else if(MFl)
+                                    `[<-`(em_res$R, (nm+1):n, (nm+1):n, value = em_res$Q[rpC1nq, rpC1nq]) else if(idio.ar1)
+                                    em_res$Q[-seq_len(rp), -seq_len(rp), drop = FALSE] else em_res$R, list(Xnam, Xnam)),
                     e = if(idio.ar1) setColnames(kfs_res$F_smooth[, -seq_len(rp), drop = FALSE], Xnam) else NULL,
                     rho = if(idio.ar1) setNames(diag(em_res$A)[-seq_len(rp)], Xnam) else NULL,
                     loglik = if(num_iter == max.iter) loglik_all else loglik_all[seq_len(num_iter)],

diff --git a/R/EMBM_MQ.R b/R/EMBM_MQ.R
@@ -0,0 +1,144 @@
+
+# Define the inputs of the function
+# X: a matrix with n rows and TT columns representing the observed data at each time t.
+# A: a transition matrix with size r*p+r*r.
+# C: an observation matrix with size n*r.
+# Q: a covariance matrix for the state equation disturbances with size r*p+r*r.
+# R: a covariance matrix for the observation disturbances with size n*n.
+# Z_0: the initial value of the state variable.
+# V_0: the initial value of the variance-covariance matrix of the state variable.
+# r: order of the state autoregression (AR) process.
+# p: number of lags in the state AR process.
+# i_idio: a vector of indicators specifying which variables are idiosyncratic noise variables.'
+
+# Z_0 = F_0; V_0 = P_0
+EMstepBMMQ = function(X, A, C, Q, R, Z_0, V_0, XW0, NW, dgind, dnkron, dnkron_ind, r, p, R_mat, n, nq, sr, TT, rQi, rRi) {
+
+  # Define dimensions of the input arguments
+  rC = 5L # ncol(Rcon)
+  pC = max(p, rC)
+  rp = r * p
+  rpC = r * pC
+  nm = n - nq
+  rC = rC * r # Note here replacing rC!!
+
+  snm = seq_len(nm)
+  srp = seq_len(rp)
+  srpC = seq_len(rpC)
+  rpC1nq = (rpC+1L):(rpC+nq)
+
+  # Compute expected sufficient statistics for a single Kalman filter sequence
+  # Run the Kalman filter with the current parameter estimates
+  kfs_res = SKFS(X, A, C, Q, R, Z_0, V_0, TRUE)
+
+  # Extract values from the Kalman Filter output
+  Zsmooth = kfs_res$F_smooth
+  Vsmooth = kfs_res$P_smooth
+  VVsmooth = kfs_res$PPm_smooth
+  Zsmooth0 = kfs_res$F_smooth_0
+  Vsmooth0 = kfs_res$P_smooth_0
+  loglik = kfs_res$loglik
+
+  # Perform algebraic operations involving E(Z_t), E(Z_{t-1}), Y_t, Y_{t-1} to update A_new, Q_new, C_new, and R_new
+
+  # Normal Expectations to get Z(t+1) and A(t+1).
+  tmp = rbind(Zsmooth0[srpC], Zsmooth[-TT, srpC, drop = FALSE])
+  tmp2 = sum3(Vsmooth[srpC, srpC, -TT, drop = FALSE])
+  EZZ = crossprod(Zsmooth[, srpC, drop = FALSE]) %+=% (tmp2 + Vsmooth[srpC, srpC, TT])                   # E(Z'Z)
+  EZZ_BB = crossprod(tmp) %+=% (tmp2 + Vsmooth0[srpC, srpC])                                             # E(Z(-1)'Z(-1))
+  EZZ_FB = crossprod(Zsmooth[, srpC, drop = FALSE], tmp) %+=% sum3(VVsmooth[srpC, srpC,, drop = FALSE])  # E(Z'Z(-1))
+
+  # Update matrices A and Q
+  A_new = A
+  Q_new = Q
+
+  # System matrices
+  A_new[sr, srp] = EZZ_FB[sr, srp, drop = FALSE] %*% ainv(EZZ_BB[srp, srp, drop = FALSE])
+  if(rQi) {
+    Qsr = (EZZ[sr, sr] - tcrossprod(A_new[sr, srp, drop = FALSE], EZZ_FB[sr, srp, drop = FALSE])) / TT
+    Q_new[sr, sr] = if(rQi == 2L) Qsr else diag(diag(Qsr))
+  } else Q_new[sr, sr] = diag(r)
+  Q_new[rpC1nq, rpC1nq] = diag(diag(crossprod(Zsmooth[, rpC1nq, drop = FALSE]) + sum3(Vsmooth[rpC1nq, rpC1nq,, drop = FALSE])) / TT)
+
+  # E(X'X) & E(X'Z)
+  # Estimate matrix C using maximum likelihood approach
+  denom = numeric(nm*r^2)
+  nom = matrix(0, nm, r)
+
+  # nanYt = diagm(nanY[1:nM,t] .== 0)
+  # denom += kron(Zsmooth[1:r,t+1] * Zsmooth[1:r,t+1]' + Vsmooth[1:r,1:r,t+1], nanYt)
+  # nom += y[1:nM,t]*Zsmooth[1:r,t+1]';
+  for (t in 1:TT) {
+    tmp = Zsmooth[t, sr]
+    nom %+=% tcrossprod(XW0[t, snm], tmp)
+    tmp2 = tcrossprod(tmp) + Vsmooth[sr, sr, t]
+    dim(tmp2) = NULL
+    denom %+=% tcrossprod(tmp2, NW[t, snm])
+  }
+
+  dim(denom) = c(r, r, nm)
+  dnkron[dnkron_ind] = aperm.default(denom, c(1L, 3L, 2L))
+  # Solve for vec_C and reshape into C_new
+  C_new = C
+  C_new[1:nm, sr] = solve.default(dnkron, unattrib(nom)) # ainv() -> slower...
+
+  # Now updating the quarterly observation matrix C
+  for (i in (nm+1):n) {
+      denom = numeric(rC^2)
+      nom = numeric(rC)
+
+      for (t in 1:TT) {
+          if(NW[t, i]) denom %+=% (crossprod(Zsmooth[t, 1:rC, drop = FALSE]) + Vsmooth[1:rC, 1:rC , t])
+          nom %+=% (XW0[t, i] * Zsmooth[t, 1:rC])
+      }
+      dim(denom) = c(rC, rC)
+      denom_inv = ainv(denom)
+      C_i = denom_inv %*% nom # Is this a scalar? -> yes, otherwise the replacement with C_new doesn't make sense
+      tmp = tcrossprod(denom_inv, R_mat)
+      C_i_constr = C_i - tmp %*% ainv(R_mat %*% tmp) %*% R_mat %*% C_i
+      C_new[i, 1:rC] = C_i_constr
+  }
+
+  if(rRi) {
+    R_new = matrix(0, n, n)
+    Rdg = R[dgind]
+    for (t in 1:TT) {
+      nnanYt = NW[t, ]
+      tmp = C_new * nnanYt
+      R[dgind] = Rdg * !nnanYt # If R is not diagonal
+      tmp2 = tmp %*% tcrossprod(Vsmooth[,, t], tmp)
+      tmp2 %+=% tcrossprod(XW0[t, ] - tmp %*% Zsmooth[t, ])
+      tmp2 %+=% R # If R is not diagonal
+      # tmp2[dgind] = tmp2[dgind] + (Rdg * !nnanYt) # If R is diagonal...
+      R_new %+=% tmp2
+    }
+    if(rRi == 2L) { # Unrestricted
+      R_new %/=% TT
+      R_new[R_new < 1e-7] = 1e-7
+      R_new[(nm+1):n, ] = 0
+      R_new[, (nm+1):n] = 0
+    } else { # Diagonal
+      RR = R_new[dgind] / TT
+      RR[RR < 1e-7] = 1e-7 # RR(RR<1e-2) = 1e-2;
+      R_new %*=% 0
+      R_new[dgind[snm]] = RR[snm] # TODO: set quarterly obs to a small number?
+    }
+  } else {
+    R_new = diag(n)
+    diag(R_new)[(nm+1):n] = 0
+  }
+
+  # Set initial conditions
+  V_0_new = V_0
+  V_0_new[srpC, srpC] = Vsmooth0[srpC, srpC]
+  diag(V_0_new)[-srpC] = diag(Vsmooth0)[-srpC]
+
+  return(list(A = A_new,
+              C = C_new,
+              Q = Q_new,
+              R = R_new,
+              F_0 = drop(Zsmooth0),
+              P_0 = V_0_new,
+              loglik = kfs_res$loglik))
+
+}