From 3efe7df26cbc4a3d835acad0d85694e55c7e03c4 Mon Sep 17 00:00:00 2001 From: Sebastian Krantz Date: Sat, 1 Jul 2023 00:22:47 +0200 Subject: [PATCH 01/14] Minor fixes. --- .../BM14_Julia/EM_DFM_SS_restrMQ/Procedures.jl | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/misc/BM2014/BM14_Julia/EM_DFM_SS_restrMQ/Procedures.jl b/misc/BM2014/BM14_Julia/EM_DFM_SS_restrMQ/Procedures.jl index f635646..00ed778 100644 --- a/misc/BM2014/BM14_Julia/EM_DFM_SS_restrMQ/Procedures.jl +++ b/misc/BM2014/BM14_Julia/EM_DFM_SS_restrMQ/Procedures.jl @@ -22,7 +22,7 @@ function InitCond(xNaN, r, p, optNaN, Rcon, q, NQ) # Rcon is R_mat, NQ is nQ # Static predictions f = x * v; - ff = zeros(T-rC, 0); + ff = zeros(T-rC+1, 0); for kk = 0:rC-1 ff = [ff f[rC-kk:end-kk,:]]; end @@ -40,16 +40,16 @@ function InitCond(xNaN, r, p, optNaN, Rcon, q, NQ) # Rcon is R_mat, NQ is nQ iff_i = inv(ff_i'*ff_i) Cc = iff_i*ff_i'*xx_i Cc = Cc - iff_i*Rcon'*inv(Rcon*iff_i*Rcon')*(Rcon*Cc-q) - C[i, 1:rC*r] .= Cc' + C[i, 1:rC*r] = Cc' end res = x[rC:end, :] - ff * C[:, 1:rC*r]' resNaN = copy(res) resNaN[indNaN[rC:end, :]] .= NaN - R = Diagonal(nanvar(resNaN)) + R = Diagonal(dropdims(nanvar(resNaN, dims = 1), dims = 1)) R[NM+1:end, NM+1:end] .= 0 - C = [C [zeros(NM,rC*NQ);kron([1 2 3 2 1], eye(NQ))]] + C = [C [zeros(NM,rC*NQ); kron([1 2 3 2 1], eye(NQ))]] # Estimate A & Q from stacked F(t) = A*F(t-1) + e(t); z = f; @@ -65,8 +65,8 @@ function InitCond(xNaN, r, p, optNaN, Rcon, q, NQ) # Rcon is R_mat, NQ is nQ A[1:r,1:r*p] = A_temp'; A[r+1:end,1:r*(pC-1)] = eye(r*(pC-1)); - temp = zeros(5*NQ) - temp[NQ+1:end,1:end-NQ] .= eye(4*NQ) + temp = zeros(5*NQ, 5*NQ) + temp[NQ+1:end,1:end-NQ] = eye(4*NQ) A = BlockDiagonal([A, temp]) # Turn A into a normal matrix from BlockDiagonal A = Matrix(A) @@ -78,7 +78,7 @@ function InitCond(xNaN, r, p, optNaN, Rcon, q, NQ) # Rcon is R_mat, NQ is nQ # Extract covariance matrix of e and assign it to the top-left r x r block of Q Q[1:r, 1:r] = cov(e) # Extract diagonal elements of the nanvar of resNaN from column NM+1 to end and assign it to the bottom-right NQ x NQ block of Q - Q[pC*r+1:pC*r+NQ, pC*r+1:pC*r+NQ] = Diagonal(nanvar(resNaN[:, NM+1:end])) / 19 + Q[pC*r+1:pC*r+NQ, pC*r+1:pC*r+NQ] = Diagonal(dropdims(nanvar(resNaN[:, NM+1:end], dims = 1), dims = 1) / 19) # Calculate rp2 # rp2 = (r*pC+5*NQ)^2 From 53887d8a35ef633dfbbea0c880dbc616642e2353 Mon Sep 17 00:00:00 2001 From: Sebastian Krantz Date: Sat, 1 Jul 2023 00:23:29 +0200 Subject: [PATCH 02/14] Set nQ to 9. --- .../BM14_Julia/EM_DFM_SS_restrMQ/EM_DFM_SS_restrMQ.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/misc/BM2014/BM14_Julia/EM_DFM_SS_restrMQ/EM_DFM_SS_restrMQ.jl b/misc/BM2014/BM14_Julia/EM_DFM_SS_restrMQ/EM_DFM_SS_restrMQ.jl index 99d086b..4c3349e 100644 --- a/misc/BM2014/BM14_Julia/EM_DFM_SS_restrMQ/EM_DFM_SS_restrMQ.jl +++ b/misc/BM2014/BM14_Julia/EM_DFM_SS_restrMQ/EM_DFM_SS_restrMQ.jl @@ -22,12 +22,12 @@ eye(n) = Matrix{Float64}(I, n, n) P = (r = 5, p = 2, max_iter = 100, # Building the matrix of restrictions on the loadings for the quarterly variables Rconstr = [2 -1 0 0 0; - 3 0 -1 0 0; - 2 0 0 -1 0; - 1 0 0 0 -1], + 3 0 -1 0 0; + 2 0 0 -1 0; + 1 0 0 0 -1], q = zeros(4,1), restr = "_restrMQ", - nQ = 10 # need to set number of quarterly variables + nQ = 9 # need to set number of quarterly variables ) From 9f4dd0e97c53900ba3f4d9527f3102ed045b4309 Mon Sep 17 00:00:00 2001 From: Sebastian Krantz Date: Sun, 2 Jul 2023 11:13:18 +0200 Subject: [PATCH 03/14] Minor improcements. Removing weird division by 19. --- .../EM_DFM_SS_restrMQ/Procedures.jl | 23 ++++++++----------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/misc/BM2014/BM14_Julia/EM_DFM_SS_restrMQ/Procedures.jl b/misc/BM2014/BM14_Julia/EM_DFM_SS_restrMQ/Procedures.jl index 00ed778..cc51a89 100644 --- a/misc/BM2014/BM14_Julia/EM_DFM_SS_restrMQ/Procedures.jl +++ b/misc/BM2014/BM14_Julia/EM_DFM_SS_restrMQ/Procedures.jl @@ -49,7 +49,7 @@ function InitCond(xNaN, r, p, optNaN, Rcon, q, NQ) # Rcon is R_mat, NQ is nQ R = Diagonal(dropdims(nanvar(resNaN, dims = 1), dims = 1)) R[NM+1:end, NM+1:end] .= 0 - C = [C [zeros(NM,rC*NQ); kron([1 2 3 2 1], eye(NQ))]] + C = [C zeros(NM,rC*NQ) kron([1 2 3 2 1], eye(NQ))] # Estimate A & Q from stacked F(t) = A*F(t-1) + e(t); z = f; @@ -78,7 +78,7 @@ function InitCond(xNaN, r, p, optNaN, Rcon, q, NQ) # Rcon is R_mat, NQ is nQ # Extract covariance matrix of e and assign it to the top-left r x r block of Q Q[1:r, 1:r] = cov(e) # Extract diagonal elements of the nanvar of resNaN from column NM+1 to end and assign it to the bottom-right NQ x NQ block of Q - Q[pC*r+1:pC*r+NQ, pC*r+1:pC*r+NQ] = Diagonal(dropdims(nanvar(resNaN[:, NM+1:end], dims = 1), dims = 1) / 19) + Q[pC*r+1:pC*r+NQ, pC*r+1:pC*r+NQ] = Diagonal(dropdims(nanvar(resNaN[:, NM+1:end], dims = 1), dims = 1)) # / 19 # Calculate rp2 # rp2 = (r*pC+5*NQ)^2 @@ -90,7 +90,7 @@ function InitCond(xNaN, r, p, optNaN, Rcon, q, NQ) # Rcon is R_mat, NQ is nQ return A, C, Q, R, initZ, initV end - +# y_est, A, C, Q, R, Z_0, V_0, r, p, R_mat, q, nQ function EMstep(y, A, C, Q, R, Z_0, V_0, r, p, R_mat, q, nQ) n, T = size(y) rC = size(R_mat, 2) @@ -111,10 +111,10 @@ function EMstep(y, A, C, Q, R, Z_0, V_0, r, p, R_mat, q, nQ) A_new[1:r, 1:rp] = EZZ_FB[1:r, 1:rp] * inv(EZZ_BB[1:rp, 1:rp]) Q_new[1:r, 1:r] = (EZZ[1:r, 1:r] - A_new[1:r, 1:rp] * EZZ_FB[1:r, 1:rp]') / T - Q_new[rpC+1:rpC+nQ, rpC+1:rpC+nQ] = Diagonal(diag(Zsmooth[rpC+1:rpC+nQ, 2:end] * Zsmooth[rpC+1:rpC+nQ, 2:end]' + sum(Vsmooth[rpC+1:rpC+nQ, rpC+1:rpC+nQ, 2:end], dims=3))) / T + Q_new[rpC+1:rpC+nQ, rpC+1:rpC+nQ] = Diagonal(diag(Zsmooth[rpC+1:rpC+nQ, 2:end] * Zsmooth[rpC+1:rpC+nQ, 2:end]' + dropdims(sum(Vsmooth[rpC+1:rpC+nQ, rpC+1:rpC+nQ, 2:end], dims=3), dims = 3)) / T) Z_0 = Zsmooth[:, 1] - V_0 = zeros(length(Z_0)) + V_0 = zeros(length(Z_0), length(Z_0)) V_0[1:rpC, 1:rpC] = Vsmooth[1:rpC, 1:rpC, 1] V_0[rpC+1:end, rpC+1:end] = Diagonal(diag(Vsmooth[rpC+1:end, rpC+1:end, 1])) @@ -127,11 +127,8 @@ function EMstep(y, A, C, Q, R, Z_0, V_0, r, p, R_mat, q, nQ) nom = zeros(nM, r) for t=1:T - nanYt = diagm(nanY[1:nM,t] .== 0) - denom += kron(Zsmooth[1:r,t+1] * Zsmooth[1:r,t+1]' + Vsmooth[1:r,1:r,t+1], nanYt) - nom += y[1:nM,t]*Zsmooth[1:r,t+1]'; end @@ -146,12 +143,11 @@ function EMstep(y, A, C, Q, R, Z_0, V_0, r, p, R_mat, q, nQ) nom = zeros(1, rC) for t=1:T - nanYt = diagm(nanY[i,t] .== 0) - denom += kron(Zsmooth[1:rC, t+1]*Zsmooth[1:rC, t+1]' + Vsmooth[1:rC, 1:rC, t+1], nanYt) - nom += Y[i, t]*Zsmooth[1:rC, t+1]' + denom += !nanY[i,t] * (Zsmooth[1:rC, t+1]*Zsmooth[1:rC, t+1]' + Vsmooth[1:rC, 1:rC, t+1]) + nom += y[i, t]*Zsmooth[1:rC, t+1]' end - C_i = inv(denom)*nom' + C_i = inv(denom) * nom' C_i_constr = C_i - inv(denom)*R_mat'*inv(R_mat*inv(denom)*R_mat')*(R_mat*C_i-q) C_new[i, 1:rC] = C_i_constr end @@ -163,8 +159,7 @@ function EMstep(y, A, C, Q, R, Z_0, V_0, r, p, R_mat, q, nQ) nanYt*C_new*Vsmooth[:,:,t+1]*C_new'*nanYt + (I(n)-nanYt)*R*(I(n)-nanYt) end - R_new ./= T - R_new = Diagonal(diag(R_new)) + R_new = Diagonal(diag(R_new) / T) R_new[n-nQ+1:end, n-nQ+1:end] .= 0 return C_new, R_new, A_new, Q_new, loglik, Z_0, V_0 From 3aa6e03617d7ab80a812d4d17d6c358111e7812c Mon Sep 17 00:00:00 2001 From: Sebastian Krantz Date: Sun, 2 Jul 2023 11:15:45 +0200 Subject: [PATCH 04/14] Fixing spline: proper removal of missing rows. --- misc/BM2014/BM14_Julia/EM_DFM_SS_idio/remNaNs_spline.jl | 6 +++--- misc/BM2014/BM14_Julia/EM_DFM_SS_restrMQ/remNaNs_spline.jl | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/misc/BM2014/BM14_Julia/EM_DFM_SS_idio/remNaNs_spline.jl b/misc/BM2014/BM14_Julia/EM_DFM_SS_idio/remNaNs_spline.jl index 5ece48b..f7f246d 100644 --- a/misc/BM2014/BM14_Julia/EM_DFM_SS_idio/remNaNs_spline.jl +++ b/misc/BM2014/BM14_Julia/EM_DFM_SS_idio/remNaNs_spline.jl @@ -23,7 +23,7 @@ function remNaNs_spline(X, options) nanEnd = cumsum(rem1[end:-1:1]) .== 1:T; nanEnd = nanEnd[end:-1:1]; nanLE = nanLead .| nanEnd; - X[nanLE,:] = []; + X = X[.!nanLE,:] # X[nanLE,:] = []; indNaN = isnan.(X); for i = 1:N x = X[:,i]; @@ -46,7 +46,7 @@ function remNaNs_spline(X, options) nanEnd = cumsum(rem1[end:-1:1]) .== 1:T; nanEnd = nanEnd[end:-1:1]; nanLE = nanLead .| nanEnd; - X[nanLE,:] = []; + X = X[.!nanLE,:] # X[nanLE,:] = []; indNaN = isnan.(X); elseif options.method == 4 # remove rows with leading and closing zeros & replace missing values rem1 = dropdims(sum(indNaN, dims = 2), dims = 2) .== N; @@ -54,7 +54,7 @@ function remNaNs_spline(X, options) nanEnd = cumsum(rem1[end:-1:1]) .== 1:T; nanEnd = nanEnd[end:-1:1]; nanLE = nanLead .| nanEnd; - X[nanLE,:] = []; + X = X[.!nanLE,:] # X[nanLE,:] = []; indNaN = isnan.(X); for i = 1:N x = X[:,i]; diff --git a/misc/BM2014/BM14_Julia/EM_DFM_SS_restrMQ/remNaNs_spline.jl b/misc/BM2014/BM14_Julia/EM_DFM_SS_restrMQ/remNaNs_spline.jl index 5ece48b..f7f246d 100644 --- a/misc/BM2014/BM14_Julia/EM_DFM_SS_restrMQ/remNaNs_spline.jl +++ b/misc/BM2014/BM14_Julia/EM_DFM_SS_restrMQ/remNaNs_spline.jl @@ -23,7 +23,7 @@ function remNaNs_spline(X, options) nanEnd = cumsum(rem1[end:-1:1]) .== 1:T; nanEnd = nanEnd[end:-1:1]; nanLE = nanLead .| nanEnd; - X[nanLE,:] = []; + X = X[.!nanLE,:] # X[nanLE,:] = []; indNaN = isnan.(X); for i = 1:N x = X[:,i]; @@ -46,7 +46,7 @@ function remNaNs_spline(X, options) nanEnd = cumsum(rem1[end:-1:1]) .== 1:T; nanEnd = nanEnd[end:-1:1]; nanLE = nanLead .| nanEnd; - X[nanLE,:] = []; + X = X[.!nanLE,:] # X[nanLE,:] = []; indNaN = isnan.(X); elseif options.method == 4 # remove rows with leading and closing zeros & replace missing values rem1 = dropdims(sum(indNaN, dims = 2), dims = 2) .== N; @@ -54,7 +54,7 @@ function remNaNs_spline(X, options) nanEnd = cumsum(rem1[end:-1:1]) .== 1:T; nanEnd = nanEnd[end:-1:1]; nanLE = nanLead .| nanEnd; - X[nanLE,:] = []; + X = X[.!nanLE,:] # X[nanLE,:] = []; indNaN = isnan.(X); for i = 1:N x = X[:,i]; From 14a9aa15ab70a7a048b410198c2ff5d8dc22313d Mon Sep 17 00:00:00 2001 From: Sebastian Krantz Date: Sun, 2 Jul 2023 11:19:24 +0200 Subject: [PATCH 05/14] Minors. --- .../EM_DFM_SS_restrMQ/EM_DFM_SS_restrMQ.jl | 29 +++++++++---------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/misc/BM2014/BM14_Julia/EM_DFM_SS_restrMQ/EM_DFM_SS_restrMQ.jl b/misc/BM2014/BM14_Julia/EM_DFM_SS_restrMQ/EM_DFM_SS_restrMQ.jl index 4c3349e..e7550d3 100644 --- a/misc/BM2014/BM14_Julia/EM_DFM_SS_restrMQ/EM_DFM_SS_restrMQ.jl +++ b/misc/BM2014/BM14_Julia/EM_DFM_SS_restrMQ/EM_DFM_SS_restrMQ.jl @@ -1,6 +1,6 @@ ########################################### -# Julia Implementation of Single Frequency -# DFM with AR(1) Observation Residuals +# Julia Implementation of Mixed Frequency +# DFM with WN Observation Residuals ########################################### # Installation and loading @@ -18,18 +18,6 @@ using BlockDiagonals eye(n) = Matrix{Float64}(I, n, n) -# Parameters -P = (r = 5, p = 2, max_iter = 100, -# Building the matrix of restrictions on the loadings for the quarterly variables - Rconstr = [2 -1 0 0 0; - 3 0 -1 0 0; - 2 0 0 -1 0; - 1 0 0 0 -1], - q = zeros(4,1), - restr = "_restrMQ", - nQ = 9 # need to set number of quarterly variables -) - mutable struct Nanopt method @@ -41,6 +29,17 @@ include("runKF.jl") include("remNaNs_spline.jl") include("Procedures.jl") +# Parameters +P = (r = 4, p = 2, max_iter = 100, +# Building the matrix of restrictions on the loadings for the quarterly variables + Rconstr = [2 -1 0 0 0; + 3 0 -1 0 0; + 2 0 0 -1 0; + 1 0 0 0 -1], + q = zeros(4,1), + nQ = 2 # need to set number of quarterly variables +) + function EM_DFM_SS_restrMQ(X, P) @@ -85,7 +84,7 @@ function EM_DFM_SS_restrMQ(X, P) converged = false # y for the estimation is WITH missing data - y = xNaN' + y = transpose(xNaN) #-------------------------------------------------------------------------- #THE EM LOOP From 2f41c2ec33ab226bb6dc703fb5080ac0f23d23bf Mon Sep 17 00:00:00 2001 From: Sebastian Krantz Date: Sun, 2 Jul 2023 11:49:49 +0200 Subject: [PATCH 06/14] Small fix. --- misc/BM2014/BM14_Julia/EM_DFM_SS_restrMQ/Procedures.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/misc/BM2014/BM14_Julia/EM_DFM_SS_restrMQ/Procedures.jl b/misc/BM2014/BM14_Julia/EM_DFM_SS_restrMQ/Procedures.jl index cc51a89..f7131af 100644 --- a/misc/BM2014/BM14_Julia/EM_DFM_SS_restrMQ/Procedures.jl +++ b/misc/BM2014/BM14_Julia/EM_DFM_SS_restrMQ/Procedures.jl @@ -49,7 +49,7 @@ function InitCond(xNaN, r, p, optNaN, Rcon, q, NQ) # Rcon is R_mat, NQ is nQ R = Diagonal(dropdims(nanvar(resNaN, dims = 1), dims = 1)) R[NM+1:end, NM+1:end] .= 0 - C = [C zeros(NM,rC*NQ) kron([1 2 3 2 1], eye(NQ))] + C = [C [zeros(NM,rC*NQ); kron([1 2 3 2 1], eye(NQ))]] # Estimate A & Q from stacked F(t) = A*F(t-1) + e(t); z = f; From dd5d9a80a5fa50faa82aaebc5ab26756d7f8c8e3 Mon Sep 17 00:00:00 2001 From: Sebastian Krantz Date: Mon, 3 Jul 2023 00:37:04 +0200 Subject: [PATCH 07/14] Adding some explanations. --- .../BM14_Julia/EM_DFM_SS_restrMQ/Procedures.jl | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/misc/BM2014/BM14_Julia/EM_DFM_SS_restrMQ/Procedures.jl b/misc/BM2014/BM14_Julia/EM_DFM_SS_restrMQ/Procedures.jl index f7131af..653ddaf 100644 --- a/misc/BM2014/BM14_Julia/EM_DFM_SS_restrMQ/Procedures.jl +++ b/misc/BM2014/BM14_Julia/EM_DFM_SS_restrMQ/Procedures.jl @@ -24,25 +24,29 @@ function InitCond(xNaN, r, p, optNaN, Rcon, q, NQ) # Rcon is R_mat, NQ is nQ ff = zeros(T-rC+1, 0); for kk = 0:rC-1 - ff = [ff f[rC-kk:end-kk,:]]; + ff = [ff f[rC-kk:end-kk,:]]; # Matrix of lagged factors end Rcon = kron(Rcon,eye(r)); q = kron(q,ones(r,1)); - + + # This loops over the quarterly variables for i = N-NQ+1:N xx_i = xNaN[rC:T, i] if sum(.!isnan.(xx_i)) < size(ff, 2)+2 xx_i = x[rC:T, i] end ff_i = ff[.!isnan.(xx_i), :] - xx_i = xx_i[.!isnan.(xx_i)] + xx_i = xx_i[.!isnan.(xx_i)] # Quarterly observations (no interpolation) iff_i = inv(ff_i'*ff_i) - Cc = iff_i*ff_i'*xx_i + Cc = iff_i*ff_i'*xx_i # Coefficients from regressing quarterly observations on corresponding values of factors + # This is restricted least squares with restrictions: Rcon * C_0 = q + # The restrictions in Rcon (with -1 in the right places) make sense! Cc = Cc - iff_i*Rcon'*inv(Rcon*iff_i*Rcon')*(Rcon*Cc-q) - C[i, 1:rC*r] = Cc' + C[i, 1:rC*r] = Cc' # This replaces the corresponding row. end + # This computes residuals based on the new C matrix (with and without missing values) res = x[rC:end, :] - ff * C[:, 1:rC*r]' resNaN = copy(res) resNaN[indNaN[rC:end, :]] .= NaN From cdb1fb10e2b6c7146e120addb74042bb14ef344d Mon Sep 17 00:00:00 2001 From: Sebastian Krantz Date: Mon, 3 Jul 2023 00:37:54 +0200 Subject: [PATCH 08/14] Adding initial conditions function for mixed frequency estimation. --- R/init_cond.R | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/R/init_cond.R b/R/init_cond.R index b5d7b64..4ee952b 100644 --- a/R/init_cond.R +++ b/R/init_cond.R @@ -68,3 +68,72 @@ init_cond_idio_ar1 <- function(X, F_pc, v, n, r, p, BMl, rRi, rQi, anymiss, tol) return(list(A = A, C = C, Q = Q, R = R, F_0 = F_0, P_0 = P_0)) } +# Global variable in the package +Rcon <- matrix(c(2, -1, 0, 0, 0, + 3, 0, -1, 0, 0, + 2, 0, 0, -1, 0, + 1, 0, 0, 0, -1), ncol = 5, byrow = TRUE) + +init_cond_MQ <- function(X, X_imp, F_pc, v, n, r, p, nq, rRi, rQi) { + + rC <- 5L # ncol(Rcon) + pC <- max(p, rC) + rpC <- r * pC + TT <- nrow(F_pc) + nm <- n - nq + + # Observation equation ------------------------------- + # Static predictions (all.equal(unattrib(HDB(X_imp, F_pc)), unattrib(F_pc %*% t(v)))) + C <- cbind(v, matrix(0, n, rpC-r)) + rRcon <- kronecker(Rcon, diag(r)) + + # Contemporaneous factors + lags + FF <- do.call(cbind, lapply(0:(rC-1), function(i) F_pc[(rC - i):(TT - i), ])) + + # Now looping over the quarterly variables + for (i in (nm+1):n) { + x_i = X[rC:TT, i] + nna = whichNA(x_i, invert = TRUE) + if (length(nna) < ncol(FF) + 2L) x_i = X_imp[rC:TT, i] + ff_i = FF[nna, ] + x_i = x_i[nna] # Quarterly observations (no interpolation) + Iff_i = ainv(crossprod(ff_i)) + Cc = Iff_i %*% crossprod(ff_i, x_i) # Coefficients from regressing quarterly observations on lagged factors + # This is restricted least squares with restrictions: Rcon * C_0 = q + # The restrictions in Rcon (with -1 in the right places) make sense! + tmp = tcrossprod(Iff_i, rRcon) + Cc = Cc - tmp %*% ainv(rRcon %*% tmp) %*% rRcon %*% Cc + C[i, 1:(rC*r)] = Cc # This replaces the corresponding row. + } + + if(rRi) { + # This computes residuals based on the new C matrix + res <- X[rC:TT, ] - tcrossprod(FF, C[, 1:(rC*r)]) # residuals from static predictions + R <- if(rRi == 2L) cov(res, use = "pairwise.complete.obs") else diag(fvar(res, na.rm = TRUE)) + } else R <- diag(n) + R[-(1:nm), -(1:nm)] <- 0 # Note: should have tol on diagonal? + + # Note: should allow for additional zeros? + C = cbind(C, rbind(matrix(0, nm, rC*nq), kronecker(t(c(1, 2, 3, 2, 1)), diag(nq)))) + + # Transition equation ------------------------------- + rpC5nq <- rpC + 5 * nq + var <- .VAR(F_pc, p) + A <- Q <- matrix(0, rpC5nq, rpC5nq) + A[1:r, 1:rp] <- t(var$A) # var$A is rp x r matrix + A[(r+1):rpC, 1:(rpC-r)] <- diag(rpC-r) + A[(rpC+nq+1):rpC5nq, (rpC+nq+1):rpC5nq] <- diag(4*nq) + + Q[1:r, 1:r] <- switch(rQi + 1L, diag(r), diag(fvar(var$res)), cov(var$res)) + Q[(rpC+1):(rpC+nq), (rpC+1):(rpC+nq)] <- if(rRi == 2L) + cov(res[, (nm+1):end, drop = FALSE], use = "pairwise.complete.obs") else if(rRi == 1L) + diag(fvar(res[, (nm+1):end], na.rm = TRUE)) else diag(nq) + + # Initial state and state covariance (P) ------------ + F_0 <- rep(0, rpC5nq) # BM14 uses zeros, DGR12 uses the first row of PC's. Both give more or less the same... + # Kalman gain is normally A %*% t(A) + Q, but here A is somewhat tricky... + P_0 <- ainv(diag(rpC5nq^2) - kronecker(A,A)) %*% unattrib(Q) + dim(P_0) <- c(rpC5nq, rpC5nq) + + return(list(A = A, C = C, Q = Q, R = R, F_0 = F_0, P_0 = P_0)) +} \ No newline at end of file From 54aed42ee75c18b7c853e29a9e44a4b28031c8e0 Mon Sep 17 00:00:00 2001 From: Sebastian Krantz Date: Sun, 6 Aug 2023 14:38:47 +0200 Subject: [PATCH 09/14] Minors. --- R/init_cond.R | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/R/init_cond.R b/R/init_cond.R index 4ee952b..d1bd34d 100644 --- a/R/init_cond.R +++ b/R/init_cond.R @@ -74,12 +74,11 @@ Rcon <- matrix(c(2, -1, 0, 0, 0, 2, 0, 0, -1, 0, 1, 0, 0, 0, -1), ncol = 5, byrow = TRUE) -init_cond_MQ <- function(X, X_imp, F_pc, v, n, r, p, nq, rRi, rQi) { +init_cond_MQ <- function(X, X_imp, F_pc, v, n, r, p, TT, nq, rRi, rQi) { rC <- 5L # ncol(Rcon) pC <- max(p, rC) rpC <- r * pC - TT <- nrow(F_pc) nm <- n - nq # Observation equation ------------------------------- @@ -103,7 +102,7 @@ init_cond_MQ <- function(X, X_imp, F_pc, v, n, r, p, nq, rRi, rQi) { # The restrictions in Rcon (with -1 in the right places) make sense! tmp = tcrossprod(Iff_i, rRcon) Cc = Cc - tmp %*% ainv(rRcon %*% tmp) %*% rRcon %*% Cc - C[i, 1:(rC*r)] = Cc # This replaces the corresponding row. + C[i, 1:(rC*r)] = Cc # This replaces the corresponding row. } if(rRi) { @@ -125,8 +124,8 @@ init_cond_MQ <- function(X, X_imp, F_pc, v, n, r, p, nq, rRi, rQi) { A[(rpC+nq+1):rpC5nq, (rpC+nq+1):rpC5nq] <- diag(4*nq) Q[1:r, 1:r] <- switch(rQi + 1L, diag(r), diag(fvar(var$res)), cov(var$res)) - Q[(rpC+1):(rpC+nq), (rpC+1):(rpC+nq)] <- if(rRi == 2L) - cov(res[, (nm+1):end, drop = FALSE], use = "pairwise.complete.obs") else if(rRi == 1L) + Q[(rpC+1):(rpC+nq), (rpC+1):(rpC+nq)] <- if(rRi == 2L) + cov(res[, (nm+1):end, drop = FALSE], use = "pairwise.complete.obs") else if(rRi == 1L) diag(fvar(res[, (nm+1):end], na.rm = TRUE)) else diag(nq) # Initial state and state covariance (P) ------------ @@ -136,4 +135,4 @@ init_cond_MQ <- function(X, X_imp, F_pc, v, n, r, p, nq, rRi, rQi) { dim(P_0) <- c(rpC5nq, rpC5nq) return(list(A = A, C = C, Q = Q, R = R, F_0 = F_0, P_0 = P_0)) -} \ No newline at end of file +} From 9d5da90e73b2f2683cec40cbab4a738fd03c41a4 Mon Sep 17 00:00:00 2001 From: Sebastian Krantz Date: Sun, 9 Jun 2024 14:29:11 +0200 Subject: [PATCH 10/14] Save status. --- R/DFM.R | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/R/DFM.R b/R/DFM.R index ed9e5df..c737395 100644 --- a/R/DFM.R +++ b/R/DFM.R @@ -203,7 +203,8 @@ #' @export DFM <- function(X, r, p = 1L, ..., - idio.ar1 = FALSE, # quarterly.vars = c(), blocks = c(), + idio.ar1 = FALSE, + quarterly.vars = NULL, # blocks = c(), rQ = c("none", "diagonal", "identity"), rR = c("diagonal", "identity", "none"), em.method = c("auto", "DGR", "BM", "none"), @@ -230,6 +231,7 @@ DFM <- function(X, r, p = 1L, ..., if(!is.logical(idio.ar1) || is.na(idio.ar1)) stop("idio.ar1 needs to be logical") if(!is.logical(pos.corr) || is.na(pos.corr)) stop("pos.corr needs to be logical") if(!is.logical(check.increased) || is.na(check.increased)) stop("check.increased needs to be logical") + if(!is.null(quarterly.vars) && !is.character(quarterly.vars)) stop("quarterly.vars needs to be a character vector with the names of quarterly variables in X") rp <- r * p sr <- seq_len(r) @@ -242,6 +244,11 @@ DFM <- function(X, r, p = 1L, ..., Xnam <- dimnames(X)[[2L]] dimnames(X) <- NULL n <- ncol(X) + if(MFl <- !is.null(quarterly.vars)) { + qind <- ckmatch(quarterly.vars, Xnam) + nq <- length(qind) + if(!identical(sort(qind), (n-nq+1):n)) stop("Pleae order your data such that quarterly variables are to the right (after) the monthly variables") + } # Missing values anymiss <- anyNA(X) @@ -261,8 +268,8 @@ DFM <- function(X, r, p = 1L, ..., # This is because after removing missing rows, the data could be complete, e.g. differencing data with diff.xts() of collapse::fdiff() just gives a NA row if(anymiss && length(rm.rows)) anymiss <- any(W) - BMl <- switch(tolower(em.method[1L]), auto = anymiss, dgr = FALSE, bm = TRUE, none = NA, stop("Unknown EM option:", em.method[1L])) - if(idio.ar1 && !is.na(BMl)) BMl <- TRUE + BMl <- switch(tolower(em.method[1L]), auto = anymiss || idio.ar1 || MFl, dgr = FALSE || idio.ar1 || MFl, + bm = TRUE, none = NA, stop("Unknown EM option:", em.method[1L])) # Run PCA to get initial factor estimates: # v <- svd(X_imp, nu = 0L, nv = min(as.integer(r), n, TT))$v # Not faster than eigen... @@ -276,8 +283,13 @@ DFM <- function(X, r, p = 1L, ..., F_pc <- X_imp %*% v ## Initial System Matrices - init <- if(idio.ar1) init_cond_idio_ar1(X, F_pc, v, n, r, p, BMl, rRi, rQi, anymiss, tol) else - init_cond(X, F_pc, v, n, r, p, BMl, rRi, rQi) + if(MFl) { + init <- if(idio.ar1) stop("Not yet implemented") else + init_cond_MQ(X, X_imp, F_pc, v, n, r, p, TT, nq, rRi, rQi) + } else { + init <- if(idio.ar1) init_cond_idio_ar1(X, F_pc, v, n, r, p, BMl, rRi, rQi, anymiss, tol) else + init_cond(X, F_pc, v, n, r, p, BMl, rRi, rQi) + } A <- init$A; C <- init$C; Q <- init$Q; R <- init$R; F_0 <- init$F_0; P_0 <- init$P_0 ## Run standartized data through Kalman filter and smoother once From 2e548116cb6548014c2ed71d563fe7dac1f3a809 Mon Sep 17 00:00:00 2001 From: Sebastian Krantz Date: Sun, 16 Mar 2025 23:52:34 -0400 Subject: [PATCH 11/14] Add mixed-frequency option (quarterly.vars argument). --- DESCRIPTION | 6 +-- NAMESPACE | 3 ++ R/DFM.R | 28 +++++++--- R/EMBM_MQ.R | 144 ++++++++++++++++++++++++++++++++++++++++++++++++++ R/init_cond.R | 26 ++++++--- R/methods.R | 72 ++++++++++++++++++------- R/utils.R | 3 ++ man/DFM.Rd | 3 ++ 8 files changed, 248 insertions(+), 37 deletions(-) create mode 100644 R/EMBM_MQ.R diff --git a/DESCRIPTION b/DESCRIPTION index 1b5f353..0c8f860 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -15,14 +15,14 @@ Description: Efficient estimation of Dynamic Factor Models using the Expectation of factors are also provided - following Bai and Ng (2002) . URL: https://sebkrantz.github.io/dfms/ BugReports: https://github.com/SebKrantz/dfms/issues -Depends: R (>= 3.3.0) -Imports: Rcpp (>= 1.0.1), collapse (>= 1.8.0) +Depends: R (>= 3.5.0) +Imports: Rcpp (>= 1.0.1), collapse (>= 2.0.0) LinkingTo: Rcpp, RcppArmadillo Suggests: xts, vars, magrittr, testthat (>= 3.0.0), knitr, rmarkdown, covr License: GPL-3 Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE, roclets = c ("namespace", "rd", "srr::srr_stats_roclet")) -RoxygenNote: 7.1.2 +RoxygenNote: 7.2.3 Config/testthat/edition: 3 VignetteBuilder: knitr diff --git a/NAMESPACE b/NAMESPACE index 0f274cc..cf878dc 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -33,6 +33,7 @@ importFrom(collapse,"%=%") importFrom(collapse,"%r*%") importFrom(collapse,TRA.matrix) importFrom(collapse,ckmatch) +importFrom(collapse,flag) importFrom(collapse,fmedian) importFrom(collapse,fmedian.default) importFrom(collapse,fmin.matrix) @@ -54,6 +55,8 @@ importFrom(collapse,setop) importFrom(collapse,t_list) importFrom(collapse,unattrib) importFrom(collapse,unlist2d) +importFrom(collapse,vec) +importFrom(collapse,whichNA) importFrom(collapse,whichv) importFrom(grDevices,rainbow) importFrom(graphics,abline) diff --git a/R/DFM.R b/R/DFM.R index c737395..a599df1 100644 --- a/R/DFM.R +++ b/R/DFM.R @@ -2,6 +2,7 @@ .EM_DGR <- quote(EMstepDGR(X, A, C, Q, R, F_0, P_0, cpX, n, r, sr, TT, rQi, rRi)) .EM_BM <- quote(EMstepBMOPT(X, A, C, Q, R, F_0, P_0, XW0, W, n, r, sr, TT, dgind, dnkron, dnkron_ind, rQi, rRi)) .EM_BM_idio <- quote(EMstepBMidio(X, A, C, Q, R, F_0, P_0, XW0, W, dgind, dnkron, dnkron_ind, r, p, n, sr, TT, rQi, rRi)) +.EMstepBMMQ <- quote(EMstepBMMQ(X, A, C, Q, R, F_0, P_0, XW0, NW, dgind, dnkron, dnkron_ind, r, p, R_mat, n, nq, sr, TT, rQi, rRi)) .KFS <- quote(SKFS(X, A, C, Q, R, F_0, P_0)) @@ -15,6 +16,7 @@ #' @param p integer. number of lags in factor VAR. #' @param \dots (optional) arguments to \code{\link{tsnarmimp}}. #' @param idio.ar1 logical. Model observation errors as AR(1) processes: \eqn{e_t = \rho e_{t-1} + v_t}{e(t) = rho e(t-1) + v(t)}. \emph{Note} that this substantially increases computation time, and is generaly not needed if \code{n} is large (>30). See theoretical vignette for details. +#' @param quarterly.vars character. Names of quarterly variables in \code{X} (if any). Monthly variables should be to the left of the quarterly variables in the data matrix and quarterly observations should be provided every 3rd period. #' @param rQ character. restrictions on the state (transition) covariance matrix (Q). #' @param rR character. restrictions on the observation (measurement) covariance matrix (R). #' @param em.method character. The implementation of the Expectation Maximization Algorithm used. The options are: @@ -115,7 +117,7 @@ #' Stock, J. H., & Watson, M. W. (2016). Dynamic Factor Models, Factor-Augmented Vector Autoregressions, and Structural Vector Autoregressions in Macroeconomics. \emph{Handbook of Macroeconomics, 2}, 415–525. https://doi.org/10.1016/bs.hesmac.2016.04.002 #' #' @useDynLib dfms, .registration = TRUE -#' @importFrom collapse fscale qsu fvar fmedian fmedian.default qM unattrib na_omit %=% %-=% %+=% %/=% %*=% %r*% whichv setColnames setDimnames +#' @importFrom collapse fscale qsu fvar flag fmedian fmedian.default qM unattrib na_omit %=% %-=% %+=% %/=% %*=% %r*% whichv whichNA vec setColnames setDimnames #' @importFrom grDevices rainbow #' @importFrom graphics abline legend lines par #' @importFrom stats filter residuals rnorm spline ts.plot @@ -312,6 +314,7 @@ DFM <- function(X, r, p = 1L, ..., P_2s = setDimnames(kfs_res$P_smooth[sr, sr,, drop = FALSE], list(fnam, fnam, NULL)), anyNA = anymiss, # || length(rm.rows), # This is for internal missing values only rm.rows = rm.rows, + quarterly.vars = quarterly.vars, em.method = if(is.na(BMl)) "none" else if(BMl) "BM" else "DGR", call = match.call()) @@ -347,11 +350,22 @@ DFM <- function(X, r, p = 1L, ..., converged <- FALSE if(BMl) { - expr <- if(idio.ar1) .EM_BM_idio else .EM_BM - dnkron <- matrix(1, r, r) %x% diag(n) # Used to be inside EMstep, taken out to speed up the algorithm - dnkron_ind <- whichv(dnkron, 1) - XW0 <- X_imp + if(MFl) { + nm <- n - nq + rpC <- r * max(p, 5L) + rpC1nq <- (rpC+1L):(rpC+nq) + NW <- !W + R_mat <- kronecker(Rcon, diag(r)) # TODO: autsource this + expr <- if(idio.ar1) stop("not implemented yet") else .EMstepBMMQ + dnkron <- matrix(1, r, r) %x% diag(nm) # Used to be inside EMstep, taken out to speed up the algorithm + dnkron_ind <- whichv(dnkron, 1) + } else { + expr <- if(idio.ar1) .EM_BM_idio else .EM_BM + dnkron <- matrix(1, r, r) %x% diag(n) # Used to be inside EMstep, taken out to speed up the algorithm + dnkron_ind <- whichv(dnkron, 1) + } dgind <- 0:(n-1) * n + 1:n + XW0 <- X_imp if(anymiss) XW0[W] <- 0 else W <- is.na(X) # TODO: think about this... } else { expr <- .EM_DGR @@ -391,7 +405,9 @@ DFM <- function(X, r, p = 1L, ..., A = setDimnames(em_res$A[sr, seq_len(rp), drop = FALSE], lagnam(fnam, p)), C = setDimnames(em_res$C[, sr, drop = FALSE], list(Xnam, fnam)), Q = setDimnames(em_res$Q[sr, sr, drop = FALSE], list(unam, unam)), - R = setDimnames(if(idio.ar1) em_res$Q[-seq_len(rp), -seq_len(rp), drop = FALSE] else em_res$R, list(Xnam, Xnam)), + R = setDimnames(if(MFl && idio.ar1) stop("not implemented yet") else if(MFl) + `[<-`(em_res$R, (nm+1):n, (nm+1):n, value = em_res$Q[rpC1nq, rpC1nq]) else if(idio.ar1) + em_res$Q[-seq_len(rp), -seq_len(rp), drop = FALSE] else em_res$R, list(Xnam, Xnam)), e = if(idio.ar1) setColnames(kfs_res$F_smooth[, -seq_len(rp), drop = FALSE], Xnam) else NULL, rho = if(idio.ar1) setNames(diag(em_res$A)[-seq_len(rp)], Xnam) else NULL, loglik = if(num_iter == max.iter) loglik_all else loglik_all[seq_len(num_iter)], diff --git a/R/EMBM_MQ.R b/R/EMBM_MQ.R new file mode 100644 index 0000000..a6db55f --- /dev/null +++ b/R/EMBM_MQ.R @@ -0,0 +1,144 @@ + +# Define the inputs of the function +# X: a matrix with n rows and TT columns representing the observed data at each time t. +# A: a transition matrix with size r*p+r*r. +# C: an observation matrix with size n*r. +# Q: a covariance matrix for the state equation disturbances with size r*p+r*r. +# R: a covariance matrix for the observation disturbances with size n*n. +# Z_0: the initial value of the state variable. +# V_0: the initial value of the variance-covariance matrix of the state variable. +# r: order of the state autoregression (AR) process. +# p: number of lags in the state AR process. +# i_idio: a vector of indicators specifying which variables are idiosyncratic noise variables.' + +# Z_0 = F_0; V_0 = P_0 +EMstepBMMQ = function(X, A, C, Q, R, Z_0, V_0, XW0, NW, dgind, dnkron, dnkron_ind, r, p, R_mat, n, nq, sr, TT, rQi, rRi) { + + # Define dimensions of the input arguments + rC = 5L # ncol(Rcon) + pC = max(p, rC) + rp = r * p + rpC = r * pC + nm = n - nq + rC = rC * r # Note here replacing rC!! + + snm = seq_len(nm) + srp = seq_len(rp) + srpC = seq_len(rpC) + rpC1nq = (rpC+1L):(rpC+nq) + + # Compute expected sufficient statistics for a single Kalman filter sequence + # Run the Kalman filter with the current parameter estimates + kfs_res = SKFS(X, A, C, Q, R, Z_0, V_0, TRUE) + + # Extract values from the Kalman Filter output + Zsmooth = kfs_res$F_smooth + Vsmooth = kfs_res$P_smooth + VVsmooth = kfs_res$PPm_smooth + Zsmooth0 = kfs_res$F_smooth_0 + Vsmooth0 = kfs_res$P_smooth_0 + loglik = kfs_res$loglik + + # Perform algebraic operations involving E(Z_t), E(Z_{t-1}), Y_t, Y_{t-1} to update A_new, Q_new, C_new, and R_new + + # Normal Expectations to get Z(t+1) and A(t+1). + tmp = rbind(Zsmooth0[srpC], Zsmooth[-TT, srpC, drop = FALSE]) + tmp2 = sum3(Vsmooth[srpC, srpC, -TT, drop = FALSE]) + EZZ = crossprod(Zsmooth[, srpC, drop = FALSE]) %+=% (tmp2 + Vsmooth[srpC, srpC, TT]) # E(Z'Z) + EZZ_BB = crossprod(tmp) %+=% (tmp2 + Vsmooth0[srpC, srpC]) # E(Z(-1)'Z(-1)) + EZZ_FB = crossprod(Zsmooth[, srpC, drop = FALSE], tmp) %+=% sum3(VVsmooth[srpC, srpC,, drop = FALSE]) # E(Z'Z(-1)) + + # Update matrices A and Q + A_new = A + Q_new = Q + + # System matrices + A_new[sr, srp] = EZZ_FB[sr, srp, drop = FALSE] %*% ainv(EZZ_BB[srp, srp, drop = FALSE]) + if(rQi) { + Qsr = (EZZ[sr, sr] - tcrossprod(A_new[sr, srp, drop = FALSE], EZZ_FB[sr, srp, drop = FALSE])) / TT + Q_new[sr, sr] = if(rQi == 2L) Qsr else diag(diag(Qsr)) + } else Q_new[sr, sr] = diag(r) + Q_new[rpC1nq, rpC1nq] = diag(diag(crossprod(Zsmooth[, rpC1nq, drop = FALSE]) + sum3(Vsmooth[rpC1nq, rpC1nq,, drop = FALSE])) / TT) + + # E(X'X) & E(X'Z) + # Estimate matrix C using maximum likelihood approach + denom = numeric(nm*r^2) + nom = matrix(0, nm, r) + + # nanYt = diagm(nanY[1:nM,t] .== 0) + # denom += kron(Zsmooth[1:r,t+1] * Zsmooth[1:r,t+1]' + Vsmooth[1:r,1:r,t+1], nanYt) + # nom += y[1:nM,t]*Zsmooth[1:r,t+1]'; + for (t in 1:TT) { + tmp = Zsmooth[t, sr] + nom %+=% tcrossprod(XW0[t, snm], tmp) + tmp2 = tcrossprod(tmp) + Vsmooth[sr, sr, t] + dim(tmp2) = NULL + denom %+=% tcrossprod(tmp2, NW[t, snm]) + } + + dim(denom) = c(r, r, nm) + dnkron[dnkron_ind] = aperm.default(denom, c(1L, 3L, 2L)) + # Solve for vec_C and reshape into C_new + C_new = C + C_new[1:nm, sr] = solve.default(dnkron, unattrib(nom)) # ainv() -> slower... + + # Now updating the quarterly observation matrix C + for (i in (nm+1):n) { + denom = numeric(rC^2) + nom = numeric(rC) + + for (t in 1:TT) { + if(NW[t, i]) denom %+=% (crossprod(Zsmooth[t, 1:rC, drop = FALSE]) + Vsmooth[1:rC, 1:rC , t]) + nom %+=% (XW0[t, i] * Zsmooth[t, 1:rC]) + } + dim(denom) = c(rC, rC) + denom_inv = ainv(denom) + C_i = denom_inv %*% nom # Is this a scalar? -> yes, otherwise the replacement with C_new doesn't make sense + tmp = tcrossprod(denom_inv, R_mat) + C_i_constr = C_i - tmp %*% ainv(R_mat %*% tmp) %*% R_mat %*% C_i + C_new[i, 1:rC] = C_i_constr + } + + if(rRi) { + R_new = matrix(0, n, n) + Rdg = R[dgind] + for (t in 1:TT) { + nnanYt = NW[t, ] + tmp = C_new * nnanYt + R[dgind] = Rdg * !nnanYt # If R is not diagonal + tmp2 = tmp %*% tcrossprod(Vsmooth[,, t], tmp) + tmp2 %+=% tcrossprod(XW0[t, ] - tmp %*% Zsmooth[t, ]) + tmp2 %+=% R # If R is not diagonal + # tmp2[dgind] = tmp2[dgind] + (Rdg * !nnanYt) # If R is diagonal... + R_new %+=% tmp2 + } + if(rRi == 2L) { # Unrestricted + R_new %/=% TT + R_new[R_new < 1e-7] = 1e-7 + R_new[(nm+1):n, ] = 0 + R_new[, (nm+1):n] = 0 + } else { # Diagonal + RR = R_new[dgind] / TT + RR[RR < 1e-7] = 1e-7 # RR(RR<1e-2) = 1e-2; + R_new %*=% 0 + R_new[dgind[snm]] = RR[snm] # TODO: set quarterly obs to a small number? + } + } else { + R_new = diag(n) + diag(R_new)[(nm+1):n] = 0 + } + + # Set initial conditions + V_0_new = V_0 + V_0_new[srpC, srpC] = Vsmooth0[srpC, srpC] + diag(V_0_new)[-srpC] = diag(Vsmooth0)[-srpC] + + return(list(A = A_new, + C = C_new, + Q = Q_new, + R = R_new, + F_0 = drop(Zsmooth0), + P_0 = V_0_new, + loglik = kfs_res$loglik)) + +} diff --git a/R/init_cond.R b/R/init_cond.R index d1bd34d..c0c57ee 100644 --- a/R/init_cond.R +++ b/R/init_cond.R @@ -76,6 +76,8 @@ Rcon <- matrix(c(2, -1, 0, 0, 0, init_cond_MQ <- function(X, X_imp, F_pc, v, n, r, p, TT, nq, rRi, rQi) { + # .c(X, X_imp, F_pc, v, n, r, p, TT, nq, rRi, rQi) %=% list(X, X_imp, F_pc, v, n, r, p, TT, nq, rRi, rQi) + rp <- r * p rC <- 5L # ncol(Rcon) pC <- max(p, rC) rpC <- r * pC @@ -89,7 +91,7 @@ init_cond_MQ <- function(X, X_imp, F_pc, v, n, r, p, TT, nq, rRi, rQi) { # Contemporaneous factors + lags FF <- do.call(cbind, lapply(0:(rC-1), function(i) F_pc[(rC - i):(TT - i), ])) - # Now looping over the quarterly variables + # Now looping over the quarterly variables: at the end for (i in (nm+1):n) { x_i = X[rC:TT, i] nna = whichNA(x_i, invert = TRUE) @@ -98,7 +100,7 @@ init_cond_MQ <- function(X, X_imp, F_pc, v, n, r, p, TT, nq, rRi, rQi) { x_i = x_i[nna] # Quarterly observations (no interpolation) Iff_i = ainv(crossprod(ff_i)) Cc = Iff_i %*% crossprod(ff_i, x_i) # Coefficients from regressing quarterly observations on lagged factors - # This is restricted least squares with restrictions: Rcon * C_0 = q + # This is restricted least squares with restrictions: Rcon * C_0 = (q is 0) # The restrictions in Rcon (with -1 in the right places) make sense! tmp = tcrossprod(Iff_i, rRcon) Cc = Cc - tmp %*% ainv(rRcon %*% tmp) %*% rRcon %*% Cc @@ -110,7 +112,8 @@ init_cond_MQ <- function(X, X_imp, F_pc, v, n, r, p, TT, nq, rRi, rQi) { res <- X[rC:TT, ] - tcrossprod(FF, C[, 1:(rC*r)]) # residuals from static predictions R <- if(rRi == 2L) cov(res, use = "pairwise.complete.obs") else diag(fvar(res, na.rm = TRUE)) } else R <- diag(n) - R[-(1:nm), -(1:nm)] <- 0 # Note: should have tol on diagonal? + R[(nm+1):n, (nm+1):n] <- 0 # Note: should have tol on diagonal? + # diag(R)[diag(R) == 0] <- 1e-6 # Note: should allow for additional zeros? C = cbind(C, rbind(matrix(0, nm, rC*nq), kronecker(t(c(1, 2, 3, 2, 1)), diag(nq)))) @@ -120,18 +123,25 @@ init_cond_MQ <- function(X, X_imp, F_pc, v, n, r, p, TT, nq, rRi, rQi) { var <- .VAR(F_pc, p) A <- Q <- matrix(0, rpC5nq, rpC5nq) A[1:r, 1:rp] <- t(var$A) # var$A is rp x r matrix - A[(r+1):rpC, 1:(rpC-r)] <- diag(rpC-r) - A[(rpC+nq+1):rpC5nq, (rpC+nq+1):rpC5nq] <- diag(4*nq) + A[(r+1L):rpC, 1:(rpC-r)] <- diag(rpC-r) + A[(rpC+nq+1L):rpC5nq, (rpC+nq+1L):rpC5nq] <- diag(4*nq) Q[1:r, 1:r] <- switch(rQi + 1L, diag(r), diag(fvar(var$res)), cov(var$res)) Q[(rpC+1):(rpC+nq), (rpC+1):(rpC+nq)] <- if(rRi == 2L) - cov(res[, (nm+1):end, drop = FALSE], use = "pairwise.complete.obs") else if(rRi == 1L) - diag(fvar(res[, (nm+1):end], na.rm = TRUE)) else diag(nq) + cov(res[, -seq_len(nm), drop = FALSE], use = "pairwise.complete.obs") else if(rRi == 1L) + diag(fvar(res[, -seq_len(nm)], na.rm = TRUE)) else diag(nq) + diag(Q)[diag(Q) == 0] <- 1e-6 # Prevent singularity in Kalman Filter + # Initial state and state covariance (P) ------------ F_0 <- rep(0, rpC5nq) # BM14 uses zeros, DGR12 uses the first row of PC's. Both give more or less the same... # Kalman gain is normally A %*% t(A) + Q, but here A is somewhat tricky... - P_0 <- ainv(diag(rpC5nq^2) - kronecker(A,A)) %*% unattrib(Q) + # A_sparse <- as(A, "sparseMatrix") + # M_sparse <- diag(rpC5nq^2) - kronecker(A_sparse, A_sparse) + 1e-6 + # P_0 <- solve(M_sparse) %*% unattrib(Q) + M <- diag(rpC5nq^2) - kronecker(A, A) + diag(M) <- diag(M) + 1e-4 # Ensure matrix is non-singular + P_0 <- ainv(M) %*% unattrib(Q) dim(P_0) <- c(rpC5nq, rpC5nq) return(list(A = A, C = C, Q = Q, R = R, F_0 = F_0, P_0 = P_0)) diff --git a/R/methods.R b/R/methods.R index 9f288a1..ad30ad6 100644 --- a/R/methods.R +++ b/R/methods.R @@ -19,8 +19,13 @@ print.dfm <- function(x, digits = 4L, ...) { A <- x$A r <- dim(A)[1L] p <- dim(A)[2L]/r + if(length(qv <- x$quarterly.vars)) { + cat("Mixed Frequency Dynamic Factor Model\nn = ", dim(X)[2L], ", nm = ", dim(X)[2L] - length(qv), ", nq = ", length(qv), ", T = ", dim(X)[1L], ", r = ", r, ", p = ", p, + "\n%NA = ", round(sum(attr(X, "missing"))/prod(dim(X))*100, digits), ", %NAm = ", round(sum(attr(X, "missing")[, -ckmatch(qv, colnames(X))])/(nrow(X)*(ncol(X)-length(qv)))*100, digits), "\n", sep = "") + } else { cat("Dynamic Factor Model: n = ", dim(X)[2L], ", T = ", dim(X)[1L], ", r = ", r, ", p = ", p, ", %NA = ", if(x$anyNA) round(sum(attr(X, "missing"))/prod(dim(X))*100, digits) else 0,"\n", sep = "") + } if(length(x$rho)) cat(" with AR(1) errors: mean(abs(rho)) =", round(mean(abs(x$rho)), 3), "\n") fnam <- paste0("f", seq_len(r)) cat("\nFactor Transition Matrix [A]\n") @@ -49,8 +54,9 @@ summary.dfm <- function(object, method = switch(object$em.method, none = "2s", " rescov <- pwcov(res, use = if(!idio_ar1 && anymissing) "pairwise.complete.obs" else "everything", P = TRUE) ACF <- if(idio_ar1) object$rho else AC1(res, anymissing) R2 <- 1 - diag(rescov[,, 1L]) - summ <- list(info = c(n = dim(X)[2L], T = dim(X)[1L], r = r, p = p, - `%NA` = if(anymissing) sum(attr(X, "missing")) / prod(dim(X)) * 100 else 0), + summ <- list(info = c(n = dim(X)[2L], T = dim(X)[1L], r = r, p = p, nq = length(object$quarterly.vars), + `%NA` = if(anymissing) sum(attr(X, "missing")) / prod(dim(X)) * 100 else 0, + `%NAm` = if(length(object$quarterly.vars)) sum(attr(X, "missing")[, -ckmatch(object$quarterly.vars, colnames(X))])/(nrow(X)*(ncol(X)-length(object$quarterly.vars)))*100 else NA), call = object$call, idio_ar1 = idio_ar1, F_stats = msum(Fa), @@ -83,8 +89,13 @@ print.dfm_summary <- function(x, compact = sum(x$info["n"] > 15, x$info["n"] > 40), ...) { inf <- as.integer(x$info[1:4]) - cat("Dynamic Factor Model: n = ", inf[1L], ", T = ", inf[2L], ", r = ", inf[3L], ", p = ", inf[4L], - ", %NA = ", round(x$info[5L], digits), "\n", sep = "") + if(length(x$info["nq"] > 0)) { + cat("Mixed Frequency Dynamic Factor Model\nn = ", inf[1L], ", nm = ", inf[1L] - x$info["nq"], ", nq = ", x$info["nq"], ", T = ", inf[2L], ", r = ", inf[3L], ", p = ", inf[4L], + "\n%NA = ", round(x$info["%NA"], digits), ", %NAm = ", round(x$info["%NAm"], digits), "\n", sep = "") + } else { + cat("Dynamic Factor Model: n = ", inf[1L], ", T = ", inf[2L], ", r = ", inf[3L], ", p = ", inf[4L], + ", %NA = ", round(x$info["%NA"], digits), "\n", sep = "") + } if(x$idio_ar1) cat(" with AR(1) errors: mean(abs(rho)) =", round(mean(abs(x$res_ACF)), 3), "\n") cat("\nCall: ", deparse(x$call)) # cat("\nModel: ", )) @@ -297,6 +308,19 @@ as.data.frame.dfm <- function(x, ..., return(res) } +predict_dfm_core <- function(object, method) { + Fa <- switch(tolower(method), + pca = object$F_pca, `2s` = object$F_2s, qml = object$F_qml, + stop("Unkown method", method)) + if(is.null(object$quarterly.vars)) return(tcrossprod(Fa, object$C)) + qind <- ckmatch(object$quarterly.vars, dimnames(object$C)[[1L]]) + res_m <- tcrossprod(Fa, object$C[-qind,, drop = FALSE]) + Fa_lags <- flag(Fa, 0:4, fill = 0, stubs = FALSE) + Cq_lags <- object$C[qind, rep(1:ncol(Fa), each = 5), drop = FALSE] %r*% rep(c(1, 2, 3, 2, 1), ncol(Fa)) + res_q <- tcrossprod(Fa_lags, Cq_lags) + cbind(res_m, res_q) +} + #' @name residuals.dfm #' @aliases residuals.dfm #' @aliases resid.dfm @@ -336,11 +360,8 @@ residuals.dfm <- function(object, standardized = FALSE, na.keep = TRUE, ...) { X <- object$X_imp - Fa <- switch(tolower(method), - pca = object$F_pca, `2s` = object$F_2s, qml = object$F_qml, - stop("Unkown method", method)) if(!(standardized && length(object[["e"]]))) { - X_pred <- tcrossprod(Fa, object$C) + X_pred <- predict_dfm_core(object, method) if(!standardized) { # TODO: What if AR(1) resid available? stats <- attr(X, "stats") X_pred <- unscale(X_pred, stats) @@ -364,10 +385,7 @@ fitted.dfm <- function(object, standardized = FALSE, na.keep = TRUE, ...) { X <- object$X_imp - Fa <- switch(tolower(method), - pca = object$F_pca, `2s` = object$F_2s, qml = object$F_qml, - stop("Unkown method", method)) - res <- tcrossprod(Fa, object$C) + res <- predict_dfm_core(object, method) if(!standardized) res <- unscale(res, attr(X, "stats")) if(na.keep && object$anyNA) res[attr(X, "missing")] <- NA if(orig.format) { @@ -459,16 +477,30 @@ predict.dfm <- function(object, F_fc <- matrix(NA_real_, nrow = h, ncol = nf) X_fc <- matrix(NA_real_, nrow = h, ncol = ny) - F_last <- ftail(Fa, p) # dimnames(F_last) <- list(c("L2", "L1"), c("f1", "f2")) - spi <- p:1 # DFM forecasting loop - for (i in seq_len(h)) { - F_reg <- ftail(F_last, p) - F_fc[i, ] <- tmp <- A %*% `dim<-`(t(F_reg)[, spi, drop = FALSE], NULL) - dim(tmp) <- NULL - X_fc[i, ] <- C %*% tmp - F_last <- rbind(F_last, tmp) + if(is.null(object$quarterly.vars)) { + F_last <- ftail(Fa, p) # dimnames(F_last) <- list(c("L2", "L1"), c("f1", "f2")) + for (i in seq_len(h)) { + F_reg <- ftail(F_last, p) + F_fc[i, ] <- tmp <- A %*% vec(t(F_reg)[, p:1, drop = FALSE]) + dim(tmp) <- NULL + X_fc[i, ] <- C %*% tmp + F_last <- rbind(F_reg, tmp) + } + } else { + F_last <- ftail(Fa, max(p, 5)) + qind <- ckmatch(object$quarterly.vars, dimnames(X)[[2L]]) + Cq_lags <- object$C[qind, rep(1:ncol(Fa), each = 5), drop = FALSE] %r*% rep(c(1, 2, 3, 2, 1), ncol(Fa)) + # Mixed frequency forecasting loop + for (i in seq_len(h)) { + F_reg <- ftailrev(F_last, p) + F_fc[i, ] <- tmp <- A %*% vec(t(F_reg)) + dim(tmp) <- NULL + X_fc[i, -qind] <- C[-qind,, drop = FALSE] %*% tmp + F_last <- rbind(F_last, tmp) + X_fc[i, qind] <- Cq_lags %*% vec(ftailrev(F_last, 5)) + } } # Additional univariate forecasting of the residuals? diff --git a/R/utils.R b/R/utils.R index 1c77b2d..fb3ddc3 100644 --- a/R/utils.R +++ b/R/utils.R @@ -49,6 +49,9 @@ unscale <- function(x, stats) TRA.matrix(TRA.matrix(x, stats[, "SD"], "*"), stat #' @noRd ftail <- function(x, p) {n <- dim(x)[1L]; x[(n-p+1L):n, , drop = FALSE]} +ftailrev <- function(x, p) {n <- dim(x)[1L]; x[n:(n-p+1L),, drop = FALSE]} + + #' (Fast) Barebones Vector-Autoregression #' #' Quickly estimate a VAR(p) model using Armadillo's inverse function. diff --git a/man/DFM.Rd b/man/DFM.Rd index 99b6d22..0336c05 100644 --- a/man/DFM.Rd +++ b/man/DFM.Rd @@ -10,6 +10,7 @@ DFM( p = 1L, ..., idio.ar1 = FALSE, + quarterly.vars = NULL, rQ = c("none", "diagonal", "identity"), rR = c("diagonal", "identity", "none"), em.method = c("auto", "DGR", "BM", "none"), @@ -31,6 +32,8 @@ DFM( \item{idio.ar1}{logical. Model observation errors as AR(1) processes: \eqn{e_t = \rho e_{t-1} + v_t}{e(t) = rho e(t-1) + v(t)}. \emph{Note} that this substantially increases computation time, and is generaly not needed if \code{n} is large (>30). See theoretical vignette for details.} +\item{quarterly.vars}{character. Names of quarterly variables in \code{X} (if any). Monthly variables should be to the left of the quarterly variables in the data matrix and quarterly observations should be provided every 3rd period.} + \item{rQ}{character. restrictions on the state (transition) covariance matrix (Q).} \item{rR}{character. restrictions on the observation (measurement) covariance matrix (R).} From c40300b3945c09213eb192c2456fbbda81f24203 Mon Sep 17 00:00:00 2001 From: Sebastian Krantz Date: Sun, 16 Mar 2025 23:58:29 -0400 Subject: [PATCH 12/14] This is version 0.3.0. --- DESCRIPTION | 2 +- NEWS.md | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 0c8f860..83ad10e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: dfms -Version: 0.2.2 +Version: 0.3.0 Title: Dynamic Factor Models Authors@R: c(person("Sebastian", "Krantz", role = c("aut", "cre"), email = "sebastian.krantz@graduateinstitute.ch"), person("Rytis", "Bagdziunas", role = "aut")) diff --git a/NEWS.md b/NEWS.md index e8f2b5f..350105a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,8 @@ -# dfms 0.2.1.9000 +# dfms 0.3.0 + +* Added argument `quarterly.vars`, enabling mixed-frequency estimation with monthly and quarterly data following Banbura and Modugno (2014). The data matrix should contain the quarterly variables at the end of the matrix (after the monthly ones). + +# dfms 0.2.2 * Replace Armadillo `inv_sympd()` by Armadillo `inv()` in C++ Kalman Filter to improve numerical robustness at a minor performance cost. From 1543adf838781ca73ca78a500c5755525eea0cc5 Mon Sep 17 00:00:00 2001 From: Sebastian Krantz Date: Mon, 17 Mar 2025 00:09:49 -0400 Subject: [PATCH 13/14] New pkgdown. --- .github/workflows/pkgdown.yaml | 51 ++++++++++++++++++++++++++++++++++ _pkgdown.yml | 1 + 2 files changed, 52 insertions(+) create mode 100644 .github/workflows/pkgdown.yaml diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml new file mode 100644 index 0000000..4b73749 --- /dev/null +++ b/.github/workflows/pkgdown.yaml @@ -0,0 +1,51 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, master] + pull_request: + release: + types: [published] + workflow_dispatch: + +name: pkgdown.yaml + +permissions: read-all + +jobs: + pkgdown: + runs-on: ubuntu-latest + # Only restrict concurrency for non-PR jobs + concurrency: + group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + permissions: + contents: write + steps: + - uses: actions/checkout@v4 + + - uses: r-lib/actions/setup-pandoc@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: | + github::SebKrantz/pkgdown + local::. + needs: website + + - name: Build site + run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) + shell: Rscript {0} + + - name: Deploy to GitHub pages 🚀 + if: github.event_name != 'pull_request' + uses: JamesIves/github-pages-deploy-action@v4.5.0 + with: + clean: false + branch: gh-pages + folder: docs diff --git a/_pkgdown.yml b/_pkgdown.yml index efb5883..06cbe25 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -1,3 +1,4 @@ +url: https://sebkrantz.github.io/dfms/ destination: docs home: From 6699ef7ddf85afec79ff8f0b90a809b048e8ec58 Mon Sep 17 00:00:00 2001 From: Sebastian Krantz Date: Mon, 17 Mar 2025 00:13:29 -0400 Subject: [PATCH 14/14] Compatibility with master. --- DESCRIPTION | 2 +- NEWS.md | 2 +- README.md | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 83ad10e..540629f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -23,6 +23,6 @@ License: GPL-3 Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE, roclets = c ("namespace", "rd", "srr::srr_stats_roclet")) -RoxygenNote: 7.2.3 +RoxygenNote: 7.1.2 Config/testthat/edition: 3 VignetteBuilder: knitr diff --git a/NEWS.md b/NEWS.md index 350105a..bc4d7f8 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,7 +2,7 @@ * Added argument `quarterly.vars`, enabling mixed-frequency estimation with monthly and quarterly data following Banbura and Modugno (2014). The data matrix should contain the quarterly variables at the end of the matrix (after the monthly ones). -# dfms 0.2.2 +# dfms 0.2.1.9000 * Replace Armadillo `inv_sympd()` by Armadillo `inv()` in C++ Kalman Filter to improve numerical robustness at a minor performance cost. diff --git a/README.md b/README.md index 6183a36..3b66b4a 100644 --- a/README.md +++ b/README.md @@ -5,11 +5,11 @@ [![dfms status badge](https://sebkrantz.r-universe.dev/badges/dfms)](https://sebkrantz.r-universe.dev) [![CRAN status](https://www.r-pkg.org/badges/version/dfms)](https://cran.r-project.org/package=dfms) [![cran checks](https://badges.cranchecks.info/worst/dfms.svg)](https://cran.r-project.org/web/checks/check_results_dfms.html) -![downloads per month](http://cranlogs.r-pkg.org/badges/dfms?color=blue) -![downloads](http://cranlogs.r-pkg.org/badges/grand-total/dfms?color=blue) +![downloads per month](https://cranlogs.r-pkg.org/badges/dfms?color=blue) +![downloads](https://cranlogs.r-pkg.org/badges/grand-total/dfms?color=blue) [![Codecov test coverage](https://codecov.io/gh/SebKrantz/dfms/branch/main/graph/badge.svg)](https://app.codecov.io/gh/SebKrantz/dfms?branch=main) -[![minimal R version](https://img.shields.io/badge/R%3E%3D-3.3.0-6666ff.svg)](https://cran.r-project.org/) -[![status](https://tinyverse.netlify.com/badge/dfms)](https://CRAN.R-project.org/package=dfms) +[![minimal R version](https://img.shields.io/badge/R%3E%3D-3.4.0-6666ff.svg)](https://cran.r-project.org/) +[![dependencies](https://tinyverse.netlify.app/badge/dfms)](https://CRAN.R-project.org/package=dfms) [![Project Status: Active – The project has reached a stable, usable state and is being actively developed.](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org/#active)