From 2375589e2aee9b698a25abfe540473ad6ef47703 Mon Sep 17 00:00:00 2001 From: Sebastian Krantz Date: Mon, 12 May 2025 23:49:37 +0200 Subject: [PATCH 01/21] Spelling. --- R/DFM.R | 8 ++++---- man/DFM.Rd | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/R/DFM.R b/R/DFM.R index 3582d86..3b06ece 100644 --- a/R/DFM.R +++ b/R/DFM.R @@ -12,13 +12,13 @@ #' with time-invariant system matrices and classical assumptions, while permitting missing data. #' #' @param X a \code{T x n} numeric data matrix or frame of stationary time series. May contain missing values. -#' @param r integer. number of factors. -#' @param p integer. number of lags in factor VAR. +#' @param r integer. Number of factors. +#' @param p integer. Number of lags in factor VAR. #' @param \dots (optional) arguments to \code{\link{tsnarmimp}}. #' @param idio.ar1 logical. Model observation errors as AR(1) processes: \eqn{e_t = \rho e_{t-1} + v_t}{e(t) = rho e(t-1) + v(t)}. \emph{Note} that this substantially increases computation time, and is generaly not needed if \code{n} is large (>30). See theoretical vignette for details. #' @param quarterly.vars character. Names of quarterly variables in \code{X} (if any). Monthly variables should be to the left of the quarterly variables in the data matrix and quarterly observations should be provided every 3rd period. -#' @param rQ character. restrictions on the state (transition) covariance matrix (Q). -#' @param rR character. restrictions on the observation (measurement) covariance matrix (R). +#' @param rQ character. Restrictions on the state (transition) covariance matrix (Q). +#' @param rR character. Restrictions on the observation (measurement) covariance matrix (R). #' @param em.method character. The implementation of the Expectation Maximization Algorithm used. The options are: #' \tabular{llll}{ #' \code{"auto"} \tab\tab Automatic selection: \code{"BM"} if \code{anyNA(X)}, else \code{"DGR"}. 
\cr\cr diff --git a/man/DFM.Rd b/man/DFM.Rd index 2f4a277..ac311f4 100644 --- a/man/DFM.Rd +++ b/man/DFM.Rd @@ -24,9 +24,9 @@ DFM( \arguments{ \item{X}{a \code{T x n} numeric data matrix or frame of stationary time series. May contain missing values.} -\item{r}{integer. number of factors.} +\item{r}{integer. Number of factors.} -\item{p}{integer. number of lags in factor VAR.} +\item{p}{integer. Number of lags in factor VAR.} \item{\dots}{(optional) arguments to \code{\link{tsnarmimp}}.} @@ -34,9 +34,9 @@ DFM( \item{quarterly.vars}{character. Names of quarterly variables in \code{X} (if any). Monthly variables should be to the left of the quarterly variables in the data matrix and quarterly observations should be provided every 3rd period.} -\item{rQ}{character. restrictions on the state (transition) covariance matrix (Q).} +\item{rQ}{character. Restrictions on the state (transition) covariance matrix (Q).} -\item{rR}{character. restrictions on the observation (measurement) covariance matrix (R).} +\item{rR}{character. Restrictions on the observation (measurement) covariance matrix (R).} \item{em.method}{character. The implementation of the Expectation Maximization Algorithm used. The options are: \tabular{llll}{ From ba1092cc72f14688ff89150cb8b65fe62927df91 Mon Sep 17 00:00:00 2001 From: Sebastian Krantz Date: Mon, 12 May 2025 23:49:49 +0200 Subject: [PATCH 02/21] More clarity. --- R/dfms.R | 2 +- man/dfms-package.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/dfms.R b/R/dfms.R index db5dfc6..b024c4c 100644 --- a/R/dfms.R +++ b/R/dfms.R @@ -4,7 +4,7 @@ #' #' *dfms* provides efficient estimation of Dynamic Factor Models via the EM Algorithm --- following Doz, Giannone & Reichlin (2011, 2012) and Banbura & Modugno (2014). 
The package has the following contents: #' -#' **Information Criteria** +#' **Information Criteria to Determine the Number of Factors** #' #' \code{\link[=ICr]{ICr()}}\cr #' diff --git a/man/dfms-package.Rd b/man/dfms-package.Rd index fafe4ff..4146117 100644 --- a/man/dfms-package.Rd +++ b/man/dfms-package.Rd @@ -8,7 +8,7 @@ \description{ \emph{dfms} provides efficient estimation of Dynamic Factor Models via the EM Algorithm --- following Doz, Giannone & Reichlin (2011, 2012) and Banbura & Modugno (2014). The package has the following contents: -\strong{Information Criteria} +\strong{Information Criteria to Determine the Number of Factors} \code{\link[=ICr]{ICr()}}\cr \itemize{ From d34a104ea93d0f74e2afe28a4092a865e9f46592 Mon Sep 17 00:00:00 2001 From: Sebastian Krantz Date: Tue, 13 May 2025 00:14:09 +0200 Subject: [PATCH 03/21] Minor fixes. --- R/my_RcppExports.R | 4 ++-- man/SKF.Rd | 2 +- man/SKFS.Rd | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/my_RcppExports.R b/R/my_RcppExports.R index 7080c2d..1087352 100644 --- a/R/my_RcppExports.R +++ b/R/my_RcppExports.R @@ -4,7 +4,7 @@ Estep <- function(X, A, C, Q, R, F_0, P_0) { #' (Fast) Stationary Kalman Filter #' -#' @description A simple and fast C++ implementation of the Kalman Filter for stationary data with time-invariant system matrices and missing data. +#' @description A simple and fast C++ implementation of the Kalman Filter for stationary data (or random walks - data should be mean zero and without a trend) with time-invariant system matrices and missing data. #' @param X numeric data matrix (\eqn{T \times n}{T x n}). #' @param A transition matrix (\eqn{rp \times rp}{rp x rp}). #' @param C observation matrix (\eqn{n \times rp}{n x rp}). 
@@ -103,7 +103,7 @@ FIS <- function(A, F, F_pred, P, P_pred, F_0 = NULL, P_0 = NULL) { #' @inheritParams SKF #' #' @returns All results from \code{\link{SKF}} and \code{\link{FIS}}, and additionally -#' a \eqn{rp \times rp \times T}{rp x rp x T} matrix \code{PPm_smooth}, which is equal to the estimate of \eqn{Cov(F^smooth_t, F^smooth_{t-1} | T)}{Cov(F_smooth(t), F_smooth(t-1) | T)} and needed for EM iterations. +#' a \eqn{rp \times rp \times T}{rp x rp x T} matrix \code{PPm_smooth}, which is equal to the estimate of \eqn{Cov(F^{smooth}_t, F^{smooth}_{t-1} | T)}{Cov(F_smooth(t), F_smooth(t-1) | T)} and needed for EM iterations. #' See 'Property 6.3: The Lag-One Covariance Smoother' in Shumway & Stoffer (2017). #' #' diff --git a/man/SKF.Rd b/man/SKF.Rd index c824cbe..3e67744 100644 --- a/man/SKF.Rd +++ b/man/SKF.Rd @@ -32,7 +32,7 @@ Predicted and filtered state vectors and covariances. \item{\code{loglik}}{value of the log likelihood. } } \description{ -A simple and fast C++ implementation of the Kalman Filter for stationary data with time-invariant system matrices and missing data. +A simple and fast C++ implementation of the Kalman Filter for stationary data (or random walks - data should be mean zero and without a trend) with time-invariant system matrices and missing data. } \details{ The underlying state space model is: diff --git a/man/SKFS.Rd b/man/SKFS.Rd index 0a6bdce..1f203aa 100644 --- a/man/SKFS.Rd +++ b/man/SKFS.Rd @@ -25,7 +25,7 @@ SKFS(X, A, C, Q, R, F_0, P_0, loglik = FALSE) } \value{ All results from \code{\link{SKF}} and \code{\link{FIS}}, and additionally -a \eqn{rp \times rp \times T}{rp x rp x T} matrix \code{PPm_smooth}, which is equal to the estimate of \eqn{Cov(F^smooth_t, F^smooth_{t-1} | T)}{Cov(F_smooth(t), F_smooth(t-1) | T)} and needed for EM iterations. 
+a \eqn{rp \times rp \times T}{rp x rp x T} matrix \code{PPm_smooth}, which is equal to the estimate of \eqn{Cov(F^{smooth}_t, F^{smooth}_{t-1} | T)}{Cov(F_smooth(t), F_smooth(t-1) | T)} and needed for EM iterations. See 'Property 6.3: The Lag-One Covariance Smoother' in Shumway & Stoffer (2017). } \description{ From d86f3b2e76abe9d84ec04cd23875bc291261ea94 Mon Sep 17 00:00:00 2001 From: Sebastian Krantz Date: Tue, 13 May 2025 00:16:50 +0200 Subject: [PATCH 04/21] Include mixed frequency example. --- vignettes/introduction.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vignettes/introduction.Rmd b/vignettes/introduction.Rmd index a94d777..b2741c9 100644 --- a/vignettes/introduction.Rmd +++ b/vignettes/introduction.Rmd @@ -165,7 +165,7 @@ BM14 use the approximation of Mariano and Murasawa (2003) to construct a monthly In the absence of such adjustments to the algorithm, or other methods of interpolating quarterly data, a very simple way to increase the weight of these series in the estimation is to duplicate them in the dataset. Such duplication can be mechanical (e.g. duplicate quarterly series 2 times in monthly dataset), but should ideally be based on considerations about the quality of the signal stemming from different quarterly series (i.e. informative series should be duplicated more). --> -```{r, eval = FALSE, include=FALSE} +```{r} # Quarterly series from BM14 head(BM14_Q, 3) # Pre-processing the data From 022c581db61ea8dc0a9a8685f8cde6bae3f25558 Mon Sep 17 00:00:00 2001 From: Sebastian Krantz Date: Tue, 13 May 2025 00:29:52 +0200 Subject: [PATCH 05/21] Better. 
--- vignettes/introduction.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vignettes/introduction.Rmd b/vignettes/introduction.Rmd index b2741c9..a2bf47f 100644 --- a/vignettes/introduction.Rmd +++ b/vignettes/introduction.Rmd @@ -169,7 +169,7 @@ In the absence of such adjustments to the algorithm, or other methods of interpo # Quarterly series from BM14 head(BM14_Q, 3) # Pre-processing the data -BM14_Q[, BM14_Models[BM14_Models$freq == "Q", ]$log_trans] %<>% log() +BM14_Q[, BM14_Models$log_trans[BM14_Models$freq == "Q"]] %<>% log() BM14_Q_diff <- diff(BM14_Q) # Merging to monthly data BM14_diff <- cbind(BM14_M_diff, BM14_Q_diff) From f56b22eeb6b85cc1bcca015bc4e864eb00e264ae Mon Sep 17 00:00:00 2001 From: Sebastian Krantz Date: Tue, 13 May 2025 00:47:01 +0200 Subject: [PATCH 06/21] Greater parsimony. --- R/dfms.R | 2 +- man/dfms-package.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/dfms.R b/R/dfms.R index b024c4c..f6aecf2 100644 --- a/R/dfms.R +++ b/R/dfms.R @@ -2,7 +2,7 @@ #' #' @description #' -#' *dfms* provides efficient estimation of Dynamic Factor Models via the EM Algorithm --- following Doz, Giannone & Reichlin (2011, 2012) and Banbura & Modugno (2014). The package has the following contents: +#' *dfms* provides efficient estimation of Dynamic Factor Models via the EM Algorithm --- following Doz, Giannone & Reichlin (2011, 2012) and Banbura & Modugno (2014). Contents: #' #' **Information Criteria to Determine the Number of Factors** #' diff --git a/man/dfms-package.Rd b/man/dfms-package.Rd index 4146117..94cbee3 100644 --- a/man/dfms-package.Rd +++ b/man/dfms-package.Rd @@ -6,7 +6,7 @@ \alias{dfms} \title{Dynamic Factor Models} \description{ -\emph{dfms} provides efficient estimation of Dynamic Factor Models via the EM Algorithm --- following Doz, Giannone & Reichlin (2011, 2012) and Banbura & Modugno (2014). 
The package has the following contents: +\emph{dfms} provides efficient estimation of Dynamic Factor Models via the EM Algorithm --- following Doz, Giannone & Reichlin (2011, 2012) and Banbura & Modugno (2014). Contents: \strong{Information Criteria to Determine the Number of Factors} From 3c724f8c0741ee2fb39981ffb321326147c35cf3 Mon Sep 17 00:00:00 2001 From: Sebastian Krantz Date: Tue, 13 May 2025 08:45:41 +0200 Subject: [PATCH 07/21] Minors. --- vignettes/introduction.Rmd | 38 ++++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/vignettes/introduction.Rmd b/vignettes/introduction.Rmd index a2bf47f..b640124 100644 --- a/vignettes/introduction.Rmd +++ b/vignettes/introduction.Rmd @@ -88,15 +88,15 @@ Estimation can then simply be done using the `DFM()` function with parameters `r ```{r} # Estimating the model with 4 factors and 3 lags using BM14's EM algorithm -model1 <- DFM(BM14_M_diff, r = 4, p = 3) -print(model1) -plot(model1) +model_m <- DFM(BM14_M_diff, r = 4, p = 3) +print(model_m) +plot(model_m) ``` The model can be investigated using `summary()`, which returns an object of class 'dfm_summary' containing the system matrices and summary statistics of the factors and the residuals in the measurement equation, as well as the R-Squared of the factor model for individual series. The print method automatically adjusts the amount of information printed to the data size. For large databases with more than 40 series, no series-level statistics are printed. ```{r} -dfm_summary <- summary(model1) +dfm_summary <- summary(model_m) print(dfm_summary) # Large model with > 40 series: defaults to compact = 2 # Can request more detailed printouts @@ -107,8 +107,8 @@ print(dfm_summary) # Large model with > 40 series: defaults to compact = 2 Apart from the model summary, the *dfm* methods `residuals()` and `fitted()` return observation residuals and fitted values from the model. 
The default format is a plain matrix, but the functions also have an argument to return data in the original (input) format. ```{r} -plot(resid(model1, orig.format = TRUE)) -plot(fitted(model1, orig.format = TRUE)) +plot(resid(model_m, orig.format = TRUE)) +plot(fitted(model_m, orig.format = TRUE)) ``` @@ -116,25 +116,25 @@ plot(fitted(model1, orig.format = TRUE)) Another way to examine the factor model visually is to plot the Quasi-Maximum-Likelihood (QML) factor estimates against PCA and Two-Step estimates following Doz, Giannone and Reichlin (2011)^[Doz, C., Giannone, D., & Reichlin, L. (2011). A two-step estimator for large approximate dynamic factor models based on Kalman filtering. *Journal of Econometrics, 164*(1), 188-205.], where the Kalman Filter and Smoother is run only once. Both estimates are also computed by `DFM()` during EM estimation and can also be visualized with `plot.dfm`. ```{r} -plot(model1, method = "all", type = "individual") +plot(model_m, method = "all", type = "individual") ``` -The plot with the various estimates shows that the QML estimates are more volatile in the initial periods where there are many missing series, but less volatile in the latter periods. In general, QML estimates may now always be superior across the entire data range to Two-Step and PCA estimates. Often, Two-Step estimates also provide similar forecasting performance, and are much faster to estimate using `DFM(BM14_M_diff, r = 4, p = 3, em.method = "none")`. +The plot with the various estimates shows that the QML estimates are more volatile in the initial periods where there are many missing series, but less volatile in the latter periods. In general, QML estimates may not always be superior across the entire data range to Two-Step and PCA estimates. Often, Two-Step estimates also provide similar forecasting performance, and are much faster to estimate using `DFM(BM14_M_diff, r = 4, p = 3, em.method = "none")`. 
The factor estimates themselves can be extracted in a data frame using `as.data.frame()`, which also provides various options regarding the estimates retained and the format of the frame. It is also possible to add a time variable from the original data (the default is a sequence of integers). ```{r} # Default: all estimates in long format -head(as.data.frame(model1, time = index(BM14_M_diff))) +head(as.data.frame(model_m, time = index(BM14_M_diff))) ``` ## Forecasting -DFM forecasts can be obtained with the `predict()` method, which dynamically forecasts the factors using the transition equation (default 10 periods), and then also predicts data forecasts using the observation equation. Objects are of class 'dfm_forecast' +DFM forecasts can be obtained with the `predict()` method, which dynamically forecasts the factors using the transition equation (default 10 periods), and then also predicts data forecasts using the observation equation. Objects are of class 'dfm_forecast'. ```{r} # 12-period ahead DFM forecast -fc <- predict(model1, h = 12) +fc <- predict(model_m, h = 12) print(fc) ``` @@ -142,12 +142,10 @@ These forecasts can also be visualized using a plot method. By default the entir ```{r} # Setting an appropriate plot range to see the forecast plot(fc, xlim = c(320, 370)) -# Predicting with Two-Step estimates -# plot(predict(model1, h = 12, method = "2s"), xlim = c(320, 370)) ``` +By default, `predict()` uses the QML factor estimates (if available). We can, however, also predict with PCA or Two-Step estimates using, e.g., `predict(model_m, h = 12, method = "2s")`. - -The forecasts can also be retrieved in data frame using `as.data.frame()`. Again the method has various arguments to control the forecasts retained (factors, data or both, default factors), and the format of the frame. +The forecasts can be retrieved in a data frame using `as.data.frame()`.
Again the method has various arguments to control the output (factors, data, or both --- default factors) and the format of the frame. ```{r} # Factor forecasts in wide format @@ -156,7 +154,7 @@ head(as.data.frame(fc, pivot = "wide")) ## Estimation with Mixed Frequency -Since v0.3.0 *dfms* allows monthly and quarterly mixed frequency estimation following Mariano & Murasawa (2003) and Banbura & Modugno (2014). Quarterly variables should be to the right of the monthly variables in the data matrix and need to be indicated using the `quarterly.vars` argument. Quarterly observations should be provided every 3rd period. +Since v0.3.0, *dfms* allows monthly and quarterly mixed frequency estimation following Mariano & Murasawa (2003) and Banbura & Modugno (2014). Quarterly variables should be to the right of the monthly variables in the data matrix and need to be indicated using the `quarterly.vars` argument. Quarterly observations should be provided every 3rd period (months 3, 6, 9, and 12). Below, I estimate the mixed frequency DFM, adding a factor to capture any idiosynchratic dynamics in the quarterly series.