我们可以在单个函数中估计滚动 window 回归的系数、r2 和残差吗?

can we estimate coefficients, r2, residuals for rolling window regression in single function?

我运行此代码生成滚动window回归的系数、r2、adj.r2和残差。

# Number of rows in each rolling window.
window <- (13)
# Fit the dynamic regression on rows (1:window) + j of `dd` and return the
# extracted pieces as an unnamed list. `dd` and `window` are read from the
# enclosing environment.
ler <- function(j) {
  model <- dynlm(y ~ L(y,1) + x1 + x2 + x3, data= dd[(1:window)+j,])
  # NOTE: a dynlm fit has a `coefficients` component but no `coefs`, so this
  # is NULL.
  c <- model$coefs
  # NOTE: `r.squared` and `adj.r.squared` are components of summary(model),
  # not of the fitted object, so both of these are NULL as well.
  r2<- model$r.squared
  a.r2<- model$adj.r.squared
  # `residuals` is a component of the fitted object, so this one is populated.
  e <- model$residuals
  list(c,r2,a.r2,e)
# NOTE(review): the `)` after the closing brace has no matching `(`; as
# written this line is a syntax error.
})
# Apply `ler` to every window offset, with a progress bar.
out <- pbapply::pblapply(0:(nrow(dd)-window), ler)

虽然我能够得到残差,但其他几项(coefs、r2、adj.r2)都为 NULL。我怎样才能为滚动 window 模型获得所有这些结果?

问题中缺少输入数据,因此我们将使用内置的 11x8 anscombe 数据框,window 大小为 5,并且仅使用 y1 和 x1 列。 (anscombe 中的所有列都是数字,因此如果您实际使用的输入含有任何 non-numeric 列,请先删除它们。)

我们使用会同时加载 zoo 的 dyn 包。结果是一个 11 行的矩阵,每个输入行对应一行。请注意,dplyr 会屏蔽 R 的 lag 函数,因此如果您加载了 dplyr,下面的代码会将其恢复。

library(dyn)
# dplyr masks R's lag(); rebind the stats version so the formula below uses
# it. This line is only needed if dplyr is loaded. `lag` is exported from
# stats, so the public `::` accessor is sufficient.
lag <- stats::lag

# Roll a right-aligned window of 5 rows over anscombe. For each window, fit
# y1 on its own lag and x1, then return one flat numeric vector holding the
# coefficients, R-squared, adjusted R-squared and residuals.
# by.column = FALSE passes all columns of the window to the function at once;
# fill = NA pads the rows for which no complete window exists.
out <- rollapplyr(anscombe, 5, function(x) {
  fm <- dyn$lm(y1 ~ lag(y1, -1) + x1, zoo(x))
  # R-squared values are stored on the summary object, not on the fit.
  s <- summary(fm)
  c(coef(fm), r2 = s$r.squared, a.r2 = s$adj.r.squared, e = resid(fm))
}, by.column = FALSE, fill = NA)
out

得到:

      (Intercept) lag(y1, -1)         x1         r2       a.r2         e.2        e.3         e.4         e.5
 [1,]          NA          NA         NA         NA         NA          NA         NA          NA          NA
 [2,]          NA          NA         NA         NA         NA          NA         NA          NA          NA
 [3,]          NA          NA         NA         NA         NA          NA         NA          NA          NA
 [4,]          NA          NA         NA         NA         NA          NA         NA          NA          NA
 [5,]    5.737196   0.2051911 0.05566628 0.04018973 -1.8794308 -0.88226314 -0.3069362  1.01645850  0.17274084
 [6,]    2.674691   0.6139670 0.09652978 0.29958165 -1.1012551 -0.61664914  0.6126708 -0.81556822  0.81954660
 [7,]    9.614862  -0.3998369 0.24367243 0.92448518  0.7734555  0.03285018 -0.4426953  0.26436572  0.14547936
 [8,]   -2.807541   0.7134064 0.47205105 0.98990153  0.9697046 -0.34013131  0.2161506  0.10970662  0.01427410
 [9,]    3.886087  -0.1126389 0.55864348 0.86605967  0.5981790 -0.80881355  1.1239357 -1.04515518  0.73003301
[10,]    3.504280  -0.2036625 0.68004056 0.82863726  0.4859118  1.68395442 -0.4899263  0.04283509 -1.23686318
[11,]    4.151978  -0.3805723 0.69030113 0.99924805  0.9977442  0.10216091  0.0256465 -0.03868229 -0.08912512

答案很简单,dynlm 对象不包含名为 coefs、r.squared 或 adj.r.squared 的成员。看:

# Load dynlm; the startup messages below show that it also attaches zoo.
library(dynlm)
#> Loading required package: zoo
#> 
#> Attaching package: 'zoo'
#> The following objects are masked from 'package:base':
#> 
#>     as.Date, as.Date.numeric
# Example data: log10 of the UKDriverDeaths series from the datasets package.
data("UKDriverDeaths", package = "datasets")
uk <- log10(UKDriverDeaths)
# Regress the series on its own values at lags 1 and 12.
model <- dynlm(uk ~ L(uk, 1) + L(uk, 12))

# Components stored on the fitted object: note "coefficients" and
# "residuals" are present, but there is no "coefs", "r.squared" or
# "adj.r.squared".
names(model)
#>  [1] "coefficients"  "residuals"     "effects"       "rank"         
#>  [5] "fitted.values" "assign"        "qr"            "df.residual"  
#>  [9] "xlevels"       "call"          "terms"         "model"        
#> [13] "index"         "frequency"     "twostage"

但是,它的摘要对象确实包含 r.squared 和 adj.r.squared:

# summary() returns a separate object with its own set of components.
summary_mod <- summary(model)

# Components of the summary object: "r.squared" and "adj.r.squared" are
# here, alongside "coefficients" and "residuals".
names(summary_mod)
#>  [1] "call"          "terms"         "residuals"     "coefficients" 
#>  [5] "aliased"       "sigma"         "df"            "r.squared"    
#>  [9] "adj.r.squared" "fstatistic"    "cov.unscaled"  "frequency"

所以你可能需要:

# Fit the dynamic regression on one rolling window and collect its statistics.
#
# j: row offset of the window; rows (1:window) + j of `dd` are used.
# Reads `window` and `dd` from the enclosing environment.
# Returns an unnamed list of four elements, all taken from the model summary:
# the coefficients component, R-squared, adjusted R-squared, and residuals.
ler <- function(j) {
  fit <- dynlm(y ~ L(y, 1) + x1 + x2 + x3, data = dd[(1:window) + j, ])
  # r.squared and adj.r.squared are stored on the summary, not on the fit.
  fit_summary <- summary(fit)
  list(
    fit_summary$coefficients,
    fit_summary$r.squared,
    fit_summary$adj.r.squared,
    fit_summary$residuals
  )
}

由 reprex package (v2.0.1) 创建于 2022-03-05