问:如何应用多重 LASSO 回归?

Q: How to apply a multiple LASSO regression?

我正在处理两个矩阵,我的想法是对子样本使用 LASSO 方法。为此,我编写了以下代码:

library(glmnet)

# Dummy predictor matrix: 3500 rows x 30 columns. The integer sequence 1:3500
# is recycled down each column, so every column holds the same values.
# Column names follow "<measure>_<horizon>_<market>", with the three horizons
# (d, w, m) cycling fastest within each market or market pair.
iv_vardos <- local({
  horizons <- c("d", "w", "m")
  markets <- c("NSW", "QLD", "SA", "VIC")
  market_pairs <- c(
    "QLD_NSW", "SA_NSW", "SA_QLD", "SA_VIC", "VIC_NSW", "VIC_QLD"
  )
  predictor_names <- c(
    paste0("ReV_", horizons, "_", rep(markets, each = 3)),
    paste0("ReCov_", horizons, "_", rep(market_pairs, each = 3))
  )
  matrix(1:3500, nrow = 3500, ncol = 30, dimnames = list(NULL, predictor_names))
})

# Dummy response matrix: 3500 rows x 10 columns (3501:7000 recycled per column).
# Note the single-market order here is NSW, QLD, VIC, SA.
dv_vardos <- matrix(
  3501:7000,
  nrow = 3500,
  ncol = 10,
  dimnames = list(
    NULL,
    c(
      "NSW", "QLD", "VIC", "SA",
      "QLD_NSW", "SA_NSW", "SA_QLD", "SA_VIC", "VIC_NSW", "VIC_QLD"
    )
  )
)

# Number of rows in each estimation window.
space <- 3490

# Goal: fit one multi-response LASSO per rolling window of `space` rows and
# keep the coefficients of every window (10 windows for the data above).

# Result container: responses (10) x predictors (30) x windows
# (nrow(iv_vardos) - space); every cell starts as NA.
Dinamic_betas<-array(NA, c(10, 30, (nrow(iv_vardos)-space)))
for (i in 1:dim(Dinamic_betas)[3]) {
  # Window i covers rows i .. (space + i - 1). cv.glmnet is run first and its
  # lambda.min is then passed to glmnet as the penalty for the final fit.
  cv <- cv.glmnet(iv_vardos[i:(space+i-1),], dv_vardos[i:(space+i-1),], alpha = 1, family = "mgaussian")
  LASSO_estimation<- glmnet(iv_vardos[i:(space+i-1),], dv_vardos[i:(space+i-1),], alpha = 1, lambda = cv$lambda.min, family = "mgaussian")
    # The problem starts here, when storing the LASSO coefficients.
    # LASSO_estimation is a list of several objects.
# The element of interest is `beta`: a list, indexed by response name below,
# holding the coefficients estimated by the LASSO fit.
# NOTE(review): `Dinamic_betas[3]` is the third *element* of the array (still
# NA at this point), not the length of its third dimension, so `1:NA` raises
# "NA/NaN argument". The loop is also redundant: it would write the current
# window's fit into every slice j; the slice to fill is i.
for (j in 1:Dinamic_betas[3]) {
  Dinamic_betas[1,,j]<-t(as.vector(LASSO_estimation[["beta"]][["NSW"]]))
  Dinamic_betas[2,,j]<-t(as.vector(LASSO_estimation[["beta"]][["QLD"]]))
  Dinamic_betas[3,,j]<-t(as.vector(LASSO_estimation[["beta"]][["VIC"]]))
  Dinamic_betas[4,,j]<-t(as.vector(LASSO_estimation[["beta"]][["SA"]]))
  Dinamic_betas[5,,j]<-t(as.vector(LASSO_estimation[["beta"]][["QLD_NSW"]]))
  Dinamic_betas[6,,j]<-t(as.vector(LASSO_estimation[["beta"]][["SA_NSW"]]))
  Dinamic_betas[7,,j]<-t(as.vector(LASSO_estimation[["beta"]][["SA_QLD"]]))
  Dinamic_betas[8,,j]<-t(as.vector(LASSO_estimation[["beta"]][["SA_VIC"]]))
  Dinamic_betas[9,,j]<-t(as.vector(LASSO_estimation[["beta"]][["VIC_NSW"]]))
  Dinamic_betas[10,,j]<-t(as.vector(LASSO_estimation[["beta"]][["VIC_QLD"]]))
 }
}

我想用这段代码创建一个 10x30x10 的数组,但每次运行代码都会出现以下错误:

Error in 1:Dinamic_betas[3] : NA/NaN argument 

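好的,我想我明白你想要做什么了。报错的原因是 `Dinamic_betas[3]` 取的是数组的第三个元素(此时为 NA),而不是第三维的长度 `dim(Dinamic_betas)[3]`,所以 `1:NA` 会失败;而且内层循环本身也没有必要。你可以用 do.call 配合 cbind 把所有响应变量的系数合并成一个矩阵,然后把它转置后放入你创建的数组的第 i 个切片中: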

# Rolling-window multi-response LASSO.
# For each window i (rows i .. space + i - 1) the penalty is chosen by
# cross-validation (cv$lambda.min), the model is refit at that penalty, and
# the coefficients are stored in slice i of `Dinamic_betas`.
# Result layout: responses x predictors x windows. Row and column names are
# taken from the input matrices so each slice stays self-describing.
# Note: cv.glmnet assigns folds at random, so results vary between runs
# unless a seed is set beforehand.
n_windows <- nrow(iv_vardos) - space
Dinamic_betas <- array(
  NA_real_,
  dim = c(ncol(dv_vardos), ncol(iv_vardos), n_windows),
  dimnames = list(colnames(dv_vardos), colnames(iv_vardos), NULL)
)

# seq_len() yields an empty sequence when there are no windows, whereas
# 1:0 would iterate over c(1, 0).
for (i in seq_len(n_windows)) {
  # Slice the window once and reuse it for both fits.
  window_rows <- i:(space + i - 1)
  x_window <- iv_vardos[window_rows, , drop = FALSE]
  y_window <- dv_vardos[window_rows, , drop = FALSE]

  cv <- cv.glmnet(x_window, y_window, alpha = 1, family = "mgaussian")

  LASSO_estimation <- glmnet(
    x_window, y_window,
    alpha = 1, lambda = cv$lambda.min, family = "mgaussian"
  )

  # `beta` is a list with one single-column sparse matrix per response;
  # binding them column-wise gives a predictors x responses matrix.
  coefs <- as.matrix(do.call(cbind, LASSO_estimation$beta))
  colnames(coefs) <- names(LASSO_estimation$beta)

  # Transpose to responses x predictors to match the array layout.
  Dinamic_betas[, , i] <- t(coefs)
}

另外一个建议:你可以尝试用列表来存储系数,并使用 purrr 之类的工具来遍历;数组有时不够灵活。