使用前向回归和/或 LARS,从 caret 包的 train 函数中获取模型系数

Retrieve coefficients from R's train function in caret using forward regression and/or LARS

我正在使用 R,并借助 caret 包探索多种变量选择和加权方法。在这里,我尝试使用前向逐步回归和最小角回归 (LARS),并为每种方法设置调优参数。在下面的代码中,我任意选择了一个因变量 (y) 和一组预测变量 (x),并使用 70% 的数据子集通过 train 函数对它们进行训练。为此,我应用了重复的 10 折交叉验证。我正在努力解决的问题是找到一个命令,用来获取 train 函数得到的最终模型参数(例如截距、beta 权重)。当我调用 object$finalModel 时,并不能直接看到这些参数。有没有办法针对上述方法(前向逐步回归和 LARS)在 R 中提取这些参数?我觉得一定有办法……

谢谢!

library (caret)
library(AppliedPredictiveModeling)
# Load the abalone data set and inspect its structure.
data(abalone)
str(abalone)

# Randomly assign 70% of the rows to the training set; the remaining rows
# form the test set. The indices are drawn from *all* rows: permuting only
# 1:(0.7 * n) would always select the first 70% of the rows, just shuffled.
set.seed(18)
inTrain <- sample(
  seq_len(nrow(abalone)),
  size = round(nrow(abalone) * 0.7),
  replace = FALSE
)

train_df <- abalone[inTrain, ]
test_df <- abalone[-inTrain, ]

# Predict Diameter (column 3) from several of the data set's other
# variables (columns 4 to 8).
train_df_x <- train_df[, 4:8]
test_df_x <- test_df[, 4:8]
y_train <- train_df[, 3]
y_test <- test_df[, 3]

# Resampling scheme: 10-fold cross-validation repeated 3 times. The fold
# indices are generated once (seeded) and passed to trainControl() through
# `index`, so every model trained with this control object is evaluated on
# the same resamples.
set.seed(18)
fold.ids <- createMultiFolds(y_train, k = 10, times = 3)

fitControl <- trainControl(
  method = "repeatedcv",
  number = 10,
  repeats = 3,
  returnResamp = "final",
  index = fold.ids,
  summaryFunction = defaultSummary,
  selectionFunction = "oneSE"
)

### Forward stepwise regression ###
library(leaps)

# Tuning grid: candidate values 2, 3, 4 and 5 for the nvmax parameter.
forwardLmGrid <- expand.grid(.nvmax = 2:5)

set.seed(18)
F_OLS_fit <- train(
  x = train_df_x,
  y = y_train,
  method = "leapForward",
  trControl = fitControl,
  metric = "RMSE",
  tuneGrid = forwardLmGrid
)

### Least angle regression (LARS) ###
library(lars)

# Tuning grid: 50 evenly spaced values of the fraction parameter
# between 0.01 and 0.99.
larGrid <- expand.grid(.fraction = seq(0.01, 0.99, length.out = 50))

Lar_fit <- train(
  x = train_df_x,
  y = y_train,
  method = "lars",
  trControl = fitControl,
  metric = "RMSE",
  tuneGrid = larGrid
)

我将通过示例向您展示我是如何做到的:

library(data.table)
library(caret)
library(lars)

# Synthetic regression problem: five predictors drawn uniformly from
# [-10, 10]. Only x1, x2 and x3 enter the response, which is built without
# a noise term, so the true model is y = 30 + x1 + 4 * x2 + x3.
n <- 1000
x1 <- runif(n, min = -10, max = 10)
x2 <- runif(n, min = -10, max = 10)
x3 <- runif(n, min = -10, max = 10)
x4 <- runif(n, min = -10, max = 10)
x5 <- runif(n, min = -10, max = 10)
y1 <- 30 + x1 + 4 * x2 + x3

synthetic <- data.table(
  x1 = x1,
  x2 = x2,
  x3 = x3,
  x4 = x4,
  x5 = x5,
  y = y1
)
# 3-fold cross-validation, keeping the hold-out predictions. The argument
# is spelled out in full (savePredictions) instead of relying on partial
# matching, and TRUE is used instead of the reassignable shorthand T.
ctrl <- trainControl(method = "cv", savePredictions = TRUE, number = 3)

# Tuning grid for the LARS fraction parameter: evenly spaced values from
# 0 to 1 with step 1 / (number of columns in `synthetic` - 1). The grid is
# derived from `synthetic`, the data actually being modelled.
fractionGrid <- expand.grid(
  fraction = seq(0, 1, by = 1 / (ncol(synthetic) - 1))
)

# Fit LARS on all predictors, selecting the fraction by RMSE.
# use.Gram = FALSE is forwarded through train()'s `...`.
cvresult <- train(
  y ~ .,
  data = synthetic,
  method = "lars",
  trControl = ctrl,
  metric = "RMSE",
  tuneGrid = fractionGrid,
  use.Gram = FALSE
)
# Coefficients at every step of the fitted LARS path (one row per step).
coeffs <- predict(cvresult$finalModel, type = "coefficients")
models <- as.data.table(coeffs$coefficients)

# Coefficients of the model selected by train(). The path is evaluated
# directly at the tuned fraction (mode = "fraction") rather than searching
# the per-step fractions with `==`: those are floating-point values that
# generally do not coincide with the tuning-grid values, so an exact
# comparison tends to return zero rows.
best_fit <- predict(
  cvresult$finalModel,
  s = cvresult$bestTune$fraction,
  type = "coefficients",
  mode = "fraction"
)
winnermodelscoeffs <- as.data.table(as.list(best_fit$coefficients))

# The coefficient vector holds slopes only. lars centres the response and
# the predictors before fitting, so the intercept is recovered from the
# stored means: intercept = mean(y) - sum(mean(x_j) * beta_j).
winnermodelintercept <- cvresult$finalModel$mu -
  sum(cvresult$finalModel$meanx * best_fit$coefficients)