在 Caret 中拟合 bartMachine 时出现 “argument is of length zero” / “incorrect number of dimensions” 错误
Fitting bartMachine in Caret getting argument of length zero/Incorrect Number of dimensions
我想为 caret 中 bartMachine 的用法构建一个好的建模示例,但一直无法让 bartMachine 在 caret 中正确运行。谁能告诉我下面这些错误到底是什么意思?或者有没有用于 BART 建模的简单可重现代码?
下面是我用 HouseVotes84 和 cars 数据集作为示例数据对 bartMachine 建模的代码片段:
library(mlbench)
library(caret)
data("HouseVotes84")
# HouseVotes84 is the classification dataset; cars is the regression dataset.
# Recode every factor predictor (all columns except the first, Class) as its
# integer level code. Base R is used because no visible library() call
# attaches a package providing `%>%` or `mutate_if()`.
vote_predictors <- HouseVotes84[, -1, drop = FALSE]
factor_cols <- vapply(vote_predictors, is.factor, logical(1))
vote_predictors[factor_cols] <- lapply(vote_predictors[factor_cols], as.numeric)
dummy_data_classif <- data.frame(Class = HouseVotes84[[1]], vote_predictors)
# Missing votes become 0, which cannot collide with a level code (codes start at 1).
dummy_data_classif[is.na(dummy_data_classif)] <- 0
# NOTE(review): the regression call further down models `Price`, so this
# presumably resolves to a `cars` dataset that has a Price column - confirm.
data("cars")
dummy_data_regr <- cars
#' Fit a caret model under a chosen resampling plan and report the outcome.
#'
#' Errors are reported via `message()` and turned into a `NULL` result.
#' Warnings are reported via `message()` and then muffled, so the fit carries
#' on and the fitted model is still returned.
#'
#' @param dummy_data Data frame passed to `caret::train()` as `data`.
#' @param formula Model formula, or a string that `as.formula()` can coerce.
#' @param resample_plan Resampling scheme: 1 = repeated CV (10 folds, 5
#'   repeats), 2 = 5-fold CV, 3 = adaptive CV (10 folds, 5 repeats),
#'   4 = bootstrap (5 resamples), 5 = "boot_all" (5 resamples).
#' @param test_method Value passed to `caret::train()` as `method`.
#' @param time_limit Timeout for the fit, in seconds. It is now honoured;
#'   previously it was ignored in favour of a hard-coded 300 seconds.
#' @param grid_param Optional tuning grid. When empty, no `tuneGrid` is
#'   supplied and caret generates its own.
#' @param parallel_mode Passed to `trainControl(allowParallel = )`.
#' @return The object returned by `caret::train()`, or `NULL` on error
#'   (including a timeout or an unknown `resample_plan`).
caret_method_tester <- function(dummy_data, formula, resample_plan = 1,
                                test_method, time_limit = 30,
                                grid_param = c(), parallel_mode = FALSE) {
  formula <- as.formula(formula)

  tryCatch(
    # Warnings are handled with a calling handler so that the fit is not
    # unwound: an exiting handler would have no model to return.
    withCallingHandlers(
      {
        resampling <- switch(
          as.character(resample_plan),
          "1" = caret::trainControl(
            method = "repeatedcv",
            number = 10,
            repeats = 5,
            allowParallel = parallel_mode
          ),
          "2" = caret::trainControl(
            method = "cv",
            number = 5,
            allowParallel = parallel_mode
          ),
          "3" = caret::trainControl(
            method = "adaptive_cv",
            number = 10,
            repeats = 5,
            allowParallel = parallel_mode,
            adaptive = list(
              min = 3, alpha = 0.05,
              method = "BT", complete = FALSE
            )
          ),
          "4" = caret::trainControl(
            method = "boot",
            number = 5,
            allowParallel = parallel_mode
          ),
          "5" = caret::trainControl(
            method = "boot_all",
            number = 5,
            allowParallel = parallel_mode
          ),
          # Fail loudly instead of passing trControl = NULL to train().
          stop("resample_plan must be one of 1, 2, 3, 4 or 5", call. = FALSE)
        )

        # tuneGrid = NULL is train()'s default, i.e. "let caret build the grid".
        tune_grid <- if (length(grid_param) > 0) grid_param else NULL

        R.utils::withTimeout(
          caret::train(formula,
            data = dummy_data,
            method = test_method,
            trControl = resampling,
            tuneGrid = tune_grid
          ),
          timeout = time_limit
        )
      },
      warning = function(cond) {
        message("Warning Triggered!")
        message("Here's the original warning message:")
        message(conditionMessage(cond))
        invokeRestart("muffleWarning")
      }
    ),
    error = function(cond) {
      message("Test Model Failed")
      message("Here's the original error message:")
      message(conditionMessage(cond))
      NULL
    }
  )
}
# Regression attempt: bartMachine on the cars data with resampling plan 2
# (5-fold CV); no tuning grid is supplied.
bart_reg <- caret_method_tester(dummy_data_regr, "Price ~ .",
test_method="bartMachine", time_limit=30, resample_plan=2)
Test Model Failed
Here's the original error message:
argument is of length zero
# Classification attempt: bartMachine on the recoded HouseVotes84 data with
# resampling plan 2 (5-fold CV); no tuning grid is supplied.
bart_classif <- caret_method_tester(dummy_data_classif, "Class ~ .",
test_method="bartMachine", time_limit=30, resample_plan=2)
Test Model Failed
Here's the original error message:
incorrect number of dimensions
我使用 tryCatch 来方便地报告代码的执行情况,这样代码失败、发出警告或成功时都一目了然。
另外,我的数据集中没有任何 NA 值。
如果能把代码精简到最基本的部分会更好。归根结底,问题在于对 bartMachine 调用 train 函数时无法运行。用下面这个例子就能复现,得到的是同样的错误信息:
# Minimal reproduction: no tuneGrid is passed, so caret has to build the
# tuning grid for bartMachine itself.
mdl = train(mpg ~ .,data=mtcars,method="bartMachine",trControl=trainControl(method="cv"))
Error in if (grepl("adaptive", trControl$method) & nrow(tuneGrid) == 1) { :
argument is of length zero
这个错误源于 caret 自身代码中的一个 bug:如果您不提供调参网格(tuneGrid),caret 会调用该模型的默认 grid 函数来生成网格,而这个函数并没有返回 data.frame(函数末尾缺少 return(out)):
getModelInfo()$bartMachine$grid
function(x, y, len = NULL, search = "grid") {
if(search == "grid") {
out <- expand.grid(num_trees = 50,
k = (1:len)+ 1,
alpha = seq(.9, .99, length = len),
beta = seq(1, 3, length = len),
nu = (1:len)+ 1)
} else {
out <- data.frame(num_trees = sample(10:100, replace = TRUE, size = len),
k = runif(len, min = 0, max = 5),
alpha = runif(len, min = .9, max = 1),
beta = runif(len, min = 0, max = 4),
nu = runif(len, min = 0, max = 5))
}
if(is.factor(y)) {
out$k <- NA
out$nu <- NA
}
}
一种办法是自行提供调参网格:
# Workaround 1: supply an explicit one-row tuning grid so that caret does not
# have to generate one.
mdl = train(mpg ~ .,data=mtcars,method="bartMachine",
trControl=trainControl(method="boot"),
tuneGrid=data.frame(num_trees=50,k=3,alpha=0.1,beta=0.1,nu=4))
# Print the fitted model summary.
mdl
Bayesian Additive Regression Trees
32 samples
10 predictors
No pre-processing
Resampling: Bootstrapped (25 reps)
Summary of sample sizes: 32, 32, 32, 32, 32, 32, ...
Resampling results:
RMSE Rsquared MAE
2.826126 0.8344417 2.292464
Tuning parameter 'num_trees' was held constant at a value of 50
'beta' was held constant at a value of 0.1
Tuning parameter 'nu' was
held constant at a value of 4
或者修改上面的 grid 函数,创建一个新的自定义方法(原文此处附有一个延伸阅读链接):
# Start from the model specification that caret ships for bartMachine.
newBartMachine <- getModelInfo()$bartMachine

# Replacement tuning-grid generator. It builds the same candidate values as
# the stock generator, but finishes by yielding `out`, so the caller actually
# receives the data frame.
newBartMachine$grid <- function(x, y, len = NULL, search = "grid") {
  if (search != "grid") {
    # Random search: draw `len` candidate values for every parameter.
    out <- data.frame(
      num_trees = sample(10:100, replace = TRUE, size = len),
      k = runif(len, min = 0, max = 5),
      alpha = runif(len, min = .9, max = 1),
      beta = runif(len, min = 0, max = 4),
      nu = runif(len, min = 0, max = 5)
    )
  } else {
    # Grid search: full cross-product of `len` values per varying parameter.
    shifted_steps <- (1:len) + 1
    out <- expand.grid(
      num_trees = 50,
      k = shifted_steps,
      alpha = seq(.9, .99, length.out = len),
      beta = seq(1, 3, length.out = len),
      nu = shifted_steps
    )
  }

  # For a factor outcome, k and nu are blanked out.
  if (is.factor(y)) {
    out$k <- NA
    out$nu <- NA
  }

  out
}
# Workaround 2: pass the patched model specification as `method`; with
# tuneLength=1 the patched grid function produces the tuning grid.
mdl = train(mpg ~ .,data=mtcars,method=newBartMachine,trControl=trainControl(method="cv"),tuneLength=1)
Bayesian Additive Regression Trees
32 samples
10 predictors
No pre-processing
Resampling: Cross-Validated (10 fold)
Summary of sample sizes: 28, 28, 28, 29, 30, 30, ...
Resampling results:
RMSE Rsquared MAE
2.338429 0.9581958 2.057181
Tuning parameter 'num_trees' was held constant at a value of 50
'beta' was held constant at a value of 1
Tuning parameter 'nu' was
held constant at a value of 2
我想为 caret 中 bartMachine 的用法构建一个好的建模示例,但一直无法让 bartMachine 在 caret 中正确运行。谁能告诉我下面这些错误到底是什么意思?或者有没有用于 BART 建模的简单可重现代码?
下面是我用 HouseVotes84 和 cars 数据集作为示例数据对 bartMachine 建模的代码片段:
library(mlbench)
library(caret)
data("HouseVotes84")
# HouseVotes84 is the classification dataset; cars is the regression dataset.
# Recode every factor predictor (all columns except the first, Class) as its
# integer level code. Base R is used because no visible library() call
# attaches a package providing `%>%` or `mutate_if()`.
vote_predictors <- HouseVotes84[, -1, drop = FALSE]
factor_cols <- vapply(vote_predictors, is.factor, logical(1))
vote_predictors[factor_cols] <- lapply(vote_predictors[factor_cols], as.numeric)
dummy_data_classif <- data.frame(Class = HouseVotes84[[1]], vote_predictors)
# Missing votes become 0, which cannot collide with a level code (codes start at 1).
dummy_data_classif[is.na(dummy_data_classif)] <- 0
# NOTE(review): the regression call further down models `Price`, so this
# presumably resolves to a `cars` dataset that has a Price column - confirm.
data("cars")
dummy_data_regr <- cars
#' Fit a caret model under a chosen resampling plan and report the outcome.
#'
#' Errors are reported via `message()` and turned into a `NULL` result.
#' Warnings are reported via `message()` and then muffled, so the fit carries
#' on and the fitted model is still returned.
#'
#' @param dummy_data Data frame passed to `caret::train()` as `data`.
#' @param formula Model formula, or a string that `as.formula()` can coerce.
#' @param resample_plan Resampling scheme: 1 = repeated CV (10 folds, 5
#'   repeats), 2 = 5-fold CV, 3 = adaptive CV (10 folds, 5 repeats),
#'   4 = bootstrap (5 resamples), 5 = "boot_all" (5 resamples).
#' @param test_method Value passed to `caret::train()` as `method`.
#' @param time_limit Timeout for the fit, in seconds. It is now honoured;
#'   previously it was ignored in favour of a hard-coded 300 seconds.
#' @param grid_param Optional tuning grid. When empty, no `tuneGrid` is
#'   supplied and caret generates its own.
#' @param parallel_mode Passed to `trainControl(allowParallel = )`.
#' @return The object returned by `caret::train()`, or `NULL` on error
#'   (including a timeout or an unknown `resample_plan`).
caret_method_tester <- function(dummy_data, formula, resample_plan = 1,
                                test_method, time_limit = 30,
                                grid_param = c(), parallel_mode = FALSE) {
  formula <- as.formula(formula)

  tryCatch(
    # Warnings are handled with a calling handler so that the fit is not
    # unwound: an exiting handler would have no model to return.
    withCallingHandlers(
      {
        resampling <- switch(
          as.character(resample_plan),
          "1" = caret::trainControl(
            method = "repeatedcv",
            number = 10,
            repeats = 5,
            allowParallel = parallel_mode
          ),
          "2" = caret::trainControl(
            method = "cv",
            number = 5,
            allowParallel = parallel_mode
          ),
          "3" = caret::trainControl(
            method = "adaptive_cv",
            number = 10,
            repeats = 5,
            allowParallel = parallel_mode,
            adaptive = list(
              min = 3, alpha = 0.05,
              method = "BT", complete = FALSE
            )
          ),
          "4" = caret::trainControl(
            method = "boot",
            number = 5,
            allowParallel = parallel_mode
          ),
          "5" = caret::trainControl(
            method = "boot_all",
            number = 5,
            allowParallel = parallel_mode
          ),
          # Fail loudly instead of passing trControl = NULL to train().
          stop("resample_plan must be one of 1, 2, 3, 4 or 5", call. = FALSE)
        )

        # tuneGrid = NULL is train()'s default, i.e. "let caret build the grid".
        tune_grid <- if (length(grid_param) > 0) grid_param else NULL

        R.utils::withTimeout(
          caret::train(formula,
            data = dummy_data,
            method = test_method,
            trControl = resampling,
            tuneGrid = tune_grid
          ),
          timeout = time_limit
        )
      },
      warning = function(cond) {
        message("Warning Triggered!")
        message("Here's the original warning message:")
        message(conditionMessage(cond))
        invokeRestart("muffleWarning")
      }
    ),
    error = function(cond) {
      message("Test Model Failed")
      message("Here's the original error message:")
      message(conditionMessage(cond))
      NULL
    }
  )
}
# Regression attempt: bartMachine on the cars data with resampling plan 2
# (5-fold CV); no tuning grid is supplied.
bart_reg <- caret_method_tester(dummy_data_regr, "Price ~ .",
test_method="bartMachine", time_limit=30, resample_plan=2)
Test Model Failed
Here's the original error message:
argument is of length zero
# Classification attempt: bartMachine on the recoded HouseVotes84 data with
# resampling plan 2 (5-fold CV); no tuning grid is supplied.
bart_classif <- caret_method_tester(dummy_data_classif, "Class ~ .",
test_method="bartMachine", time_limit=30, resample_plan=2)
Test Model Failed
Here's the original error message:
incorrect number of dimensions
我使用 tryCatch 来方便地报告代码的执行情况,这样代码失败、发出警告或成功时都一目了然。
另外,我的数据集中没有任何 NA 值。
如果能把代码精简到最基本的部分会更好。归根结底,问题在于对 bartMachine 调用 train 函数时无法运行。用下面这个例子就能复现,得到的是同样的错误信息:
# Minimal reproduction: no tuneGrid is passed, so caret has to build the
# tuning grid for bartMachine itself.
mdl = train(mpg ~ .,data=mtcars,method="bartMachine",trControl=trainControl(method="cv"))
Error in if (grepl("adaptive", trControl$method) & nrow(tuneGrid) == 1) { :
argument is of length zero
这个错误源于 caret 自身代码中的一个 bug:如果您不提供调参网格(tuneGrid),caret 会调用该模型的默认 grid 函数来生成网格,而这个函数并没有返回 data.frame(函数末尾缺少 return(out)):
getModelInfo()$bartMachine$grid
function(x, y, len = NULL, search = "grid") {
if(search == "grid") {
out <- expand.grid(num_trees = 50,
k = (1:len)+ 1,
alpha = seq(.9, .99, length = len),
beta = seq(1, 3, length = len),
nu = (1:len)+ 1)
} else {
out <- data.frame(num_trees = sample(10:100, replace = TRUE, size = len),
k = runif(len, min = 0, max = 5),
alpha = runif(len, min = .9, max = 1),
beta = runif(len, min = 0, max = 4),
nu = runif(len, min = 0, max = 5))
}
if(is.factor(y)) {
out$k <- NA
out$nu <- NA
}
}
一种办法是自行提供调参网格:
# Workaround 1: supply an explicit one-row tuning grid so that caret does not
# have to generate one.
mdl = train(mpg ~ .,data=mtcars,method="bartMachine",
trControl=trainControl(method="boot"),
tuneGrid=data.frame(num_trees=50,k=3,alpha=0.1,beta=0.1,nu=4))
# Print the fitted model summary.
mdl
Bayesian Additive Regression Trees
32 samples
10 predictors
No pre-processing
Resampling: Bootstrapped (25 reps)
Summary of sample sizes: 32, 32, 32, 32, 32, 32, ...
Resampling results:
RMSE Rsquared MAE
2.826126 0.8344417 2.292464
Tuning parameter 'num_trees' was held constant at a value of 50
'beta' was held constant at a value of 0.1
Tuning parameter 'nu' was
held constant at a value of 4
或者修改上面的 grid 函数,创建一个新的自定义方法(原文此处附有一个延伸阅读链接):
# Start from the model specification that caret ships for bartMachine.
newBartMachine <- getModelInfo()$bartMachine

# Replacement tuning-grid generator. It builds the same candidate values as
# the stock generator, but finishes by yielding `out`, so the caller actually
# receives the data frame.
newBartMachine$grid <- function(x, y, len = NULL, search = "grid") {
  if (search != "grid") {
    # Random search: draw `len` candidate values for every parameter.
    out <- data.frame(
      num_trees = sample(10:100, replace = TRUE, size = len),
      k = runif(len, min = 0, max = 5),
      alpha = runif(len, min = .9, max = 1),
      beta = runif(len, min = 0, max = 4),
      nu = runif(len, min = 0, max = 5)
    )
  } else {
    # Grid search: full cross-product of `len` values per varying parameter.
    shifted_steps <- (1:len) + 1
    out <- expand.grid(
      num_trees = 50,
      k = shifted_steps,
      alpha = seq(.9, .99, length.out = len),
      beta = seq(1, 3, length.out = len),
      nu = shifted_steps
    )
  }

  # For a factor outcome, k and nu are blanked out.
  if (is.factor(y)) {
    out$k <- NA
    out$nu <- NA
  }

  out
}
# Workaround 2: pass the patched model specification as `method`; with
# tuneLength=1 the patched grid function produces the tuning grid.
mdl = train(mpg ~ .,data=mtcars,method=newBartMachine,trControl=trainControl(method="cv"),tuneLength=1)
Bayesian Additive Regression Trees
32 samples
10 predictors
No pre-processing
Resampling: Cross-Validated (10 fold)
Summary of sample sizes: 28, 28, 28, 29, 30, 30, ...
Resampling results:
RMSE Rsquared MAE
2.338429 0.9581958 2.057181
Tuning parameter 'num_trees' was held constant at a value of 50
'beta' was held constant at a value of 1
Tuning parameter 'nu' was
held constant at a value of 2