从 r (mlr) 中的 batchmark 中提取预测
Extracting predictions from batchmark in r (mlr)
我运行了 batchmark,但在提取预测结果时遇到了困难。
用以下代码汇总(reduce)结果之后:
res = reduceResultsDataTable()
jt = getJobTable()
我提取了每个学习器(learner)与问题(problem)组合的结果:
#First algorithm - power combination (exemplary)
job_ids_featureless_power = jt %>% filter(algorithm == "regr.featureless", problem == "Power") %>% pull(job.id)
res_featureless_power = res[job.id %in% job_ids_featureless_power]
现在结果以下列格式存储:
head(res_featureless_power$result)
[[1]]
[[1]]$`measures.test`
mse.test.mean mae.test.mean rmse.test.rmse rsq.test.mean kendalltau.test.mean
2.072718285 1.107952812 1.439693816 -0.003963842 NA
spearmanrho.test.mean pearsonsr.test.mean timeboth.test.mean
NA NA 0.000000000
[...]
[[1]]$pred.test
Prediction: 293 observations
predict.type: response
threshold:
time: 0.00
id truth response
2 2 -2.434211 -2.137163
4 4 -2.675439 -2.137163
9 9 -0.750000 -2.137163
15 15 -3.951754 -2.137163
78 78 -3.500000 -2.137163
87 87 -1.557018 -2.137163
... (#rows: 293, #cols: 3)
[...]
[[2]]
[[2]]$`measures.test`
mse.test.mean mae.test.mean rmse.test.rmse rsq.test.mean kendalltau.test.mean
2.3432786683 1.2223546305 1.5307771452 -0.0003114365 NA
spearmanrho.test.mean pearsonsr.test.mean timeboth.test.mean
NA NA 0.0000000000
[...]
[[2]]$pred.test
Prediction: 292 observations
predict.type: response
threshold:
time: 0.00
id truth response
3 3 -2.1045455 -2.14352
6 6 -2.8157895 -2.14352
25 25 -1.7850877 -2.14352
42 42 -3.0000000 -2.14352
75 75 0.2982456 -2.14352
76 76 -2.7149123 -2.14352
... (#rows: 292, #cols: 3)
[...]
[[3]]
[[3]]$`measures.test`
mse.test.mean mae.test.mean rmse.test.rmse rsq.test.mean kendalltau.test.mean
2.183591357 1.211876010 1.477697992 -0.002771853 NA
spearmanrho.test.mean pearsonsr.test.mean timeboth.test.mean
NA NA 0.000000000
[...]
[[3]]$pred.test
Prediction: 293 observations
predict.type: response
threshold:
time: 0.00
id truth response
14 14 -2.903509 -2.138441
19 19 -0.372807 -2.138441
39 39 -3.013158 -2.138441
50 50 -3.539474 -2.138441
55 55 -1.048246 -2.138441
58 58 -1.910714 -2.138441
... (#rows: 293, #cols: 3)
[...]
[[4]]
[[4]]$`measures.test`
mse.test.mean mae.test.mean rmse.test.rmse rsq.test.mean kendalltau.test.mean
2.327354765 1.204544750 1.525567031 -0.001207572 NA
spearmanrho.test.mean pearsonsr.test.mean timeboth.test.mean
NA NA 0.000000000
[[4]]$measures.train
[1] NA NA NA NA NA NA NA NA
[[4]]$model
NULL
[[4]]$pred.test
Prediction: 293 observations
predict.type: response
threshold:
time: 0.00
id truth response
7 7 -2.1071429 -2.151513
13 13 -0.7105263 -2.151513
26 26 -1.6666667 -2.151513
36 36 -2.4285714 -2.151513
40 40 -1.7149123 -2.151513
43 43 -1.8392857 -2.151513
... (#rows: 293, #cols: 3)
[...]
[[5]]
[[5]]$`measures.test`
mse.test.mean mae.test.mean rmse.test.rmse rsq.test.mean kendalltau.test.mean
2.565342245 1.277489347 1.601668582 -0.007325195 NA
spearmanrho.test.mean pearsonsr.test.mean timeboth.test.mean
NA NA 0.000000000
[[5]]$measures.train
[1] NA NA NA NA NA NA NA NA
[[5]]$model
NULL
[[5]]$pred.test
Prediction: 293 observations
predict.type: response
threshold:
time: 0.00
id truth response
10 10 -0.07727273 -2.159876
11 11 -0.39912281 -2.159876
22 22 -3.17105263 -2.159876
34 34 -2.05263158 -2.159876
44 44 -1.82017544 -2.159876
54 54 -2.74561404 -2.159876
... (#rows: 293, #cols: 3)
[...]
[[6]]
[[6]]$`measures.test`
mse.test.mean mae.test.mean rmse.test.rmse rsq.test.mean kendalltau.test.mean
1.92446761 1.12860248 1.38725182 -0.00239135 NA
spearmanrho.test.mean pearsonsr.test.mean timeboth.test.mean
NA NA 0.00000000
[...]
[[6]]$pred.test
Prediction: 293 observations
predict.type: response
threshold:
time: 0.00
id truth response
16 16 -0.3771930 -2.139435
20 20 -0.8508772 -2.139435
24 24 -1.2412281 -2.139435
28 28 0.6096491 -2.139435
29 29 -2.8928571 -2.139435
38 38 -1.7894737 -2.139435
... (#rows: 293, #cols: 3)
[...]
为了检索性能指标,我使用了以下代码,它运行良好:
x_featureless_power = lapply(res_featureless_power$result, FUN = function(x) x$measures.test) %>% data.frame() %>% t() %>% data.frame()
rownames(x_featureless_power) = NULL
但是,我无法从 pred.test 检索预测。我尝试了以下操作并收到了相应的错误消息:
job_ids_featureless_power = jt %>% filter(algorithm == "regr.featureless", problem == "Power") %>% pull(job.id)
res_featureless_power = res[job.id %in% job_ids_featureless_power]
pred_featureless_power = lapply(res_featureless_power$result, FUN = function(x) x$pred.test) %>% data.frame() %>% t() %>% data.frame()
#Error in (function (..., row.names = NULL, check.rows = FALSE, check.names = TRUE, :
#arguments imply differing number of rows: 293, 292
非常感谢任何提示:)
希望您能接受一个不使用 dplyr 的答案。
它指出了应该到哪里查找预测结果(即 `pred$data`),并附带一个最小示例:
library(mlr)
library(batchtools)
reg = makeExperimentRegistry(file.dir = NA)
learners = list("classif.rpart", "classif.svm")
tasks = list(iris.task)
batchmark(learners, tasks, cv3, models = TRUE)
submitJobs()
res = reduceBatchmarkResults()
res$results$`iris-example`$classif.rpart$pred$data
res$results$`iris-example`$classif.svm$pred$data
我运行了 batchmark,但在提取预测结果时遇到了困难。
用以下代码汇总(reduce)结果之后:
res = reduceResultsDataTable()
jt = getJobTable()
我提取了每个学习器(learner)与问题(problem)组合的结果:
#First algorithm - power combination (exemplary)
job_ids_featureless_power = jt %>% filter(algorithm == "regr.featureless", problem == "Power") %>% pull(job.id)
res_featureless_power = res[job.id %in% job_ids_featureless_power]
现在结果以下列格式存储:
head(res_featureless_power$result)
[[1]]
[[1]]$`measures.test`
mse.test.mean mae.test.mean rmse.test.rmse rsq.test.mean kendalltau.test.mean
2.072718285 1.107952812 1.439693816 -0.003963842 NA
spearmanrho.test.mean pearsonsr.test.mean timeboth.test.mean
NA NA 0.000000000
[...]
[[1]]$pred.test
Prediction: 293 observations
predict.type: response
threshold:
time: 0.00
id truth response
2 2 -2.434211 -2.137163
4 4 -2.675439 -2.137163
9 9 -0.750000 -2.137163
15 15 -3.951754 -2.137163
78 78 -3.500000 -2.137163
87 87 -1.557018 -2.137163
... (#rows: 293, #cols: 3)
[...]
[[2]]
[[2]]$`measures.test`
mse.test.mean mae.test.mean rmse.test.rmse rsq.test.mean kendalltau.test.mean
2.3432786683 1.2223546305 1.5307771452 -0.0003114365 NA
spearmanrho.test.mean pearsonsr.test.mean timeboth.test.mean
NA NA 0.0000000000
[...]
[[2]]$pred.test
Prediction: 292 observations
predict.type: response
threshold:
time: 0.00
id truth response
3 3 -2.1045455 -2.14352
6 6 -2.8157895 -2.14352
25 25 -1.7850877 -2.14352
42 42 -3.0000000 -2.14352
75 75 0.2982456 -2.14352
76 76 -2.7149123 -2.14352
... (#rows: 292, #cols: 3)
[...]
[[3]]
[[3]]$`measures.test`
mse.test.mean mae.test.mean rmse.test.rmse rsq.test.mean kendalltau.test.mean
2.183591357 1.211876010 1.477697992 -0.002771853 NA
spearmanrho.test.mean pearsonsr.test.mean timeboth.test.mean
NA NA 0.000000000
[...]
[[3]]$pred.test
Prediction: 293 observations
predict.type: response
threshold:
time: 0.00
id truth response
14 14 -2.903509 -2.138441
19 19 -0.372807 -2.138441
39 39 -3.013158 -2.138441
50 50 -3.539474 -2.138441
55 55 -1.048246 -2.138441
58 58 -1.910714 -2.138441
... (#rows: 293, #cols: 3)
[...]
[[4]]
[[4]]$`measures.test`
mse.test.mean mae.test.mean rmse.test.rmse rsq.test.mean kendalltau.test.mean
2.327354765 1.204544750 1.525567031 -0.001207572 NA
spearmanrho.test.mean pearsonsr.test.mean timeboth.test.mean
NA NA 0.000000000
[[4]]$measures.train
[1] NA NA NA NA NA NA NA NA
[[4]]$model
NULL
[[4]]$pred.test
Prediction: 293 observations
predict.type: response
threshold:
time: 0.00
id truth response
7 7 -2.1071429 -2.151513
13 13 -0.7105263 -2.151513
26 26 -1.6666667 -2.151513
36 36 -2.4285714 -2.151513
40 40 -1.7149123 -2.151513
43 43 -1.8392857 -2.151513
... (#rows: 293, #cols: 3)
[...]
[[5]]
[[5]]$`measures.test`
mse.test.mean mae.test.mean rmse.test.rmse rsq.test.mean kendalltau.test.mean
2.565342245 1.277489347 1.601668582 -0.007325195 NA
spearmanrho.test.mean pearsonsr.test.mean timeboth.test.mean
NA NA 0.000000000
[[5]]$measures.train
[1] NA NA NA NA NA NA NA NA
[[5]]$model
NULL
[[5]]$pred.test
Prediction: 293 observations
predict.type: response
threshold:
time: 0.00
id truth response
10 10 -0.07727273 -2.159876
11 11 -0.39912281 -2.159876
22 22 -3.17105263 -2.159876
34 34 -2.05263158 -2.159876
44 44 -1.82017544 -2.159876
54 54 -2.74561404 -2.159876
... (#rows: 293, #cols: 3)
[...]
[[6]]
[[6]]$`measures.test`
mse.test.mean mae.test.mean rmse.test.rmse rsq.test.mean kendalltau.test.mean
1.92446761 1.12860248 1.38725182 -0.00239135 NA
spearmanrho.test.mean pearsonsr.test.mean timeboth.test.mean
NA NA 0.00000000
[...]
[[6]]$pred.test
Prediction: 293 observations
predict.type: response
threshold:
time: 0.00
id truth response
16 16 -0.3771930 -2.139435
20 20 -0.8508772 -2.139435
24 24 -1.2412281 -2.139435
28 28 0.6096491 -2.139435
29 29 -2.8928571 -2.139435
38 38 -1.7894737 -2.139435
... (#rows: 293, #cols: 3)
[...]
为了检索性能指标,我使用了以下代码,它运行良好:
x_featureless_power = lapply(res_featureless_power$result, FUN = function(x) x$measures.test) %>% data.frame() %>% t() %>% data.frame()
rownames(x_featureless_power) = NULL
但是,我无法从 pred.test 检索预测。我尝试了以下操作并收到了相应的错误消息:
job_ids_featureless_power = jt %>% filter(algorithm == "regr.featureless", problem == "Power") %>% pull(job.id)
res_featureless_power = res[job.id %in% job_ids_featureless_power]
pred_featureless_power = lapply(res_featureless_power$result, FUN = function(x) x$pred.test) %>% data.frame() %>% t() %>% data.frame()
#Error in (function (..., row.names = NULL, check.rows = FALSE, check.names = TRUE, :
#arguments imply differing number of rows: 293, 292
非常感谢任何提示:)
希望您能接受一个不使用 dplyr 的答案。
它指出了应该到哪里查找预测结果(即 `pred$data`),并附带一个最小示例:
library(mlr)
library(batchtools)
reg = makeExperimentRegistry(file.dir = NA)
learners = list("classif.rpart", "classif.svm")
tasks = list(iris.task)
batchmark(learners, tasks, cv3, models = TRUE)
submitJobs()
res = reduceBatchmarkResults()
res$results$`iris-example`$classif.rpart$pred$data
res$results$`iris-example`$classif.svm$pred$data