如何从 caret::train 对象绘制最终的 c50 决策树模型(库 C50)
How to plot final c50 decision tree model (library C50) from caret::train object
我使用来自 caret 库的 train 函数训练了 决策树 模型:
# Tuning grid: every combination of trials, model type and winnowing.
gr <- expand.grid(
  trials = c(1, 10, 20),
  model = c("tree", "rules"),
  winnow = c(TRUE, FALSE)
)

# Tune a C5.0 model over the grid with 10-fold cross-validation.
dt <- train(
  y ~ .,
  data = train,
  method = "C5.0",
  trControl = trainControl(method = "cv", number = 10),
  tuneGrid = gr
)
现在我想为最终模型绘制决策树。但是这个命令不起作用:
# Try to plot the model stored in the fitted train object; this call raises
# the error shown right after it.
plot(dt$finalModel)
Error in data.frame(eval(parse(text = paste(obj$call)[xspot])), eval(parse(text = paste(obj$call)[yspot])), :
arguments imply differing number of rows: 4160, 208, 0
这里已经有人问过了:topic
那里建议的解决方案是:利用已拟合的 train 对象中的 bestTune,手动定义相应的 C5.0 模型,然后照常绘制该 C5.0 模型:
# Refit C5.0 by hand, reusing the hyper-parameters selected by train()
# (stored in dt$bestTune).
c5model <- C5.0(
  x = x,
  y = y,
  trials = dt$bestTune$trials,
  rules = dt$bestTune$model == "rules",
  control = C5.0Control(winnow = dt$bestTune$winnow)
)

# Draw the manually refitted model.
plot(c5model)
我试过了。是的,这样确实可以绘制 C5.0 模型,但是 train 对象预测的概率与手动重新创建的 C5.0 模型预测的概率不一致。
所以,我的问题是:是否可以从 caret::train 对象中提取最终的 C5.0 模型,并绘制其决策树?
预测的概率应该是一样的,见下:
library(MASS)
library(caret)
library(C50)
library(partykit)

# Example data; the code below treats the last column as the outcome.
traindata <- Pima.tr
testdata <- Pima.te

# Cross-validation folds are drawn at random; fix the seed so that the
# selected bestTune is repeatable between runs.
set.seed(1)

gr <- expand.grid(
  trials = c(1, 2),
  model = c("tree"),
  winnow = c(TRUE, FALSE)
)

# Tune through caret. traindata is indexed by its own column count rather
# than testdata's, so the predictor/outcome split stays correct even if the
# two data frames ever differ in layout.
dt <- train(
  x = traindata[, -ncol(traindata)],
  y = traindata[, ncol(traindata)],
  method = "C5.0",
  trControl = trainControl(method = "cv", number = 3),
  tuneGrid = gr
)

# Refit by hand with the hyper-parameters caret selected.
c5model <- C5.0.default(
  x = traindata[, -ncol(traindata)],
  y = traindata[, ncol(traindata)],
  trials = dt$bestTune$trials,
  rules = dt$bestTune$model == "rules",
  control = C5.0Control(winnow = dt$bestTune$winnow)
)

# Compare the class probabilities of both models on the test predictors.
all.equal(
  predict(c5model, testdata[, -ncol(testdata)], type = "prob"),
  predict(dt$finalModel, testdata[, -ncol(testdata)], type = "prob")
)
[1] TRUE
所以我建议你仔细检查预测是否相同。
您在绘制 caret 的最终模型时看到的错误,来自存储在 $call 下的内容(它有些奇怪);我们可以将其替换为适用于绘图的调用:
# The manually refitted model can be plotted directly.
plot(c5model)

# Take caret's final model and replace its stored call with the call of the
# manual model, then plot it.
finalMod <- dt[["finalModel"]]
finalMod[["call"]] <- c5model[["call"]]
plot(finalMod)
或者,您也可以直接利用 train 的结果来重写这个调用,不过可以看到这样写出的表达式有点复杂(或者至少是我不太擅长这种写法):
# Build a C5.0.default() call from the tuning results.
# bquote() leaves the x / y expressions unevaluated, while .() splices in the
# values that train() selected (dt$bestTune).
newcall <- bquote(
  C5.0.default(
    x = traindata[, -ncol(traindata)],
    y = traindata[, ncol(traindata)],
    trials = .(dt$bestTune$trials),
    rules = .(dt$bestTune$model == "rules"),
    control = C5.0Control(winnow = .(dt$bestTune$winnow))
  )
)

# Put the rebuilt call into caret's final model and plot the result.
finalMod <- dt$finalModel
finalMod$call <- newcall
plot(finalMod)
我使用来自 caret 库的 train 函数训练了 决策树 模型:
# Tuning grid: every combination of trials, model type and winnowing.
gr <- expand.grid(
  trials = c(1, 10, 20),
  model = c("tree", "rules"),
  winnow = c(TRUE, FALSE)
)

# Tune a C5.0 model over the grid with 10-fold cross-validation.
dt <- train(
  y ~ .,
  data = train,
  method = "C5.0",
  trControl = trainControl(method = "cv", number = 10),
  tuneGrid = gr
)
现在我想为最终模型绘制决策树。但是这个命令不起作用:
# Try to plot the model stored in the fitted train object; this call raises
# the error shown right after it.
plot(dt$finalModel)
Error in data.frame(eval(parse(text = paste(obj$call)[xspot])), eval(parse(text = paste(obj$call)[yspot])), :
arguments imply differing number of rows: 4160, 208, 0
这里已经有人问过了:topic
那里建议的解决方案是:利用已拟合的 train 对象中的 bestTune,手动定义相应的 C5.0 模型,然后照常绘制该 C5.0 模型:
# Refit C5.0 by hand, reusing the hyper-parameters selected by train()
# (stored in dt$bestTune).
c5model <- C5.0(
  x = x,
  y = y,
  trials = dt$bestTune$trials,
  rules = dt$bestTune$model == "rules",
  control = C5.0Control(winnow = dt$bestTune$winnow)
)

# Draw the manually refitted model.
plot(c5model)
我试过了。是的,这样确实可以绘制 C5.0 模型,但是 train 对象预测的概率与手动重新创建的 C5.0 模型预测的概率不一致。
所以,我的问题是:是否可以从 caret::train 对象中提取最终的 C5.0 模型,并绘制其决策树?
预测的概率应该是一样的,见下:
library(MASS)
library(caret)
library(C50)
library(partykit)

# Example data; the code below treats the last column as the outcome.
traindata <- Pima.tr
testdata <- Pima.te

# Cross-validation folds are drawn at random; fix the seed so that the
# selected bestTune is repeatable between runs.
set.seed(1)

gr <- expand.grid(
  trials = c(1, 2),
  model = c("tree"),
  winnow = c(TRUE, FALSE)
)

# Tune through caret. traindata is indexed by its own column count rather
# than testdata's, so the predictor/outcome split stays correct even if the
# two data frames ever differ in layout.
dt <- train(
  x = traindata[, -ncol(traindata)],
  y = traindata[, ncol(traindata)],
  method = "C5.0",
  trControl = trainControl(method = "cv", number = 3),
  tuneGrid = gr
)

# Refit by hand with the hyper-parameters caret selected.
c5model <- C5.0.default(
  x = traindata[, -ncol(traindata)],
  y = traindata[, ncol(traindata)],
  trials = dt$bestTune$trials,
  rules = dt$bestTune$model == "rules",
  control = C5.0Control(winnow = dt$bestTune$winnow)
)

# Compare the class probabilities of both models on the test predictors.
all.equal(
  predict(c5model, testdata[, -ncol(testdata)], type = "prob"),
  predict(dt$finalModel, testdata[, -ncol(testdata)], type = "prob")
)
[1] TRUE
所以我建议你仔细检查预测是否相同。
您在绘制 caret 的最终模型时看到的错误,来自存储在 $call 下的内容(它有些奇怪);我们可以将其替换为适用于绘图的调用:
# The manually refitted model can be plotted directly.
plot(c5model)

# Take caret's final model and replace its stored call with the call of the
# manual model, then plot it.
finalMod <- dt[["finalModel"]]
finalMod[["call"]] <- c5model[["call"]]
plot(finalMod)
或者,您也可以直接利用 train 的结果来重写这个调用,不过可以看到这样写出的表达式有点复杂(或者至少是我不太擅长这种写法):
# Build a C5.0.default() call from the tuning results.
# bquote() leaves the x / y expressions unevaluated, while .() splices in the
# values that train() selected (dt$bestTune).
newcall <- bquote(
  C5.0.default(
    x = traindata[, -ncol(traindata)],
    y = traindata[, ncol(traindata)],
    trials = .(dt$bestTune$trials),
    rules = .(dt$bestTune$model == "rules"),
    control = C5.0Control(winnow = .(dt$bestTune$winnow))
  )
)

# Put the rebuilt call into caret's final model and plot the result.
finalMod <- dt$finalModel
finalMod$call <- newcall
plot(finalMod)