Caret 预测目标变量 nrow() 为 Null
Caret Predict Target Variable nrow() is Null
df:
library(caret)
# Four identical categorical predictors (a-d) and a binary 0/1 outcome (e).
a <- c("aa", "bb", "cc", "aa", "aa", "aa", "bb", "cc", "bb", "bb")
b <- a
c <- a
d <- a
e <- c(1, 0, 1, 0, 0, 0, 1, 1, 1, 1)
# df1 and df2 are built from the same vectors, so they hold the same rows.
df1 <- data.frame(a, b, c, d, e)
df2 <- df1
Caret 逻辑回归(Log-Reg)模型:
# Question's reproduction: fit a logistic regression through caret and
# compare nrow() on the prediction column and on the outcome column.
# Convert the 0/1 outcome to a factor in both data frames.
df1$e <- as.factor(df1$e)
df2$e <- as.factor(df2$e)
# define training control
train_control <- trainControl(method = "cv", number = 5)
# train the model on training set
# NOTE(review): family = binomial() is presumably forwarded by train() to the
# underlying glm fit -- confirm against the caret documentation.
model <- train(e ~ .,
data = df1,
trControl = train_control,
method = "glm",
family=binomial())
# Commented-out leftover; WonLost, PANum and train are not defined in this snippet.
# logistic <- glm(WonLost ~ . -PANum, data=train, family="binomial")
# Store the class probabilities (type = "prob") as a new column of df2.
df2$predict <- caret::predict.train(model, newdata=df2,type = "prob")
# df2$predict is a data frame, so nrow() reports its number of rows.
nrow(df2$predict)
# df2$e is a plain vector without dimensions, so nrow() returns NULL here;
# NROW() or length() would give the element count instead.
nrow(df2$e)
为什么 nrow(df2$e) 返回 NULL?我根据之前遇到的警告信息(见下文)将目标变量转换为因子,但这似乎导致了我当前的问题。
Warning messages: 1: In train.default(x, y, weights = w, ...) : You
are trying to do regression and your outcome only has two possible
values Are you trying to do classification? If so, use a 2 level
factor as your outcome column.
有时 caret 对变量的类型很敏感,即便是 glm logit 模型,它也可能在回归与分类的判定上出现问题。我学到的一个建议是将目标变量重新编码为 Yes/No。另外,请注意 caret 的预测结果是作为一个新的数据框添加到 df2 中的,这就是 nrow() 对它有效的原因;而 e 只是一个向量,因此您必须使用 length() 或 NROW()。代码如下:
library(caret)
# Vectors: four identical categorical predictors and a binary 0/1 outcome.
a <- c("aa", "bb", "cc", "aa", "aa", "aa", "bb", "cc", "bb", "bb")
b <- c("aa", "bb", "cc", "aa", "aa", "aa", "bb", "cc", "bb", "bb")
c <- c("aa", "bb", "cc", "aa", "aa", "aa", "bb", "cc", "bb", "bb")
d <- c("aa", "bb", "cc", "aa", "aa", "aa", "bb", "cc", "bb", "bb")
e <- c(1, 0, 1, 0, 0, 0, 1, 1, 1, 1)
# df1: data used to fit the model
df1 <- data.frame(a, b, c, d, e)
# df2: data to be scored (same rows as df1 in this example)
df2 <- data.frame(a, b, c, d, e)
# Format: recode the outcome as a two-level factor (0 -> "No", 1 -> "Yes")
# in a single step. Replacing values one at a time would turn the column into
# character after the first assignment and make the second comparison rely on
# implicit string coercion; factor() maps both codes at once and gives the
# model an explicit two-level outcome.
df1$e <- factor(df1$e, levels = c(0, 1), labels = c("No", "Yes"))
df2$e <- factor(df2$e, levels = c(0, 1), labels = c("No", "Yes"))
# Define training control: cross-validation with number = 5.
train_control <- trainControl(method = "cv", number = 5)
# Train the model on the training set (df1): e is the outcome, every other
# column is a predictor.
# NOTE(review): family = binomial() is presumably forwarded by train() to the
# underlying glm fit -- confirm against the caret documentation.
model <- train(
  e ~ .,
  data = df1,
  trControl = train_control,
  method = "glm",
  family = binomial()
)
# Predict: go through the predict() generic so S3 dispatch picks caret's
# method for the fitted object, rather than naming the method directly.
# With type = "prob" the result is a data frame of class probabilities, which
# is stored as a single (nested) column of df2.
df2$predict <- predict(model, newdata = df2, type = "prob")
# Checks
# df2$predict is a data frame, so nrow() works on it.
nrow(df2$predict)
# df2$e is a plain vector: nrow() would return NULL, so use NROW() or length().
NROW(df2$e)
length(df2$e)
输出:
df2
a b c d e predict.No predict.Yes
1 aa aa aa aa Yes 7.500000e-01 0.25
2 bb bb bb bb No 2.500000e-01 0.75
3 cc cc cc cc Yes 8.646869e-09 1.00
4 aa aa aa aa No 7.500000e-01 0.25
5 aa aa aa aa No 7.500000e-01 0.25
6 aa aa aa aa No 7.500000e-01 0.25
7 bb bb bb bb Yes 2.500000e-01 0.75
8 cc cc cc cc Yes 8.646869e-09 1.00
9 bb bb bb bb Yes 2.500000e-01 0.75
10 bb bb bb bb Yes 2.500000e-01 0.75
nrow(df2$predict)
[1] 10
NROW(df2$e)
[1] 10
length(df2$e)
[1] 10
df:
library(caret)
# Four identical categorical predictors (a-d) and a binary 0/1 outcome (e).
a <- c("aa", "bb", "cc", "aa", "aa", "aa", "bb", "cc", "bb", "bb")
b <- a
c <- a
d <- a
e <- c(1, 0, 1, 0, 0, 0, 1, 1, 1, 1)
# df1 and df2 are built from the same vectors, so they hold the same rows.
df1 <- data.frame(a, b, c, d, e)
df2 <- df1
Caret 逻辑回归(Log-Reg)模型:
# Question's reproduction: fit a logistic regression through caret and
# compare nrow() on the prediction column and on the outcome column.
# Convert the 0/1 outcome to a factor in both data frames.
df1$e <- as.factor(df1$e)
df2$e <- as.factor(df2$e)
# define training control
train_control <- trainControl(method = "cv", number = 5)
# train the model on training set
# NOTE(review): family = binomial() is presumably forwarded by train() to the
# underlying glm fit -- confirm against the caret documentation.
model <- train(e ~ .,
data = df1,
trControl = train_control,
method = "glm",
family=binomial())
# Commented-out leftover; WonLost, PANum and train are not defined in this snippet.
# logistic <- glm(WonLost ~ . -PANum, data=train, family="binomial")
# Store the class probabilities (type = "prob") as a new column of df2.
df2$predict <- caret::predict.train(model, newdata=df2,type = "prob")
# df2$predict is a data frame, so nrow() reports its number of rows.
nrow(df2$predict)
# df2$e is a plain vector without dimensions, so nrow() returns NULL here;
# NROW() or length() would give the element count instead.
nrow(df2$e)
为什么 nrow(df2$e) 返回 NULL?我根据之前遇到的警告信息(见下文)将目标变量转换为因子,但这似乎导致了我当前的问题。
Warning messages: 1: In train.default(x, y, weights = w, ...) : You are trying to do regression and your outcome only has two possible values Are you trying to do classification? If so, use a 2 level factor as your outcome column.
有时 caret 对变量的类型很敏感,即便是 glm logit 模型,它也可能在回归与分类的判定上出现问题。我学到的一个建议是将目标变量重新编码为 Yes/No。另外,请注意 caret 的预测结果是作为一个新的数据框添加到 df2 中的,这就是 nrow() 对它有效的原因;而 e 只是一个向量,因此您必须使用 length() 或 NROW()。代码如下:
library(caret)
# Vectors: four identical categorical predictors and a binary 0/1 outcome.
a <- c("aa", "bb", "cc", "aa", "aa", "aa", "bb", "cc", "bb", "bb")
b <- c("aa", "bb", "cc", "aa", "aa", "aa", "bb", "cc", "bb", "bb")
c <- c("aa", "bb", "cc", "aa", "aa", "aa", "bb", "cc", "bb", "bb")
d <- c("aa", "bb", "cc", "aa", "aa", "aa", "bb", "cc", "bb", "bb")
e <- c(1, 0, 1, 0, 0, 0, 1, 1, 1, 1)
# df1: data used to fit the model
df1 <- data.frame(a, b, c, d, e)
# df2: data to be scored (same rows as df1 in this example)
df2 <- data.frame(a, b, c, d, e)
# Format: recode the outcome as a two-level factor (0 -> "No", 1 -> "Yes")
# in a single step. Replacing values one at a time would turn the column into
# character after the first assignment and make the second comparison rely on
# implicit string coercion; factor() maps both codes at once and gives the
# model an explicit two-level outcome.
df1$e <- factor(df1$e, levels = c(0, 1), labels = c("No", "Yes"))
df2$e <- factor(df2$e, levels = c(0, 1), labels = c("No", "Yes"))
# Define training control: cross-validation with number = 5.
train_control <- trainControl(method = "cv", number = 5)
# Train the model on the training set (df1): e is the outcome, every other
# column is a predictor.
# NOTE(review): family = binomial() is presumably forwarded by train() to the
# underlying glm fit -- confirm against the caret documentation.
model <- train(
  e ~ .,
  data = df1,
  trControl = train_control,
  method = "glm",
  family = binomial()
)
# Predict: go through the predict() generic so S3 dispatch picks caret's
# method for the fitted object, rather than naming the method directly.
# With type = "prob" the result is a data frame of class probabilities, which
# is stored as a single (nested) column of df2.
df2$predict <- predict(model, newdata = df2, type = "prob")
# Checks
# df2$predict is a data frame, so nrow() works on it.
nrow(df2$predict)
# df2$e is a plain vector: nrow() would return NULL, so use NROW() or length().
NROW(df2$e)
length(df2$e)
输出:
df2
a b c d e predict.No predict.Yes
1 aa aa aa aa Yes 7.500000e-01 0.25
2 bb bb bb bb No 2.500000e-01 0.75
3 cc cc cc cc Yes 8.646869e-09 1.00
4 aa aa aa aa No 7.500000e-01 0.25
5 aa aa aa aa No 7.500000e-01 0.25
6 aa aa aa aa No 7.500000e-01 0.25
7 bb bb bb bb Yes 2.500000e-01 0.75
8 cc cc cc cc Yes 8.646869e-09 1.00
9 bb bb bb bb Yes 2.500000e-01 0.75
10 bb bb bb bb Yes 2.500000e-01 0.75
nrow(df2$predict)
[1] 10
NROW(df2$e)
[1] 10
length(df2$e)
[1] 10