使用 for 循环优化集成模型权重
Optimize ensemble model weights with for loop
我有三个模型概率预测mod.p1, mod.p2, mod.p3
require(Runuran)
# Simulated probability predictions for three models: 1000 draws each from a
# normal distribution restricted to [0, 1] via the lb/ub bounds.
mod.p1 <- urnorm(n = 1000, mean = 0.20, sd = 0.35, lb = 0, ub = 1)
mod.p2 <- urnorm(n = 1000, mean = 0.23, sd = 0.37, lb = 0, ub = 1)
mod.p3 <- urnorm(n = 1000, mean = 0.19, sd = 0.39, lb = 0, ub = 1)
最终结果保存在 Verdict 中(取值为 yes 或 No)
# Simulated outcome labels: 1000 draws with replacement, "yes" with
# probability 0.2 and "No" with probability 0.8.
Verdict <- sample(
  x = c("yes", "No"),
  size = 1000,
  replace = TRUE,
  prob = c(0.2, 0.8)
)
我为这三个模型各设置了一个权重
# Starting ensemble weights, one per model (0.3 + 0.3 + 0.4).
i1 <- 0.3
i2 <- 0.3
i3 <- 0.4
创建集成预测并计算 AUC
# Weighted sum of the three model predictions.
Ensemble <- i1 * mod.p1 + i2 * mod.p2 + i3 * mod.p3
require(ROCR)
# Score the ensemble against the labels, then read the AUC out of the
# performance object's y.values slot.
Ensemble.pred <- prediction(predictions = Ensemble, labels = Verdict)
Ensemble.auc <- as.numeric(
  slot(performance(Ensemble.pred, measure = "auc"), "y.values")
)
Ensemble.auc # 0.52
现在,我该如何编写一个 for 循环来尝试 i1、i2 和 i3 的不同取值,
并返回使 AUC 最大的那一组 i1、i2 和 i3?
下面这个版本经过调整,加入了约束条件,并以 0.1 为步长进行搜索。它用一种略显蛮力的方式来约束参数,但这样做更通用。
library(Runuran)
library(ROCR)

# Make the example reproducible. The seed must be set with a function call
# (`set.seed = 123` only creates a variable named `set.seed`) and before the
# first random draw.
set.seed(123)

# Dummy model predictions: normal draws restricted to [0, 1] via lb/ub.
mod.p1 <- urnorm(n = 1000, mean = 0.2, sd = 0.35, lb = 0, ub = 1)
mod.p2 <- urnorm(n = 1000, mean = 0.23, sd = 0.37, lb = 0, ub = 1)
mod.p3 <- urnorm(n = 1000, mean = 0.19, sd = 0.39, lb = 0, ub = 1)

# Final outcomes are given in Verdict ("yes" / "No"). They are drawn once,
# outside the loop, so every weight combination is scored against the same
# labels and the resulting AUCs are comparable.
Verdict <- sample(c("yes", "No"), 1000, replace = TRUE, prob = c(0.2, 0.8))

## creating ensemble predictions and calculating auc
# expand.grid, in base R, returns a data frame holding every combination of
# the three weights in steps of 0.1.
grid <- expand.grid(
  i1 = seq(0.1, 1, by = 0.1),
  i2 = seq(0.1, 1, by = 0.1),
  i3 = seq(0.1, 1, by = 0.1)
)

# Apply the constraint that the weights sum to 1. A tolerance is used because
# an exact `== 1` test on floating-point sums can drop valid rows.
grid <- grid[abs(grid$i1 + grid$i2 + grid$i3 - 1) < 1e-8, ]

# Preallocate the per-row results.
Ensembles <- vector("list", nrow(grid))
grid$auc <- NA_real_

for (i in seq_len(nrow(grid))) {
  this_set <- grid[i, ]
  # Weighted sum of the model predictions for this row's weights.
  Ensemble <- (this_set[["i1"]] * mod.p1) +
    (this_set[["i2"]] * mod.p2) +
    (this_set[["i3"]] * mod.p3)
  Ensemble.pred <- prediction(Ensemble, Verdict)
  Ensemble.auc <- as.numeric(performance(Ensemble.pred, "auc")@y.values)
  grid$auc[i] <- Ensemble.auc
  Ensembles[[i]] <- Ensemble
}

# Print the row(s) with the highest AUC.
grid[grid$auc == max(grid$auc), ]
# Example output recorded from an earlier run of the original snippet; the
# exact row and AUC depend on the random data:
#   i1  i2  i3      auc
# 8 0.8 0.1 0.1 0.554802
明白了,这正是我想要的 -
# Creation of dummy data: three sets of model predictions restricted to
# [0, 1] and a vector of "yes" / "No" outcome labels.
mod.p1 <- urnorm(n = 1000, mean = 0.2, sd = 0.35, lb = 0, ub = 1)
mod.p2 <- urnorm(n = 1000, mean = 0.23, sd = 0.37, lb = 0, ub = 1)
mod.p3 <- urnorm(n = 1000, mean = 0.19, sd = 0.39, lb = 0, ub = 1)
Verdict <- sample(c("yes", "No"), 1000, replace = TRUE, prob = c(0.2, 0.8))

# Loop for optimizing the weights in the ensemble: try every (i, j, k) with
# i + j + k = 1 in steps of 0.05 and remember the combination with the
# highest AUC in (i1, i2, i3).
auc <- 0
i1 <- 0
i2 <- 0
i3 <- 0
for (i in seq(0, 1, 0.05)) {
  for (j in seq(0, 1 - i, 0.05)) {
    k <- 1 - i - j
    # Score the *candidate* weights (i, j, k). Using the running best
    # (i1, i2, i3) on this line would re-score the previous winner instead of
    # the combination under test.
    e <- i * mod.p1 + j * mod.p2 + k * mod.p3
    Ensemble.pred <- prediction(e, Verdict)
    Ensemble.auc <- as.numeric(performance(Ensemble.pred, "auc")@y.values)
    # Strict '>' keeps the first combination that reaches a given AUC.
    if (Ensemble.auc > auc) {
      auc <- Ensemble.auc
      i1 <- i
      i2 <- j
      i3 <- k
      pred <- e
    }
  }
}
# Get final values. The numbers in the trailing comments were recorded from
# an earlier run; the data are random, so actual results will differ.
auc # 0.524
i1 # 0
i2 # 0.1
i3 # 0.9
我觉得你自己提出的方案其实还不错。我唯一不明白的是,为什么不在 e = i1*mod.p1 + i2*mod.p2 + i3*mod.p3
这一行中使用 i、j、k
的值。我会像下面这样调整,效果很好。
# Weight search using the loop candidates directly, with a 0.03 step.
# Relies on auc, i1, i2, i3, mod.p1..mod.p3 and Verdict already existing.
for (i in seq(from = 0, to = 1, by = 0.03)) {
  for (j in seq(from = 0, to = 1 - i, by = 0.03)) {
    k <- 1 - i - j
    e <- i * mod.p1 + j * mod.p2 + k * mod.p3
    Ensemble.pred <- prediction(e, Verdict)
    Ensemble.auc <- as.numeric(performance(Ensemble.pred, "auc")@y.values)
    # Skip candidates that do not strictly beat the best AUC so far.
    if (!(Ensemble.auc > auc)) {
      next
    }
    auc <- Ensemble.auc
    i1 <- i
    i2 <- j
    i3 <- k
    pred <- e
  }
}
# get final values
auc # 0.543
i1 # 0.84
i2 # 0.15
i3 # 0.01
我有三个模型概率预测mod.p1, mod.p2, mod.p3
require(Runuran)
# Simulated probability predictions for three models: 1000 draws each from a
# normal distribution restricted to [0, 1] via the lb/ub bounds.
mod.p1 <- urnorm(n = 1000, mean = 0.20, sd = 0.35, lb = 0, ub = 1)
mod.p2 <- urnorm(n = 1000, mean = 0.23, sd = 0.37, lb = 0, ub = 1)
mod.p3 <- urnorm(n = 1000, mean = 0.19, sd = 0.39, lb = 0, ub = 1)
最终结果保存在 Verdict 中(取值为 yes 或 No)
# Simulated outcome labels: 1000 draws with replacement, "yes" with
# probability 0.2 and "No" with probability 0.8.
Verdict <- sample(
  x = c("yes", "No"),
  size = 1000,
  replace = TRUE,
  prob = c(0.2, 0.8)
)
我为这三个模型各设置了一个权重
# Starting ensemble weights, one per model (0.3 + 0.3 + 0.4).
i1 <- 0.3
i2 <- 0.3
i3 <- 0.4
创建集成预测并计算 AUC
# Weighted sum of the three model predictions.
Ensemble <- i1 * mod.p1 + i2 * mod.p2 + i3 * mod.p3
require(ROCR)
# Score the ensemble against the labels, then read the AUC out of the
# performance object's y.values slot.
Ensemble.pred <- prediction(predictions = Ensemble, labels = Verdict)
Ensemble.auc <- as.numeric(
  slot(performance(Ensemble.pred, measure = "auc"), "y.values")
)
Ensemble.auc # 0.52
现在,我该如何编写一个 for 循环来尝试 i1、i2 和 i3 的不同取值,
并返回使 AUC 最大的那一组 i1、i2 和 i3?
下面这个版本经过调整,加入了约束条件,并以 0.1 为步长进行搜索。它用一种略显蛮力的方式来约束参数,但这样做更通用。
library(Runuran)
library(ROCR)

# Make the example reproducible. The seed must be set with a function call
# (`set.seed = 123` only creates a variable named `set.seed`) and before the
# first random draw.
set.seed(123)

# Dummy model predictions: normal draws restricted to [0, 1] via lb/ub.
mod.p1 <- urnorm(n = 1000, mean = 0.2, sd = 0.35, lb = 0, ub = 1)
mod.p2 <- urnorm(n = 1000, mean = 0.23, sd = 0.37, lb = 0, ub = 1)
mod.p3 <- urnorm(n = 1000, mean = 0.19, sd = 0.39, lb = 0, ub = 1)

# Final outcomes are given in Verdict ("yes" / "No"). They are drawn once,
# outside the loop, so every weight combination is scored against the same
# labels and the resulting AUCs are comparable.
Verdict <- sample(c("yes", "No"), 1000, replace = TRUE, prob = c(0.2, 0.8))

## creating ensemble predictions and calculating auc
# expand.grid, in base R, returns a data frame holding every combination of
# the three weights in steps of 0.1.
grid <- expand.grid(
  i1 = seq(0.1, 1, by = 0.1),
  i2 = seq(0.1, 1, by = 0.1),
  i3 = seq(0.1, 1, by = 0.1)
)

# Apply the constraint that the weights sum to 1. A tolerance is used because
# an exact `== 1` test on floating-point sums can drop valid rows.
grid <- grid[abs(grid$i1 + grid$i2 + grid$i3 - 1) < 1e-8, ]

# Preallocate the per-row results.
Ensembles <- vector("list", nrow(grid))
grid$auc <- NA_real_

for (i in seq_len(nrow(grid))) {
  this_set <- grid[i, ]
  # Weighted sum of the model predictions for this row's weights.
  Ensemble <- (this_set[["i1"]] * mod.p1) +
    (this_set[["i2"]] * mod.p2) +
    (this_set[["i3"]] * mod.p3)
  Ensemble.pred <- prediction(Ensemble, Verdict)
  Ensemble.auc <- as.numeric(performance(Ensemble.pred, "auc")@y.values)
  grid$auc[i] <- Ensemble.auc
  Ensembles[[i]] <- Ensemble
}

# Print the row(s) with the highest AUC.
grid[grid$auc == max(grid$auc), ]
# Example output recorded from an earlier run of the original snippet; the
# exact row and AUC depend on the random data:
#   i1  i2  i3      auc
# 8 0.8 0.1 0.1 0.554802
明白了,这正是我想要的 -
# Creation of dummy data: three sets of model predictions restricted to
# [0, 1] and a vector of "yes" / "No" outcome labels.
mod.p1 <- urnorm(n = 1000, mean = 0.2, sd = 0.35, lb = 0, ub = 1)
mod.p2 <- urnorm(n = 1000, mean = 0.23, sd = 0.37, lb = 0, ub = 1)
mod.p3 <- urnorm(n = 1000, mean = 0.19, sd = 0.39, lb = 0, ub = 1)
Verdict <- sample(c("yes", "No"), 1000, replace = TRUE, prob = c(0.2, 0.8))

# Loop for optimizing the weights in the ensemble: try every (i, j, k) with
# i + j + k = 1 in steps of 0.05 and remember the combination with the
# highest AUC in (i1, i2, i3).
auc <- 0
i1 <- 0
i2 <- 0
i3 <- 0
for (i in seq(0, 1, 0.05)) {
  for (j in seq(0, 1 - i, 0.05)) {
    k <- 1 - i - j
    # Score the *candidate* weights (i, j, k). Using the running best
    # (i1, i2, i3) on this line would re-score the previous winner instead of
    # the combination under test.
    e <- i * mod.p1 + j * mod.p2 + k * mod.p3
    Ensemble.pred <- prediction(e, Verdict)
    Ensemble.auc <- as.numeric(performance(Ensemble.pred, "auc")@y.values)
    # Strict '>' keeps the first combination that reaches a given AUC.
    if (Ensemble.auc > auc) {
      auc <- Ensemble.auc
      i1 <- i
      i2 <- j
      i3 <- k
      pred <- e
    }
  }
}
# Get final values. The numbers in the trailing comments were recorded from
# an earlier run; the data are random, so actual results will differ.
auc # 0.524
i1 # 0
i2 # 0.1
i3 # 0.9
我觉得你自己提出的方案其实还不错。我唯一不明白的是,为什么不在 e = i1*mod.p1 + i2*mod.p2 + i3*mod.p3
这一行中使用 i、j、k
的值。我会像下面这样调整,效果很好。
# Weight search using the loop candidates directly, with a 0.03 step.
# Relies on auc, i1, i2, i3, mod.p1..mod.p3 and Verdict already existing.
for (i in seq(from = 0, to = 1, by = 0.03)) {
  for (j in seq(from = 0, to = 1 - i, by = 0.03)) {
    k <- 1 - i - j
    e <- i * mod.p1 + j * mod.p2 + k * mod.p3
    Ensemble.pred <- prediction(e, Verdict)
    Ensemble.auc <- as.numeric(performance(Ensemble.pred, "auc")@y.values)
    # Skip candidates that do not strictly beat the best AUC so far.
    if (!(Ensemble.auc > auc)) {
      next
    }
    auc <- Ensemble.auc
    i1 <- i
    i2 <- j
    i3 <- k
    pred <- e
  }
}
# get final values
auc # 0.543
i1 # 0.84
i2 # 0.15
i3 # 0.01