在 R 中查找具有最低 AIC 的模型(从 for 循环返回)
Finding model (returned from for loops) with lowest AIC in R
我正在尝试寻找 AIC 最低的模型。模型是 return 从两个 for 循环中编辑出来的,这使得列的组合成为可能。我无法制作具有最低 AIC 的函数 return 模型。下面的代码演示了我被卡住的地方:
rm(list = ls())
data <- iris
data <- data[data$Species %in% c("setosa", "virginica"),]
data$Species = ifelse(data$Species == 'virginica', 0, 1)
mod_headers <- names(data[1:ncol(data)-1])
f <- function(mod_headers){
for(i in 1:length(mod_headers)){
tab <- combn(mod_headers,i)
for(j in 1:ncol(tab)){
tab_new <- c(tab[,j])
mod_tab_new <- c(tab_new, "Species")
model <- glm(Species ~., data=data[c(mod_tab_new)], family = binomial(link = "logit"))
}
}
best_model <- model[which(AIC(model)[order(AIC(model))][1])]
print(best_model)
}
f(mod_headers)
有什么建议吗?谢谢!
我用向量化替代方案替换了你的 for 循环
library(tidyverse)
library(iterators)
# Column names you want to use in glm model, saved as list
whichcols <- Reduce("c", map(1:length(mod_headers), ~lapply(iter(combn(mod_headers,.x), by="col"),function(y) c(y))))
# glm model results using selected column names, saved as list
models <- map(1:length(whichcols), ~glm(Species ~., data=data[c(whichcols[[.x]], "Species")], family = binomial(link = "logit")))
# selects model with lowest AIC
best <- models[[which.min(sapply(1:length(models),function(x)AIC(models[[x]])))]]
输出
Call: glm(formula = Species ~ ., family = binomial(link = "logit"),
data = data[c(whichcols[[.x]], "Species")])
Coefficients:
(Intercept) Petal.Length
55.40 -17.17
Degrees of Freedom: 99 Total (i.e. Null); 98 Residual
Null Deviance: 138.6
Residual Deviance: 1.208e-09 AIC: 4
使用你的循环,将所有模型放在一个列表中。
然后计算所有这些模型的 AIC。
最后 return 具有最小 AIC 的模型。
f <- function(mod_headers) {
models <- list()
k <- 1
for (i in 1:length(mod_headers)) {
tab <- combn(mod_headers, i)
for(j in 1:ncol(tab)) {
mod_tab_new <- c(tab[, j], "Species")
models[[k]] <- glm(Species ~ ., data = data[mod_tab_new],
family = binomial(link = "logit"))
k <- k + 1
}
}
models[[which.min(sapply(models, AIC))]]
}
glm() 使用迭代重新加权最小二乘算法。该算法在收敛之前达到最大迭代次数 - 更改此参数有助于您的情况:
glm(Species ~., data=data[mod_tab_new], family = binomial(link = "logit"), control = list(maxit = 50))
使用 which
时还有另一个问题,我在每个模型拟合后将其替换为 if
,以便与目前为止的最低 AIC 进行比较。但是,我认为有比这种 for-loop
方法更好的解决方案。
f <- function(mod_headers){
lowest_aic <- Inf # added
best_model <- NULL # added
for(i in 1:length(mod_headers)){
tab <- combn(mod_headers,i)
for(j in 1:ncol(tab)){
tab_new <- tab[, j]
mod_tab_new <- c(tab_new, "Species")
model <- glm(Species ~., data=data[mod_tab_new], family = binomial(link = "logit"), control = list(maxit = 50))
if(AIC(model) < lowest_aic){ # added
lowest_aic <- AIC(model) # added
best_model <- model # added
}
}
}
return(best_model)
}
我正在尝试寻找 AIC 最低的模型。模型是 return 从两个 for 循环中编辑出来的,这使得列的组合成为可能。我无法制作具有最低 AIC 的函数 return 模型。下面的代码演示了我被卡住的地方:
rm(list = ls())
data <- iris
data <- data[data$Species %in% c("setosa", "virginica"),]
data$Species = ifelse(data$Species == 'virginica', 0, 1)
mod_headers <- names(data[1:ncol(data)-1])
f <- function(mod_headers){
for(i in 1:length(mod_headers)){
tab <- combn(mod_headers,i)
for(j in 1:ncol(tab)){
tab_new <- c(tab[,j])
mod_tab_new <- c(tab_new, "Species")
model <- glm(Species ~., data=data[c(mod_tab_new)], family = binomial(link = "logit"))
}
}
best_model <- model[which(AIC(model)[order(AIC(model))][1])]
print(best_model)
}
f(mod_headers)
有什么建议吗?谢谢!
我用向量化替代方案替换了你的 for 循环
library(tidyverse)
library(iterators)
# Column names you want to use in glm model, saved as list
whichcols <- Reduce("c", map(1:length(mod_headers), ~lapply(iter(combn(mod_headers,.x), by="col"),function(y) c(y))))
# glm model results using selected column names, saved as list
models <- map(1:length(whichcols), ~glm(Species ~., data=data[c(whichcols[[.x]], "Species")], family = binomial(link = "logit")))
# selects model with lowest AIC
best <- models[[which.min(sapply(1:length(models),function(x)AIC(models[[x]])))]]
输出
Call: glm(formula = Species ~ ., family = binomial(link = "logit"),
data = data[c(whichcols[[.x]], "Species")])
Coefficients:
(Intercept) Petal.Length
55.40 -17.17
Degrees of Freedom: 99 Total (i.e. Null); 98 Residual
Null Deviance: 138.6
Residual Deviance: 1.208e-09 AIC: 4
使用你的循环,将所有模型放在一个列表中。 然后计算所有这些模型的 AIC。 最后 return 具有最小 AIC 的模型。
f <- function(mod_headers) {
models <- list()
k <- 1
for (i in 1:length(mod_headers)) {
tab <- combn(mod_headers, i)
for(j in 1:ncol(tab)) {
mod_tab_new <- c(tab[, j], "Species")
models[[k]] <- glm(Species ~ ., data = data[mod_tab_new],
family = binomial(link = "logit"))
k <- k + 1
}
}
models[[which.min(sapply(models, AIC))]]
}
glm() 使用迭代重新加权最小二乘算法。该算法在收敛之前达到最大迭代次数 - 更改此参数有助于您的情况:
glm(Species ~., data=data[mod_tab_new], family = binomial(link = "logit"), control = list(maxit = 50))
使用 which
时还有另一个问题,我在每个模型拟合后将其替换为 if
,以便与目前为止的最低 AIC 进行比较。但是,我认为有比这种 for-loop
方法更好的解决方案。
f <- function(mod_headers){
lowest_aic <- Inf # added
best_model <- NULL # added
for(i in 1:length(mod_headers)){
tab <- combn(mod_headers,i)
for(j in 1:ncol(tab)){
tab_new <- tab[, j]
mod_tab_new <- c(tab_new, "Species")
model <- glm(Species ~., data=data[mod_tab_new], family = binomial(link = "logit"), control = list(maxit = 50))
if(AIC(model) < lowest_aic){ # added
lowest_aic <- AIC(model) # added
best_model <- model # added
}
}
}
return(best_model)
}