在 purrr 中迭代略有不同的模型
Iterating though slightly different models in purrr
我有以下代码比较仅多项式项不同的模型的均方根。
library(tidyverse)
data(mtcars)
cv_mtcars = mtcars %>%
crossv_kfold(k = 10)
cv_mtcars %>%
mutate(model1 = map(train, ~lm(disp ~ wt, data = .)),
model2 = map(train, ~lm(disp ~I(wt^2), data = .)),
model3 = map(train, ~lm(disp ~I(wt^3), data = .)),
model4 = map(train, ~lm(disp ~I(wt^4), data = .)),
model5 = map(train, ~lm(disp ~I(wt^5), data = .)),
model6 = map(train, ~lm(disp ~I(wt^6), data = .)),
order1 = map2_dbl(model1, test, ~rmse(.x, .y)),
order2 = map2_dbl(model2, test, ~rmse(.x, .y)),
order3 = map2_dbl(model3, test, ~rmse(.x, .y)),
order4 = map2_dbl(model4, test, ~rmse(.x, .y)),
order5 = map2_dbl(model5, test, ~rmse(.x, .y)),
order6 = map2_dbl(model6, test, ~rmse(.x, .y))) %>%
select(order1,order2,order3,order4,order5,order6) %>% gather(1:6,key=model,value=value) %>%
ggplot()+
geom_point(aes(x=factor(model),y=value))+
labs(y="rmse",x="polynomial",title="Model Assesment",subtitle="disp~I(wt^x)")
是否有更有效的方法来迭代我的模型?我觉得我写的代码比我需要的多。
您可以通过外部调用 map
迭代多项式阶数和内部调用 map
迭代 10 次折叠来迭代模型。在下面的代码中,我使用了 poly(wt, i)
而不是 I(wt^i)
,因为 I(wt^i)
生成了一个只有最高阶项的多项式,而 poly(wt, i)
生成了一个有项的多项式所有订单的最高订单。我已经为 model_cv
对象中的每个折叠保存了 rmse
,但是您当然可以将其直接通过管道传输到 ggplot 中。
set.seed(50)
model_cv = setNames(1:6, 1:6) %>%
map_df(function(i) {
map2_dbl(cv_mtcars[["train"]], cv_mtcars[["test"]], function(train, test) {
model = lm(disp ~ poly(wt,i), data=train)
rmse(model, test)
})
}) %>%
gather(`Polynomial Order`, rmse)
ggplot(model_cv, aes(`Polynomial Order`, rmse)) +
geom_point() +
stat_summary(fun.y=mean, geom="point", pch="_", colour="red", size=7) +
labs(title="Model Assesment",subtitle="disp ~ poly(wt, order)")
我有以下代码比较仅多项式项不同的模型的均方根。
library(tidyverse)
data(mtcars)
cv_mtcars = mtcars %>%
crossv_kfold(k = 10)
cv_mtcars %>%
mutate(model1 = map(train, ~lm(disp ~ wt, data = .)),
model2 = map(train, ~lm(disp ~I(wt^2), data = .)),
model3 = map(train, ~lm(disp ~I(wt^3), data = .)),
model4 = map(train, ~lm(disp ~I(wt^4), data = .)),
model5 = map(train, ~lm(disp ~I(wt^5), data = .)),
model6 = map(train, ~lm(disp ~I(wt^6), data = .)),
order1 = map2_dbl(model1, test, ~rmse(.x, .y)),
order2 = map2_dbl(model2, test, ~rmse(.x, .y)),
order3 = map2_dbl(model3, test, ~rmse(.x, .y)),
order4 = map2_dbl(model4, test, ~rmse(.x, .y)),
order5 = map2_dbl(model5, test, ~rmse(.x, .y)),
order6 = map2_dbl(model6, test, ~rmse(.x, .y))) %>%
select(order1,order2,order3,order4,order5,order6) %>% gather(1:6,key=model,value=value) %>%
ggplot()+
geom_point(aes(x=factor(model),y=value))+
labs(y="rmse",x="polynomial",title="Model Assesment",subtitle="disp~I(wt^x)")
是否有更有效的方法来迭代我的模型?我觉得我写的代码比我需要的多。
您可以通过外部调用 map
迭代多项式阶数和内部调用 map
迭代 10 次折叠来迭代模型。在下面的代码中,我使用了 poly(wt, i)
而不是 I(wt^i)
,因为 I(wt^i)
生成了一个只有最高阶项的多项式,而 poly(wt, i)
生成了一个有项的多项式所有订单的最高订单。我已经为 model_cv
对象中的每个折叠保存了 rmse
,但是您当然可以将其直接通过管道传输到 ggplot 中。
set.seed(50)
model_cv = setNames(1:6, 1:6) %>%
map_df(function(i) {
map2_dbl(cv_mtcars[["train"]], cv_mtcars[["test"]], function(train, test) {
model = lm(disp ~ poly(wt,i), data=train)
rmse(model, test)
})
}) %>%
gather(`Polynomial Order`, rmse)
ggplot(model_cv, aes(`Polynomial Order`, rmse)) +
geom_point() +
stat_summary(fun.y=mean, geom="point", pch="_", colour="red", size=7) +
labs(title="Model Assesment",subtitle="disp ~ poly(wt, order)")