使用 tidymodels 计算所有类别的预测值
Calculating predicted values for all categories using tidymodels
此问题与此问题相关。
我是 运行 与那个问题类似的模型,但在最后一行我想有 7 个预测列(即以第一种情况下新数据集组的方式更改数据集=0,在第二组=1,以此类推
# Code from the original question
library(dplyr)
year <- rep(2014:2015, length.out=10000)
group <- sample(c(0,1,2,3,4,5,6), replace=TRUE, size=10000)
value <- sample(10000, replace=T)
female <- sample(c(0,1), replace=TRUE, size=10000)
smoker <- sample(c(0,1), replace=TRUE, size=10000)
dta <- data.frame(year=year, group=group, value=value, female=female, smoker=smoker)
# cut the dataset into list
table_list <- dta %>%
group_by(year, group) %>%
group_split()
# fit model per subgroup
model_list <- lapply(table_list, function(x) glm(smoker ~ female*group, data=x,
family=binomial(link="probit")))
# create new dataset where group =1
dat_new0 <- data.frame(dta[, c("smoker", "year", female)], group=0)
dat_new1 <- data.frame(dta[, c("smoker", "year", female)], group=1)
dat_new2 <- data.frame(dta[, c("smoker", "year", female)], group=2)
etc.
pred0 <- predict.glm(dat_new0, type = "response")
pred1 <- predict.glm(dat_new1, type = "response")
pred2 <- predict.glm(dat_new2, type = "response")
etc.
与其手动执行此操作,我想以某种方式使用 tidymodels。
我想我会为此使用 broom。首先,使用 nest()
将数据拆分为要用于建模的分组,然后 map()
对它们进行训练以训练模型:
library(tidyverse)
library(broom)
year <- rep(2014:2015, length.out=10000)
group <- sample(c(0,1,2,3,4,5,6), replace=TRUE, size=10000)
female <- sample(c(0,1), replace=TRUE, size=10000)
smoker <- sample(c(0,1), replace=TRUE, size=10000)
dta <- tibble(year = year, group = group, female = female, smoker = smoker)
mods <- dta %>%
nest(data = c(-year)) %>%
mutate(model = map(data, ~ glm(smoker ~ female*group, data = .,
family = binomial(link = "probit"))))
mods
#> # A tibble: 2 × 3
#> year data model
#> <int> <list> <list>
#> 1 2014 <tibble [5,000 × 3]> <glm>
#> 2 2015 <tibble [5,000 × 3]> <glm>
现在使用 tidyr 中的 crossing()
创建新的示例数据:
new_dat <- crossing(smoker = 0:1, female = 0:1, year = 2014:2015, group = 0:2)
new_dat
#> # A tibble: 24 × 4
#> smoker female year group
#> <int> <int> <int> <int>
#> 1 0 0 2014 0
#> 2 0 0 2014 1
#> 3 0 0 2014 2
#> 4 0 0 2015 0
#> 5 0 0 2015 1
#> 6 0 0 2015 2
#> 7 0 1 2014 0
#> 8 0 1 2014 1
#> 9 0 1 2014 2
#> 10 0 1 2015 0
#> # … with 14 more rows
然后在每个经过训练的模型上预测这个新示例数据。 (我在这里使用了 broom 的 augment()
,以便将新的预测列添加到现有列中,但您也可以使用 predict()
)。
mods %>%
mutate(preds = map(model, augment, newdata = new_dat))
#> # A tibble: 2 × 4
#> year data model preds
#> <int> <list> <list> <list>
#> 1 2014 <tibble [5,000 × 3]> <glm> <tibble [24 × 5]>
#> 2 2015 <tibble [5,000 × 3]> <glm> <tibble [24 × 5]>
由 reprex package (v2.0.1)
于 2021-11-15 创建
一旦你有了这些预测,你就可以 unnest()
它们,然后随心所欲地处理它们。
此问题与此问题相关
我是 运行 与那个问题类似的模型,但在最后一行我想有 7 个预测列(即以第一种情况下新数据集组的方式更改数据集=0,在第二组=1,以此类推
# Code from the original question
library(dplyr)
year <- rep(2014:2015, length.out=10000)
group <- sample(c(0,1,2,3,4,5,6), replace=TRUE, size=10000)
value <- sample(10000, replace=T)
female <- sample(c(0,1), replace=TRUE, size=10000)
smoker <- sample(c(0,1), replace=TRUE, size=10000)
dta <- data.frame(year=year, group=group, value=value, female=female, smoker=smoker)
# cut the dataset into list
table_list <- dta %>%
group_by(year, group) %>%
group_split()
# fit model per subgroup
model_list <- lapply(table_list, function(x) glm(smoker ~ female*group, data=x,
family=binomial(link="probit")))
# create new dataset where group =1
dat_new0 <- data.frame(dta[, c("smoker", "year", female)], group=0)
dat_new1 <- data.frame(dta[, c("smoker", "year", female)], group=1)
dat_new2 <- data.frame(dta[, c("smoker", "year", female)], group=2)
etc.
pred0 <- predict.glm(dat_new0, type = "response")
pred1 <- predict.glm(dat_new1, type = "response")
pred2 <- predict.glm(dat_new2, type = "response")
etc.
与其手动执行此操作,我想以某种方式使用 tidymodels。
我想我会为此使用 broom。首先,使用 nest()
将数据拆分为要用于建模的分组,然后 map()
对它们进行训练以训练模型:
library(tidyverse)
library(broom)
year <- rep(2014:2015, length.out=10000)
group <- sample(c(0,1,2,3,4,5,6), replace=TRUE, size=10000)
female <- sample(c(0,1), replace=TRUE, size=10000)
smoker <- sample(c(0,1), replace=TRUE, size=10000)
dta <- tibble(year = year, group = group, female = female, smoker = smoker)
mods <- dta %>%
nest(data = c(-year)) %>%
mutate(model = map(data, ~ glm(smoker ~ female*group, data = .,
family = binomial(link = "probit"))))
mods
#> # A tibble: 2 × 3
#> year data model
#> <int> <list> <list>
#> 1 2014 <tibble [5,000 × 3]> <glm>
#> 2 2015 <tibble [5,000 × 3]> <glm>
现在使用 tidyr 中的 crossing()
创建新的示例数据:
new_dat <- crossing(smoker = 0:1, female = 0:1, year = 2014:2015, group = 0:2)
new_dat
#> # A tibble: 24 × 4
#> smoker female year group
#> <int> <int> <int> <int>
#> 1 0 0 2014 0
#> 2 0 0 2014 1
#> 3 0 0 2014 2
#> 4 0 0 2015 0
#> 5 0 0 2015 1
#> 6 0 0 2015 2
#> 7 0 1 2014 0
#> 8 0 1 2014 1
#> 9 0 1 2014 2
#> 10 0 1 2015 0
#> # … with 14 more rows
然后在每个经过训练的模型上预测这个新示例数据。 (我在这里使用了 broom 的 augment()
,以便将新的预测列添加到现有列中,但您也可以使用 predict()
)。
mods %>%
mutate(preds = map(model, augment, newdata = new_dat))
#> # A tibble: 2 × 4
#> year data model preds
#> <int> <list> <list> <list>
#> 1 2014 <tibble [5,000 × 3]> <glm> <tibble [24 × 5]>
#> 2 2015 <tibble [5,000 × 3]> <glm> <tibble [24 × 5]>
由 reprex package (v2.0.1)
于 2021-11-15 创建一旦你有了这些预测,你就可以 unnest()
它们,然后随心所欲地处理它们。