在 purrr 中处理不同长度的向量
Handling vectors of different lengths in purrr
我目前有以下 R 代码:它在不同的数据子集上分别拟合使用不同预测变量的多个回归模型,并使用 broom 包返回整理后的输出。
library(dplyr)
library(purrr)
library(broom)
cars <- mtcars
preds <- c("disp", "drat", "wt")

# Fit mpg ~ <predictor> within each cyl subset, once per predictor, and stack
# the tidied coefficient rows of all fits into a single data frame.
model_fits <- map_df(preds, function(pred) {
  model_formula <- sprintf("mpg ~ %s", pred)
  cars %>%
    group_by(cyl) %>%
    # TRUE rather than T: T is an ordinary variable that can be reassigned.
    do(tidy(lm(model_formula, data = .), conf.int = TRUE)) %>%
    filter(term == pred) %>% # keep the predictor's row, drop the intercept
    mutate(outcome = "mpg") %>%
    select(outcome, cyl:estimate, starts_with("conf."))
})
这会产生以下数据框:
> model_fits
Source: local data frame [9 x 6]
Groups: cyl [3]
outcome cyl term estimate conf.low conf.high
<chr> <dbl> <chr> <dbl> <dbl> <dbl>
1 mpg 4 disp -0.135141815 -0.21018121 -0.0601024237
2 mpg 6 disp 0.003605119 -0.03638572 0.0435959552
3 mpg 8 disp -0.019634095 -0.03993175 0.0006635639
4 mpg 4 drat 5.235016267 -3.19097359 13.6610061249
5 mpg 6 drat 0.350268953 -3.13669610 3.8372340053
6 mpg 8 drat 0.329543608 -3.98975120 4.6488384177
7 mpg 4 wt -5.647025261 -9.83228414 -1.4617663781
8 mpg 6 wt -2.780105939 -6.21162010 0.6514082171
9 mpg 8 wt -2.192437926 -3.80310208 -0.5817737772
在不使用循环的情况下,将一个结果变量(因变量)向量(例如 outcomes <- c("mpg", "qsec"))纳入此脚本的最佳方法是什么?我考虑过 purrr 包中的 map2_df 函数,但它要求两个向量的长度相同。我想要的数据框如下所示:
outcome cyl term estimate conf.low conf.high
<chr> <dbl> <chr> <dbl> <dbl> <dbl>
1 mpg 4 disp -0.135141815 -0.210181205 -0.0601024237
2 mpg 6 disp 0.003605119 -0.036385718 0.0435959552
3 mpg 8 disp -0.019634095 -0.039931754 0.0006635639
4 mpg 4 drat 5.235016267 -3.190973592 13.6610061249
5 mpg 6 drat 0.350268953 -3.136696100 3.8372340053
6 mpg 8 drat 0.329543608 -3.989751201 4.6488384177
7 mpg 4 wt -5.647025261 -9.832284144 -1.4617663781
8 mpg 6 wt -2.780105939 -6.211620095 0.6514082171
9 mpg 8 wt -2.192437926 -3.803102076 -0.5817737772
10 qsec 4 disp 0.020522320 -0.024081106 0.0651257460
11 qsec 6 disp 0.032395786 0.003380046 0.0614115258
12 qsec 8 disp 0.003443553 -0.007442996 0.0143301028
13 qsec 4 drat -1.304473000 -4.633470581 2.0245245810
14 qsec 6 drat -2.234114977 -5.457932913 0.9897029580
15 qsec 8 drat -2.645047137 -3.791162337 -1.4989319372
16 qsec 4 wt 1.884663596 0.169516461 3.5998107312
17 qsec 6 wt 4.147883561 1.394030756 6.9017363651
18 qsec 8 wt 0.845029716 0.009104550 1.6809548809
我不知道这是否是最好的方法,但我会使用 map2()
并像这样使用 interaction()
进行一些灵活的准备工作:
preds <- c("disp", "drat", "wt")
outs <- c("mpg", "qsec") # new vector

# probably start function wrap here
# Every "outcome ~ predictor" formula string. The levels of interaction() are
# the full cross product of its inputs' levels; passing equal-length, fully
# crossed inputs avoids the recycling warning that unequal lengths trigger.
formula_strings <- interaction(
  rep(outs, times = length(preds)),
  rep(preds, each = length(outs)),
  sep = " ~ "
) %>%
  levels()

# One data frame per cyl value; split() names each piece by its cyl value.
cyl_groups <- split(mtcars, mtcars$cyl)

# see how many model objects we need based on above
total_models <- length(formula_strings) * length(cyl_groups)

# set up for map2(): pair every formula with every cyl subset
formulas <- rep(formula_strings, times = length(cyl_groups))
groups <- rep(cyl_groups, each = length(formula_strings))

# use names for map's .id
names(formulas) <- formulas
# groups is named nicely by default split() behavior

# Tidy one fitted model and tag its rows with the cyl subset it was fitted on.
# Tagging per model keeps cyl aligned even if a fit contributes zero or
# several rows, which a positional mutate() on the combined frame would not.
tidy_fit <- function(fit, cyl_label) {
  tidy(fit, conf.int = TRUE) %>% # no more do()
    filter(term %in% preds) %>%
    mutate(cyl = cyl_label)
}

# you could also start wrap here
map2(formulas, groups, ~ lm(.x, data = .y)) %>% # list of models here
  map2_df(names(groups), tidy_fit, .id = "outcome") %>% # one big df here
  mutate(outcome = gsub(" ~.*", "", outcome)) %>% # keep only the outcome name
  select(cyl, everything(), -(std.error:p.value)) %>%
  arrange(outcome, term, cyl)
cyl outcome term estimate conf.low conf.high
1 4 mpg disp -0.135141815 -0.210181205 -0.0601024237
2 6 mpg disp 0.003605119 -0.036385718 0.0435959552
3 8 mpg disp -0.019634095 -0.039931754 0.0006635639
4 4 mpg drat 5.235016267 -3.190973592 13.6610061249
5 6 mpg drat 0.350268953 -3.136696100 3.8372340053
6 8 mpg drat 0.329543608 -3.989751201 4.6488384177
7 4 mpg wt -5.647025261 -9.832284144 -1.4617663781
8 6 mpg wt -2.780105939 -6.211620095 0.6514082171
9 8 mpg wt -2.192437926 -3.803102076 -0.5817737772
10 4 qsec disp 0.020522320 -0.024081106 0.0651257460
11 6 qsec disp 0.032395786 0.003380046 0.0614115258
12 8 qsec disp 0.003443553 -0.007442996 0.0143301028
13 4 qsec drat -1.304473000 -4.633470581 2.0245245810
14 6 qsec drat -2.234114977 -5.457932913 0.9897029580
15 8 qsec drat -2.645047137 -3.791162337 -1.4989319372
16 4 qsec wt 1.884663596 0.169516461 3.5998107312
17 6 qsec wt 4.147883561 1.394030756 6.9017363651
18 8 qsec wt 0.845029716 0.009104550 1.6809548809
我目前有以下 R 代码:它在不同的数据子集上分别拟合使用不同预测变量的多个回归模型,并使用 broom 包返回整理后的输出。
library(dplyr)
library(purrr)
library(broom)
cars <- mtcars
preds <- c("disp", "drat", "wt")

# Fit mpg ~ <predictor> within each cyl subset, once per predictor, and stack
# the tidied coefficient rows of all fits into a single data frame.
model_fits <- map_df(preds, function(pred) {
  model_formula <- sprintf("mpg ~ %s", pred)
  cars %>%
    group_by(cyl) %>%
    # TRUE rather than T: T is an ordinary variable that can be reassigned.
    do(tidy(lm(model_formula, data = .), conf.int = TRUE)) %>%
    filter(term == pred) %>% # keep the predictor's row, drop the intercept
    mutate(outcome = "mpg") %>%
    select(outcome, cyl:estimate, starts_with("conf."))
})
这会产生以下数据框:
> model_fits
Source: local data frame [9 x 6]
Groups: cyl [3]
outcome cyl term estimate conf.low conf.high
<chr> <dbl> <chr> <dbl> <dbl> <dbl>
1 mpg 4 disp -0.135141815 -0.21018121 -0.0601024237
2 mpg 6 disp 0.003605119 -0.03638572 0.0435959552
3 mpg 8 disp -0.019634095 -0.03993175 0.0006635639
4 mpg 4 drat 5.235016267 -3.19097359 13.6610061249
5 mpg 6 drat 0.350268953 -3.13669610 3.8372340053
6 mpg 8 drat 0.329543608 -3.98975120 4.6488384177
7 mpg 4 wt -5.647025261 -9.83228414 -1.4617663781
8 mpg 6 wt -2.780105939 -6.21162010 0.6514082171
9 mpg 8 wt -2.192437926 -3.80310208 -0.5817737772
在不使用循环的情况下,将一个结果变量(因变量)向量(例如 outcomes <- c("mpg", "qsec"))纳入此脚本的最佳方法是什么?我考虑过 purrr 包中的 map2_df 函数,但它要求两个向量的长度相同。我想要的数据框如下所示:
outcome cyl term estimate conf.low conf.high
<chr> <dbl> <chr> <dbl> <dbl> <dbl>
1 mpg 4 disp -0.135141815 -0.210181205 -0.0601024237
2 mpg 6 disp 0.003605119 -0.036385718 0.0435959552
3 mpg 8 disp -0.019634095 -0.039931754 0.0006635639
4 mpg 4 drat 5.235016267 -3.190973592 13.6610061249
5 mpg 6 drat 0.350268953 -3.136696100 3.8372340053
6 mpg 8 drat 0.329543608 -3.989751201 4.6488384177
7 mpg 4 wt -5.647025261 -9.832284144 -1.4617663781
8 mpg 6 wt -2.780105939 -6.211620095 0.6514082171
9 mpg 8 wt -2.192437926 -3.803102076 -0.5817737772
10 qsec 4 disp 0.020522320 -0.024081106 0.0651257460
11 qsec 6 disp 0.032395786 0.003380046 0.0614115258
12 qsec 8 disp 0.003443553 -0.007442996 0.0143301028
13 qsec 4 drat -1.304473000 -4.633470581 2.0245245810
14 qsec 6 drat -2.234114977 -5.457932913 0.9897029580
15 qsec 8 drat -2.645047137 -3.791162337 -1.4989319372
16 qsec 4 wt 1.884663596 0.169516461 3.5998107312
17 qsec 6 wt 4.147883561 1.394030756 6.9017363651
18 qsec 8 wt 0.845029716 0.009104550 1.6809548809
我不知道这是否是最好的方法,但我会使用 map2()
并像这样使用 interaction()
进行一些灵活的准备工作:
preds <- c("disp", "drat", "wt")
outs <- c("mpg", "qsec") # new vector

# probably start function wrap here
# Every "outcome ~ predictor" formula string. The levels of interaction() are
# the full cross product of its inputs' levels; passing equal-length, fully
# crossed inputs avoids the recycling warning that unequal lengths trigger.
formula_strings <- interaction(
  rep(outs, times = length(preds)),
  rep(preds, each = length(outs)),
  sep = " ~ "
) %>%
  levels()

# One data frame per cyl value; split() names each piece by its cyl value.
cyl_groups <- split(mtcars, mtcars$cyl)

# see how many model objects we need based on above
total_models <- length(formula_strings) * length(cyl_groups)

# set up for map2(): pair every formula with every cyl subset
formulas <- rep(formula_strings, times = length(cyl_groups))
groups <- rep(cyl_groups, each = length(formula_strings))

# use names for map's .id
names(formulas) <- formulas
# groups is named nicely by default split() behavior

# Tidy one fitted model and tag its rows with the cyl subset it was fitted on.
# Tagging per model keeps cyl aligned even if a fit contributes zero or
# several rows, which a positional mutate() on the combined frame would not.
tidy_fit <- function(fit, cyl_label) {
  tidy(fit, conf.int = TRUE) %>% # no more do()
    filter(term %in% preds) %>%
    mutate(cyl = cyl_label)
}

# you could also start wrap here
map2(formulas, groups, ~ lm(.x, data = .y)) %>% # list of models here
  map2_df(names(groups), tidy_fit, .id = "outcome") %>% # one big df here
  mutate(outcome = gsub(" ~.*", "", outcome)) %>% # keep only the outcome name
  select(cyl, everything(), -(std.error:p.value)) %>%
  arrange(outcome, term, cyl)
cyl outcome term estimate conf.low conf.high
1 4 mpg disp -0.135141815 -0.210181205 -0.0601024237
2 6 mpg disp 0.003605119 -0.036385718 0.0435959552
3 8 mpg disp -0.019634095 -0.039931754 0.0006635639
4 4 mpg drat 5.235016267 -3.190973592 13.6610061249
5 6 mpg drat 0.350268953 -3.136696100 3.8372340053
6 8 mpg drat 0.329543608 -3.989751201 4.6488384177
7 4 mpg wt -5.647025261 -9.832284144 -1.4617663781
8 6 mpg wt -2.780105939 -6.211620095 0.6514082171
9 8 mpg wt -2.192437926 -3.803102076 -0.5817737772
10 4 qsec disp 0.020522320 -0.024081106 0.0651257460
11 6 qsec disp 0.032395786 0.003380046 0.0614115258
12 8 qsec disp 0.003443553 -0.007442996 0.0143301028
13 4 qsec drat -1.304473000 -4.633470581 2.0245245810
14 6 qsec drat -2.234114977 -5.457932913 0.9897029580
15 8 qsec drat -2.645047137 -3.791162337 -1.4989319372
16 4 qsec wt 1.884663596 0.169516461 3.5998107312
17 6 qsec wt 4.147883561 1.394030756 6.9017363651
18 8 qsec wt 0.845029716 0.009104550 1.6809548809