多个单变量回归的整洁(tidy)输出
Tidy multiple univariate regressions
# Example data: three independent standard-normal columns of 100 rows each.
db <- tibble(a = rnorm(100), b = rnorm(100), c = rnorm(100))
如果我想要一个整洁的多元线性回归结果,可以这样写:
# Multiple regression of `a` on `b` and `c` without an intercept, returned as a
# tidy coefficient table. The `0 +` belongs on the right-hand side of the
# formula: written on the left (`0 + a ~ b + c`) it only adds zero to the
# response and the intercept is still fitted.
lm(a ~ 0 + b + c, data = db) %>%
  tidy()
但如果我想要多个单变量回归,我会这样写:
# Two separate no-intercept regressions of `a` (one on `b`, one on `c`);
# the tidied coefficient row of the second fit is appended to the first.
add_row(
  tidy(lm(a ~ 0 + b, data = db)),
  tidy(lm(a ~ 0 + c, data = db))
)
现在,如果有很多回归变量列,我想避免为每个回归变量都单独写一个新的 add_row,
应该如何让代码更通用?
这里有部分解决方案:
我认为代码可以比示例中的代码更精简?
我们可以使用 {}
把多个表达式包裹成一个代码块
library(magrittr)
library(broom)
# Tidy the first no-intercept fit, then use a braced expression on the
# right-hand side of the pipe so that `.` is placed explicitly as the first
# argument of add_row(), next to the second tidied fit.
tidy(lm(a ~ 0 + b, data = db)) %>%
  {
    add_row(., tidy(lm(a ~ 0 + c, data = db)))
  }
-输出
# A tibble: 2 × 5
term estimate std.error statistic p.value
<chr> <dbl> <dbl> <dbl> <dbl>
1 b 0.0601 0.0907 0.663 0.509
2 c 0.0411 0.0899 0.457 0.649
或者可以在 summarise
和 unnest
内完成
library(tidyr)
# Fit both no-intercept regressions inside summarise(), where lm() is called
# without a `data` argument, keep the stacked tidy tables in the list-column
# `out1`, then expand that column back into ordinary rows.
db %>%
  summarise(
    out1 = list(
      bind_rows(
        tidy(lm(a ~ 0 + b)),
        tidy(lm(a ~ 0 + c))
      )
    )
  ) %>%
  unnest(out1)
-输出
# A tibble: 2 × 5
term estimate std.error statistic p.value
<chr> <dbl> <dbl> <dbl> <dbl>
1 b 0.0601 0.0907 0.663 0.509
2 c 0.0411 0.0899 0.457 0.649
我的回答
# Fit one no-intercept univariate regression of `a` on every other column of
# `db` and row-bind the tidied coefficient tables into a single tibble.
# paste0() builds the formula text without an inserted space, and the lm()
# call no longer carries an empty trailing argument.
db %>%
  select(-a) %>%
  names() %>%
  paste0("a~0+", .) %>%
  map_df(~ tidy(lm(as.formula(.x), data = db)))
您可以这样做(具体写法取决于您的列):
library(broom)
# Candidate regressors: every column of `db` except the first one.
vars <- names(db)[-1]

# Collect one formula `a ~ <regressors>` for each subset of one or two
# regressors. The subset size is capped at length(vars) because combn() fails
# when asked to choose more elements than `vars` holds, and seq_len() keeps
# both loops from running when there is nothing to iterate over.
models <- list()
for (i in seq_len(min(2L, length(vars)))) {
  vc <- combn(vars, i) # one subset per column
  for (j in seq_len(ncol(vc))) {
    model <- as.formula(paste0("a ~", paste0(vc[, j], collapse = "+")))
    # Store the formula as a single list element.
    models[[length(models) + 1L]] <- model
  }
}

# Fit each model on `db` and return a list of tidy coefficient tables.
lapply(models, function(x) lm(x, data = db) %>% tidy())
[[1]]
# A tibble: 2 x 5
term estimate std.error statistic p.value
<chr> <dbl> <dbl> <dbl> <dbl>
1 (Intercept) 0.0155 0.0856 0.181 0.857
2 b -0.0502 0.0797 -0.630 0.530
[[2]]
# A tibble: 2 x 5
term estimate std.error statistic p.value
<chr> <dbl> <dbl> <dbl> <dbl>
1 (Intercept) 0.0113 0.0856 0.132 0.896
2 c 0.0553 0.0865 0.640 0.524
[[3]]
# A tibble: 3 x 5
term estimate std.error statistic p.value
<chr> <dbl> <dbl> <dbl> <dbl>
1 (Intercept) 0.0132 0.0860 0.153 0.878
2 b -0.0439 0.0807 -0.544 0.588
3 c 0.0486 0.0877 0.555 0.580
# Example data: three independent standard-normal columns of 100 rows each.
db <- tibble(a = rnorm(100), b = rnorm(100), c = rnorm(100))
如果我想要一个整洁的多元线性回归结果,可以这样写:
# Multiple regression of `a` on `b` and `c` without an intercept, returned as a
# tidy coefficient table. The `0 +` belongs on the right-hand side of the
# formula: written on the left (`0 + a ~ b + c`) it only adds zero to the
# response and the intercept is still fitted.
lm(a ~ 0 + b + c, data = db) %>%
  tidy()
但如果我想要多个单变量回归,我会这样写:
# Two separate no-intercept regressions of `a` (one on `b`, one on `c`);
# the tidied coefficient row of the second fit is appended to the first.
add_row(
  tidy(lm(a ~ 0 + b, data = db)),
  tidy(lm(a ~ 0 + c, data = db))
)
现在,如果有很多回归变量列,我想避免为每个回归变量都单独写一个新的 add_row,
应该如何让代码更通用?
这里有部分解决方案:
我认为代码可以比示例中的代码更精简?
我们可以使用 {}
把多个表达式包裹成一个代码块
library(magrittr)
library(broom)
# Tidy the first no-intercept fit, then use a braced expression on the
# right-hand side of the pipe so that `.` is placed explicitly as the first
# argument of add_row(), next to the second tidied fit.
tidy(lm(a ~ 0 + b, data = db)) %>%
  {
    add_row(., tidy(lm(a ~ 0 + c, data = db)))
  }
-输出
# A tibble: 2 × 5
term estimate std.error statistic p.value
<chr> <dbl> <dbl> <dbl> <dbl>
1 b 0.0601 0.0907 0.663 0.509
2 c 0.0411 0.0899 0.457 0.649
或者也可以在 summarise 和 unnest 内完成:
library(tidyr)
# Fit both no-intercept regressions inside summarise(), where lm() is called
# without a `data` argument, keep the stacked tidy tables in the list-column
# `out1`, then expand that column back into ordinary rows.
db %>%
  summarise(
    out1 = list(
      bind_rows(
        tidy(lm(a ~ 0 + b)),
        tidy(lm(a ~ 0 + c))
      )
    )
  ) %>%
  unnest(out1)
-输出
# A tibble: 2 × 5
term estimate std.error statistic p.value
<chr> <dbl> <dbl> <dbl> <dbl>
1 b 0.0601 0.0907 0.663 0.509
2 c 0.0411 0.0899 0.457 0.649
我的回答
# Fit one no-intercept univariate regression of `a` on every other column of
# `db` and row-bind the tidied coefficient tables into a single tibble.
# paste0() builds the formula text without an inserted space, and the lm()
# call no longer carries an empty trailing argument.
db %>%
  select(-a) %>%
  names() %>%
  paste0("a~0+", .) %>%
  map_df(~ tidy(lm(as.formula(.x), data = db)))
您可以这样做(具体写法取决于您的列):
library(broom)
# Candidate regressors: every column of `db` except the first one.
vars <- names(db)[-1]

# Collect one formula `a ~ <regressors>` for each subset of one or two
# regressors. The subset size is capped at length(vars) because combn() fails
# when asked to choose more elements than `vars` holds, and seq_len() keeps
# both loops from running when there is nothing to iterate over.
models <- list()
for (i in seq_len(min(2L, length(vars)))) {
  vc <- combn(vars, i) # one subset per column
  for (j in seq_len(ncol(vc))) {
    model <- as.formula(paste0("a ~", paste0(vc[, j], collapse = "+")))
    # Store the formula as a single list element.
    models[[length(models) + 1L]] <- model
  }
}

# Fit each model on `db` and return a list of tidy coefficient tables.
lapply(models, function(x) lm(x, data = db) %>% tidy())
[[1]]
# A tibble: 2 x 5
term estimate std.error statistic p.value
<chr> <dbl> <dbl> <dbl> <dbl>
1 (Intercept) 0.0155 0.0856 0.181 0.857
2 b -0.0502 0.0797 -0.630 0.530
[[2]]
# A tibble: 2 x 5
term estimate std.error statistic p.value
<chr> <dbl> <dbl> <dbl> <dbl>
1 (Intercept) 0.0113 0.0856 0.132 0.896
2 c 0.0553 0.0865 0.640 0.524
[[3]]
# A tibble: 3 x 5
term estimate std.error statistic p.value
<chr> <dbl> <dbl> <dbl> <dbl>
1 (Intercept) 0.0132 0.0860 0.153 0.878
2 b -0.0439 0.0807 -0.544 0.588
3 c 0.0486 0.0877 0.555 0.580