遍历变量并在每次迭代中替换一个变量
iterating over variables and replacing one on each iteration
假设我有这个 df:
# Example data: an id column, five q* columns and three v* columns,
# stored as a tibble-classed data frame with 8 rows.
df <- structure(
  list(
    id = c(1, 2, 3, 4, 5, 6, 7, 8),
    q1 = c(1, 1, 4, 5, 3, 3, 3, 2),
    q2 = c(5, 4, 4, 1, 1, 2, 3, 3),
    q3 = c(3, 3, 2, 4, 3, 3, 2, 5),
    q4 = c(6, 5, 3, 3, 2, 1, 3, 4),
    q5 = c(2, 1, 3, 4, 5, 4, 3, 2),
    v1 = c(0, 0, 1, 1, 1, 1, 0, 1),
    v2 = rep(c("19-25", "26-34", "35-44"), times = c(3, 2, 3)),
    v3 = c("abc", "def", "abc", "abc", "abc", "def", "def", "abc")
  ),
  row.names = c(NA, -8L),
  class = c("tbl_df", "tbl", "data.frame")
)
> df
# A tibble: 8 x 9
id q1 q2 q3 q4 q5 v1 v2 v3
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
1 1 1 5 3 6 2 0 19-25 abc
2 2 1 4 3 5 1 0 19-25 def
3 3 4 4 2 3 3 1 19-25 abc
4 4 5 1 4 3 4 1 26-34 abc
5 5 3 1 3 2 5 1 26-34 abc
6 6 3 2 3 1 4 1 35-44 def
7 7 3 3 2 3 3 0 35-44 def
8 8 2 3 5 4 2 1 35-44 abc
我想运行一系列模型:把某个 q* 列对其余所有列(id 除外)做回归并估计 R^2,然后把第一个模型中的因变量(DV)换成另一个 q* 列,依此类推。输出应该是一个 tibble,每行是 broom::glance 的标准输出。
例如,第一个模型是:
# First model: q1 regressed on v1, v2, v3 and the remaining q* columns;
# glance() summarises the fitted model.
glance(lm(
  as.numeric(q1) ~ v1 + as.factor(v2) + as.factor(v3) + q2 + q3 + q4 + q5,
  data = df
))
# A tibble: 1 x 12
r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC deviance df.residual nobs
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int> <int>
1 1 NaN NaN NaN NaN 7 Inf -Inf -Inf 0 0 8
而第二个模型会将 q1 与 q2 交换:q2 成为因变量,q1 成为自变量之一。我会对所有 q* 列重复此操作,最终得到一个 5 行的 tibble。v* 列包含在每个模型中,但永远不会成为因变量。
我还希望最终的 tibble 中有一列标明该模型所用的因变量(即一个名为 dv 的列:当 q1 是因变量时其值为 q1,当 q2 是因变量时其值为 q2,依此类推)。
这可能吗?我宁愿避免复制和粘贴上述 n 次。
你可以定义
# Names of the q* columns: "q1" through "q5".
vars <- paste0("q", 1:5)
并迭代它并创建公式:
library(broom)
library(dplyr)
library(purrr)
# Fit one model per q* column: that column is the response, and the other
# q* columns join v1, v2 and v3 as predictors. Returns a list with one
# glance() summary per response.
map(vars, function(dv) {
  other_qs <- paste(vars[vars != dv], collapse = "+")
  model_spec <- paste0(
    "as.numeric(", dv, ") ~ v1 + as.factor(v2) + as.factor(v3) +",
    other_qs
  )
  glance(lm(as.formula(model_spec), data = df))
})
这会得到一个包含五个 data.frame 的列表:
[[1]]
# A tibble: 1 x 12
r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC deviance df.residual nobs
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int> <int>
1 1 NaN NaN NaN NaN 7 Inf -Inf -Inf 0 0 8
[[2]]
# A tibble: 1 x 12
r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC deviance df.residual nobs
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int> <int>
1 1 NaN NaN NaN NaN 7 Inf -Inf -Inf 0 0 8
[[3]]
# A tibble: 1 x 12
r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC deviance df.residual nobs
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int> <int>
1 1 NaN NaN NaN NaN 7 Inf -Inf -Inf 0 0 8
[[4]]
# A tibble: 1 x 12
r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC deviance df.residual nobs
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int> <int>
1 1 NaN NaN NaN NaN 7 Inf -Inf -Inf 0 0 8
[[5]]
# A tibble: 1 x 12
r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC deviance df.residual nobs
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int> <int>
1 1 NaN NaN NaN NaN 7 Inf -Inf -Inf 0 0 8
因为它们都很相似,你可以创建一个 data.frame:
# Name each element after itself so that map_df() can record, through
# `.id`, which column was the response.
vars <- setNames(nm = c("q1", "q2", "q3", "q4", "q5"))

# One glance() row per q* response, stacked into a single data frame whose
# `dependent_var` column holds the name of the response.
map_df(
  vars,
  function(dv) {
    other_qs <- paste(vars[vars != dv], collapse = "+")
    model_spec <- paste0(
      "as.numeric(", dv, ") ~ v1 + as.factor(v2) + as.factor(v3) +",
      other_qs
    )
    glance(lm(as.formula(model_spec), data = df))
  },
  .id = "dependent_var"
)
返回:
# A tibble: 5 x 13
dependent_var r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC deviance
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 q1 1 NaN NaN NaN NaN 7 Inf -Inf -Inf 0
2 q2 1 NaN NaN NaN NaN 7 Inf -Inf -Inf 0
3 q3 1 NaN NaN NaN NaN 7 Inf -Inf -Inf 0
4 q4 1 NaN NaN NaN NaN 7 Inf -Inf -Inf 0
5 q5 1 NaN NaN NaN NaN 7 Inf -Inf -Inf 0
# ... with 2 more variables: df.residual <int>, nobs <int>
假设我有这个 df:
# Example data: an id column, five q* columns and three v* columns,
# stored as a tibble-classed data frame with 8 rows.
df <- structure(
  list(
    id = c(1, 2, 3, 4, 5, 6, 7, 8),
    q1 = c(1, 1, 4, 5, 3, 3, 3, 2),
    q2 = c(5, 4, 4, 1, 1, 2, 3, 3),
    q3 = c(3, 3, 2, 4, 3, 3, 2, 5),
    q4 = c(6, 5, 3, 3, 2, 1, 3, 4),
    q5 = c(2, 1, 3, 4, 5, 4, 3, 2),
    v1 = c(0, 0, 1, 1, 1, 1, 0, 1),
    v2 = rep(c("19-25", "26-34", "35-44"), times = c(3, 2, 3)),
    v3 = c("abc", "def", "abc", "abc", "abc", "def", "def", "abc")
  ),
  row.names = c(NA, -8L),
  class = c("tbl_df", "tbl", "data.frame")
)
> df
# A tibble: 8 x 9
id q1 q2 q3 q4 q5 v1 v2 v3
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
1 1 1 5 3 6 2 0 19-25 abc
2 2 1 4 3 5 1 0 19-25 def
3 3 4 4 2 3 3 1 19-25 abc
4 4 5 1 4 3 4 1 26-34 abc
5 5 3 1 3 2 5 1 26-34 abc
6 6 3 2 3 1 4 1 35-44 def
7 7 3 3 2 3 3 0 35-44 def
8 8 2 3 5 4 2 1 35-44 abc
我想运行一系列模型:把某个 q* 列对其余所有列(id 除外)做回归并估计 R^2,然后把第一个模型中的因变量(DV)换成另一个 q* 列,依此类推。输出应该是一个 tibble,每行是 broom::glance 的标准输出。
例如,第一个模型是:
# First model: q1 regressed on v1, v2, v3 and the remaining q* columns;
# glance() summarises the fitted model.
glance(lm(
  as.numeric(q1) ~ v1 + as.factor(v2) + as.factor(v3) + q2 + q3 + q4 + q5,
  data = df
))
# A tibble: 1 x 12
r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC deviance df.residual nobs
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int> <int>
1 1 NaN NaN NaN NaN 7 Inf -Inf -Inf 0 0 8
而第二个模型会将 q1 与 q2 交换:q2 成为因变量,q1 成为自变量之一。我会对所有 q* 列重复此操作,最终得到一个 5 行的 tibble。v* 列包含在每个模型中,但永远不会成为因变量。
我还希望最终的 tibble 中有一列标明该模型所用的因变量(即一个名为 dv 的列:当 q1 是因变量时其值为 q1,当 q2 是因变量时其值为 q2,依此类推)。
这可能吗?我宁愿避免复制和粘贴上述 n 次。
你可以定义
# Names of the q* columns: "q1" through "q5".
vars <- paste0("q", 1:5)
并迭代它并创建公式:
library(broom)
library(dplyr)
library(purrr)
# Fit one model per q* column: that column is the response, and the other
# q* columns join v1, v2 and v3 as predictors. Returns a list with one
# glance() summary per response.
map(vars, function(dv) {
  other_qs <- paste(vars[vars != dv], collapse = "+")
  model_spec <- paste0(
    "as.numeric(", dv, ") ~ v1 + as.factor(v2) + as.factor(v3) +",
    other_qs
  )
  glance(lm(as.formula(model_spec), data = df))
})
这会得到一个包含五个 data.frame 的列表:
[[1]]
# A tibble: 1 x 12
r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC deviance df.residual nobs
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int> <int>
1 1 NaN NaN NaN NaN 7 Inf -Inf -Inf 0 0 8
[[2]]
# A tibble: 1 x 12
r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC deviance df.residual nobs
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int> <int>
1 1 NaN NaN NaN NaN 7 Inf -Inf -Inf 0 0 8
[[3]]
# A tibble: 1 x 12
r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC deviance df.residual nobs
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int> <int>
1 1 NaN NaN NaN NaN 7 Inf -Inf -Inf 0 0 8
[[4]]
# A tibble: 1 x 12
r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC deviance df.residual nobs
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int> <int>
1 1 NaN NaN NaN NaN 7 Inf -Inf -Inf 0 0 8
[[5]]
# A tibble: 1 x 12
r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC deviance df.residual nobs
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int> <int>
1 1 NaN NaN NaN NaN 7 Inf -Inf -Inf 0 0 8
因为它们都很相似,你可以创建一个 data.frame:
# Name each element after itself so that map_df() can record, through
# `.id`, which column was the response.
vars <- setNames(nm = c("q1", "q2", "q3", "q4", "q5"))

# One glance() row per q* response, stacked into a single data frame whose
# `dependent_var` column holds the name of the response.
map_df(
  vars,
  function(dv) {
    other_qs <- paste(vars[vars != dv], collapse = "+")
    model_spec <- paste0(
      "as.numeric(", dv, ") ~ v1 + as.factor(v2) + as.factor(v3) +",
      other_qs
    )
    glance(lm(as.formula(model_spec), data = df))
  },
  .id = "dependent_var"
)
返回:
# A tibble: 5 x 13
dependent_var r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC deviance
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 q1 1 NaN NaN NaN NaN 7 Inf -Inf -Inf 0
2 q2 1 NaN NaN NaN NaN 7 Inf -Inf -Inf 0
3 q3 1 NaN NaN NaN NaN 7 Inf -Inf -Inf 0
4 q4 1 NaN NaN NaN NaN 7 Inf -Inf -Inf 0
5 q5 1 NaN NaN NaN NaN 7 Inf -Inf -Inf 0
# ... with 2 more variables: df.residual <int>, nobs <int>