将多个回归系数合并到一个数据框中
Combine many regression coefficients into one dataframe
我有多条回归线。我想将这些系数合并到一个数据框中以便于可视化。
但是,并非所有回归都具有相同的系数,因此我无法使用 for 循环查找系数名称。
这是一个示例,具有相同的示例数据和所需的输出。
# Example data: four predictors (x1..x4) and one response (y), 10 rows.
df <- data.frame(
  x1 = c(
    -0.689814979498939, -0.509885025360363, -0.20248689168896,
    -1.79535329549682, 1.60447678701814, -0.408696703105769,
    0.97243696942363, -0.688339413750959, -0.359380427396309,
    1.11638856659614
  ),
  x2 = c(
    0.775426469430265, 0.367906637531888, 0.965721516497862,
    -0.601113535090469, -0.655567870650469, 1.45494263752806,
    0.187276141272287, -0.659949502938592, -0.481763339717836,
    -0.581132345668067
  ),
  x3 = c(
    -0.17202393327554, 0.022376822081548, -1.05069599269781,
    -0.631926480864125, 1.76178640615702, -1.60488439781703,
    0.172936842119056, 0.750091896988, -1.60900096983098,
    0.443223570706679
  ),
  x4 = c(
    -0.117822668731567, -0.645150368596604, -1.58642572549226,
    0.3630617077837, -1.00866095836508, 0.696818785571135,
    0.978471598076335, -0.315392158997475, 1.37594860146428,
    0.0574562910914235
  ),
  y = c(
    -1.07067139899979, -0.360297366336307, 0.0328023505398295,
    1.07908579247402, 0.185603676169661, 0.384858869675533,
    0.62179479088495, 1.44265090318836, 0.340526158232088,
    -1.20387054108186
  )
)

# Four linear models of y, each using a different subset of the predictors.
model1 <- lm(y ~ x1, data = df)
model2 <- lm(y ~ x2, data = df)
model3 <- lm(y ~ x2 + x4, data = df)
model4 <- lm(y ~ x2 + x3 + x4, data = df)

# Desired result, typed in by hand: one row per model, one column per
# predictor, NA where a model does not include that predictor.
coefs_x1 <- c(-0.2749230, NA, NA, NA)
coefs_x2 <- c(NA, -0.2795309, -0.2599686, -0.40977455)
coefs_x3 <- c(NA, NA, NA, -0.18740855)
coefs_x4 <- c(NA, NA, 0.1568399, 0.04981574)
output_df <- data.frame(coefs_x1, coefs_x2, coefs_x3, coefs_x4)
> output_df
coefs_x1 coefs_x2 coefs_x3 coefs_x4
1 -0.274923 NA NA NA
2 NA -0.2795309 NA NA
3 NA -0.2599686 NA 0.15683990
4 NA -0.4097746 -0.1874086 0.04981574
有很多方法可以做到这一点,下面是我通常使用 dplyr
的方法。
您可以直接取出每个模型的系数。它们保存在模型对象“内部”。调用 model1$coefficients
会返回该模型的全部系数,包括截距。由于您不需要截距(至少您在问题中没有提到它),我将使用 base R
的 [-1]
索引将其去掉,也就是删除系数向量的第一个元素。
然后我用 bind_rows()
把所有的行合并在一起,再用 select()
调整列的顺序。函数 bind_rows()
会逐行合并,并在需要时添加新列,同时用 NA
填充缺失的值。这样就解决了你的问题。
解决方案
library(dplyr)

# For every fitted model, keep all coefficients except the first one
# (the intercept for these formulas), stack the named vectors as rows so
# that missing terms become NA, then put the columns in a fixed order.
list(model1, model2, model3, model4) %>%
  lapply(function(fit) coef(fit)[-1]) %>%
  bind_rows() %>%
  select(x1, x2, x3, x4)
输出
# A tibble: 4 x 4
x1 x2 x3 x4
<dbl> <dbl> <dbl> <dbl>
1 -0.275 NA NA NA
2 NA -0.280 NA NA
3 NA -0.260 NA 0.157
4 NA -0.410 -0.187 0.0498
仅供参考,输出与您的相同,但 tibble
通常将其四舍五入以进行展示,但在后台,它具有所有小数位。
使用 base
R(创建列表只是为了更容易命名,最初的想法是 rbind
和 do.call
)
# Assumes the coefficient vectors are stored in variables named
# coefs_x<digits> (coefs_x1, coefs_x2, ...).
# ls() only returns the variable *names*; mget() looks up the vectors those
# names refer to, so the data frame is built from the coefficients themselves
# rather than from a character vector of names.
coefs <- mget(ls(pattern = "^coefs_x[0-9]+$"))
# The list is already named after the variables, so those names become the
# column names; no positional relabelling is needed.
as.data.frame(coefs)
coefs_x1 coefs_x2 coefs_x3 coefs_x4
1 -0.274923 NA NA NA
2 NA -0.2795309 NA NA
3 NA -0.2599686 NA 0.15683990
4 NA -0.4097746 -0.1874086 0.04981574
你可以这样做:
library(tidyverse)

# One formula per model; the list names only label the models.
forms <- list(
  x1 = y ~ x1,
  x2 = y ~ x2,
  x3 = y ~ x2 + x4,
  x4 = y ~ x2 + x3 + x4
)

# Fit each formula on df, bind the coefficient vectors row-wise (NA where a
# term is absent from a model), then drop the first column of the result.
forms %>%
  map_df(function(f) coef(lm(f, data = df))) %>%
  select(-1)
# A tibble: 4 x 4
x1 x2 x4 x3
<dbl> <dbl> <dbl> <dbl>
1 -0.275 NA NA NA
2 NA -0.280 NA NA
3 NA -0.260 0.157 NA
4 NA -0.410 0.0498 -0.187
另一个选项:
# Turn each model's coefficient vector into a one-row matrix, stack the
# matrices while filling absent columns with NA, convert to a data frame and
# drop the first column of the result.
forms %>%
  map(function(f) t(coef(lm(f, data = df)))) %>%
  plyr::rbind.fill.matrix() %>%
  as.data.frame() %>%
  select(-1)
x1 x2 x4 x3
1 -0.274923 NA NA NA
2 NA -0.2795309 NA NA
3 NA -0.2599686 0.15683990 NA
4 NA -0.4097745 0.04981574 -0.1874086
我有多条回归线。我想将这些系数合并到一个数据框中以便于可视化。
但是,并非所有回归都具有相同的系数,因此我无法使用 for 循环查找系数名称。
这是一个示例,具有相同的示例数据和所需的输出。
# Example data: four predictors (x1..x4) and one response (y), 10 rows.
df <- data.frame(
  x1 = c(
    -0.689814979498939, -0.509885025360363, -0.20248689168896,
    -1.79535329549682, 1.60447678701814, -0.408696703105769,
    0.97243696942363, -0.688339413750959, -0.359380427396309,
    1.11638856659614
  ),
  x2 = c(
    0.775426469430265, 0.367906637531888, 0.965721516497862,
    -0.601113535090469, -0.655567870650469, 1.45494263752806,
    0.187276141272287, -0.659949502938592, -0.481763339717836,
    -0.581132345668067
  ),
  x3 = c(
    -0.17202393327554, 0.022376822081548, -1.05069599269781,
    -0.631926480864125, 1.76178640615702, -1.60488439781703,
    0.172936842119056, 0.750091896988, -1.60900096983098,
    0.443223570706679
  ),
  x4 = c(
    -0.117822668731567, -0.645150368596604, -1.58642572549226,
    0.3630617077837, -1.00866095836508, 0.696818785571135,
    0.978471598076335, -0.315392158997475, 1.37594860146428,
    0.0574562910914235
  ),
  y = c(
    -1.07067139899979, -0.360297366336307, 0.0328023505398295,
    1.07908579247402, 0.185603676169661, 0.384858869675533,
    0.62179479088495, 1.44265090318836, 0.340526158232088,
    -1.20387054108186
  )
)

# Four linear models of y, each using a different subset of the predictors.
model1 <- lm(y ~ x1, data = df)
model2 <- lm(y ~ x2, data = df)
model3 <- lm(y ~ x2 + x4, data = df)
model4 <- lm(y ~ x2 + x3 + x4, data = df)

# Desired result, typed in by hand: one row per model, one column per
# predictor, NA where a model does not include that predictor.
coefs_x1 <- c(-0.2749230, NA, NA, NA)
coefs_x2 <- c(NA, -0.2795309, -0.2599686, -0.40977455)
coefs_x3 <- c(NA, NA, NA, -0.18740855)
coefs_x4 <- c(NA, NA, 0.1568399, 0.04981574)
output_df <- data.frame(coefs_x1, coefs_x2, coefs_x3, coefs_x4)
> output_df
coefs_x1 coefs_x2 coefs_x3 coefs_x4
1 -0.274923 NA NA NA
2 NA -0.2795309 NA NA
3 NA -0.2599686 NA 0.15683990
4 NA -0.4097746 -0.1874086 0.04981574
有很多方法可以做到这一点,下面是我通常使用 dplyr
的方法。
您可以直接取出每个模型的系数。它们保存在模型对象“内部”。调用 model1$coefficients
会返回该模型的全部系数,包括截距。由于您不需要截距(至少您在问题中没有提到它),我将使用 base R
的 [-1]
索引将其去掉,也就是删除系数向量的第一个元素。
然后我用 bind_rows()
把所有的行合并在一起,再用 select()
调整列的顺序。函数 bind_rows()
会逐行合并,并在需要时添加新列,同时用 NA
填充缺失的值。这样就解决了你的问题。
解决方案
library(dplyr)

# For every fitted model, keep all coefficients except the first one
# (the intercept for these formulas), stack the named vectors as rows so
# that missing terms become NA, then put the columns in a fixed order.
list(model1, model2, model3, model4) %>%
  lapply(function(fit) coef(fit)[-1]) %>%
  bind_rows() %>%
  select(x1, x2, x3, x4)
输出
# A tibble: 4 x 4
x1 x2 x3 x4
<dbl> <dbl> <dbl> <dbl>
1 -0.275 NA NA NA
2 NA -0.280 NA NA
3 NA -0.260 NA 0.157
4 NA -0.410 -0.187 0.0498
仅供参考,输出与您的相同,但 tibble
通常将其四舍五入以进行展示,但在后台,它具有所有小数位。
使用 base
R(创建列表只是为了更容易命名,最初的想法是 rbind
和 do.call
)
# Assumes the coefficient vectors are stored in variables named
# coefs_x<digits> (coefs_x1, coefs_x2, ...).
# ls() only returns the variable *names*; mget() looks up the vectors those
# names refer to, so the data frame is built from the coefficients themselves
# rather than from a character vector of names.
coefs <- mget(ls(pattern = "^coefs_x[0-9]+$"))
# The list is already named after the variables, so those names become the
# column names; no positional relabelling is needed.
as.data.frame(coefs)
coefs_x1 coefs_x2 coefs_x3 coefs_x4
1 -0.274923 NA NA NA
2 NA -0.2795309 NA NA
3 NA -0.2599686 NA 0.15683990
4 NA -0.4097746 -0.1874086 0.04981574
你可以这样做:
library(tidyverse)

# One formula per model; the list names only label the models.
forms <- list(
  x1 = y ~ x1,
  x2 = y ~ x2,
  x3 = y ~ x2 + x4,
  x4 = y ~ x2 + x3 + x4
)

# Fit each formula on df, bind the coefficient vectors row-wise (NA where a
# term is absent from a model), then drop the first column of the result.
forms %>%
  map_df(function(f) coef(lm(f, data = df))) %>%
  select(-1)
# A tibble: 4 x 4
x1 x2 x4 x3
<dbl> <dbl> <dbl> <dbl>
1 -0.275 NA NA NA
2 NA -0.280 NA NA
3 NA -0.260 0.157 NA
4 NA -0.410 0.0498 -0.187
另一个选项:
# Turn each model's coefficient vector into a one-row matrix, stack the
# matrices while filling absent columns with NA, convert to a data frame and
# drop the first column of the result.
forms %>%
  map(function(f) t(coef(lm(f, data = df)))) %>%
  plyr::rbind.fill.matrix() %>%
  as.data.frame() %>%
  select(-1)
x1 x2 x4 x3
1 -0.274923 NA NA NA
2 NA -0.2795309 NA NA
3 NA -0.2599686 0.15683990 NA
4 NA -0.4097745 0.04981574 -0.1874086