将系数正确绑定到汇总 table
Correctly binding coefficients to summarized table
我有一个 glm
模型和一个汇总数据集,需要将模型汇总中的 coefficients
、standard error
和 p.value
绑定到汇总数据集.例如,我使用了 mtcars
数据集。我将 columns
添加到最终联合数据集中,以模拟我希望放置 coefficients, standard errors, and p-values
的位置。对于模型中未显示的基值,我想在 coefficients
中添加一个“1
”并使用 intercept, standard errors and p-value
。我怎么能做到这一切?
library(tidyverse)
# Tibble copy of mtcars with cyl and gear stored as factors, so that glm()
# treats them as categorical predictors.
mtcars <- as_tibble(mtcars) %>%
  mutate(cyl = as.factor(cyl), gear = as.factor(gear))

# Fit the model and print its summary.
model1 <- glm(mpg ~ cyl + gear, data = mtcars)
summary(model1)

# Summed weight per cyl level, tagged with the variable it came from.
mtcars_wght <- mtcars %>%
  group_by(level = cyl) %>%
  summarise(sum_weight = sum(wt)) %>%
  mutate(variable = "cyl")

# Summed weight per gear level, in the same three-column layout.
mtcars_gear <- mtcars %>%
  group_by(level = gear) %>%
  summarise(sum_weight = sum(wt)) %>%
  mutate(variable = "gear")

# Stack both summaries and add placeholder columns marking where the
# coefficient, standard error and p-value should end up.
mtcars_sum <- bind_rows(mtcars_wght, mtcars_gear) %>%
  mutate(coefficient = "x", std.error = "y", p_value = "z")
你想要这个输出吗?
# A tibble: 6 x 6
level sum_weight variable coefficient std.error p_value
<chr> <dbl> <chr> <dbl> <dbl> <dbl>
1 4 25.1 cyl NA NA NA
2 6 21.8 cyl -6.66 1.63 0.000353
3 8 56.0 cyl -10.5 1.96 0.0000109
4 3 58.4 gear NA NA NA
5 4 31.4 gear 1.32 1.93 0.498
6 5 13.2 gear 1.50 1.85 0.426
这里是你如何用 dplyr
和 broom
来达到这个目的。
# Stack the two summaries and turn `level` into a join key of the form
# "<variable><level>" (e.g. "cyl6"), which is how the model names its terms.
df <- rbind(mtcars_wght, mtcars_gear) %>%
  mutate(level = paste0(variable, level)) %>%
  select(-variable)

# One row per model term: estimate, std.error, statistic, p.value.
mod_summary <- broom::tidy(model1)

# Attach the model terms to the summary rows, then split the join key back
# into its variable name and level, and keep the six output columns.
df %>%
  left_join(mod_summary, by = c('level' = 'term')) %>%
  mutate(
    variable = str_extract(level, '[a-z]+'),
    level = str_extract(level, '[0-9]+')
  ) %>%
  select(
    level, sum_weight, variable,
    coefficient = estimate, std.error, p_value = p.value
  )
编辑
如果要包含 Intercept
,请使用 full_join
而不是上面的 left_join
。下面,我将输出保存到 thesummary
.
# A full join keeps model terms that have no matching summary row, so the
# "(Intercept)" term is retained as an extra row.
thesummary <- df %>%
  full_join(mod_summary, by = c('level' = 'term')) %>%
  mutate(
    variable = str_extract(level, '[A-Za-z]+'),
    level = str_extract(level, '[0-9]+')
  ) %>%
  select(
    level, sum_weight, variable,
    coefficient = estimate, std.error, p_value = p.value
  )
要为缺失值分配 1
,仅对最后 4 列,执行:
# Fill missing values with 1 in the last four columns (variable, coefficient,
# std.error, p_value). Each column is handled on its own so the numeric
# columns stay numeric; running apply() over a data frame that also holds a
# character column would turn every value into a string.
thesummary %>%
  mutate(
    across(
      c(variable, coefficient, std.error, p_value),
      ~ replace(.x, is.na(.x), 1)
    )
  )
这是输出:
level sum_weight variable coefficient std.error p_value
1 4 25.14 cyl 1 1 1
2 6 21.82 cyl -6.656 1.629 3.528e-04
3 8 55.99 cyl -10.542 1.958 1.087e-05
4 3 58.39 gear 1 1 1
5 4 31.40 gear 1.324 1.928 4.980e-01
6 5 13.16 gear 1.500 1.855 4.257e-01
7 <NA> NA Intercept 25.428 1.881 1.554e-13
如果你想将每个 NA
替换为 1,只需执行以下操作:
# Overwrite every NA in the table with 1, in all columns (including the
# character `level` column and `sum_weight`).
thesummary <- replace(thesummary, is.na(thesummary), 1)
这是输出。
# A tibble: 7 x 6
level sum_weight variable coefficient std.error p_value
<chr> <dbl> <chr> <dbl> <dbl> <dbl>
1 4 25.1 cyl 1.00 1.00 1.00e+ 0
2 6 21.8 cyl -6.66 1.63 3.53e- 4
3 8 56.0 cyl -10.5 1.96 1.09e- 5
4 3 58.4 gear 1.00 1.00 1.00e+ 0
5 4 31.4 gear 1.32 1.93 4.98e- 1
6 5 13.2 gear 1.50 1.85 4.26e- 1
7 1 1.00 Intercept 25.4 1.88 1.55e-13
我有一个 glm
模型和一个汇总数据集,需要将模型汇总中的 coefficients
、standard error
和 p.value
绑定到汇总数据集.例如,我使用了 mtcars
数据集。我将 columns
添加到最终联合数据集中,以模拟我希望放置 coefficients, standard errors, and p-values
的位置。对于模型中未显示的基值,我想在 coefficients
中添加一个“1
”并使用 intercept, standard errors and p-value
。我怎么能做到这一切?
library(tidyverse)
# Tibble copy of mtcars with cyl and gear stored as factors, so that glm()
# treats them as categorical predictors.
mtcars <- as_tibble(mtcars) %>%
  mutate(cyl = as.factor(cyl), gear = as.factor(gear))

# Fit the model and print its summary.
model1 <- glm(mpg ~ cyl + gear, data = mtcars)
summary(model1)

# Summed weight per cyl level, tagged with the variable it came from.
mtcars_wght <- mtcars %>%
  group_by(level = cyl) %>%
  summarise(sum_weight = sum(wt)) %>%
  mutate(variable = "cyl")

# Summed weight per gear level, in the same three-column layout.
mtcars_gear <- mtcars %>%
  group_by(level = gear) %>%
  summarise(sum_weight = sum(wt)) %>%
  mutate(variable = "gear")

# Stack both summaries and add placeholder columns marking where the
# coefficient, standard error and p-value should end up.
mtcars_sum <- bind_rows(mtcars_wght, mtcars_gear) %>%
  mutate(coefficient = "x", std.error = "y", p_value = "z")
你想要这个输出吗?
# A tibble: 6 x 6
level sum_weight variable coefficient std.error p_value
<chr> <dbl> <chr> <dbl> <dbl> <dbl>
1 4 25.1 cyl NA NA NA
2 6 21.8 cyl -6.66 1.63 0.000353
3 8 56.0 cyl -10.5 1.96 0.0000109
4 3 58.4 gear NA NA NA
5 4 31.4 gear 1.32 1.93 0.498
6 5 13.2 gear 1.50 1.85 0.426
这里是你如何用 dplyr
和 broom
来达到这个目的。
# Stack the two summaries and turn `level` into a join key of the form
# "<variable><level>" (e.g. "cyl6"), which is how the model names its terms.
df <- rbind(mtcars_wght, mtcars_gear) %>%
  mutate(level = paste0(variable, level)) %>%
  select(-variable)

# One row per model term: estimate, std.error, statistic, p.value.
mod_summary <- broom::tidy(model1)

# Attach the model terms to the summary rows, then split the join key back
# into its variable name and level, and keep the six output columns.
df %>%
  left_join(mod_summary, by = c('level' = 'term')) %>%
  mutate(
    variable = str_extract(level, '[a-z]+'),
    level = str_extract(level, '[0-9]+')
  ) %>%
  select(
    level, sum_weight, variable,
    coefficient = estimate, std.error, p_value = p.value
  )
编辑
如果要包含 Intercept
,请使用 full_join
而不是上面的 left_join
。下面,我将输出保存到 thesummary
.
# A full join keeps model terms that have no matching summary row, so the
# "(Intercept)" term is retained as an extra row.
thesummary <- df %>%
  full_join(mod_summary, by = c('level' = 'term')) %>%
  mutate(
    variable = str_extract(level, '[A-Za-z]+'),
    level = str_extract(level, '[0-9]+')
  ) %>%
  select(
    level, sum_weight, variable,
    coefficient = estimate, std.error, p_value = p.value
  )
要为缺失值分配 1
,仅对最后 4 列,执行:
# Fill missing values with 1 in the last four columns (variable, coefficient,
# std.error, p_value). Each column is handled on its own so the numeric
# columns stay numeric; running apply() over a data frame that also holds a
# character column would turn every value into a string.
thesummary %>%
  mutate(
    across(
      c(variable, coefficient, std.error, p_value),
      ~ replace(.x, is.na(.x), 1)
    )
  )
这是输出:
level sum_weight variable coefficient std.error p_value
1 4 25.14 cyl 1 1 1
2 6 21.82 cyl -6.656 1.629 3.528e-04
3 8 55.99 cyl -10.542 1.958 1.087e-05
4 3 58.39 gear 1 1 1
5 4 31.40 gear 1.324 1.928 4.980e-01
6 5 13.16 gear 1.500 1.855 4.257e-01
7 <NA> NA Intercept 25.428 1.881 1.554e-13
如果你想将每个 NA
替换为 1,只需执行以下操作:
# Overwrite every NA in the table with 1, in all columns (including the
# character `level` column and `sum_weight`).
thesummary <- replace(thesummary, is.na(thesummary), 1)
这是输出。
# A tibble: 7 x 6
level sum_weight variable coefficient std.error p_value
<chr> <dbl> <chr> <dbl> <dbl> <dbl>
1 4 25.1 cyl 1.00 1.00 1.00e+ 0
2 6 21.8 cyl -6.66 1.63 3.53e- 4
3 8 56.0 cyl -10.5 1.96 1.09e- 5
4 3 58.4 gear 1.00 1.00 1.00e+ 0
5 4 31.4 gear 1.32 1.93 4.98e- 1
6 5 13.2 gear 1.50 1.85 4.26e- 1
7 1 1.00 Intercept 25.4 1.88 1.55e-13