如何使用嵌套数据框创建整齐的相关性?
How to create tidy correlations using nested dataframes?
这个问题之前已经部分回答(例如,),但是——据我所知——没有使用可重现示例的完整答案。我想从嵌套数据框中按名称 select 变量,计算成对相关性,然后将相关系数和 p 值添加到具有适当名称列的未嵌套数据框中。以下示例产生了预期的结果:
library(tidyverse)
library(broom)
df <- mtcars %>%
nest(data = everything()) %>%
mutate(cor_test = map(data, ~ cor.test(.x$mpg, .x$disp)),
tidied = map(cor_test, tidy)) %>%
unnest(tidied) %>%
select(-c(cor_test, statistic, parameter, conf.low, conf.high, method, alternative)) %>%
rename(c(mpg_disp_estimate = estimate, mpg_disp_p.value = p.value)) %>%
mutate(cor_test = map(data, ~ cor.test(.x$mpg, .x$cyl)),
tidied = map(cor_test, tidy)) %>%
unnest(tidied) %>%
select(-c(cor_test, statistic, parameter, conf.low, conf.high, method, alternative)) %>%
rename(c(mpg_cyl_estimate = estimate, mpg_cyl_p.value = p.value)) %>%
mutate(cor_test = map(data, ~ cor.test(.x$disp, .x$cyl)),
tidied = map(cor_test, tidy)) %>%
unnest(tidied) %>%
select(-c(cor_test, statistic, parameter, conf.low, conf.high, method, alternative)) %>%
rename(c(disp_cyl_estimate = estimate, disp_cyl_p.value = p.value))
显然,这不是一个好的解决方案,因为它涉及一遍又一遍地重复相同的代码。有没有办法用 purrr
和 broom
更优雅地实现这个目标?
我们可以用 combn
做到这一点。获取数据列名称与 combn
的成对组合,从数据中提取列值,应用 cor.test
、return tidy
ied 输出,创建 'categ' 列来标识测试中使用的列,并将 tibble
输出的 list
绑定到单个 data.frame
library(dplyr)
library(broom)
library(stringr)
out <- combn(names(mtcars), 2, FUN = function(x)
tidy(cor.test(mtcars[[x[1]]], mtcars[[x[2]]])) %>%
mutate(categ = str_c(x, collapse="_"), .before = 1),
simplify = FALSE) %>%
bind_rows
-输出
out
# A tibble: 55 x 9
# categ estimate statistic p.value parameter conf.low conf.high method alternative
# <chr> <dbl> <dbl> <dbl> <int> <dbl> <dbl> <chr> <chr>
# 1 mpg_cyl -0.852 -8.92 6.11e-10 30 -0.926 -0.716 Pearson's product-moment correlation two.sided
# 2 mpg_disp -0.848 -8.75 9.38e-10 30 -0.923 -0.708 Pearson's product-moment correlation two.sided
# 3 mpg_hp -0.776 -6.74 1.79e- 7 30 -0.885 -0.586 Pearson's product-moment correlation two.sided
# 4 mpg_drat 0.681 5.10 1.78e- 5 30 0.436 0.832 Pearson's product-moment correlation two.sided
# 5 mpg_wt -0.868 -9.56 1.29e-10 30 -0.934 -0.744 Pearson's product-moment correlation two.sided
# 6 mpg_qsec 0.419 2.53 1.71e- 2 30 0.0820 0.670 Pearson's product-moment correlation two.sided
# 7 mpg_vs 0.664 4.86 3.42e- 5 30 0.410 0.822 Pearson's product-moment correlation two.sided
# 8 mpg_am 0.600 4.11 2.85e- 4 30 0.318 0.784 Pearson's product-moment correlation two.sided
# 9 mpg_gear 0.480 3.00 5.40e- 3 30 0.158 0.710 Pearson's product-moment correlation two.sided
#10 mpg_carb -0.551 -3.62 1.08e- 3 30 -0.755 -0.250 Pearson's product-moment correlation two.sided
# … with 45 more rows
如果我们想创建一个宽幅面,使用pivot_wider
library(tidyr)
out1 <- combn(names(mtcars), 2, FUN = function(x)
tidy(cor.test(mtcars[[x[1]]], mtcars[[x[2]]])) %>%
mutate(categ = str_c(x, collapse="_"), .before = 1),
simplify = FALSE) %>%
bind_rows %>%
select(categ, estimate, p.value) %>%
pivot_wider(names_from = categ, values_from = c(estimate, p.value))
如果我们想在嵌套数据中使用,将上面的代码包装在一个函数中,然后 map
覆盖 list
'data' 列
library(purrr)
f1 <- function(dat) {
combn(names(dat), 2, FUN = function(x)
tidy(cor.test(dat[[x[1]]], dat[[x[2]]])) %>%
mutate(categ = str_c(x, collapse="_"), .before = 1),
simplify = FALSE) %>%
bind_rows %>%
select(categ, estimate, p.value) %>%
pivot_wider(names_from = categ, values_from = c(estimate, p.value))
}
mtcars %>%
nest(data = everything()) %>%
mutate(out = map(data, f1))
# A tibble: 1 x 2
# data out
# <list> <list>
#1 <tibble [32 × 11]> <tibble [1 × 110]>
这个问题之前已经部分回答(例如,
library(tidyverse)
library(broom)
df <- mtcars %>%
nest(data = everything()) %>%
mutate(cor_test = map(data, ~ cor.test(.x$mpg, .x$disp)),
tidied = map(cor_test, tidy)) %>%
unnest(tidied) %>%
select(-c(cor_test, statistic, parameter, conf.low, conf.high, method, alternative)) %>%
rename(c(mpg_disp_estimate = estimate, mpg_disp_p.value = p.value)) %>%
mutate(cor_test = map(data, ~ cor.test(.x$mpg, .x$cyl)),
tidied = map(cor_test, tidy)) %>%
unnest(tidied) %>%
select(-c(cor_test, statistic, parameter, conf.low, conf.high, method, alternative)) %>%
rename(c(mpg_cyl_estimate = estimate, mpg_cyl_p.value = p.value)) %>%
mutate(cor_test = map(data, ~ cor.test(.x$disp, .x$cyl)),
tidied = map(cor_test, tidy)) %>%
unnest(tidied) %>%
select(-c(cor_test, statistic, parameter, conf.low, conf.high, method, alternative)) %>%
rename(c(disp_cyl_estimate = estimate, disp_cyl_p.value = p.value))
显然,这不是一个好的解决方案,因为它涉及一遍又一遍地重复相同的代码。有没有办法用 purrr
和 broom
更优雅地实现这个目标?
我们可以用 combn
做到这一点。获取数据列名称与 combn
的成对组合,从数据中提取列值,应用 cor.test
、return tidy
ied 输出,创建 'categ' 列来标识测试中使用的列,并将 tibble
输出的 list
绑定到单个 data.frame
library(dplyr)
library(broom)
library(stringr)
out <- combn(names(mtcars), 2, FUN = function(x)
tidy(cor.test(mtcars[[x[1]]], mtcars[[x[2]]])) %>%
mutate(categ = str_c(x, collapse="_"), .before = 1),
simplify = FALSE) %>%
bind_rows
-输出
out
# A tibble: 55 x 9
# categ estimate statistic p.value parameter conf.low conf.high method alternative
# <chr> <dbl> <dbl> <dbl> <int> <dbl> <dbl> <chr> <chr>
# 1 mpg_cyl -0.852 -8.92 6.11e-10 30 -0.926 -0.716 Pearson's product-moment correlation two.sided
# 2 mpg_disp -0.848 -8.75 9.38e-10 30 -0.923 -0.708 Pearson's product-moment correlation two.sided
# 3 mpg_hp -0.776 -6.74 1.79e- 7 30 -0.885 -0.586 Pearson's product-moment correlation two.sided
# 4 mpg_drat 0.681 5.10 1.78e- 5 30 0.436 0.832 Pearson's product-moment correlation two.sided
# 5 mpg_wt -0.868 -9.56 1.29e-10 30 -0.934 -0.744 Pearson's product-moment correlation two.sided
# 6 mpg_qsec 0.419 2.53 1.71e- 2 30 0.0820 0.670 Pearson's product-moment correlation two.sided
# 7 mpg_vs 0.664 4.86 3.42e- 5 30 0.410 0.822 Pearson's product-moment correlation two.sided
# 8 mpg_am 0.600 4.11 2.85e- 4 30 0.318 0.784 Pearson's product-moment correlation two.sided
# 9 mpg_gear 0.480 3.00 5.40e- 3 30 0.158 0.710 Pearson's product-moment correlation two.sided
#10 mpg_carb -0.551 -3.62 1.08e- 3 30 -0.755 -0.250 Pearson's product-moment correlation two.sided
# … with 45 more rows
如果我们想创建一个宽幅面,使用pivot_wider
library(tidyr)
out1 <- combn(names(mtcars), 2, FUN = function(x)
tidy(cor.test(mtcars[[x[1]]], mtcars[[x[2]]])) %>%
mutate(categ = str_c(x, collapse="_"), .before = 1),
simplify = FALSE) %>%
bind_rows %>%
select(categ, estimate, p.value) %>%
pivot_wider(names_from = categ, values_from = c(estimate, p.value))
如果我们想在嵌套数据中使用,将上面的代码包装在一个函数中,然后 map
覆盖 list
'data' 列
library(purrr)
f1 <- function(dat) {
combn(names(dat), 2, FUN = function(x)
tidy(cor.test(dat[[x[1]]], dat[[x[2]]])) %>%
mutate(categ = str_c(x, collapse="_"), .before = 1),
simplify = FALSE) %>%
bind_rows %>%
select(categ, estimate, p.value) %>%
pivot_wider(names_from = categ, values_from = c(estimate, p.value))
}
mtcars %>%
nest(data = everything()) %>%
mutate(out = map(data, f1))
# A tibble: 1 x 2
# data out
# <list> <list>
#1 <tibble [32 × 11]> <tibble [1 × 110]>