如何使用 dplyr 融化 pairwise.wilcox.test 输出?
How to melt pairwise.wilcox.test output using dplyr?
我想一次对多个自变量应用 pairwise.wilcox.test,并以长格式输出结果。对于某个特定的波长,我可以使用以下代码:
# Pairwise Wilcoxon tests between classes for a single wavelength (WV_350);
# keep only the matrix of adjusted p-values.
try <- pairwise.wilcox.test(
  df$WV_350,
  as.factor(df$Class),
  p.adjust.method = "bonf"
)$p.value
而我想要的最终输出是
# Reshape the p-value matrix to long form: one row per (Var1, Var2) cell.
reshape2::melt(try)
#> Var1 Var2 value
#> 1 2 1 1.00000000
#> 2 3 1 0.07936508
#> 3 4 1 0.07936508
#> 4 5 1 0.07936508
#> 5 2 2 NA
#> 6 3 2 0.07936508
#> 7 4 2 0.07936508
#> 8 5 2 0.07936508
#> 9 2 3 NA
#> 10 3 3 NA
#> 11 4 3 1.00000000
#> 12 5 3 0.74912899
#> 13 2 4 NA
#> 14 3 4 NA
#> 15 4 4 NA
#> 16 5 4 0.55555556
现在,为了一次将它应用于所有波长,我使用了 dplyr 包(最新版本 1.0.0),如下:
library(tidyverse)

# Attempt: run the pairwise Wilcoxon test once per wavelength.
# The p-value matrix is returned as-is, so the result stays wide (a single
# matrix column named `pval`) rather than the desired long format.
as_tibble(df) %>% # tbl_df() is deprecated as of dplyr 1.0.0
  pivot_longer(cols = -Class, names_to = "Wavelengths", values_to = "value") %>%
  group_by(Wavelengths) %>%
  summarize(
    pval = pairwise.wilcox.test(
      value, as.factor(Class),
      p.adjust.method = "bonf"
    )$p.value
  )
它返回的结果是:
#> `summarise()` regrouping output by 'Wavelengths' (override with `.groups` argument)
#> # A tibble: 16 x 2
#> # Groups: Wavelengths [4]
#> Wavelengths pval[,1] [,2] [,3] [,4]
#> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 WV_350 1 NA NA NA
#> 2 WV_350 0.0794 0.0794 NA NA
#> 3 WV_350 0.0794 0.0794 1 NA
#> 4 WV_350 0.0794 0.0794 0.749 0.556
#> 5 WV_351 1 NA NA NA
#> 6 WV_351 0.0794 0.0794 NA NA
#> 7 WV_351 0.0794 0.0794 1 NA
#> 8 WV_351 0.0794 0.0794 0.556 0.556
#> 9 WV_352 1 NA NA NA
#> 10 WV_352 0.0794 0.0794 NA NA
#> 11 WV_352 0.0794 0.0794 1 NA
#> 12 WV_352 0.0794 0.0794 0.556 0.749
#> 13 WV_353 1 NA NA NA
#> 14 WV_353 0.0794 0.0794 NA NA
#> 15 WV_353 0.0794 0.0794 1 NA
#> 16 WV_353 0.0794 0.0794 0.556 0.317
现在如何得到如下长格式的输出?
Wavelength Var1 Var2 value
数据
# Example data: 25 observations, 5 per class (Class 1-5), with one column
# per wavelength (WV_350 to WV_353). Each row of values below is one class.
df <- structure(
  list(
    Class = rep(c(1, 2, 3, 4, 5), each = 5),
    WV_350 = c(
      0.0196, 0.0206, 0.023, 0.0264, 0.029,
      0.0201, 0.0181, 0.0216, 0.0225, 0.019,
      0.0165, 0.0121, 0.0129, 0.0123, 0.0149,
      0.0137, 0.0116, 0.0151, 0.0138, 0.0167,
      0.0149, 0.0112, 0.0107, 0.01, 0.0099
    ),
    WV_351 = c(
      0.0197, 0.0206, 0.0229, 0.0265, 0.029,
      0.0199, 0.0183, 0.0216, 0.0225, 0.0187,
      0.0165, 0.0118, 0.0127, 0.0122, 0.0148,
      0.0138, 0.0114, 0.0145, 0.0132, 0.0164,
      0.0144, 0.0108, 0.01, 0.0093, 0.0095
    ),
    WV_352 = c(
      0.0199, 0.0207, 0.0233, 0.027, 0.0299,
      0.0203, 0.0186, 0.0219, 0.0232, 0.019,
      0.0169, 0.0124, 0.0133, 0.0126, 0.0152,
      0.0145, 0.0118, 0.0148, 0.0132, 0.0168,
      0.0148, 0.0111, 0.0102, 0.0096, 0.0098
    ),
    WV_353 = c(
      0.0204, 0.0213, 0.0238, 0.0277, 0.0307,
      0.0208, 0.0194, 0.0229, 0.0241, 0.0199,
      0.0173, 0.013, 0.0142, 0.0134, 0.0161,
      0.0152, 0.0126, 0.0153, 0.0137, 0.0175,
      0.0151, 0.0116, 0.0105, 0.01, 0.0098
    )
  ),
  row.names = c(NA, 25L),
  class = "data.frame"
)
有点冗长,我相信可以提高效率:
library(tidyverse)
library(broom)

# Run the pairwise Wilcoxon test once per wavelength and return the adjusted
# p-values in long format (one row per compared pair of classes).
res <-
  as_tibble(df) %>% # tbl_df() is deprecated as of dplyr 1.0.0
  pivot_longer(cols = -Class, names_to = "Wavelengths", values_to = "value") %>%
  group_by(Wavelengths) %>%
  # Keep the whole test object (not just its $p.value matrix) so that broom's
  # pairwise.htest tidier can turn it into group1 / group2 / p.value rows.
  summarise(pw_wt = list(pairwise.wilcox.test(value, as.factor(Class),
                                              p.adjust.method = "bonf"))) %>%
  ungroup() %>%
  mutate(pw_wt_t = map(pw_wt, broom::tidy)) %>%
  unnest(pw_wt_t)
我使用 rstatix 包解决了这个问题,它“提供了一个简单直观、管道友好的框架,与 'tidyverse' 的设计理念一致,用于执行基本的统计检验”。
library(tidyverse)
library(rstatix)

# Pairwise Wilcoxon tests per wavelength via rstatix; the result is already
# a tibble with one row per pair of classes.
df %>%
  as_tibble() %>%
  pivot_longer(cols = -Class, names_to = "Wavelengths", values_to = "value") %>%
  mutate(Class = as.factor(Class)) %>%
  group_by(Wavelengths) %>%
  pairwise_wilcox_test(value ~ Class, p.adjust.method = "bonf")
它返回以下输出:
#> # A tibble: 40 x 10
#> Wavelengths .y. group1 group2 n1 n2 statistic p p.adj
#> * <chr> <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl>
#> 1 WV_350 value 1 2 5 5 20 0.151 1
#> 2 WV_350 value 1 3 5 5 25 0.008 0.079
#> 3 WV_350 value 1 4 5 5 25 0.008 0.079
#> 4 WV_350 value 1 5 5 5 25 0.008 0.079
#> 5 WV_350 value 2 3 5 5 25 0.008 0.079
#> 6 WV_350 value 2 4 5 5 25 0.008 0.079
#> 7 WV_350 value 2 5 5 5 25 0.008 0.079
#> 8 WV_350 value 3 4 5 5 10 0.69 1
#> 9 WV_350 value 3 5 5 5 21.5 0.075 0.749
#> 10 WV_350 value 4 5 5 5 22 0.056 0.556
#> # ... with 30 more rows, and 1 more variable: p.adj.signif <chr>
我想一次对多个自变量应用 pairwise.wilcox.test,并以长格式输出结果。对于某个特定的波长,我可以使用以下代码:
# Pairwise Wilcoxon tests between classes for a single wavelength (WV_350);
# keep only the matrix of adjusted p-values.
try <- pairwise.wilcox.test(
  df$WV_350,
  as.factor(df$Class),
  p.adjust.method = "bonf"
)$p.value
而我想要的最终输出是
# Reshape the p-value matrix to long form: one row per (Var1, Var2) cell.
reshape2::melt(try)
#> Var1 Var2 value
#> 1 2 1 1.00000000
#> 2 3 1 0.07936508
#> 3 4 1 0.07936508
#> 4 5 1 0.07936508
#> 5 2 2 NA
#> 6 3 2 0.07936508
#> 7 4 2 0.07936508
#> 8 5 2 0.07936508
#> 9 2 3 NA
#> 10 3 3 NA
#> 11 4 3 1.00000000
#> 12 5 3 0.74912899
#> 13 2 4 NA
#> 14 3 4 NA
#> 15 4 4 NA
#> 16 5 4 0.55555556
现在,为了一次将它应用于所有波长,我使用了 dplyr 包(最新版本 1.0.0),如下:
library(tidyverse)

# Attempt: run the pairwise Wilcoxon test once per wavelength.
# The p-value matrix is returned as-is, so the result stays wide (a single
# matrix column named `pval`) rather than the desired long format.
as_tibble(df) %>% # tbl_df() is deprecated as of dplyr 1.0.0
  pivot_longer(cols = -Class, names_to = "Wavelengths", values_to = "value") %>%
  group_by(Wavelengths) %>%
  summarize(
    pval = pairwise.wilcox.test(
      value, as.factor(Class),
      p.adjust.method = "bonf"
    )$p.value
  )
它返回的结果是:
#> `summarise()` regrouping output by 'Wavelengths' (override with `.groups` argument)
#> # A tibble: 16 x 2
#> # Groups: Wavelengths [4]
#> Wavelengths pval[,1] [,2] [,3] [,4]
#> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 WV_350 1 NA NA NA
#> 2 WV_350 0.0794 0.0794 NA NA
#> 3 WV_350 0.0794 0.0794 1 NA
#> 4 WV_350 0.0794 0.0794 0.749 0.556
#> 5 WV_351 1 NA NA NA
#> 6 WV_351 0.0794 0.0794 NA NA
#> 7 WV_351 0.0794 0.0794 1 NA
#> 8 WV_351 0.0794 0.0794 0.556 0.556
#> 9 WV_352 1 NA NA NA
#> 10 WV_352 0.0794 0.0794 NA NA
#> 11 WV_352 0.0794 0.0794 1 NA
#> 12 WV_352 0.0794 0.0794 0.556 0.749
#> 13 WV_353 1 NA NA NA
#> 14 WV_353 0.0794 0.0794 NA NA
#> 15 WV_353 0.0794 0.0794 1 NA
#> 16 WV_353 0.0794 0.0794 0.556 0.317
现在如何得到如下长格式的输出?
Wavelength Var1 Var2 value
数据
# Example data: 25 observations, 5 per class (Class 1-5), with one column
# per wavelength (WV_350 to WV_353). Each row of values below is one class.
df <- structure(
  list(
    Class = rep(c(1, 2, 3, 4, 5), each = 5),
    WV_350 = c(
      0.0196, 0.0206, 0.023, 0.0264, 0.029,
      0.0201, 0.0181, 0.0216, 0.0225, 0.019,
      0.0165, 0.0121, 0.0129, 0.0123, 0.0149,
      0.0137, 0.0116, 0.0151, 0.0138, 0.0167,
      0.0149, 0.0112, 0.0107, 0.01, 0.0099
    ),
    WV_351 = c(
      0.0197, 0.0206, 0.0229, 0.0265, 0.029,
      0.0199, 0.0183, 0.0216, 0.0225, 0.0187,
      0.0165, 0.0118, 0.0127, 0.0122, 0.0148,
      0.0138, 0.0114, 0.0145, 0.0132, 0.0164,
      0.0144, 0.0108, 0.01, 0.0093, 0.0095
    ),
    WV_352 = c(
      0.0199, 0.0207, 0.0233, 0.027, 0.0299,
      0.0203, 0.0186, 0.0219, 0.0232, 0.019,
      0.0169, 0.0124, 0.0133, 0.0126, 0.0152,
      0.0145, 0.0118, 0.0148, 0.0132, 0.0168,
      0.0148, 0.0111, 0.0102, 0.0096, 0.0098
    ),
    WV_353 = c(
      0.0204, 0.0213, 0.0238, 0.0277, 0.0307,
      0.0208, 0.0194, 0.0229, 0.0241, 0.0199,
      0.0173, 0.013, 0.0142, 0.0134, 0.0161,
      0.0152, 0.0126, 0.0153, 0.0137, 0.0175,
      0.0151, 0.0116, 0.0105, 0.01, 0.0098
    )
  ),
  row.names = c(NA, 25L),
  class = "data.frame"
)
有点冗长,我相信可以提高效率:
library(tidyverse)
library(broom)

# Run the pairwise Wilcoxon test once per wavelength and return the adjusted
# p-values in long format (one row per compared pair of classes).
res <-
  as_tibble(df) %>% # tbl_df() is deprecated as of dplyr 1.0.0
  pivot_longer(cols = -Class, names_to = "Wavelengths", values_to = "value") %>%
  group_by(Wavelengths) %>%
  # Keep the whole test object (not just its $p.value matrix) so that broom's
  # pairwise.htest tidier can turn it into group1 / group2 / p.value rows.
  summarise(pw_wt = list(pairwise.wilcox.test(value, as.factor(Class),
                                              p.adjust.method = "bonf"))) %>%
  ungroup() %>%
  mutate(pw_wt_t = map(pw_wt, broom::tidy)) %>%
  unnest(pw_wt_t)
我使用 rstatix 包解决了这个问题,它“提供了一个简单直观、管道友好的框架,与 'tidyverse' 的设计理念一致,用于执行基本的统计检验”。
library(tidyverse)
library(rstatix)

# Pairwise Wilcoxon tests per wavelength via rstatix; the result is already
# a tibble with one row per pair of classes.
df %>%
  as_tibble() %>%
  pivot_longer(cols = -Class, names_to = "Wavelengths", values_to = "value") %>%
  mutate(Class = as.factor(Class)) %>%
  group_by(Wavelengths) %>%
  pairwise_wilcox_test(value ~ Class, p.adjust.method = "bonf")
它返回以下输出:
#> # A tibble: 40 x 10
#> Wavelengths .y. group1 group2 n1 n2 statistic p p.adj
#> * <chr> <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl>
#> 1 WV_350 value 1 2 5 5 20 0.151 1
#> 2 WV_350 value 1 3 5 5 25 0.008 0.079
#> 3 WV_350 value 1 4 5 5 25 0.008 0.079
#> 4 WV_350 value 1 5 5 5 25 0.008 0.079
#> 5 WV_350 value 2 3 5 5 25 0.008 0.079
#> 6 WV_350 value 2 4 5 5 25 0.008 0.079
#> 7 WV_350 value 2 5 5 5 25 0.008 0.079
#> 8 WV_350 value 3 4 5 5 10 0.69 1
#> 9 WV_350 value 3 5 5 5 21.5 0.075 0.749
#> 10 WV_350 value 4 5 5 5 22 0.056 0.556
#> # ... with 30 more rows, and 1 more variable: p.adj.signif <chr>