如何使用 dplyr 融化 pairwise.wilcox.test 输出?

How to melt pairwise.wilcox.test output using dplyr?

我想一次为多个自变量应用 pairwise.wilcox.test,然后想要以长格式输出。对于特定的波长,我可以使用以下代码

# Pairwise Wilcoxon tests on the WV_350 column across the classes, keeping
# only the matrix of adjusted p-values.
# NOTE(review): `try` shares its name with base::try(); the name is kept
# because the following snippet refers to it.
try <- pairwise.wilcox.test(
  df$WV_350,
  as.factor(df$Class),
  p.adjust.method = "bonf"
)$p.value

而我想要的最终输出是

# Reshape the p-value matrix into long format: one row per (Var1, Var2) cell,
# with the cell's p-value in `value`.
reshape2::melt(try)
#>  Var1 Var2      value
#> 1     2    1 1.00000000
#> 2     3    1 0.07936508
#> 3     4    1 0.07936508
#> 4     5    1 0.07936508
#> 5     2    2         NA
#> 6     3    2 0.07936508
#> 7     4    2 0.07936508
#> 8     5    2 0.07936508
#> 9     2    3         NA
#> 10    3    3         NA
#> 11    4    3 1.00000000
#> 12    5    3 0.74912899
#> 13    2    4         NA
#> 14    3    4         NA
#> 15    4    4         NA
#> 16    5    4 0.55555556

为了一次性将它应用于所有波长,我使用了 dplyr 包(最新版本 1.0.0),代码如下:

library(tidyverse)

# Run the pairwise Wilcoxon test once per wavelength column.
# `as_tibble()` replaces `tbl_df()`, which is deprecated as of dplyr 1.0.0.
as_tibble(df) %>%
  # Stack every column except Class into (Wavelengths, value) pairs.
  pivot_longer(cols = -Class, names_to = "Wavelengths", values_to = "value") %>%
  group_by(Wavelengths) %>%
  # Each group contributes its whole matrix of adjusted p-values, so `pval`
  # becomes a matrix-column with several rows per wavelength.
  summarize(
    pval = pairwise.wilcox.test(
      value,
      as.factor(Class),
      p.adjust.method = "bonf"
    )$p.value
  )

它返回的结果是:

#> `summarise()` regrouping output by 'Wavelengths' (override with `.groups` argument)
#> # A tibble: 16 x 2
#> # Groups:   Wavelengths [4]
#>    Wavelengths pval[,1]    [,2]   [,3]   [,4]
#>    <chr>          <dbl>   <dbl>  <dbl>  <dbl>
#>  1 WV_350        1      NA      NA     NA    
#>  2 WV_350        0.0794  0.0794 NA     NA    
#>  3 WV_350        0.0794  0.0794  1     NA    
#>  4 WV_350        0.0794  0.0794  0.749  0.556
#>  5 WV_351        1      NA      NA     NA    
#>  6 WV_351        0.0794  0.0794 NA     NA    
#>  7 WV_351        0.0794  0.0794  1     NA    
#>  8 WV_351        0.0794  0.0794  0.556  0.556
#>  9 WV_352        1      NA      NA     NA    
#> 10 WV_352        0.0794  0.0794 NA     NA    
#> 11 WV_352        0.0794  0.0794  1     NA    
#> 12 WV_352        0.0794  0.0794  0.556  0.749
#> 13 WV_353        1      NA      NA     NA    
#> 14 WV_353        0.0794  0.0794 NA     NA    
#> 15 WV_353        0.0794  0.0794  1     NA    
#> 16 WV_353        0.0794  0.0794  0.556  0.317

现在,如何将上面的结果转换成如下所示的长格式输出?

Wavelength Var1 Var2      value 

数据

# Example data: 25 observations, five per class (classes 1 to 5), with one
# numeric column per wavelength (WV_350 to WV_353).
df <- structure(
  list(
    Class = rep(c(1, 2, 3, 4, 5), each = 5),
    WV_350 = c(
      0.0196, 0.0206, 0.023, 0.0264, 0.029,
      0.0201, 0.0181, 0.0216, 0.0225, 0.019,
      0.0165, 0.0121, 0.0129, 0.0123, 0.0149,
      0.0137, 0.0116, 0.0151, 0.0138, 0.0167,
      0.0149, 0.0112, 0.0107, 0.01, 0.0099
    ),
    WV_351 = c(
      0.0197, 0.0206, 0.0229, 0.0265, 0.029,
      0.0199, 0.0183, 0.0216, 0.0225, 0.0187,
      0.0165, 0.0118, 0.0127, 0.0122, 0.0148,
      0.0138, 0.0114, 0.0145, 0.0132, 0.0164,
      0.0144, 0.0108, 0.01, 0.0093, 0.0095
    ),
    WV_352 = c(
      0.0199, 0.0207, 0.0233, 0.027, 0.0299,
      0.0203, 0.0186, 0.0219, 0.0232, 0.019,
      0.0169, 0.0124, 0.0133, 0.0126, 0.0152,
      0.0145, 0.0118, 0.0148, 0.0132, 0.0168,
      0.0148, 0.0111, 0.0102, 0.0096, 0.0098
    ),
    WV_353 = c(
      0.0204, 0.0213, 0.0238, 0.0277, 0.0307,
      0.0208, 0.0194, 0.0229, 0.0241, 0.0199,
      0.0173, 0.013, 0.0142, 0.0134, 0.0161,
      0.0152, 0.0126, 0.0153, 0.0137, 0.0175,
      0.0151, 0.0116, 0.0105, 0.01, 0.0098
    )
  ),
  row.names = c(NA, 25L),
  class = "data.frame"
)

下面的写法有点冗长,我相信还可以进一步简化:

library(tidyverse)
library(broom)

# Long-format pairwise Wilcoxon p-values for every wavelength.
# The whole `pairwise.htest` object is stored (not just its `$p.value` matrix)
# so that broom's tidier for pairwise tests can turn it into rows of
# group1 / group2 / p.value. Tidying the bare matrix would keep the wide
# layout, and broom's matrix tidier is deprecated.
# `as_tibble()` replaces `tbl_df()`, which is deprecated as of dplyr 1.0.0.
res <-
  as_tibble(df) %>%
  # Stack every column except Class into (Wavelengths, value) pairs.
  pivot_longer(cols = -Class, names_to = "Wavelengths", values_to = "value") %>%
  group_by(Wavelengths) %>%
  # One list element (the test object) per wavelength.
  summarise(
    pw_wt = list(
      pairwise.wilcox.test(value, as.factor(Class), p.adjust.method = "bonf")
    )
  ) %>%
  ungroup() %>%
  # Tidy each test object, then expand the tidy tables into regular rows.
  mutate(pw_wt_t = map(pw_wt, broom::tidy)) %>%
  unnest(pw_wt_t)

我可以使用 rstatix 包解决这个问题,它“提供了一个简单直观、对管道友好的框架,与 'tidyverse' 的设计理念一致,用于执行基本的统计检验”。

library(tidyverse)
library(rstatix)

# Pairwise Wilcoxon tests for every wavelength in a single pipeline.
df %>%
  as_tibble() %>%
  # Stack every column except Class into (Wavelengths, value) pairs.
  pivot_longer(
    cols = -Class,
    names_to = "Wavelengths",
    values_to = "value"
  ) %>%
  # Convert the numeric class labels to a factor before testing.
  mutate(Class = as.factor(Class)) %>%
  # Run the test separately within each wavelength.
  group_by(Wavelengths) %>%
  pairwise_wilcox_test(value ~ Class, p.adjust.method = "bonf")

它返回以下输出:

#> # A tibble: 40 x 10
#>   Wavelengths .y.   group1 group2    n1    n2 statistic     p p.adj
#> * <chr>       <chr> <chr>  <chr>  <int> <int>     <dbl> <dbl> <dbl>
#> 1 WV_350      value 1      2          5     5      20   0.151 1    
#> 2 WV_350      value 1      3          5     5      25   0.008 0.079
#> 3 WV_350      value 1      4          5     5      25   0.008 0.079
#> 4 WV_350      value 1      5          5     5      25   0.008 0.079
#> 5 WV_350      value 2      3          5     5      25   0.008 0.079
#> 6 WV_350      value 2      4          5     5      25   0.008 0.079
#> 7 WV_350      value 2      5          5     5      25   0.008 0.079
#> 8 WV_350      value 3      4          5     5      10   0.69  1    
#> 9 WV_350      value 3      5          5     5      21.5 0.075 0.749
#> 10 WV_350      value 4      5          5     5      22   0.056 0.556
#> # ... with 30 more rows, and 1 more variable: p.adj.signif <chr>