R:更改 pivot_wider() 中的列名称 - 后缀到前缀

R: Changing column names in pivot_wider() -- suffix to prefix

我正在尝试弄清楚如何改变 tidyr 的 pivot_wider() 函数在生成的宽数据集中为新变量命名的方式。具体来说,我希望把 names_from 变量的取值作为新变量名的前缀,而不是后缀。

我的数据集如下所示:

# Long-format input (same values as ex_long): three IDs x three phases,
# with the measurement columns A, B, C and D.
list(ID = c("A950", "A950", "A950", "A970", "A970", "A970", "A996", "A996", "A996"), 
Phase = c("P1", "P2", "P3", "P1", "P2", "P3", "P1", "P2", "P3"), 
A = c(23.5, 25.2, 21.9, 21.9, 21.1, 20.3, 19.5, 18.7, 17.9), 
B = c(21.9, 21.1, 20.3, 19.5, 18.7, 17.9, 17.1, 16.3, 15.5), 
C = c(25.2, 21.9, 20.3, 17.6, 15.1, 12.7, 10.3, 7.8, 5.4), 
D = c("M", "M", "M", "F", "F", "F", "N", "N", "N"))

当我使用 pivot_wider() 以 Phase 作为“键”将数据集展开为宽格式时,得到的结果如下:

# Spread the measurement columns A to D (positions 3-6) across Phase.
# With the default naming, the new columns are <value column>_<Phase>.
ex_wide <- ex_long %>%
  pivot_wider(
    names_from = Phase,
    values_from = A:D
  )

# Wide result with the default column names: <value column>_<Phase>
# (for example A_P1), one row per ID.
list(ID = c("A950", "A970", "A996"), 
A_P1 = c(23.5, 21.9, 19.5), 
A_P2 = c(25.2, 21.1, 18.7), 
A_P3 = c(21.9, 20.3, 17.9), 
B_P1 = c(21.9, 19.5, 17.1), 
B_P2 = c(21.1, 18.7, 16.3), 
B_P3 = c(20.3, 17.9, 15.5), 
C_P1 = c(25.2, 17.6, 10.3), 
C_P2 = c(21.9, 15.1, 7.8), 
C_P3 = c(20.3, 12.7, 5.4), 
D_P1 = c("M", "F", "N"), 
D_P2 = c("M", "F", "N"), 
D_P3 = c("M", "F", "N"))

我希望列名看起来像 P1_A 而不是 A_P1(即 phase_variable 而不是 variable_phase)。

这看起来应该很简单;但是,我一直没能找到符合我需求的解决方案。任何帮助都将不胜感激。提前致谢。

您可以为此使用 names_glue 参数:

# names_glue is a glue template for the new column names: {Phase} is the
# names_from value and {.value} is the name of the values_from column.
ex_wide <- ex_long %>%
  pivot_wider(
    names_from = Phase,
    values_from = A:D,
    names_glue = "{Phase}_{.value}"
  )

您只需在 names_glue 模板中用 _ 分隔符依次写出 {Phase}(names_from 列的取值)和 {.value}(values_from 各列的列名)即可。

结果

library(dplyr)
library(tidyr)

# Pivot to wide format, naming each new column <Phase>_<value column>.
ex_wide <- ex_long %>%
  pivot_wider(
    names_from = Phase,
    values_from = A:D,
    names_glue = "{Phase}_{.value}"
  )

ex_wide
#> # A tibble: 3 x 13
#>   ID     P1_A  P2_A  P3_A  P1_B  P2_B  P3_B  P1_C  P2_C  P3_C P1_D  P2_D  P3_D 
#>   <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr> <chr>
#> 1 A950   23.5  25.2  21.9  21.9  21.1  20.3  25.2  21.9  20.3 M     M     M    
#> 2 A970   21.9  21.1  20.3  19.5  18.7  17.9  17.6  15.1  12.7 F     F     F    
#> 3 A996   19.5  18.7  17.9  17.1  16.3  15.5  10.3   7.8   5.4 N     N     N

数据

# Reproducible example data in long format: one row per ID/Phase pair
# (3 IDs x 3 phases), with measurement columns A, B, C and D.
ex_long <- data.frame(
  ID = rep(c("A950", "A970", "A996"), each = 3L),
  Phase = rep(c("P1", "P2", "P3"), times = 3L),
  A = c(23.5, 25.2, 21.9, 21.9, 21.1, 20.3, 19.5, 18.7, 17.9),
  B = c(21.9, 21.1, 20.3, 19.5, 18.7, 17.9, 17.1, 16.3, 15.5),
  C = c(25.2, 21.9, 20.3, 17.6, 15.1, 12.7, 10.3, 7.8, 5.4),
  D = rep(c("M", "F", "N"), each = 3L),
  stringsAsFactors = FALSE
)