旋转多列 R

pivoting multiple columns R

我有这个数据:

    structure(list(classificador = c("classificador 1", "classificador 1", 
"classificador 1", "classificador 1", "classificador 1", "classificador 2", 
"classificador 2", "classificador 2", "classificador 2", "classificador 2", 
"classificador 3", "classificador 3", "classificador 3", "classificador 3", 
"classificador 3"), classe = structure(c(1L, 2L, 3L, 4L, 5L, 
1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L), .Label = c("bolha", 
"coral_sol", "cosc_mult", "coscinodiscus", "detritus", "fake_coral", 
"multiples", "org_parts", "organism", "shadow"), class = "factor"), 
    media_entrou = c(0.000181463342477325, 9.11739514644428e-06, 
    0, 0.00710836752524651, 0.0523307152250179, 5.76207054303695e-05, 
    2.31948309497038e-05, 0.000533722409264464, 0.00395244621833009, 
    0.0277347189726644, 0.000511091116817504, 2.78792287970657e-05, 
    0.00167363838758039, 0.00714582937886204, 0.03664291179572
    ), desv_pad_entrou = c(0.000625138593404583, 4.01259397230609e-05, 
    0, 0.00982865377212798, 0.0382102106478645, 0.000197304670837601, 
    0.00015803336539453, 0.00133833325218429, 0.00460178168250652, 
    0.0198204066019173, 0.00560789561914724, 0.000295803787791627, 
    0.0011439791450099, 0.00660048491487252, 0.023022755540947
    ), media_saiu = c(6.31000967592145e-05, 0, 0, 0.0117956025289566, 
    0.00890647892432332, 0.000100059064539586, 0, 0, 0.0587231357033222, 
    0.0639343116405082, 0.00028538070772188, 0, 2.54448072255982e-05, 
    0.0616763969528522, 0.0284492872735916), desv_pad_saiu = c(0.00010828405298676, 
    0, 0, 0.00782886688427178, 0.00460945360308487, 0.000418014988563, 
    0, 0, 0.0258394941887122, 0.0678523075381303, 0.000824409079665752, 
    0, 0.000147663248976053, 0.0346114091736888, 0.0165290142815091
    ), N_ciclos = c(118L, 118L, 118L, 118L, 118L, 318L, 318L, 
    318L, 318L, 318L, 158L, 158L, 158L, 158L, 158L)), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -15L), groups = structure(list(
    classificador = c("classificador 1", "classificador 2", "classificador 3"
    ), .rows = structure(list(1:5, 6:10, 11:15), ptype = integer(0), class = c("vctrs_list_of", 
    "vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -3L), .drop = TRUE))

我想用新列对数据进行透视:classificadorclasseE_Smediadesvio。其中 E_S 列的值名称 entrou 对应于列 media_entroudesvio_entrou 以及值名称 saiu 对应于列 media_saiudesvio_saiu 以及新列中 media_entroumedia_saiu 列的值 media 以及新列中 desvio_entroudesvio_saiu 列的值desvio.

前 4 行的预期输出:

classificador classe E_S media desv N_ciclos
classificador 1 bolha entrou 0.000181 0.000625 118
classificador 1 bolha saiu 0.0000631 0.000108 118
classificador 1 coral_sol entrou 0.00000912 0.0000401 118
classificador 1 coral_sol saiu 0 0 118

谢谢

我们可以使用 pivot_longer,指定正则表达式模式来捕获 names_pattern 中的组,即 ^([^_]+) - 匹配一个或多个不是 _ 的字符开始 (^) 和捕获 ((...)) 后跟 _ 并捕获列名中的其余字符 (.*),这些字符将是 'E_S'列

library(tidyr)
library(dplyr)
library(stringr)
df1 %>% 
   ungroup %>% 
   select(-N_ciclos) %>%
   dplyr::rename_with(~ str_remove(., "_pad")) %>% 
   pivot_longer(cols = contains("_"), names_to = c(".value", "E_S"),
      names_pattern = "^([^_]+)_(.*)")

-输出

# A tibble: 30 × 5
   classificador   classe        E_S         media      desv
   <chr>           <fct>         <chr>       <dbl>     <dbl>
 1 classificador 1 bolha         entrou 0.000181   0.000625 
 2 classificador 1 bolha         saiu   0.0000631  0.000108 
 3 classificador 1 coral_sol     entrou 0.00000912 0.0000401
 4 classificador 1 coral_sol     saiu   0          0        
 5 classificador 1 cosc_mult     entrou 0          0        
 6 classificador 1 cosc_mult     saiu   0          0        
 7 classificador 1 coscinodiscus entrou 0.00711    0.00983  
 8 classificador 1 coscinodiscus saiu   0.0118     0.00783  
 9 classificador 1 detritus      entrou 0.0523     0.0382   
10 classificador 1 detritus      saiu   0.00891    0.00461  
# … with 20 more rows