在 R 中使用 pivot_wider() 时是否有正则表达式将列定向到特定行?

Is there a regular expression to direct columns to specific rows when using pivot_wider() in R?

我的真实数据的前三行是这样的;

# Real data example (wide format): one row per species; every other column
# name ends in a numeric suffix (0, 10, 20 or 35), written either with a
# separating underscore (e.g. Mean.Germ.coef_10) or without one (e.g. LRR10).
fullname <- c("Argemone glauca", "Bacopa monnieri", "Brighamia insignis")

Mean.Germ.coef_0 <- c(0.31, 0.768015267, 0.555758514)
Mean.Germ.coef_10 <- c(0.119356725, 0.624444444, 0.479357585)
Mean.Germ.coef_20 <- c(0.01, 0.202431661, 0.01)
Mean.Germ.coef_35 <- c(0.01, 0.021111111, 0.01)

sd.germ.coef_0 <- c(0.055079106, 0.148040638, 0.199485791)
sd.germ.coef_10 <- c(0.15341342, 0.079546759, 0.068405754)
sd.germ.coef_20 <- c(0, 0.059160256, 0)
sd.germ.coef_35 <- c(0, 0.022308189, 0)

n_0 <- c(5, 5, 5)
n_10 <- c(5, 5, 5)
n_20 <- c(5, 5, 5)
n_35 <- c(5, 5, 5)

# The LRR / LRR_var columns have no suffix-0 counterpart.
LRR10 <- c(-0.954455598, -0.206947247, -0.147887029)
LRR_var10 <- c(0.336731047, 0.010676627, 0.029840885)
LRR20 <- c(-3.433987204, -1.333407261, -4.017748779)
LRR_var20 <- c(0.006313648, 0.024512868, 0.025768057)
LRR35 <- c(-3.433987204, -3.594010117, -4.017748779)
LRR_var35 <- c(0.006313648, 0.230755613, 0.025768057)

# The n_* vectors must be part of the data frame as well; otherwise no `n`
# column can appear in the reshaped (long) result.
df <- data.frame(
  fullname,
  Mean.Germ.coef_0, Mean.Germ.coef_10, Mean.Germ.coef_20, Mean.Germ.coef_35,
  sd.germ.coef_0, sd.germ.coef_10, sd.germ.coef_20, sd.germ.coef_35,
  LRR10, LRR_var10, LRR20, LRR_var20, LRR35, LRR_var35,
  n_0, n_10, n_20, n_35
)

我需要一个长格式的 data.frame(或 tibble),其中特定的列进入特定的行;

# Example output (long format): one row per species x treatment, with rows
# ordered by species and, within each species, by Treat = 0, 10, 20, 35.
# Values are taken from the wide example data so that each row lines up with
# its species and treatment.
fullname <- rep(
  c("Argemone glauca", "Bacopa monnieri", "Brighamia insignis"),
  each = 4
)
Treat <- rep(c(0, 10, 20, 35), times = 3)
Mean.Germ.coef <- c(
  0.31, 0.119356725, 0.01, 0.01,
  0.768015267, 0.624444444, 0.202431661, 0.021111111,
  0.555758514, 0.479357585, 0.01, 0.01
)
sd.germ.coef <- c(
  0.055079106, 0.15341342, 0, 0,
  0.148040638, 0.079546759, 0.059160256, 0.022308189,
  0.199485791, 0.068405754, 0, 0
)
n <- rep(5, 12)
# LRR and LRR_var have no value at Treat = 0. Use a real missing value
# (NA_real_) rather than the string "NA", which would silently turn the whole
# column into character.
LRR <- c(
  NA_real_, -0.954455598, -3.433987204, -3.433987204,
  NA_real_, -0.206947247, -1.333407261, -3.594010117,
  NA_real_, -0.147887029, -4.017748779, -4.017748779
)
LRR_var <- c(
  NA_real_, 0.336731047, 0.006313648, 0.006313648,
  NA_real_, 0.010676627, 0.024512868, 0.230755613,
  NA_real_, 0.029840885, 0.025768057, 0.025768057
)
output <- data.frame(
  fullname, Treat, Mean.Germ.coef, sd.germ.coef,
  n, LRR, LRR_var
)

我正在尝试使用 tidyr::pivot_longer(),但我认为我的问题是缺乏对正则表达式的理解。

我们可以使用 pivot_longer() 的 names_pattern 参数来捕获列名中的子字符串:第二个捕获组 (\d+) 仅匹配字符串末尾 ($) 的数字。有些列名在数字前面有 _,有些则没有,因此我们把数字之前的分隔符放在捕获组之外以将其删除,并把它前面的其余字符捕获为第一组。

library(dplyr)
library(tidyr)

# Reshape wide -> long by splitting every column name (except fullname) into
# two captured pieces:
#   group 1 -> ".value": becomes the name of an output column
#   group 2 -> "Treat":  the run of digits at the very end of the name
# Pattern "^(.*?)_?(\\d+)$":
#   (.*?)   lazily captures the shortest prefix that lets the rest match
#   _?      an optional separator underscore, matched outside both groups and
#           therefore dropped
#   (\\d+)$ the trailing digits
# Backslashes must be doubled inside an R string literal ("\d" is a parse
# error). Unlike a pattern such as "(.*)[_\\D](\\d+)$", which consumes one
# letter whenever there is no underscore (LRR10 -> LR, LRR_var10 -> LRR_va),
# this keeps the full names: LRR10 -> LRR, LRR_var10 -> LRR_var.
df %>%
  pivot_longer(
    cols = -fullname,
    names_to = c(".value", "Treat"),
    names_pattern = "^(.*?)_?(\\d+)$"
  )

-输出

# A tibble: 12 × 7
   fullname           Treat Mean.Germ.coef sd.germ.coef     LR   LRR_va     n
   <chr>              <chr>          <dbl>        <dbl>  <dbl>    <dbl> <dbl>
 1 Argemone glauca    0             0.31         0.0551 NA     NA           5
 2 Argemone glauca    10            0.119        0.153  -0.954  0.337       5
 3 Argemone glauca    20            0.01         0      -3.43   0.00631     5
 4 Argemone glauca    35            0.01         0      -3.43   0.00631     5
 5 Bacopa monnieri    0             0.768        0.148  NA     NA           5
 6 Bacopa monnieri    10            0.624        0.0795 -0.207  0.0107      5
 7 Bacopa monnieri    20            0.202        0.0592 -1.33   0.0245      5
 8 Bacopa monnieri    35            0.0211       0.0223 -3.59   0.231       5
 9 Brighamia insignis 0             0.556        0.199  NA     NA           5
10 Brighamia insignis 10            0.479        0.0684 -0.148  0.0298      5
11 Brighamia insignis 20            0.01         0      -4.02   0.0258      5
12 Brighamia insignis 35            0.01         0      -4.02   0.0258      5

另一个选项是使用 names_sep:

library(stringr)

# Alternative: split each column name with names_sep instead of capturing.
# "(?<=\\D)(?=\\d+$)" is a zero-width position that is preceded by a non-digit
# and followed only by digits up to the end of the name, i.e. the boundary
# just before the trailing number. Backslashes must be doubled inside an R
# string literal ("\D" / "\d" is a parse error).
# The left-hand piece keeps any separator underscore (e.g. "n_"), so it is
# stripped afterwards. The pattern is anchored as "_$" so that only a trailing
# underscore is removed; a bare "_" would also delete the inner underscore of
# LRR_var (giving LRRvar).
df %>%
  pivot_longer(
    cols = -fullname,
    names_to = c(".value", "Treat"),
    names_sep = "(?<=\\D)(?=\\d+$)"
  ) %>%
  rename_with(~ str_remove(.x, "_$"))

-输出

# A tibble: 12 × 7
   fullname           Treat Mean.Germ.coef sd.germ.coef    LRR   LRRvar     n
   <chr>              <chr>          <dbl>        <dbl>  <dbl>    <dbl> <dbl>
 1 Argemone glauca    0             0.31         0.0551 NA     NA           5
 2 Argemone glauca    10            0.119        0.153  -0.954  0.337       5
 3 Argemone glauca    20            0.01         0      -3.43   0.00631     5
 4 Argemone glauca    35            0.01         0      -3.43   0.00631     5
 5 Bacopa monnieri    0             0.768        0.148  NA     NA           5
 6 Bacopa monnieri    10            0.624        0.0795 -0.207  0.0107      5
 7 Bacopa monnieri    20            0.202        0.0592 -1.33   0.0245      5
 8 Bacopa monnieri    35            0.0211       0.0223 -3.59   0.231       5
 9 Brighamia insignis 0             0.556        0.199  NA     NA           5
10 Brighamia insignis 10            0.479        0.0684 -0.148  0.0298      5
11 Brighamia insignis 20            0.01         0      -4.02   0.0258      5
12 Brighamia insignis 35            0.01         0      -4.02   0.0258      5

数据

# Reproducible copy of the example data: 3 rows (one per species) and 19
# columns, with default (automatic) row names.
df <- data.frame(
  fullname = c("Argemone glauca", "Bacopa monnieri", "Brighamia insignis"),
  Mean.Germ.coef_0 = c(0.31, 0.768015267, 0.555758514),
  Mean.Germ.coef_10 = c(0.119356725, 0.624444444, 0.479357585),
  Mean.Germ.coef_20 = c(0.01, 0.202431661, 0.01),
  Mean.Germ.coef_35 = c(0.01, 0.021111111, 0.01),
  sd.germ.coef_0 = c(0.055079106, 0.148040638, 0.199485791),
  sd.germ.coef_10 = c(0.15341342, 0.079546759, 0.068405754),
  sd.germ.coef_20 = c(0, 0.059160256, 0),
  sd.germ.coef_35 = c(0, 0.022308189, 0),
  LRR10 = c(-0.954455598, -0.206947247, -0.147887029),
  LRR_var10 = c(0.336731047, 0.010676627, 0.029840885),
  LRR20 = c(-3.433987204, -1.333407261, -4.017748779),
  LRR_var20 = c(0.006313648, 0.024512868, 0.025768057),
  LRR35 = c(-3.433987204, -3.594010117, -4.017748779),
  LRR_var35 = c(0.006313648, 0.230755613, 0.025768057),
  n_0 = c(5, 5, 5),
  n_10 = c(5, 5, 5),
  n_20 = c(5, 5, 5),
  n_35 = c(5, 5, 5),
  stringsAsFactors = FALSE
)