如何从组合变量名称创建新的分类变量

How to create a new categorical variable from a combined variable name

我正在处理以下数据框:

ID    GR    SES   COND    `P3(400-450).FCz` `P3(400-450).Cz` `P3(400-450).Pz` `LPPearly(500-700).FCz` `LPPearly(500-700).Cz` `LPPearly(500-7~
   <chr> <chr> <chr> <chr>               <dbl>            <dbl>            <dbl>                   <dbl>                  <dbl>            <dbl>
 1 01    RP    V     NEG-CTR           -11.6             -5.17           11.9                    -11.8                   -5.96              8.24
 2 01    RP    V     NEG-NOC           -11.1             -5.53           12.1                     -9.15                  -5.11              9.51
 3 01    RP    V     NEU-NOC            -4.00             0.622          13.0                     -7.58                  -2.82              9.42
 4 04    RP    V     NEG-CTR            -0.314            2.23           15.1                     -4.02                  -1.44              5.06
 5 04    RP    V     NEG-NOC             0.239            3.97           15.4                     -6.60                  -3.15              5.03
 6 04    RP    V     NEU-NOC             5.04             7.76           16.7                     -2.03                   0.160             9.08
 7 06    RP    V     NEG-CTR            -0.214           -1.30            3.14                     4.47                   3.52              7.56
 8 06    RP    V     NEG-NOC            -2.96            -3.60           -0.0893                   2.54                   2.10              5.50
 9 06    RP    V     NEU-NOC            -1.97            -1.92            2.15                     2.23                   2.71              4.91
10 07    RP    V     NEG-CTR            -2.83             2.19           13.7                     -0.884                  5.50             13.0 

如果我想从它们出现的变量名称中拆分符号 .FCz、.Cz、.Pz 以创建一个名为 electrode 的新变量,其中应列出这些符号,我应该怎么做?

提前致谢

这里是数据集

> dput(head(data))
structure(list(ID = c("01", "01", "01", "04", "04", "04"), GR = c("RP", 
"RP", "RP", "RP", "RP", "RP"), SES = c("V", "V", "V", "V", "V", 
"V"), COND = c("NEG-CTR", "NEG-NOC", "NEU-NOC", "NEG-CTR", "NEG-NOC", 
"NEU-NOC"), `P3(400-450).FCz` = c(-11.6312151716924, -11.1438413285935, 
-3.99591470944713, -0.314155675382471, 0.238885648959708, 5.03749946898385
), `P3(400-450).Cz` = c(-5.16524399006139, -5.53112490175437, 
0.621502123415388, 2.23100741241039, 3.96990710862955, 7.75899775608441
), `P3(400-450).Pz` = c(11.8802266972569, 12.1053426662461, 12.955441582096, 
15.0981004360619, 15.4046229884164, 16.671036999147), `LPPearly(500-700).FCz` = c(-11.7785042972793, 
-9.14927207125904, -7.58190508537766, -4.01515836011381, -6.60165385653499, 
-2.02861964460179), `LPPearly(500-700).Cz` = c(-5.96429031525769, 
-5.10918437158799, -2.81732229625975, -1.43557366487622, -3.14872157912645, 
0.160393685024631), `LPPearly(500-700).Pz` = c(8.23981597718437, 
9.51261484648731, 9.42367409925817, 5.06332653216481, 5.02619159395405, 
9.07903916629231), `LPP1(500-1000).FCz` = c(-5.67295796971287, 
-4.3918290080777, -2.96652960658775, 0.159183652691071, -1.78361184935376, 
1.97377908783621), `LPP1(500-1000).Cz` = c(-0.774461731301161, 
-0.650009462761383, 1.14010250644923, 1.51403741206392, 0.25571835554024, 
3.76051565494304), `LPP1(500-1000).Pz` = c(9.99385579756163, 
11.1212652173052, 10.6989716871958, 3.7899021820967, 4.59413830322224, 
8.52123662617732), `LPP2(1000-1500).FCz` = c(-0.198736254963744, 
-3.16101041766438, 0.895992279831378, 3.11042068112836, 2.27800090558473, 
3.83846437952292), `LPP2(1000-1500).Cz` = c(2.96437294922766, 
-2.12913230708907, 2.94619035115619, 3.44844607014521, 3.02403433835637, 
4.7045767546583), `LPP2(1000-1500).Pz` = c(6.28027312932027, 
5.24535230966772, 7.68162285335806, 1.08242973465635, 2.99896314000211, 
5.36085942954182), `LPP2(1000-1500).POz` = c(7.91468942320841, 
9.94838815736199, 10.2186482048953, 1.07455889922813, 1.65917850515029, 
3.22422743232682)), row.names = c(NA, 6L), class = "data.frame")
> 

我们可以使用 pivot_longer 将数据从 'wide' 重塑为 'long':在 cols 中用 matches() 选出列名里包含 ( 后跟数字(\d+)的列,并通过 names_sep 指定在分隔符 . 处拆分列名。在 names_to 中指定 ".value" 会把 . 之前的部分保留为各个值列的列名,而新列 'electrode' 则保存列名中 . 之后的后缀部分。

library(dplyr)
library(tidyr)

# Reshape the wide ERP columns (e.g. `P3(400-450).FCz`) into long format.
# Each column name is split at the literal dot: the part before it stays a
# value column (".value"), the part after it goes into the new `electrode`
# column.
# Backslashes must be doubled inside R string literals, otherwise "\(", "\d"
# and "\." are rejected as unrecognized escapes at parse time.
data %>%
  pivot_longer(
    # Select only columns whose name contains "(<digits>-<digits>".
    cols = matches("\\(\\d+-\\d+"),
    names_to = c(".value", "electrode"),
    # Escape the dot so it is a literal separator, not "any character".
    names_sep = "\\."
  )

-输出

# A tibble: 24 × 9
   ID    GR    SES   COND    electrode `P3(400-450)` `LPPearly(500-700)` `LPP1(500-1000)` `LPP2(1000-1500)`
   <chr> <chr> <chr> <chr>   <chr>             <dbl>               <dbl>            <dbl>             <dbl>
 1 01    RP    V     NEG-CTR FCz             -11.6                -11.8            -5.67             -0.199
 2 01    RP    V     NEG-CTR Cz               -5.17                -5.96           -0.774             2.96 
 3 01    RP    V     NEG-CTR Pz               11.9                  8.24            9.99              6.28 
 4 01    RP    V     NEG-CTR POz              NA                   NA              NA                 7.91 
 5 01    RP    V     NEG-NOC FCz             -11.1                 -9.15           -4.39             -3.16 
 6 01    RP    V     NEG-NOC Cz               -5.53                -5.11           -0.650            -2.13 
 7 01    RP    V     NEG-NOC Pz               12.1                  9.51           11.1               5.25 
 8 01    RP    V     NEG-NOC POz              NA                   NA              NA                 9.95 
 9 01    RP    V     NEU-NOC FCz              -4.00                -7.58           -2.97              0.896
10 01    RP    V     NEU-NOC Cz                0.622               -2.82            1.14              2.95 
# … with 14 more rows