在 R 中透视多列
pivoting multiple columns R
我有这个数据:
# Sample data, apparently dput() output: a grouped tibble ("grouped_df") with
# 15 rows, grouped by `classificador` (3 groups of 5 rows each).
# Columns: classificador (character), classe (factor with 10 levels, of which
# only codes 1-5 occur), media_entrou, desv_pad_entrou, media_saiu,
# desv_pad_saiu (doubles) and N_ciclos (integer).
# NOTE(review): the expression is not assigned to a name here; the answer code
# presumably expects this object as `df1` -- confirm.
structure(list(classificador = c("classificador 1", "classificador 1",
"classificador 1", "classificador 1", "classificador 1", "classificador 2",
"classificador 2", "classificador 2", "classificador 2", "classificador 2",
"classificador 3", "classificador 3", "classificador 3", "classificador 3",
"classificador 3"), classe = structure(c(1L, 2L, 3L, 4L, 5L,
1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L), .Label = c("bolha",
"coral_sol", "cosc_mult", "coscinodiscus", "detritus", "fake_coral",
"multiples", "org_parts", "organism", "shadow"), class = "factor"),
media_entrou = c(0.000181463342477325, 9.11739514644428e-06,
0, 0.00710836752524651, 0.0523307152250179, 5.76207054303695e-05,
2.31948309497038e-05, 0.000533722409264464, 0.00395244621833009,
0.0277347189726644, 0.000511091116817504, 2.78792287970657e-05,
0.00167363838758039, 0.00714582937886204, 0.03664291179572
), desv_pad_entrou = c(0.000625138593404583, 4.01259397230609e-05,
0, 0.00982865377212798, 0.0382102106478645, 0.000197304670837601,
0.00015803336539453, 0.00133833325218429, 0.00460178168250652,
0.0198204066019173, 0.00560789561914724, 0.000295803787791627,
0.0011439791450099, 0.00660048491487252, 0.023022755540947
), media_saiu = c(6.31000967592145e-05, 0, 0, 0.0117956025289566,
0.00890647892432332, 0.000100059064539586, 0, 0, 0.0587231357033222,
0.0639343116405082, 0.00028538070772188, 0, 2.54448072255982e-05,
0.0616763969528522, 0.0284492872735916), desv_pad_saiu = c(0.00010828405298676,
0, 0, 0.00782886688427178, 0.00460945360308487, 0.000418014988563,
0, 0, 0.0258394941887122, 0.0678523075381303, 0.000824409079665752,
0, 0.000147663248976053, 0.0346114091736888, 0.0165290142815091
), N_ciclos = c(118L, 118L, 118L, 118L, 118L, 318L, 318L,
318L, 318L, 318L, 158L, 158L, 158L, 158L, 158L)), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -15L), groups = structure(list(
classificador = c("classificador 1", "classificador 2", "classificador 3"
), .rows = structure(list(1:5, 6:10, 11:15), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -3L), .drop = TRUE))
我想把数据透视成长格式,新的列为:`classificador`、`classe`、`E_S`、`media` 和 `desvio`。其中 `E_S` 列的取值 `entrou` 对应原来的 `media_entrou` 和 `desv_pad_entrou` 两列,取值 `saiu` 对应原来的 `media_saiu` 和 `desv_pad_saiu` 两列;新列 `media` 存放 `media_entrou` 和 `media_saiu` 的值,新列 `desvio` 存放 `desv_pad_entrou` 和 `desv_pad_saiu` 的值。
前 4 行的预期输出:
classificador | classe | E_S | media | desv | N_ciclos |
---|---|---|---|---|---|
classificador 1 | bolha | entrou | 0.000181 | 0.000625 | 118 |
classificador 1 | bolha | saiu | 0.0000631 | 0.000108 | 118 |
classificador 1 | coral_sol | entrou | 0.00000912 | 0.0000401 | 118 |
classificador 1 | coral_sol | saiu | 0 | 0 | 118 |
谢谢
我们可以使用 `pivot_longer`,并在 `names_pattern` 中用正则表达式指定捕获组:`^([^_]+)` 从字符串开头(`^`)匹配并捕获(`(...)`)一个或多个不是 `_` 的字符,这一部分成为新的值列名(对应 `names_to` 中的 `.value`);随后是一个 `_`;最后 `(.*)` 捕获列名中其余的字符,这一部分将成为 `E_S` 列的值。
library(tidyr)
library(dplyr)
library(stringr)
# Reshape df1 to long form: the "<measure>_<E_S>" columns become one row per
# E_S value, with one value column per measure (media, desv).
df1 %>%
  ungroup() %>%
  # N_ciclos also contains "_", so it is removed before contains("_") is used
  # to pick the columns to pivot.
  select(-N_ciclos) %>%
  # Strip "_pad" (desv_pad_entrou -> desv_entrou) so every remaining name has
  # exactly the form "<measure>_<E_S>".
  dplyr::rename_with(function(nm) str_remove(nm, "_pad")) %>%
  pivot_longer(
    cols = contains("_"),
    # First capture group names the value column, second fills E_S.
    names_to = c(".value", "E_S"),
    names_pattern = "^([^_]+)_(.*)"
  )
输出(`N_ciclos` 列已被 `select(-N_ciclos)` 去掉,因此结果只有 5 列):
# A tibble: 30 × 5
classificador classe E_S media desv
<chr> <fct> <chr> <dbl> <dbl>
1 classificador 1 bolha entrou 0.000181 0.000625
2 classificador 1 bolha saiu 0.0000631 0.000108
3 classificador 1 coral_sol entrou 0.00000912 0.0000401
4 classificador 1 coral_sol saiu 0 0
5 classificador 1 cosc_mult entrou 0 0
6 classificador 1 cosc_mult saiu 0 0
7 classificador 1 coscinodiscus entrou 0.00711 0.00983
8 classificador 1 coscinodiscus saiu 0.0118 0.00783
9 classificador 1 detritus entrou 0.0523 0.0382
10 classificador 1 detritus saiu 0.00891 0.00461
# … with 20 more rows
我有这个数据:
# Sample data, apparently dput() output: a grouped tibble ("grouped_df") with
# 15 rows, grouped by `classificador` (3 groups of 5 rows each).
# Columns: classificador (character), classe (factor with 10 levels, of which
# only codes 1-5 occur), media_entrou, desv_pad_entrou, media_saiu,
# desv_pad_saiu (doubles) and N_ciclos (integer).
# NOTE(review): the expression is not assigned to a name here; the answer code
# presumably expects this object as `df1` -- confirm.
structure(list(classificador = c("classificador 1", "classificador 1",
"classificador 1", "classificador 1", "classificador 1", "classificador 2",
"classificador 2", "classificador 2", "classificador 2", "classificador 2",
"classificador 3", "classificador 3", "classificador 3", "classificador 3",
"classificador 3"), classe = structure(c(1L, 2L, 3L, 4L, 5L,
1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L), .Label = c("bolha",
"coral_sol", "cosc_mult", "coscinodiscus", "detritus", "fake_coral",
"multiples", "org_parts", "organism", "shadow"), class = "factor"),
media_entrou = c(0.000181463342477325, 9.11739514644428e-06,
0, 0.00710836752524651, 0.0523307152250179, 5.76207054303695e-05,
2.31948309497038e-05, 0.000533722409264464, 0.00395244621833009,
0.0277347189726644, 0.000511091116817504, 2.78792287970657e-05,
0.00167363838758039, 0.00714582937886204, 0.03664291179572
), desv_pad_entrou = c(0.000625138593404583, 4.01259397230609e-05,
0, 0.00982865377212798, 0.0382102106478645, 0.000197304670837601,
0.00015803336539453, 0.00133833325218429, 0.00460178168250652,
0.0198204066019173, 0.00560789561914724, 0.000295803787791627,
0.0011439791450099, 0.00660048491487252, 0.023022755540947
), media_saiu = c(6.31000967592145e-05, 0, 0, 0.0117956025289566,
0.00890647892432332, 0.000100059064539586, 0, 0, 0.0587231357033222,
0.0639343116405082, 0.00028538070772188, 0, 2.54448072255982e-05,
0.0616763969528522, 0.0284492872735916), desv_pad_saiu = c(0.00010828405298676,
0, 0, 0.00782886688427178, 0.00460945360308487, 0.000418014988563,
0, 0, 0.0258394941887122, 0.0678523075381303, 0.000824409079665752,
0, 0.000147663248976053, 0.0346114091736888, 0.0165290142815091
), N_ciclos = c(118L, 118L, 118L, 118L, 118L, 318L, 318L,
318L, 318L, 318L, 158L, 158L, 158L, 158L, 158L)), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -15L), groups = structure(list(
classificador = c("classificador 1", "classificador 2", "classificador 3"
), .rows = structure(list(1:5, 6:10, 11:15), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -3L), .drop = TRUE))
我想把数据透视成长格式,新的列为:`classificador`、`classe`、`E_S`、`media` 和 `desvio`。其中 `E_S` 列的取值 `entrou` 对应原来的 `media_entrou` 和 `desv_pad_entrou` 两列,取值 `saiu` 对应原来的 `media_saiu` 和 `desv_pad_saiu` 两列;新列 `media` 存放 `media_entrou` 和 `media_saiu` 的值,新列 `desvio` 存放 `desv_pad_entrou` 和 `desv_pad_saiu` 的值。
前 4 行的预期输出:
classificador | classe | E_S | media | desv | N_ciclos |
---|---|---|---|---|---|
classificador 1 | bolha | entrou | 0.000181 | 0.000625 | 118 |
classificador 1 | bolha | saiu | 0.0000631 | 0.000108 | 118 |
classificador 1 | coral_sol | entrou | 0.00000912 | 0.0000401 | 118 |
classificador 1 | coral_sol | saiu | 0 | 0 | 118 |
谢谢
我们可以使用 `pivot_longer`,并在 `names_pattern` 中用正则表达式指定捕获组:`^([^_]+)` 从字符串开头(`^`)匹配并捕获(`(...)`)一个或多个不是 `_` 的字符,这一部分成为新的值列名(对应 `names_to` 中的 `.value`);随后是一个 `_`;最后 `(.*)` 捕获列名中其余的字符,这一部分将成为 `E_S` 列的值。
library(tidyr)
library(dplyr)
library(stringr)
# Reshape df1 to long form: the "<measure>_<E_S>" columns become one row per
# E_S value, with one value column per measure (media, desv).
df1 %>%
  ungroup() %>%
  # N_ciclos also contains "_", so it is removed before contains("_") is used
  # to pick the columns to pivot.
  select(-N_ciclos) %>%
  # Strip "_pad" (desv_pad_entrou -> desv_entrou) so every remaining name has
  # exactly the form "<measure>_<E_S>".
  dplyr::rename_with(function(nm) str_remove(nm, "_pad")) %>%
  pivot_longer(
    cols = contains("_"),
    # First capture group names the value column, second fills E_S.
    names_to = c(".value", "E_S"),
    names_pattern = "^([^_]+)_(.*)"
  )
输出(`N_ciclos` 列已被 `select(-N_ciclos)` 去掉,因此结果只有 5 列):
# A tibble: 30 × 5
classificador classe E_S media desv
<chr> <fct> <chr> <dbl> <dbl>
1 classificador 1 bolha entrou 0.000181 0.000625
2 classificador 1 bolha saiu 0.0000631 0.000108
3 classificador 1 coral_sol entrou 0.00000912 0.0000401
4 classificador 1 coral_sol saiu 0 0
5 classificador 1 cosc_mult entrou 0 0
6 classificador 1 cosc_mult saiu 0 0
7 classificador 1 coscinodiscus entrou 0.00711 0.00983
8 classificador 1 coscinodiscus saiu 0.0118 0.00783
9 classificador 1 detritus entrou 0.0523 0.0382
10 classificador 1 detritus saiu 0.00891 0.00461
# … with 20 more rows