在 R 中对多个列同时应用 str_split_fixed 函数
Applying the str_split_fixed function simultaneously across multiple columns in R
我有一个包含多列的数据框,每列都是字符串值。我想按逗号分隔符拆分每一列的值,并把拆分结果放到输出数据框中。输入和期望的输出如下:
# Example input: every cell holds three comma-separated values.
Col1 <- c("a,b,c", "9,a,5")
Col2 <- c("c,b,e", "4,r,t")
Col3 <- c("e,f,g", "y,z,d")
Input <- data.frame(Col1, Col2, Col3)

# Desired output: one column per comma-separated piece, in input order.
Column1 <- c("a", "9")
Column2 <- c("b", "a")
Column3 <- c("c", "5")
Column4 <- c("c", "4")
Column5 <- c("b", "r")
Column6 <- c("e", "t")
Column7 <- c("e", "y")
Column8 <- c("f", "z")
Column9 <- c("g", "d")
Output <- data.frame(
  Column1, Column2, Column3,
  Column4, Column5, Column6,
  Column7, Column8, Column9
)
如果有人知道解决方法请帮忙。
软件包 splitstackshape 就是为这类操作而设计的:
library(splitstackshape)
# Split every column of Input into separate "<column>_<n>" columns.
cSplit(
  indt = Input,
  splitCols = names(Input),
  type.convert = "as.is"
)
#    Col1_1 Col1_2 Col1_3 Col2_1 Col2_2 Col2_3 Col3_1 Col3_2 Col3_3
#1:      a      b      c      c      b      e      e      f      g
#2:      9      a      5      4      r      t      y      z      d
Sotos 已经提供了一个更好的建议,但这里还有一个使用 dplyr/tidyr 的替代解决方案:
library(dplyr)
library(tidyr)
# Split every cell on commas and spread the pieces into one column per piece,
# keeping the pieces of each source column adjacent (Col1_1, Col1_2, ...).
# Note: names_vary requires tidyr >= 1.2.0.
Input %>%
  # Row id so pivot_wider() can put the pieces back on their original row.
  mutate(id = row_number()) %>%
  pivot_longer(-id) %>%
  # Explicit sep: the default separator is a regex matching any run of
  # non-alphanumeric characters, not only commas.
  separate(value, c("1", "2", "3"), sep = ",") %>%
  pivot_wider(
    names_from = name,
    names_glue = "{name}_{.value}",
    values_from = c("1", "2", "3"),
    # Order by source column first so the layout matches the requested output.
    names_vary = "slowest"
  ) %>%
  select(-id)
#> # A tibble: 2 × 9
#>   Col1_1 Col1_2 Col1_3 Col2_1 Col2_2 Col2_3 Col3_1 Col3_2 Col3_3
#>   <chr>  <chr>  <chr>  <chr>  <chr>  <chr>  <chr>  <chr>  <chr>
#> 1 a      b      c      c      b      e      e      f      g
#> 2 9      a      5      4      r      t      y      z      d
使用 base R 的 read.csv:
# Paste the columns of each row into one comma-separated line, then let
# read.csv() split those lines into fields.
# colClasses = "character" keeps every piece as a string; without it,
# read.csv() would type-convert columns (e.g. all-digit fields to integer).
read.csv(
  text = do.call(paste, c(Input, sep = ",")),
  header = FALSE,
  colClasses = "character"
)
#   V1 V2 V3 V4 V5 V6 V7 V8 V9
# 1  a  b  c  c  b  e  e  f  g
# 2  9  a  5  4  r  t  y  z  d
我有一个包含多列的数据框,每列都是字符串值。我想按逗号分隔符拆分每一列的值,并把拆分结果放到输出数据框中。输入和期望的输出如下:
# Example input: every cell holds three comma-separated values.
Col1 <- c("a,b,c", "9,a,5")
Col2 <- c("c,b,e", "4,r,t")
Col3 <- c("e,f,g", "y,z,d")
Input <- data.frame(Col1, Col2, Col3)

# Desired output: one column per comma-separated piece, in input order.
Column1 <- c("a", "9")
Column2 <- c("b", "a")
Column3 <- c("c", "5")
Column4 <- c("c", "4")
Column5 <- c("b", "r")
Column6 <- c("e", "t")
Column7 <- c("e", "y")
Column8 <- c("f", "z")
Column9 <- c("g", "d")
Output <- data.frame(
  Column1, Column2, Column3,
  Column4, Column5, Column6,
  Column7, Column8, Column9
)
如果有人知道解决方法请帮忙。
软件包 splitstackshape 就是为这类操作而设计的:
library(splitstackshape)
# Split every column of Input into separate "<column>_<n>" columns.
cSplit(
  indt = Input,
  splitCols = names(Input),
  type.convert = "as.is"
)
#    Col1_1 Col1_2 Col1_3 Col2_1 Col2_2 Col2_3 Col3_1 Col3_2 Col3_3
#1:      a      b      c      c      b      e      e      f      g
#2:      9      a      5      4      r      t      y      z      d
Sotos 已经提供了一个更好的建议,但这里还有一个使用 dplyr/tidyr 的替代解决方案:
library(dplyr)
library(tidyr)
# Split every cell on commas and spread the pieces into one column per piece,
# keeping the pieces of each source column adjacent (Col1_1, Col1_2, ...).
# Note: names_vary requires tidyr >= 1.2.0.
Input %>%
  # Row id so pivot_wider() can put the pieces back on their original row.
  mutate(id = row_number()) %>%
  pivot_longer(-id) %>%
  # Explicit sep: the default separator is a regex matching any run of
  # non-alphanumeric characters, not only commas.
  separate(value, c("1", "2", "3"), sep = ",") %>%
  pivot_wider(
    names_from = name,
    names_glue = "{name}_{.value}",
    values_from = c("1", "2", "3"),
    # Order by source column first so the layout matches the requested output.
    names_vary = "slowest"
  ) %>%
  select(-id)
#> # A tibble: 2 × 9
#>   Col1_1 Col1_2 Col1_3 Col2_1 Col2_2 Col2_3 Col3_1 Col3_2 Col3_3
#>   <chr>  <chr>  <chr>  <chr>  <chr>  <chr>  <chr>  <chr>  <chr>
#> 1 a      b      c      c      b      e      e      f      g
#> 2 9      a      5      4      r      t      y      z      d
使用 base R 的 read.csv:
# Paste the columns of each row into one comma-separated line, then let
# read.csv() split those lines into fields.
# colClasses = "character" keeps every piece as a string; without it,
# read.csv() would type-convert columns (e.g. all-digit fields to integer).
read.csv(
  text = do.call(paste, c(Input, sep = ",")),
  header = FALSE,
  colClasses = "character"
)
#   V1 V2 V3 V4 V5 V6 V7 V8 V9
# 1  a  b  c  c  b  e  e  f  g
# 2  9  a  5  4  r  t  y  z  d