如何将一列(逗号拆分)转换为 R 中的多列?
How to convert one (comma split) column into multiple columns in R?
例如,我有这个数据:
# Build a toy data frame: five people, each with one to five distinct random
# lowercase letters joined by commas. The letters are drawn with sample(), so
# the keywords differ between runs unless set.seed() is called beforehand.
data <- data.frame(
  person = paste0("person_", seq_len(5)),
  # vapply() guarantees exactly one string per person; sapply() would
  # silently change its return type on unexpected input.
  keyword = vapply(
    seq_len(5),
    function(i) paste0(sample(letters, sample(1:5, 1)), collapse = ","),
    character(1)
  )
)
> data
person keyword
1 person_1 k,f,p,w
2 person_2 y,j
3 person_3 y,r
4 person_4 g,w
5 person_5 u,x,c,n
我想将关键字拆分成多列,最后将它们转换成二进制数据,像这样:
person k f p w y j r g u x c n
1 person_1 1 1 1 1 0 0 0 0 0 0 0 0
2 person_2 0 0 0 0 1 1 0 0 0 0 0 0
3 person_3 0 0 0 0 1 0 1 0 0 0 0 0
4 person_4 0 0 0 1 0 0 0 1 0 0 0 0
5 person_5 0 0 0 0 0 0 0 0 1 1 1 1
实现此目标的最佳方法是什么?
谢谢。
你可以使用 tidyr 和 dplyr:
library(tidyr)
library(dplyr)
# One-hot encode the comma-separated `keyword` column: one row per person,
# one 0/1 column per distinct keyword.
data %>%
  # Split each keyword string on literal commas into a list column.
  mutate(keyword = strsplit(keyword, ",", fixed = TRUE)) %>%
  # Expand the list column to one row per (person, keyword) pair.
  unnest(keyword) %>%
  # Drop repeated pairs so pivot_wider() never receives duplicate cells,
  # which would otherwise produce list-columns and a warning.
  distinct() %>%
  mutate(value = 1) %>%
  # Keywords become column names; pairs that never occur are filled with 0.
  pivot_wider(names_from = keyword, values_from = value, values_fill = 0)
其返回结果如下(该输出来自另一次随机生成的数据,因此列名与上面的示例数据不同):
# A tibble: 5 x 16
person p f i u r v q j d k x o c s b
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 person_1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0
2 person_2 0 0 0 1 1 1 1 1 0 0 0 0 0 0 0
3 person_3 0 0 0 0 0 1 0 1 1 1 0 0 0 0 0
4 person_4 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
5 person_5 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1
也可以使用 splitstackshape 包中的 cSplit_e:
library(splitstackshape)
# Expand the comma-separated `keyword` column into one indicator column per
# distinct value (named keyword_<value>), writing 0 where a value is absent
# and dropping the original column.
out <- cSplit_e(
  data,
  "keyword",
  sep = ",",
  type = "character",
  fill = 0,
  drop = TRUE
)
# Strip the "keyword_" prefix so columns are named by the bare keyword. The
# pattern is anchored so only a leading prefix is removed.
names(out) <- sub("^keyword_", "", names(out))
输出:
> out
person c f g j k n p r u w x y
1 person_1 0 1 0 0 1 0 1 0 0 1 0 0
2 person_2 0 0 0 1 0 0 0 0 0 0 0 1
3 person_3 0 0 0 0 0 0 0 1 0 0 0 1
4 person_4 0 0 1 0 0 0 0 0 0 1 0 0
5 person_5 1 0 0 0 0 1 0 0 1 0 1 0
数据
# Fixed example data used by the answers above: one row per person, with
# that person's keywords stored as a single comma-separated string.
data <- data.frame(
  person = paste0("person_", 1:5),
  keyword = c("k,f,p,w", "y,j", "y,r", "g,w", "u,x,c,n"),
  # Keep explicit character row names "1".."5".
  row.names = as.character(1:5),
  stringsAsFactors = FALSE
)
例如,我有这个数据:
# Build a toy data frame: five people, each with one to five distinct random
# lowercase letters joined by commas. The letters are drawn with sample(), so
# the keywords differ between runs unless set.seed() is called beforehand.
data <- data.frame(
  person = paste0("person_", seq_len(5)),
  # vapply() guarantees exactly one string per person; sapply() would
  # silently change its return type on unexpected input.
  keyword = vapply(
    seq_len(5),
    function(i) paste0(sample(letters, sample(1:5, 1)), collapse = ","),
    character(1)
  )
)
> data
person keyword
1 person_1 k,f,p,w
2 person_2 y,j
3 person_3 y,r
4 person_4 g,w
5 person_5 u,x,c,n
我想将关键字拆分成多列,最后将它们转换成二进制数据,像这样:
person k f p w y j r g u x c n
1 person_1 1 1 1 1 0 0 0 0 0 0 0 0
2 person_2 0 0 0 0 1 1 0 0 0 0 0 0
3 person_3 0 0 0 0 1 0 1 0 0 0 0 0
4 person_4 0 0 0 1 0 0 0 1 0 0 0 0
5 person_5 0 0 0 0 0 0 0 0 1 1 1 1
实现此目标的最佳方法是什么?
谢谢。
你可以使用 tidyr 和 dplyr:
library(tidyr)
library(dplyr)
# One-hot encode the comma-separated `keyword` column: one row per person,
# one 0/1 column per distinct keyword.
data %>%
  # Split each keyword string on literal commas into a list column.
  mutate(keyword = strsplit(keyword, ",", fixed = TRUE)) %>%
  # Expand the list column to one row per (person, keyword) pair.
  unnest(keyword) %>%
  # Drop repeated pairs so pivot_wider() never receives duplicate cells,
  # which would otherwise produce list-columns and a warning.
  distinct() %>%
  mutate(value = 1) %>%
  # Keywords become column names; pairs that never occur are filled with 0.
  pivot_wider(names_from = keyword, values_from = value, values_fill = 0)
其返回结果如下(该输出来自另一次随机生成的数据,因此列名与上面的示例数据不同):
# A tibble: 5 x 16
person p f i u r v q j d k x o c s b
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 person_1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0
2 person_2 0 0 0 1 1 1 1 1 0 0 0 0 0 0 0
3 person_3 0 0 0 0 0 1 0 1 1 1 0 0 0 0 0
4 person_4 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
5 person_5 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1
也可以使用 splitstackshape 包中的 cSplit_e:
library(splitstackshape)
# Expand the comma-separated `keyword` column into one indicator column per
# distinct value (named keyword_<value>), writing 0 where a value is absent
# and dropping the original column.
out <- cSplit_e(
  data,
  "keyword",
  sep = ",",
  type = "character",
  fill = 0,
  drop = TRUE
)
# Strip the "keyword_" prefix so columns are named by the bare keyword. The
# pattern is anchored so only a leading prefix is removed.
names(out) <- sub("^keyword_", "", names(out))
输出:
> out
person c f g j k n p r u w x y
1 person_1 0 1 0 0 1 0 1 0 0 1 0 0
2 person_2 0 0 0 1 0 0 0 0 0 0 0 1
3 person_3 0 0 0 0 0 0 0 1 0 0 0 1
4 person_4 0 0 1 0 0 0 0 0 0 1 0 0
5 person_5 1 0 0 0 0 1 0 0 1 0 1 0
数据
# Fixed example data used by the answers above: one row per person, with
# that person's keywords stored as a single comma-separated string.
data <- data.frame(
  person = paste0("person_", 1:5),
  keyword = c("k,f,p,w", "y,j", "y,r", "g,w", "u,x,c,n"),
  # Keep explicit character row names "1".."5".
  row.names = as.character(1:5),
  stringsAsFactors = FALSE
)