展开(spread)多列 [tidyr]
Spread multiple columns [tidyr]
我想使用 tidyr 将数据展开(spread)到多个列中。
# Example input: ten rows sharing one ID. `col1` holds the target column
# names (A..J) and, in every row, exactly one of col2..col4 is non-NA.
dat <- data.frame(
  ID = rep(1, 10),
  col1 = LETTERS[1:10],
  col2 = c(letters[1:8], NA, NA),
  col3 = c(rep(NA, 8), "5", NA),
  col4 = c(rep(NA, 9), "value")
)
预期结果是:
# Expected result: one row holding the ID followed by one column per col1
# letter. c() mixes 1 with strings, so every value ends up as text.
Out <- data.frame(
  matrix(c(1, letters[1:8], "5", "value"), nrow = 1),
  row.names = NULL
)
names(Out) <- c("ID", LETTERS[1:10])
我想到了:
# Asker's attempt: gather col2..col4 into long form, build a combined key from
# col1 and the source-column name, then spread that key back into columns.
a <- dat %>% gather(variable, value, -(ID:col1)) %>%
unite(temp, col1, variable) %>%
spread(temp, value)
# Drop the NA columns. which() on the logical matrix yields linear indices;
# these presumably line up with column positions only because `a` has a
# single row (every ID is 1) -- verify before reusing on multi-row data.
a[,-which(is.na(a))]
这很笨拙,而且还更改了列名。对此有更好的解决方案吗?
我们可以在 gather 中使用 na.rm=TRUE,用 select 删除 'variable' 列,然后使用 spread:
library(dplyr)
library(tidyr)
# Reshape col2..col4 to long form, dropping NA cells so each col1 letter keeps
# exactly one value; then discard the source-column label and spread back to
# wide form with col1 supplying the new column names.
dat %>%
  gather(key = variable, value = val, -(ID:col1), na.rm = TRUE) %>%
  select(-variable) %>%
  spread(key = col1, value = val)
#   ID A B C D E F G H I     J
# 1  1 a b c d e f g h 5 value
更新
在开发版 tidyr(tidyr_0.8.3.9000)中,当有多个值列需要考虑时,我们可以使用 pivot_wider:
# Widen every value column (col2..col4) at once, using col1 for the new column
# names, then keep only the columns that contain at least one non-NA value.
# paste0() builds the column names with base R, so stringr does not have to be
# attached (the snippet only loads dplyr and tidyr).
dat %>%
  pivot_wider(names_from = col1, values_from = paste0("col", 2:4)) %>%
  select_if(~ any(!is.na(.)))
# A tibble: 1 x 11
# ID col2_A col2_B col2_C col2_D col2_E col2_F col2_G col2_H col3_I col4_J
# <dbl> <fct> <fct> <fct> <fct> <fct> <fct> <fct> <fct> <fct> <fct>
#1 1 a b c d e f g h 5 value
如果我们使用 reshape2,类似的做法是:
library(reshape2)
# Melt columns 3-5 (col2..col4) to long form without the NA rows, then cast
# back to wide form with one column per col1 value.
dcast(
  melt(dat, measure.vars = 3:5, na.rm = TRUE),
  ID ~ col1,
  value.var = "value"
)
我想使用 tidyr 将数据展开(spread)到多个列中。
# Example input: ten rows sharing one ID. `col1` holds the target column
# names (A..J) and, in every row, exactly one of col2..col4 is non-NA.
dat <- data.frame(
  ID = rep(1, 10),
  col1 = LETTERS[1:10],
  col2 = c(letters[1:8], NA, NA),
  col3 = c(rep(NA, 8), "5", NA),
  col4 = c(rep(NA, 9), "value")
)
预期结果是:
# Expected result: one row holding the ID followed by one column per col1
# letter. c() mixes 1 with strings, so every value ends up as text.
Out <- data.frame(
  matrix(c(1, letters[1:8], "5", "value"), nrow = 1),
  row.names = NULL
)
names(Out) <- c("ID", LETTERS[1:10])
我想到了:
# Asker's attempt: gather col2..col4 into long form, build a combined key from
# col1 and the source-column name, then spread that key back into columns.
a <- dat %>% gather(variable, value, -(ID:col1)) %>%
unite(temp, col1, variable) %>%
spread(temp, value)
# Drop the NA columns. which() on the logical matrix yields linear indices;
# these presumably line up with column positions only because `a` has a
# single row (every ID is 1) -- verify before reusing on multi-row data.
a[,-which(is.na(a))]
这很笨拙,而且还更改了列名。对此有更好的解决方案吗?
我们可以在 gather 中使用 na.rm=TRUE,用 select 删除 'variable' 列,然后使用 spread:
library(dplyr)
library(tidyr)
# Reshape col2..col4 to long form, dropping NA cells so each col1 letter keeps
# exactly one value; then discard the source-column label and spread back to
# wide form with col1 supplying the new column names.
dat %>%
  gather(key = variable, value = val, -(ID:col1), na.rm = TRUE) %>%
  select(-variable) %>%
  spread(key = col1, value = val)
#   ID A B C D E F G H I     J
# 1  1 a b c d e f g h 5 value
更新
在开发版 tidyr(tidyr_0.8.3.9000)中,当有多个值列需要考虑时,我们可以使用 pivot_wider:
# Widen every value column (col2..col4) at once, using col1 for the new column
# names, then keep only the columns that contain at least one non-NA value.
# paste0() builds the column names with base R, so stringr does not have to be
# attached (the snippet only loads dplyr and tidyr).
dat %>%
  pivot_wider(names_from = col1, values_from = paste0("col", 2:4)) %>%
  select_if(~ any(!is.na(.)))
# A tibble: 1 x 11
# ID col2_A col2_B col2_C col2_D col2_E col2_F col2_G col2_H col3_I col4_J
# <dbl> <fct> <fct> <fct> <fct> <fct> <fct> <fct> <fct> <fct> <fct>
#1 1 a b c d e f g h 5 value
如果我们使用 reshape2,类似的做法是:
library(reshape2)
# Melt columns 3-5 (col2..col4) to long form without the NA rows, then cast
# back to wide form with one column per col1 value.
dcast(
  melt(dat, measure.vars = 3:5, na.rm = TRUE),
  ID ~ col1,
  value.var = "value"
)