根据另一列中的唯一值转置一列中的数据
transpose data in one column based on unique values in another column
我基本上想在 R 中实现此处(here)所描述的操作。
示例数据
# Example input in long format: one row per (name, value) pair, with each
# name repeated once for every value that belongs to it.
names <- c(rep("k127_60234", 4), rep("k127_50234", 2))
values <- c("ko235", "ko123", "ko543", "ko623", "ko443", "ko123")
df <- data.frame(names, values)
这是我希望得到的输出——请注意,实际文件会大得多(高达 200k),因此我无法预先定义列数。
# Desired wide layout: one row per name and one column per value position;
# names with fewer values are padded with NA.
names <- c("k127_60234", "k127_50234")
values1 <- c("ko235", "ko443")
values2 <- c("ko123", "ko123")
values3 <- c("ko543", NA_character_)
values4 <- c("ko623", NA_character_)
df.out <- data.frame(names, values1, values2, values3, values4)
我们可以在一行中使用dcast
library(data.table)
# as.data.table() works on a copy, so `df` stays a plain data.frame for the
# other snippets; setDT() would convert it to a data.table in place.
# rowid(names) numbers the rows within each name, which yields the
# values1..valuesN column names without knowing N in advance.
dcast(
  as.data.table(df),
  names ~ paste0("values", rowid(names)),
  value.var = "values"
)
-输出
# names values1 values2 values3 values4
#1: k127_50234 ko443 ko123 <NA> <NA>
#2: k127_60234 ko235 ko123 ko543 ko623
或使用tidyverse
library(dplyr)
library(tidyr)
library(stringr)
# rowid() is a data.table function, not a tidyverse one; qualifying it keeps
# this snippet working even when data.table has not been attached.
df %>%
  mutate(nm1 = str_c("values", data.table::rowid(names))) %>%
  pivot_wider(names_from = nm1, values_from = values)
-输出
# A tibble: 2 x 5
# names values1 values2 values3 values4
# <chr> <chr> <chr> <chr> <chr>
#1 k127_60234 ko235 ko123 ko543 ko623
#2 k127_50234 ko443 ko123 <NA> <NA>
或使用base R
# Split the values by name, pad every group with NA up to the longest group,
# and bind the groups into a character matrix (one row per name).
groups <- split(df[["values"]], df[["names"]])
# Derive the width from the data instead of hard-coding it, since the number
# of values per name is not known in advance.
width <- max(lengths(groups))
wide <- do.call(rbind, lapply(groups, `length<-`, width))
colnames(wide) <- paste0("values", seq_len(width))
wide
library(tidyverse)
df %>%
  group_by(names) %>%
  # Number the rows within each name so every value gets its own column.
  mutate(variable = str_c("values", row_number())) %>%
  # Drop the grouping before reshaping so the result is a plain tibble.
  ungroup() %>%
  pivot_wider(names_from = variable, values_from = values)
names values1 values2 values3 values4
<chr> <chr> <chr> <chr> <chr>
1 k127_60234 ko235 ko123 ko543 ko623
2 k127_50234 ko443 ko123 NA NA
在基础 R 中你可以这样做:
# Add an integer within-name counter built from row positions, so it does not
# depend on the type or content of `values`; reshape() uses it as the time
# variable that becomes the column suffix.
df1 <- transform(
  df,
  time = ave(seq_along(names), names, FUN = seq_along)
)
# Spell out `direction` (no partial matching) and name the time variable.
reshape(df1, idvar = "names", timevar = "time", direction = "wide", sep = "")
names values1 values2 values3 values4
1 k127_60234 ko235 ko123 ko543 ko623
5 k127_50234 ko443 ko123 <NA> <NA>
这可能会有帮助:
# NOTE(review): tidyr::spread() is superseded by tidyr::pivot_wider(). This
# call uses `values` as both the key and the value column, so it presumably
# does not produce the values1..valuesN layout requested above - verify
# before relying on it.
df %>% tidyr::spread(values, value = values)
我基本上想在 R 中实现此处(here)所描述的操作。
示例数据
# Example input in long format: one row per (name, value) pair, with each
# name repeated once for every value that belongs to it.
names <- c(rep("k127_60234", 4), rep("k127_50234", 2))
values <- c("ko235", "ko123", "ko543", "ko623", "ko443", "ko123")
df <- data.frame(names, values)
这是我希望得到的输出——请注意,实际文件会大得多(高达 200k),因此我无法预先定义列数。
# Desired wide layout: one row per name and one column per value position;
# names with fewer values are padded with NA.
names <- c("k127_60234", "k127_50234")
values1 <- c("ko235", "ko443")
values2 <- c("ko123", "ko123")
values3 <- c("ko543", NA_character_)
values4 <- c("ko623", NA_character_)
df.out <- data.frame(names, values1, values2, values3, values4)
我们可以在一行中使用dcast
library(data.table)
# as.data.table() works on a copy, so `df` stays a plain data.frame for the
# other snippets; setDT() would convert it to a data.table in place.
# rowid(names) numbers the rows within each name, which yields the
# values1..valuesN column names without knowing N in advance.
dcast(
  as.data.table(df),
  names ~ paste0("values", rowid(names)),
  value.var = "values"
)
-输出
# names values1 values2 values3 values4
#1: k127_50234 ko443 ko123 <NA> <NA>
#2: k127_60234 ko235 ko123 ko543 ko623
或使用tidyverse
library(dplyr)
library(tidyr)
library(stringr)
# rowid() is a data.table function, not a tidyverse one; qualifying it keeps
# this snippet working even when data.table has not been attached.
df %>%
  mutate(nm1 = str_c("values", data.table::rowid(names))) %>%
  pivot_wider(names_from = nm1, values_from = values)
-输出
# A tibble: 2 x 5
# names values1 values2 values3 values4
# <chr> <chr> <chr> <chr> <chr>
#1 k127_60234 ko235 ko123 ko543 ko623
#2 k127_50234 ko443 ko123 <NA> <NA>
或使用base R
# Split the values by name, pad every group with NA up to the longest group,
# and bind the groups into a character matrix (one row per name).
groups <- split(df[["values"]], df[["names"]])
# Derive the width from the data instead of hard-coding it, since the number
# of values per name is not known in advance.
width <- max(lengths(groups))
wide <- do.call(rbind, lapply(groups, `length<-`, width))
colnames(wide) <- paste0("values", seq_len(width))
wide
library(tidyverse)
df %>%
  group_by(names) %>%
  # Number the rows within each name so every value gets its own column.
  mutate(variable = str_c("values", row_number())) %>%
  # Drop the grouping before reshaping so the result is a plain tibble.
  ungroup() %>%
  pivot_wider(names_from = variable, values_from = values)
names values1 values2 values3 values4
<chr> <chr> <chr> <chr> <chr>
1 k127_60234 ko235 ko123 ko543 ko623
2 k127_50234 ko443 ko123 NA NA
在基础 R 中你可以这样做:
# Add an integer within-name counter built from row positions, so it does not
# depend on the type or content of `values`; reshape() uses it as the time
# variable that becomes the column suffix.
df1 <- transform(
  df,
  time = ave(seq_along(names), names, FUN = seq_along)
)
# Spell out `direction` (no partial matching) and name the time variable.
reshape(df1, idvar = "names", timevar = "time", direction = "wide", sep = "")
names values1 values2 values3 values4
1 k127_60234 ko235 ko123 ko543 ko623
5 k127_50234 ko443 ko123 <NA> <NA>
这可能会有帮助:
# NOTE(review): tidyr::spread() is superseded by tidyr::pivot_wider(). This
# call uses `values` as both the key and the value column, so it presumably
# does not produce the values1..valuesN layout requested above - verify
# before relying on it.
df %>% tidyr::spread(values, value = values)