使用 R tidyr pivot_wider 从多个列名和值中获取宽格式数据
Using R tidyr pivot_wider to get wide-form data from multiple column names and values
如何使用 tidyr 的 pivot_wider 将此数据框从长格式转换为宽格式?我尝试套用文档页面上的示例,但一定是遗漏了什么。
数据框
# Example long-format data: each id has two files, with one value per file.
id <- rep(c(1, 2, 3), each = 2)
filename <- c(
  "file1a.txt", "file1b.txt",
  "file2a.txt", "file2b.txt",
  "file3a.txt", "file3b.txt"
)
val <- c(832, 834, 221, 878, 2, 19)
df1 <- data.frame(id = id, filename = filename, val = val)
view(df1)
id
filename
val
1
file1a.txt
832
1
file1b.txt
834
2
file2a.txt
221
2
file2b.txt
878
3
file3a.txt
2
3
file3b.txt
19
期望输出
id
filename1
filename2
val1
val2
1
file1a.txt
file1b.txt
832
834
2
file2a.txt
file2b.txt
221
878
3
file3a.txt
file3b.txt
2
19
尝试失败
# Failed attempt 1: only values_from is supplied (no names_from).
# As the result printed below shows, each id ends up with a single cell per
# value column that holds several values instead of one column per value.
df_wide <- pivot_wider(data = df1,
id_cols = id,
values_from = c("filename", "val"))
view(df_wide)
id
filename_
val_
1
1:2
c(832,834)
2
3:4
c(221,878)
3
5:6
c(2,19)
# Failed attempt 2: filename and val are used for both names_from and
# values_from. As the result below shows, every filename/val combination
# becomes its own column, with NA wherever an id has no such combination.
df_wide <- pivot_wider(data = df1,
id_cols = id,
names_from = c("filename", "val"),
values_from = c("filename", "val"))
view(df_wide)
id
filename_file1a.txt_832
filename_file1b.txt_834
filename_file2a.txt_221
...etc
1
file1a.txt
file1b.txt
NA
...etc
2
NA
NA
file2a.txt
...etc
3
NA
NA
NA
...etc
我们需要一个行序列
library(dplyr)
library(tidyr)
library(data.table)
# Add a within-id row counter (cn = 1, 2, ...) and use it as the suffix of the
# new column names when spreading filename and val into wide format.
df1 %>%
  mutate(cn = rowid(id)) %>%
  pivot_wider(
    names_from = cn,
    values_from = c(filename, val),
    names_sep = ""
  )
输出:
# A tibble: 3 x 5
# id filename1 filename2 val1 val2
# <dbl> <chr> <chr> <dbl> <dbl>
#1 1 file1a.txt file1b.txt 832 834
#2 2 file2a.txt file2b.txt 221 878
#3 3 file3a.txt file3b.txt 2 19
或者先按 id 分组,再用 row_number() 生成组内序号:
# Alternative that needs only dplyr/tidyr: number the rows within each id with
# row_number() on grouped data, then pivot on that counter.
df1 %>%
  group_by(id) %>% # group first so row_number() restarts at 1 for every id
  mutate(cn = row_number()) %>%
  ungroup() %>% # drop the grouping so the pivoted result is a plain tibble
  pivot_wider(
    names_from = cn,
    values_from = c(filename, val),
    names_sep = ""
  )
如果不想使用 %>%,可以把经过 mutate 处理的原始数据集(即按 'id' 内的行顺序添加了列 'cn' 的数据)直接作为 data 参数传入:
# Same reshaping without the pipe: the data argument is df1 with the
# within-id counter cn already added by mutate().
pivot_wider(
  mutate(df1, cn = rowid(id)),
  names_from = cn,
  values_from = c(filename, val),
  names_sep = ""
)
另一种做法是使用 data.table 的 dcast:
> # Cast to wide format with one column per value variable and within-id row index.
> # NOTE(review): setDT() is documented to convert df1 to a data.table by
> # reference - confirm that modifying df1 itself is acceptable.
> dcast(
+   setDT(df1),
+   id ~ rowid(id),
+   value.var = c("filename", "val")
+ )
id filename_1 filename_2 val_1 val_2
1: 1 file1a.txt file1b.txt 832 834
2: 2 file2a.txt file2b.txt 221 878
3: 3 file3a.txt file3b.txt 2 19
如何使用 tidyr 的 pivot_wider 将此数据框从长格式转换为宽格式?我尝试套用文档页面上的示例,但一定是遗漏了什么。
数据框
# Example long-format data: each id has two files, with one value per file.
id <- rep(c(1, 2, 3), each = 2)
filename <- c(
  "file1a.txt", "file1b.txt",
  "file2a.txt", "file2b.txt",
  "file3a.txt", "file3b.txt"
)
val <- c(832, 834, 221, 878, 2, 19)
df1 <- data.frame(id = id, filename = filename, val = val)
view(df1)
id | filename | val |
---|---|---|
1 | file1a.txt | 832 |
1 | file1b.txt | 834 |
2 | file2a.txt | 221 |
2 | file2b.txt | 878 |
3 | file3a.txt | 2 |
3 | file3b.txt | 19 |
期望输出
id | filename1 | filename2 | val1 | val2 |
---|---|---|---|---|
1 | file1a.txt | file1b.txt | 832 | 834 |
2 | file2a.txt | file2b.txt | 221 | 878 |
3 | file3a.txt | file3b.txt | 2 | 19 |
尝试失败
# Failed attempt 1: only values_from is supplied (no names_from).
# As the result table below shows, each id ends up with a single cell per
# value column that holds several values instead of one column per value.
df_wide <- pivot_wider(data = df1,
id_cols = id,
values_from = c("filename", "val"))
view(df_wide)
id | filename_ | val_ |
---|---|---|
1 | 1:2 | c(832,834) |
2 | 3:4 | c(221,878) |
3 | 5:6 | c(2,19) |
# Failed attempt 2: filename and val are used for both names_from and
# values_from. As the result table below shows, every filename/val combination
# becomes its own column, with NA wherever an id has no such combination.
df_wide <- pivot_wider(data = df1,
id_cols = id,
names_from = c("filename", "val"),
values_from = c("filename", "val"))
view(df_wide)
id | filename_file1a.txt_832 | filename_file1b.txt_834 | filename_file2a.txt_221 | ...etc |
---|---|---|---|---|
1 | file1a.txt | file1b.txt | NA | ...etc |
2 | NA | NA | file2a.txt | ...etc |
3 | NA | NA | NA | ...etc |
我们需要一个行序列
library(dplyr)
library(tidyr)
library(data.table)
# Add a within-id row counter (cn = 1, 2, ...) and use it as the suffix of the
# new column names when spreading filename and val into wide format.
df1 %>%
  mutate(cn = rowid(id)) %>%
  pivot_wider(
    names_from = cn,
    values_from = c(filename, val),
    names_sep = ""
  )
输出:
# A tibble: 3 x 5
# id filename1 filename2 val1 val2
# <dbl> <chr> <chr> <dbl> <dbl>
#1 1 file1a.txt file1b.txt 832 834
#2 2 file2a.txt file2b.txt 221 878
#3 3 file3a.txt file3b.txt 2 19
或者先按 id 分组,再用 row_number() 生成组内序号:
# Alternative that needs only dplyr/tidyr: number the rows within each id with
# row_number() on grouped data, then pivot on that counter.
df1 %>%
  group_by(id) %>% # group first so row_number() restarts at 1 for every id
  mutate(cn = row_number()) %>%
  ungroup() %>% # drop the grouping so the pivoted result is a plain tibble
  pivot_wider(
    names_from = cn,
    values_from = c(filename, val),
    names_sep = ""
  )
如果不想使用 %>%,可以把经过 mutate 处理的原始数据集(即按 'id' 内的行顺序添加了列 'cn' 的数据)直接作为 data 参数传入:
# Same reshaping without the pipe: the data argument is df1 with the
# within-id counter cn already added by mutate().
pivot_wider(
  mutate(df1, cn = rowid(id)),
  names_from = cn,
  values_from = c(filename, val),
  names_sep = ""
)
另一种做法是使用 data.table 的 dcast:
> # Cast to wide format with one column per value variable and within-id row index.
> # NOTE(review): setDT() is documented to convert df1 to a data.table by
> # reference - confirm that modifying df1 itself is acceptable.
> dcast(
+   setDT(df1),
+   id ~ rowid(id),
+   value.var = c("filename", "val")
+ )
id filename_1 filename_2 val_1 val_2
1: 1 file1a.txt file1b.txt 832 834
2: 2 file2a.txt file2b.txt 221 878
3: 3 file3a.txt file3b.txt 2 19