将 tidyr unnest 应用于框架时出现错误消息
Error message when applying tidyr unnest to a frame
我有一个 table,其中一列是 ID 号,另外 10 列是我想要绘制的基因表达水平。这 10 列中,6 列来自 3 个各有两次重复(duplicate)的条件,4 列来自 1 个有四次重复(quadruplicate)的条件,同一条件的重复列使用相同的列名。
dput(head(GSE114056.table))
structure(list(ID = c(17200001L, 17200003L, 17200005L, 17200007L,
17200009L, 17200011L), `NF empty vector` = c(7.819902, 7.376688,
6.374864, 6.397508, 7.358877, 6.823258), `NF empty vector` = c(7.428268,
7.31238, 6.732675, 6.454915, 7.281378, 6.697764), `NF DKK3` = c(7.70196,
7.305377, 6.628506, 6.667738, 7.325195, 7.066599), `NF DKK3` = c(7.693524,
7.201907, 6.503982, 6.395881, 7.156792, 6.520116), `CAF siCtr` = c(7.041349,
6.894989, 6.175385, 6.102115, 7.022776, 6.552555), `CAF siCtr` = c(6.907807,
7.280088, 6.000358, 5.984045, 6.715977, 6.479581), `CAF siDKK3` = c(7.17559,
6.138037, 6.104392, 6.258108, 6.91867, 6.347572), `CAF siDKK3` = c(7.144398,
6.633998, 6.217089, 5.372207, 6.72605, 6.197155), `CAF siDKK3` = c(6.848402,
5.648935, 5.64459, 4.926477, 6.104917, 5.21501), `CAF siDKK3` = c(7.175592,
5.827932, 5.507035, 5.764134, 6.708101, 6.176258)), row.names = c(NA,
6L), class = "data.frame")
我设法将同一条件的重复值合并到了同一列(列表列)中,但在应用 unnest 时出现错误。这就是我当前的 table 的样子:
dput(head(GSE114056.table_replicates))
structure(list(ID = c(17200001L, 17200003L, 17200005L, 17200007L,
17200009L, 17200011L), `NF empty vector` = list(list(7.819902,
7.428268), list(7.376688, 7.31238), list(6.374864, 6.732675),
list(6.397508, 6.454915), list(7.358877, 7.281378), list(
6.823258, 6.697764)), `NF DKK3` = list(list(7.70196,
7.693524), list(7.305377, 7.201907), list(6.628506, 6.503982),
list(6.667738, 6.395881), list(7.325195, 7.156792), list(
7.066599, 6.520116)), `CAF siCtr` = list(list(7.041349,
6.907807), list(6.894989, 7.280088), list(6.175385, 6.000358),
list(6.102115, 5.984045), list(7.022776, 6.715977), list(
6.552555, 6.479581)), `CAF siDKK3` = list(list(7.17559,
7.144398, 6.848402, 7.175592), list(6.138037, 6.633998, 5.648935,
5.827932), list(6.104392, 6.217089, 5.64459, 5.507035), list(
6.258108, 5.372207, 4.926477, 5.764134), list(6.91867, 6.72605,
6.104917, 6.708101), list(6.347572, 6.197155, 5.21501, 6.176258))), row.names = c(NA,
-6L), class = c("tbl_df", "tbl", "data.frame"))
当我尝试应用 unnest 时,我收到以下错误:Error: Incompatible lengths: 2, 4.
期望的输出:
dput(head(example))
structure(list(ID = c(17200001L, 17200001L, 17200001L, 17200001L
), NF.empty.vector = c(7.819902, 7.428268, NA, NA), NF.DKK3 = c(7.70196,
7.693524, NA, NA), CAF.siCtr = c(7.041349, 6.907807, NA, NA),
CAF.siDKK3 = c(7.17559, 7.144398, 6.848402, 7.175592)), row.names = c(NA,
4L), class = "data.frame")
> head(example)
ID NF.empty.vector NF.DKK3 CAF.siCtr CAF.siDKK3
1 17200001 7.819902 7.701960 7.041349 7.175590
2 17200001 7.428268 7.693524 6.907807 7.144398
3 17200001 NA NA NA 6.848402
4 17200001 NA NA NA 7.175592
输出:
table<-names(GSE114056.table)[-1] <- paste0(names(GSE114056.table)[-1], "_",
+ ave(seq_along(names(GSE114056.table)[-1]), names(GSE114056.table)[-1], FUN = seq_along))
> pivot_longer(GSE114056.table, cols = -ID, names_sep="_",
+ names_to = c('.value', 'grp'))
# A tibble: 165,380 x 6
ID grp `NF empty vector` `NF DKK3` `CAF siCtr` `CAF siDKK3`
<int> <chr> <dbl> <dbl> <dbl> <dbl>
1 17200001 1 7.82 7.70 7.04 7.18
2 17200001 2 7.43 7.69 6.91 7.14
3 17200001 3 NA NA NA 6.85
4 17200001 4 NA NA NA 7.18
5 17200003 1 7.38 7.31 6.89 6.14
6 17200003 2 7.31 7.20 7.28 6.63
7 17200003 3 NA NA NA 5.65
8 17200003 4 NA NA NA 5.83
9 17200005 1 6.37 6.63 6.18 6.10
10 17200005 2 6.73 6.50 6.00 6.22
# ... with 165,370 more rows
Warning message:
Expected 2 pieces. Additional pieces discarded in 10 rows [1, 2, 3, 4, 5, 6, 7, 8, 9, 10].
新错误:
table<-names(GSE114056.table)[-1] <- paste0(names(GSE114056.table)[-1], "_",
+ ave(seq_along(names(GSE114056.table)[-1]), names(GSE114056.table)[-1], FUN = seq_along))
> pivot_longer(GSE114056.table, cols = -ID, names_sep="_",
+ names_to = c('.value', 'grp'))
# A tibble: 165,380 x 6
ID grp `NF empty vector` `NF DKK3` `CAF siCtr` `CAF siDKK3`
<int> <chr> <dbl> <dbl> <dbl> <dbl>
1 17200001 1 7.82 7.70 7.04 7.18
2 17200001 2 7.43 7.69 6.91 7.14
3 17200001 3 NA NA NA 6.85
4 17200001 4 NA NA NA 7.18
5 17200003 1 7.38 7.31 6.89 6.14
6 17200003 2 7.31 7.20 7.28 6.63
7 17200003 3 NA NA NA 5.65
8 17200003 4 NA NA NA 5.83
9 17200005 1 6.37 6.63 6.18 6.10
10 17200005 2 6.73 6.50 6.00 6.22
# ... with 165,370 more rows
Warning message:
Expected 2 pieces. Additional pieces discarded in 10 rows [1, 2, 3, 4, 5, 6, 7, 8, 9, 10].
> table.finale<-as.data.frame(table)
> head(table.finale)
table
1 NF empty vector_1_1_1_1_1_1_1_1_1_1_1_1_1_1_1_1
2 NF empty vector_2_1_1_1_1_1_1_1_1_1_1_1_1_1_1_1
3 NF DKK3_1_1_1_1_1_1_1_1_1_1_1_1_1_1_1_1
4 NF DKK3_2_1_1_1_1_1_1_1_1_1_1_1_1_1_1_1
5 CAF siCtr_1_1_1_1_1_1_1_1_1_1_1_1_1_1_1_1
6 CAF siCtr_2_1_1_1_1_1_1_1_1_1_1_1_1_1_1_1
我该如何解决?
谢谢
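如果需要把同名的重复列合并成单个列,请先重命名原始数据的列名,使每个列名唯一(在列名后追加 "_序号"),因为 data.frame 中不应有重复的列名,tidyr 的函数也无法可靠地处理它们。然后,我们使用 pivot_longer 将数据从 'wide'(宽格式)重塑为 'long'(长格式)。注意:重命名步骤只需运行一次;如果重复运行,列名会变成 "NF DKK3_1_1_…" 这样的形式,并触发 "Expected 2 pieces" 警告。另外,应把 pivot_longer 的结果赋给变量;像 table <- names(...) <- ... 这样的写法只会把列名字符向量赋给 table,而不是重塑后的数据。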
library(dplyr)
library(tidyr)
# Make the duplicated column names unique by appending "_<replicate index>".
# Any numeric suffix left over from a previous run is stripped first, so
# re-running this step is idempotent and never yields "NF DKK3_1_1_1...".
# NOTE(review): a column whose real name already ends in "_<digits>" would
# also have that suffix stripped and renumbered - confirm none exist.
base_names <- sub("(_[0-9]+)+$", "", names(df1)[-1])
names(df1)[-1] <- paste0(
  base_names, "_",
  ave(seq_along(base_names), base_names, FUN = seq_along)
)

# Reshape wide -> long. The anchored pattern splits each name at its final
# "_<digits>" only: the part before it becomes the output column (.value)
# and the digits become the replicate index `grp`. Conditions with fewer
# replicates are padded with NA.
out <- pivot_longer(
  df1,
  cols = -ID,
  names_pattern = "^(.*)_([0-9]+)$",
  names_to = c(".value", "grp")
)
out
输出:
# A tibble: 24 x 6
# ID grp `NF empty vector` `NF DKK3` `CAF siCtr` `CAF siDKK3`
# <int> <chr> <dbl> <dbl> <dbl> <dbl>
# 1 17200001 1 7.82 7.70 7.04 7.18
# 2 17200001 2 7.43 7.69 6.91 7.14
# 3 17200001 3 NA NA NA 6.85
# 4 17200001 4 NA NA NA 7.18
# 5 17200003 1 7.38 7.31 6.89 6.14
# 6 17200003 2 7.31 7.20 7.28 6.63
# 7 17200003 3 NA NA NA 5.65
# 8 17200003 4 NA NA NA 5.83
# 9 17200005 1 6.37 6.63 6.18 6.10
#10 17200005 2 6.73 6.50 6.00 6.22
# … with 14 more rows
数据
# Example data (first 6 rows): an integer ID column followed by 10 numeric
# expression columns with deliberately duplicated names - two columns each
# for `NF empty vector`, `NF DKK3` and `CAF siCtr`, and four columns for
# `CAF siDKK3`.
df1 <- structure(list(ID = c(17200001L, 17200003L, 17200005L, 17200007L,
17200009L, 17200011L), `NF empty vector` = c(7.819902, 7.376688,
6.374864, 6.397508, 7.358877, 6.823258), `NF empty vector` = c(7.428268,
7.31238, 6.732675, 6.454915, 7.281378, 6.697764), `NF DKK3` = c(7.70196,
7.305377, 6.628506, 6.667738, 7.325195, 7.066599), `NF DKK3` = c(7.693524,
7.201907, 6.503982, 6.395881, 7.156792, 6.520116), `CAF siCtr` = c(7.041349,
6.894989, 6.175385, 6.102115, 7.022776, 6.552555), `CAF siCtr` = c(6.907807,
7.280088, 6.000358, 5.984045, 6.715977, 6.479581), `CAF siDKK3` = c(7.17559,
6.138037, 6.104392, 6.258108, 6.91867, 6.347572), `CAF siDKK3` = c(7.144398,
6.633998, 6.217089, 5.372207, 6.72605, 6.197155), `CAF siDKK3` = c(6.848402,
5.648935, 5.64459, 4.926477, 6.104917, 5.21501), `CAF siDKK3` = c(7.175592,
5.827932, 5.507035, 5.764134, 6.708101, 6.176258)), row.names = c(NA,
6L), class = "data.frame")
我有一个 table,其中一列是 ID 号,另外 10 列是我想要绘制的基因表达水平。这 10 列中,6 列来自 3 个各有两次重复(duplicate)的条件,4 列来自 1 个有四次重复(quadruplicate)的条件,同一条件的重复列使用相同的列名。
dput(head(GSE114056.table))
structure(list(ID = c(17200001L, 17200003L, 17200005L, 17200007L,
17200009L, 17200011L), `NF empty vector` = c(7.819902, 7.376688,
6.374864, 6.397508, 7.358877, 6.823258), `NF empty vector` = c(7.428268,
7.31238, 6.732675, 6.454915, 7.281378, 6.697764), `NF DKK3` = c(7.70196,
7.305377, 6.628506, 6.667738, 7.325195, 7.066599), `NF DKK3` = c(7.693524,
7.201907, 6.503982, 6.395881, 7.156792, 6.520116), `CAF siCtr` = c(7.041349,
6.894989, 6.175385, 6.102115, 7.022776, 6.552555), `CAF siCtr` = c(6.907807,
7.280088, 6.000358, 5.984045, 6.715977, 6.479581), `CAF siDKK3` = c(7.17559,
6.138037, 6.104392, 6.258108, 6.91867, 6.347572), `CAF siDKK3` = c(7.144398,
6.633998, 6.217089, 5.372207, 6.72605, 6.197155), `CAF siDKK3` = c(6.848402,
5.648935, 5.64459, 4.926477, 6.104917, 5.21501), `CAF siDKK3` = c(7.175592,
5.827932, 5.507035, 5.764134, 6.708101, 6.176258)), row.names = c(NA,
6L), class = "data.frame")
我设法将同一条件的重复值合并到了同一列(列表列)中,但在应用 unnest 时出现错误。这就是我当前的 table 的样子:
dput(head(GSE114056.table_replicates))
structure(list(ID = c(17200001L, 17200003L, 17200005L, 17200007L,
17200009L, 17200011L), `NF empty vector` = list(list(7.819902,
7.428268), list(7.376688, 7.31238), list(6.374864, 6.732675),
list(6.397508, 6.454915), list(7.358877, 7.281378), list(
6.823258, 6.697764)), `NF DKK3` = list(list(7.70196,
7.693524), list(7.305377, 7.201907), list(6.628506, 6.503982),
list(6.667738, 6.395881), list(7.325195, 7.156792), list(
7.066599, 6.520116)), `CAF siCtr` = list(list(7.041349,
6.907807), list(6.894989, 7.280088), list(6.175385, 6.000358),
list(6.102115, 5.984045), list(7.022776, 6.715977), list(
6.552555, 6.479581)), `CAF siDKK3` = list(list(7.17559,
7.144398, 6.848402, 7.175592), list(6.138037, 6.633998, 5.648935,
5.827932), list(6.104392, 6.217089, 5.64459, 5.507035), list(
6.258108, 5.372207, 4.926477, 5.764134), list(6.91867, 6.72605,
6.104917, 6.708101), list(6.347572, 6.197155, 5.21501, 6.176258))), row.names = c(NA,
-6L), class = c("tbl_df", "tbl", "data.frame"))
当我尝试应用 unnest 时,我收到以下错误:Error: Incompatible lengths: 2, 4.
期望的输出:
dput(head(example))
structure(list(ID = c(17200001L, 17200001L, 17200001L, 17200001L
), NF.empty.vector = c(7.819902, 7.428268, NA, NA), NF.DKK3 = c(7.70196,
7.693524, NA, NA), CAF.siCtr = c(7.041349, 6.907807, NA, NA),
CAF.siDKK3 = c(7.17559, 7.144398, 6.848402, 7.175592)), row.names = c(NA,
4L), class = "data.frame")
> head(example)
ID NF.empty.vector NF.DKK3 CAF.siCtr CAF.siDKK3
1 17200001 7.819902 7.701960 7.041349 7.175590
2 17200001 7.428268 7.693524 6.907807 7.144398
3 17200001 NA NA NA 6.848402
4 17200001 NA NA NA 7.175592
输出:
table<-names(GSE114056.table)[-1] <- paste0(names(GSE114056.table)[-1], "_",
+ ave(seq_along(names(GSE114056.table)[-1]), names(GSE114056.table)[-1], FUN = seq_along))
> pivot_longer(GSE114056.table, cols = -ID, names_sep="_",
+ names_to = c('.value', 'grp'))
# A tibble: 165,380 x 6
ID grp `NF empty vector` `NF DKK3` `CAF siCtr` `CAF siDKK3`
<int> <chr> <dbl> <dbl> <dbl> <dbl>
1 17200001 1 7.82 7.70 7.04 7.18
2 17200001 2 7.43 7.69 6.91 7.14
3 17200001 3 NA NA NA 6.85
4 17200001 4 NA NA NA 7.18
5 17200003 1 7.38 7.31 6.89 6.14
6 17200003 2 7.31 7.20 7.28 6.63
7 17200003 3 NA NA NA 5.65
8 17200003 4 NA NA NA 5.83
9 17200005 1 6.37 6.63 6.18 6.10
10 17200005 2 6.73 6.50 6.00 6.22
# ... with 165,370 more rows
Warning message:
Expected 2 pieces. Additional pieces discarded in 10 rows [1, 2, 3, 4, 5, 6, 7, 8, 9, 10].
新错误:
table<-names(GSE114056.table)[-1] <- paste0(names(GSE114056.table)[-1], "_",
+ ave(seq_along(names(GSE114056.table)[-1]), names(GSE114056.table)[-1], FUN = seq_along))
> pivot_longer(GSE114056.table, cols = -ID, names_sep="_",
+ names_to = c('.value', 'grp'))
# A tibble: 165,380 x 6
ID grp `NF empty vector` `NF DKK3` `CAF siCtr` `CAF siDKK3`
<int> <chr> <dbl> <dbl> <dbl> <dbl>
1 17200001 1 7.82 7.70 7.04 7.18
2 17200001 2 7.43 7.69 6.91 7.14
3 17200001 3 NA NA NA 6.85
4 17200001 4 NA NA NA 7.18
5 17200003 1 7.38 7.31 6.89 6.14
6 17200003 2 7.31 7.20 7.28 6.63
7 17200003 3 NA NA NA 5.65
8 17200003 4 NA NA NA 5.83
9 17200005 1 6.37 6.63 6.18 6.10
10 17200005 2 6.73 6.50 6.00 6.22
# ... with 165,370 more rows
Warning message:
Expected 2 pieces. Additional pieces discarded in 10 rows [1, 2, 3, 4, 5, 6, 7, 8, 9, 10].
> table.finale<-as.data.frame(table)
> head(table.finale)
table
1 NF empty vector_1_1_1_1_1_1_1_1_1_1_1_1_1_1_1_1
2 NF empty vector_2_1_1_1_1_1_1_1_1_1_1_1_1_1_1_1
3 NF DKK3_1_1_1_1_1_1_1_1_1_1_1_1_1_1_1_1
4 NF DKK3_2_1_1_1_1_1_1_1_1_1_1_1_1_1_1_1
5 CAF siCtr_1_1_1_1_1_1_1_1_1_1_1_1_1_1_1_1
6 CAF siCtr_2_1_1_1_1_1_1_1_1_1_1_1_1_1_1_1
我该如何解决? 谢谢
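如果需要把同名的重复列合并成单个列,请先重命名原始数据的列名,使每个列名唯一(在列名后追加 "_序号"),因为 data.frame 中不应有重复的列名,tidyr 的函数也无法可靠地处理它们。然后,我们使用 pivot_longer 将数据从 'wide'(宽格式)重塑为 'long'(长格式)。注意:重命名步骤只需运行一次;如果重复运行,列名会变成 "NF DKK3_1_1_…" 这样的形式,并触发 "Expected 2 pieces" 警告。另外,应把 pivot_longer 的结果赋给变量;像 table <- names(...) <- ... 这样的写法只会把列名字符向量赋给 table,而不是重塑后的数据。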
library(dplyr)
library(tidyr)
# Make the duplicated column names unique by appending "_<replicate index>".
# Any numeric suffix left over from a previous run is stripped first, so
# re-running this step is idempotent and never yields "NF DKK3_1_1_1...".
# NOTE(review): a column whose real name already ends in "_<digits>" would
# also have that suffix stripped and renumbered - confirm none exist.
base_names <- sub("(_[0-9]+)+$", "", names(df1)[-1])
names(df1)[-1] <- paste0(
  base_names, "_",
  ave(seq_along(base_names), base_names, FUN = seq_along)
)

# Reshape wide -> long. The anchored pattern splits each name at its final
# "_<digits>" only: the part before it becomes the output column (.value)
# and the digits become the replicate index `grp`. Conditions with fewer
# replicates are padded with NA.
out <- pivot_longer(
  df1,
  cols = -ID,
  names_pattern = "^(.*)_([0-9]+)$",
  names_to = c(".value", "grp")
)
out
输出:
# A tibble: 24 x 6
# ID grp `NF empty vector` `NF DKK3` `CAF siCtr` `CAF siDKK3`
# <int> <chr> <dbl> <dbl> <dbl> <dbl>
# 1 17200001 1 7.82 7.70 7.04 7.18
# 2 17200001 2 7.43 7.69 6.91 7.14
# 3 17200001 3 NA NA NA 6.85
# 4 17200001 4 NA NA NA 7.18
# 5 17200003 1 7.38 7.31 6.89 6.14
# 6 17200003 2 7.31 7.20 7.28 6.63
# 7 17200003 3 NA NA NA 5.65
# 8 17200003 4 NA NA NA 5.83
# 9 17200005 1 6.37 6.63 6.18 6.10
#10 17200005 2 6.73 6.50 6.00 6.22
# … with 14 more rows
数据
# Example data (first 6 rows): an integer ID column followed by 10 numeric
# expression columns with deliberately duplicated names - two columns each
# for `NF empty vector`, `NF DKK3` and `CAF siCtr`, and four columns for
# `CAF siDKK3`.
df1 <- structure(list(ID = c(17200001L, 17200003L, 17200005L, 17200007L,
17200009L, 17200011L), `NF empty vector` = c(7.819902, 7.376688,
6.374864, 6.397508, 7.358877, 6.823258), `NF empty vector` = c(7.428268,
7.31238, 6.732675, 6.454915, 7.281378, 6.697764), `NF DKK3` = c(7.70196,
7.305377, 6.628506, 6.667738, 7.325195, 7.066599), `NF DKK3` = c(7.693524,
7.201907, 6.503982, 6.395881, 7.156792, 6.520116), `CAF siCtr` = c(7.041349,
6.894989, 6.175385, 6.102115, 7.022776, 6.552555), `CAF siCtr` = c(6.907807,
7.280088, 6.000358, 5.984045, 6.715977, 6.479581), `CAF siDKK3` = c(7.17559,
6.138037, 6.104392, 6.258108, 6.91867, 6.347572), `CAF siDKK3` = c(7.144398,
6.633998, 6.217089, 5.372207, 6.72605, 6.197155), `CAF siDKK3` = c(6.848402,
5.648935, 5.64459, 4.926477, 6.104917, 5.21501), `CAF siDKK3` = c(7.175592,
5.827932, 5.507035, 5.764134, 6.708101, 6.176258)), row.names = c(NA,
6L), class = "data.frame")