将 tidyr 的 unnest 应用于数据框时出现错误消息

Error message when applying tidyr unnest to a frame

我有一个 table,其中有一列包含 ID 号,另外 10 列包含我想要绘制的基因表达水平。在这 10 列中,有 6 列属于各有两个重复的 3 个条件(一式两份),另外 4 列属于有四个重复的 1 个条件(一式四份)。

dput(head(GSE114056.table))
structure(list(ID = c(17200001L, 17200003L, 17200005L, 17200007L, 
17200009L, 17200011L), `NF empty vector` = c(7.819902, 7.376688, 
6.374864, 6.397508, 7.358877, 6.823258), `NF empty vector` = c(7.428268, 
7.31238, 6.732675, 6.454915, 7.281378, 6.697764), `NF DKK3` = c(7.70196, 
7.305377, 6.628506, 6.667738, 7.325195, 7.066599), `NF DKK3` = c(7.693524, 
7.201907, 6.503982, 6.395881, 7.156792, 6.520116), `CAF siCtr` = c(7.041349, 
6.894989, 6.175385, 6.102115, 7.022776, 6.552555), `CAF siCtr` = c(6.907807, 
7.280088, 6.000358, 5.984045, 6.715977, 6.479581), `CAF siDKK3` = c(7.17559, 
6.138037, 6.104392, 6.258108, 6.91867, 6.347572), `CAF siDKK3` = c(7.144398, 
6.633998, 6.217089, 5.372207, 6.72605, 6.197155), `CAF siDKK3` = c(6.848402, 
5.648935, 5.64459, 4.926477, 6.104917, 5.21501), `CAF siDKK3` = c(7.175592, 
5.827932, 5.507035, 5.764134, 6.708101, 6.176258)), row.names = c(NA, 
6L), class = "data.frame")

我设法将值放在同一列中,但我收到 unnest 错误。这就是我当前的 table 的样子:

dput(head(GSE114056.table_replicates))
structure(list(ID = c(17200001L, 17200003L, 17200005L, 17200007L, 
17200009L, 17200011L), `NF empty vector` = list(list(7.819902, 
    7.428268), list(7.376688, 7.31238), list(6.374864, 6.732675), 
    list(6.397508, 6.454915), list(7.358877, 7.281378), list(
        6.823258, 6.697764)), `NF DKK3` = list(list(7.70196, 
    7.693524), list(7.305377, 7.201907), list(6.628506, 6.503982), 
    list(6.667738, 6.395881), list(7.325195, 7.156792), list(
        7.066599, 6.520116)), `CAF siCtr` = list(list(7.041349, 
    6.907807), list(6.894989, 7.280088), list(6.175385, 6.000358), 
    list(6.102115, 5.984045), list(7.022776, 6.715977), list(
        6.552555, 6.479581)), `CAF siDKK3` = list(list(7.17559, 
    7.144398, 6.848402, 7.175592), list(6.138037, 6.633998, 5.648935, 
    5.827932), list(6.104392, 6.217089, 5.64459, 5.507035), list(
    6.258108, 5.372207, 4.926477, 5.764134), list(6.91867, 6.72605, 
    6.104917, 6.708101), list(6.347572, 6.197155, 5.21501, 6.176258))), row.names = c(NA, 
-6L), class = c("tbl_df", "tbl", "data.frame"))

当我尝试应用 unnest 时,我收到以下错误:Error: Incompatible lengths: 2, 4.

期望的输出:

dput(head(example))
structure(list(ID = c(17200001L, 17200001L, 17200001L, 17200001L
), NF.empty.vector = c(7.819902, 7.428268, NA, NA), NF.DKK3 = c(7.70196, 
7.693524, NA, NA), CAF.siCtr = c(7.041349, 6.907807, NA, NA), 
    CAF.siDKK3 = c(7.17559, 7.144398, 6.848402, 7.175592)), row.names = c(NA, 
4L), class = "data.frame")
> head(example)
        ID NF.empty.vector  NF.DKK3 CAF.siCtr CAF.siDKK3
1 17200001        7.819902 7.701960  7.041349   7.175590
2 17200001        7.428268 7.693524  6.907807   7.144398
3 17200001              NA       NA        NA   6.848402
4 17200001              NA       NA        NA   7.175592

输出:

table<-names(GSE114056.table)[-1] <- paste0(names(GSE114056.table)[-1], "_", 
+                          ave(seq_along(names(GSE114056.table)[-1]), names(GSE114056.table)[-1], FUN = seq_along))
> pivot_longer(GSE114056.table, cols = -ID, names_sep="_",
+              names_to = c('.value', 'grp'))
# A tibble: 165,380 x 6
         ID grp   `NF empty vector` `NF DKK3` `CAF siCtr` `CAF siDKK3`
      <int> <chr>             <dbl>     <dbl>       <dbl>        <dbl>
 1 17200001 1                  7.82      7.70        7.04         7.18
 2 17200001 2                  7.43      7.69        6.91         7.14
 3 17200001 3                 NA        NA          NA            6.85
 4 17200001 4                 NA        NA          NA            7.18
 5 17200003 1                  7.38      7.31        6.89         6.14
 6 17200003 2                  7.31      7.20        7.28         6.63
 7 17200003 3                 NA        NA          NA            5.65
 8 17200003 4                 NA        NA          NA            5.83
 9 17200005 1                  6.37      6.63        6.18         6.10
10 17200005 2                  6.73      6.50        6.00         6.22
# ... with 165,370 more rows
Warning message:
Expected 2 pieces. Additional pieces discarded in 10 rows [1, 2, 3, 4, 5, 6, 7, 8, 9, 10].

新错误:

table<-names(GSE114056.table)[-1] <- paste0(names(GSE114056.table)[-1], "_", 
+                          ave(seq_along(names(GSE114056.table)[-1]), names(GSE114056.table)[-1], FUN = seq_along))
> pivot_longer(GSE114056.table, cols = -ID, names_sep="_",
+              names_to = c('.value', 'grp'))
# A tibble: 165,380 x 6
         ID grp   `NF empty vector` `NF DKK3` `CAF siCtr` `CAF siDKK3`
      <int> <chr>             <dbl>     <dbl>       <dbl>        <dbl>
 1 17200001 1                  7.82      7.70        7.04         7.18
 2 17200001 2                  7.43      7.69        6.91         7.14
 3 17200001 3                 NA        NA          NA            6.85
 4 17200001 4                 NA        NA          NA            7.18
 5 17200003 1                  7.38      7.31        6.89         6.14
 6 17200003 2                  7.31      7.20        7.28         6.63
 7 17200003 3                 NA        NA          NA            5.65
 8 17200003 4                 NA        NA          NA            5.83
 9 17200005 1                  6.37      6.63        6.18         6.10
10 17200005 2                  6.73      6.50        6.00         6.22
# ... with 165,370 more rows
Warning message:
Expected 2 pieces. Additional pieces discarded in 10 rows [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]. 
> table.finale<-as.data.frame(table)
> head(table.finale)
                                            table
1 NF empty vector_1_1_1_1_1_1_1_1_1_1_1_1_1_1_1_1
2 NF empty vector_2_1_1_1_1_1_1_1_1_1_1_1_1_1_1_1
3         NF DKK3_1_1_1_1_1_1_1_1_1_1_1_1_1_1_1_1
4         NF DKK3_2_1_1_1_1_1_1_1_1_1_1_1_1_1_1_1
5       CAF siCtr_1_1_1_1_1_1_1_1_1_1_1_1_1_1_1_1
6       CAF siCtr_2_1_1_1_1_1_1_1_1_1_1_1_1_1_1_1  

我该如何解决? 谢谢

如果我们需要把同名的重复列合并成单个列,请先重命名数据的列名,使其唯一,因为 data.frame 中不允许重复的列名。然后,我们使用 pivot_longer 将数据从宽格式('wide')重塑为长格式('long')。

library(dplyr)
library(tidyr)
# Make the replicate columns uniquely named by appending "_<k>", where k is
# the running count of that label among the non-ID columns (the two
# `NF empty vector` columns become `NF empty vector_1` and
# `NF empty vector_2`, and so on).
# Run this once on the original frame: running it again on the renamed frame
# appends another suffix to every column (e.g. `NF empty vector_1_1`).
value_cols <- names(df1)[-1]
replicate_no <- ave(seq_along(value_cols), value_cols, FUN = seq_along)
names(df1)[-1] <- paste0(value_cols, "_", replicate_no)

# Reshape from wide to long. The part of each name before the trailing
# "_<digits>" becomes the output column (`.value`); the digits become `grp`.
# An anchored `names_pattern` is used instead of `names_sep = "_"` so that
# labels which themselves contain an underscore are still split only at the
# final replicate suffix. Conditions with fewer replicates are padded with NA.
out <- pivot_longer(
  df1,
  cols = -ID,
  names_to = c(".value", "grp"),
  names_pattern = "^(.*)_(\\d+)$"
)
out

输出

# A tibble: 24 x 6
#         ID grp   `NF empty vector` `NF DKK3` `CAF siCtr` `CAF siDKK3`
#      <int> <chr>             <dbl>     <dbl>       <dbl>        <dbl>
# 1 17200001 1                  7.82      7.70        7.04         7.18
# 2 17200001 2                  7.43      7.69        6.91         7.14
# 3 17200001 3                 NA        NA          NA            6.85
# 4 17200001 4                 NA        NA          NA            7.18
# 5 17200003 1                  7.38      7.31        6.89         6.14
# 6 17200003 2                  7.31      7.20        7.28         6.63
# 7 17200003 3                 NA        NA          NA            5.65
# 8 17200003 4                 NA        NA          NA            5.83
# 9 17200005 1                  6.37      6.63        6.18         6.10
#10 17200005 2                  6.73      6.50        6.00         6.22
# … with 14 more rows

数据

# Example input: one ID column followed by ten expression columns whose
# labels are deliberately duplicated (one column per replicate).
df1 <- structure(
  list(
    ID = c(
      17200001L, 17200003L, 17200005L, 17200007L, 17200009L, 17200011L
    ),
    # NF empty vector: 2 replicates
    `NF empty vector` = c(
      7.819902, 7.376688, 6.374864, 6.397508, 7.358877, 6.823258
    ),
    `NF empty vector` = c(
      7.428268, 7.31238, 6.732675, 6.454915, 7.281378, 6.697764
    ),
    # NF DKK3: 2 replicates
    `NF DKK3` = c(
      7.70196, 7.305377, 6.628506, 6.667738, 7.325195, 7.066599
    ),
    `NF DKK3` = c(
      7.693524, 7.201907, 6.503982, 6.395881, 7.156792, 6.520116
    ),
    # CAF siCtr: 2 replicates
    `CAF siCtr` = c(
      7.041349, 6.894989, 6.175385, 6.102115, 7.022776, 6.552555
    ),
    `CAF siCtr` = c(
      6.907807, 7.280088, 6.000358, 5.984045, 6.715977, 6.479581
    ),
    # CAF siDKK3: 4 replicates
    `CAF siDKK3` = c(
      7.17559, 6.138037, 6.104392, 6.258108, 6.91867, 6.347572
    ),
    `CAF siDKK3` = c(
      7.144398, 6.633998, 6.217089, 5.372207, 6.72605, 6.197155
    ),
    `CAF siDKK3` = c(
      6.848402, 5.648935, 5.64459, 4.926477, 6.104917, 5.21501
    ),
    `CAF siDKK3` = c(
      7.175592, 5.827932, 5.507035, 5.764134, 6.708101, 6.176258
    )
  ),
  row.names = c(NA, 6L),
  class = "data.frame"
)