将嵌套列表中的元素提取到 data.matrix

Extracting elements from a nested list to a data.matrix

我有一个包含 2 个元素的列表,每个元素又包含 3 个子元素。这 3 个子元素中的每一个都是长度为 2 的嵌套列表,其中每个元素是一个由行和列组成的 tibble。各个 tibble 的行数是固定的,但从第三列之后的列数各不相同。

结构如下:

# Example input (dput-style literal): a list of 2 elements, each holding 3
# sub-lists; every sub-list has two tibbles named ENSG0000014 and ENSG0000015.
# Every tibble starts with the character columns `name`, `ENSG` and `expr`,
# followed by variant columns (four for ENSG0000014, two for ENSG0000015).
# All columns, including `expr` and the variant columns, are character vectors.
# NOTE(review): each column holds 3 values while row.names = c(NA, -6L)
# declares 6 rows; the printed output later in this file shows 6 rows, so the
# column vectors were presumably shortened by hand -- confirm before reuse.
my_list <- list(list(list(ENSG0000014 = structure(list(name = c("GTEX-1122O", 
"GTEX-11EM3", "GTEX-11EQ9"), ENSG = c("ENSG0000014", "ENSG0000014", 
"ENSG0000014"), expr = c(" 9.940670e-02", " 1.289670e-01", "-7.394904e-03"
), `1_43222779_A_G_b37` = c("1", "1", "2"), `1_43222856_A_G_b37` = c("0", 
"0", "0"), `1_43223126_C_T_b37` = c("0", "1", "0"), `1_43223317_T_C_b37` = c("1", 
"0", "0")), row.names = c(NA, -6L), class = c("tbl_df", "tbl", 
"data.frame")), ENSG0000015 = structure(list(name = c("GTEX-1122O", 
"GTEX-11EM3", "GTEX-11EQ9"), ENSG = c("ENSG0000015", "ENSG0000015", 
"ENSG0000015"), expr = c(" 9.940670e-02", " 1.289670e-01", "-7.394904e-03"
), `1_43222779_A_G_b37` = c("1", "1", "2"), `1_43222856_A_G_b37` = c("0", 
"0", "0")), row.names = c(NA, -6L), class = c("tbl_df", "tbl", 
"data.frame"))), list(ENSG0000014 = structure(list(name = c("GTEX-1122O", 
"GTEX-11EM3", "GTEX-11EQ9"), ENSG = c("ENSG0000014", "ENSG0000014", 
"ENSG0000014"), expr = c(" 9.940670e-02", " 1.289670e-01", "-7.394904e-03"
), `1_43222779_A_G_b37` = c("1", "1", "2"), `1_43222856_A_G_b37` = c("0", 
"0", "0"), `1_43223126_C_T_b37` = c("0", "1", "0"), `1_43223317_T_C_b37` = c("1", 
"0", "0")), row.names = c(NA, -6L), class = c("tbl_df", "tbl", 
"data.frame")), ENSG0000015 = structure(list(name = c("GTEX-1122O", 
"GTEX-11EM3", "GTEX-11EQ9"), ENSG = c("ENSG0000015", "ENSG0000015", 
"ENSG0000015"), expr = c(" 9.940670e-02", " 1.289670e-01", "-7.394904e-03"
), `1_43222779_A_G_b37` = c("1", "1", "2"), `1_43222856_A_G_b37` = c("0", 
"0", "0")), row.names = c(NA, -6L), class = c("tbl_df", "tbl", 
"data.frame"))), list(ENSG0000014 = structure(list(name = c("GTEX-1122O", 
"GTEX-11EM3", "GTEX-11EQ9"), ENSG = c("ENSG0000014", "ENSG0000014", 
"ENSG0000014"), expr = c(" 9.940670e-02", " 1.289670e-01", "-7.394904e-03"
), `1_43222779_A_G_b37` = c("1", "1", "2"), `1_43222856_A_G_b37` = c("0", 
"0", "0"), `1_43223126_C_T_b37` = c("0", "1", "0"), `1_43223317_T_C_b37` = c("1", 
"0", "0")), row.names = c(NA, -6L), class = c("tbl_df", "tbl", 
"data.frame")), ENSG0000015 = structure(list(name = c("GTEX-1122O", 
"GTEX-11EM3", "GTEX-11EQ9"), ENSG = c("ENSG0000015", "ENSG0000015", 
"ENSG0000015"), expr = c(" 9.940670e-02", " 1.289670e-01", "-7.394904e-03"
), `1_43222779_A_G_b37` = c("1", "1", "2"), `1_43222856_A_G_b37` = c("0", 
"0", "0")), row.names = c(NA, -6L), class = c("tbl_df", "tbl", 
"data.frame")))), list(list(ENSG0000014 = structure(list(name = c("GTEX-1122O", 
"GTEX-11EM3", "GTEX-11EQ9"), ENSG = c("ENSG0000014", "ENSG0000014", 
"ENSG0000014"), expr = c(" 9.940670e-02", " 1.289670e-01", "-7.394904e-03"
), `1_43222779_A_G_b37` = c("1", "1", "2"), `1_43222856_A_G_b37` = c("0", 
"0", "0"), `1_43223126_C_T_b37` = c("0", "1", "0"), `1_43223317_T_C_b37` = c("1", 
"0", "0")), row.names = c(NA, -6L), class = c("tbl_df", "tbl", 
"data.frame")), ENSG0000015 = structure(list(name = c("GTEX-1122O", 
"GTEX-11EM3", "GTEX-11EQ9"), ENSG = c("ENSG0000015", "ENSG0000015", 
"ENSG0000015"), expr = c(" 9.940670e-02", " 1.289670e-01", "-7.394904e-03"
), `1_43222779_A_G_b37` = c("1", "1", "2"), `1_43222856_A_G_b37` = c("0", 
"0", "0")), row.names = c(NA, -6L), class = c("tbl_df", "tbl", 
"data.frame"))), list(ENSG0000014 = structure(list(name = c("GTEX-1122O", 
"GTEX-11EM3", "GTEX-11EQ9"), ENSG = c("ENSG0000014", "ENSG0000014", 
"ENSG0000014"), expr = c(" 9.940670e-02", " 1.289670e-01", "-7.394904e-03"
), `1_43222779_A_G_b37` = c("1", "1", "2"), `1_43222856_A_G_b37` = c("0", 
"0", "0"), `1_43223126_C_T_b37` = c("0", "1", "0"), `1_43223317_T_C_b37` = c("1", 
"0", "0")), row.names = c(NA, -6L), class = c("tbl_df", "tbl", 
"data.frame")), ENSG0000015 = structure(list(name = c("GTEX-1122O", 
"GTEX-11EM3", "GTEX-11EQ9"), ENSG = c("ENSG0000015", "ENSG0000015", 
"ENSG0000015"), expr = c(" 9.940670e-02", " 1.289670e-01", "-7.394904e-03"
), `1_43222779_A_G_b37` = c("1", "1", "2"), `1_43222856_A_G_b37` = c("0", 
"0", "0")), row.names = c(NA, -6L), class = c("tbl_df", "tbl", 
"data.frame"))), list(ENSG0000014 = structure(list(name = c("GTEX-1122O", 
"GTEX-11EM3", "GTEX-11EQ9"), ENSG = c("ENSG0000014", "ENSG0000014", 
"ENSG0000014"), expr = c(" 9.940670e-02", " 1.289670e-01", "-7.394904e-03"
), `1_43222779_A_G_b37` = c("1", "1", "2"), `1_43222856_A_G_b37` = c("0", 
"0", "0"), `1_43223126_C_T_b37` = c("0", "1", "0"), `1_43223317_T_C_b37` = c("1", 
"0", "0")), row.names = c(NA, -6L), class = c("tbl_df", "tbl", 
"data.frame")), ENSG0000015 = structure(list(name = c("GTEX-1122O", 
"GTEX-11EM3", "GTEX-11EQ9"), ENSG = c("ENSG0000015", "ENSG0000015", 
"ENSG0000015"), expr = c(" 9.940670e-02", " 1.289670e-01", "-7.394904e-03"
), `1_43222779_A_G_b37` = c("1", "1", "2"), `1_43222856_A_G_b37` = c("0", 
"0", "0")), row.names = c(NA, -6L), class = c("tbl_df", "tbl", 
"data.frame")))))

对于这 3 个子元素中的每一个,我首先需要从其中的 tibble 里提取除前三列之外的所有列,以创建新的 data.matrix。然后最好通过循环自动执行此操作,以便为 3 个子元素中的每一个都得到同样形式的 data.matrix。这样,列表的 2 个主要元素中的每一个都会输出三个矩阵。

如果你能提供帮助,我将不胜感激。

由于列表是嵌套的,一种选择是使用 rrapply 对其进行递归处理:

library(rrapply)
library(dplyr)
# Recurse through `my_list` and, at every node whose class matches
# "data.frame" (tibbles included), drop the first three columns and keep
# the rest. `how = "list"` asks rrapply for a nested-list result.
out <- rrapply(
  my_list,
  classes = "data.frame",
  f = function(tbl) dplyr::select(tbl, -(1:3)),
  how = "list"
)

输出:

out
[[1]]
[[1]][[1]]
[[1]][[1]]$ENSG0000014
# A tibble: 6 × 4
  `1_43222779_A_G_b37` `1_43222856_A_G_b37` `1_43223126_C_T_b37` `1_43223317_T_C_b37`
  <chr>                <chr>                <chr>                <chr>               
1 1                    0                    0                    1                   
2 1                    0                    1                    0                   
3 2                    0                    0                    0                   
4 1                    0                    0                    1                   
5 1                    0                    1                    0                   
6 2                    0                    0                    0                   

[[1]][[1]]$ENSG0000015
# A tibble: 6 × 2
  `1_43222779_A_G_b37` `1_43222856_A_G_b37`
  <chr>                <chr>               
1 1                    0                   
2 1                    0                   
3 2                    0                   
4 1                    0                   
5 1                    0                   
6 2                    0       
...