在 R 中对嵌套的数据框列表进行过滤和重新分组
Filter and re-categorize on a nested list of dataframes in R
我有一个包含 n
个元素的列表,每个元素都包含一个数据框。我们以start_list
为例:
# Example input: a named list with one tibble-classed data frame per ENSG
# identifier. Every data frame has the columns `name`, `ENSG` and `expr`,
# followed by character-coded columns named like `1_43222779_A_G_b37`.
# structure() is used so that no package has to be attached to build it.
start_list <- list(
  ENSG0000014 = structure(
    list(
      name = c("E-1122O", "E-11EM3", "E-11EMC", "E-1442O", "E-1132O"),
      ENSG = rep("ENSG0000014", 5),
      expr = c(
        " 9.940670e-02", " 1.289670e-01", "-7.394904e-03",
        " 9.940670e-02", " 9.940670e-02"
      ),
      `1_43222779_A_G_b37` = c("1", "1", "2", "1", "0"),
      `1_43222856_A_G_b37` = c("0", "0", "0", "1", "1"),
      `1_43223126_C_T_b37` = c("0", "1", "0", "1", "2"),
      `1_43223317_T_C_b37` = c("1", "0", "0", "2", "1")
    ),
    row.names = c(NA, -5L),
    class = c("tbl_df", "tbl", "data.frame")
  ),
  ENSG0000015 = structure(
    list(
      name = c("E-1122O", "E-11EM3", "E-11EMC", "E-1442O", "E-1132O"),
      ENSG = rep("ENSG0000015", 5),
      expr = c(
        " 9.940670e-02", " 1.289670e-01", "-7.394904e-03",
        " 9.940670e-02", " 1.289670e-01"
      ),
      `1_43222779_A_G_b37` = c("0", "1", "0", "1", "2"),
      `1_43222856_A_G_b37` = c("1", "1", "2", "1", "0")
    ),
    row.names = c(NA, -5L),
    class = c("tbl_df", "tbl", "data.frame")
  )
)
此外,还有一个名为 set_id 的数据框,其 IID 列列出了 start_list 中 name 列的各个个体,并用五个 TRUE/FALSE 逻辑列(set_1 至 set_5)标记每个个体在各集合中的取值:
# Membership table: one row per individual (`IID`) and one logical column
# per set (`set_1` to `set_5`).
set_id <- data.frame(
  IID = c("E-1122O", "E-11EM3", "E-11EMC", "E-1442O", "E-1132O"),
  set_1 = c(TRUE, FALSE, TRUE, TRUE, TRUE),
  set_2 = c(TRUE, TRUE, FALSE, FALSE, TRUE),
  set_3 = c(FALSE, TRUE, TRUE, FALSE, TRUE),
  set_4 = c(TRUE, FALSE, TRUE, TRUE, FALSE),
  set_5 = c(TRUE, FALSE, FALSE, TRUE, TRUE),
  stringsAsFactors = FALSE
)
我需要根据这些个体集合过滤 start_list 中的每个数据框:对每个集合,只保留该集合取值为 FALSE 的 IID 所对应的行,同时删除每个数据框的第二列和第三列(ENSG 和 expr),并创建一个新列表 list_prime_out:
# Expected result: for every data frame in `start_list`, one tibble-classed
# data frame per set, keeping only the rows whose IID is FALSE in that set and
# dropping the `ENSG` and `expr` columns. All values are copied row-for-row
# from `start_list`, so each element only has the columns its source has
# (four variant columns for ENSG0000014, two for ENSG0000015).
list_prime_out <- list(
  ENSG0000014 = list(
    set_1 = structure(list(
      name = "E-11EM3",
      `1_43222779_A_G_b37` = "1",
      `1_43222856_A_G_b37` = "0",
      `1_43223126_C_T_b37` = "1",
      `1_43223317_T_C_b37` = "0"
    ), row.names = c(NA, -1L), class = c("tbl_df", "tbl", "data.frame")),
    set_2 = structure(list(
      name = c("E-11EMC", "E-1442O"),
      `1_43222779_A_G_b37` = c("2", "1"),
      `1_43222856_A_G_b37` = c("0", "1"),
      `1_43223126_C_T_b37` = c("0", "1"),
      `1_43223317_T_C_b37` = c("0", "2")
    ), row.names = c(NA, -2L), class = c("tbl_df", "tbl", "data.frame")),
    set_3 = structure(list(
      name = c("E-1122O", "E-1442O"),
      `1_43222779_A_G_b37` = c("1", "1"),
      `1_43222856_A_G_b37` = c("0", "1"),
      `1_43223126_C_T_b37` = c("0", "1"),
      `1_43223317_T_C_b37` = c("1", "2")
    ), row.names = c(NA, -2L), class = c("tbl_df", "tbl", "data.frame")),
    set_4 = structure(list(
      name = c("E-11EM3", "E-1132O"),
      `1_43222779_A_G_b37` = c("1", "0"),
      `1_43222856_A_G_b37` = c("0", "1"),
      `1_43223126_C_T_b37` = c("1", "2"),
      `1_43223317_T_C_b37` = c("0", "1")
    ), row.names = c(NA, -2L), class = c("tbl_df", "tbl", "data.frame")),
    set_5 = structure(list(
      name = c("E-11EM3", "E-11EMC"),
      `1_43222779_A_G_b37` = c("1", "2"),
      `1_43222856_A_G_b37` = c("0", "0"),
      `1_43223126_C_T_b37` = c("1", "0"),
      `1_43223317_T_C_b37` = c("0", "0")
    ), row.names = c(NA, -2L), class = c("tbl_df", "tbl", "data.frame"))
  ),
  ENSG0000015 = list(
    set_1 = structure(list(
      name = "E-11EM3",
      `1_43222779_A_G_b37` = "1",
      `1_43222856_A_G_b37` = "1"
    ), row.names = c(NA, -1L), class = c("tbl_df", "tbl", "data.frame")),
    set_2 = structure(list(
      name = c("E-11EMC", "E-1442O"),
      `1_43222779_A_G_b37` = c("0", "1"),
      `1_43222856_A_G_b37` = c("2", "1")
    ), row.names = c(NA, -2L), class = c("tbl_df", "tbl", "data.frame")),
    set_3 = structure(list(
      name = c("E-1122O", "E-1442O"),
      `1_43222779_A_G_b37` = c("0", "1"),
      `1_43222856_A_G_b37` = c("1", "1")
    ), row.names = c(NA, -2L), class = c("tbl_df", "tbl", "data.frame")),
    set_4 = structure(list(
      name = c("E-11EM3", "E-1132O"),
      `1_43222779_A_G_b37` = c("1", "2"),
      `1_43222856_A_G_b37` = c("1", "0")
    ), row.names = c(NA, -2L), class = c("tbl_df", "tbl", "data.frame")),
    set_5 = structure(list(
      name = c("E-11EM3", "E-11EMC"),
      `1_43222779_A_G_b37` = c("1", "0"),
      `1_43222856_A_G_b37` = c("1", "2")
    ), row.names = c(NA, -2L), class = c("tbl_df", "tbl", "data.frame"))
  )
)

# Expected structure, shown in the tibble-style format used by str():
str(list_prime_out)
#> List of 2
#>  $ ENSG0000014:List of 5
#>   ..$ set_1: tibble [1 × 5] (S3: tbl_df/tbl/data.frame)
#>   .. ..$ name              : chr "E-11EM3"
#>   .. ..$ 1_43222779_A_G_b37: chr "1"
#>   .. ..$ 1_43222856_A_G_b37: chr "0"
#>   .. ..$ 1_43223126_C_T_b37: chr "1"
#>   .. ..$ 1_43223317_T_C_b37: chr "0"
#>   ..$ set_2: tibble [2 × 5] (S3: tbl_df/tbl/data.frame)
#>   .. ..$ name              : chr [1:2] "E-11EMC" "E-1442O"
#>   .. ..$ 1_43222779_A_G_b37: chr [1:2] "2" "1"
#>   .. ..$ 1_43222856_A_G_b37: chr [1:2] "0" "1"
#>   .. ..$ 1_43223126_C_T_b37: chr [1:2] "0" "1"
#>   .. ..$ 1_43223317_T_C_b37: chr [1:2] "0" "2"
#>   ..$ set_3: tibble [2 × 5] (S3: tbl_df/tbl/data.frame)
#>   .. ..$ name              : chr [1:2] "E-1122O" "E-1442O"
#>   .. ..$ 1_43222779_A_G_b37: chr [1:2] "1" "1"
#>   .. ..$ 1_43222856_A_G_b37: chr [1:2] "0" "1"
#>   .. ..$ 1_43223126_C_T_b37: chr [1:2] "0" "1"
#>   .. ..$ 1_43223317_T_C_b37: chr [1:2] "1" "2"
#>   ..$ set_4: tibble [2 × 5] (S3: tbl_df/tbl/data.frame)
#>   .. ..$ name              : chr [1:2] "E-11EM3" "E-1132O"
#>   .. ..$ 1_43222779_A_G_b37: chr [1:2] "1" "0"
#>   .. ..$ 1_43222856_A_G_b37: chr [1:2] "0" "1"
#>   .. ..$ 1_43223126_C_T_b37: chr [1:2] "1" "2"
#>   .. ..$ 1_43223317_T_C_b37: chr [1:2] "0" "1"
#>   ..$ set_5: tibble [2 × 5] (S3: tbl_df/tbl/data.frame)
#>   .. ..$ name              : chr [1:2] "E-11EM3" "E-11EMC"
#>   .. ..$ 1_43222779_A_G_b37: chr [1:2] "1" "2"
#>   .. ..$ 1_43222856_A_G_b37: chr [1:2] "0" "0"
#>   .. ..$ 1_43223126_C_T_b37: chr [1:2] "1" "0"
#>   .. ..$ 1_43223317_T_C_b37: chr [1:2] "0" "0"
#>  $ ENSG0000015:List of 5
#>   ..$ set_1: tibble [1 × 3] (S3: tbl_df/tbl/data.frame)
#>   .. ..$ name              : chr "E-11EM3"
#>   .. ..$ 1_43222779_A_G_b37: chr "1"
#>   .. ..$ 1_43222856_A_G_b37: chr "1"
#>   ..$ set_2: tibble [2 × 3] (S3: tbl_df/tbl/data.frame)
#>   .. ..$ name              : chr [1:2] "E-11EMC" "E-1442O"
#>   .. ..$ 1_43222779_A_G_b37: chr [1:2] "0" "1"
#>   .. ..$ 1_43222856_A_G_b37: chr [1:2] "2" "1"
#>   ..$ set_3: tibble [2 × 3] (S3: tbl_df/tbl/data.frame)
#>   .. ..$ name              : chr [1:2] "E-1122O" "E-1442O"
#>   .. ..$ 1_43222779_A_G_b37: chr [1:2] "0" "1"
#>   .. ..$ 1_43222856_A_G_b37: chr [1:2] "1" "1"
#>   ..$ set_4: tibble [2 × 3] (S3: tbl_df/tbl/data.frame)
#>   .. ..$ name              : chr [1:2] "E-11EM3" "E-1132O"
#>   .. ..$ 1_43222779_A_G_b37: chr [1:2] "1" "2"
#>   .. ..$ 1_43222856_A_G_b37: chr [1:2] "1" "0"
#>   ..$ set_5: tibble [2 × 3] (S3: tbl_df/tbl/data.frame)
#>   .. ..$ name              : chr [1:2] "E-11EM3" "E-11EMC"
#>   .. ..$ 1_43222779_A_G_b37: chr [1:2] "1" "0"
#>   .. ..$ 1_43222856_A_G_b37: chr [1:2] "1" "2"
非常感谢你的帮助。
这是使用 {dplyr} 和 {purrr} 的解决方案:
library(dplyr)
library(purrr)
# Build a named list with one character vector per set column: the `IID`s
# whose flag in that set is FALSE (TRUE flags become NA and are dropped).
set_id_list <- set_id %>%
  select(set_1:set_5) %>%
  map(function(flag) {
    kept_iids <- if_else(flag, NA_character_, set_id$IID)
    kept_iids[!is.na(kept_iids)]
  })
# Two-level map:
#   1. strip the ENSG-to-expr column range from every `start_list` data frame;
#   2. for each stripped data frame, produce one filtered copy per set,
#      keeping only the rows whose `name` is among that set's `IID`s.
list_prime_out <- start_list %>%
  map(~ select(.x, !(ENSG:expr))) %>%
  map(function(trimmed_tbl) {
    map(set_id_list, function(kept_iids) {
      filter(trimmed_tbl, name %in% kept_iids)
    })
  })
输出:
# > list_prime_out
$ENSG0000014
$ENSG0000014$set_1
# A tibble: 1 x 5
name `1_43222779_A_~` `1_43222856_A_~` `1_43223126_C_~` `1_43223317_T_~`
<chr> <chr> <chr> <chr> <chr>
1 E-11EM3 1 0 1 0
$ENSG0000014$set_2
# A tibble: 2 x 5
name `1_43222779_A_~` `1_43222856_A_~` `1_43223126_C_~` `1_43223317_T_~`
<chr> <chr> <chr> <chr> <chr>
1 E-11EMC 2 0 0 0
2 E-1442O 1 1 1 2
$ENSG0000014$set_3
# A tibble: 2 x 5
name `1_43222779_A_~` `1_43222856_A_~` `1_43223126_C_~` `1_43223317_T_~`
<chr> <chr> <chr> <chr> <chr>
1 E-1122O 1 0 0 1
2 E-1442O 1 1 1 2
$ENSG0000014$set_4
# A tibble: 2 x 5
name `1_43222779_A_~` `1_43222856_A_~` `1_43223126_C_~` `1_43223317_T_~`
<chr> <chr> <chr> <chr> <chr>
1 E-11EM3 1 0 1 0
2 E-1132O 0 1 2 1
$ENSG0000014$set_5
# A tibble: 2 x 5
name `1_43222779_A_~` `1_43222856_A_~` `1_43223126_C_~` `1_43223317_T_~`
<chr> <chr> <chr> <chr> <chr>
1 E-11EM3 1 0 1 0
2 E-11EMC 2 0 0 0
$ENSG0000015
$ENSG0000015$set_1
# A tibble: 1 x 3
name `1_43222779_A_G_b37` `1_43222856_A_G_b37`
<chr> <chr> <chr>
1 E-11EM3 1 1
$ENSG0000015$set_2
# A tibble: 2 x 3
name `1_43222779_A_G_b37` `1_43222856_A_G_b37`
<chr> <chr> <chr>
1 E-11EMC 0 2
2 E-1442O 1 1
$ENSG0000015$set_3
# A tibble: 2 x 3
name `1_43222779_A_G_b37` `1_43222856_A_G_b37`
<chr> <chr> <chr>
1 E-1122O 0 1
2 E-1442O 1 1
$ENSG0000015$set_4
# A tibble: 2 x 3
name `1_43222779_A_G_b37` `1_43222856_A_G_b37`
<chr> <chr> <chr>
1 E-11EM3 1 1
2 E-1132O 2 0
$ENSG0000015$set_5
# A tibble: 2 x 3
name `1_43222779_A_G_b37` `1_43222856_A_G_b37`
<chr> <chr> <chr>
1 E-11EM3 1 1
2 E-11EMC 0 2
由 reprex 包 (v2.0.1) 创建于 2022-03-02
我有一个包含 n
个元素的列表,每个元素都包含一个数据框。我们以start_list
为例:
# Example input: a named list with one tibble-classed data frame per ENSG
# identifier. Every data frame has the columns `name`, `ENSG` and `expr`,
# followed by character-coded columns named like `1_43222779_A_G_b37`.
# structure() is used so that no package has to be attached to build it.
start_list <- list(
  ENSG0000014 = structure(
    list(
      name = c("E-1122O", "E-11EM3", "E-11EMC", "E-1442O", "E-1132O"),
      ENSG = rep("ENSG0000014", 5),
      expr = c(
        " 9.940670e-02", " 1.289670e-01", "-7.394904e-03",
        " 9.940670e-02", " 9.940670e-02"
      ),
      `1_43222779_A_G_b37` = c("1", "1", "2", "1", "0"),
      `1_43222856_A_G_b37` = c("0", "0", "0", "1", "1"),
      `1_43223126_C_T_b37` = c("0", "1", "0", "1", "2"),
      `1_43223317_T_C_b37` = c("1", "0", "0", "2", "1")
    ),
    row.names = c(NA, -5L),
    class = c("tbl_df", "tbl", "data.frame")
  ),
  ENSG0000015 = structure(
    list(
      name = c("E-1122O", "E-11EM3", "E-11EMC", "E-1442O", "E-1132O"),
      ENSG = rep("ENSG0000015", 5),
      expr = c(
        " 9.940670e-02", " 1.289670e-01", "-7.394904e-03",
        " 9.940670e-02", " 1.289670e-01"
      ),
      `1_43222779_A_G_b37` = c("0", "1", "0", "1", "2"),
      `1_43222856_A_G_b37` = c("1", "1", "2", "1", "0")
    ),
    row.names = c(NA, -5L),
    class = c("tbl_df", "tbl", "data.frame")
  )
)
此外,还有一个名为 set_id 的数据框,其 IID 列列出了 start_list 中 name 列的各个个体,并用五个 TRUE/FALSE 逻辑列(set_1 至 set_5)标记每个个体在各集合中的取值:
# Membership table: one row per individual (`IID`) and one logical column
# per set (`set_1` to `set_5`).
set_id <- data.frame(
  IID = c("E-1122O", "E-11EM3", "E-11EMC", "E-1442O", "E-1132O"),
  set_1 = c(TRUE, FALSE, TRUE, TRUE, TRUE),
  set_2 = c(TRUE, TRUE, FALSE, FALSE, TRUE),
  set_3 = c(FALSE, TRUE, TRUE, FALSE, TRUE),
  set_4 = c(TRUE, FALSE, TRUE, TRUE, FALSE),
  set_5 = c(TRUE, FALSE, FALSE, TRUE, TRUE),
  stringsAsFactors = FALSE
)
我需要根据这些个体集合过滤 start_list 中的每个数据框:对每个集合,只保留该集合取值为 FALSE 的 IID 所对应的行,同时删除每个数据框的第二列和第三列(ENSG 和 expr),并创建一个新列表 list_prime_out:
# Expected result: for every data frame in `start_list`, one tibble-classed
# data frame per set, keeping only the rows whose IID is FALSE in that set and
# dropping the `ENSG` and `expr` columns. All values are copied row-for-row
# from `start_list`, so each element only has the columns its source has
# (four variant columns for ENSG0000014, two for ENSG0000015).
list_prime_out <- list(
  ENSG0000014 = list(
    set_1 = structure(list(
      name = "E-11EM3",
      `1_43222779_A_G_b37` = "1",
      `1_43222856_A_G_b37` = "0",
      `1_43223126_C_T_b37` = "1",
      `1_43223317_T_C_b37` = "0"
    ), row.names = c(NA, -1L), class = c("tbl_df", "tbl", "data.frame")),
    set_2 = structure(list(
      name = c("E-11EMC", "E-1442O"),
      `1_43222779_A_G_b37` = c("2", "1"),
      `1_43222856_A_G_b37` = c("0", "1"),
      `1_43223126_C_T_b37` = c("0", "1"),
      `1_43223317_T_C_b37` = c("0", "2")
    ), row.names = c(NA, -2L), class = c("tbl_df", "tbl", "data.frame")),
    set_3 = structure(list(
      name = c("E-1122O", "E-1442O"),
      `1_43222779_A_G_b37` = c("1", "1"),
      `1_43222856_A_G_b37` = c("0", "1"),
      `1_43223126_C_T_b37` = c("0", "1"),
      `1_43223317_T_C_b37` = c("1", "2")
    ), row.names = c(NA, -2L), class = c("tbl_df", "tbl", "data.frame")),
    set_4 = structure(list(
      name = c("E-11EM3", "E-1132O"),
      `1_43222779_A_G_b37` = c("1", "0"),
      `1_43222856_A_G_b37` = c("0", "1"),
      `1_43223126_C_T_b37` = c("1", "2"),
      `1_43223317_T_C_b37` = c("0", "1")
    ), row.names = c(NA, -2L), class = c("tbl_df", "tbl", "data.frame")),
    set_5 = structure(list(
      name = c("E-11EM3", "E-11EMC"),
      `1_43222779_A_G_b37` = c("1", "2"),
      `1_43222856_A_G_b37` = c("0", "0"),
      `1_43223126_C_T_b37` = c("1", "0"),
      `1_43223317_T_C_b37` = c("0", "0")
    ), row.names = c(NA, -2L), class = c("tbl_df", "tbl", "data.frame"))
  ),
  ENSG0000015 = list(
    set_1 = structure(list(
      name = "E-11EM3",
      `1_43222779_A_G_b37` = "1",
      `1_43222856_A_G_b37` = "1"
    ), row.names = c(NA, -1L), class = c("tbl_df", "tbl", "data.frame")),
    set_2 = structure(list(
      name = c("E-11EMC", "E-1442O"),
      `1_43222779_A_G_b37` = c("0", "1"),
      `1_43222856_A_G_b37` = c("2", "1")
    ), row.names = c(NA, -2L), class = c("tbl_df", "tbl", "data.frame")),
    set_3 = structure(list(
      name = c("E-1122O", "E-1442O"),
      `1_43222779_A_G_b37` = c("0", "1"),
      `1_43222856_A_G_b37` = c("1", "1")
    ), row.names = c(NA, -2L), class = c("tbl_df", "tbl", "data.frame")),
    set_4 = structure(list(
      name = c("E-11EM3", "E-1132O"),
      `1_43222779_A_G_b37` = c("1", "2"),
      `1_43222856_A_G_b37` = c("1", "0")
    ), row.names = c(NA, -2L), class = c("tbl_df", "tbl", "data.frame")),
    set_5 = structure(list(
      name = c("E-11EM3", "E-11EMC"),
      `1_43222779_A_G_b37` = c("1", "0"),
      `1_43222856_A_G_b37` = c("1", "2")
    ), row.names = c(NA, -2L), class = c("tbl_df", "tbl", "data.frame"))
  )
)

# Expected structure, shown in the tibble-style format used by str():
str(list_prime_out)
#> List of 2
#>  $ ENSG0000014:List of 5
#>   ..$ set_1: tibble [1 × 5] (S3: tbl_df/tbl/data.frame)
#>   .. ..$ name              : chr "E-11EM3"
#>   .. ..$ 1_43222779_A_G_b37: chr "1"
#>   .. ..$ 1_43222856_A_G_b37: chr "0"
#>   .. ..$ 1_43223126_C_T_b37: chr "1"
#>   .. ..$ 1_43223317_T_C_b37: chr "0"
#>   ..$ set_2: tibble [2 × 5] (S3: tbl_df/tbl/data.frame)
#>   .. ..$ name              : chr [1:2] "E-11EMC" "E-1442O"
#>   .. ..$ 1_43222779_A_G_b37: chr [1:2] "2" "1"
#>   .. ..$ 1_43222856_A_G_b37: chr [1:2] "0" "1"
#>   .. ..$ 1_43223126_C_T_b37: chr [1:2] "0" "1"
#>   .. ..$ 1_43223317_T_C_b37: chr [1:2] "0" "2"
#>   ..$ set_3: tibble [2 × 5] (S3: tbl_df/tbl/data.frame)
#>   .. ..$ name              : chr [1:2] "E-1122O" "E-1442O"
#>   .. ..$ 1_43222779_A_G_b37: chr [1:2] "1" "1"
#>   .. ..$ 1_43222856_A_G_b37: chr [1:2] "0" "1"
#>   .. ..$ 1_43223126_C_T_b37: chr [1:2] "0" "1"
#>   .. ..$ 1_43223317_T_C_b37: chr [1:2] "1" "2"
#>   ..$ set_4: tibble [2 × 5] (S3: tbl_df/tbl/data.frame)
#>   .. ..$ name              : chr [1:2] "E-11EM3" "E-1132O"
#>   .. ..$ 1_43222779_A_G_b37: chr [1:2] "1" "0"
#>   .. ..$ 1_43222856_A_G_b37: chr [1:2] "0" "1"
#>   .. ..$ 1_43223126_C_T_b37: chr [1:2] "1" "2"
#>   .. ..$ 1_43223317_T_C_b37: chr [1:2] "0" "1"
#>   ..$ set_5: tibble [2 × 5] (S3: tbl_df/tbl/data.frame)
#>   .. ..$ name              : chr [1:2] "E-11EM3" "E-11EMC"
#>   .. ..$ 1_43222779_A_G_b37: chr [1:2] "1" "2"
#>   .. ..$ 1_43222856_A_G_b37: chr [1:2] "0" "0"
#>   .. ..$ 1_43223126_C_T_b37: chr [1:2] "1" "0"
#>   .. ..$ 1_43223317_T_C_b37: chr [1:2] "0" "0"
#>  $ ENSG0000015:List of 5
#>   ..$ set_1: tibble [1 × 3] (S3: tbl_df/tbl/data.frame)
#>   .. ..$ name              : chr "E-11EM3"
#>   .. ..$ 1_43222779_A_G_b37: chr "1"
#>   .. ..$ 1_43222856_A_G_b37: chr "1"
#>   ..$ set_2: tibble [2 × 3] (S3: tbl_df/tbl/data.frame)
#>   .. ..$ name              : chr [1:2] "E-11EMC" "E-1442O"
#>   .. ..$ 1_43222779_A_G_b37: chr [1:2] "0" "1"
#>   .. ..$ 1_43222856_A_G_b37: chr [1:2] "2" "1"
#>   ..$ set_3: tibble [2 × 3] (S3: tbl_df/tbl/data.frame)
#>   .. ..$ name              : chr [1:2] "E-1122O" "E-1442O"
#>   .. ..$ 1_43222779_A_G_b37: chr [1:2] "0" "1"
#>   .. ..$ 1_43222856_A_G_b37: chr [1:2] "1" "1"
#>   ..$ set_4: tibble [2 × 3] (S3: tbl_df/tbl/data.frame)
#>   .. ..$ name              : chr [1:2] "E-11EM3" "E-1132O"
#>   .. ..$ 1_43222779_A_G_b37: chr [1:2] "1" "2"
#>   .. ..$ 1_43222856_A_G_b37: chr [1:2] "1" "0"
#>   ..$ set_5: tibble [2 × 3] (S3: tbl_df/tbl/data.frame)
#>   .. ..$ name              : chr [1:2] "E-11EM3" "E-11EMC"
#>   .. ..$ 1_43222779_A_G_b37: chr [1:2] "1" "0"
#>   .. ..$ 1_43222856_A_G_b37: chr [1:2] "1" "2"
非常感谢你的帮助。
这是使用 {dplyr} 和 {purrr} 的解决方案:
library(dplyr)
library(purrr)
# Build a named list with one character vector per set column: the `IID`s
# whose flag in that set is FALSE (TRUE flags become NA and are dropped).
set_id_list <- set_id %>%
  select(set_1:set_5) %>%
  map(function(flag) {
    kept_iids <- if_else(flag, NA_character_, set_id$IID)
    kept_iids[!is.na(kept_iids)]
  })
# Two-level map:
#   1. strip the ENSG-to-expr column range from every `start_list` data frame;
#   2. for each stripped data frame, produce one filtered copy per set,
#      keeping only the rows whose `name` is among that set's `IID`s.
list_prime_out <- start_list %>%
  map(~ select(.x, !(ENSG:expr))) %>%
  map(function(trimmed_tbl) {
    map(set_id_list, function(kept_iids) {
      filter(trimmed_tbl, name %in% kept_iids)
    })
  })
输出:
# > list_prime_out
$ENSG0000014
$ENSG0000014$set_1
# A tibble: 1 x 5
name `1_43222779_A_~` `1_43222856_A_~` `1_43223126_C_~` `1_43223317_T_~`
<chr> <chr> <chr> <chr> <chr>
1 E-11EM3 1 0 1 0
$ENSG0000014$set_2
# A tibble: 2 x 5
name `1_43222779_A_~` `1_43222856_A_~` `1_43223126_C_~` `1_43223317_T_~`
<chr> <chr> <chr> <chr> <chr>
1 E-11EMC 2 0 0 0
2 E-1442O 1 1 1 2
$ENSG0000014$set_3
# A tibble: 2 x 5
name `1_43222779_A_~` `1_43222856_A_~` `1_43223126_C_~` `1_43223317_T_~`
<chr> <chr> <chr> <chr> <chr>
1 E-1122O 1 0 0 1
2 E-1442O 1 1 1 2
$ENSG0000014$set_4
# A tibble: 2 x 5
name `1_43222779_A_~` `1_43222856_A_~` `1_43223126_C_~` `1_43223317_T_~`
<chr> <chr> <chr> <chr> <chr>
1 E-11EM3 1 0 1 0
2 E-1132O 0 1 2 1
$ENSG0000014$set_5
# A tibble: 2 x 5
name `1_43222779_A_~` `1_43222856_A_~` `1_43223126_C_~` `1_43223317_T_~`
<chr> <chr> <chr> <chr> <chr>
1 E-11EM3 1 0 1 0
2 E-11EMC 2 0 0 0
$ENSG0000015
$ENSG0000015$set_1
# A tibble: 1 x 3
name `1_43222779_A_G_b37` `1_43222856_A_G_b37`
<chr> <chr> <chr>
1 E-11EM3 1 1
$ENSG0000015$set_2
# A tibble: 2 x 3
name `1_43222779_A_G_b37` `1_43222856_A_G_b37`
<chr> <chr> <chr>
1 E-11EMC 0 2
2 E-1442O 1 1
$ENSG0000015$set_3
# A tibble: 2 x 3
name `1_43222779_A_G_b37` `1_43222856_A_G_b37`
<chr> <chr> <chr>
1 E-1122O 0 1
2 E-1442O 1 1
$ENSG0000015$set_4
# A tibble: 2 x 3
name `1_43222779_A_G_b37` `1_43222856_A_G_b37`
<chr> <chr> <chr>
1 E-11EM3 1 1
2 E-1132O 2 0
$ENSG0000015$set_5
# A tibble: 2 x 3
name `1_43222779_A_G_b37` `1_43222856_A_G_b37`
<chr> <chr> <chr>
1 E-11EM3 1 1
2 E-11EMC 0 2
由 reprex 包 (v2.0.1) 创建于 2022-03-02