按向量条件对嵌套列表的子列表进行子集化
Subset sublists of nested list by vector condition
我有一个嵌套列表 list1
:
list1 <- list(Alpha = structure(list(sample_0 = c(3, NA, 7, 9, 2),
sample_1 = c(NA, 3, 5, 4, NA),
sample_2 = c(7, 3, 5, NA, NA)),
row.names = c(NA, -5L),
class = c("tbl_df", "tbl", "data.frame")),
Beta = structure (list(sample_0 = c(2, 9, NA, 3, 7),
sample_1 = c(3, 7, 9, 3, NA),
sample_2 = c(4, 2, 6, 4, 6)),
row.names = c(NA, -5L),
class = c("tbl_df", "tbl", "data.frame")),
Gamma = structure(list(sample_0 = c(NA, NA, 4, 6, 3),
sample_1 = c(3, 5, 3, NA, 2)),
row.names = c(NA, -5L),
class = c("tbl_df", "tbl", "data.frame")),
Delta = structure(list(sample_0 = c(3, NA, 7, 9, 2),
sample_1 = c(3, 8, 5, 4, 9)),
row.names = c(NA, -5L),
class = c("tbl_df", "tbl", "data.frame")))
我需要将一列更改为因子水平,如下所示(有比使用 for 循环更优雅的方法吗?):
for (i in names(list1)) {
list1[[i]]$sample_1 <- list1[[i]]$sample_1 %>% as.numeric() %>%
cut(breaks=c(0, 2, 4, 5, 8, 10),
labels=c("-", "+", "++", "+++", "++++"), right = TRUE, na.rm = TRUE)
}
我现在只想保留 sample_1...
中至少包含级别“+++”(或“++++”)的子列表
sapply(sapply(list1, `[`, "sample_1"), unique)
$Alpha.sample_1
[1] <NA> + ++
Levels: - + ++ +++ ++++
$Beta.sample_1
[1] + +++ ++++ <NA>
Levels: - + ++ +++ ++++
$Gamma.sample_1
[1] + ++ <NA> -
Levels: - + ++ +++ ++++
$Delta.sample_1
[1] + +++ ++ ++++
Levels: - + ++ +++ ++++
... 以及新列表 2 中同一子列表的所有其他元素。期望的结果如下所示:
list1
$Beta
# A tibble: 5 x 3
sample_0 sample_1 sample_2
<dbl> <fct> <dbl>
1 2 + 4
2 9 +++ 2
3 NA ++++ 6
4 3 + 4
5 7 NA 6
$Delta
# A tibble: 5 x 2
sample_0 sample_1
<dbl> <fct>
1 3 +
2 NA +++
3 7 ++
4 9 +
5 2 ++++
我想我必须将矢量转换为 as.integer,但我想不出解决方案。
一个小函数 + sapply 可以帮助:
contains_3plus <- function(greek_letter){
("+++" %in% greek_letter$sample_1) | ("++++" %in% greek_letter$sample_1)
}
list1[which(sapply(list1, contains_3plus))]
# $Beta
# # A tibble: 5 × 3
# sample_0 sample_1 sample_2
# <dbl> <fct> <dbl>
# 1 2 + 4
# 2 9 +++ 2
# 3 NA ++++ 6
# 4 3 + 4
# 5 7 NA 6
#
# $Delta
# # A tibble: 5 × 2
# sample_0 sample_1
# <dbl> <fct>
# 1 3 +
# 2 NA +++
# 3 7 ++
# 4 9 +
# 5 2 ++++
您可以在函数 myFun
中定义要执行的操作,在 lapply
中使用它,并将结果子集化为 nrow
> 0。
myFun <- \(x) {
x$sample_1 <- with(x, cut(sample_1, breaks=c(0, 2, 4, 5, 8, 10),
labels=c("-", "+", "++", "+++", "++++"),
right=TRUE, na.rm=TRUE))
subset(x, sample_1 %in% c("+++", "++++"))
}
res <- lapply(list1, myFun)
res[sapply(res, nrow) > 0]
# $Beta
# sample_0 sample_1 sample_2
# 2 9 +++ 2
# 3 NA ++++ 6
#
# $Delta
# sample_0 sample_1
# 2 NA +++
# 5 2 ++++
#
编辑
要在阈值处将列表元素拆分为子列表,您可以在函数中使用split
。我们也可以拆分为基础整数,即 as.numeric(x$sample_1)
.
myFun2 <- \(x) {
x$sample_1 <- with(x, cut(sample_1, breaks=c(0, 2, 4, 5, 8, 10),
labels=c("-", "+", "++", "+++", "++++"),
right=TRUE, na.rm=TRUE))
split(x, as.numeric(x$sample_1) > 4)
}
lapply(list1, myFun2)
# $Alpha
# $Alpha$`FALSE`
# sample_0 sample_1 sample_2
# 2 NA + 3
# 3 7 ++ 5
# 4 9 + NA
#
#
# $Beta
# $Beta$`FALSE`
# sample_0 sample_1 sample_2
# 1 2 + 4
# 2 9 +++ 2
# 4 3 + 4
#
# $Beta$`TRUE`
# sample_0 sample_1 sample_2
# 3 NA ++++ 6
#
#
# $Gamma
# $Gamma$`FALSE`
# sample_0 sample_1
# 1 NA +
# 2 NA ++
# 3 4 +
# 5 3 -
#
#
# $Delta
# $Delta$`FALSE`
# sample_0 sample_1
# 1 3 +
# 2 NA +++
# 3 7 ++
# 4 9 +
#
# $Delta$`TRUE`
# sample_0 sample_1
# 5 2 ++++
我有一个嵌套列表 list1
:
list1 <- list(Alpha = structure(list(sample_0 = c(3, NA, 7, 9, 2),
sample_1 = c(NA, 3, 5, 4, NA),
sample_2 = c(7, 3, 5, NA, NA)),
row.names = c(NA, -5L),
class = c("tbl_df", "tbl", "data.frame")),
Beta = structure (list(sample_0 = c(2, 9, NA, 3, 7),
sample_1 = c(3, 7, 9, 3, NA),
sample_2 = c(4, 2, 6, 4, 6)),
row.names = c(NA, -5L),
class = c("tbl_df", "tbl", "data.frame")),
Gamma = structure(list(sample_0 = c(NA, NA, 4, 6, 3),
sample_1 = c(3, 5, 3, NA, 2)),
row.names = c(NA, -5L),
class = c("tbl_df", "tbl", "data.frame")),
Delta = structure(list(sample_0 = c(3, NA, 7, 9, 2),
sample_1 = c(3, 8, 5, 4, 9)),
row.names = c(NA, -5L),
class = c("tbl_df", "tbl", "data.frame")))
我需要将一列更改为因子水平,如下所示(有比使用 for 循环更优雅的方法吗?):
for (i in names(list1)) {
list1[[i]]$sample_1 <- list1[[i]]$sample_1 %>% as.numeric() %>%
cut(breaks=c(0, 2, 4, 5, 8, 10),
labels=c("-", "+", "++", "+++", "++++"), right = TRUE, na.rm = TRUE)
}
我现在只想保留 sample_1...
中至少包含级别“+++”(或“++++”)的子列表sapply(sapply(list1, `[`, "sample_1"), unique)
$Alpha.sample_1
[1] <NA> + ++
Levels: - + ++ +++ ++++
$Beta.sample_1
[1] + +++ ++++ <NA>
Levels: - + ++ +++ ++++
$Gamma.sample_1
[1] + ++ <NA> -
Levels: - + ++ +++ ++++
$Delta.sample_1
[1] + +++ ++ ++++
Levels: - + ++ +++ ++++
... 以及新列表 2 中同一子列表的所有其他元素。期望的结果如下所示:
list1
$Beta
# A tibble: 5 x 3
sample_0 sample_1 sample_2
<dbl> <fct> <dbl>
1 2 + 4
2 9 +++ 2
3 NA ++++ 6
4 3 + 4
5 7 NA 6
$Delta
# A tibble: 5 x 2
sample_0 sample_1
<dbl> <fct>
1 3 +
2 NA +++
3 7 ++
4 9 +
5 2 ++++
我想我必须将矢量转换为 as.integer,但我想不出解决方案。
一个小函数 + sapply 可以帮助:
contains_3plus <- function(greek_letter){
("+++" %in% greek_letter$sample_1) | ("++++" %in% greek_letter$sample_1)
}
list1[which(sapply(list1, contains_3plus))]
# $Beta
# # A tibble: 5 × 3
# sample_0 sample_1 sample_2
# <dbl> <fct> <dbl>
# 1 2 + 4
# 2 9 +++ 2
# 3 NA ++++ 6
# 4 3 + 4
# 5 7 NA 6
#
# $Delta
# # A tibble: 5 × 2
# sample_0 sample_1
# <dbl> <fct>
# 1 3 +
# 2 NA +++
# 3 7 ++
# 4 9 +
# 5 2 ++++
您可以在函数 myFun
中定义要执行的操作,在 lapply
中使用它,并将结果子集化为 nrow
> 0。
myFun <- \(x) {
x$sample_1 <- with(x, cut(sample_1, breaks=c(0, 2, 4, 5, 8, 10),
labels=c("-", "+", "++", "+++", "++++"),
right=TRUE, na.rm=TRUE))
subset(x, sample_1 %in% c("+++", "++++"))
}
res <- lapply(list1, myFun)
res[sapply(res, nrow) > 0]
# $Beta
# sample_0 sample_1 sample_2
# 2 9 +++ 2
# 3 NA ++++ 6
#
# $Delta
# sample_0 sample_1
# 2 NA +++
# 5 2 ++++
#
编辑
要在阈值处将列表元素拆分为子列表,您可以在函数中使用split
。我们也可以拆分为基础整数,即 as.numeric(x$sample_1)
.
myFun2 <- \(x) {
x$sample_1 <- with(x, cut(sample_1, breaks=c(0, 2, 4, 5, 8, 10),
labels=c("-", "+", "++", "+++", "++++"),
right=TRUE, na.rm=TRUE))
split(x, as.numeric(x$sample_1) > 4)
}
lapply(list1, myFun2)
# $Alpha
# $Alpha$`FALSE`
# sample_0 sample_1 sample_2
# 2 NA + 3
# 3 7 ++ 5
# 4 9 + NA
#
#
# $Beta
# $Beta$`FALSE`
# sample_0 sample_1 sample_2
# 1 2 + 4
# 2 9 +++ 2
# 4 3 + 4
#
# $Beta$`TRUE`
# sample_0 sample_1 sample_2
# 3 NA ++++ 6
#
#
# $Gamma
# $Gamma$`FALSE`
# sample_0 sample_1
# 1 NA +
# 2 NA ++
# 3 4 +
# 5 3 -
#
#
# $Delta
# $Delta$`FALSE`
# sample_0 sample_1
# 1 3 +
# 2 NA +++
# 3 7 ++
# 4 9 +
#
# $Delta$`TRUE`
# sample_0 sample_1
# 5 2 ++++