按向量条件对嵌套列表的子列表进行子集化

Subset sublists of nested list by vector condition

我有一个嵌套列表 list1:

list1 <- list(Alpha = structure(list(sample_0 = c(3, NA, 7, 9, 2),
                                     sample_1 = c(NA, 3, 5, 4, NA),
                                     sample_2 = c(7, 3, 5, NA, NA)),
                                row.names = c(NA, -5L),
                                class = c("tbl_df", "tbl", "data.frame")),
              Beta = structure (list(sample_0 = c(2, 9, NA, 3, 7),
                                     sample_1 = c(3, 7, 9, 3, NA),
                                     sample_2 = c(4, 2, 6, 4, 6)),
                                row.names = c(NA, -5L),
                                class = c("tbl_df", "tbl", "data.frame")),
              Gamma = structure(list(sample_0 = c(NA, NA, 4, 6, 3),
                                     sample_1 = c(3, 5, 3, NA, 2)),
                                row.names = c(NA, -5L),
                                class = c("tbl_df", "tbl", "data.frame")),
              Delta = structure(list(sample_0 = c(3, NA, 7, 9, 2),
                                     sample_1 = c(3, 8, 5, 4, 9)),
                                     row.names = c(NA, -5L),
                                     class = c("tbl_df", "tbl", "data.frame")))

我需要将一列更改为因子水平,如下所示(有比使用 for 循环更优雅的方法吗?):

for (i in names(list1)) {
  list1[[i]]$sample_1 <- list1[[i]]$sample_1 %>% as.numeric() %>% 
    cut(breaks=c(0, 2, 4, 5, 8, 10),
        labels=c("-", "+", "++", "+++", "++++"), right = TRUE, na.rm = TRUE)
 }

我现在只想保留 sample_1...

中至少包含级别“+++”(或“++++”)的子列表
sapply(sapply(list1, `[`, "sample_1"), unique)
$Alpha.sample_1
[1] <NA> +    ++  
Levels: - + ++ +++ ++++

$Beta.sample_1
[1] +    +++  ++++ <NA>
Levels: - + ++ +++ ++++

$Gamma.sample_1
[1] +    ++   <NA> -   
Levels: - + ++ +++ ++++

$Delta.sample_1
[1] +    +++  ++   ++++
Levels: - + ++ +++ ++++

... 以及新列表 2 中同一子列表的所有其他元素。期望的结果如下所示:

list1
$Beta
# A tibble: 5 x 3
  sample_0 sample_1 sample_2
     <dbl> <fct>       <dbl>
1        2 +               4
2        9 +++             2
3       NA ++++            6
4        3 +               4
5        7 NA              6
    
$Delta
# A tibble: 5 x 2
  sample_0 sample_1
     <dbl> <fct>   
1        3 +       
2       NA +++     
3        7 ++      
4        9 +       
5        2 ++++  

我想我必须将矢量转换为 as.integer,但我想不出解决方案。

一个小函数 + sapply 可以帮助:

contains_3plus <- function(greek_letter){
    ("+++" %in% greek_letter$sample_1) | ("++++" %in% greek_letter$sample_1)
}


list1[which(sapply(list1, contains_3plus))]

# $Beta
# # A tibble: 5 × 3
# sample_0 sample_1 sample_2
# <dbl> <fct>       <dbl>
#   1        2 +               4
# 2        9 +++             2
# 3       NA ++++            6
# 4        3 +               4
# 5        7 NA              6
# 
# $Delta
# # A tibble: 5 × 2
# sample_0 sample_1
# <dbl> <fct>   
#   1        3 +       
#   2       NA +++     
#   3        7 ++      
#   4        9 +       
#   5        2 ++++

您可以在函数 myFun 中定义要执行的操作,在 lapply 中使用它,并将结果子集化为 nrow > 0。

myFun <- \(x) {
  x$sample_1  <- with(x, cut(sample_1, breaks=c(0, 2, 4, 5, 8, 10),
                             labels=c("-", "+", "++", "+++", "++++"), 
                             right=TRUE, na.rm=TRUE))
  subset(x, sample_1 %in% c("+++", "++++"))
}
    
res <- lapply(list1, myFun)
res[sapply(res, nrow) > 0]
# $Beta
#   sample_0 sample_1 sample_2
# 2        9      +++        2
# 3       NA     ++++        6
# 
# $Delta
#   sample_0 sample_1
# 2       NA      +++
# 5        2     ++++
#   

编辑

要在阈值处将列表元素拆分为子列表,您可以在函数中使用split。我们也可以拆分为基础整数,即 as.numeric(x$sample_1).

myFun2 <- \(x) {
  x$sample_1 <- with(x, cut(sample_1, breaks=c(0, 2, 4, 5, 8, 10), 
                            labels=c("-", "+", "++", "+++", "++++"), 
                            right=TRUE, na.rm=TRUE))
  split(x, as.numeric(x$sample_1) > 4)
}

lapply(list1, myFun2)
# $Alpha
# $Alpha$`FALSE`
# sample_0 sample_1 sample_2
# 2       NA        +        3
# 3        7       ++        5
# 4        9        +       NA
# 
# 
# $Beta
# $Beta$`FALSE`
# sample_0 sample_1 sample_2
# 1        2        +        4
# 2        9      +++        2
# 4        3        +        4
# 
# $Beta$`TRUE`
# sample_0 sample_1 sample_2
# 3       NA     ++++        6
# 
# 
# $Gamma
# $Gamma$`FALSE`
# sample_0 sample_1
# 1       NA        +
#   2       NA       ++
#   3        4        +
#   5        3        -
#   
#   
#   $Delta
# $Delta$`FALSE`
# sample_0 sample_1
# 1        3        +
#   2       NA      +++
#   3        7       ++
#   4        9        +
#   
#   $Delta$`TRUE`
# sample_0 sample_1
# 5        2     ++++