R - 根据参数获取列表中每个数据框的列名

R - Obtain Column Names for Each Dataframe in a List based on a Parameter

我有一个数据框列表,我已将这些数据框拆分开,以便分别对其进行统计。我想根据某一行中的值来提取各数据框中相应列的名称。以 df_list 中的这些数据框为例:

$df1
  aa bb cc
a  9  9  2
b  9  9  6
c  2 10  5
d  2  9  6
e  3  4  7

$df2
  aa bb cc
a  2  4  3
b  2  8  2
c  6  4  3
d  9  5  8
e  1  5  4

$df3
  aa bb cc
a  1  8  2
b  1  8  9
c  4  1  2
d  9  9  8
e  9  7  3

我想获取行 e 中值大于 5 的那些列的名称。因此,我希望在同一个列表或一个全新的列表中得到类似这样的输出:

$df1
  [1] "cc" 

$df2
  NULL  #or integer(0), I'm not quite sure what I would see here

$df3
  [1] "aa"    "bb"

我想出了这段代码来获取所有列的名称:

# Collect the column names of every data frame in the list.
names_list <- lapply(df_list, colnames)

$df1
[1] "aa" "bb" "cc"

$df2
[1] "aa" "bb" "cc"

$df3
[1] "aa" "bb" "cc"

这是获取行 e 中值大于 5 的列的索引的代码:

# For every data frame, find the positions of the columns whose value in
# row "e" is greater than 5.
num_list <- lapply(df_list, function(df) {
  which(df["e", ] > 5)
})

$df1
[1] 3

$df2
integer(0)

$df3
[1] 1 2

虽然以上两个结果看起来很有希望,但我似乎无法把这两个思路结合起来得到想要的输出。将它们嵌套在一起时出现了大量我看不懂的错误。有可行的方法吗?有没有更高效的方法?

您需要用逻辑条件对数据框的列名(names)取子集:

# For each data frame, keep the column names whose value in row "e"
# exceeds 5. The comparison yields a logical mask over the columns, which
# is used directly to subset the names.
lapply(df_list, function(df) {
  row_e_above_5 <- df["e", ] > 5
  names(df)[row_e_above_5]
})
# Similar to OP's attempt.
# lapply(df_list, function(x) names(x)[which(x[c("e"), ] > 5)])

#$df1
#[1] "cc"

#$df2
#character(0)

#$df3
#[1] "aa" "bb"

数据

# Example input: a named list of three data frames, each with integer
# columns aa/bb/cc and rows labelled "a" through "e".
df_list <- list(
  df1 = data.frame(
    aa = c(9L, 9L, 2L, 2L, 3L),
    bb = c(9L, 9L, 10L, 9L, 4L),
    cc = c(2L, 6L, 5L, 6L, 7L),
    row.names = letters[1:5]
  ),
  df2 = data.frame(
    aa = c(2L, 2L, 6L, 9L, 1L),
    bb = c(4L, 8L, 4L, 5L, 5L),
    cc = c(3L, 2L, 3L, 8L, 4L),
    row.names = letters[1:5]
  ),
  df3 = data.frame(
    aa = c(1L, 1L, 4L, 9L, 9L),
    bb = c(8L, 8L, 1L, 9L, 7L),
    cc = c(2L, 9L, 2L, 8L, 3L),
    row.names = letters[1:5]
  )
)

我们也可以使用 tidyverse:

library(dplyr)
library(purrr)
library(tibble) # provides rownames_to_column()
library(tidyr)  # provides pivot_longer()

# For each data frame in the list:
#   1. move the row names into a regular column `rn`,
#   2. keep only row "e",
#   3. reshape to long form (one row per original column: `name`, `value`),
#   4. keep the entries whose value is greater than 5,
#   5. return the remaining column names as a character vector.
map(df_list, ~ .x %>%
  rownames_to_column("rn") %>%
  filter(rn == "e") %>%
  pivot_longer(cols = -rn) %>%
  filter(value > 5) %>%
  pull(name))
#$df1
#[1] "cc"

#$df2
#character(0)

#$df3
#[1] "aa" "bb"

数据

# Example input: a named list of three data frames, each with integer
# columns aa/bb/cc and rows labelled "a" through "e".
df_list <- list(
  df1 = data.frame(
    aa = c(9L, 9L, 2L, 2L, 3L),
    bb = c(9L, 9L, 10L, 9L, 4L),
    cc = c(2L, 6L, 5L, 6L, 7L),
    row.names = letters[1:5]
  ),
  df2 = data.frame(
    aa = c(2L, 2L, 6L, 9L, 1L),
    bb = c(4L, 8L, 4L, 5L, 5L),
    cc = c(3L, 2L, 3L, 8L, 4L),
    row.names = letters[1:5]
  ),
  df3 = data.frame(
    aa = c(1L, 1L, 4L, 9L, 9L),
    bb = c(8L, 8L, 1L, 9L, 7L),
    cc = c(2L, 9L, 2L, 8L, 3L),
    row.names = letters[1:5]
  )
)