R - 根据参数获取列表中每个数据框的列名
R - Obtain Column Names for Each Dataframe in a List based on a Parameter
我有一个数据帧列表,我已经划分了这些数据帧以便对其进行单独统计。数据框中有一些列,我想根据某一行中的值提取这些列的名称。以 df_list 中编译的这些数据帧为例:
$df1
aa bb cc
a 9 9 2
b 9 9 6
c 2 10 5
d 2 9 6
e 3 4 7
$df2
aa bb cc
a 2 4 3
b 2 8 2
c 6 4 3
d 9 5 8
e 1 5 4
$df3
aa bb cc
a 1 8 2
b 1 8 9
c 4 1 2
d 9 9 8
e 9 7 3
如果行 e 中的值大于 5,我想获取对应列的名称。所以我会在同一个列表或全新列表中得到类似这样的输出:
$df1
[1] "cc"
$df2
NULL #or integer(0), I'm not quite sure what I would see here
$df3
[1] "aa" "bb"
我想出了这段代码来获取所有列的名称:
# Collect the column names of every data frame in df_list.
names_list <- lapply(df_list, colnames)
$df1
[1] "aa" "bb" "cc"
$df2
[1] "aa" "bb" "cc"
$df3
[1] "aa" "bb" "cc"
这是找出行 e 中值大于 5 的列位置的代码:
# For each data frame, find the positions of the columns whose value in
# row "e" is greater than 5.
num_list <- lapply(df_list, function(df) {
  row_e_exceeds <- df["e", ] > 5
  which(row_e_exceeds)
})
$df1
[1] 3
$df2
integer(0)
$df3
[1] 1 2
虽然最后两个结果很有希望,但我似乎无法将这两个想法联系在一起以获得所需的输出。嵌套它们给了我无数我无法识别的错误。有可行的方法吗?有没有更有效的方法?
您需要对数据帧的 names 进行子集化:
# Keep only the column names whose value in row "e" is greater than 5.
lapply(df_list, function(df) {
  exceeds <- df["e", ] > 5
  names(df)[exceeds]
})
# Equivalent form, closer to the OP's attempt (index positions via which()):
# lapply(df_list, function(x) names(x)[which(x[c("e"), ] > 5)])
#$df1
#[1] "cc"
#$df2
#character(0)
#$df3
#[1] "aa" "bb"
数据
# Reproducible example data: a named list of three integer data frames,
# each with columns aa/bb/cc and row names "a" through "e".
df_list <- list(
  df1 = data.frame(
    aa = c(9L, 9L, 2L, 2L, 3L),
    bb = c(9L, 9L, 10L, 9L, 4L),
    cc = c(2L, 6L, 5L, 6L, 7L),
    row.names = letters[1:5]
  ),
  df2 = data.frame(
    aa = c(2L, 2L, 6L, 9L, 1L),
    bb = c(4L, 8L, 4L, 5L, 5L),
    cc = c(3L, 2L, 3L, 8L, 4L),
    row.names = letters[1:5]
  ),
  df3 = data.frame(
    aa = c(1L, 1L, 4L, 9L, 9L),
    bb = c(8L, 8L, 1L, 9L, 7L),
    cc = c(2L, 9L, 2L, 8L, 3L),
    row.names = letters[1:5]
  )
)
我们可以使用tidyverse
# rownames_to_column() is exported by tibble and pivot_longer() by tidyr, so
# both packages must be attached in addition to dplyr and purrr; otherwise the
# pipeline fails with "could not find function".
library(dplyr)
library(purrr)
library(tibble)
library(tidyr)

# For each data frame in df_list:
#   1. move the row names into a regular column "rn",
#   2. keep only row "e",
#   3. reshape to long form (one row per original column: name/value),
#   4. keep the entries whose value is greater than 5,
#   5. return the matching column names as a character vector.
map(df_list, ~ .x %>%
  rownames_to_column("rn") %>%
  filter(rn == "e") %>%
  pivot_longer(cols = -rn) %>%
  filter(value > 5) %>%
  pull(name))
#$df1
#[1] "cc"
#$df2
#character(0)
#$df3
#[1] "aa" "bb"
数据
# Reproducible example data: a named list of three integer data frames,
# each with columns aa/bb/cc and row names "a" through "e".
df_list <- list(
  df1 = data.frame(
    aa = c(9L, 9L, 2L, 2L, 3L),
    bb = c(9L, 9L, 10L, 9L, 4L),
    cc = c(2L, 6L, 5L, 6L, 7L),
    row.names = letters[1:5]
  ),
  df2 = data.frame(
    aa = c(2L, 2L, 6L, 9L, 1L),
    bb = c(4L, 8L, 4L, 5L, 5L),
    cc = c(3L, 2L, 3L, 8L, 4L),
    row.names = letters[1:5]
  ),
  df3 = data.frame(
    aa = c(1L, 1L, 4L, 9L, 9L),
    bb = c(8L, 8L, 1L, 9L, 7L),
    cc = c(2L, 9L, 2L, 8L, 3L),
    row.names = letters[1:5]
  )
)
我有一个数据帧列表,我已经划分了这些数据帧以便对其进行单独统计。数据框中有一些列,我想根据某一行中的值提取这些列的名称。以 df_list 中编译的这些数据帧为例:
$df1
aa bb cc
a 9 9 2
b 9 9 6
c 2 10 5
d 2 9 6
e 3 4 7
$df2
aa bb cc
a 2 4 3
b 2 8 2
c 6 4 3
d 9 5 8
e 1 5 4
$df3
aa bb cc
a 1 8 2
b 1 8 9
c 4 1 2
d 9 9 8
e 9 7 3
如果行 e 中的值大于 5,我想获取对应列的名称。所以我会在同一个列表或全新列表中得到类似这样的输出:
$df1
[1] "cc"
$df2
NULL #or integer(0), I'm not quite sure what I would see here
$df3
[1] "aa" "bb"
我想出了这段代码来获取所有列的名称:
# Collect the column names of every data frame in df_list.
names_list <- lapply(df_list, colnames)
$df1
[1] "aa" "bb" "cc"
$df2
[1] "aa" "bb" "cc"
$df3
[1] "aa" "bb" "cc"
这是找出行 e 中值大于 5 的列位置的代码:
# For each data frame, find the positions of the columns whose value in
# row "e" is greater than 5.
num_list <- lapply(df_list, function(df) {
  row_e_exceeds <- df["e", ] > 5
  which(row_e_exceeds)
})
$df1
[1] 3
$df2
integer(0)
$df3
[1] 1 2
虽然最后两个结果很有希望,但我似乎无法将这两个想法联系在一起以获得所需的输出。嵌套它们给了我无数我无法识别的错误。有可行的方法吗?有没有更有效的方法?
您需要对数据帧的 names 进行子集化:
# Keep only the column names whose value in row "e" is greater than 5.
lapply(df_list, function(df) {
  exceeds <- df["e", ] > 5
  names(df)[exceeds]
})
# Equivalent form, closer to the OP's attempt (index positions via which()):
# lapply(df_list, function(x) names(x)[which(x[c("e"), ] > 5)])
#$df1
#[1] "cc"
#$df2
#character(0)
#$df3
#[1] "aa" "bb"
数据
# Reproducible example data: a named list of three integer data frames,
# each with columns aa/bb/cc and row names "a" through "e".
df_list <- list(
  df1 = data.frame(
    aa = c(9L, 9L, 2L, 2L, 3L),
    bb = c(9L, 9L, 10L, 9L, 4L),
    cc = c(2L, 6L, 5L, 6L, 7L),
    row.names = letters[1:5]
  ),
  df2 = data.frame(
    aa = c(2L, 2L, 6L, 9L, 1L),
    bb = c(4L, 8L, 4L, 5L, 5L),
    cc = c(3L, 2L, 3L, 8L, 4L),
    row.names = letters[1:5]
  ),
  df3 = data.frame(
    aa = c(1L, 1L, 4L, 9L, 9L),
    bb = c(8L, 8L, 1L, 9L, 7L),
    cc = c(2L, 9L, 2L, 8L, 3L),
    row.names = letters[1:5]
  )
)
我们可以使用tidyverse
# rownames_to_column() is exported by tibble and pivot_longer() by tidyr, so
# both packages must be attached in addition to dplyr and purrr; otherwise the
# pipeline fails with "could not find function".
library(dplyr)
library(purrr)
library(tibble)
library(tidyr)

# For each data frame in df_list:
#   1. move the row names into a regular column "rn",
#   2. keep only row "e",
#   3. reshape to long form (one row per original column: name/value),
#   4. keep the entries whose value is greater than 5,
#   5. return the matching column names as a character vector.
map(df_list, ~ .x %>%
  rownames_to_column("rn") %>%
  filter(rn == "e") %>%
  pivot_longer(cols = -rn) %>%
  filter(value > 5) %>%
  pull(name))
#$df1
#[1] "cc"
#$df2
#character(0)
#$df3
#[1] "aa" "bb"
数据
# Reproducible example data: a named list of three integer data frames,
# each with columns aa/bb/cc and row names "a" through "e".
df_list <- list(
  df1 = data.frame(
    aa = c(9L, 9L, 2L, 2L, 3L),
    bb = c(9L, 9L, 10L, 9L, 4L),
    cc = c(2L, 6L, 5L, 6L, 7L),
    row.names = letters[1:5]
  ),
  df2 = data.frame(
    aa = c(2L, 2L, 6L, 9L, 1L),
    bb = c(4L, 8L, 4L, 5L, 5L),
    cc = c(3L, 2L, 3L, 8L, 4L),
    row.names = letters[1:5]
  ),
  df3 = data.frame(
    aa = c(1L, 1L, 4L, 9L, 9L),
    bb = c(8L, 8L, 1L, 9L, 7L),
    cc = c(2L, 9L, 2L, 8L, 3L),
    row.names = letters[1:5]
  )
)