对名称与模式匹配的列进行归约(Reduce)
Reduce columns whose names match a pattern
我正在尝试在我的数据表中创建多个新列,每个新列是名称匹配给定模式的那些列按行求和的结果。
# Example data: one text column of comma-separated labels (with irregular
# spacing) and four integer columns that are later summed row-wise in pairs.
df <- data.frame(
  first_column = c(
    "Alpha",
    "Beta",
    "Charlie",
    "Tango",
    "Alpha, Beta,Alpha",
    "Alpha,Beta,Charlie",
    "Tango,Tango,Tango,Tango",
    "Tango,Tango,Tango, Tango",
    "Tango,Tango,Tango, Tango , Alpha,Beta,Charlie, Alpha, Alpha ,Alpha "
  ),
  number_1 = 1:9,
  number_2 = 11:19,
  number_3 = 2:10,
  number_4 = 12:20
)
# Convert number_1 and number_2 to double (as.numeric), then turn the
# data frame into a data.table.
testing <- as.data.table(
  mutate(
    df,
    number_1 = as.numeric(number_1),
    number_2 = as.numeric(number_2)
  )
)
# Asker's original attempt (does not work): grep(..., value = TRUE) returns the
# matching column NAMES as a character vector, so Reduce(`+`, ...) is applied
# to strings instead of to the column values.
# Note: `:=` adds columns to `testing` by reference, so `testing3` refers to
# the same table rather than to an independent copy.
testing3 = testing[,`:=` ("Total 1" = Reduce(`+`, grep("number_1|number_2", names(testing), value = TRUE)),
"Total 2" = Reduce(`+`, grep("number_3|number_4", names(testing), value = TRUE)))]
这是我尝试过的写法,但没有成功。预期结果是:第 1 行的 Total 1 为 12,Total 2 为 14,但我没能得到这个结果。
原因是带 value = TRUE 的 grep 只返回列名,而我们需要的是列的值;可以使用 .SD 按列名对列进行子集化。
library(data.table)
# Add two row-wise totals to `testing` by reference. For each total, grep()
# finds the positions of the columns whose names match the pattern, .SD is
# subset to those columns (with = FALSE), and Reduce(`+`, ...) adds them
# together element-wise.
testing[, c("Total 1", "Total 2") := list(
  Reduce(`+`, .SD[, grep("number_1|number_2", names(.SD)), with = FALSE]),
  Reduce(`+`, .SD[, grep("number_3|number_4", names(.SD)), with = FALSE])
)]
-输出
> testing
first_column number_1 number_2 number_3 number_4 Total 1 Total 2
<char> <num> <num> <int> <int> <num> <int>
1: Alpha 1 11 2 12 12 14
2: Beta 2 12 3 13 14 16
3: Charlie 3 13 4 14 16 18
4: Tango 4 14 5 15 18 20
5: Alpha, Beta,Alpha 5 15 6 16 20 22
6: Alpha,Beta,Charlie 6 16 7 17 22 24
7: Tango,Tango,Tango,Tango 7 17 8 18 24 26
8: Tango,Tango,Tango, Tango 8 18 9 19 26 28
9: Tango,Tango,Tango, Tango , Alpha,Beta,Charlie, Alpha, Alpha ,Alpha 9 19 10 20 28 30
如果有多组列,我们还可以创建一个命名的 list,并用 Filter 根据列名是否出现在数据中来筛选 list 的元素。
# Column groups to sum row-wise; each list name is the total column to create.
# The number_5/number_6 group illustrates a group whose columns may be absent
# from the data.
lst_names <- list(
  "Total 1" = c("number_1", "number_2"),
  "Total 2" = c("number_3", "number_4"),
  "Total 3" = c("number_5", "number_6")
)

# Restrict each group to the columns actually present in `testing`, then drop
# any group that is left with no columns.
lst_names_sub <- lapply(lst_names, intersect, y = names(testing))
lst_names_sub <- lst_names_sub[lengths(lst_names_sub) > 0L]

# Add one total column per remaining group (by reference), summing the
# group's columns element-wise.
testing[, names(lst_names_sub) := lapply(
  lst_names_sub,
  function(cols) Reduce(`+`, .SD[, cols, with = FALSE])
)]
我正在尝试在我的数据表中创建多个新列,每个新列是名称匹配给定模式的那些列按行求和的结果。
# Example data: one text column of comma-separated labels (with irregular
# spacing) and four integer columns that are later summed row-wise in pairs.
df <- data.frame(
  first_column = c(
    "Alpha",
    "Beta",
    "Charlie",
    "Tango",
    "Alpha, Beta,Alpha",
    "Alpha,Beta,Charlie",
    "Tango,Tango,Tango,Tango",
    "Tango,Tango,Tango, Tango",
    "Tango,Tango,Tango, Tango , Alpha,Beta,Charlie, Alpha, Alpha ,Alpha "
  ),
  number_1 = 1:9,
  number_2 = 11:19,
  number_3 = 2:10,
  number_4 = 12:20
)
# Convert number_1 and number_2 to double (as.numeric), then turn the
# data frame into a data.table.
testing <- as.data.table(
  mutate(
    df,
    number_1 = as.numeric(number_1),
    number_2 = as.numeric(number_2)
  )
)
# Asker's original attempt (does not work): grep(..., value = TRUE) returns the
# matching column NAMES as a character vector, so Reduce(`+`, ...) is applied
# to strings instead of to the column values.
# Note: `:=` adds columns to `testing` by reference, so `testing3` refers to
# the same table rather than to an independent copy.
testing3 = testing[,`:=` ("Total 1" = Reduce(`+`, grep("number_1|number_2", names(testing), value = TRUE)),
"Total 2" = Reduce(`+`, grep("number_3|number_4", names(testing), value = TRUE)))]
这是我尝试过的写法,但没有成功。预期结果是:第 1 行的 Total 1 为 12,Total 2 为 14,但我没能得到这个结果。
原因是带 value = TRUE 的 grep 只返回列名,而我们需要的是列的值;可以使用 .SD 按列名对列进行子集化。
library(data.table)
# Add two row-wise totals to `testing` by reference. For each total, grep()
# finds the positions of the columns whose names match the pattern, .SD is
# subset to those columns (with = FALSE), and Reduce(`+`, ...) adds them
# together element-wise.
testing[, c("Total 1", "Total 2") := list(
  Reduce(`+`, .SD[, grep("number_1|number_2", names(.SD)), with = FALSE]),
  Reduce(`+`, .SD[, grep("number_3|number_4", names(.SD)), with = FALSE])
)]
-输出
> testing
first_column number_1 number_2 number_3 number_4 Total 1 Total 2
<char> <num> <num> <int> <int> <num> <int>
1: Alpha 1 11 2 12 12 14
2: Beta 2 12 3 13 14 16
3: Charlie 3 13 4 14 16 18
4: Tango 4 14 5 15 18 20
5: Alpha, Beta,Alpha 5 15 6 16 20 22
6: Alpha,Beta,Charlie 6 16 7 17 22 24
7: Tango,Tango,Tango,Tango 7 17 8 18 24 26
8: Tango,Tango,Tango, Tango 8 18 9 19 26 28
9: Tango,Tango,Tango, Tango , Alpha,Beta,Charlie, Alpha, Alpha ,Alpha 9 19 10 20 28 30
如果有多组列,我们还可以创建一个命名的 list,并用 Filter 根据列名是否出现在数据中来筛选 list 的元素。
# Column groups to sum row-wise; each list name is the total column to create.
# The number_5/number_6 group illustrates a group whose columns may be absent
# from the data.
lst_names <- list(
  "Total 1" = c("number_1", "number_2"),
  "Total 2" = c("number_3", "number_4"),
  "Total 3" = c("number_5", "number_6")
)

# Restrict each group to the columns actually present in `testing`, then drop
# any group that is left with no columns.
lst_names_sub <- lapply(lst_names, intersect, y = names(testing))
lst_names_sub <- lst_names_sub[lengths(lst_names_sub) > 0L]

# Add one total column per remaining group (by reference), summing the
# group's columns element-wise.
testing[, names(lst_names_sub) := lapply(
  lst_names_sub,
  function(cols) Reduce(`+`, .SD[, cols, with = FALSE])
)]