如何在 R 的管道序列中对选定的若干列应用函数?
How to apply a function on a selection of columns in a pipe sequence in R?
我有一个包含许多列的数据框(或 tibble,都可以),我只想对其中的 7 列应用一个函数(比如 rowSums),同时不希望其他列消失。难点在于我想在一个管道序列中完成这件事:
- 创建(或读取)数据
- 应用函数
- 之后的可选操作
这是数据框上的一个可重现示例,我想在其中对前 3 列进行 rowSums
data <- data.frame("v1" = runif(10, 0, 10), "v2" = runif(10, 0 ,10), "v3" = runif(10, 0 ,10), "v4" = rep("some_charchter", 10))
我通常的做法是
data$sum <- rowSums(data[,1:3])
但我想要这样的东西
data <- data.frame("v1" = runif(10, 0, 10), "v2" = runif(10, 0 ,10), "v3" = runif(10, 0 ,10), "v4" = rep("some_charchter", 10)) %>%
mutate(sum = rowSums())
感谢您的帮助!
您可以使用 `.` 在管道内访问您的数据对象。因此 `mutate(sum = rowSums(.[, 1:3]))` 可以实现:
data <- data.frame("v1" = runif(10, 0, 10), "v2" = runif(10, 0 ,10), "v3" = runif(10, 0 ,10), "v4" = rep("some_charchter", 10)) %>%
mutate(sum = rowSums(.[, 1:3]))
data
v1 v2 v3 v4 sum
1 2.280871 0.1981815 7.5349128 some_charchter 10.013965
2 1.250208 7.6687056 0.6193483 some_charchter 9.538262
3 6.782954 3.6973201 2.7694021 some_charchter 13.249677
4 3.809574 6.8641731 3.1271489 some_charchter 13.800896
5 9.339726 4.4571677 5.4489081 some_charchter 19.245802
6 6.623371 3.9594287 0.6025072 some_charchter 11.185307
7 6.843193 1.3548732 3.1826649 some_charchter 11.380731
8 2.377099 7.5661778 9.6320561 some_charchter 19.575333
9 3.582874 2.1485691 8.2970807 some_charchter 14.028524
10 4.565336 3.7073800 0.3355328 some_charchter 8.608248
另一种选择是使用 `pmap_*` 系列函数中的某个变体,具体用哪一个取决于对所选列应用函数后的返回类型。
library(dplyr)
library(purrr)
data %>% mutate(sum = pmap_dbl(list(v1, v2, v3), sum))
# v1 v2 v3 v4 sum
#1 1.13703411 6.935913 3.1661245 some_charchter 11.239072
#2 6.22299405 5.449748 3.0269337 some_charchter 14.699676
#3 6.09274733 2.827336 1.5904600 some_charchter 10.510543
#4 6.23379442 9.234335 0.3999592 some_charchter 15.868088
#5 8.60915384 2.923158 2.1879954 some_charchter 13.720308
#6 6.40310605 8.372956 8.1059855 some_charchter 22.882048
#7 0.09495756 2.862233 5.2569755 some_charchter 8.214166
#8 2.32550506 2.668208 9.1465817 some_charchter 14.140295
#9 6.66083758 1.867228 8.3134505 some_charchter 16.841516
#10 5.14251141 2.322259 0.4577026 some_charchter 7.922473
数据
set.seed(1234)
data <- data.frame("v1" = runif(10, 0, 10), "v2" = runif(10, 0 ,10),
"v3" = runif(10, 0 ,10), "v4" = rep("some_charchter", 10))
我们还可以加入简单的类型检查(只选择数值型的列),以便更好地自动化该过程
library(tidyverse)
data %>%
mutate(Sum = select_if(., is.numeric) %>%
reduce(`+`))
# v1 v2 v3 v4 Sum
#1 1.13703411 6.935913 3.1661245 some_charchter 11.239072
#2 6.22299405 5.449748 3.0269337 some_charchter 14.699676
#3 6.09274733 2.827336 1.5904600 some_charchter 10.510543
#4 6.23379442 9.234335 0.3999592 some_charchter 15.868088
#5 8.60915384 2.923158 2.1879954 some_charchter 13.720308
#6 6.40310605 8.372956 8.1059855 some_charchter 22.882048
#7 0.09495756 2.862233 5.2569755 some_charchter 8.214166
#8 2.32550506 2.668208 9.1465817 some_charchter 14.140295
#9 6.66083758 1.867228 8.3134505 some_charchter 16.841516
#10 5.14251141 2.322259 0.4577026 some_charchter 7.922473
注意:这是一个矢量化的解决方案,类似于 @symbolrush 给出的 `rowSums` 解决方案
数据
set.seed(1234)
data <- data.frame("v1" = runif(10, 0, 10), "v2" = runif(10, 0 ,10),
"v3" = runif(10, 0 ,10), "v4" = rep("some_charchter", 10))
我有一个包含许多列的数据框(或 tibble,都可以),我只想对其中的 7 列应用一个函数(比如 rowSums),同时不希望其他列消失。难点在于我想在一个管道序列中完成这件事: - 创建(或读取)数据 - 应用函数 - 之后的可选操作
这是数据框上的一个可重现示例,我想在其中对前 3 列进行 rowSums
data <- data.frame("v1" = runif(10, 0, 10), "v2" = runif(10, 0 ,10), "v3" = runif(10, 0 ,10), "v4" = rep("some_charchter", 10))
我通常的做法是
data$sum <- rowSums(data[,1:3])
但我想要这样的东西
data <- data.frame("v1" = runif(10, 0, 10), "v2" = runif(10, 0 ,10), "v3" = runif(10, 0 ,10), "v4" = rep("some_charchter", 10)) %>%
mutate(sum = rowSums())
感谢您的帮助!
您可以使用 `.` 在管道内访问您的数据对象。因此 `mutate(sum = rowSums(.[, 1:3]))` 可以实现:
data <- data.frame("v1" = runif(10, 0, 10), "v2" = runif(10, 0 ,10), "v3" = runif(10, 0 ,10), "v4" = rep("some_charchter", 10)) %>%
mutate(sum = rowSums(.[, 1:3]))
data
v1 v2 v3 v4 sum
1 2.280871 0.1981815 7.5349128 some_charchter 10.013965
2 1.250208 7.6687056 0.6193483 some_charchter 9.538262
3 6.782954 3.6973201 2.7694021 some_charchter 13.249677
4 3.809574 6.8641731 3.1271489 some_charchter 13.800896
5 9.339726 4.4571677 5.4489081 some_charchter 19.245802
6 6.623371 3.9594287 0.6025072 some_charchter 11.185307
7 6.843193 1.3548732 3.1826649 some_charchter 11.380731
8 2.377099 7.5661778 9.6320561 some_charchter 19.575333
9 3.582874 2.1485691 8.2970807 some_charchter 14.028524
10 4.565336 3.7073800 0.3355328 some_charchter 8.608248
另一种选择是使用 `pmap_*` 系列函数中的某个变体,具体用哪一个取决于对所选列应用函数后的返回类型。
library(dplyr)
library(purrr)
data %>% mutate(sum = pmap_dbl(list(v1, v2, v3), sum))
# v1 v2 v3 v4 sum
#1 1.13703411 6.935913 3.1661245 some_charchter 11.239072
#2 6.22299405 5.449748 3.0269337 some_charchter 14.699676
#3 6.09274733 2.827336 1.5904600 some_charchter 10.510543
#4 6.23379442 9.234335 0.3999592 some_charchter 15.868088
#5 8.60915384 2.923158 2.1879954 some_charchter 13.720308
#6 6.40310605 8.372956 8.1059855 some_charchter 22.882048
#7 0.09495756 2.862233 5.2569755 some_charchter 8.214166
#8 2.32550506 2.668208 9.1465817 some_charchter 14.140295
#9 6.66083758 1.867228 8.3134505 some_charchter 16.841516
#10 5.14251141 2.322259 0.4577026 some_charchter 7.922473
数据
set.seed(1234)
data <- data.frame("v1" = runif(10, 0, 10), "v2" = runif(10, 0 ,10),
"v3" = runif(10, 0 ,10), "v4" = rep("some_charchter", 10))
我们还可以加入简单的类型检查(只选择数值型的列),以便更好地自动化该过程
library(tidyverse)
data %>%
mutate(Sum = select_if(., is.numeric) %>%
reduce(`+`))
# v1 v2 v3 v4 Sum
#1 1.13703411 6.935913 3.1661245 some_charchter 11.239072
#2 6.22299405 5.449748 3.0269337 some_charchter 14.699676
#3 6.09274733 2.827336 1.5904600 some_charchter 10.510543
#4 6.23379442 9.234335 0.3999592 some_charchter 15.868088
#5 8.60915384 2.923158 2.1879954 some_charchter 13.720308
#6 6.40310605 8.372956 8.1059855 some_charchter 22.882048
#7 0.09495756 2.862233 5.2569755 some_charchter 8.214166
#8 2.32550506 2.668208 9.1465817 some_charchter 14.140295
#9 6.66083758 1.867228 8.3134505 some_charchter 16.841516
#10 5.14251141 2.322259 0.4577026 some_charchter 7.922473
注意:这是一个矢量化的解决方案,类似于 @symbolrush 给出的 `rowSums` 解决方案
数据
set.seed(1234)
data <- data.frame("v1" = runif(10, 0, 10), "v2" = runif(10, 0 ,10),
"v3" = runif(10, 0 ,10), "v4" = rep("some_charchter", 10))