循环遍历列表的自定义函数
Custom function to loop over a list
我有一个可以正常工作的自定义函数,但不确定如何让它对一组输入(列表)循环执行。看起来我需要用到 apply() 之类的函数,但我目前的写法还做不到这一点。该函数使用 rollapply() 计算给定时间窗口内的滚动统计量,并找出每位球员的最大值。
library(zoo)
library(dplyr)
# Reproducible example data: two players (A, B) with ten minutes each
set.seed(1)
df <- tibble(
  player = rep(c("A", "B"), each = 10),
  minute = rep(seq_len(10), times = 2),
  # tdc is drawn before sumad so the random stream is consumed in this order
  tdc = sample(100:200, size = 20),
  sumad = sample(1:10, size = 20, replace = TRUE)
)
# Custom function
#' Per-player maximum of a rolling-window statistic
#'
#' Applies `fun` over left-aligned rolling windows of `n_minutes` rows of
#' `metric`, separately for each `player`, and keeps the single largest
#' window value per player.
#'
#' @param df A data frame with a `player` column and the `metric` column.
#'   Rows are used in their existing order; the function does not sort
#'   (presumably they are already ordered by minute -- confirm with caller).
#' @param metric Column to roll over, given as a bare name or a string.
#' @param n_minutes A single number: the window width in rows.
#' @param fun Function applied to each window (e.g. `sum`).
#' @return An ungrouped tibble with one row per player and two columns:
#'   `player` and `<metric>_x<n_minutes>`.
x_min_roll <- function(df, metric, n_minutes, fun) {
  # One window width per call; the output column name is built from it.
  stopifnot(is.numeric(n_minutes), length(n_minutes) == 1L)
  metric <- ensym(metric)
  # Plain character name such as "tdc_x2" for the derived column.
  newname <- paste0(rlang::as_string(metric), "_x", n_minutes)
  df %>%
    # Group before rolling so a window never spans rows of two players.
    group_by(player) %>%
    mutate(
      "{newname}" := rollapply(
        !!metric, n_minutes, fun,
        align = "left", fill = NA
      )
    ) %>%
    # with_ties = FALSE guarantees exactly one row per player, so joining
    # several results on `player` cannot duplicate rows.
    slice_max(.data[[newname]], n = 1, with_ties = FALSE) %>%
    ungroup() %>%
    # all_of() is the supported way to select a column by a string name.
    select(player, all_of(newname))
}
# A single metric with a single window width works as expected
x_min_roll(df, metric = tdc, n_minutes = 2, fun = sum)
# A tibble: 2 x 2
# Groups: player [2]
player tdc_x2
<chr> <int>
1 A 339
2 B 380
我希望能够做到这一点:
# Column names of the metrics to evaluate.
metric_list <- c('tdc', 'sumad')
# Window widths to evaluate.
minutes_list <- c(2,5)
# Desired usage: pass both vectors in one call. x_min_roll() builds a single
# output column name per call, so this is the wished-for interface rather than
# a working call; the pipeline is intentionally left open.
df %>%
x_min_roll(metric = metric_list, n_minutes = minutes_list, fun = sum) %>%
# maybe a few more steps here.... to get this
# A tibble: 2 x 5
player tdc_x2 tdc_x5 sumad_x2 sumad_x5
<chr> <dbl> <dbl> <dbl> <dbl>
1 A 339 793 20 36
2 B 380 866 19 41
我们可以使用 map2 同时遍历两个向量中位置相对应的元素:
library(purrr)
library(dplyr)
# Pair each metric with the window width at the same position and run
# x_min_roll() once per pair; the result is a list of tibbles.
map2(
  metric_list,
  minutes_list,
  function(metric_name, window) {
    x_min_roll(df, metric = !!metric_name, n_minutes = window, fun = sum)
  }
)
输出:
[[1]]
# A tibble: 2 × 2
# Groups: player [2]
player tdc_x2
<chr> <int>
1 A 339
2 B 380
[[2]]
# A tibble: 3 × 2
# Groups: player [2]
player sumad_x5
<chr> <int>
1 A 36
2 B 41
3 B 41
编辑:基于@Onyambu 的评论
如果我们想要每一种组合,可以先用 crossing 创建所有组合:
library(tidyr)
# Run x_min_roll() for every metric / window combination.
# pmap() passes the columns of the crossing() result by name, so the
# function's parameters carry the same names as those columns.
pmap(
  crossing(metric_list, minutes_list),
  function(metric_list, minutes_list) {
    x_min_roll(df, metric = !!metric_list, n_minutes = minutes_list, fun = sum)
  }
)
根据 OP 的评论,如果我们想把各个结果合并成一个数据集:
# Compute every metric / window combination, then merge the per-combination
# tibbles into one wide table keyed by player.
reduce(
  pmap(
    crossing(metric_list, minutes_list),
    function(metric_list, minutes_list) {
      x_min_roll(df, metric = !!metric_list, n_minutes = minutes_list, fun = sum)
    }
  ),
  inner_join,
  by = "player"
)
我有一个可以正常工作的自定义函数,但不确定如何让它对一组输入(列表)循环执行。看起来我需要用到 apply() 之类的函数,但我目前的写法还做不到这一点。该函数使用 rollapply() 计算给定时间窗口内的滚动统计量,并找出每位球员的最大值。
library(zoo)
library(dplyr)
# Reproducible example data: two players (A, B) with ten minutes each
set.seed(1)
df <- tibble(
  player = rep(c("A", "B"), each = 10),
  minute = rep(seq_len(10), times = 2),
  # tdc is drawn before sumad so the random stream is consumed in this order
  tdc = sample(100:200, size = 20),
  sumad = sample(1:10, size = 20, replace = TRUE)
)
# Custom function
#' Per-player maximum of a rolling-window statistic
#'
#' Applies `fun` over left-aligned rolling windows of `n_minutes` rows of
#' `metric`, separately for each `player`, and keeps the single largest
#' window value per player.
#'
#' @param df A data frame with a `player` column and the `metric` column.
#'   Rows are used in their existing order; the function does not sort
#'   (presumably they are already ordered by minute -- confirm with caller).
#' @param metric Column to roll over, given as a bare name or a string.
#' @param n_minutes A single number: the window width in rows.
#' @param fun Function applied to each window (e.g. `sum`).
#' @return An ungrouped tibble with one row per player and two columns:
#'   `player` and `<metric>_x<n_minutes>`.
x_min_roll <- function(df, metric, n_minutes, fun) {
  # One window width per call; the output column name is built from it.
  stopifnot(is.numeric(n_minutes), length(n_minutes) == 1L)
  metric <- ensym(metric)
  # Plain character name such as "tdc_x2" for the derived column.
  newname <- paste0(rlang::as_string(metric), "_x", n_minutes)
  df %>%
    # Group before rolling so a window never spans rows of two players.
    group_by(player) %>%
    mutate(
      "{newname}" := rollapply(
        !!metric, n_minutes, fun,
        align = "left", fill = NA
      )
    ) %>%
    # with_ties = FALSE guarantees exactly one row per player, so joining
    # several results on `player` cannot duplicate rows.
    slice_max(.data[[newname]], n = 1, with_ties = FALSE) %>%
    ungroup() %>%
    # all_of() is the supported way to select a column by a string name.
    select(player, all_of(newname))
}
# A single metric with a single window width works as expected
x_min_roll(df, metric = tdc, n_minutes = 2, fun = sum)
# A tibble: 2 x 2
# Groups: player [2]
player tdc_x2
<chr> <int>
1 A 339
2 B 380
我希望能够做到这一点:
# Column names of the metrics to evaluate.
metric_list <- c('tdc', 'sumad')
# Window widths to evaluate.
minutes_list <- c(2,5)
# Desired usage: pass both vectors in one call. x_min_roll() builds a single
# output column name per call, so this is the wished-for interface rather than
# a working call; the pipeline is intentionally left open.
df %>%
x_min_roll(metric = metric_list, n_minutes = minutes_list, fun = sum) %>%
# maybe a few more steps here.... to get this
# A tibble: 2 x 5
player tdc_x2 tdc_x5 sumad_x2 sumad_x5
<chr> <dbl> <dbl> <dbl> <dbl>
1 A 339 793 20 36
2 B 380 866 19 41
我们可以使用 map2 同时遍历两个向量中位置相对应的元素:
library(purrr)
library(dplyr)
# Pair each metric with the window width at the same position and run
# x_min_roll() once per pair; the result is a list of tibbles.
map2(
  metric_list,
  minutes_list,
  function(metric_name, window) {
    x_min_roll(df, metric = !!metric_name, n_minutes = window, fun = sum)
  }
)
输出:
[[1]]
# A tibble: 2 × 2
# Groups: player [2]
player tdc_x2
<chr> <int>
1 A 339
2 B 380
[[2]]
# A tibble: 3 × 2
# Groups: player [2]
player sumad_x5
<chr> <int>
1 A 36
2 B 41
3 B 41
编辑:基于@Onyambu 的评论
如果我们想要每一种组合,可以先用 crossing 创建所有组合:
library(tidyr)
# Run x_min_roll() for every metric / window combination.
# pmap() passes the columns of the crossing() result by name, so the
# function's parameters carry the same names as those columns.
pmap(
  crossing(metric_list, minutes_list),
  function(metric_list, minutes_list) {
    x_min_roll(df, metric = !!metric_list, n_minutes = minutes_list, fun = sum)
  }
)
根据 OP 的评论,如果我们想把各个结果合并成一个数据集:
# Compute every metric / window combination, then merge the per-combination
# tibbles into one wide table keyed by player.
reduce(
  pmap(
    crossing(metric_list, minutes_list),
    function(metric_list, minutes_list) {
      x_min_roll(df, metric = !!metric_list, n_minutes = minutes_list, fun = sum)
    }
  ),
  inner_join,
  by = "player"
)