user-defined/dynamic dplyr::summarise 内的汇总函数
R user-defined/dynamic summary function within dplyr::summarise
很难定义这个问题而不听起来像很多类似的问题!
我有一个函数,我希望其中一个参数是一个函数名,它将传递给 dplyr::summarise,例如“平均值”或“总和”:
data(mtcars)
f <- function(x = mtcars,
groupcol = "cyl",
zCol = "disp",
zFun = "mean") {
zColquo = quo_name(zCol)
cellSummaries <- x %>%
group_by(gear, !!sym(groupcol)) %>% # 1 preset grouper, 1 user-defined
summarise(Count = n(), # 1 preset summary, 1 user defined
!!zColquo := mean(!!sym(zColquo))) # mean should be zFun, user-defined
ungroup
}
(按齿轮和气缸分组,然后 returns,每组,计数和平均值(disp))
根据我的说明,我希望 'mean' 是动态的,执行由 zFun 定义的功能,但我一辈子都不知道该怎么做!在此先感谢您的任何建议。
您可以使用 match.fun
使函数动态化。我还删除了 zColquo
,因为它不需要。
library(dplyr)
library(rlang)
f <- function(x = mtcars,
groupcol = "cyl",
zCol = "disp",
zFun = "mean") {
cellSummaries <- x %>%
group_by(gear, !!sym(groupcol)) %>%
summarise(Count = n(),
!!zCol := match.fun(zFun)(!!sym(zCol))) %>%
ungroup
return(cellSummaries)
}
然后您可以检查输出
f()
# A tibble: 8 x 4
# gear cyl Count disp
# <dbl> <dbl> <int> <dbl>
#1 3 4 1 120.
#2 3 6 2 242.
#3 3 8 12 358.
#4 4 4 8 103.
#5 4 6 4 164.
#6 5 4 2 108.
#7 5 6 1 145
#8 5 8 2 326
f(zFun = "sum")
# A tibble: 8 x 4
# gear cyl Count disp
# <dbl> <dbl> <int> <dbl>
#1 3 4 1 120.
#2 3 6 2 483
#3 3 8 12 4291.
#4 4 4 8 821
#5 4 6 4 655.
#6 5 4 2 215.
#7 5 6 1 145
#8 5 8 2 652
我们可以使用get
library(dplyr)
f <- function(x = mtcars,
groupcol = "cyl",
zCol = "disp",
zFun = "mean") {
zColquo = quo_name(zCol)
x %>%
group_by(gear, !!sym(groupcol)) %>% # 1 preset grouper, 1 user-defined
summarise(Count = n(), # 1 preset summary, 1 user defined
!!zColquo := get(zFun)(!!sym(zCol))) %>%
ungroup
}
f()
# A tibble: 8 x 4
# gear cyl Count disp
# <dbl> <dbl> <int> <dbl>
#1 3 4 1 120.
#2 3 6 2 242.
#3 3 8 12 358.
#4 4 4 8 103.
#5 4 6 4 164.
#6 5 4 2 108.
#7 5 6 1 145
#8 5 8 2 326
f(zFun = "sum")
# A tibble: 8 x 4
# gear cyl Count disp
# <dbl> <dbl> <int> <dbl>
#1 3 4 1 120.
#2 3 6 2 483
#3 3 8 12 4291.
#4 4 4 8 821
#5 4 6 4 655.
#6 5 4 2 215.
#7 5 6 1 145
#8 5 8 2 652
此外,如果我们用 across
换行,我们可以删除 group_by
和 summarise
中的 sym
评估
f <- function(x = mtcars,
groupcol = "cyl",
zCol = "disp",
zFun = "mean") {
x %>%
group_by(across(c(gear, groupcol))) %>% # 1 preset grouper, 1 user-defined
summarise(Count = n(), # 1 preset summary, 1 user defined
across(zCol, ~ get(zFun)(.))) %>%
ungroup
}
f()
# A tibble: 8 x 4
# gear cyl Count disp
# <dbl> <dbl> <int> <dbl>
#1 3 4 1 120.
#2 3 6 2 242.
#3 3 8 12 358.
#4 4 4 8 103.
#5 4 6 4 164.
#6 5 4 2 108.
#7 5 6 1 145
#8 5 8 2 326
很难定义这个问题而不听起来像很多类似的问题!
我有一个函数,我希望其中一个参数是一个函数名,它将传递给 dplyr::summarise,例如“平均值”或“总和”:
data(mtcars)
f <- function(x = mtcars,
groupcol = "cyl",
zCol = "disp",
zFun = "mean") {
zColquo = quo_name(zCol)
cellSummaries <- x %>%
group_by(gear, !!sym(groupcol)) %>% # 1 preset grouper, 1 user-defined
summarise(Count = n(), # 1 preset summary, 1 user defined
!!zColquo := mean(!!sym(zColquo))) # mean should be zFun, user-defined
ungroup
}
(按齿轮和气缸分组,然后 returns,每组,计数和平均值(disp))
根据我的说明,我希望 'mean' 是动态的,执行由 zFun 定义的功能,但我一辈子都不知道该怎么做!在此先感谢您的任何建议。
您可以使用 match.fun
使函数动态化。我还删除了 zColquo
,因为它不需要。
library(dplyr)
library(rlang)
f <- function(x = mtcars,
groupcol = "cyl",
zCol = "disp",
zFun = "mean") {
cellSummaries <- x %>%
group_by(gear, !!sym(groupcol)) %>%
summarise(Count = n(),
!!zCol := match.fun(zFun)(!!sym(zCol))) %>%
ungroup
return(cellSummaries)
}
然后您可以检查输出
f()
# A tibble: 8 x 4
# gear cyl Count disp
# <dbl> <dbl> <int> <dbl>
#1 3 4 1 120.
#2 3 6 2 242.
#3 3 8 12 358.
#4 4 4 8 103.
#5 4 6 4 164.
#6 5 4 2 108.
#7 5 6 1 145
#8 5 8 2 326
f(zFun = "sum")
# A tibble: 8 x 4
# gear cyl Count disp
# <dbl> <dbl> <int> <dbl>
#1 3 4 1 120.
#2 3 6 2 483
#3 3 8 12 4291.
#4 4 4 8 821
#5 4 6 4 655.
#6 5 4 2 215.
#7 5 6 1 145
#8 5 8 2 652
我们可以使用get
library(dplyr)
f <- function(x = mtcars,
groupcol = "cyl",
zCol = "disp",
zFun = "mean") {
zColquo = quo_name(zCol)
x %>%
group_by(gear, !!sym(groupcol)) %>% # 1 preset grouper, 1 user-defined
summarise(Count = n(), # 1 preset summary, 1 user defined
!!zColquo := get(zFun)(!!sym(zCol))) %>%
ungroup
}
f()
# A tibble: 8 x 4
# gear cyl Count disp
# <dbl> <dbl> <int> <dbl>
#1 3 4 1 120.
#2 3 6 2 242.
#3 3 8 12 358.
#4 4 4 8 103.
#5 4 6 4 164.
#6 5 4 2 108.
#7 5 6 1 145
#8 5 8 2 326
f(zFun = "sum")
# A tibble: 8 x 4
# gear cyl Count disp
# <dbl> <dbl> <int> <dbl>
#1 3 4 1 120.
#2 3 6 2 483
#3 3 8 12 4291.
#4 4 4 8 821
#5 4 6 4 655.
#6 5 4 2 215.
#7 5 6 1 145
#8 5 8 2 652
此外,如果我们用 across
group_by
和 summarise
中的 sym
评估
f <- function(x = mtcars,
groupcol = "cyl",
zCol = "disp",
zFun = "mean") {
x %>%
group_by(across(c(gear, groupcol))) %>% # 1 preset grouper, 1 user-defined
summarise(Count = n(), # 1 preset summary, 1 user defined
across(zCol, ~ get(zFun)(.))) %>%
ungroup
}
f()
# A tibble: 8 x 4
# gear cyl Count disp
# <dbl> <dbl> <int> <dbl>
#1 3 4 1 120.
#2 3 6 2 242.
#3 3 8 12 358.
#4 4 4 8 103.
#5 4 6 4 164.
#6 5 4 2 108.
#7 5 6 1 145
#8 5 8 2 326