在 dplyr::summarise 中使用用户自定义/动态的汇总函数

R user-defined/dynamic summary function within dplyr::summarise

很难在描述这个问题时不让它听起来像许多类似的问题!

我有一个函数,我希望它的其中一个参数是一个函数名(例如 "mean" 或 "sum"),该函数将在 dplyr::summarise 中被调用:

data(mtcars)

# Summarise `x` by `gear` plus one caller-chosen grouping column.
#
# Arguments:
#   x        Data frame to summarise; must contain a `gear` column.
#   groupcol Name (string) of the second grouping column.
#   zCol     Name (string) of the column to aggregate; the result column
#            keeps this name.
#   zFun     Name of the aggregation function. Not used yet: the
#            aggregation below is still hard-coded to mean().
#
# Returns the ungrouped summary with one row per gear/groupcol
# combination: the row count (`Count`) and the aggregated column.
f <- function(x = mtcars,
              groupcol = "cyl",
              zCol = "disp",
              zFun = "mean") {

  zColquo <- quo_name(zCol)

  # ungroup() has to be a pipe step; written as a bare statement after the
  # pipeline it would become the function's return value instead.
  cellSummaries <- x %>%
    group_by(gear, !!sym(groupcol)) %>% # 1 preset grouper, 1 user-defined
    summarise(Count = n(), # 1 preset summary, 1 user defined
              # mean should be zFun, user-defined
              !!zColquo := mean(!!sym(zColquo))) %>%
    ungroup()

  cellSummaries
}

(按 gear 和 cyl 分组,然后针对每个分组返回计数和 mean(disp))

正如我在代码注释中所写,我希望 'mean' 是动态的,即执行由 zFun 指定的函数,但我怎么也想不出该怎么做!在此先感谢您的任何建议。

您可以使用 match.fun 使函数动态化。我还删除了 zColquo,因为它不需要。

library(dplyr)
library(rlang)

# Count rows and aggregate one column for every gear/<groupcol> group.
#
# Arguments:
#   x        Data frame to summarise; must contain a `gear` column.
#   groupcol Name (string) of the second grouping column.
#   zCol     Name (string) of the column to aggregate; the result column
#            keeps this name.
#   zFun     Aggregation function, given as a name (e.g. "mean", "sum") or
#            anything else match.fun() accepts.
#
# Returns the ungrouped summary with columns gear, <groupcol>, Count, <zCol>.
f <- function(x = mtcars,
              groupcol = "cyl",
              zCol = "disp",
              zFun = "mean") {
  x %>%
    group_by(gear, !!sym(groupcol)) %>%
    summarise(Count = n(),
              # `!!zFun` injects the argument's value before the expression
              # is evaluated, so a column of `x` that happens to be called
              # `zFun` cannot shadow it inside summarise().
              !!zCol := match.fun(!!zFun)(!!sym(zCol))) %>%
    ungroup()
}

然后您可以检查输出

# Call with every default: mean of disp per gear/cyl group.
f()

# A tibble: 8 x 4
#   gear   cyl Count  disp
#  <dbl> <dbl> <int> <dbl>
#1     3     4     1  120.
#2     3     6     2  242.
#3     3     8    12  358.
#4     4     4     8  103.
#5     4     6     4  164.
#6     5     4     2  108.
#7     5     6     1  145 
#8     5     8     2  326 

# Same grouping, but the aggregation function is switched to sum by name.
f(zFun = "sum")

# A tibble: 8 x 4
#   gear   cyl Count  disp
#  <dbl> <dbl> <int> <dbl>
#1     3     4     1  120.
#2     3     6     2  483 
#3     3     8    12 4291.
#4     4     4     8  821 
#5     4     6     4  655.
#6     5     4     2  215.
#7     5     6     1  145 
#8     5     8     2  652 

我们可以使用get

library(dplyr)    
# Count rows and aggregate one column for every gear/<groupcol> group,
# looking the aggregation function up by name with get().
#
# Arguments:
#   x        Data frame to summarise; must contain a `gear` column.
#   groupcol Name (string) of the second grouping column.
#   zCol     Name (string) of the column to aggregate; the result column
#            keeps this name.
#   zFun     Name (string) of the aggregation function, e.g. "mean" or "sum".
#
# Returns the ungrouped summary with columns gear, <groupcol>, Count, <zCol>.
f <- function(x = mtcars,
              groupcol = "cyl",
              zCol = "disp",
              zFun = "mean") {
  x %>%
    group_by(gear, !!sym(groupcol)) %>% # 1 preset grouper, 1 user-defined
    summarise(Count = n(), # 1 preset summary, 1 user defined
              # `!!zFun` injects the name up front so a column called `zFun`
              # cannot shadow the argument; mode = "function" makes get()
              # skip a data column that shares the function's name.
              !!zCol := get(!!zFun, mode = "function")(!!sym(zCol))) %>%
    ungroup()
}

# Call with every default: mean of disp per gear/cyl group.
f()
# A tibble: 8 x 4
#   gear   cyl Count  disp
#  <dbl> <dbl> <int> <dbl>
#1     3     4     1  120.
#2     3     6     2  242.
#3     3     8    12  358.
#4     4     4     8  103.
#5     4     6     4  164.
#6     5     4     2  108.
#7     5     6     1  145 
#8     5     8     2  326 


# Same grouping, but the aggregation function is switched to sum by name.
f(zFun = "sum")
# A tibble: 8 x 4
#   gear   cyl Count  disp
#  <dbl> <dbl> <int> <dbl>
#1     3     4     1  120.
#2     3     6     2  483 
#3     3     8    12 4291.
#4     4     4     8  821 
#5     4     6     4  655.
#6     5     4     2  215.
#7     5     6     1  145 
#8     5     8     2  652 

此外,如果用 across 进行包装,就可以去掉 group_by 和 summarise 中的 sym 求值
# Count rows and aggregate one column for every gear/<groupcol> group,
# selecting the string-named columns through across().
#
# Arguments:
#   x        Data frame to summarise; must contain a `gear` column.
#   groupcol Name (string) of the second grouping column.
#   zCol     Name (string) of the column to aggregate; the result column
#            keeps this name.
#   zFun     Name (string) of the aggregation function, e.g. "mean" or "sum".
#
# Returns the ungrouped summary with columns gear, <groupcol>, Count, <zCol>.
f <- function(x = mtcars,
              groupcol = "cyl",
              zCol = "disp",
              zFun = "mean") {
  x %>%
    # all_of() marks `groupcol` / `zCol` as external character vectors, so a
    # column literally named "groupcol" or "zCol" is never picked instead.
    group_by(across(c(gear, all_of(groupcol)))) %>% # 1 preset grouper, 1 user-defined
    summarise(Count = n(), # 1 preset summary, 1 user defined
              # `!!zFun` injects the function name before evaluation;
              # mode = "function" makes get() ignore same-named non-functions.
              across(all_of(zCol), get(!!zFun, mode = "function"))) %>%
    ungroup()
}
# Call with every default: mean of disp per gear/cyl group.
f()
# A tibble: 8 x 4
#   gear   cyl Count  disp
#  <dbl> <dbl> <int> <dbl>
#1     3     4     1  120.
#2     3     6     2  242.
#3     3     8    12  358.
#4     4     4     8  103.
#5     4     6     4  164.
#6     5     4     2  108.
#7     5     6     1  145 
#8     5     8     2  326