使用 group_by、summarize 和 max() 在 R 中循环字符向量

Loop over character vector in R with group_by, summarise and max()

我有一个数据集,我想对其进行分组,然后获取每组输出的最大值、第二大值和总计。在示例数据集 mtcars 上,它看起来像这样:

# Per-cylinder summary of horsepower: maximum, second-largest value and sum.
df <- mtcars %>%
  group_by(cyl) %>%
  dplyr::summarise(
    MaxVal = max(hp, na.rm = TRUE),
    # Largest value strictly below the group maximum.
    MAXsecVal = max(hp[hp != max(hp)], na.rm = FALSE),
    Sum = sum(hp)
  ) %>%
  arrange(cyl)

 cyl MaxVal MAXsecVal   Sum
  <dbl>  <dbl>     <dbl> <dbl>
1     4    113       109   909
2     6    175       123   856
3     8    335       264  2929

这样是可行的。但现在我想动态指定用于计算(max、sum)的变量,最好是通过像这样的字符向量:

# Names of the columns to summarise, held as a character vector.
var <- c("hp", "drat", "wt")

可以通过循环或类似的方式,让现有代码对这些变量逐一运行。有人有想法吗?

请在下面找到您的问题的解决方案(如果我理解正确的话!):

library(dplyr)


# Creating a function:

#' Summarise one column per group: maximum, second-largest value and sum.
#'
#' @param table A data frame or tibble.
#' @param groupingVar Name of the grouping column, as a single string.
#' @param x Name of the column to summarise, as a single string.
#' @return A tibble with one row per group, sorted by the grouping column,
#'   holding the grouping column (under its own name) plus `MaxVal`,
#'   `MAXsecVal` and `Sum`.
my_function <- function(table, groupingVar, x) {
  stopifnot(
    is.character(groupingVar), length(groupingVar) == 1L,
    is.character(x), length(x) == 1L
  )

  table %>%
    # all_of() picks the column by its string name, so the grouping column
    # keeps that name in the output instead of a deparsed expression.
    group_by(across(all_of(groupingVar))) %>%
    dplyr::summarise(
      MaxVal = max(.data[[x]], na.rm = TRUE),
      # Largest value strictly below the group maximum. NA values are not
      # removed here, so a group containing NA yields NA.
      MAXsecVal = max(
        .data[[x]][.data[[x]] != max(.data[[x]])],
        na.rm = FALSE
      ),
      Sum = sum(.data[[x]])
    ) %>%
    # .data[[ ]] looks the name up in the data only, so the rows really are
    # ordered by the grouping column.
    arrange(.data[[groupingVar]])
}


# Defining variables:

table <- mtcars                        # data frame / tibble to summarise
groupingVar <- "cyl"                   # name of the grouping column (string)
selectedCols <- c("hp", "drat", "wt")  # names of the columns to summarise


# Using the function in a loop for:

# Pre-allocate one named slot per selected column so the list is not grown
# inside the loop.
results <- vector("list", length(selectedCols))
names(results) <- selectedCols

for (i in seq_along(selectedCols)) {
  results[[i]] <- my_function(
    table = table,
    groupingVar = groupingVar,
    x = selectedCols[i]
  )

  # Make sure the first (grouping) column carries the grouping variable's name.
  colnames(results[[i]])[1] <- groupingVar
}
#> `summarise()` ungrouping output (override with `.groups` argument)
#> `summarise()` ungrouping output (override with `.groups` argument)
#> `summarise()` ungrouping output (override with `.groups` argument)


# Results in a list:

results
#> $hp
#> # A tibble: 3 x 4
#>     cyl MaxVal MAXsecVal   Sum
#>   <dbl>  <dbl>     <dbl> <dbl>
#> 1     4    113       109   909
#> 2     6    175       123   856
#> 3     8    335       264  2929
#> 
#> $drat
#> # A tibble: 3 x 4
#>     cyl MaxVal MAXsecVal   Sum
#>   <dbl>  <dbl>     <dbl> <dbl>
#> 1     4   4.93      4.43  44.8
#> 2     6   3.92      3.9   25.1
#> 3     8   4.22      3.73  45.2
#> 
#> $wt
#> # A tibble: 3 x 4
#>     cyl MaxVal MAXsecVal   Sum
#>   <dbl>  <dbl>     <dbl> <dbl>
#> 1     4   3.19      3.15  25.1
#> 2     6   3.46      3.44  21.8
#> 3     8   5.42      5.34  56.0

由 reprex 包 (v0.3.0) 创建于 2021-09-12