在 R 中按多个分组聚合,并保留每个层级的结果

Aggregate by multiple groups and keep the result for each level in R

是否有可能以更简单的方式获得预期结果(不需要像我下面代码中那样的中间步骤)?我一直在寻找,但没有找到任何解决方案……

因此,例如:

# Example data: four key columns (A-D) and three numeric value columns.
df <- data.frame(
  A = rep(c("01", "02"), each = 6),
  B = rep(c("12", "14", "18", "22"), times = c(4, 2, 3, 3)),
  C = c("01", "01", "02", "02", "01", "02", "01", "02", "05", "01", "02", "02"),
  D = c("1", "2", "1", "2", "2", "1", "2", "1", "3", "2", "1", "3"),
  value_1 = c(25, 14, 1, 15, 0, 15, 0, 16, 18, 74, 112, 36),
  value_2 = c(2, 0, 48, 12, 0, 47, 95, 14, 188, 65, 14, 47),
  value_3 = c(78, 5, 14, 26, 9, 0, 4, 0, 0, 8, 0, 4)
)

我想对每个层级(A、B、C、D)及其组合的值进行汇总,以便在最终的表格中显示每个层级的总和。

# Manual approach: build one intermediate summary per grouping set, then stack
# them. dplyr must be attached for %>%, group_by(), summarise() and bind_rows().
library(dplyr)

# Totals per A.
A <- df %>%
  group_by(A) %>%
  summarise(across(value_1:value_3, sum), .groups = "drop")

# Totals per A x B.
# .groups = "drop" returns an ungrouped tibble and silences the regrouping
# message that summarise() prints when several grouping columns are used.
B <- df %>%
  group_by(A, B) %>%
  summarise(across(value_1:value_3, sum), .groups = "drop")

# Totals per A x B x C.
C <- df %>%
  group_by(A, B, C) %>%
  summarise(across(value_1:value_3, sum), .groups = "drop")

# Totals per A x D.
AD <- df %>%
  group_by(A, D) %>%
  summarise(across(value_1:value_3, sum), .groups = "drop")

# Totals per A x B x D.
BD <- df %>%
  group_by(A, B, D) %>%
  summarise(across(value_1:value_3, sum), .groups = "drop")

# Stack all levels; key columns that a level was not grouped by come out as NA.
result <- bind_rows(A, B, C, AD, BD)

# Replace those NAs with the placeholder "00". Only the key columns are
# touched, so the numeric value columns are never assigned a string.
result <- result %>%
  mutate(across(c(A, B, C, D), ~ coalesce(.x, "00")))

# Keys first, then values; sort rows by the keys.
result <- result[, c("A", "B", "C", "D", "value_1", "value_2", "value_3")]
result <- result[order(result$A, result$B, result$C, result$D), ]

我的实际数据有更多的层级,所以怎样才能不经过 B、C 等这样的中间步骤,直接得到如下结果:

> head(result)
# A tibble: 6 x 7
  A     B     C     D     value_1 value_2 value_3
  <chr> <chr> <chr> <chr>   <dbl>   <dbl>   <dbl>
1 01    00    00    00         70     109     132
2 01    00    00    1          41      97      92
3 01    00    00    2          29      12      40
4 01    12    00    00         55      62     123
5 01    12    00    1          26      50      92
6 01    12    00    2          29      12      31

是否有可能使用 .groups 参数或 aggregate 函数来实现,而不是反复组合使用 group_by 和 summarise?

# Example data: four key columns (A-D) and three numeric value columns.
df <- data.frame(
  A = rep(c("01", "02"), each = 6),
  B = rep(c("12", "14", "18", "22"), times = c(4, 2, 3, 3)),
  C = c("01", "01", "02", "02", "01", "02", "01", "02", "05", "01", "02", "02"),
  D = c("1", "2", "1", "2", "2", "1", "2", "1", "3", "2", "1", "3"),
  value_1 = c(25, 14, 1, 15, 0, 15, 0, 16, 18, 74, 112, 36),
  value_2 = c(2, 0, 48, 12, 0, 47, 95, 14, 188, 65, 14, 47),
  value_3 = c(78, 5, 14, 26, 9, 0, 4, 0, 0, 8, 0, 4)
)

library(tidyverse)

# Grouping sets to summarise; each element is one combination of key columns.
# Add more sets (e.g. c("A", "D") or c("A", "B", "D")) to cover further levels.
grp_vars <- list("A", c("A", "B"), c("A", "B", "C"))

# Summarise df once per grouping set, then stack the per-level tables.
map(
  grp_vars,
  function(vars) {
    df %>%
      # all_of() states explicitly that `vars` is a character vector of column
      # names; selecting with a bare external vector is deprecated.
      group_by(across(all_of(vars))) %>%
      summarise(across(value_1:value_3, sum), .groups = "drop")
  }
) %>%
  # Key columns absent from a level are filled with NA when stacking.
  bind_rows() %>%
  # Replace those NAs with the placeholder "00".
  mutate(across(where(is.character), ~ replace_na(.x, replace = "00"))) %>%
  # Put the key columns first and the numeric totals after them.
  relocate(where(is.numeric), .after = where(is.character))


#> # A tibble: 15 x 6
#>    A     B     C     value_1 value_2 value_3
#>    <chr> <chr> <chr>   <dbl>   <dbl>   <dbl>
#>  1 01    00    00         70     109     132
#>  2 02    00    00        256     423      16
#>  3 01    12    00         55      62     123
#>  4 01    14    00         15      47       9
#>  5 02    18    00         34     297       4
#>  6 02    22    00        222     126      12
#>  7 01    12    01         39       2      83
#>  8 01    12    02         16      60      40
#>  9 01    14    01          0       0       9
#> 10 01    14    02         15      47       0
#> 11 02    18    01          0      95       4
#> 12 02    18    02         16      14       0
#> 13 02    18    05         18     188       0
#> 14 02    22    01         74      65       8
#> 15 02    22    02        148      61       4

由 reprex package (v2.0.1) 创建于 2022-03-31

如果我没理解错的话,还可以增加一列 level,用来标记每一行属于哪个分组层级:

# Same per-level summary as before, plus a `level` column recording which
# grouping set each row came from (the set's column names pasted together).
map(
  grp_vars,
  function(vars) {
    df %>%
      # all_of() states explicitly that `vars` is a character vector of column
      # names; selecting with a bare external vector is deprecated.
      group_by(across(all_of(vars))) %>%
      summarise(across(value_1:value_3, sum), .groups = "drop") %>%
      mutate(level = paste0(vars, collapse = ""))
  }
) %>%
  # Key columns absent from a level are filled with NA when stacking.
  bind_rows() %>%
  # Replace those NAs with the placeholder "00".
  mutate(across(where(is.character), ~ replace_na(.x, replace = "00"))) %>%
  # Order columns as: keys, level, then the numeric totals.
  relocate(level, where(is.numeric), .after = where(is.character))


# A tibble: 15 x 7
   A     B     C     level value_1 value_2 value_3
   <chr> <chr> <chr> <chr>   <dbl>   <dbl>   <dbl>
 1 01    00    00    A          70     109     132
 2 02    00    00    A         256     423      16
 3 01    12    00    AB         55      62     123
 4 01    14    00    AB         15      47       9
 5 02    18    00    AB         34     297       4
 6 02    22    00    AB        222     126      12
 7 01    12    01    ABC        39       2      83
 8 01    12    02    ABC        16      60      40
 9 01    14    01    ABC         0       0       9
10 01    14    02    ABC        15      47       0
11 02    18    01    ABC         0      95       4
12 02    18    02    ABC        16      14       0
13 02    18    05    ABC        18     188       0
14 02    22    01    ABC        74      65       8
15 02    22    02    ABC       148      61       4