按多个组聚合并将每个级别的结果保存在 R 中
Aggregate by multiple groups and keep the result for each level in R
是否有可能以最简单的方式获得预期结果(没有中间步骤,就像我在代码中所做的那样)?
我一直在寻找它,但找不到任何解决方案...
因此,例如:
df <- data.frame( A = c("01","01","01","01","01","01","02","02","02","02","02","02"),
B = c("12","12","12","12","14","14","18","18","18","22","22","22"),
C = c("01","01","02","02","01","02","01","02","05","01","02","02"),
D = c("1","2","1","2","2","1","2","1","3","2","1","3"),
value_1 = c(25,14,1,15,0,15,0,16,18,74,112,36),
value_2 = c(2,0,48,12,0,47,95,14,188,65,14,47),
value_3 = c(78,5,14,26,9,0,4,0,0,8,0,4))
我想总结每个级别(A、B、C、D)的值以及它们的组合,以在最终 table 中显示每个级别的总和。
A <- df %>%
group_by(A) %>%
summarise(across(value_1:value_3, sum))
B <- df %>%
group_by(A,B) %>%
summarise(across(value_1:value_3, sum))
C <- df %>%
group_by(A,B,C) %>%
summarise(across(value_1:value_3, sum))
AD <- df %>%
group_by(A,D) %>%
summarise(across(value_1:value_3, sum))
BD <- df %>%
group_by(A,B,D) %>%
summarise(across(value_1:value_3, sum))
result <-bind_rows(A, B, C, AD, BD)
result[is.na(result)] <- "00"
result <- result[,c("A","B","C","D", "value_1", "value_2", "value_3")]
result <- result[order(result$A, result$B, result$C, result$D),]
我的数据有更多的层次,所以我怎么能不做像这样的中间步骤:B,C..得到这样的结果:
> head(result)
# A tibble: 6 x 7
A B C D value_1 value_2 value_3
<chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
1 01 00 00 00 70 109 132
2 01 00 00 1 41 97 92
3 01 00 00 2 29 12 40
4 01 12 00 00 55 62 123
5 01 12 00 1 26 50 92
6 01 12 00 2 29 12 31
是否有可能使用 .groups 参数或使用 aggregate 函数而不是组合 group_by 和 总结?
df <- data.frame( A = c("01","01","01","01","01","01","02","02","02","02","02","02"),
B = c("12","12","12","12","14","14","18","18","18","22","22","22"),
C = c("01","01","02","02","01","02","01","02","05","01","02","02"),
D = c("1","2","1","2","2","1","2","1","3","2","1","3"),
value_1 = c(25,14,1,15,0,15,0,16,18,74,112,36),
value_2 = c(2,0,48,12,0,47,95,14,188,65,14,47),
value_3 = c(78,5,14,26,9,0,4,0,0,8,0,4))
grp_vars <- list("A", c("A", "B"), c("A", "B", "C"))
library(tidyverse)
map_df(grp_vars,
~ df %>%
group_by(across(.x)) %>%
summarise(across(value_1:value_3, sum), .groups = "drop")) %>%
mutate(across(where(is.character), ~ replace_na(.x, replace = "00"))) %>%
relocate(where(is.numeric), .after = where(is.character))
#> # A tibble: 15 x 6
#> A B C value_1 value_2 value_3
#> <chr> <chr> <chr> <dbl> <dbl> <dbl>
#> 1 01 00 00 70 109 132
#> 2 02 00 00 256 423 16
#> 3 01 12 00 55 62 123
#> 4 01 14 00 15 47 9
#> 5 02 18 00 34 297 4
#> 6 02 22 00 222 126 12
#> 7 01 12 01 39 2 83
#> 8 01 12 02 16 60 40
#> 9 01 14 01 0 0 9
#> 10 01 14 02 15 47 0
#> 11 02 18 01 0 95 4
#> 12 02 18 02 16 14 0
#> 13 02 18 05 18 188 0
#> 14 02 22 01 74 65 8
#> 15 02 22 02 148 61 4
由 reprex package (v2.0.1)
创建于 2022-03-31
如果我没理解错的话
map_df(
grp_vars,
~ df %>%
group_by(across(.x)) %>%
summarise(across(value_1:value_3, sum), .groups = "drop") %>%
mutate(level = paste0(.x, collapse = ""))
) %>%
mutate(across(where(is.character), ~ replace_na(.x, replace = "00"))) %>%
relocate(level, where(is.numeric), .after = where(is.character))
# A tibble: 15 x 7
A B C level value_1 value_2 value_3
<chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
1 01 00 00 A 70 109 132
2 02 00 00 A 256 423 16
3 01 12 00 AB 55 62 123
4 01 14 00 AB 15 47 9
5 02 18 00 AB 34 297 4
6 02 22 00 AB 222 126 12
7 01 12 01 ABC 39 2 83
8 01 12 02 ABC 16 60 40
9 01 14 01 ABC 0 0 9
10 01 14 02 ABC 15 47 0
11 02 18 01 ABC 0 95 4
12 02 18 02 ABC 16 14 0
13 02 18 05 ABC 18 188 0
14 02 22 01 ABC 74 65 8
15 02 22 02 ABC 148 61 4
是否有可能以最简单的方式获得预期结果(没有中间步骤,就像我在代码中所做的那样)? 我一直在寻找它,但找不到任何解决方案...
因此,例如:
df <- data.frame( A = c("01","01","01","01","01","01","02","02","02","02","02","02"),
B = c("12","12","12","12","14","14","18","18","18","22","22","22"),
C = c("01","01","02","02","01","02","01","02","05","01","02","02"),
D = c("1","2","1","2","2","1","2","1","3","2","1","3"),
value_1 = c(25,14,1,15,0,15,0,16,18,74,112,36),
value_2 = c(2,0,48,12,0,47,95,14,188,65,14,47),
value_3 = c(78,5,14,26,9,0,4,0,0,8,0,4))
我想总结每个级别(A、B、C、D)的值以及它们的组合,以在最终 table 中显示每个级别的总和。
A <- df %>%
group_by(A) %>%
summarise(across(value_1:value_3, sum))
B <- df %>%
group_by(A,B) %>%
summarise(across(value_1:value_3, sum))
C <- df %>%
group_by(A,B,C) %>%
summarise(across(value_1:value_3, sum))
AD <- df %>%
group_by(A,D) %>%
summarise(across(value_1:value_3, sum))
BD <- df %>%
group_by(A,B,D) %>%
summarise(across(value_1:value_3, sum))
result <-bind_rows(A, B, C, AD, BD)
result[is.na(result)] <- "00"
result <- result[,c("A","B","C","D", "value_1", "value_2", "value_3")]
result <- result[order(result$A, result$B, result$C, result$D),]
我的数据有更多的层次,所以我怎么能不做像这样的中间步骤:B,C..得到这样的结果:
> head(result)
# A tibble: 6 x 7
A B C D value_1 value_2 value_3
<chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
1 01 00 00 00 70 109 132
2 01 00 00 1 41 97 92
3 01 00 00 2 29 12 40
4 01 12 00 00 55 62 123
5 01 12 00 1 26 50 92
6 01 12 00 2 29 12 31
是否有可能使用 .groups 参数或使用 aggregate 函数而不是组合 group_by 和 总结?
df <- data.frame( A = c("01","01","01","01","01","01","02","02","02","02","02","02"),
B = c("12","12","12","12","14","14","18","18","18","22","22","22"),
C = c("01","01","02","02","01","02","01","02","05","01","02","02"),
D = c("1","2","1","2","2","1","2","1","3","2","1","3"),
value_1 = c(25,14,1,15,0,15,0,16,18,74,112,36),
value_2 = c(2,0,48,12,0,47,95,14,188,65,14,47),
value_3 = c(78,5,14,26,9,0,4,0,0,8,0,4))
grp_vars <- list("A", c("A", "B"), c("A", "B", "C"))
library(tidyverse)
map_df(grp_vars,
~ df %>%
group_by(across(.x)) %>%
summarise(across(value_1:value_3, sum), .groups = "drop")) %>%
mutate(across(where(is.character), ~ replace_na(.x, replace = "00"))) %>%
relocate(where(is.numeric), .after = where(is.character))
#> # A tibble: 15 x 6
#> A B C value_1 value_2 value_3
#> <chr> <chr> <chr> <dbl> <dbl> <dbl>
#> 1 01 00 00 70 109 132
#> 2 02 00 00 256 423 16
#> 3 01 12 00 55 62 123
#> 4 01 14 00 15 47 9
#> 5 02 18 00 34 297 4
#> 6 02 22 00 222 126 12
#> 7 01 12 01 39 2 83
#> 8 01 12 02 16 60 40
#> 9 01 14 01 0 0 9
#> 10 01 14 02 15 47 0
#> 11 02 18 01 0 95 4
#> 12 02 18 02 16 14 0
#> 13 02 18 05 18 188 0
#> 14 02 22 01 74 65 8
#> 15 02 22 02 148 61 4
由 reprex package (v2.0.1)
创建于 2022-03-31如果我没理解错的话
map_df(
grp_vars,
~ df %>%
group_by(across(.x)) %>%
summarise(across(value_1:value_3, sum), .groups = "drop") %>%
mutate(level = paste0(.x, collapse = ""))
) %>%
mutate(across(where(is.character), ~ replace_na(.x, replace = "00"))) %>%
relocate(level, where(is.numeric), .after = where(is.character))
# A tibble: 15 x 7
A B C level value_1 value_2 value_3
<chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
1 01 00 00 A 70 109 132
2 02 00 00 A 256 423 16
3 01 12 00 AB 55 62 123
4 01 14 00 AB 15 47 9
5 02 18 00 AB 34 297 4
6 02 22 00 AB 222 126 12
7 01 12 01 ABC 39 2 83
8 01 12 02 ABC 16 60 40
9 01 14 01 ABC 0 0 9
10 01 14 02 ABC 15 47 0
11 02 18 01 ABC 0 95 4
12 02 18 02 ABC 16 14 0
13 02 18 05 ABC 18 188 0
14 02 22 01 ABC 74 65 8
15 02 22 02 ABC 148 61 4