tidyverse:在汇总输出中附加总计行
tidyverse: append rows of totals in summary output
我想在与 group_by
一起使用的 summarise
的输出中附加总计行。
Data <-
structure(list(CT = c("1", "1", "1", "1", "1", "1", "1", "1",
"1", "1", "1", "1", "2", "2", "2", "2", "2", "2", "2", "2", "2",
"2"), SCT = c("1", "1", "1", "1", "1", "1", "2", "2", "2", "2",
"2", "2", "1", "1", "1", "1", "2", "2", "2", "2", "2", "2"),
SSCT = c("1", "2", "3", "1", "2", "3", "1", "2", "1", "2",
"1", "2", "1", "2", "1", "2", "1", "2", "3", "1", "2", "3"
), Category = c(111L, 112L, 113L, 111L, 112L, 113L, 121L,
122L, 121L, 122L, 121L, 122L, 211L, 212L, 211L, 212L, 221L,
222L, 223L, 221L, 222L, 223L), Y = c(10L, 12L, 15L, 11L,
10L, 13L, 21L, 22L, 20L, 25L, 23L, 24L, 31L, 33L, 36L, 39L,
32L, 31L, 36L, 41L, 44L, 45L)), row.names = c(NA, -22L), class = "data.frame")
head(Data)
CT SCT SSCT Category Y
1 1 1 1 111 10
2 1 1 2 112 12
3 1 1 3 113 15
4 1 1 1 111 11
5 1 1 2 112 10
6 1 1 3 113 13
这里
CT: Category, SCT: Sub Category, SSCT: Sub Sub Category
Data %>%
group_by(CT) %>%
summarise(Total = sum(Y))
# A tibble: 2 x 2
CT Total
<chr> <int>
1 1 206
2 2 368
Data %>%
group_by(CT, SCT) %>%
summarise(Total = sum(Y))
# A tibble: 4 x 3
# Groups: CT [2]
CT SCT Total
<chr> <chr> <int>
1 1 1 71
2 1 2 135
3 2 1 139
4 2 2 229
Data %>%
group_by(CT, SCT, SSCT) %>%
summarise(Total = sum(Y))
# A tibble: 10 x 4
# Groups: CT, SCT [4]
CT SCT SSCT Total
<chr> <chr> <chr> <int>
1 1 1 1 21
2 1 1 2 22
3 1 1 3 28
4 1 2 1 64
5 1 2 2 71
6 2 1 1 67
7 2 1 2 72
8 2 2 1 73
9 2 2 2 75
10 2 2 3 81
需要输出
需要输出以下内容:
CT SCT SSCT Total
Total 206
Total 71
1 1 1 21
1 1 2 22
1 1 3 28
Total 135
1 2 1 64
1 2 2 71
Total 368
Total 139
2 1 1 67
2 1 2 72
Total 229
2 2 1 73
2 2 2 75
2 2 3 81
如果您希望最后一列是数字而不是字符,则必须接受 NA 而不是 ""
bind_rows(
Data %>%
group_by(CT) %>%
summarise(Total = sum(Y)) %>%
mutate(SCT = "Total", SSCT = "Total"),
Data %>%
group_by(CT, SCT) %>%
summarise(Total = sum(Y)) %>%
mutate(SSCT = "Total"),
Data %>%
group_by(CT, SCT, SSCT) %>%
summarise(Total = sum(Y))
) %>%
mutate(across(c("SCT","SSCT"), ~ fct_relevel(., "Total"))) %>%
arrange(CT, SCT, SSCT) %>%
mutate(CT = ifelse(SSCT == "Total", "Total", CT),
SCT = ifelse(SSCT == "Total", "", as.numeric(SCT)-1),
SSCT = gsub("Total", "", SSCT)) %>%
select(CT, SCT, SSCT, Total)
# A tibble: 16 x 4
CT SCT SSCT Total
<chr> <chr> <chr> <int>
1 Total "" "" 206
2 Total "" "" 71
3 1 "1" "1" 21
4 1 "1" "2" 22
5 1 "1" "3" 28
6 Total "" "" 135
7 1 "2" "1" 64
8 1 "2" "2" 71
9 Total "" "" 368
10 Total "" "" 139
11 2 "1" "1" 67
12 2 "1" "2" 72
13 Total "" "" 229
14 2 "2" "1" 73
15 2 "2" "2" 75
16 2 "2" "3" 81
您可以在 data.table 包中使用 rollup
library(data.table)
setDT(Data)
group_vars <- c('CT', 'SCT', 'SSCT')
piv <-
rollup(Data, j = .(Total = sum(Y)), by = group_vars)
setorderv(piv, group_vars)[-1]
#> CT SCT SSCT Total
#> 1: 1 <NA> <NA> 206
#> 2: 1 1 <NA> 71
#> 3: 1 1 1 21
#> 4: 1 1 2 22
#> 5: 1 1 3 28
#> 6: 1 2 <NA> 135
#> 7: 1 2 1 64
#> 8: 1 2 2 71
#> 9: 2 <NA> <NA> 368
#> 10: 2 1 <NA> 139
#> 11: 2 1 1 67
#> 12: 2 1 2 72
#> 13: 2 2 <NA> 229
#> 14: 2 2 1 73
#> 15: 2 2 2 75
#> 16: 2 2 3 81
由 reprex package (v2.0.0)
于 2021-06-05 创建
1) 我们可以使用 janitor 包中的 adorn_totals
。在看门人中,总计通常在总计的组之后,但我们可以使用名称“0”代替总计并排序,以便总计首先排序,然后在最后用总计一词替换“0”。过滤器会删除包含多个字段且包含总计一词的行。
library(dplyr)
library(janitor)
Data %>%
group_by(CT, SCT, SSCT) %>%
summarize(Y = sum(Y), .groups = "drop") %>%
group_by(CT) %>%
group_modify(~ adorn_totals(., name = "0")) %>%
group_by(CT, SCT) %>%
group_modify(~ adorn_totals(., name = "0")) %>%
ungroup %>%
filter(rowSums(across() == "0") <= 1) %>%
arrange(CT, SCT, SSCT) %>%
mutate(across(where(is.character), ~ replace(., . == "0", "Total")))
给予:
# A tibble: 16 x 4
CT SCT SSCT Y
<chr> <chr> <chr> <int>
1 1 Total - 206
2 1 1 Total 71
3 1 1 1 21
4 1 1 2 22
5 1 1 3 28
6 1 2 Total 135
7 1 2 1 64
8 1 2 2 71
9 2 Total - 368
10 2 1 Total 139
11 2 1 1 67
12 2 1 2 72
13 2 2 Total 229
14 2 2 1 73
15 2 2 2 75
16 2 2 3 81
2)如果合计可以放在组后而不是组前,那么可以简化。
Data %>%
group_by(CT, SCT, SSCT) %>%
summarize(Y = sum(Y), .groups = "drop") %>%
group_by(CT) %>%
group_modify(~ adorn_totals(.)) %>%
group_by(CT, SCT) %>%
group_modify(~ adorn_totals(.)) %>%
ungroup %>%
filter(rowSums(across() == "Total") <= 1)
给予:
# A tibble: 16 x 4
CT SCT SSCT Y
<chr> <chr> <chr> <int>
1 1 1 1 21
2 1 1 2 22
3 1 1 3 28
4 1 1 Total 71
5 1 2 1 64
6 1 2 2 71
7 1 2 Total 135
8 1 Total - 206
9 2 1 1 67
10 2 1 2 72
11 2 1 Total 139
12 2 2 1 73
13 2 2 2 75
14 2 2 3 81
15 2 2 Total 229
16 2 Total - 368
我想在与 group_by
一起使用的 summarise
的输出中附加总计行。
Data <-
structure(list(CT = c("1", "1", "1", "1", "1", "1", "1", "1",
"1", "1", "1", "1", "2", "2", "2", "2", "2", "2", "2", "2", "2",
"2"), SCT = c("1", "1", "1", "1", "1", "1", "2", "2", "2", "2",
"2", "2", "1", "1", "1", "1", "2", "2", "2", "2", "2", "2"),
SSCT = c("1", "2", "3", "1", "2", "3", "1", "2", "1", "2",
"1", "2", "1", "2", "1", "2", "1", "2", "3", "1", "2", "3"
), Category = c(111L, 112L, 113L, 111L, 112L, 113L, 121L,
122L, 121L, 122L, 121L, 122L, 211L, 212L, 211L, 212L, 221L,
222L, 223L, 221L, 222L, 223L), Y = c(10L, 12L, 15L, 11L,
10L, 13L, 21L, 22L, 20L, 25L, 23L, 24L, 31L, 33L, 36L, 39L,
32L, 31L, 36L, 41L, 44L, 45L)), row.names = c(NA, -22L), class = "data.frame")
head(Data)
CT SCT SSCT Category Y
1 1 1 1 111 10
2 1 1 2 112 12
3 1 1 3 113 15
4 1 1 1 111 11
5 1 1 2 112 10
6 1 1 3 113 13
这里
CT: Category, SCT: Sub Category, SSCT: Sub Sub Category
Data %>%
group_by(CT) %>%
summarise(Total = sum(Y))
# A tibble: 2 x 2
CT Total
<chr> <int>
1 1 206
2 2 368
Data %>%
group_by(CT, SCT) %>%
summarise(Total = sum(Y))
# A tibble: 4 x 3
# Groups: CT [2]
CT SCT Total
<chr> <chr> <int>
1 1 1 71
2 1 2 135
3 2 1 139
4 2 2 229
Data %>%
group_by(CT, SCT, SSCT) %>%
summarise(Total = sum(Y))
# A tibble: 10 x 4
# Groups: CT, SCT [4]
CT SCT SSCT Total
<chr> <chr> <chr> <int>
1 1 1 1 21
2 1 1 2 22
3 1 1 3 28
4 1 2 1 64
5 1 2 2 71
6 2 1 1 67
7 2 1 2 72
8 2 2 1 73
9 2 2 2 75
10 2 2 3 81
需要输出
需要输出以下内容:
CT SCT SSCT Total
Total 206
Total 71
1 1 1 21
1 1 2 22
1 1 3 28
Total 135
1 2 1 64
1 2 2 71
Total 368
Total 139
2 1 1 67
2 1 2 72
Total 229
2 2 1 73
2 2 2 75
2 2 3 81
如果您希望最后一列是数字而不是字符,则必须接受 NA 而不是 ""
bind_rows(
Data %>%
group_by(CT) %>%
summarise(Total = sum(Y)) %>%
mutate(SCT = "Total", SSCT = "Total"),
Data %>%
group_by(CT, SCT) %>%
summarise(Total = sum(Y)) %>%
mutate(SSCT = "Total"),
Data %>%
group_by(CT, SCT, SSCT) %>%
summarise(Total = sum(Y))
) %>%
mutate(across(c("SCT","SSCT"), ~ fct_relevel(., "Total"))) %>%
arrange(CT, SCT, SSCT) %>%
mutate(CT = ifelse(SSCT == "Total", "Total", CT),
SCT = ifelse(SSCT == "Total", "", as.numeric(SCT)-1),
SSCT = gsub("Total", "", SSCT)) %>%
select(CT, SCT, SSCT, Total)
# A tibble: 16 x 4
CT SCT SSCT Total
<chr> <chr> <chr> <int>
1 Total "" "" 206
2 Total "" "" 71
3 1 "1" "1" 21
4 1 "1" "2" 22
5 1 "1" "3" 28
6 Total "" "" 135
7 1 "2" "1" 64
8 1 "2" "2" 71
9 Total "" "" 368
10 Total "" "" 139
11 2 "1" "1" 67
12 2 "1" "2" 72
13 Total "" "" 229
14 2 "2" "1" 73
15 2 "2" "2" 75
16 2 "2" "3" 81
您可以在 data.table 包中使用 rollup
library(data.table)
setDT(Data)
group_vars <- c('CT', 'SCT', 'SSCT')
piv <-
rollup(Data, j = .(Total = sum(Y)), by = group_vars)
setorderv(piv, group_vars)[-1]
#> CT SCT SSCT Total
#> 1: 1 <NA> <NA> 206
#> 2: 1 1 <NA> 71
#> 3: 1 1 1 21
#> 4: 1 1 2 22
#> 5: 1 1 3 28
#> 6: 1 2 <NA> 135
#> 7: 1 2 1 64
#> 8: 1 2 2 71
#> 9: 2 <NA> <NA> 368
#> 10: 2 1 <NA> 139
#> 11: 2 1 1 67
#> 12: 2 1 2 72
#> 13: 2 2 <NA> 229
#> 14: 2 2 1 73
#> 15: 2 2 2 75
#> 16: 2 2 3 81
由 reprex package (v2.0.0)
于 2021-06-05 创建1) 我们可以使用 janitor 包中的 adorn_totals
。在看门人中,总计通常在总计的组之后,但我们可以使用名称“0”代替总计并排序,以便总计首先排序,然后在最后用总计一词替换“0”。过滤器会删除包含多个字段且包含总计一词的行。
library(dplyr)
library(janitor)
Data %>%
group_by(CT, SCT, SSCT) %>%
summarize(Y = sum(Y), .groups = "drop") %>%
group_by(CT) %>%
group_modify(~ adorn_totals(., name = "0")) %>%
group_by(CT, SCT) %>%
group_modify(~ adorn_totals(., name = "0")) %>%
ungroup %>%
filter(rowSums(across() == "0") <= 1) %>%
arrange(CT, SCT, SSCT) %>%
mutate(across(where(is.character), ~ replace(., . == "0", "Total")))
给予:
# A tibble: 16 x 4
CT SCT SSCT Y
<chr> <chr> <chr> <int>
1 1 Total - 206
2 1 1 Total 71
3 1 1 1 21
4 1 1 2 22
5 1 1 3 28
6 1 2 Total 135
7 1 2 1 64
8 1 2 2 71
9 2 Total - 368
10 2 1 Total 139
11 2 1 1 67
12 2 1 2 72
13 2 2 Total 229
14 2 2 1 73
15 2 2 2 75
16 2 2 3 81
2)如果合计可以放在组后而不是组前,那么可以简化。
Data %>%
group_by(CT, SCT, SSCT) %>%
summarize(Y = sum(Y), .groups = "drop") %>%
group_by(CT) %>%
group_modify(~ adorn_totals(.)) %>%
group_by(CT, SCT) %>%
group_modify(~ adorn_totals(.)) %>%
ungroup %>%
filter(rowSums(across() == "Total") <= 1)
给予:
# A tibble: 16 x 4
CT SCT SSCT Y
<chr> <chr> <chr> <int>
1 1 1 1 21
2 1 1 2 22
3 1 1 3 28
4 1 1 Total 71
5 1 2 1 64
6 1 2 2 71
7 1 2 Total 135
8 1 Total - 206
9 2 1 1 67
10 2 1 2 72
11 2 1 Total 139
12 2 2 1 73
13 2 2 2 75
14 2 2 3 81
15 2 2 Total 229
16 2 Total - 368