新增汇总行:其值为按多列条件分组后其他行的合计
create rows as the Aggregate sums of other rows grouped by conditions of multiple columns
我有如下数据框 df,想在每个 GEO 内按 APPR 的不同类别(Trade / NOT)分别对 JPA 求和,并把结果作为名为 SumTrade 和 SumNOT 的新行追加到数据中。
structure(list(GEO = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L), .Label = c("Barrie", "Guelph"), class = "factor"),
NOC = c(6552L, 6322L, 7511L, 6722L, 122L, 6552L, 6322L, 7511L,
7611L, 9619L, 6411L, 6211L), Occupation = structure(c(5L,
3L, 9L, 4L, 1L, 5L, 3L, 9L, 2L, 6L, 8L, 7L), .Label = c("Banking, credit and other investment managers",
"Construction trades helpers and labourers", "Cooks", "Operators and attendants in amusement, recreation and sport",
"Other customer and information services representatives",
"Other labourers in processing, manufacturing and utilities",
"Retail sales supervisors", "Sales and account representatives - wholesale trade (non-technical)",
"Transport truck drivers"), class = "factor"), JPA = c(118L,
91L, 59L, 27L, 27L, 106L, 72L, 58L, 49L, 115L, 109L, 88L),
APPR = structure(c(2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L,
1L, 1L), .Label = c("NOT", "Trade"), class = "factor")), class = "data.frame", row.names = c(NA,
-12L))
所以我的输出应该是
structure(list(GEO = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L), .Label = c("Barrie", "Guelph"
), class = "factor"), NOC = c(6552L, 6322L, 7511L, 6722L, 122L,
6552L, 6322L, 7511L, 7611L, 9619L, 6411L, 6211L, 7777L, 6666L,
7777L, 6666L), Occupation = structure(c(5L, 3L, 12L, 4L, 1L,
5L, 3L, 12L, 2L, 6L, 8L, 7L, 11L, 10L, 11L, 9L), .Label = c("Banking, credit and other investment managers",
"Construction trades helpers and labourers", "Cooks", "Operators and attendants in amusement, recreation and sport",
"Other customer and information services representatives", "Other labourers in processing, manufacturing and utilities",
"Retail sales supervisors", "Sales and account representatives - wholesale trade (non-technical)",
"SumNot", "SumNotTrade", "SumTrade", "Transport truck drivers"
), class = "factor"), JPA = c(118L, 91L, 59L, 27L, 27L, 106L,
72L, 58L, 49L, 115L, 109L, 88L, 268L, 54L, 285L, 312L), APPR = structure(c(2L,
2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L), .Label = c("NOT",
"Trade"), class = "factor")), class = "data.frame", row.names = c(NA,
-16L))
我试过这段代码,但它没有生成我想要的结果
# Asker's attempt: mutate() keeps every original row and stores the
# per-(GEO, APPR) total of JPA in a new column `sumval`; it does not
# append any summary rows, which is why the result differs from the
# desired output.
df1%>%
group_by(GEO,APPR)%>%
mutate(sumval = sum(JPA))->df1
这个怎么样:
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
# Input data from the question: a 12-row data.frame with factor columns
# GEO, Occupation and APPR and integer columns NOC and JPA.
inp <- structure(list(GEO = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L), .Label = c("Barrie", "Guelph"), class = "factor"),
NOC = c(6552L, 6322L, 7511L, 6722L, 122L, 6552L, 6322L, 7511L,
7611L, 9619L, 6411L, 6211L), Occupation = structure(c(5L,
3L, 9L, 4L, 1L, 5L, 3L, 9L, 2L, 6L, 8L, 7L), .Label = c("Banking, credit and other investment managers",
"Construction trades helpers and labourers", "Cooks", "Operators and attendants in amusement, recreation and sport",
"Other customer and information services representatives",
"Other labourers in processing, manufacturing and utilities",
"Retail sales supervisors", "Sales and account representatives - wholesale trade (non-technical)",
"Transport truck drivers"), class = "factor"), JPA = c(118L,
91L, 59L, 27L, 27L, 106L, 72L, 58L, 49L, 115L, 109L, 88L),
APPR = structure(c(2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L,
1L, 1L), .Label = c("NOT", "Trade"), class = "factor")), class = "data.frame", row.names = c(NA,
-12L))
# Expected result as given in the question: a 16-row data.frame (the 12
# input rows followed by four summary rows). Kept for comparison only;
# the pipeline that follows does not reference `out`.
out <- structure(list(GEO = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L), .Label = c("Barrie", "Guelph"
), class = "factor"), NOC = c(6552L, 6322L, 7511L, 6722L, 122L,
6552L, 6322L, 7511L, 7611L, 9619L, 6411L, 6211L, 7777L, 6666L,
7777L, 6666L), Occupation = structure(c(5L, 3L, 12L, 4L, 1L,
5L, 3L, 12L, 2L, 6L, 8L, 7L, 11L, 10L, 11L, 9L), .Label = c("Banking, credit and other investment managers",
"Construction trades helpers and labourers", "Cooks", "Operators and attendants in amusement, recreation and sport",
"Other customer and information services representatives", "Other labourers in processing, manufacturing and utilities",
"Retail sales supervisors", "Sales and account representatives - wholesale trade (non-technical)",
"SumNot", "SumNotTrade", "SumTrade", "Transport truck drivers"
), class = "factor"), JPA = c(118L, 91L, 59L, 27L, 27L, 106L,
72L, 58L, 49L, 115L, 109L, 88L, 268L, 54L, 285L, 312L), APPR = structure(c(2L,
2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L), .Label = c("NOT",
"Trade"), class = "factor")), class = "data.frame", row.names = c(NA,
-16L))
# Build one summary row per (GEO, APPR) pair and append those rows to `inp`.
inp %>%
  group_by(GEO, APPR) %>%
  # Total JPA within each GEO/APPR group. `.groups` is left unset on
  # purpose, so the result stays grouped by GEO and dplyr prints its
  # informational message about grouped output.
  summarise(JPA = sum(JPA)) %>%
  mutate(
    # Integer literals keep NOC an integer column after bind_rows();
    # plain 7777 / 6666 are doubles and would promote the column to double.
    NOC = case_when(APPR == "Trade" ~ 7777L,
                    APPR == "NOT" ~ 6666L),
    # NOTE(review): Occupation is a factor in `inp` but character here, so
    # the combined column comes back as character; wrap in factor() if a
    # factor result is required.
    Occupation = case_when(APPR == "Trade" ~ "SumTrade",
                           APPR == "NOT" ~ "SumNot")
  ) %>%
  # Put the summary columns in the same order as `inp` before stacking.
  select(all_of(names(inp))) %>%
  # `.` is the summary table; the original rows come first.
  bind_rows(inp, .)
#> `summarise()` has grouped output by 'GEO'. You can override using the `.groups`
#> argument.
#> GEO NOC
#> 1 Barrie 6552
#> 2 Barrie 6322
#> 3 Barrie 7511
#> 4 Barrie 6722
#> 5 Barrie 122
#> 6 Guelph 6552
#> 7 Guelph 6322
#> 8 Guelph 7511
#> 9 Guelph 7611
#> 10 Guelph 9619
#> 11 Guelph 6411
#> 12 Guelph 6211
#> 13 Barrie 6666
#> 14 Barrie 7777
#> 15 Guelph 6666
#> 16 Guelph 7777
#> Occupation JPA
#> 1 Other customer and information services representatives 118
#> 2 Cooks 91
#> 3 Transport truck drivers 59
#> 4 Operators and attendants in amusement, recreation and sport 27
#> 5 Banking, credit and other investment managers 27
#> 6 Other customer and information services representatives 106
#> 7 Cooks 72
#> 8 Transport truck drivers 58
#> 9 Construction trades helpers and labourers 49
#> 10 Other labourers in processing, manufacturing and utilities 115
#> 11 Sales and account representatives - wholesale trade (non-technical) 109
#> 12 Retail sales supervisors 88
#> 13 SumNot 54
#> 14 SumTrade 268
#> 15 SumNot 312
#> 16 SumTrade 285
#> APPR
#> 1 Trade
#> 2 Trade
#> 3 Trade
#> 4 NOT
#> 5 NOT
#> 6 Trade
#> 7 Trade
#> 8 Trade
#> 9 Trade
#> 10 NOT
#> 11 NOT
#> 12 NOT
#> 13 NOT
#> 14 Trade
#> 15 NOT
#> 16 Trade
由 reprex 包 (v2.0.1) 于 2022-04-20 创建
我有如下数据框 df,想在每个 GEO 内按 APPR 的不同类别(Trade / NOT)分别对 JPA 求和,并把结果作为名为 SumTrade 和 SumNOT 的新行追加到数据中。
structure(list(GEO = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L), .Label = c("Barrie", "Guelph"), class = "factor"),
NOC = c(6552L, 6322L, 7511L, 6722L, 122L, 6552L, 6322L, 7511L,
7611L, 9619L, 6411L, 6211L), Occupation = structure(c(5L,
3L, 9L, 4L, 1L, 5L, 3L, 9L, 2L, 6L, 8L, 7L), .Label = c("Banking, credit and other investment managers",
"Construction trades helpers and labourers", "Cooks", "Operators and attendants in amusement, recreation and sport",
"Other customer and information services representatives",
"Other labourers in processing, manufacturing and utilities",
"Retail sales supervisors", "Sales and account representatives - wholesale trade (non-technical)",
"Transport truck drivers"), class = "factor"), JPA = c(118L,
91L, 59L, 27L, 27L, 106L, 72L, 58L, 49L, 115L, 109L, 88L),
APPR = structure(c(2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L,
1L, 1L), .Label = c("NOT", "Trade"), class = "factor")), class = "data.frame", row.names = c(NA,
-12L))
所以我的输出应该是
structure(list(GEO = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L), .Label = c("Barrie", "Guelph"
), class = "factor"), NOC = c(6552L, 6322L, 7511L, 6722L, 122L,
6552L, 6322L, 7511L, 7611L, 9619L, 6411L, 6211L, 7777L, 6666L,
7777L, 6666L), Occupation = structure(c(5L, 3L, 12L, 4L, 1L,
5L, 3L, 12L, 2L, 6L, 8L, 7L, 11L, 10L, 11L, 9L), .Label = c("Banking, credit and other investment managers",
"Construction trades helpers and labourers", "Cooks", "Operators and attendants in amusement, recreation and sport",
"Other customer and information services representatives", "Other labourers in processing, manufacturing and utilities",
"Retail sales supervisors", "Sales and account representatives - wholesale trade (non-technical)",
"SumNot", "SumNotTrade", "SumTrade", "Transport truck drivers"
), class = "factor"), JPA = c(118L, 91L, 59L, 27L, 27L, 106L,
72L, 58L, 49L, 115L, 109L, 88L, 268L, 54L, 285L, 312L), APPR = structure(c(2L,
2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L), .Label = c("NOT",
"Trade"), class = "factor")), class = "data.frame", row.names = c(NA,
-16L))
我试过这段代码,但它没有生成我想要的结果
# Asker's attempt: mutate() keeps every original row and stores the
# per-(GEO, APPR) total of JPA in a new column `sumval`; it does not
# append any summary rows, which is why the result differs from the
# desired output.
df1%>%
group_by(GEO,APPR)%>%
mutate(sumval = sum(JPA))->df1
这个怎么样:
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
# Input data from the question: a 12-row data.frame with factor columns
# GEO, Occupation and APPR and integer columns NOC and JPA.
inp <- structure(list(GEO = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L), .Label = c("Barrie", "Guelph"), class = "factor"),
NOC = c(6552L, 6322L, 7511L, 6722L, 122L, 6552L, 6322L, 7511L,
7611L, 9619L, 6411L, 6211L), Occupation = structure(c(5L,
3L, 9L, 4L, 1L, 5L, 3L, 9L, 2L, 6L, 8L, 7L), .Label = c("Banking, credit and other investment managers",
"Construction trades helpers and labourers", "Cooks", "Operators and attendants in amusement, recreation and sport",
"Other customer and information services representatives",
"Other labourers in processing, manufacturing and utilities",
"Retail sales supervisors", "Sales and account representatives - wholesale trade (non-technical)",
"Transport truck drivers"), class = "factor"), JPA = c(118L,
91L, 59L, 27L, 27L, 106L, 72L, 58L, 49L, 115L, 109L, 88L),
APPR = structure(c(2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L,
1L, 1L), .Label = c("NOT", "Trade"), class = "factor")), class = "data.frame", row.names = c(NA,
-12L))
# Expected result as given in the question: a 16-row data.frame (the 12
# input rows followed by four summary rows). Kept for comparison only;
# the pipeline that follows does not reference `out`.
out <- structure(list(GEO = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L), .Label = c("Barrie", "Guelph"
), class = "factor"), NOC = c(6552L, 6322L, 7511L, 6722L, 122L,
6552L, 6322L, 7511L, 7611L, 9619L, 6411L, 6211L, 7777L, 6666L,
7777L, 6666L), Occupation = structure(c(5L, 3L, 12L, 4L, 1L,
5L, 3L, 12L, 2L, 6L, 8L, 7L, 11L, 10L, 11L, 9L), .Label = c("Banking, credit and other investment managers",
"Construction trades helpers and labourers", "Cooks", "Operators and attendants in amusement, recreation and sport",
"Other customer and information services representatives", "Other labourers in processing, manufacturing and utilities",
"Retail sales supervisors", "Sales and account representatives - wholesale trade (non-technical)",
"SumNot", "SumNotTrade", "SumTrade", "Transport truck drivers"
), class = "factor"), JPA = c(118L, 91L, 59L, 27L, 27L, 106L,
72L, 58L, 49L, 115L, 109L, 88L, 268L, 54L, 285L, 312L), APPR = structure(c(2L,
2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L), .Label = c("NOT",
"Trade"), class = "factor")), class = "data.frame", row.names = c(NA,
-16L))
# Build one summary row per (GEO, APPR) pair and append those rows to `inp`.
inp %>%
  group_by(GEO, APPR) %>%
  # Total JPA within each GEO/APPR group. `.groups` is left unset on
  # purpose, so the result stays grouped by GEO and dplyr prints its
  # informational message about grouped output.
  summarise(JPA = sum(JPA)) %>%
  mutate(
    # Integer literals keep NOC an integer column after bind_rows();
    # plain 7777 / 6666 are doubles and would promote the column to double.
    NOC = case_when(APPR == "Trade" ~ 7777L,
                    APPR == "NOT" ~ 6666L),
    # NOTE(review): Occupation is a factor in `inp` but character here, so
    # the combined column comes back as character; wrap in factor() if a
    # factor result is required.
    Occupation = case_when(APPR == "Trade" ~ "SumTrade",
                           APPR == "NOT" ~ "SumNot")
  ) %>%
  # Put the summary columns in the same order as `inp` before stacking.
  select(all_of(names(inp))) %>%
  # `.` is the summary table; the original rows come first.
  bind_rows(inp, .)
#> `summarise()` has grouped output by 'GEO'. You can override using the `.groups`
#> argument.
#> GEO NOC
#> 1 Barrie 6552
#> 2 Barrie 6322
#> 3 Barrie 7511
#> 4 Barrie 6722
#> 5 Barrie 122
#> 6 Guelph 6552
#> 7 Guelph 6322
#> 8 Guelph 7511
#> 9 Guelph 7611
#> 10 Guelph 9619
#> 11 Guelph 6411
#> 12 Guelph 6211
#> 13 Barrie 6666
#> 14 Barrie 7777
#> 15 Guelph 6666
#> 16 Guelph 7777
#> Occupation JPA
#> 1 Other customer and information services representatives 118
#> 2 Cooks 91
#> 3 Transport truck drivers 59
#> 4 Operators and attendants in amusement, recreation and sport 27
#> 5 Banking, credit and other investment managers 27
#> 6 Other customer and information services representatives 106
#> 7 Cooks 72
#> 8 Transport truck drivers 58
#> 9 Construction trades helpers and labourers 49
#> 10 Other labourers in processing, manufacturing and utilities 115
#> 11 Sales and account representatives - wholesale trade (non-technical) 109
#> 12 Retail sales supervisors 88
#> 13 SumNot 54
#> 14 SumTrade 268
#> 15 SumNot 312
#> 16 SumTrade 285
#> APPR
#> 1 Trade
#> 2 Trade
#> 3 Trade
#> 4 NOT
#> 5 NOT
#> 6 Trade
#> 7 Trade
#> 8 Trade
#> 9 Trade
#> 10 NOT
#> 11 NOT
#> 12 NOT
#> 13 NOT
#> 14 Trade
#> 15 NOT
#> 16 Trade
由 reprex 包 (v2.0.1) 于 2022-04-20 创建