创建行作为按多列条件分组的其他行的总和

Question

我有如下 df，想按 GEO 和 APPR 的不同类别分组，对每组的 JPA 求和，并把结果作为新的汇总行（Occupation 分别命名为 SumTrade 和 SumNot）追加到原数据中。

# Sample input `df` as dput() output: a data.frame with 12 rows.
# Columns: GEO (factor: "Barrie"/"Guelph"), NOC (integer), Occupation (factor
# with 9 levels), JPA (integer), APPR (factor: "NOT"/"Trade").
structure(list(GEO = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L), .Label = c("Barrie", "Guelph"), class = "factor"), 
    NOC = c(6552L, 6322L, 7511L, 6722L, 122L, 6552L, 6322L, 7511L, 
    7611L, 9619L, 6411L, 6211L), Occupation = structure(c(5L, 
    3L, 9L, 4L, 1L, 5L, 3L, 9L, 2L, 6L, 8L, 7L), .Label = c("Banking, credit and other investment managers", 
    "Construction trades helpers and labourers", "Cooks", "Operators and attendants in amusement, recreation and sport", 
    "Other customer and information services representatives", 
    "Other labourers in processing, manufacturing and utilities", 
    "Retail sales supervisors", "Sales and account representatives - wholesale trade (non-technical)", 
    "Transport truck drivers"), class = "factor"), JPA = c(118L, 
    91L, 59L, 27L, 27L, 106L, 72L, 58L, 49L, 115L, 109L, 88L), 
    APPR = structure(c(2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 
    1L, 1L), .Label = c("NOT", "Trade"), class = "factor")), class = "data.frame", row.names = c(NA, 
-12L))

所以我的输出应该是

# Expected result as dput() output: the 12 original rows followed by 4
# summary rows (rows 13-16), one per GEO x APPR combination. In the summary
# rows JPA is the group total (Barrie: Trade 268, NOT 54; Guelph: Trade 285,
# NOT 312) and NOC is 7777 for Trade and 6666 for NOT.
# NOTE(review): row 14 (Barrie, NOT) uses the Occupation level "SumNotTrade"
# while row 16 (Guelph, NOT) uses "SumNot" -- presumably both were meant to
# be "SumNot"; confirm against the asker's intent.
structure(list(GEO = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L), .Label = c("Barrie", "Guelph"
), class = "factor"), NOC = c(6552L, 6322L, 7511L, 6722L, 122L, 
6552L, 6322L, 7511L, 7611L, 9619L, 6411L, 6211L, 7777L, 6666L, 
7777L, 6666L), Occupation = structure(c(5L, 3L, 12L, 4L, 1L, 
5L, 3L, 12L, 2L, 6L, 8L, 7L, 11L, 10L, 11L, 9L), .Label = c("Banking, credit and other investment managers", 
"Construction trades helpers and labourers", "Cooks", "Operators and attendants in amusement, recreation and sport", 
"Other customer and information services representatives", "Other labourers in processing, manufacturing and utilities", 
"Retail sales supervisors", "Sales and account representatives - wholesale trade (non-technical)", 
"SumNot", "SumNotTrade", "SumTrade", "Transport truck drivers"
), class = "factor"), JPA = c(118L, 91L, 59L, 27L, 27L, 106L, 
72L, 58L, 49L, 115L, 109L, 88L, 268L, 54L, 285L, 312L), APPR = structure(c(2L, 
2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L), .Label = c("NOT", 
"Trade"), class = "factor")), class = "data.frame", row.names = c(NA, 
-16L))

我试过这段代码，但它没有生成我想要的结果

  # Attach the per-(GEO, APPR) total of JPA to every row as column `sumval`.
  # mutate() keeps the existing rows and adds a column; it does not append
  # new summary rows.
  df1 <- df1 %>%
    group_by(GEO, APPR) %>%
    mutate(sumval = sum(JPA))

Answer 1

这个怎么样：

library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
# Input data from the question, rebuilt from its dput() output: 12 rows with
# factor columns GEO, Occupation and APPR and integer columns NOC and JPA.
inp <- structure(
  list(
    GEO = structure(
      c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L),
      .Label = c("Barrie", "Guelph"),
      class = "factor"
    ),
    NOC = c(
      6552L, 6322L, 7511L, 6722L, 122L, 6552L, 6322L, 7511L,
      7611L, 9619L, 6411L, 6211L
    ),
    Occupation = structure(
      c(5L, 3L, 9L, 4L, 1L, 5L, 3L, 9L, 2L, 6L, 8L, 7L),
      .Label = c(
        "Banking, credit and other investment managers",
        "Construction trades helpers and labourers",
        "Cooks",
        "Operators and attendants in amusement, recreation and sport",
        "Other customer and information services representatives",
        "Other labourers in processing, manufacturing and utilities",
        "Retail sales supervisors",
        "Sales and account representatives - wholesale trade (non-technical)",
        "Transport truck drivers"
      ),
      class = "factor"
    ),
    JPA = c(118L, 91L, 59L, 27L, 27L, 106L, 72L, 58L, 49L, 115L, 109L, 88L),
    APPR = structure(
      c(2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L),
      .Label = c("NOT", "Trade"),
      class = "factor"
    )
  ),
  class = "data.frame",
  row.names = c(NA, -12L)
)

# Expected result from the question, rebuilt from its dput() output: the 12
# input rows followed by 4 summary rows (one per GEO x APPR combination).
out <- structure(
  list(
    GEO = structure(
      c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L),
      .Label = c("Barrie", "Guelph"),
      class = "factor"
    ),
    NOC = c(
      6552L, 6322L, 7511L, 6722L, 122L, 6552L, 6322L, 7511L,
      7611L, 9619L, 6411L, 6211L, 7777L, 6666L, 7777L, 6666L
    ),
    Occupation = structure(
      c(5L, 3L, 12L, 4L, 1L, 5L, 3L, 12L, 2L, 6L, 8L, 7L, 11L, 10L, 11L, 9L),
      .Label = c(
        "Banking, credit and other investment managers",
        "Construction trades helpers and labourers",
        "Cooks",
        "Operators and attendants in amusement, recreation and sport",
        "Other customer and information services representatives",
        "Other labourers in processing, manufacturing and utilities",
        "Retail sales supervisors",
        "Sales and account representatives - wholesale trade (non-technical)",
        "SumNot",
        "SumNotTrade",
        "SumTrade",
        "Transport truck drivers"
      ),
      class = "factor"
    ),
    JPA = c(
      118L, 91L, 59L, 27L, 27L, 106L, 72L, 58L, 49L, 115L, 109L, 88L,
      268L, 54L, 285L, 312L
    ),
    APPR = structure(
      c(2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L),
      .Label = c("NOT", "Trade"),
      class = "factor"
    )
  ),
  class = "data.frame",
  row.names = c(NA, -16L)
)

# Build one summary row per GEO x APPR combination (JPA summed within the
# group) and append those rows below the original data.
inp %>%
  group_by(GEO, APPR) %>%
  # `.groups = "drop"` returns an ungrouped result, so summarise() no longer
  # prints its "grouped output" message.
  summarise(JPA = sum(JPA), .groups = "drop") %>%
  # Integer literals keep NOC an integer column after bind_rows(); a double
  # placeholder (7777) would promote the combined column to double.
  mutate(NOC = case_when(APPR == "Trade" ~ 7777L,
                         APPR == "NOT" ~ 6666L),
         Occupation = case_when(APPR == "Trade" ~ "SumTrade",
                                APPR == "NOT" ~ "SumNot")) %>%
  # Reorder the columns to match `inp` before stacking.
  select(all_of(names(inp))) %>%
  bind_rows(inp, .)
#>       GEO  NOC
#> 1  Barrie 6552
#> 2  Barrie 6322
#> 3  Barrie 7511
#> 4  Barrie 6722
#> 5  Barrie  122
#> 6  Guelph 6552
#> 7  Guelph 6322
#> 8  Guelph 7511
#> 9  Guelph 7611
#> 10 Guelph 9619
#> 11 Guelph 6411
#> 12 Guelph 6211
#> 13 Barrie 6666
#> 14 Barrie 7777
#> 15 Guelph 6666
#> 16 Guelph 7777
#>                                                             Occupation JPA
#> 1              Other customer and information services representatives 118
#> 2                                                                Cooks  91
#> 3                                              Transport truck drivers  59
#> 4          Operators and attendants in amusement, recreation and sport  27
#> 5                        Banking, credit and other investment managers  27
#> 6              Other customer and information services representatives 106
#> 7                                                                Cooks  72
#> 8                                              Transport truck drivers  58
#> 9                            Construction trades helpers and labourers  49
#> 10          Other labourers in processing, manufacturing and utilities 115
#> 11 Sales and account representatives - wholesale trade (non-technical) 109
#> 12                                            Retail sales supervisors  88
#> 13                                                              SumNot  54
#> 14                                                            SumTrade 268
#> 15                                                              SumNot 312
#> 16                                                            SumTrade 285
#>     APPR
#> 1  Trade
#> 2  Trade
#> 3  Trade
#> 4    NOT
#> 5    NOT
#> 6  Trade
#> 7  Trade
#> 8  Trade
#> 9  Trade
#> 10   NOT
#> 11   NOT
#> 12   NOT
#> 13   NOT
#> 14 Trade
#> 15   NOT
#> 16 Trade

由 reprex 包 (v2.0.1) 于 2022-04-20 创建

创建行作为按多列条件分组的其他行的总和

create rows as the Aggregate sums of other rows grouped by conditions of multiple columns

r

dplyr