如何分别为每个组正确地按周汇总日期

Question

dput()

a=structure(list(sales_point_id = c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 8L, 
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 
8L, 8L, 8L, 8L, 8L, 8L, 8L, 15L, 15L, 15L, 15L, 15L, 15L), calendar_id_operday = c(20210102L, 
20210102L, 20210102L, 20210102L, 20210102L, 20210102L, 20210102L, 
20210102L, 20210102L, 20210102L, 20210102L, 20210102L, 20210102L, 
20210102L, 20210102L, 20210102L, 20210102L, 20210102L, 20210102L, 
20210102L, 20210102L, 20210102L, 20210102L, 20210102L, 20210102L, 
20210102L, 20210102L, 20210102L, 20210102L, 20210102L, 20210102L, 
20210102L, 20210102L, 20210102L, 20210102L, 20210102L, 20210102L, 
20210102L, 20210102L, 20210102L, 20210102L, 20210102L, 20210102L, 
20210102L, 20210102L, 20210102L, 20210102L, 20210102L, 20210102L, 
20210102L, 20210102L, 20210102L, 20210102L, 20210102L, 20210102L, 
20210102L, 20210102L, 20210102L, 20210102L, 20210102L, 20210102L, 
20210102L, 20210102L, 20210102L, 20210102L, 20210102L, 20210102L, 
20210102L, 20210102L, 20210102L, 20210102L, 20210102L, 20210102L, 
20210102L, 20210102L, 20210102L, 20210102L, 20210102L, 20210102L, 
20210102L, 20210102L, 20210102L, 20210102L, 20210102L, 20210102L, 
20210102L, 20210102L, 20210102L, 20210102L, 20210102L, 20210102L, 
20210102L, 20210102L, 20210102L, 20210102L, 20210102L, 20210102L, 
20210102L, 20210102L, 20210102L), line_fact_amt = c(23749.14, 
1000, 3050, 1550, 8900, 1550, 0, 300, 0, 499, 5450, 300, 0, 499, 
599, 599, 6050, 300, 599, 1400, 300, 0, 2000, 700, 0, 5990, 8877, 
1999, 257.5, 200, 361, 300, 1990, 2453, 3140, 0, 0, 199, 599, 
10990, 7990, 773, 400, 6000, 2269, 2000, 1999, 999, 300, 0, 200, 
11990, 300, 200, 3599, 200, 50, 100, 100, 100, 50, 0, 1836, 19749, 
1399, 1266, 1538, 6031, 34846, 200, 1799, 250, 899, 4049, 379, 
799, 200, 200, 200, 599, 999, 0, 0, 300, 300, 3499, 4211, 626, 
12801, 999, 899, 799, 299, 1218, 200, 99, 999, 590, 200, 17990
)), row.names = c(NA, 100L), class = "data.frame")

calendar_id_operday 是日期栏。格式 20210102 是（年|月|日）。我需要按周汇总 line_fact_amt 列，每个 sales_point_id 分别按总和。我试试看

df2<-a %>% group_by( weekk = week(ymd(calendar_id_operday)), line_fact_amt ) %>% 
  summarise(new = sum(sales_point_id))

但我得到不需要的结果

   weekk line_fact_amt   new
   <dbl>         <dbl> <int>
 1     1            0     40
 2     1           50     10
 3     1           99     15
 4     1          100     15
 5     1          199      2
 6     1          200     76
 7     1          250      8
 8     1          258.     2
 9     1          299      8
10     1          300     36

我需要这样的结果

sales_point_id  calendar_id_operday line_fact_amt
1   20210102    100
1   20210109    200

其中 100 和 200 是 sales_point_id 到 line_fact_amt 的一周总和。我怎样才能得到所需的结果？

Answer 1

您必须对 line_fact_amt 和 group_by sales_point_id 求和，但不能用 line_fact_amt.

a %>% 
  group_by(sales_point_id, week = week(ymd(calendar_id_operday))) %>% 
  summarise(line_fact_amt = sum(line_fact_amt))

# A tibble: 4 x 3
# Groups:   sales_point_id [4]
  sales_point_id  week line_fact_amt
           <int> <dbl>         <dbl>
1              2     1       118181.
2              5     1        85053 
3              8     1        36324 
4             15     1        20078

如何分别为每个组正确地按周汇总日期

How to correctly aggregate dates by weeks for each group separately

r

dplyr

lubridate