在 R 中匹配组内的值以计算百分比变化

matching values within group to calculate percent change R

我的前 200 行的数据帧输出如下:

structure(list(week_id = c("2021100301", "2021092601", "2021091901", 
"2021091201", "2021082901", "2021082201", "2021081501", "2021080801", 
"2021080101", "2021072501", "2021071801", "2021071101", "2021070401", 
"2021062701", "2021062001", "2021061301", "2021060601", "2021053001", 
"2021052301", "2021051601", "2021050901", "2021050201", "2021042501", 
"2021041801", "2021041101", "2021040401", "2021032801", "2021032101", 
"2021031401", "2021030701", "2021022101", "2021021401", "2021020701", 
"2021013101", "2021012401", "2021011701", "2021011001", "2021010301", 
"2020122701", "2020122001", "2020121301", "2020120601", "2020112901", 
"2020112201", "2020111501", "2020110801", "2020110101", "2020102501", 
"2020101801", "2020101101", "2020100401", "2020092701", "2020092001", 
"2020091301", "2020090601", "2020083001", "2020082301", "2020081601", 
"2020080901", "2020080201", "2020072601", "2020071901", "2020071201", 
"2020070501", "2020062801", "2020062101", "2020061401", "2020060701", 
"2020053101", "2020052401", "2020051701", "2020051001", "2020050301", 
"2020042601", "2020041901", "2020041201", "2020040501", "2020032901", 
"2020032201", "2020031501", "2020030101", "2020022301", "2020021601", 
"2020020901", "2020020201", "2020012601", "2020011901", "2020011201", 
"2020010501", "2021100301", "2021092601", "2021091901", "2021091201", 
"2021090501", "2021082901", "2021082201", "2021081501", "2021080801", 
"2021080101", "2021072501", "2021071801", "2021071101", "2021070401", 
"2021062701", "2021062001", "2021061301", "2021060601", "2021053001", 
"2021052301", "2021051601", "2021050901", "2021050201", "2021042501", 
"2021041801", "2021041101", "2021040401", "2021032801", "2021032101", 
"2021031401", "2021030701", "2021022801", "2021022101", "2021021401", 
"2021020701", "2021013101", "2021012401", "2021011701", "2021011001", 
"2021010301", "2020122701", "2020122001", "2020121301", "2020120601", 
"2020112901", "2020112201", "2020111501", "2020110801", "2020110101", 
"2020102501", "2020101801", "2020101101", "2020100401", "2020092701", 
"2020092001", "2020091301", "2020090601", "2020083001", "2020082301", 
"2020081601", "2020080901", "2020080201", "2020072601", "2020071901", 
"2020071201", "2020070501", "2020062801", "2020062101", "2020061401", 
"2020060701", "2020053101", "2020052401", "2020051701", "2020051001", 
"2020050301", "2020042601", "2020041901", "2020041201", "2020040501", 
"2020032901", "2020032201", "2020031501", "2020030801", "2020030101", 
"2020022301", "2020021601", "2020020901", "2020020201", "2020012601", 
"2020011901", "2020011201", "2020010501", "2021100301", "2021092601", 
"2021091901", "2021091201", "2021090501", "2021082901", "2021082201", 
"2021081501", "2021080801", "2021080101", "2021072501", "2021071801", 
"2021071101", "2021070401", "2021062701", "2021062001", "2021061301", 
"2021060601", "2021053001"), retailer_id = c(2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
5L, 5L, 5L, 5L), store_id = c(167L, 167L, 167L, 167L, 167L, 167L, 
167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 
167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 
167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 
167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 
167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 
167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 
167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 
167L, 167L, 167L, 167L, 167L, 167L, 5987L, 5987L, 5987L, 5987L, 
5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 
5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 
5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 
5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 
5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 
5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 
5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 
5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 
5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 
5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5515L, 5515L, 
5515L, 5515L, 5515L, 5515L, 5515L, 5515L, 5515L, 5515L, 5515L, 
5515L, 5515L, 5515L, 5515L, 5515L, 5515L, 5515L, 5515L), dollars = c(121817.89, 
123837.04, 118670.04, 125059.625, 108753.24, 103086.164, 93883.47, 
97233.34, 104718.26, 106884.21, 105388.664, 103774.02, 105210.89, 
110289.805, 117055.945, 113839.07, 112145.44, 120899.42, 123817.1, 
115896.3, 125034.63, 121903.98, 114903.13, 117721.086, 105937.99, 
124112.63, 112720.3, 120637.18, 120471.27, 110500.37, 110503.08, 
112969.96, 115869.27, 118878.375, 105458.195, 106309.5, 110294.62, 
108330.82, 146878.53, 126304.055, 116068.125, 107469.35, 136214.23, 
113220.75, 112149.66, 100493.305, 92780.78, 97302.18, 93464.86, 
96070.89, 95837.6, 94597.96, 98911.57, 85228.016, 94473.36, 93500.97, 
87337.375, 88164.125, 87065.74, 66466, 90029.71, 97007.35, 102726.36, 
91905.15, 104088.82, 110672.805, 103954.48, 109547.445, 105544.875, 
120075.03, 118728, 134109.06, 123252.85, 130008.92, 114023.5, 
144025.67, 125673.97, 109288.62, 145974.9, 162897.97, 99991.97, 
98035.12, 95207.695, 95602.53, 91691.84, 95935.414, 96264.945, 
83052.78, 81082.19, 308727.38, 316221.84, 307510.53, 331655.47, 
325742.12, 313349.8, 314485.25, 296090.7, 294508.4, 289239.16, 
291942.7, 305845.53, 319315.16, 326085.72, 337905.4, 344706.47, 
327700.53, 371220.06, 359514.38, 337943.56, 373691.47, 349582.7, 
334601.75, 336816.66, 337005.75, 347109.06, 326315.97, 311354.84, 
322053.34, 336316.03, 338193.53, 313785.03, 344732.66, 342939.25, 
346253.62, 324986.56, 326116.2, 338265.66, 268262.62, 386346.6, 
626153.6, 365243.03, 340560.28, 293116.5, 465687.8, 353180.1, 
351834.12, 320955.12, 334259.28, 325735.22, 333862.25, 324085.34, 
334968.44, 315079.22, 317985.3, 330642.66, 326725.25, 318800.8, 
326554, 333438.25, 330099.03, 321774.66, 330337.6, 341311.16, 
315433.25, 385807.25, 386330.25, 374521.12, 376984.34, 387566.56, 
382244.22, 380587.84, 417656.53, 398990, 393197.03, 385053.06, 
391217.75, 416653.6, 360383.8, 296253.6, 479710.7, 346368.38, 
277086.44, 286023.53, 270573, 307881.22, 280272.6, 288636.3, 
265937.72, 277954.03, 273006.03, 281253.47, 851667.25, 820867.3, 
828810.06, 827808.7, 728991.94, 713789.44, 685840.06, 701648.6, 
697648.25, 719623.3, 709753.9, 731245.5, 751857.94, 723741.4, 
788438.6, 816226.7, 843330.25, 879954.7, 870035.94), fill = c(0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0), spins_week = c("20211040", "20211039", 
"20211038", "20211037", "20210935", "20210934", "20210933", "20210832", 
"20210831", "20210830", "20210829", "20210728", "20210727", "20210726", 
"20210725", "20210624", "20210623", "20210622", "20210621", "20210520", 
"20210519", "20210518", "20210517", "20210416", "20210415", "20210414", 
"20210413", "20210312", "20210311", "20210310", "20210208", "20210207", 
"20210206", "20210205", "20210104", "20210103", "20210102", "20210101", 
"20201352", "20201351", "20201350", "20201349", "20201248", "20201247", 
"20201246", "20201245", "20201144", "20201143", "20201142", "20201141", 
"20201040", "20201039", "20201038", "20201037", "20200936", "20200935", 
"20200934", "20200933", "20200832", "20200831", "20200830", "20200829", 
"20200728", "20200727", "20200726", "20200725", "20200624", "20200623", 
"20200622", "20200621", "20200520", "20200519", "20200518", "20200517", 
"20200416", "20200415", "20200414", "20200413", "20200312", "20200311", 
"20200309", "20200208", "20200207", "20200206", "20200205", "20200104", 
"20200103", "20200102", "20200101", "20211040", "20211039", "20211038", 
"20211037", "20210936", "20210935", "20210934", "20210933", "20210832", 
"20210831", "20210830", "20210829", "20210728", "20210727", "20210726", 
"20210725", "20210624", "20210623", "20210622", "20210621", "20210520", 
"20210519", "20210518", "20210517", "20210416", "20210415", "20210414", 
"20210413", "20210312", "20210311", "20210310", "20210309", "20210208", 
"20210207", "20210206", "20210205", "20210104", "20210103", "20210102", 
"20210101", "20201352", "20201351", "20201350", "20201349", "20201248", 
"20201247", "20201246", "20201245", "20201144", "20201143", "20201142", 
"20201141", "20201040", "20201039", "20201038", "20201037", "20200936", 
"20200935", "20200934", "20200933", "20200832", "20200831", "20200830", 
"20200829", "20200728", "20200727", "20200726", "20200725", "20200624", 
"20200623", "20200622", "20200621", "20200520", "20200519", "20200518", 
"20200517", "20200416", "20200415", "20200414", "20200413", "20200312", 
"20200311", "20200310", "20200309", "20200208", "20200207", "20200206", 
"20200205", "20200104", "20200103", "20200102", "20200101", "20211040", 
"20211039", "20211038", "20211037", "20210936", "20210935", "20210934", 
"20210933", "20210832", "20210831", "20210830", "20210829", "20210728", 
"20210727", "20210726", "20210725", "20210624", "20210623", "20210622"
), id = c("1040", "1039", "1038", "1037", "0935", "0934", "0933", 
"0832", "0831", "0830", "0829", "0728", "0727", "0726", "0725", 
"0624", "0623", "0622", "0621", "0520", "0519", "0518", "0517", 
"0416", "0415", "0414", "0413", "0312", "0311", "0310", "0208", 
"0207", "0206", "0205", "0104", "0103", "0102", "0101", "1352", 
"1351", "1350", "1349", "1248", "1247", "1246", "1245", "1144", 
"1143", "1142", "1141", "1040", "1039", "1038", "1037", "0936", 
"0935", "0934", "0933", "0832", "0831", "0830", "0829", "0728", 
"0727", "0726", "0725", "0624", "0623", "0622", "0621", "0520", 
"0519", "0518", "0517", "0416", "0415", "0414", "0413", "0312", 
"0311", "0309", "0208", "0207", "0206", "0205", "0104", "0103", 
"0102", "0101", "1040", "1039", "1038", "1037", "0936", "0935", 
"0934", "0933", "0832", "0831", "0830", "0829", "0728", "0727", 
"0726", "0725", "0624", "0623", "0622", "0621", "0520", "0519", 
"0518", "0517", "0416", "0415", "0414", "0413", "0312", "0311", 
"0310", "0309", "0208", "0207", "0206", "0205", "0104", "0103", 
"0102", "0101", "1352", "1351", "1350", "1349", "1248", "1247", 
"1246", "1245", "1144", "1143", "1142", "1141", "1040", "1039", 
"1038", "1037", "0936", "0935", "0934", "0933", "0832", "0831", 
"0830", "0829", "0728", "0727", "0726", "0725", "0624", "0623", 
"0622", "0621", "0520", "0519", "0518", "0517", "0416", "0415", 
"0414", "0413", "0312", "0311", "0310", "0309", "0208", "0207", 
"0206", "0205", "0104", "0103", "0102", "0101", "1040", "1039", 
"1038", "1037", "0936", "0935", "0934", "0933", "0832", "0831", 
"0830", "0829", "0728", "0727", "0726", "0725", "0624", "0623", 
"0622")), row.names = c(NA, -200L), groups = structure(list(retailer_id = c(2L, 
4L, 5L), store_id = c(167L, 5987L, 5515L), .rows = structure(list(
    1:89, 90:181, 182:200), ptype = integer(0), class = c("vctrs_list_of", 
"vctrs_vctr", "list"))), row.names = c(NA, -3L), class = c("tbl_df", 
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"))

我想获取每个商店组内 id 列中的前 4 个值,并找出这些值与组内匹配值之间的美元变化百分比。

例如,对于 retailer_id = 2 和 store_id = 167,前 4 个“id”值分别为 1040、1039、1038 和 1037。这些值在每个分组中出现两次,但对应不同的美元金额。如您所见,id = 1040(对于 retailer_id = 2 和 store_id = 167)对应的美元金额为 121817.89 和 95837.60。我想计算这两个值之间的百分比变化,即 (121817.89 - 95837.60) / 95837.60。

我通过以下操作实现了上述当前输出:

# Build the working table: drop the descriptive retailer/store columns, sort
# rows by retailer, store and descending spins_week, group by retailer and
# store, and keep characters 5-8 of spins_week as the `id` key.
A <- dollars.weeks %>%
  select(-retail_chain, -retail_tag, -store_name, -store_tag) %>%
  arrange(retailer_id, store_id, desc(spins_week)) %>%
  group_by(retailer_id, store_id) %>%
  mutate(id = substr(spins_week, start = 5, stop = 8))

按 retailer_id、store_id 分组并按 spins_week 降序排列后,我将 spins_week 的值截取为最后 4 位数字,以构成 id 列。

我想将前四个出现的 id 值与其在列下方的相应值相匹配(保留零售商和商店分组,因为我想按组计算),但我不确定如何在当前代码的基础上实现这一点。任何帮助都将不胜感激!

根据描述,我们可以添加 'id' 作为分组变量,然后用美元的偏差(即 `dollars` 的 `max` 值与 `dollars` 之差)除以 `dollars` 列,来计算 'percent_change'

library(dplyr)
# Add `id` on top of the grouping A already carries, then express each row's
# shortfall from the group's largest `dollars` value relative to that row's
# own `dollars`. The row holding the maximum (and any single-row group)
# therefore gets 0.
A %>%
  group_by(id, .add = TRUE) %>%
  mutate(percent_change = (max(dollars) - dollars) / dollars)

或者可能是

# Add `id` on top of the grouping A already carries. For groups that contain
# exactly two rows, report the change of the first row's `dollars` relative to
# the last row's `dollars`; every other group size yields a missing value.
# Which row counts as "first" depends on the row order A arrives in.
A %>%
  group_by(id, .add = TRUE) %>%
  mutate(
    percent_change = if (n() == 2) {
      (first(dollars) - last(dollars)) / last(dollars)
    } else {
      # Typed NA keeps the column double even when no group has a pair;
      # a bare NA is logical and would make the column type data-dependent.
      NA_real_
    }
  )

-输出

# A tibble: 200 × 8
# Groups:   retailer_id, store_id, id [123]
   week_id    retailer_id store_id dollars  fill spins_week id    percent_change
   <chr>            <int>    <int>   <dbl> <dbl> <chr>      <chr>          <dbl>
 1 2021100301           2      167 121818.     0 20211040   1040          0.271 
 2 2021092601           2      167 123837.     0 20211039   1039          0.309 
 3 2021091901           2      167 118670.     0 20211038   1038          0.200 
 4 2021091201           2      167 125060.     0 20211037   1037          0.467 
 5 2021082901           2      167 108753.     0 20210935   0935          0.163 
 6 2021082201           2      167 103086.     0 20210934   0934          0.180 
 7 2021081501           2      167  93883.     0 20210933   0933          0.0649
 8 2021080801           2      167  97233.     0 20210832   0832          0.117 
 9 2021080101           2      167 104718.     0 20210831   0831          0.576 
10 2021072501           2      167 106884.     0 20210830   0830          0.187 
# … with 190 more rows

或者如@Greg 在评论中提到的那样,如果我们想要前 4 个值

# Same first-vs-last percent change as a paired-id comparison, restricted to
# the leading four rows of each of A's existing groups.
A %>%
  # Number rows under A's current grouping, i.e. before `id` is added, so
  # that r_num counts position within that grouping rather than within an
  # id pair.
  mutate(r_num = row_number()) %>%
  group_by(id, .add = TRUE) %>%
  mutate(
    percent_change = if (n() == 2) {
      (first(dollars) - last(dollars)) / last(dollars)
    } else {
      # Typed NA keeps the column double even when no group has a pair;
      # a bare NA is logical and would make the column type data-dependent.
      NA_real_
    }
  ) %>%
  # Keep only the rows that were among the first four before re-grouping.
  filter(r_num <= 4)