组内的匹配值以计算百分比变化 R
matching values within group to calculate percent change R
我的前 200 行的数据帧输出如下:
structure(list(week_id = c("2021100301", "2021092601", "2021091901",
"2021091201", "2021082901", "2021082201", "2021081501", "2021080801",
"2021080101", "2021072501", "2021071801", "2021071101", "2021070401",
"2021062701", "2021062001", "2021061301", "2021060601", "2021053001",
"2021052301", "2021051601", "2021050901", "2021050201", "2021042501",
"2021041801", "2021041101", "2021040401", "2021032801", "2021032101",
"2021031401", "2021030701", "2021022101", "2021021401", "2021020701",
"2021013101", "2021012401", "2021011701", "2021011001", "2021010301",
"2020122701", "2020122001", "2020121301", "2020120601", "2020112901",
"2020112201", "2020111501", "2020110801", "2020110101", "2020102501",
"2020101801", "2020101101", "2020100401", "2020092701", "2020092001",
"2020091301", "2020090601", "2020083001", "2020082301", "2020081601",
"2020080901", "2020080201", "2020072601", "2020071901", "2020071201",
"2020070501", "2020062801", "2020062101", "2020061401", "2020060701",
"2020053101", "2020052401", "2020051701", "2020051001", "2020050301",
"2020042601", "2020041901", "2020041201", "2020040501", "2020032901",
"2020032201", "2020031501", "2020030101", "2020022301", "2020021601",
"2020020901", "2020020201", "2020012601", "2020011901", "2020011201",
"2020010501", "2021100301", "2021092601", "2021091901", "2021091201",
"2021090501", "2021082901", "2021082201", "2021081501", "2021080801",
"2021080101", "2021072501", "2021071801", "2021071101", "2021070401",
"2021062701", "2021062001", "2021061301", "2021060601", "2021053001",
"2021052301", "2021051601", "2021050901", "2021050201", "2021042501",
"2021041801", "2021041101", "2021040401", "2021032801", "2021032101",
"2021031401", "2021030701", "2021022801", "2021022101", "2021021401",
"2021020701", "2021013101", "2021012401", "2021011701", "2021011001",
"2021010301", "2020122701", "2020122001", "2020121301", "2020120601",
"2020112901", "2020112201", "2020111501", "2020110801", "2020110101",
"2020102501", "2020101801", "2020101101", "2020100401", "2020092701",
"2020092001", "2020091301", "2020090601", "2020083001", "2020082301",
"2020081601", "2020080901", "2020080201", "2020072601", "2020071901",
"2020071201", "2020070501", "2020062801", "2020062101", "2020061401",
"2020060701", "2020053101", "2020052401", "2020051701", "2020051001",
"2020050301", "2020042601", "2020041901", "2020041201", "2020040501",
"2020032901", "2020032201", "2020031501", "2020030801", "2020030101",
"2020022301", "2020021601", "2020020901", "2020020201", "2020012601",
"2020011901", "2020011201", "2020010501", "2021100301", "2021092601",
"2021091901", "2021091201", "2021090501", "2021082901", "2021082201",
"2021081501", "2021080801", "2021080101", "2021072501", "2021071801",
"2021071101", "2021070401", "2021062701", "2021062001", "2021061301",
"2021060601", "2021053001"), retailer_id = c(2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L), store_id = c(167L, 167L, 167L, 167L, 167L, 167L,
167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L,
167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L,
167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L,
167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L,
167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L,
167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L,
167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L,
167L, 167L, 167L, 167L, 167L, 167L, 5987L, 5987L, 5987L, 5987L,
5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L,
5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L,
5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L,
5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L,
5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L,
5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L,
5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L,
5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L,
5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L,
5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5515L, 5515L,
5515L, 5515L, 5515L, 5515L, 5515L, 5515L, 5515L, 5515L, 5515L,
5515L, 5515L, 5515L, 5515L, 5515L, 5515L, 5515L, 5515L), dollars = c(121817.89,
123837.04, 118670.04, 125059.625, 108753.24, 103086.164, 93883.47,
97233.34, 104718.26, 106884.21, 105388.664, 103774.02, 105210.89,
110289.805, 117055.945, 113839.07, 112145.44, 120899.42, 123817.1,
115896.3, 125034.63, 121903.98, 114903.13, 117721.086, 105937.99,
124112.63, 112720.3, 120637.18, 120471.27, 110500.37, 110503.08,
112969.96, 115869.27, 118878.375, 105458.195, 106309.5, 110294.62,
108330.82, 146878.53, 126304.055, 116068.125, 107469.35, 136214.23,
113220.75, 112149.66, 100493.305, 92780.78, 97302.18, 93464.86,
96070.89, 95837.6, 94597.96, 98911.57, 85228.016, 94473.36, 93500.97,
87337.375, 88164.125, 87065.74, 66466, 90029.71, 97007.35, 102726.36,
91905.15, 104088.82, 110672.805, 103954.48, 109547.445, 105544.875,
120075.03, 118728, 134109.06, 123252.85, 130008.92, 114023.5,
144025.67, 125673.97, 109288.62, 145974.9, 162897.97, 99991.97,
98035.12, 95207.695, 95602.53, 91691.84, 95935.414, 96264.945,
83052.78, 81082.19, 308727.38, 316221.84, 307510.53, 331655.47,
325742.12, 313349.8, 314485.25, 296090.7, 294508.4, 289239.16,
291942.7, 305845.53, 319315.16, 326085.72, 337905.4, 344706.47,
327700.53, 371220.06, 359514.38, 337943.56, 373691.47, 349582.7,
334601.75, 336816.66, 337005.75, 347109.06, 326315.97, 311354.84,
322053.34, 336316.03, 338193.53, 313785.03, 344732.66, 342939.25,
346253.62, 324986.56, 326116.2, 338265.66, 268262.62, 386346.6,
626153.6, 365243.03, 340560.28, 293116.5, 465687.8, 353180.1,
351834.12, 320955.12, 334259.28, 325735.22, 333862.25, 324085.34,
334968.44, 315079.22, 317985.3, 330642.66, 326725.25, 318800.8,
326554, 333438.25, 330099.03, 321774.66, 330337.6, 341311.16,
315433.25, 385807.25, 386330.25, 374521.12, 376984.34, 387566.56,
382244.22, 380587.84, 417656.53, 398990, 393197.03, 385053.06,
391217.75, 416653.6, 360383.8, 296253.6, 479710.7, 346368.38,
277086.44, 286023.53, 270573, 307881.22, 280272.6, 288636.3,
265937.72, 277954.03, 273006.03, 281253.47, 851667.25, 820867.3,
828810.06, 827808.7, 728991.94, 713789.44, 685840.06, 701648.6,
697648.25, 719623.3, 709753.9, 731245.5, 751857.94, 723741.4,
788438.6, 816226.7, 843330.25, 879954.7, 870035.94), fill = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0), spins_week = c("20211040", "20211039",
"20211038", "20211037", "20210935", "20210934", "20210933", "20210832",
"20210831", "20210830", "20210829", "20210728", "20210727", "20210726",
"20210725", "20210624", "20210623", "20210622", "20210621", "20210520",
"20210519", "20210518", "20210517", "20210416", "20210415", "20210414",
"20210413", "20210312", "20210311", "20210310", "20210208", "20210207",
"20210206", "20210205", "20210104", "20210103", "20210102", "20210101",
"20201352", "20201351", "20201350", "20201349", "20201248", "20201247",
"20201246", "20201245", "20201144", "20201143", "20201142", "20201141",
"20201040", "20201039", "20201038", "20201037", "20200936", "20200935",
"20200934", "20200933", "20200832", "20200831", "20200830", "20200829",
"20200728", "20200727", "20200726", "20200725", "20200624", "20200623",
"20200622", "20200621", "20200520", "20200519", "20200518", "20200517",
"20200416", "20200415", "20200414", "20200413", "20200312", "20200311",
"20200309", "20200208", "20200207", "20200206", "20200205", "20200104",
"20200103", "20200102", "20200101", "20211040", "20211039", "20211038",
"20211037", "20210936", "20210935", "20210934", "20210933", "20210832",
"20210831", "20210830", "20210829", "20210728", "20210727", "20210726",
"20210725", "20210624", "20210623", "20210622", "20210621", "20210520",
"20210519", "20210518", "20210517", "20210416", "20210415", "20210414",
"20210413", "20210312", "20210311", "20210310", "20210309", "20210208",
"20210207", "20210206", "20210205", "20210104", "20210103", "20210102",
"20210101", "20201352", "20201351", "20201350", "20201349", "20201248",
"20201247", "20201246", "20201245", "20201144", "20201143", "20201142",
"20201141", "20201040", "20201039", "20201038", "20201037", "20200936",
"20200935", "20200934", "20200933", "20200832", "20200831", "20200830",
"20200829", "20200728", "20200727", "20200726", "20200725", "20200624",
"20200623", "20200622", "20200621", "20200520", "20200519", "20200518",
"20200517", "20200416", "20200415", "20200414", "20200413", "20200312",
"20200311", "20200310", "20200309", "20200208", "20200207", "20200206",
"20200205", "20200104", "20200103", "20200102", "20200101", "20211040",
"20211039", "20211038", "20211037", "20210936", "20210935", "20210934",
"20210933", "20210832", "20210831", "20210830", "20210829", "20210728",
"20210727", "20210726", "20210725", "20210624", "20210623", "20210622"
), id = c("1040", "1039", "1038", "1037", "0935", "0934", "0933",
"0832", "0831", "0830", "0829", "0728", "0727", "0726", "0725",
"0624", "0623", "0622", "0621", "0520", "0519", "0518", "0517",
"0416", "0415", "0414", "0413", "0312", "0311", "0310", "0208",
"0207", "0206", "0205", "0104", "0103", "0102", "0101", "1352",
"1351", "1350", "1349", "1248", "1247", "1246", "1245", "1144",
"1143", "1142", "1141", "1040", "1039", "1038", "1037", "0936",
"0935", "0934", "0933", "0832", "0831", "0830", "0829", "0728",
"0727", "0726", "0725", "0624", "0623", "0622", "0621", "0520",
"0519", "0518", "0517", "0416", "0415", "0414", "0413", "0312",
"0311", "0309", "0208", "0207", "0206", "0205", "0104", "0103",
"0102", "0101", "1040", "1039", "1038", "1037", "0936", "0935",
"0934", "0933", "0832", "0831", "0830", "0829", "0728", "0727",
"0726", "0725", "0624", "0623", "0622", "0621", "0520", "0519",
"0518", "0517", "0416", "0415", "0414", "0413", "0312", "0311",
"0310", "0309", "0208", "0207", "0206", "0205", "0104", "0103",
"0102", "0101", "1352", "1351", "1350", "1349", "1248", "1247",
"1246", "1245", "1144", "1143", "1142", "1141", "1040", "1039",
"1038", "1037", "0936", "0935", "0934", "0933", "0832", "0831",
"0830", "0829", "0728", "0727", "0726", "0725", "0624", "0623",
"0622", "0621", "0520", "0519", "0518", "0517", "0416", "0415",
"0414", "0413", "0312", "0311", "0310", "0309", "0208", "0207",
"0206", "0205", "0104", "0103", "0102", "0101", "1040", "1039",
"1038", "1037", "0936", "0935", "0934", "0933", "0832", "0831",
"0830", "0829", "0728", "0727", "0726", "0725", "0624", "0623",
"0622")), row.names = c(NA, -200L), groups = structure(list(retailer_id = c(2L,
4L, 5L), store_id = c(167L, 5987L, 5515L), .rows = structure(list(
1:89, 90:181, 182:200), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -3L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
我想获取每个商店组内 id 列中的前 4 个值,并找出这些值与组内匹配值之间的美元变化百分比。
例如 retailer_id = 2 和 store_id = 167,前 4 个“id”值分别为 1040、1039、1038 和 1037。这些值在每个分组中出现两次,但对应不同的美元金额。如您所见,id = 1040(对于 retailer_id = 2 和 store_id = 167)对应于美元金额 121817.89 和 95837.60。我想计算第一个值相对于第二个值的百分比变化,即 (121817.89 - 95837.60) / 95837.60 ≈ 0.271。
我通过以下操作实现了上述当前输出:
# Build `A` from `dollars.weeks`:
#   1. drop the four descriptive retail/store columns,
#   2. group by retailer and store,
#   3. order rows by retailer, store, then descending `spins_week`,
#   4. derive `id` from characters 5 through 8 of `spins_week`.
A <- dollars.weeks %>%
  select(-c(retail_chain, retail_tag, store_name, store_tag)) %>%
  group_by(retailer_id, store_id) %>%
  arrange(retailer_id, store_id, desc(spins_week)) %>%
  mutate(id = substr(spins_week, start = 5, stop = 8))
按 retailer_id、store_id 分组然后按 spins_week 降序排列后,我将 spins_week 值缩短到最后 4 位数字以构成 id 列.
我想将每组中最先出现的四个 id 值与同一列下方与之相同的 id 值进行匹配(保留零售商和商店分组,因为我想按组计算),但我不确定应该如何在现有代码的基础上实现这一点。非常感谢任何帮助!
根据描述,我们可以将 'id' 添加为分组变量,然后用组内 'dollars' 的最大值(`max`)与 'dollars' 列之差除以 'dollars' 列,从而计算出 'percent_change'
library(dplyr)
# Add `id` to the grouping already present on `A`, then express every row's
# `dollars` as a relative gap to the largest `dollars` value in its group:
# (group max - dollars) / dollars. The row holding the group max gets 0.
A %>%
  group_by(id, .add = TRUE) %>%
  mutate(
    percent_change = (max(dollars) - dollars) / dollars
  )
或者可能是
# Add `id` to the grouping already present on `A`. For groups that contain
# exactly two rows, compute the change of the first `dollars` value relative
# to the last one; any other group size yields NA.
# NOTE(review): "first" and "last" follow the current row order of `A` --
# presumably descending `spins_week`; confirm before relying on the sign.
A %>%
  group_by(id, .add = TRUE) %>%
  mutate(
    percent_change = if (n() == 2) {
      (first(dollars) - last(dollars)) / last(dollars)
    } else {
      NA
    }
  )
-输出
# A tibble: 200 × 8
# Groups: retailer_id, store_id, id [123]
week_id retailer_id store_id dollars fill spins_week id percent_change
<chr> <int> <int> <dbl> <dbl> <chr> <chr> <dbl>
1 2021100301 2 167 121818. 0 20211040 1040 0.271
2 2021092601 2 167 123837. 0 20211039 1039 0.309
3 2021091901 2 167 118670. 0 20211038 1038 0.200
4 2021091201 2 167 125060. 0 20211037 1037 0.467
5 2021082901 2 167 108753. 0 20210935 0935 0.163
6 2021082201 2 167 103086. 0 20210934 0934 0.180
7 2021081501 2 167 93883. 0 20210933 0933 0.0649
8 2021080801 2 167 97233. 0 20210832 0832 0.117
9 2021080101 2 167 104718. 0 20210831 0831 0.576
10 2021072501 2 167 106884. 0 20210830 0830 0.187
# … with 190 more rows
或者如@Greg 在评论中提到的那样,如果我们想要前 4 个值
# Same first-vs-last percent change, but keep only the leading rows.
# `r_num` is the row position under the grouping `A` carries on entry
# (presumably retailer_id/store_id -- confirm), recorded BEFORE `id` is
# added to the grouping so that it can be used to filter afterwards.
A %>%
  mutate(r_num = row_number()) %>%
  group_by(id, .add = TRUE) %>%
  mutate(
    # Groups without exactly two rows have no unique pair to compare.
    percent_change = if (n() != 2) {
      NA
    } else {
      (first(dollars) - last(dollars)) / last(dollars)
    }
  ) %>%
  # Keep rows whose original position was among the first four.
  filter(r_num <= 4)
我的前 200 行的数据帧输出如下:
structure(list(week_id = c("2021100301", "2021092601", "2021091901",
"2021091201", "2021082901", "2021082201", "2021081501", "2021080801",
"2021080101", "2021072501", "2021071801", "2021071101", "2021070401",
"2021062701", "2021062001", "2021061301", "2021060601", "2021053001",
"2021052301", "2021051601", "2021050901", "2021050201", "2021042501",
"2021041801", "2021041101", "2021040401", "2021032801", "2021032101",
"2021031401", "2021030701", "2021022101", "2021021401", "2021020701",
"2021013101", "2021012401", "2021011701", "2021011001", "2021010301",
"2020122701", "2020122001", "2020121301", "2020120601", "2020112901",
"2020112201", "2020111501", "2020110801", "2020110101", "2020102501",
"2020101801", "2020101101", "2020100401", "2020092701", "2020092001",
"2020091301", "2020090601", "2020083001", "2020082301", "2020081601",
"2020080901", "2020080201", "2020072601", "2020071901", "2020071201",
"2020070501", "2020062801", "2020062101", "2020061401", "2020060701",
"2020053101", "2020052401", "2020051701", "2020051001", "2020050301",
"2020042601", "2020041901", "2020041201", "2020040501", "2020032901",
"2020032201", "2020031501", "2020030101", "2020022301", "2020021601",
"2020020901", "2020020201", "2020012601", "2020011901", "2020011201",
"2020010501", "2021100301", "2021092601", "2021091901", "2021091201",
"2021090501", "2021082901", "2021082201", "2021081501", "2021080801",
"2021080101", "2021072501", "2021071801", "2021071101", "2021070401",
"2021062701", "2021062001", "2021061301", "2021060601", "2021053001",
"2021052301", "2021051601", "2021050901", "2021050201", "2021042501",
"2021041801", "2021041101", "2021040401", "2021032801", "2021032101",
"2021031401", "2021030701", "2021022801", "2021022101", "2021021401",
"2021020701", "2021013101", "2021012401", "2021011701", "2021011001",
"2021010301", "2020122701", "2020122001", "2020121301", "2020120601",
"2020112901", "2020112201", "2020111501", "2020110801", "2020110101",
"2020102501", "2020101801", "2020101101", "2020100401", "2020092701",
"2020092001", "2020091301", "2020090601", "2020083001", "2020082301",
"2020081601", "2020080901", "2020080201", "2020072601", "2020071901",
"2020071201", "2020070501", "2020062801", "2020062101", "2020061401",
"2020060701", "2020053101", "2020052401", "2020051701", "2020051001",
"2020050301", "2020042601", "2020041901", "2020041201", "2020040501",
"2020032901", "2020032201", "2020031501", "2020030801", "2020030101",
"2020022301", "2020021601", "2020020901", "2020020201", "2020012601",
"2020011901", "2020011201", "2020010501", "2021100301", "2021092601",
"2021091901", "2021091201", "2021090501", "2021082901", "2021082201",
"2021081501", "2021080801", "2021080101", "2021072501", "2021071801",
"2021071101", "2021070401", "2021062701", "2021062001", "2021061301",
"2021060601", "2021053001"), retailer_id = c(2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L), store_id = c(167L, 167L, 167L, 167L, 167L, 167L,
167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L,
167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L,
167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L,
167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L,
167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L,
167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L,
167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L, 167L,
167L, 167L, 167L, 167L, 167L, 167L, 5987L, 5987L, 5987L, 5987L,
5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L,
5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L,
5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L,
5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L,
5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L,
5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L,
5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L,
5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L,
5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L,
5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5987L, 5515L, 5515L,
5515L, 5515L, 5515L, 5515L, 5515L, 5515L, 5515L, 5515L, 5515L,
5515L, 5515L, 5515L, 5515L, 5515L, 5515L, 5515L, 5515L), dollars = c(121817.89,
123837.04, 118670.04, 125059.625, 108753.24, 103086.164, 93883.47,
97233.34, 104718.26, 106884.21, 105388.664, 103774.02, 105210.89,
110289.805, 117055.945, 113839.07, 112145.44, 120899.42, 123817.1,
115896.3, 125034.63, 121903.98, 114903.13, 117721.086, 105937.99,
124112.63, 112720.3, 120637.18, 120471.27, 110500.37, 110503.08,
112969.96, 115869.27, 118878.375, 105458.195, 106309.5, 110294.62,
108330.82, 146878.53, 126304.055, 116068.125, 107469.35, 136214.23,
113220.75, 112149.66, 100493.305, 92780.78, 97302.18, 93464.86,
96070.89, 95837.6, 94597.96, 98911.57, 85228.016, 94473.36, 93500.97,
87337.375, 88164.125, 87065.74, 66466, 90029.71, 97007.35, 102726.36,
91905.15, 104088.82, 110672.805, 103954.48, 109547.445, 105544.875,
120075.03, 118728, 134109.06, 123252.85, 130008.92, 114023.5,
144025.67, 125673.97, 109288.62, 145974.9, 162897.97, 99991.97,
98035.12, 95207.695, 95602.53, 91691.84, 95935.414, 96264.945,
83052.78, 81082.19, 308727.38, 316221.84, 307510.53, 331655.47,
325742.12, 313349.8, 314485.25, 296090.7, 294508.4, 289239.16,
291942.7, 305845.53, 319315.16, 326085.72, 337905.4, 344706.47,
327700.53, 371220.06, 359514.38, 337943.56, 373691.47, 349582.7,
334601.75, 336816.66, 337005.75, 347109.06, 326315.97, 311354.84,
322053.34, 336316.03, 338193.53, 313785.03, 344732.66, 342939.25,
346253.62, 324986.56, 326116.2, 338265.66, 268262.62, 386346.6,
626153.6, 365243.03, 340560.28, 293116.5, 465687.8, 353180.1,
351834.12, 320955.12, 334259.28, 325735.22, 333862.25, 324085.34,
334968.44, 315079.22, 317985.3, 330642.66, 326725.25, 318800.8,
326554, 333438.25, 330099.03, 321774.66, 330337.6, 341311.16,
315433.25, 385807.25, 386330.25, 374521.12, 376984.34, 387566.56,
382244.22, 380587.84, 417656.53, 398990, 393197.03, 385053.06,
391217.75, 416653.6, 360383.8, 296253.6, 479710.7, 346368.38,
277086.44, 286023.53, 270573, 307881.22, 280272.6, 288636.3,
265937.72, 277954.03, 273006.03, 281253.47, 851667.25, 820867.3,
828810.06, 827808.7, 728991.94, 713789.44, 685840.06, 701648.6,
697648.25, 719623.3, 709753.9, 731245.5, 751857.94, 723741.4,
788438.6, 816226.7, 843330.25, 879954.7, 870035.94), fill = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0), spins_week = c("20211040", "20211039",
"20211038", "20211037", "20210935", "20210934", "20210933", "20210832",
"20210831", "20210830", "20210829", "20210728", "20210727", "20210726",
"20210725", "20210624", "20210623", "20210622", "20210621", "20210520",
"20210519", "20210518", "20210517", "20210416", "20210415", "20210414",
"20210413", "20210312", "20210311", "20210310", "20210208", "20210207",
"20210206", "20210205", "20210104", "20210103", "20210102", "20210101",
"20201352", "20201351", "20201350", "20201349", "20201248", "20201247",
"20201246", "20201245", "20201144", "20201143", "20201142", "20201141",
"20201040", "20201039", "20201038", "20201037", "20200936", "20200935",
"20200934", "20200933", "20200832", "20200831", "20200830", "20200829",
"20200728", "20200727", "20200726", "20200725", "20200624", "20200623",
"20200622", "20200621", "20200520", "20200519", "20200518", "20200517",
"20200416", "20200415", "20200414", "20200413", "20200312", "20200311",
"20200309", "20200208", "20200207", "20200206", "20200205", "20200104",
"20200103", "20200102", "20200101", "20211040", "20211039", "20211038",
"20211037", "20210936", "20210935", "20210934", "20210933", "20210832",
"20210831", "20210830", "20210829", "20210728", "20210727", "20210726",
"20210725", "20210624", "20210623", "20210622", "20210621", "20210520",
"20210519", "20210518", "20210517", "20210416", "20210415", "20210414",
"20210413", "20210312", "20210311", "20210310", "20210309", "20210208",
"20210207", "20210206", "20210205", "20210104", "20210103", "20210102",
"20210101", "20201352", "20201351", "20201350", "20201349", "20201248",
"20201247", "20201246", "20201245", "20201144", "20201143", "20201142",
"20201141", "20201040", "20201039", "20201038", "20201037", "20200936",
"20200935", "20200934", "20200933", "20200832", "20200831", "20200830",
"20200829", "20200728", "20200727", "20200726", "20200725", "20200624",
"20200623", "20200622", "20200621", "20200520", "20200519", "20200518",
"20200517", "20200416", "20200415", "20200414", "20200413", "20200312",
"20200311", "20200310", "20200309", "20200208", "20200207", "20200206",
"20200205", "20200104", "20200103", "20200102", "20200101", "20211040",
"20211039", "20211038", "20211037", "20210936", "20210935", "20210934",
"20210933", "20210832", "20210831", "20210830", "20210829", "20210728",
"20210727", "20210726", "20210725", "20210624", "20210623", "20210622"
), id = c("1040", "1039", "1038", "1037", "0935", "0934", "0933",
"0832", "0831", "0830", "0829", "0728", "0727", "0726", "0725",
"0624", "0623", "0622", "0621", "0520", "0519", "0518", "0517",
"0416", "0415", "0414", "0413", "0312", "0311", "0310", "0208",
"0207", "0206", "0205", "0104", "0103", "0102", "0101", "1352",
"1351", "1350", "1349", "1248", "1247", "1246", "1245", "1144",
"1143", "1142", "1141", "1040", "1039", "1038", "1037", "0936",
"0935", "0934", "0933", "0832", "0831", "0830", "0829", "0728",
"0727", "0726", "0725", "0624", "0623", "0622", "0621", "0520",
"0519", "0518", "0517", "0416", "0415", "0414", "0413", "0312",
"0311", "0309", "0208", "0207", "0206", "0205", "0104", "0103",
"0102", "0101", "1040", "1039", "1038", "1037", "0936", "0935",
"0934", "0933", "0832", "0831", "0830", "0829", "0728", "0727",
"0726", "0725", "0624", "0623", "0622", "0621", "0520", "0519",
"0518", "0517", "0416", "0415", "0414", "0413", "0312", "0311",
"0310", "0309", "0208", "0207", "0206", "0205", "0104", "0103",
"0102", "0101", "1352", "1351", "1350", "1349", "1248", "1247",
"1246", "1245", "1144", "1143", "1142", "1141", "1040", "1039",
"1038", "1037", "0936", "0935", "0934", "0933", "0832", "0831",
"0830", "0829", "0728", "0727", "0726", "0725", "0624", "0623",
"0622", "0621", "0520", "0519", "0518", "0517", "0416", "0415",
"0414", "0413", "0312", "0311", "0310", "0309", "0208", "0207",
"0206", "0205", "0104", "0103", "0102", "0101", "1040", "1039",
"1038", "1037", "0936", "0935", "0934", "0933", "0832", "0831",
"0830", "0829", "0728", "0727", "0726", "0725", "0624", "0623",
"0622")), row.names = c(NA, -200L), groups = structure(list(retailer_id = c(2L,
4L, 5L), store_id = c(167L, 5987L, 5515L), .rows = structure(list(
1:89, 90:181, 182:200), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -3L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
我想获取每个商店组内 id 列中的前 4 个值,并找出这些值与组内匹配值之间的美元变化百分比。
例如 retailer_id = 2 和 store_id = 167,前 4 个“id”值分别为 1040、1039、1038 和 1037。这些值在每个分组中出现两次,但对应不同的美元金额。如您所见,id = 1040(对于 retailer_id = 2 和 store_id = 167)对应于美元金额 121817.89 和 95837.60。我想计算第一个值相对于第二个值的百分比变化,即 (121817.89 - 95837.60) / 95837.60 ≈ 0.271。
我通过以下操作实现了上述当前输出:
# Build `A` from `dollars.weeks`:
#   1. drop the four descriptive retail/store columns,
#   2. group by retailer and store,
#   3. order rows by retailer, store, then descending `spins_week`,
#   4. derive `id` from characters 5 through 8 of `spins_week`.
A <- dollars.weeks %>%
  select(-c(retail_chain, retail_tag, store_name, store_tag)) %>%
  group_by(retailer_id, store_id) %>%
  arrange(retailer_id, store_id, desc(spins_week)) %>%
  mutate(id = substr(spins_week, start = 5, stop = 8))
按 retailer_id、store_id 分组然后按 spins_week 降序排列后,我将 spins_week 值缩短到最后 4 位数字以构成 id 列.
我想将每组中最先出现的四个 id 值与同一列下方与之相同的 id 值进行匹配(保留零售商和商店分组,因为我想按组计算),但我不确定应该如何在现有代码的基础上实现这一点。非常感谢任何帮助!
根据描述,我们可以将 'id' 添加为分组变量,然后用组内 'dollars' 的最大值(`max`)与 'dollars' 列之差除以 'dollars' 列,从而计算出 'percent_change'
library(dplyr)
# Add `id` to the grouping already present on `A`, then express every row's
# `dollars` as a relative gap to the largest `dollars` value in its group:
# (group max - dollars) / dollars. The row holding the group max gets 0.
A %>%
  group_by(id, .add = TRUE) %>%
  mutate(
    percent_change = (max(dollars) - dollars) / dollars
  )
或者可能是
# Add `id` to the grouping already present on `A`. For groups that contain
# exactly two rows, compute the change of the first `dollars` value relative
# to the last one; any other group size yields NA.
# NOTE(review): "first" and "last" follow the current row order of `A` --
# presumably descending `spins_week`; confirm before relying on the sign.
A %>%
  group_by(id, .add = TRUE) %>%
  mutate(
    percent_change = if (n() == 2) {
      (first(dollars) - last(dollars)) / last(dollars)
    } else {
      NA
    }
  )
-输出
# A tibble: 200 × 8
# Groups: retailer_id, store_id, id [123]
week_id retailer_id store_id dollars fill spins_week id percent_change
<chr> <int> <int> <dbl> <dbl> <chr> <chr> <dbl>
1 2021100301 2 167 121818. 0 20211040 1040 0.271
2 2021092601 2 167 123837. 0 20211039 1039 0.309
3 2021091901 2 167 118670. 0 20211038 1038 0.200
4 2021091201 2 167 125060. 0 20211037 1037 0.467
5 2021082901 2 167 108753. 0 20210935 0935 0.163
6 2021082201 2 167 103086. 0 20210934 0934 0.180
7 2021081501 2 167 93883. 0 20210933 0933 0.0649
8 2021080801 2 167 97233. 0 20210832 0832 0.117
9 2021080101 2 167 104718. 0 20210831 0831 0.576
10 2021072501 2 167 106884. 0 20210830 0830 0.187
# … with 190 more rows
或者如@Greg 在评论中提到的那样,如果我们想要前 4 个值
# Same first-vs-last percent change, but keep only the leading rows.
# `r_num` is the row position under the grouping `A` carries on entry
# (presumably retailer_id/store_id -- confirm), recorded BEFORE `id` is
# added to the grouping so that it can be used to filter afterwards.
A %>%
  mutate(r_num = row_number()) %>%
  group_by(id, .add = TRUE) %>%
  mutate(
    # Groups without exactly two rows have no unique pair to compare.
    percent_change = if (n() != 2) {
      NA
    } else {
      (first(dollars) - last(dollars)) / last(dollars)
    }
  ) %>%
  # Keep rows whose original position was among the first four.
  filter(r_num <= 4)