重塑每周数据
Reshape weekly data
我有一个包含不同饮料每周销售额的数据集。数据包含从 2019 年第 1 周到 2020 年第 18 周的值。生成数据的代码如下:
# Example data: 70 weekly rows (row.names = c(NA, -70L)) with a `year_week`
# label ("YYYY WW", 2019 01 through 2020 18), the numeric `Year`, and one sales
# column per beverage: Pepsi, Coke, Fanta, Beer, Juice, Lemonade, Radler.
# structure() tags the list with tibble classes before it is handed to
# data.frame().
ARTEFICIAL_DATA<-data.frame(structure(list(year_week = c("2019 01", "2019 02", "2019 03",
"2019 04", "2019 05", "2019 06", "2019 07", "2019 08", "2019 09",
"2019 10", "2019 11", "2019 12", "2019 13", "2019 14", "2019 15",
"2019 16", "2019 17", "2019 18", "2019 19", "2019 20", "2019 21",
"2019 22", "2019 23", "2019 24", "2019 25", "2019 26", "2019 27",
"2019 28", "2019 29", "2019 30", "2019 31", "2019 32", "2019 33",
"2019 34", "2019 35", "2019 36", "2019 37", "2019 38", "2019 39",
"2019 40", "2019 41", "2019 42", "2019 43", "2019 44", "2019 45",
"2019 46", "2019 47", "2019 48", "2019 49", "2019 50", "2019 51",
"2019 52", "2020 01", "2020 02", "2020 03", "2020 04", "2020 05",
"2020 06", "2020 07", "2020 08", "2020 09", "2020 10", "2020 11",
"2020 12", "2020 13", "2020 14", "2020 15", "2020 16", "2020 17",
"2020 18"), Year = c(2019, 2019, 2019, 2019, 2019, 2019, 2019,
2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019,
2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019,
2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019,
2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019,
2019, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020,
2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020), Pepsi = c(309,
201, 369, 211, 262, 306, 477, 269, 175, 315, 373, 309, 280, 450,
331, 394, 295, 266, 349, 434, 247, 302, 423, 347, 384, 327, 369,
373, 380, 305, 432, 371, 437, 242, 270, 444, 432, 331, 309, 422,
300, 495, 285, 408, 336, 484, 324, 327, 359, 441, 392, 373, 168,
137, 404, 325, 279, 320, 461, 311, 336, 286, 333, 299, 209, 199,
197, 344, 170, 176), Coke = c(139, 119, 189, 160, 184, 174, 246,
203, 117, 193, 233, 226, 219, 228, 219, 238, 220, 136, 198, 229,
172, 225, 191, 183, 224, 223, 205, 232, 228, 215, 193, 219, 205,
178, 172, 219, 206, 245, 234, 218, 180, 269, 220, 224, 211, 253,
231, 239, 225, 247, 270, 265, 68, 87, 211, 193, 200, 188, 247,
216, 226, 203, 231, 183, 143, 147, 143, 162, 125, 122), Fanta = c(24,
18, 22, 20, 22, 20, 25, 23, 13, 22, 25, 24, 23, 31, 22, 23, 20,
23, 19, 20, 19, 22, 22, 21, 26, 21, 27, 24, 22, 23, 21, 25, 19,
21, 20, 22, 23, 23, 21, 32, 21, 28, 22, 23, 24, 24, 23, 26, 24,
23, 28, 29, 7, 18, 23, 27, 25, 24, 27, 24, 29, 24, 24, 20, 17,
21, 19, 13, 12, 16), Beer = c(145, 64, 158, 31, 56, 112, 206,
42, 45, 100, 114, 59, 38, 191, 89, 133, 56, 107, 131, 184, 56,
55, 211, 142, 134, 84, 137, 116, 130, 66, 218, 127, 213, 43,
78, 202, 203, 62, 54, 171, 99, 198, 44, 161, 100, 206, 70, 61,
110, 172, 94, 79, 93, 32, 171, 105, 54, 109, 187, 71, 81, 58,
78, 96, 49, 31, 34, 168, 33, 39), Juice = c(28, 40, 105, 26,
41, 31, 96, 37, 20, 46, 78, 53, 29, 57, 46, 111, 49, 18, 58,
112, 37, 43, 38, 93, 48, 69, 43, 57, 114, 43, 48, 47, 141, 35,
47, 69, 158, 43, 49, 29, 34, 139, 40, 42, 38, 132, 38, 42, 41,
103, 42, 48, 30, 22, 136, 39, 37, 39, 92, 40, 40, 37, 67, 73,
31, 23, 27, 102, 31, 25), Lemonade = c(111, 22, 31, 0, 12, 77,
95, 0, 22, 50, 22, 2, 3, 129, 39, 0, 2, 84, 70, 42, 16, 6, 168,
31, 76, 10, 88, 37, 3, 17, 166, 73, 37, 5, 27, 128, 20, 5, 0,
136, 61, 32, 0, 113, 53, 52, 28, 14, 62, 54, 39, 22, 63, 8, 14,
63, 16, 69, 85, 26, 40, 19, 2, 17, 16, 8, 5, 50, 1, 13), Radler = c(2,
1, 21, 2, 1, 2, 11, 2, 1, 2, 12, 2, 3, 3, 2, 19, 2, 4, 2, 28,
2, 3, 3, 16, 8, 3, 3, 19, 11, 3, 2, 4, 34, 1, 3, 3, 22, 11, 3,
4, 1, 24, 1, 3, 7, 20, 1, 3, 3, 12, 8, 2, 1, 2, 21, 3, 2, 1,
10, 5, 1, 2, 10, 5, 2, 1, 2, 15, 1, 0)), row.names = c(NA, -70L
), class = c("tbl_df", "tbl", "data.frame")))
那么接下来应该如何转换这些数据,使其变成下图所示的形式?
我尝试使用 reshape2 包中的 melt 函数,但得到的表格并不理想。
# Melt the wide sales table to long format with `year_week` as the identifier
# column; every other column (including `Year`) becomes a variable/value pair.
# The argument is spelled `id.vars`: the original `id.variables` is not a
# parameter of reshape2::melt, so it was swallowed by `...` and the id columns
# were guessed from the column types instead.
ARTEFICIAL_DATA1 <- reshape2::melt(ARTEFICIAL_DATA, id.vars = "year_week")
有人能帮我把数据重塑(melt、pivot 或其他任何方法都可以)成图片中那样的表格吗?
我们可以使用 pivot_wider
library(dplyr)
library(tidyr)
# Put the two years side by side, one row per calendar week.
# The week number is parsed from `year_week` ("YYYY WW") and used as the row
# key, so weeks are matched by their number rather than by row position; this
# keeps the years aligned even if a week is missing or the rows are unsorted.
# Weeks present in only one year get NA in the other year's columns.
ARTEFICIAL_DATA %>%
  mutate(week = sub("^[0-9]+ +", "", year_week)) %>%
  pivot_wider(
    id_cols = week,
    names_from = Year,
    values_from = c(year_week, Pepsi:Radler)
  ) %>%
  # `week` was only needed as the matching key; drop it so the result holds
  # just the paired <column>_<year> columns.
  select(-week)
# A tibble: 52 x 16
# year_week_2019 year_week_2020 Pepsi_2019 Pepsi_2020 Coke_2019 Coke_2020 Fanta_2019 Fanta_2020 Beer_2019 Beer_2020
# <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 2019 01 2020 01 309 168 139 68 24 7 145 93
# 2 2019 02 2020 02 201 137 119 87 18 18 64 32
# 3 2019 03 2020 03 369 404 189 211 22 23 158 171
# 4 2019 04 2020 04 211 325 160 193 20 27 31 105
# 5 2019 05 2020 05 262 279 184 200 22 25 56 54
# 6 2019 06 2020 06 306 320 174 188 20 24 112 109
# 7 2019 07 2020 07 477 461 246 247 25 27 206 187
# 8 2019 08 2020 08 269 311 203 216 23 24 42 71
# 9 2019 09 2020 09 175 336 117 226 13 29 45 81
#10 2019 10 2020 10 315 286 193 203 22 24 100 58
# … with 42 more rows, and 6 more variables: Juice_2019 <dbl>, Juice_2020 <dbl>, Lemonade_2019 <dbl>,
# Lemonade_2020 <dbl>, Radler_2019 <dbl>, Radler_2020 <dbl>
我有一个包含不同饮料每周销售额的数据集。数据包含从 2019 年第 1 周到 2020 年第 18 周的值。生成数据的代码如下:
# Example data: 70 weekly rows (row.names = c(NA, -70L)) with a `year_week`
# label ("YYYY WW", 2019 01 through 2020 18), the numeric `Year`, and one sales
# column per beverage: Pepsi, Coke, Fanta, Beer, Juice, Lemonade, Radler.
# structure() tags the list with tibble classes before it is handed to
# data.frame().
ARTEFICIAL_DATA<-data.frame(structure(list(year_week = c("2019 01", "2019 02", "2019 03",
"2019 04", "2019 05", "2019 06", "2019 07", "2019 08", "2019 09",
"2019 10", "2019 11", "2019 12", "2019 13", "2019 14", "2019 15",
"2019 16", "2019 17", "2019 18", "2019 19", "2019 20", "2019 21",
"2019 22", "2019 23", "2019 24", "2019 25", "2019 26", "2019 27",
"2019 28", "2019 29", "2019 30", "2019 31", "2019 32", "2019 33",
"2019 34", "2019 35", "2019 36", "2019 37", "2019 38", "2019 39",
"2019 40", "2019 41", "2019 42", "2019 43", "2019 44", "2019 45",
"2019 46", "2019 47", "2019 48", "2019 49", "2019 50", "2019 51",
"2019 52", "2020 01", "2020 02", "2020 03", "2020 04", "2020 05",
"2020 06", "2020 07", "2020 08", "2020 09", "2020 10", "2020 11",
"2020 12", "2020 13", "2020 14", "2020 15", "2020 16", "2020 17",
"2020 18"), Year = c(2019, 2019, 2019, 2019, 2019, 2019, 2019,
2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019,
2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019,
2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019,
2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019,
2019, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020,
2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020), Pepsi = c(309,
201, 369, 211, 262, 306, 477, 269, 175, 315, 373, 309, 280, 450,
331, 394, 295, 266, 349, 434, 247, 302, 423, 347, 384, 327, 369,
373, 380, 305, 432, 371, 437, 242, 270, 444, 432, 331, 309, 422,
300, 495, 285, 408, 336, 484, 324, 327, 359, 441, 392, 373, 168,
137, 404, 325, 279, 320, 461, 311, 336, 286, 333, 299, 209, 199,
197, 344, 170, 176), Coke = c(139, 119, 189, 160, 184, 174, 246,
203, 117, 193, 233, 226, 219, 228, 219, 238, 220, 136, 198, 229,
172, 225, 191, 183, 224, 223, 205, 232, 228, 215, 193, 219, 205,
178, 172, 219, 206, 245, 234, 218, 180, 269, 220, 224, 211, 253,
231, 239, 225, 247, 270, 265, 68, 87, 211, 193, 200, 188, 247,
216, 226, 203, 231, 183, 143, 147, 143, 162, 125, 122), Fanta = c(24,
18, 22, 20, 22, 20, 25, 23, 13, 22, 25, 24, 23, 31, 22, 23, 20,
23, 19, 20, 19, 22, 22, 21, 26, 21, 27, 24, 22, 23, 21, 25, 19,
21, 20, 22, 23, 23, 21, 32, 21, 28, 22, 23, 24, 24, 23, 26, 24,
23, 28, 29, 7, 18, 23, 27, 25, 24, 27, 24, 29, 24, 24, 20, 17,
21, 19, 13, 12, 16), Beer = c(145, 64, 158, 31, 56, 112, 206,
42, 45, 100, 114, 59, 38, 191, 89, 133, 56, 107, 131, 184, 56,
55, 211, 142, 134, 84, 137, 116, 130, 66, 218, 127, 213, 43,
78, 202, 203, 62, 54, 171, 99, 198, 44, 161, 100, 206, 70, 61,
110, 172, 94, 79, 93, 32, 171, 105, 54, 109, 187, 71, 81, 58,
78, 96, 49, 31, 34, 168, 33, 39), Juice = c(28, 40, 105, 26,
41, 31, 96, 37, 20, 46, 78, 53, 29, 57, 46, 111, 49, 18, 58,
112, 37, 43, 38, 93, 48, 69, 43, 57, 114, 43, 48, 47, 141, 35,
47, 69, 158, 43, 49, 29, 34, 139, 40, 42, 38, 132, 38, 42, 41,
103, 42, 48, 30, 22, 136, 39, 37, 39, 92, 40, 40, 37, 67, 73,
31, 23, 27, 102, 31, 25), Lemonade = c(111, 22, 31, 0, 12, 77,
95, 0, 22, 50, 22, 2, 3, 129, 39, 0, 2, 84, 70, 42, 16, 6, 168,
31, 76, 10, 88, 37, 3, 17, 166, 73, 37, 5, 27, 128, 20, 5, 0,
136, 61, 32, 0, 113, 53, 52, 28, 14, 62, 54, 39, 22, 63, 8, 14,
63, 16, 69, 85, 26, 40, 19, 2, 17, 16, 8, 5, 50, 1, 13), Radler = c(2,
1, 21, 2, 1, 2, 11, 2, 1, 2, 12, 2, 3, 3, 2, 19, 2, 4, 2, 28,
2, 3, 3, 16, 8, 3, 3, 19, 11, 3, 2, 4, 34, 1, 3, 3, 22, 11, 3,
4, 1, 24, 1, 3, 7, 20, 1, 3, 3, 12, 8, 2, 1, 2, 21, 3, 2, 1,
10, 5, 1, 2, 10, 5, 2, 1, 2, 15, 1, 0)), row.names = c(NA, -70L
), class = c("tbl_df", "tbl", "data.frame")))
那么接下来应该如何转换这些数据,使其变成下图所示的形式?
我尝试使用 reshape2 包中的 melt 函数,但得到的表格并不理想。
# Melt the wide sales table to long format with `year_week` as the identifier
# column; every other column (including `Year`) becomes a variable/value pair.
# The argument is spelled `id.vars`: the original `id.variables` is not a
# parameter of reshape2::melt, so it was swallowed by `...` and the id columns
# were guessed from the column types instead.
ARTEFICIAL_DATA1 <- reshape2::melt(ARTEFICIAL_DATA, id.vars = "year_week")
有人能帮我把数据重塑(melt、pivot 或其他任何方法都可以)成图片中那样的表格吗?
我们可以使用 pivot_wider
library(dplyr)
library(tidyr)
# Put the two years side by side, one row per calendar week.
# The week number is parsed from `year_week` ("YYYY WW") and used as the row
# key, so weeks are matched by their number rather than by row position; this
# keeps the years aligned even if a week is missing or the rows are unsorted.
# Weeks present in only one year get NA in the other year's columns.
ARTEFICIAL_DATA %>%
  mutate(week = sub("^[0-9]+ +", "", year_week)) %>%
  pivot_wider(
    id_cols = week,
    names_from = Year,
    values_from = c(year_week, Pepsi:Radler)
  ) %>%
  # `week` was only needed as the matching key; drop it so the result holds
  # just the paired <column>_<year> columns.
  select(-week)
# A tibble: 52 x 16
# year_week_2019 year_week_2020 Pepsi_2019 Pepsi_2020 Coke_2019 Coke_2020 Fanta_2019 Fanta_2020 Beer_2019 Beer_2020
# <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 2019 01 2020 01 309 168 139 68 24 7 145 93
# 2 2019 02 2020 02 201 137 119 87 18 18 64 32
# 3 2019 03 2020 03 369 404 189 211 22 23 158 171
# 4 2019 04 2020 04 211 325 160 193 20 27 31 105
# 5 2019 05 2020 05 262 279 184 200 22 25 56 54
# 6 2019 06 2020 06 306 320 174 188 20 24 112 109
# 7 2019 07 2020 07 477 461 246 247 25 27 206 187
# 8 2019 08 2020 08 269 311 203 216 23 24 42 71
# 9 2019 09 2020 09 175 336 117 226 13 29 45 81
#10 2019 10 2020 10 315 286 193 203 22 24 100 58
# … with 42 more rows, and 6 more variables: Juice_2019 <dbl>, Juice_2020 <dbl>, Lemonade_2019 <dbl>,
# Lemonade_2020 <dbl>, Radler_2019 <dbl>, Radler_2020 <dbl>