使用预测数据进行数据操作
Data manipulation with forecasting data
我正在尝试对两家商店(商店 1 和商店 2)的销售额进行预测。使用 forecast 包完成预测后,我得到了下面两个表。第一个表按模型分别给出了 MAPE 误差(value 列)。您可以在下面看到数据以及数据的屏幕截图。
# Table_1: one row per (model, store) pair, stored as a tibble (dput output).
#   ...1  - row label kept as character ("1".."20")
#   X1    - forecasting model name (the same 10 names listed once per store)
#   X2    - store label ("Store 1" or "Store 2")
#   value - MAPE of that model for that store (per the question text)
# NOTE(review): X1 spells the neural-net model "NNETAR", whereas
# Table2$Forecasting_model spells it "NNAR"; matching the two tables on model
# name will silently miss that model - confirm which label is intended.
Table_1<-structure(list(...1 = c("1", "2", "3", "4", "5", "6", "7", "8",
"9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19",
"20"), X1 = c("SNAIVE", "HW", "ETS", "ARIMA", "STL", "TBATS",
"NNETAR", "RWF", "TSLM", "FOURIER", "SNAIVE", "HW", "ETS", "ARIMA",
"STL", "TBATS", "NNETAR", "RWF", "TSLM", "FOURIER"), X2 = c("Store 1",
"Store 1", "Store 1", "Store 1", "Store 1", "Store 1", "Store 1",
"Store 1", "Store 1", "Store 1", "Store 2", "Store 2", "Store 2",
"Store 2", "Store 2", "Store 2", "Store 2", "Store 2", "Store 2",
"Store 2"), value = c(11.2819379803024, 4.90469397146697, 4.90469397146697,
4.64808116952175, 4.92695563666538, 6.11286061911487, 7.66061575087076,
8.95984865369006, 5.07614708345642, 4.57448859126253, 22.7760224588221,
24.0502857269679, 18.9376978459644, 21.6693712888351, 21.6029490199174,
24.692214948761, 26.2680955559159, 30.5302345480261, 22.2367412218357,
22.6100823447494)), row.names = c(NA, -20L), class = c("tbl_df",
"tbl", "data.frame"))
我已经用黄色突出显示了商店 1 和商店 2 各自预测效果最好的三个模型(MAPE 误差最低)。商店 1 是 ETS、ARIMA 和 FOURIER,商店 2 是 ETS、ARIMA 和 STL。
第二个表包含各模型每个月的预测数据。您可以在下面看到数据以及数据的屏幕截图。
# Table2: forecasts per model with one column per store, as a tibble (dput output).
#   Date              - POSIXct timestamps (tzone "UTC"); the same 13 values
#                       repeat once for every model (monthly, per the question text)
#   Forecasting_model - model name; 10 models x 13 dates = 130 rows
#   `Store 1`, `Store 2` - forecast value of that model/date for each store
# NOTE(review): the neural-net model is labelled "NNAR" here but "NNETAR" in
# Table_1$X1 - confirm which label is intended before matching on model name.
Table2<-structure(list(Date = structure(c(1575158400, 1577836800, 1580515200,
1583020800, 1585699200, 1588291200, 1590969600, 1593561600, 1596240000,
1598918400, 1601510400, 1604188800, 1606780800, 1575158400, 1577836800,
1580515200, 1583020800, 1585699200, 1588291200, 1590969600, 1593561600,
1596240000, 1598918400, 1601510400, 1604188800, 1606780800, 1575158400,
1577836800, 1580515200, 1583020800, 1585699200, 1588291200, 1590969600,
1593561600, 1596240000, 1598918400, 1601510400, 1604188800, 1606780800,
1575158400, 1577836800, 1580515200, 1583020800, 1585699200, 1588291200,
1590969600, 1593561600, 1596240000, 1598918400, 1601510400, 1604188800,
1606780800, 1575158400, 1577836800, 1580515200, 1583020800, 1585699200,
1588291200, 1590969600, 1593561600, 1596240000, 1598918400, 1601510400,
1604188800, 1606780800, 1575158400, 1577836800, 1580515200, 1583020800,
1585699200, 1588291200, 1590969600, 1593561600, 1596240000, 1598918400,
1601510400, 1604188800, 1606780800, 1575158400, 1577836800, 1580515200,
1583020800, 1585699200, 1588291200, 1590969600, 1593561600, 1596240000,
1598918400, 1601510400, 1604188800, 1606780800, 1575158400, 1577836800,
1580515200, 1583020800, 1585699200, 1588291200, 1590969600, 1593561600,
1596240000, 1598918400, 1601510400, 1604188800, 1606780800, 1575158400,
1577836800, 1580515200, 1583020800, 1585699200, 1588291200, 1590969600,
1593561600, 1596240000, 1598918400, 1601510400, 1604188800, 1606780800,
1575158400, 1577836800, 1580515200, 1583020800, 1585699200, 1588291200,
1590969600, 1593561600, 1596240000, 1598918400, 1601510400, 1604188800,
1606780800), class = c("POSIXct", "POSIXt"), tzone = "UTC"),
Forecasting_model = c("SNAIVE", "SNAIVE", "SNAIVE", "SNAIVE",
"SNAIVE", "SNAIVE", "SNAIVE", "SNAIVE", "SNAIVE", "SNAIVE",
"SNAIVE", "SNAIVE", "SNAIVE", "HW", "HW", "HW", "HW", "HW",
"HW", "HW", "HW", "HW", "HW", "HW", "HW", "HW", "ETS", "ETS",
"ETS", "ETS", "ETS", "ETS", "ETS", "ETS", "ETS", "ETS", "ETS",
"ETS", "ETS", "ARIMA", "ARIMA", "ARIMA", "ARIMA", "ARIMA",
"ARIMA", "ARIMA", "ARIMA", "ARIMA", "ARIMA", "ARIMA", "ARIMA",
"ARIMA", "STL", "STL", "STL", "STL", "STL", "STL", "STL",
"STL", "STL", "STL", "STL", "STL", "STL", "TBATS", "TBATS",
"TBATS", "TBATS", "TBATS", "TBATS", "TBATS", "TBATS", "TBATS",
"TBATS", "TBATS", "TBATS", "TBATS", "NNAR", "NNAR", "NNAR",
"NNAR", "NNAR", "NNAR", "NNAR", "NNAR", "NNAR", "NNAR", "NNAR",
"NNAR", "NNAR", "RWF", "RWF", "RWF", "RWF", "RWF", "RWF",
"RWF", "RWF", "RWF", "RWF", "RWF", "RWF", "RWF", "TSLM",
"TSLM", "TSLM", "TSLM", "TSLM", "TSLM", "TSLM", "TSLM", "TSLM",
"TSLM", "TSLM", "TSLM", "TSLM", "FOURIER", "FOURIER", "FOURIER",
"FOURIER", "FOURIER", "FOURIER", "FOURIER", "FOURIER", "FOURIER",
"FOURIER", "FOURIER", "FOURIER", "FOURIER"), `Store 1` = c(8083,
1171, 1328, 1281, 1281, 1118, 1107, 1611, 1116, 1133, 1618,
1261, 8083, 8312, 1336, 1261, 1673, 1667, 1223, 1603, 1621,
1211, 1633, 1637, 1672, 8138, 8312, 1336, 1261, 1673, 1667,
1223, 1603, 1621, 1211, 1633, 1637, 1672, 8138, 8818, 1363,
1282, 1671, 1623, 1276, 1283, 1687, 1261, 1632, 1676, 1631,
8367, 8827, 1108, 1226, 1681, 1661, 1288, 1616, 1683, 1278,
1663, 1678, 1703, 8338, 8371, 1183, 1237, 1738, 1701, 1637,
1681, 1721, 1271, 1738, 1663, 1732, 8180, 8076, 1318, 1271,
1732, 1883, 1286, 1607, 1336, 1281, 1711, 1873, 1881, 8183,
1271, 1283, 1233, 1608, 1618, 1681, 1631, 1611, 1620, 1660,
1663, 1673, 1688, 8166, 1317, 1188, 1233, 1273, 1183, 1212,
1276, 1178, 1221, 1226, 1283, 8863, 8811, 1118, 1223, 1661,
1621, 1260, 1286, 1617, 1213, 1688, 1687, 1660, 8311), `Store 2` = c(1180,
811, 312, 1612, 1387, 878, 812, 883, 362, 768, 800, 760,
1180, 1021, 761, 1002, 1106, 1271, 337, 1113, 373, 833, 1012,
333, 303, 1166, 336, 708, 332, 1312, 1168, 838, 1010, 862,
773, 883, 861, 767, 1000, 1070, 636, 838, 1161, 1183, 887,
1001, 813, 331, 820, 738, 732, 1087, 333, 688, 810, 1311,
1183, 876, 338, 818, 816, 818, 816, 773, 333, 337, 888, 871,
1378, 1100, 1008, 368, 380, 883, 386, 872, 838, 363, 1102,
301, 831, 1133, 1331, 831, 333, 321, 338, 883, 832, 881,
1303, 766, 778, 773, 782, 731, 737, 801, 810, 816, 888, 883,
832, 811, 1820, 1000, 1136, 1270, 1718, 1188, 1873, 1162,
1136, 1130, 1178, 1110, 1371, 380, 703, 306, 1862, 1110,
873, 327, 837, 808, 817, 838, 726, 371)), row.names = c(NA,
-130L), class = c("tbl_df", "tbl", "data.frame"))
所以我的目的是根据最低的 MAPE 错误自动选择最好的三个模型,如上面突出显示的模型,并计算商店 1 和商店 2 的最佳三个模型的月份平均值,如下例所示。
我试过这段代码,但不知道如何继续。
# Asker's attempt, kept as posted (the last statement is the one reported as failing).
# Arrange data by MAPE error
# NOTE(review): no group_by() precedes arrange(), so .by_group = TRUE has no
# grouping to act on; rows end up ordered by value alone.
Table_1a<-data.frame(Table_1)%>%
select(X1,X2,value)%>%
arrange((value),.by_group = TRUE)
# Select three best models
# NOTE(review): row positions are hard-coded, and 10:13 spans four rows rather
# than three - confirm these indices pick the intended rows for each store.
Table_1b <-data.frame(rbind(Table_1a[1:3, 1:3],Table_1a[10:13, 1:3]))%>%
select(X1,X2)%>%
group_by(X1,X2)
# This line does not work
# NOTE(review): the forecast data above is assigned to `Table2`, but this call
# refers to `Table_2`, which is not defined anywhere in the visible code.
# The `==` also compares Table_1b$X1 with the Forecasting_model column position
# by position instead of testing membership (%in%) - presumably not what is wanted.
Forecasting_Store_1<-mutate(Table_2,
ifelse(Table_1b$X1==Table_2$Forecasting_model,Table_2$Forecasting_model,"")
)
谁能帮我解决这个问题?
这是一个可能的解决方案:
首先,选出 3 个最佳模型。我更喜欢使用 top_n,
它与您的解决方案类似,但更简洁一些。诀窍是用 paste 把模型名和商店名拼接起来,得到唯一键。
# For each store (X2), keep the models with the three lowest MAPE values;
# models tied at the cut-off are all kept. Every kept row is turned into a
# "<model> <store>" key used later to filter the forecasts.
# Note: top_n() is superseded in dplyr >= 1.0.0; slice_min(value, n = 3) is
# the current equivalent.
model_ok <- Table_1 %>%
  group_by(X2) %>%
  top_n(-3, value) %>%
  ungroup() %>%
  transmute(model_ok = paste(X1, X2)) %>%
  unlist()
请注意,在您的示例中,商店 1 的第三名出现了并列,因此我的代码为商店 1 选出了 4 个模型,而不是 3 个(您的代码也是如此)。
然后您可以对第二个表做透视(pivot),把商店从列转成行,再次用 paste 拼接出键,并筛选出与已选中的键匹配的行。
# Reshape Table2 to one row per (Date, model, store) forecast, then keep only
# the rows whose "<model> <store>" key is present in `model_ok`.
table3 <- Table2 %>%
  # Pivot every column except the two identifier columns, so the code keeps
  # working when more store columns are added (same result for two stores).
  # Forecast values land in pivot_longer()'s default `value` column.
  pivot_longer(-c(Date, Forecasting_model), names_to = "store") %>%
  # Build the same key format that was used to create `model_ok`.
  mutate(model_store = paste(Forecasting_model, store)) %>%
  filter(model_store %in% model_ok) %>%
  # The key is only needed for filtering.
  select(-model_store)
最后,您可以再次对表做透视(pivot),把各个模型作为列,并计算这 3 个模型(在我的例子中是 4 个)的平均值。如果您只有 2 家商店,只需把 "Store 1" 换成 "Store 2",
重复此代码即可。
# Store 1 only: spread the kept models into one column each (one row per
# Date), then add the row-wise mean of the model columns as `average`.
table3 %>%
  filter(store == "Store 1") %>%
  pivot_wider(names_from = Forecasting_model) %>%
  # Average everything except the two identifier columns.
  mutate(average = rowMeans(select(., -c(Date, store))))
编辑:
由于您似乎有多个商店,下面是一个示例,说明如何使用 purrr::map
遍历这些商店。首先,您需要将不同的商店作为命名向量。我使用了 Table_1$X2 %>% unique %>% set_names
但你可能想使用更干净的对象。
# purrr supplies map() and set_names(). The package name was misspelled as
# "purrrr", which makes library() fail before any of the code below runs.
library(purrr)
# Build one averaged table per store. set_names() names each store label by
# itself, so map() returns a list addressable as output$`Store 1`, etc.
output <- Table_1$X2 %>%
  unique() %>%
  set_names() %>%
  map(~ {
    table3 %>%
      filter(store == .x) %>%
      pivot_wider(names_from = Forecasting_model) %>%
      mutate(average = rowMeans(select(., -Date, -store)))
  })
# Print the per-store results.
output$`Store 1`
output$`Store 2`
希望对您有所帮助。
我正在尝试对两家商店(商店 1 和商店 2)的销售额进行预测。使用 forecast 包完成预测后,我得到了下面两个表。第一个表按模型分别给出了 MAPE 误差(value 列)。您可以在下面看到数据以及数据的屏幕截图。
# Table_1: one row per (model, store) pair, stored as a tibble (dput output).
#   ...1  - row label kept as character ("1".."20")
#   X1    - forecasting model name (the same 10 names listed once per store)
#   X2    - store label ("Store 1" or "Store 2")
#   value - MAPE of that model for that store (per the question text)
# NOTE(review): X1 spells the neural-net model "NNETAR", whereas
# Table2$Forecasting_model spells it "NNAR"; matching the two tables on model
# name will silently miss that model - confirm which label is intended.
Table_1<-structure(list(...1 = c("1", "2", "3", "4", "5", "6", "7", "8",
"9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19",
"20"), X1 = c("SNAIVE", "HW", "ETS", "ARIMA", "STL", "TBATS",
"NNETAR", "RWF", "TSLM", "FOURIER", "SNAIVE", "HW", "ETS", "ARIMA",
"STL", "TBATS", "NNETAR", "RWF", "TSLM", "FOURIER"), X2 = c("Store 1",
"Store 1", "Store 1", "Store 1", "Store 1", "Store 1", "Store 1",
"Store 1", "Store 1", "Store 1", "Store 2", "Store 2", "Store 2",
"Store 2", "Store 2", "Store 2", "Store 2", "Store 2", "Store 2",
"Store 2"), value = c(11.2819379803024, 4.90469397146697, 4.90469397146697,
4.64808116952175, 4.92695563666538, 6.11286061911487, 7.66061575087076,
8.95984865369006, 5.07614708345642, 4.57448859126253, 22.7760224588221,
24.0502857269679, 18.9376978459644, 21.6693712888351, 21.6029490199174,
24.692214948761, 26.2680955559159, 30.5302345480261, 22.2367412218357,
22.6100823447494)), row.names = c(NA, -20L), class = c("tbl_df",
"tbl", "data.frame"))
我已经用黄色突出显示了商店 1 和商店 2 各自预测效果最好的三个模型(MAPE 误差最低)。商店 1 是 ETS、ARIMA 和 FOURIER,商店 2 是 ETS、ARIMA 和 STL。
第二个表包含各模型每个月的预测数据。您可以在下面看到数据以及数据的屏幕截图。
# Table2: forecasts per model with one column per store, as a tibble (dput output).
#   Date              - POSIXct timestamps (tzone "UTC"); the same 13 values
#                       repeat once for every model (monthly, per the question text)
#   Forecasting_model - model name; 10 models x 13 dates = 130 rows
#   `Store 1`, `Store 2` - forecast value of that model/date for each store
# NOTE(review): the neural-net model is labelled "NNAR" here but "NNETAR" in
# Table_1$X1 - confirm which label is intended before matching on model name.
Table2<-structure(list(Date = structure(c(1575158400, 1577836800, 1580515200,
1583020800, 1585699200, 1588291200, 1590969600, 1593561600, 1596240000,
1598918400, 1601510400, 1604188800, 1606780800, 1575158400, 1577836800,
1580515200, 1583020800, 1585699200, 1588291200, 1590969600, 1593561600,
1596240000, 1598918400, 1601510400, 1604188800, 1606780800, 1575158400,
1577836800, 1580515200, 1583020800, 1585699200, 1588291200, 1590969600,
1593561600, 1596240000, 1598918400, 1601510400, 1604188800, 1606780800,
1575158400, 1577836800, 1580515200, 1583020800, 1585699200, 1588291200,
1590969600, 1593561600, 1596240000, 1598918400, 1601510400, 1604188800,
1606780800, 1575158400, 1577836800, 1580515200, 1583020800, 1585699200,
1588291200, 1590969600, 1593561600, 1596240000, 1598918400, 1601510400,
1604188800, 1606780800, 1575158400, 1577836800, 1580515200, 1583020800,
1585699200, 1588291200, 1590969600, 1593561600, 1596240000, 1598918400,
1601510400, 1604188800, 1606780800, 1575158400, 1577836800, 1580515200,
1583020800, 1585699200, 1588291200, 1590969600, 1593561600, 1596240000,
1598918400, 1601510400, 1604188800, 1606780800, 1575158400, 1577836800,
1580515200, 1583020800, 1585699200, 1588291200, 1590969600, 1593561600,
1596240000, 1598918400, 1601510400, 1604188800, 1606780800, 1575158400,
1577836800, 1580515200, 1583020800, 1585699200, 1588291200, 1590969600,
1593561600, 1596240000, 1598918400, 1601510400, 1604188800, 1606780800,
1575158400, 1577836800, 1580515200, 1583020800, 1585699200, 1588291200,
1590969600, 1593561600, 1596240000, 1598918400, 1601510400, 1604188800,
1606780800), class = c("POSIXct", "POSIXt"), tzone = "UTC"),
Forecasting_model = c("SNAIVE", "SNAIVE", "SNAIVE", "SNAIVE",
"SNAIVE", "SNAIVE", "SNAIVE", "SNAIVE", "SNAIVE", "SNAIVE",
"SNAIVE", "SNAIVE", "SNAIVE", "HW", "HW", "HW", "HW", "HW",
"HW", "HW", "HW", "HW", "HW", "HW", "HW", "HW", "ETS", "ETS",
"ETS", "ETS", "ETS", "ETS", "ETS", "ETS", "ETS", "ETS", "ETS",
"ETS", "ETS", "ARIMA", "ARIMA", "ARIMA", "ARIMA", "ARIMA",
"ARIMA", "ARIMA", "ARIMA", "ARIMA", "ARIMA", "ARIMA", "ARIMA",
"ARIMA", "STL", "STL", "STL", "STL", "STL", "STL", "STL",
"STL", "STL", "STL", "STL", "STL", "STL", "TBATS", "TBATS",
"TBATS", "TBATS", "TBATS", "TBATS", "TBATS", "TBATS", "TBATS",
"TBATS", "TBATS", "TBATS", "TBATS", "NNAR", "NNAR", "NNAR",
"NNAR", "NNAR", "NNAR", "NNAR", "NNAR", "NNAR", "NNAR", "NNAR",
"NNAR", "NNAR", "RWF", "RWF", "RWF", "RWF", "RWF", "RWF",
"RWF", "RWF", "RWF", "RWF", "RWF", "RWF", "RWF", "TSLM",
"TSLM", "TSLM", "TSLM", "TSLM", "TSLM", "TSLM", "TSLM", "TSLM",
"TSLM", "TSLM", "TSLM", "TSLM", "FOURIER", "FOURIER", "FOURIER",
"FOURIER", "FOURIER", "FOURIER", "FOURIER", "FOURIER", "FOURIER",
"FOURIER", "FOURIER", "FOURIER", "FOURIER"), `Store 1` = c(8083,
1171, 1328, 1281, 1281, 1118, 1107, 1611, 1116, 1133, 1618,
1261, 8083, 8312, 1336, 1261, 1673, 1667, 1223, 1603, 1621,
1211, 1633, 1637, 1672, 8138, 8312, 1336, 1261, 1673, 1667,
1223, 1603, 1621, 1211, 1633, 1637, 1672, 8138, 8818, 1363,
1282, 1671, 1623, 1276, 1283, 1687, 1261, 1632, 1676, 1631,
8367, 8827, 1108, 1226, 1681, 1661, 1288, 1616, 1683, 1278,
1663, 1678, 1703, 8338, 8371, 1183, 1237, 1738, 1701, 1637,
1681, 1721, 1271, 1738, 1663, 1732, 8180, 8076, 1318, 1271,
1732, 1883, 1286, 1607, 1336, 1281, 1711, 1873, 1881, 8183,
1271, 1283, 1233, 1608, 1618, 1681, 1631, 1611, 1620, 1660,
1663, 1673, 1688, 8166, 1317, 1188, 1233, 1273, 1183, 1212,
1276, 1178, 1221, 1226, 1283, 8863, 8811, 1118, 1223, 1661,
1621, 1260, 1286, 1617, 1213, 1688, 1687, 1660, 8311), `Store 2` = c(1180,
811, 312, 1612, 1387, 878, 812, 883, 362, 768, 800, 760,
1180, 1021, 761, 1002, 1106, 1271, 337, 1113, 373, 833, 1012,
333, 303, 1166, 336, 708, 332, 1312, 1168, 838, 1010, 862,
773, 883, 861, 767, 1000, 1070, 636, 838, 1161, 1183, 887,
1001, 813, 331, 820, 738, 732, 1087, 333, 688, 810, 1311,
1183, 876, 338, 818, 816, 818, 816, 773, 333, 337, 888, 871,
1378, 1100, 1008, 368, 380, 883, 386, 872, 838, 363, 1102,
301, 831, 1133, 1331, 831, 333, 321, 338, 883, 832, 881,
1303, 766, 778, 773, 782, 731, 737, 801, 810, 816, 888, 883,
832, 811, 1820, 1000, 1136, 1270, 1718, 1188, 1873, 1162,
1136, 1130, 1178, 1110, 1371, 380, 703, 306, 1862, 1110,
873, 327, 837, 808, 817, 838, 726, 371)), row.names = c(NA,
-130L), class = c("tbl_df", "tbl", "data.frame"))
所以我的目的是根据最低的 MAPE 错误自动选择最好的三个模型,如上面突出显示的模型,并计算商店 1 和商店 2 的最佳三个模型的月份平均值,如下例所示。
我试过这段代码,但不知道如何继续。
# Asker's attempt, kept as posted (the last statement is the one reported as failing).
# Arrange data by MAPE error
# NOTE(review): no group_by() precedes arrange(), so .by_group = TRUE has no
# grouping to act on; rows end up ordered by value alone.
Table_1a<-data.frame(Table_1)%>%
select(X1,X2,value)%>%
arrange((value),.by_group = TRUE)
# Select three best models
# NOTE(review): row positions are hard-coded, and 10:13 spans four rows rather
# than three - confirm these indices pick the intended rows for each store.
Table_1b <-data.frame(rbind(Table_1a[1:3, 1:3],Table_1a[10:13, 1:3]))%>%
select(X1,X2)%>%
group_by(X1,X2)
# This line does not work
# NOTE(review): the forecast data above is assigned to `Table2`, but this call
# refers to `Table_2`, which is not defined anywhere in the visible code.
# The `==` also compares Table_1b$X1 with the Forecasting_model column position
# by position instead of testing membership (%in%) - presumably not what is wanted.
Forecasting_Store_1<-mutate(Table_2,
ifelse(Table_1b$X1==Table_2$Forecasting_model,Table_2$Forecasting_model,"")
)
谁能帮我解决这个问题?
这是一个可能的解决方案:
首先,选出 3 个最佳模型。我更喜欢使用 top_n,
它与您的解决方案类似,但更简洁一些。诀窍是用 paste 把模型名和商店名拼接起来,得到唯一键。
# For each store (X2), keep the models with the three lowest MAPE values;
# models tied at the cut-off are all kept. Every kept row is turned into a
# "<model> <store>" key used later to filter the forecasts.
# Note: top_n() is superseded in dplyr >= 1.0.0; slice_min(value, n = 3) is
# the current equivalent.
model_ok <- Table_1 %>%
  group_by(X2) %>%
  top_n(-3, value) %>%
  ungroup() %>%
  transmute(model_ok = paste(X1, X2)) %>%
  unlist()
请注意,在您的示例中,商店 1 的第三名出现了并列,因此我的代码为商店 1 选出了 4 个模型,而不是 3 个(您的代码也是如此)。
然后您可以对第二个表做透视(pivot),把商店从列转成行,再次用 paste 拼接出键,并筛选出与已选中的键匹配的行。
# Reshape Table2 to one row per (Date, model, store) forecast, then keep only
# the rows whose "<model> <store>" key is present in `model_ok`.
table3 <- Table2 %>%
  # Pivot every column except the two identifier columns, so the code keeps
  # working when more store columns are added (same result for two stores).
  # Forecast values land in pivot_longer()'s default `value` column.
  pivot_longer(-c(Date, Forecasting_model), names_to = "store") %>%
  # Build the same key format that was used to create `model_ok`.
  mutate(model_store = paste(Forecasting_model, store)) %>%
  filter(model_store %in% model_ok) %>%
  # The key is only needed for filtering.
  select(-model_store)
最后,您可以再次对表做透视(pivot),把各个模型作为列,并计算这 3 个模型(在我的例子中是 4 个)的平均值。如果您只有 2 家商店,只需把 "Store 1" 换成 "Store 2",
重复此代码
# Store 1 only: spread the kept models into one column each (one row per
# Date), then add the row-wise mean of the model columns as `average`.
table3 %>%
  filter(store == "Store 1") %>%
  pivot_wider(names_from = Forecasting_model) %>%
  # Average everything except the two identifier columns.
  mutate(average = rowMeans(select(., -c(Date, store))))
编辑:
由于您似乎有多个商店,下面是一个示例,说明如何使用 purrr::map
遍历这些商店。首先,您需要将不同的商店作为命名向量。我使用了 Table_1$X2 %>% unique %>% set_names
但你可能想使用更干净的对象。
# purrr supplies map() and set_names(). The package name was misspelled as
# "purrrr", which makes library() fail before any of the code below runs.
library(purrr)
# Build one averaged table per store. set_names() names each store label by
# itself, so map() returns a list addressable as output$`Store 1`, etc.
output <- Table_1$X2 %>%
  unique() %>%
  set_names() %>%
  map(~ {
    table3 %>%
      filter(store == .x) %>%
      pivot_wider(names_from = Forecasting_model) %>%
      mutate(average = rowMeans(select(., -Date, -store)))
  })
# Print the per-store results.
output$`Store 1`
output$`Store 2`
希望对您有所帮助。