如何在 Python 中为时间序列数据的各个分组计算 SMAPE?
How to calculate SMAPE for groups in time-series data in python?
我的数据如下所示,我正在使用 Facebook 的 fbprophet 进行预测。接下来,我想为数据框中的每个分组分别计算 SMAPE(对称平均绝对百分比误差)。
我找到了一位 Kaggle 用户描述的 SMAPE 函数(原文此处为链接 "here"),但不确定如何把它整合到我现有的代码中,使每个分组都能算出各自的 SMAPE。另外,我知道 fbprophet 自带验证功能,但我想要的是按分组分别计算 SMAPE。
注意:我是 Python 新手,请附上对代码的说明。
数据集
import pandas as pd

# Sample data: two monthly dates, four groups, two observations per
# (date, group) pair.
# Amount holds numbers rather than strings: summing a string column
# concatenates the text ('12.1' + '13.2' -> '12.113.2') instead of adding.
data = {
    'Date': ['2017-01-01'] * 8 + ['2017-02-01'] * 8,
    'Group': ['A', 'A', 'B', 'B', 'C', 'C', 'D', 'D'] * 2,
    'Amount': [12.1, 13.2, 15.1, 10.7, 12.9, 9.0, 5.6, 6.7,
               4.3, 2.3, 4.0, 5.6, 7.8, 2.3, 5.6, 8.9],
}
df = pd.DataFrame(data)
print(df)
到目前为止的代码...
def get_prediction(df):
    """Fit one Prophet model per group and forecast six months ahead.

    Args:
        df: DataFrame with 'Date', 'Amount' and 'Group' columns. Rows that
            share a date and a group are summed into one observation.

    Returns:
        dict mapping each group label to the forecast DataFrame produced by
        ``predict`` for that group. Empty when ``df`` has no rows.

    Raises:
        ValueError: if an 'Amount' value cannot be parsed as a number.
    """
    prediction = {}
    df = df.rename(columns={'Date': 'ds', 'Amount': 'y', 'Group': 'group'})
    # Amounts may arrive as strings; summing strings concatenates them
    # ('12.1' + '13.2' -> '12.113.2'), so coerce to numbers before grouping.
    df = df.assign(y=pd.to_numeric(df['y']))
    df = df.groupby(['ds', 'group'])['y'].sum().reset_index()
    for group in df['group'].unique():
        article_df = df.loc[df['group'] == group]
        # NOTE: interval_width is not passed, so the forecast bounds use
        # Prophet's default uncertainty interval (80%), not 95%.
        # NOTE(review): weekly and daily seasonality are enabled although
        # the forecast below is monthly ('MS') - confirm this is intended.
        my_model = Prophet(weekly_seasonality=True, daily_seasonality=True,
                           seasonality_prior_scale=1.0)
        my_model.fit(article_df)
        # Extend the history by six month-start dates and predict over all.
        future_dates = my_model.make_future_dataframe(periods=6, freq='MS')
        forecast = my_model.predict(future_dates)
        prediction[group] = forecast
        my_model.plot(forecast)
    return prediction
您仍然可以使用 fbprophet 自带的 cross_validation 函数,只是改用您自己的评分方式。Uber 有一篇很好的博客,介绍了他们如何进行回测(滑动窗口与扩展窗口):https://eng.uber.com/forecasting-introduction/
fbprophet 的交叉验证函数是在滑动窗口上运行的。如果这符合您的需求,就可以将它与自定义的评分函数结合使用。我认为一个不错的做法是继承 Prophet 并实现一个 .score() 方法。
下面是一个示例实现:
from fbprophet import Prophet
from fbprophet.diagnostics import cross_validation
import numpy as np
class ProphetEstimator(Prophet):
    """Prophet model with a cross-validated SMAPE score.

    Adds ``score()``, which reuses Prophet's own ``cross_validation`` and
    evaluates the result with SMAPE instead of Prophet's built-in metrics.
    """

    def __init__(self, *args, **kwargs):
        super(ProphetEstimator, self).__init__(*args, **kwargs)

    def score(self, horizon='6 days'):
        """Return the cross-validated SMAPE (in percent) of this fitted model.

        Args:
            horizon: forecast horizon handed to ``cross_validation``.
                Defaults to '6 days'; pass a different value when the
                data is at another granularity.

        Returns:
            SMAPE computed over the per-cutoff totals of ``yhat`` and ``y``.
        """
        # Cross-validation score reusing Prophet's own implementation.
        df_cv = cross_validation(self, horizon=horizon)
        # Each cutoff window is summed up first; SMAPE is then calculated
        # over those per-window totals, across all windows.
        totals = df_cv.groupby('cutoff').agg({'yhat': 'sum', 'y': 'sum'})
        return self.calc_smape(totals['yhat'], totals['y'])

    def calc_smape(self, y_hat, y):
        """Return the symmetric mean absolute percentage error, in percent.

        Computes ``100 / n * sum(2 * |y_hat - y| / (|y| + |y_hat|))``.
        Values are compared by position.

        Args:
            y_hat: predicted values (any array-like of numbers).
            y: actual values, same length as ``y_hat``.

        Raises:
            ZeroDivisionError: if ``y`` is empty.
        """
        predicted = np.asarray(y_hat, dtype=float)
        actual = np.asarray(y, dtype=float)
        numerator = 2 * np.abs(predicted - actual)
        denominator = np.abs(actual) + np.abs(predicted)
        # When actual and prediction are both 0 the forecast is exact; count
        # that term as 0 instead of letting 0/0 turn the whole score into NaN.
        terms = np.divide(numerator, denominator,
                          out=np.zeros_like(denominator),
                          where=denominator != 0)
        return 100 / len(actual) * np.sum(terms)
def get_prediction(df):
    """Fit one ProphetEstimator per group; return its forecast and SMAPE.

    Args:
        df: DataFrame with 'Date', 'Amount' and 'Group' columns. Rows that
            share a date and a group are summed into one observation.

    Returns:
        dict mapping each group label to a ``(forecast, smape)`` tuple:
        the forecast DataFrame produced by ``predict`` and the value
        returned by the model's ``score()``. Empty when ``df`` has no rows.

    Raises:
        ValueError: if an 'Amount' value cannot be parsed as a number.
    """
    prediction = {}
    df = df.rename(columns={'Date': 'ds', 'Amount': 'y', 'Group': 'group'})
    # Amounts may arrive as strings; summing strings concatenates them
    # ('12.1' + '13.2' -> '12.113.2'), so coerce to numbers before grouping.
    df = df.assign(y=pd.to_numeric(df['y']))
    df = df.groupby(['ds', 'group'])['y'].sum().reset_index()
    for group in df['group'].unique():
        article_df = df.loc[df['group'] == group]
        # NOTE: interval_width is not passed, so the forecast bounds use
        # Prophet's default uncertainty interval (80%), not 95%.
        # NOTE(review): weekly and daily seasonality are enabled although
        # the forecast below is monthly ('MS') - confirm this is intended.
        my_model = ProphetEstimator(weekly_seasonality=True,
                                    daily_seasonality=True,
                                    seasonality_prior_scale=1.0)
        my_model.fit(article_df)
        # Per-group SMAPE; returned alongside the forecast.
        smape = my_model.score()
        # Extend the history by six month-start dates and predict over all.
        future_dates = my_model.make_future_dataframe(periods=6, freq='MS')
        forecast = my_model.predict(future_dates)
        prediction[group] = (forecast, smape)
        my_model.plot(forecast)
    return prediction
我的数据如下所示,我正在使用 Facebook 的 fbprophet 进行预测。接下来,我想为数据框中的每个分组分别计算 SMAPE(对称平均绝对百分比误差)。
我找到了一位 Kaggle 用户描述的 SMAPE 函数(原文此处为链接 "here"),但不确定如何把它整合到我现有的代码中,使每个分组都能算出各自的 SMAPE。另外,我知道 fbprophet 自带验证功能,但我想要的是按分组分别计算 SMAPE。
注意:我是 Python 新手,请附上对代码的说明。
数据集
import pandas as pd

# Sample data: two monthly dates, four groups, two observations per
# (date, group) pair.
# Amount holds numbers rather than strings: summing a string column
# concatenates the text ('12.1' + '13.2' -> '12.113.2') instead of adding.
data = {
    'Date': ['2017-01-01'] * 8 + ['2017-02-01'] * 8,
    'Group': ['A', 'A', 'B', 'B', 'C', 'C', 'D', 'D'] * 2,
    'Amount': [12.1, 13.2, 15.1, 10.7, 12.9, 9.0, 5.6, 6.7,
               4.3, 2.3, 4.0, 5.6, 7.8, 2.3, 5.6, 8.9],
}
df = pd.DataFrame(data)
print(df)
到目前为止的代码...
def get_prediction(df):
    """Fit one Prophet model per group and forecast six months ahead.

    Args:
        df: DataFrame with 'Date', 'Amount' and 'Group' columns. Rows that
            share a date and a group are summed into one observation.

    Returns:
        dict mapping each group label to the forecast DataFrame produced by
        ``predict`` for that group. Empty when ``df`` has no rows.

    Raises:
        ValueError: if an 'Amount' value cannot be parsed as a number.
    """
    prediction = {}
    df = df.rename(columns={'Date': 'ds', 'Amount': 'y', 'Group': 'group'})
    # Amounts may arrive as strings; summing strings concatenates them
    # ('12.1' + '13.2' -> '12.113.2'), so coerce to numbers before grouping.
    df = df.assign(y=pd.to_numeric(df['y']))
    df = df.groupby(['ds', 'group'])['y'].sum().reset_index()
    for group in df['group'].unique():
        article_df = df.loc[df['group'] == group]
        # NOTE: interval_width is not passed, so the forecast bounds use
        # Prophet's default uncertainty interval (80%), not 95%.
        # NOTE(review): weekly and daily seasonality are enabled although
        # the forecast below is monthly ('MS') - confirm this is intended.
        my_model = Prophet(weekly_seasonality=True, daily_seasonality=True,
                           seasonality_prior_scale=1.0)
        my_model.fit(article_df)
        # Extend the history by six month-start dates and predict over all.
        future_dates = my_model.make_future_dataframe(periods=6, freq='MS')
        forecast = my_model.predict(future_dates)
        prediction[group] = forecast
        my_model.plot(forecast)
    return prediction
您仍然可以使用 fbprophet 自带的 cross_validation 函数,只是改用您自己的评分方式。Uber 有一篇很好的博客,介绍了他们如何进行回测(滑动窗口与扩展窗口):https://eng.uber.com/forecasting-introduction/
fbprophet 的交叉验证函数是在滑动窗口上运行的。如果这符合您的需求,就可以将它与自定义的评分函数结合使用。我认为一个不错的做法是继承 Prophet 并实现一个 .score() 方法。
下面是一个示例实现:
from fbprophet import Prophet
from fbprophet.diagnostics import cross_validation
import numpy as np
class ProphetEstimator(Prophet):
    """Prophet model with a cross-validated SMAPE score.

    Adds ``score()``, which reuses Prophet's own ``cross_validation`` and
    evaluates the result with SMAPE instead of Prophet's built-in metrics.
    """

    def __init__(self, *args, **kwargs):
        super(ProphetEstimator, self).__init__(*args, **kwargs)

    def score(self, horizon='6 days'):
        """Return the cross-validated SMAPE (in percent) of this fitted model.

        Args:
            horizon: forecast horizon handed to ``cross_validation``.
                Defaults to '6 days'; pass a different value when the
                data is at another granularity.

        Returns:
            SMAPE computed over the per-cutoff totals of ``yhat`` and ``y``.
        """
        # Cross-validation score reusing Prophet's own implementation.
        df_cv = cross_validation(self, horizon=horizon)
        # Each cutoff window is summed up first; SMAPE is then calculated
        # over those per-window totals, across all windows.
        totals = df_cv.groupby('cutoff').agg({'yhat': 'sum', 'y': 'sum'})
        return self.calc_smape(totals['yhat'], totals['y'])

    def calc_smape(self, y_hat, y):
        """Return the symmetric mean absolute percentage error, in percent.

        Computes ``100 / n * sum(2 * |y_hat - y| / (|y| + |y_hat|))``.
        Values are compared by position.

        Args:
            y_hat: predicted values (any array-like of numbers).
            y: actual values, same length as ``y_hat``.

        Raises:
            ZeroDivisionError: if ``y`` is empty.
        """
        predicted = np.asarray(y_hat, dtype=float)
        actual = np.asarray(y, dtype=float)
        numerator = 2 * np.abs(predicted - actual)
        denominator = np.abs(actual) + np.abs(predicted)
        # When actual and prediction are both 0 the forecast is exact; count
        # that term as 0 instead of letting 0/0 turn the whole score into NaN.
        terms = np.divide(numerator, denominator,
                          out=np.zeros_like(denominator),
                          where=denominator != 0)
        return 100 / len(actual) * np.sum(terms)
def get_prediction(df):
    """Fit one ProphetEstimator per group; return its forecast and SMAPE.

    Args:
        df: DataFrame with 'Date', 'Amount' and 'Group' columns. Rows that
            share a date and a group are summed into one observation.

    Returns:
        dict mapping each group label to a ``(forecast, smape)`` tuple:
        the forecast DataFrame produced by ``predict`` and the value
        returned by the model's ``score()``. Empty when ``df`` has no rows.

    Raises:
        ValueError: if an 'Amount' value cannot be parsed as a number.
    """
    prediction = {}
    df = df.rename(columns={'Date': 'ds', 'Amount': 'y', 'Group': 'group'})
    # Amounts may arrive as strings; summing strings concatenates them
    # ('12.1' + '13.2' -> '12.113.2'), so coerce to numbers before grouping.
    df = df.assign(y=pd.to_numeric(df['y']))
    df = df.groupby(['ds', 'group'])['y'].sum().reset_index()
    for group in df['group'].unique():
        article_df = df.loc[df['group'] == group]
        # NOTE: interval_width is not passed, so the forecast bounds use
        # Prophet's default uncertainty interval (80%), not 95%.
        # NOTE(review): weekly and daily seasonality are enabled although
        # the forecast below is monthly ('MS') - confirm this is intended.
        my_model = ProphetEstimator(weekly_seasonality=True,
                                    daily_seasonality=True,
                                    seasonality_prior_scale=1.0)
        my_model.fit(article_df)
        # Per-group SMAPE; returned alongside the forecast.
        smape = my_model.score()
        # Extend the history by six month-start dates and predict over all.
        future_dates = my_model.make_future_dataframe(periods=6, freq='MS')
        forecast = my_model.predict(future_dates)
        prediction[group] = (forecast, smape)
        my_model.plot(forecast)
    return prediction