使用来自 statsmodels 的指数平滑进行插值
Interpolation using ExponentialSmoothing from stats models
我在时间序列上使用 ExponentialSmoothing
从 statsmodels 到 运行 Holt-Winters 方法。
我得到了预测值,但无法提取计算值并将它们与观测值进行比较。
from pandas import Series
from scipy import stats
import statsmodels.api as sm
from statsmodels.tsa.api import ExponentialSmoothing
modelHW = ExponentialSmoothing(np.asarray(passtrain_df['n_passengers']), seasonal_periods=12, trend='add', seasonal='mul',).fit()
y_hat_avg['Holt_Winter'] = modelHW.forecast(prediction_size)
所以在这里,prediction_size = number of forecasted datapoints
(在我的例子中是 4 个)
passtrain_df
是一个包含观察值(140 个数据点)的数据框,Holt_Winter 模型基于该数据框构建(回归)。
我可以轻松显示 4 个预测值。
如何提取 140 个计算值?
尝试使用:
print(ExponentialSmoothing.predict(np.asarray(passtrain_df), start=0, end=139))
但我可能某处有语法错误
谢谢!
编辑:
用来自 OP
的示例数据替换了合成数据集
建立新预测周期的固定功能
根据 OP 请求修复了 x 轴日期格式
答案:
如果您要查找估算期内的计算值,您应该使用 modelHW.fittedvalues
而不是 modelHW.forecast()
。后者会给你它所说的;预测。这非常棒。让我告诉你如何做这两件事:
图 1 - 估计期内的模型
图 2 - 预测
代码:
#imports
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from statsmodels.tsa.api import ExponentialSmoothing
import matplotlib.dates as mdates
#%%
#
# Load data
pass_df = pd.read_csv('https://raw.githubusercontent.com/dacatay/time-series-analysis/master/data/passengers.csv', sep=';')
pass_df = pass_df.set_index('month')
type(pass_df.index)
df = pass_df.copy()
# Model
modelHW = ExponentialSmoothing(np.asarray(df['n_passengers']), seasonal_periods=12, trend='add', seasonal='mul',).fit()
modelHW.summary()
# Model, fitted values
model_values = modelHW.fittedvalues
model_period = df.index
df_model = pd.concat([df['n_passengers'], pd.Series(model_values, index = model_period)], axis = 1)
df_model.columns = ['n_passengers', 'HWmodel']
df_model = df_model.set_index(pd.DatetimeIndex(df_model.index))
# Model, plot
fig, ax = plt.subplots()
myFmt = mdates.DateFormatter('%Y-%m')
df_model.plot(ax = ax, x_compat=True)
ax.xaxis.set_major_formatter(myFmt)
# Forecasts
prediction_size = 10
forecast_values = modelHW.forecast(prediction_size)
# Forecasts, build new period
forecast_start = df.index[-1]
forecast_start = pd.to_datetime(forecast_start, format='%Y-%m-%d')
forecast_period = pd.period_range(forecast_start, periods=prediction_size+1, freq='M')
forecast_period = forecast_period[1:]
# Forecasts, create dataframe
df_forecast = pd.Series(forecast_values, index = forecast_period.values).to_frame()
df_forecast.columns = ['HWforecast']
# merge input and forecast dataframes
df_all = pd.merge(df,df_forecast, how='outer', left_index=True, right_index=True)
#df_all = df_all.set_index(pd.DatetimeIndex(df_all.index.values))
ix = df_all.index
ixp = pd.PeriodIndex(ix, freq = 'M')
df_all = df_all.set_index(ixp)
# Forecast, plot
fig, ax = plt.subplots()
myFmt = mdates.DateFormatter('%Y-%m')
df_all.plot(ax = ax, x_compat=True)
ax.xaxis.set_major_formatter(myFmt)
之前的尝试:
# imports
import pandas as pd
import numpy as np
from statsmodels.tsa.api import ExponentialSmoothing
# Data that matches your setup, but with a random
# seed to make it reproducible
np.random.seed(42)
# Time
date = pd.to_datetime("1st of Jan, 2019")
dates = date+pd.to_timedelta(np.arange(140), 'D')
# Data
n_passengers = np.random.normal(loc=0.0, scale=5.0, size=140).cumsum()
n_passengers = n_passengers.astype(int) + 100
df = pd.DataFrame({'n_passengers':n_passengers},index=dates)
1.如何绘制估计期内的观察值与估计值:
以下代码片段将提取所有拟合值并将其与您的观察值进行对比。
代码段 2:
# Model
modelHW = ExponentialSmoothing(np.asarray(df['n_passengers']), seasonal_periods=12, trend='add', seasonal='mul',).fit()
modelHW.summary()
# Model, fitted values
model_values = modelHW.fittedvalues
model_period = df.index
df_model = pd.concat([df['n_passengers'], pd.Series(model_values, index = model_period)], axis = 1)
df_model.columns = ['n_passengers', 'HWmodel']
df_model.plot()
情节 1:
2。如何生成和绘制一定长度的模型预测:
以下代码段将从您的模型中生成 10 个预测,并将其绘制为与您的观察值相比的延长周期。
片段 3:
# Forecast
prediction_size = 10
forecast_values = modelHW.forecast(prediction_size)
forecast_period = df.index[-1] + pd.to_timedelta(np.arange(prediction_size+1), 'D')
forecast_period = forecast_period[1:]
df_forecast = pd.concat([df['n_passengers'], pd.Series(forecast_values, index = forecast_period)], axis = 1)
df_forecast.columns = ['n_passengers', 'HWforecast']
df_forecast.plot()
情节 2:
这里是简单复制和粘贴的全部内容:
# imports
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from statsmodels.tsa.api import ExponentialSmoothing
# Data that matches your setup, but with a random
# seed to make it reproducible
np.random.seed(42)
# Time
date = pd.to_datetime("1st of Jan, 2019")
dates = date+pd.to_timedelta(np.arange(140), 'D')
# Data
n_passengers = np.random.normal(loc=0.0, scale=5.0, size=140).cumsum()
n_passengers = n_passengers.astype(int) + 100
df = pd.DataFrame({'n_passengers':n_passengers},index=dates)
# Model
modelHW = ExponentialSmoothing(np.asarray(df['n_passengers']), seasonal_periods=12, trend='add', seasonal='mul',).fit()
modelHW.summary()
# Model, fitted values
model_values = modelHW.fittedvalues
model_period = df.index
df_model = pd.concat([df['n_passengers'], pd.Series(model_values, index = model_period)], axis = 1)
df_model.columns = ['n_passengers', 'HWmodel']
df_model.plot()
# Forecast
prediction_size = 10
forecast_values = modelHW.forecast(prediction_size)
forecast_period = df.index[-1] + pd.to_timedelta(np.arange(prediction_size+1), 'D')
forecast_period = forecast_period[1:]
df_forecast = pd.concat([df['n_passengers'], pd.Series(forecast_values, index = forecast_period)], axis = 1)
df_forecast.columns = ['n_passengers', 'HWforecast']
df_forecast.plot()
@vestland - 这是代码和错误:
y_train = passtrain_df.copy(deep=True)
model_HW = ExponentialSmoothing(np.asarray(y_train['n_passengers']), seasonal_periods=12, trend='add', seasonal='mul',).fit()
model_values = model_HW.fittedvalues
model_period = y_train.index
hw_model = pd.concat([y_train['n_passengers'], pd.Series(model_values, index = model_period)], axis = 1)
hw_model.columns = ['Observed Passengers', 'Holt-Winters']
plt.figure(figsize=(18,12))
hw_model.plot()
forecast_values = model_HW.forecast(prediction_size)
forecast_period = y_train.index[-1] + pd.to_timedelta(np.arange(prediction_size+1),'D')
forecast_period = forecast_period[1:]
hw_forecast = pd.concat([y_train['n_passengers'], pd.Series(forecast_values, index = forecast_period)], axis = 1)
hw_forecast.columns = ['Observed Passengers', 'HW-Forecast']
hw_forecast.plot()
错误:
NullFrequencyError Traceback (most recent call last)
<ipython-input-25-5f37a0dd0cfa> in <module>()
17
18 forecast_values = model_HW.forecast(prediction_size)
---> 19 forecast_period = y_train.index[-1] + pd.to_timedelta(np.arange(prediction_size+1),'D')
20 forecast_period = forecast_period[1:]
21
/anaconda3/lib/python3.6/site- packages/pandas/core/indexes/datetimelike.py in __radd__(self, other)
879 def __radd__(self, other):
880 # alias for __add__
--> 881 return self.__add__(other)
882 cls.__radd__ = __radd__
883
/anaconda3/lib/python3.6/site- packages/pandas/core/indexes/datetimelike.py in __add__(self, other)
842 # This check must come after the check for np.timedelta64
843 # as is_integer returns True for these
--> 844 result = self.shift(other)
845
846 # array-like others
/anaconda3/lib/python3.6/site-packages/pandas/core/indexes/datetimelike.py in shift(self, n, freq)
1049
1050 if self.freq is None:
-> 1051 raise NullFrequencyError("Cannot shift with no freq")
1052
1053 start = self[0] + n * self.freq
NullFrequencyError: Cannot shift with no freq
我在时间序列上使用 ExponentialSmoothing
从 statsmodels 到 运行 Holt-Winters 方法。
我得到了预测值,但无法提取计算值并将它们与观测值进行比较。
from pandas import Series
from scipy import stats
import statsmodels.api as sm
from statsmodels.tsa.api import ExponentialSmoothing
modelHW = ExponentialSmoothing(np.asarray(passtrain_df['n_passengers']), seasonal_periods=12, trend='add', seasonal='mul',).fit()
y_hat_avg['Holt_Winter'] = modelHW.forecast(prediction_size)
所以在这里,prediction_size = number of forecasted datapoints
(在我的例子中是 4 个)
passtrain_df
是一个包含观察值(140 个数据点)的数据框,Holt_Winter 模型基于该数据框构建(回归)。
我可以轻松显示 4 个预测值。
如何提取 140 个计算值?
尝试使用:
print(ExponentialSmoothing.predict(np.asarray(passtrain_df), start=0, end=139))
但我可能某处有语法错误
谢谢!
编辑:
用来自 OP
的示例数据替换了合成数据集
建立新预测周期的固定功能
根据 OP 请求修复了 x 轴日期格式
答案:
如果您要查找估算期内的计算值,您应该使用 modelHW.fittedvalues
而不是 modelHW.forecast()
。后者会给你它所说的;预测。这非常棒。让我告诉你如何做这两件事:
图 1 - 估计期内的模型
图 2 - 预测
代码:
#imports
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from statsmodels.tsa.api import ExponentialSmoothing
import matplotlib.dates as mdates
#%%
#
# Load data
pass_df = pd.read_csv('https://raw.githubusercontent.com/dacatay/time-series-analysis/master/data/passengers.csv', sep=';')
pass_df = pass_df.set_index('month')
type(pass_df.index)
df = pass_df.copy()
# Model
modelHW = ExponentialSmoothing(np.asarray(df['n_passengers']), seasonal_periods=12, trend='add', seasonal='mul',).fit()
modelHW.summary()
# Model, fitted values
model_values = modelHW.fittedvalues
model_period = df.index
df_model = pd.concat([df['n_passengers'], pd.Series(model_values, index = model_period)], axis = 1)
df_model.columns = ['n_passengers', 'HWmodel']
df_model = df_model.set_index(pd.DatetimeIndex(df_model.index))
# Model, plot
fig, ax = plt.subplots()
myFmt = mdates.DateFormatter('%Y-%m')
df_model.plot(ax = ax, x_compat=True)
ax.xaxis.set_major_formatter(myFmt)
# Forecasts
prediction_size = 10
forecast_values = modelHW.forecast(prediction_size)
# Forecasts, build new period
forecast_start = df.index[-1]
forecast_start = pd.to_datetime(forecast_start, format='%Y-%m-%d')
forecast_period = pd.period_range(forecast_start, periods=prediction_size+1, freq='M')
forecast_period = forecast_period[1:]
# Forecasts, create dataframe
df_forecast = pd.Series(forecast_values, index = forecast_period.values).to_frame()
df_forecast.columns = ['HWforecast']
# merge input and forecast dataframes
df_all = pd.merge(df,df_forecast, how='outer', left_index=True, right_index=True)
#df_all = df_all.set_index(pd.DatetimeIndex(df_all.index.values))
ix = df_all.index
ixp = pd.PeriodIndex(ix, freq = 'M')
df_all = df_all.set_index(ixp)
# Forecast, plot
fig, ax = plt.subplots()
myFmt = mdates.DateFormatter('%Y-%m')
df_all.plot(ax = ax, x_compat=True)
ax.xaxis.set_major_formatter(myFmt)
之前的尝试:
# imports
import pandas as pd
import numpy as np
from statsmodels.tsa.api import ExponentialSmoothing
# Data that matches your setup, but with a random
# seed to make it reproducible
np.random.seed(42)
# Time
date = pd.to_datetime("1st of Jan, 2019")
dates = date+pd.to_timedelta(np.arange(140), 'D')
# Data
n_passengers = np.random.normal(loc=0.0, scale=5.0, size=140).cumsum()
n_passengers = n_passengers.astype(int) + 100
df = pd.DataFrame({'n_passengers':n_passengers},index=dates)
1.如何绘制估计期内的观察值与估计值:
以下代码片段将提取所有拟合值并将其与您的观察值进行对比。
代码段 2:
# Model
modelHW = ExponentialSmoothing(np.asarray(df['n_passengers']), seasonal_periods=12, trend='add', seasonal='mul',).fit()
modelHW.summary()
# Model, fitted values
model_values = modelHW.fittedvalues
model_period = df.index
df_model = pd.concat([df['n_passengers'], pd.Series(model_values, index = model_period)], axis = 1)
df_model.columns = ['n_passengers', 'HWmodel']
df_model.plot()
情节 1:
2。如何生成和绘制一定长度的模型预测:
以下代码段将从您的模型中生成 10 个预测,并将其绘制为与您的观察值相比的延长周期。
片段 3:
# Forecast
prediction_size = 10
forecast_values = modelHW.forecast(prediction_size)
forecast_period = df.index[-1] + pd.to_timedelta(np.arange(prediction_size+1), 'D')
forecast_period = forecast_period[1:]
df_forecast = pd.concat([df['n_passengers'], pd.Series(forecast_values, index = forecast_period)], axis = 1)
df_forecast.columns = ['n_passengers', 'HWforecast']
df_forecast.plot()
情节 2:
这里是简单复制和粘贴的全部内容:
# imports
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from statsmodels.tsa.api import ExponentialSmoothing
# Data that matches your setup, but with a random
# seed to make it reproducible
np.random.seed(42)
# Time
date = pd.to_datetime("1st of Jan, 2019")
dates = date+pd.to_timedelta(np.arange(140), 'D')
# Data
n_passengers = np.random.normal(loc=0.0, scale=5.0, size=140).cumsum()
n_passengers = n_passengers.astype(int) + 100
df = pd.DataFrame({'n_passengers':n_passengers},index=dates)
# Model
modelHW = ExponentialSmoothing(np.asarray(df['n_passengers']), seasonal_periods=12, trend='add', seasonal='mul',).fit()
modelHW.summary()
# Model, fitted values
model_values = modelHW.fittedvalues
model_period = df.index
df_model = pd.concat([df['n_passengers'], pd.Series(model_values, index = model_period)], axis = 1)
df_model.columns = ['n_passengers', 'HWmodel']
df_model.plot()
# Forecast
prediction_size = 10
forecast_values = modelHW.forecast(prediction_size)
forecast_period = df.index[-1] + pd.to_timedelta(np.arange(prediction_size+1), 'D')
forecast_period = forecast_period[1:]
df_forecast = pd.concat([df['n_passengers'], pd.Series(forecast_values, index = forecast_period)], axis = 1)
df_forecast.columns = ['n_passengers', 'HWforecast']
df_forecast.plot()
@vestland - 这是代码和错误:
y_train = passtrain_df.copy(deep=True)
model_HW = ExponentialSmoothing(np.asarray(y_train['n_passengers']), seasonal_periods=12, trend='add', seasonal='mul',).fit()
model_values = model_HW.fittedvalues
model_period = y_train.index
hw_model = pd.concat([y_train['n_passengers'], pd.Series(model_values, index = model_period)], axis = 1)
hw_model.columns = ['Observed Passengers', 'Holt-Winters']
plt.figure(figsize=(18,12))
hw_model.plot()
forecast_values = model_HW.forecast(prediction_size)
forecast_period = y_train.index[-1] + pd.to_timedelta(np.arange(prediction_size+1),'D')
forecast_period = forecast_period[1:]
hw_forecast = pd.concat([y_train['n_passengers'], pd.Series(forecast_values, index = forecast_period)], axis = 1)
hw_forecast.columns = ['Observed Passengers', 'HW-Forecast']
hw_forecast.plot()
错误:
NullFrequencyError Traceback (most recent call last)
<ipython-input-25-5f37a0dd0cfa> in <module>()
17
18 forecast_values = model_HW.forecast(prediction_size)
---> 19 forecast_period = y_train.index[-1] + pd.to_timedelta(np.arange(prediction_size+1),'D')
20 forecast_period = forecast_period[1:]
21
/anaconda3/lib/python3.6/site- packages/pandas/core/indexes/datetimelike.py in __radd__(self, other)
879 def __radd__(self, other):
880 # alias for __add__
--> 881 return self.__add__(other)
882 cls.__radd__ = __radd__
883
/anaconda3/lib/python3.6/site- packages/pandas/core/indexes/datetimelike.py in __add__(self, other)
842 # This check must come after the check for np.timedelta64
843 # as is_integer returns True for these
--> 844 result = self.shift(other)
845
846 # array-like others
/anaconda3/lib/python3.6/site-packages/pandas/core/indexes/datetimelike.py in shift(self, n, freq)
1049
1050 if self.freq is None:
-> 1051 raise NullFrequencyError("Cannot shift with no freq")
1052
1053 start = self[0] + n * self.freq
NullFrequencyError: Cannot shift with no freq