在管道上滚动平均以训练模型
Rolling Average on a pipeline to train a model
我在使用管道拟合模型时遇到问题,该管道希望添加具有某些特征滚动平均值的列,然后训练模型。
数据框:
columns=['yr', 'mnth', 'hr', 'season', 'holiday', 'weekday', 'workingday', 'weathersit', 'temp', 'atemp', 'hum', 'windspeed', 'y']
df=pd.DataFrame(np.array([ [0, 1, 0, 1, 0, 6, 0, 1, 0.24, 2.879, 0.81, 0, 16],
[0, 1, 1, 1, 0, 6, 0, 1, 0.22, 2.727, 0.80, 0, 40],
[0, 1, 2, 1, 0, 6, 0, 1, 0.22, 2.727, 0.80, 0, 32],
[0, 1, 3, 1, 0, 6, 0, 1, 0.24, 2.879, 0.75, 0, 13],
[0, 1, 4, 1, 0, 6, 0, 1, 0.24, 2.879, 0.75, 0, 1]]), columns=columns)
X_train=df.drop('y')
y_train=df['y']
部分特征的滚动平均函数:
def rollingAv(Data):
a=Data['atemp']
a_shifted = a.shift(1)
a_window = a_shifted.rolling(window=4)
a_means = a_window.mean()
Data['a_means'] = a_means
h=Data['hum']
h_shifted = h.shift(1)
h_window = h_shifted.rolling(window=4)
h_means = h_window.mean()
Data['h_means'] = h_means
w=Data['windspeed']
w_shifted = w.shift(1)
w_window = w_shifted.rolling(window=4)
w_means = w_window.mean()
Data['w_means'] = w_means
Data=Data.dropna(subset=['a_means', 'h_means','w_means'])
return Data.values
滚动平均值Class 以适应和转换管道
class BikeRentalFeatureExtractor(BaseEstimator):
def __init__(self):
pass
def fit(self,X, y=None):
X=X.values
if y.shape[0]>0:
y=y[4:]
return y
else:
pass
def transform(x):
return rollingAv(x)
管道和模型
model = Pipeline(steps=[
("extractor", BikeRentalFeatureExtractor()),
("regressor", RandomForestRegressor())
])
parameters = {'regressor__n_estimators':[50,100,200,300]}
st = ShuffleSplit(n_splits=5, test_size=0.2, random_state=0)
clf = GridSearchCV(estimator=model, param_grid=parameters)
clf.fit(X_train,y_train)
在 clf.fit(X_train,y_train)
之前我没有错误,因为它似乎与数据有关,因为尽管我有以下消息,但我删除了该列,我再次尝试,但问题仍然存在于下一列:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-21-86937c1966f0> in <module>()
----> 1 clf.fit(X_train,y_train)
12 frames
/usr/local/lib/python3.7/dist-packages/pandas/core/base.py in _try_aggregate_string_function(self, arg, *args, **kwargs)
276
277 raise AttributeError(
--> 278 f"'{arg}' is not a valid function for '{type(self).__name__}' object"
279 )
280
AttributeError: 'yr' is not a valid function for 'Series' object
fit
假设为 return self
transform
是一种方法,应该将 self
作为第一个参数。
我在使用管道拟合模型时遇到问题,该管道希望添加具有某些特征滚动平均值的列,然后训练模型。
数据框:
columns=['yr', 'mnth', 'hr', 'season', 'holiday', 'weekday', 'workingday', 'weathersit', 'temp', 'atemp', 'hum', 'windspeed', 'y']
df=pd.DataFrame(np.array([ [0, 1, 0, 1, 0, 6, 0, 1, 0.24, 2.879, 0.81, 0, 16],
[0, 1, 1, 1, 0, 6, 0, 1, 0.22, 2.727, 0.80, 0, 40],
[0, 1, 2, 1, 0, 6, 0, 1, 0.22, 2.727, 0.80, 0, 32],
[0, 1, 3, 1, 0, 6, 0, 1, 0.24, 2.879, 0.75, 0, 13],
[0, 1, 4, 1, 0, 6, 0, 1, 0.24, 2.879, 0.75, 0, 1]]), columns=columns)
X_train=df.drop('y')
y_train=df['y']
部分特征的滚动平均函数:
def rollingAv(Data):
a=Data['atemp']
a_shifted = a.shift(1)
a_window = a_shifted.rolling(window=4)
a_means = a_window.mean()
Data['a_means'] = a_means
h=Data['hum']
h_shifted = h.shift(1)
h_window = h_shifted.rolling(window=4)
h_means = h_window.mean()
Data['h_means'] = h_means
w=Data['windspeed']
w_shifted = w.shift(1)
w_window = w_shifted.rolling(window=4)
w_means = w_window.mean()
Data['w_means'] = w_means
Data=Data.dropna(subset=['a_means', 'h_means','w_means'])
return Data.values
滚动平均值Class 以适应和转换管道
class BikeRentalFeatureExtractor(BaseEstimator):
def __init__(self):
pass
def fit(self,X, y=None):
X=X.values
if y.shape[0]>0:
y=y[4:]
return y
else:
pass
def transform(x):
return rollingAv(x)
管道和模型
model = Pipeline(steps=[
("extractor", BikeRentalFeatureExtractor()),
("regressor", RandomForestRegressor())
])
parameters = {'regressor__n_estimators':[50,100,200,300]}
st = ShuffleSplit(n_splits=5, test_size=0.2, random_state=0)
clf = GridSearchCV(estimator=model, param_grid=parameters)
clf.fit(X_train,y_train)
在 clf.fit(X_train,y_train)
之前我没有错误,因为它似乎与数据有关,因为尽管我有以下消息,但我删除了该列,我再次尝试,但问题仍然存在于下一列:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-21-86937c1966f0> in <module>()
----> 1 clf.fit(X_train,y_train)
12 frames
/usr/local/lib/python3.7/dist-packages/pandas/core/base.py in _try_aggregate_string_function(self, arg, *args, **kwargs)
276
277 raise AttributeError(
--> 278 f"'{arg}' is not a valid function for '{type(self).__name__}' object"
279 )
280
AttributeError: 'yr' is not a valid function for 'Series' object
fit
假设为 returnself
transform
是一种方法,应该将self
作为第一个参数。