与 MultiOutputRegressor 一起使用时 SelectKBest() 出现问题
Issue with SelectKBest() when using with MultiOutputRegressor
我无法在以 MultiOutputRegressor 结尾的管道中使用 SelectKBest 进行特征选择(见下方代码:pipe1 工作正常,但 pipe2 会报错,错误信息附在代码之后)。似乎 SelectKBest 无法处理多列的 y。这是已知的限制吗?
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.multioutput import MultiOutputRegressor
from sklearn.linear_model import Ridge
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.preprocessing import PolynomialFeatures
# Synthetic data: 100 samples, 10 input features, 2 regression targets.
X = np.random.normal(loc=0, scale=1, size=(100, 10))
y = np.random.normal(loc=0, scale=1, size=(100, 2))

# Baseline: polynomial expansion straight into a multi-output ridge.
# This pipeline fits without error.
pipe1 = Pipeline(
    steps=[
        ("poly", PolynomialFeatures(degree=2, include_bias=False)),
        ("regr", MultiOutputRegressor(estimator=Ridge())),
    ]
)
pipe1.fit(X, y)

# Same pipeline with a SelectKBest step inserted before the regressor.
# SelectKBest receives the full two-column y here, and the fit below raises
# "ValueError: y should be a 1d array, got an array of shape (100, 2)".
pipe2 = Pipeline(
    steps=[
        ("poly", PolynomialFeatures(degree=2, include_bias=False)),
        ("kbst", SelectKBest(score_func=f_regression, k=5)),
        ("regr", MultiOutputRegressor(estimator=Ridge())),
    ]
)
pipe2.fit(X, y)
错误信息如下:
ValueError
---> 17 pipe2.fit(X, y) [...] /home/ecm/.conda/envs/mlpolar/lib/python3.7/site-packages/sklearn/utils/validation.py
in column_or_1d(y, warn)
845 raise ValueError(
846 "y should be a 1d array, "
--> 847 "got an array of shape {} instead.".format(shape))
848
849
ValueError: y should be a 1d array, got an array of shape (100, 2) instead.
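根据评论中的建议,我在此贴出该问题的解决方法:把 SelectKBest 和 Ridge 放进一个内部管道,再用 MultiOutputRegressor 包装这个内部管道。MultiOutputRegressor 会为 y 的每一列单独拟合一份内部管道,因此 SelectKBest 每次只接收一维的 y: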
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.multioutput import MultiOutputRegressor
from sklearn.linear_model import Ridge
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.preprocessing import PolynomialFeatures
# Synthetic data: 100 samples, 10 input features, 2 regression targets.
X = np.random.normal(loc=0, scale=1, size=(100, 10))
y = np.random.normal(loc=0, scale=1, size=(100, 2))

# Inner single-target pipeline: feature selection followed by ridge.
pipe_in = Pipeline(
    steps=[
        ("kbst", SelectKBest(score_func=f_regression, k=5)),
        ("regr", Ridge()),
    ]
)

# Wrap the inner pipeline in MultiOutputRegressor, which fits one regressor
# per target, so that SelectKBest is fitted against a single 1-D target
# column each time instead of the full 2-D y.
pipe2 = Pipeline(
    steps=[
        ("poly", PolynomialFeatures(degree=2, include_bias=False)),
        ("pipe", MultiOutputRegressor(estimator=pipe_in)),
    ]
)
pipe2.fit(X, y)
我无法在以 MultiOutputRegressor 结尾的管道中使用 SelectKBest 进行特征选择(见下方代码:pipe1 工作正常,但 pipe2 会报错,错误信息附在代码之后)。似乎 SelectKBest 无法处理多列的 y。这是已知的限制吗?
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.multioutput import MultiOutputRegressor
from sklearn.linear_model import Ridge
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.preprocessing import PolynomialFeatures
# Random inputs (100 x 10) and a two-column regression target (100 x 2).
X = np.random.normal(loc=0, scale=1, size=(100, 10))
y = np.random.normal(loc=0, scale=1, size=(100, 2))

# Working case: no feature selection between the expansion and the regressor.
pipe1 = Pipeline(
    steps=[
        ("poly", PolynomialFeatures(degree=2, include_bias=False)),
        ("regr", MultiOutputRegressor(estimator=Ridge())),
    ]
)
pipe1.fit(X, y)

# Failing case: SelectKBest is handed the two-column y directly, and the
# fit below raises
# "ValueError: y should be a 1d array, got an array of shape (100, 2)".
pipe2 = Pipeline(
    steps=[
        ("poly", PolynomialFeatures(degree=2, include_bias=False)),
        ("kbst", SelectKBest(score_func=f_regression, k=5)),
        ("regr", MultiOutputRegressor(estimator=Ridge())),
    ]
)
pipe2.fit(X, y)
错误信息如下:
ValueError
---> 17 pipe2.fit(X, y) [...] /home/ecm/.conda/envs/mlpolar/lib/python3.7/site-packages/sklearn/utils/validation.py
in column_or_1d(y, warn)
845 raise ValueError(
846 "y should be a 1d array, "
--> 847 "got an array of shape {} instead.".format(shape))
848
849
ValueError: y should be a 1d array, got an array of shape (100, 2) instead.
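根据评论中的建议,我在此贴出该问题的解决方法:把 SelectKBest 和 Ridge 放进一个内部管道,再用 MultiOutputRegressor 包装这个内部管道。MultiOutputRegressor 会为 y 的每一列单独拟合一份内部管道,因此 SelectKBest 每次只接收一维的 y: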
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.multioutput import MultiOutputRegressor
from sklearn.linear_model import Ridge
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.preprocessing import PolynomialFeatures
# Random inputs (100 x 10) and a two-column regression target (100 x 2).
X = np.random.normal(loc=0, scale=1, size=(100, 10))
y = np.random.normal(loc=0, scale=1, size=(100, 2))

# Per-target estimator: select the 5 best features, then fit a ridge model.
pipe_in = Pipeline(
    steps=[
        ("kbst", SelectKBest(score_func=f_regression, k=5)),
        ("regr", Ridge()),
    ]
)

# The selection step now lives inside MultiOutputRegressor, which fits one
# regressor per target, so SelectKBest is scored against one 1-D target
# column at a time.
pipe2 = Pipeline(
    steps=[
        ("poly", PolynomialFeatures(degree=2, include_bias=False)),
        ("pipe", MultiOutputRegressor(estimator=pipe_in)),
    ]
)
pipe2.fit(X, y)