使用 RFECV 计算递归特征消除(RFE)
Calculating Recursive Feature Elimination (RFE) with RFECV
我有一个名为 "dataset_con_enc" 的数据框。
dataset_con_enc.head()
OFFER_TYPE_PROXY OPPLINE_PRODUCT_BU OPPLINE_PRODUCT_FAMILY OPP_FLAG_LED_BY_PARTNER OPP_SOURCE target
0 0 0 8 0 19 2 1 11 137 1 ... 0 0 5 8 13 578 1 100 100 1
.....
我尝试使用递归特征消除(RFE)进行特征选择,代码如下:
# Load libraries
from sklearn.datasets import make_regression
from sklearn.feature_selection import RFECV
from sklearn import datasets, linear_model
import warnings
# Suppress an annoying but harmless warning
warnings.filterwarnings(action="ignore", module="scipy", message="^internal gelsd")
# Calculating RFE for non-discretised dataset, and graphing the Importance for each feature, per dataset
selector1 = RFECV(LogisticRegression(), step=1, cv=5, n_jobs=-1)
selector1 = selector1.fit(dataset_con_enc.drop('target', axis=1).values, dataset_con_enc['target'].values)
但我在最后一行代码中遇到错误:
ImportError Traceback (most recent call last)
<ipython-input-509-5e50f1655a89> in <module>()
3 # Calculating RFE for non-discretised dataset, and graphing the Importance for each feature, per dataset
4 selector1 = RFECV(LogisticRegression(), step=1, cv=5, n_jobs=-1)
----> 5 selector1 = selector1.fit(dataset_con_enc.drop('target', axis=1).values, dataset_con_enc['target'].values)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\sklearn\feature_selection\rfe.py in fit(self, X, y)
434 scores = parallel(
435 func(rfe, self.estimator, X, y, train, test, scorer)
--> 436 for train, test in cv.split(X, y))
437
438 scores = np.sum(scores, axis=0)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self, iterable)
747 self._aborting = False
748 if not self._managed_backend:
--> 749 n_jobs = self._initialize_backend()
750 else:
751 n_jobs = self._effective_n_jobs()
~\AppData\Local\Continuum\anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in _initialize_backend(self)
545 try:
546 n_jobs = self._backend.configure(n_jobs=self.n_jobs, parallel=self,
--> 547 **self._backend_args)
548 if self.timeout is not None and not self._backend.supports_timeout:
549 warnings.warn(
~\AppData\Local\Continuum\anaconda3\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py in configure(self, n_jobs, parallel, **backend_args)
303 if already_forked:
304 raise ImportError(
--> 305 '[joblib] Attempting to do parallel computing '
306 'without protecting your import on a system that does '
307 'not support forking. To use parallel-computing in a '
ImportError: [joblib] Attempting to do parallel computing without protecting your import on a system that does not support forking. To use parallel-computing in a script, you must protect your main loop using "if __name__ == '__main__'". Please see the joblib documentation on Parallel for more information
你能帮我解决这个问题吗?
谢谢
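最直接的解决方法是把 n_jobs=-1 改为 n_jobs=1(禁用并行计算),例如:
selector1 = RFECV(LogisticRegression(), step=1, cv=5, n_jobs=1)
如果仍然需要并行计算,错误信息本身给出的建议是:在脚本中运行时,用 if __name__ == '__main__': 保护主程序入口。
不过我不确定这是否是您正在寻找的解决方案。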
我有一个名为 "dataset_con_enc" 的数据框。
dataset_con_enc.head()
OFFER_TYPE_PROXY OPPLINE_PRODUCT_BU OPPLINE_PRODUCT_FAMILY OPP_FLAG_LED_BY_PARTNER OPP_SOURCE target
0 0 0 8 0 19 2 1 11 137 1 ... 0 0 5 8 13 578 1 100 100 1
.....
我尝试使用递归特征消除(RFE)进行特征选择,代码如下:
# Load libraries
from sklearn.datasets import make_regression
from sklearn.feature_selection import RFECV
from sklearn import datasets, linear_model
import warnings
# Suppress an annoying but harmless warning
warnings.filterwarnings(action="ignore", module="scipy", message="^internal gelsd")
# Calculating RFE for non-discretised dataset, and graphing the Importance for each feature, per dataset
selector1 = RFECV(LogisticRegression(), step=1, cv=5, n_jobs=-1)
selector1 = selector1.fit(dataset_con_enc.drop('target', axis=1).values, dataset_con_enc['target'].values)
但我在最后一行代码中遇到错误:
ImportError Traceback (most recent call last)
<ipython-input-509-5e50f1655a89> in <module>()
3 # Calculating RFE for non-discretised dataset, and graphing the Importance for each feature, per dataset
4 selector1 = RFECV(LogisticRegression(), step=1, cv=5, n_jobs=-1)
----> 5 selector1 = selector1.fit(dataset_con_enc.drop('target', axis=1).values, dataset_con_enc['target'].values)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\sklearn\feature_selection\rfe.py in fit(self, X, y)
434 scores = parallel(
435 func(rfe, self.estimator, X, y, train, test, scorer)
--> 436 for train, test in cv.split(X, y))
437
438 scores = np.sum(scores, axis=0)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self, iterable)
747 self._aborting = False
748 if not self._managed_backend:
--> 749 n_jobs = self._initialize_backend()
750 else:
751 n_jobs = self._effective_n_jobs()
~\AppData\Local\Continuum\anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in _initialize_backend(self)
545 try:
546 n_jobs = self._backend.configure(n_jobs=self.n_jobs, parallel=self,
--> 547 **self._backend_args)
548 if self.timeout is not None and not self._backend.supports_timeout:
549 warnings.warn(
~\AppData\Local\Continuum\anaconda3\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py in configure(self, n_jobs, parallel, **backend_args)
303 if already_forked:
304 raise ImportError(
--> 305 '[joblib] Attempting to do parallel computing '
306 'without protecting your import on a system that does '
307 'not support forking. To use parallel-computing in a '
ImportError: [joblib] Attempting to do parallel computing without protecting your import on a system that does not support forking. To use parallel-computing in a script, you must protect your main loop using "if __name__ == '__main__'". Please see the joblib documentation on Parallel for more information
你能帮我解决这个问题吗? 谢谢
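最直接的解决方法是把 n_jobs=-1 改为 n_jobs=1(禁用并行计算),例如:
selector1 = RFECV(LogisticRegression(), step=1, cv=5, n_jobs=1)
如果仍然需要并行计算,错误信息本身给出的建议是:在脚本中运行时,用 if __name__ == '__main__': 保护主程序入口。
不过我不确定这是否是您正在寻找的解决方案。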