Sklearn.model_selection GridsearchCV ValueError: C <= 0
Sklearn.model_selection GridsearchCV ValueError: C <= 0
我正在尝试使用 sklearn.model_selection 中的 GridSearchCV 进行参数调整。
不知何故,我一直收到 ValueError: C <= 0。我认为这与网格搜索的 fit 方法有关。如果有人能帮忙解决这个问题,我将不胜感激。
尝试在 sklearn 中对 SVR 模型进行网格搜索
这是我的代码:
print(x_train.shape,y_train.shape, x_train.dtype,y_train.dtype)
#output: (3023, 1) (3023, 14) float64 float64
#svr model:
from sklearn.svm import SVR
reg = SVR(kernel = 'linear')
reg.fit(x_train,y_train)
#output: SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1,
gamma='auto', kernel='linear', max_iter=-1, shrinking=True, tol=0.001,
verbose=False)
#GridSearch
from sklearn.model_selection import GridSearchCV
parameters = [{'C': [0,1,5], 'kernel':['linear']},
{'C': [0,1,5], 'kernel':['rbf'], 'gamma':[0.01, 0.05]}]
gs = GridSearchCV(estimator = reg, param_grid = parameters, scoring =
'accuracy',cv =10)
gs = gs.fit(x_train, y_train)
Error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-120-cf037d4a6af8> in <module>()
1 gs = GridSearchCV(estimator = reg, param_grid = parameters, scoring = 'accuracy',cv =10)
----> 2 gs = gs.fit(x_train, y_train)
C:\Program Files\Anaconda3\lib\site-packages\sklearn\model_selection\_search.py in fit(self, X, y, groups, **fit_params)
637 error_score=self.error_score)
638 for parameters, (train, test) in product(candidate_params,
--> 639 cv.split(X, y, groups)))
640
641 # if one choose to see train score, "out" will contain train score info
C:\Program Files\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self, iterable)
777 # was dispatched. In particular this covers the edge
778 # case of Parallel used with an exhausted iterator.
--> 779 while self.dispatch_one_batch(iterator):
780 self._iterating = True
781 else:
C:\Program Files\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in dispatch_one_batch(self, iterator)
623 return False
624 else:
--> 625 self._dispatch(tasks)
626 return True
627
C:\Program Files\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in _dispatch(self, batch)
586 dispatch_timestamp = time.time()
587 cb = BatchCompletionCallBack(dispatch_timestamp, len(batch), self)
--> 588 job = self._backend.apply_async(batch, callback=cb)
589 self._jobs.append(job)
590
C:\Program Files\Anaconda3\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py in apply_async(self, func, callback)
109 def apply_async(self, func, callback=None):
110 """Schedule a func to be run"""
--> 111 result = ImmediateResult(func)
112 if callback:
113 callback(result)
C:\Program Files\Anaconda3\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py in __init__(self, batch)
330 # Don't delay the application, to avoid keeping the input
331 # arguments in memory
--> 332 self.results = batch()
333
334 def get(self):
C:\Program Files\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self)
129
130 def __call__(self):
--> 131 return [func(*args, **kwargs) for func, args, kwargs in self.items]
132
133 def __len__(self):
C:\Program Files\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in <listcomp>(.0)
129
130 def __call__(self):
--> 131 return [func(*args, **kwargs) for func, args, kwargs in self.items]
132
133 def __len__(self):
C:\Program Files\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, return_n_test_samples, return_times, error_score)
456 estimator.fit(X_train, **fit_params)
457 else:
--> 458 estimator.fit(X_train, y_train, **fit_params)
459
460 except Exception as e:
C:\Program Files\Anaconda3\lib\site-packages\sklearn\svm\base.py in fit(self, X, y, sample_weight)
185
186 seed = rnd.randint(np.iinfo('i').max)
--> 187 fit(X, y, sample_weight, solver_type, kernel, random_seed=seed)
188 # see comment on the other call to np.iinfo in this file
189
C:\Program Files\Anaconda3\lib\site-packages\sklearn\svm\base.py in _dense_fit(self, X, y, sample_weight, solver_type, kernel, random_seed)
252 cache_size=self.cache_size, coef0=self.coef0,
253 gamma=self._gamma, epsilon=self.epsilon,
--> 254 max_iter=self.max_iter, random_seed=random_seed)
255
256 self._warn_from_fit_status()
sklearn\svm\libsvm.pyx in sklearn.svm.libsvm.fit()
ValueError: C <= 0
将参数网格中的 C 值更改为始终 > 0。当前您的参数网格为
parameters = [{'C': [0,1,5], 'kernel':['linear']},
{'C': [0,1,5], 'kernel':['rbf'], 'gamma':[0.01, 0.05]}]
并且它有 C=0 作为一种可能性。因此,当网格搜索去拟合 C=0 的 SVR 模型时,SVR 会抱怨 C 不能 <=0(小于或等于 0)。
所以将其更改为:
parameters = [{'C': [0.001, 0.1 ,1,5], 'kernel':['linear']},
{'C': [0.001, 0.1, 1,5], 'kernel':['rbf'], 'gamma':[0.01, 0.05]}]
您可以查看这些示例来检查 C 的搜索是如何完成的:
- http://scikit-learn.org/stable/auto_examples/svm/plot_rbf_parameters.html#sphx-glr-auto-examples-svm-plot-rbf-parameters-py
- http://scikit-learn.org/stable/auto_examples/exercises/plot_cv_digits.html#sphx-glr-auto-examples-exercises-plot-cv-digits-py
评论更新:
您正在使用 scoring='accuracy'。准确率(accuracy)主要是针对分类任务定义的,对于回归模型,它不是有效的指标。请查看此页面以获取有效指标:http://scikit-learn.org/stable/modules/model_evaluation.html#scoring-parameter
您可以像这样从网格搜索中删除评分参数:
gs = GridSearchCV(estimator = reg, param_grid = parameters,cv =10)
在这种情况下,将使用估计器的默认评分方法(在这种情况下为 SVR),即 R 平方评分(最常用于回归)
或者,您可以将 scoring 参数设置为我上面链接的页面中列出的任何有效回归指标,像这样:
gs = GridSearchCV(estimator = reg, param_grid = parameters,
scoring='neg_mean_squared_error', cv =10)
我正在尝试使用 sklearn.model_selection 中的 GridSearchCV 进行参数调整。
不知何故,我一直收到 ValueError: C <= 0。我认为这与网格搜索的 fit 方法有关。如果有人能帮忙解决这个问题,我将不胜感激。
尝试在 sklearn 中对 SVR 模型进行网格搜索
这是我的代码:
print(x_train.shape,y_train.shape, x_train.dtype,y_train.dtype)
#output: (3023, 1) (3023, 14) float64 float64
#svr model:
from sklearn.svm import SVR
reg = SVR(kernel = 'linear')
reg.fit(x_train,y_train)
#output: SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1,
gamma='auto', kernel='linear', max_iter=-1, shrinking=True, tol=0.001,
verbose=False)
#GridSearch
from sklearn.model_selection import GridSearchCV
parameters = [{'C': [0,1,5], 'kernel':['linear']},
{'C': [0,1,5], 'kernel':['rbf'], 'gamma':[0.01, 0.05]}]
gs = GridSearchCV(estimator = reg, param_grid = parameters, scoring =
'accuracy',cv =10)
gs = gs.fit(x_train, y_train)
Error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-120-cf037d4a6af8> in <module>()
1 gs = GridSearchCV(estimator = reg, param_grid = parameters, scoring = 'accuracy',cv =10)
----> 2 gs = gs.fit(x_train, y_train)
C:\Program Files\Anaconda3\lib\site-packages\sklearn\model_selection\_search.py in fit(self, X, y, groups, **fit_params)
637 error_score=self.error_score)
638 for parameters, (train, test) in product(candidate_params,
--> 639 cv.split(X, y, groups)))
640
641 # if one choose to see train score, "out" will contain train score info
C:\Program Files\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self, iterable)
777 # was dispatched. In particular this covers the edge
778 # case of Parallel used with an exhausted iterator.
--> 779 while self.dispatch_one_batch(iterator):
780 self._iterating = True
781 else:
C:\Program Files\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in dispatch_one_batch(self, iterator)
623 return False
624 else:
--> 625 self._dispatch(tasks)
626 return True
627
C:\Program Files\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in _dispatch(self, batch)
586 dispatch_timestamp = time.time()
587 cb = BatchCompletionCallBack(dispatch_timestamp, len(batch), self)
--> 588 job = self._backend.apply_async(batch, callback=cb)
589 self._jobs.append(job)
590
C:\Program Files\Anaconda3\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py in apply_async(self, func, callback)
109 def apply_async(self, func, callback=None):
110 """Schedule a func to be run"""
--> 111 result = ImmediateResult(func)
112 if callback:
113 callback(result)
C:\Program Files\Anaconda3\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py in __init__(self, batch)
330 # Don't delay the application, to avoid keeping the input
331 # arguments in memory
--> 332 self.results = batch()
333
334 def get(self):
C:\Program Files\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self)
129
130 def __call__(self):
--> 131 return [func(*args, **kwargs) for func, args, kwargs in self.items]
132
133 def __len__(self):
C:\Program Files\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in <listcomp>(.0)
129
130 def __call__(self):
--> 131 return [func(*args, **kwargs) for func, args, kwargs in self.items]
132
133 def __len__(self):
C:\Program Files\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, return_n_test_samples, return_times, error_score)
456 estimator.fit(X_train, **fit_params)
457 else:
--> 458 estimator.fit(X_train, y_train, **fit_params)
459
460 except Exception as e:
C:\Program Files\Anaconda3\lib\site-packages\sklearn\svm\base.py in fit(self, X, y, sample_weight)
185
186 seed = rnd.randint(np.iinfo('i').max)
--> 187 fit(X, y, sample_weight, solver_type, kernel, random_seed=seed)
188 # see comment on the other call to np.iinfo in this file
189
C:\Program Files\Anaconda3\lib\site-packages\sklearn\svm\base.py in _dense_fit(self, X, y, sample_weight, solver_type, kernel, random_seed)
252 cache_size=self.cache_size, coef0=self.coef0,
253 gamma=self._gamma, epsilon=self.epsilon,
--> 254 max_iter=self.max_iter, random_seed=random_seed)
255
256 self._warn_from_fit_status()
sklearn\svm\libsvm.pyx in sklearn.svm.libsvm.fit()
ValueError: C <= 0
将参数网格中的 C 值更改为始终 > 0。当前您的参数网格为
parameters = [{'C': [0,1,5], 'kernel':['linear']},
{'C': [0,1,5], 'kernel':['rbf'], 'gamma':[0.01, 0.05]}]
并且它有 C=0 作为一种可能性。因此,当网格搜索去拟合 C=0 的 SVR 模型时,SVR 会抱怨 C 不能 <=0(小于或等于 0)。
所以将其更改为:
parameters = [{'C': [0.001, 0.1 ,1,5], 'kernel':['linear']},
{'C': [0.001, 0.1, 1,5], 'kernel':['rbf'], 'gamma':[0.01, 0.05]}]
您可以查看这些示例来检查 C 的搜索是如何完成的:
- http://scikit-learn.org/stable/auto_examples/svm/plot_rbf_parameters.html#sphx-glr-auto-examples-svm-plot-rbf-parameters-py
- http://scikit-learn.org/stable/auto_examples/exercises/plot_cv_digits.html#sphx-glr-auto-examples-exercises-plot-cv-digits-py
评论更新:
您正在使用 scoring='accuracy'。准确率(accuracy)主要是针对分类任务定义的,对于回归模型,它不是有效的指标。请查看此页面以获取有效指标:http://scikit-learn.org/stable/modules/model_evaluation.html#scoring-parameter
您可以像这样从网格搜索中删除评分参数:
gs = GridSearchCV(estimator = reg, param_grid = parameters,cv =10)
在这种情况下,将使用估计器的默认评分方法(在这种情况下为 SVR),即 R 平方评分(最常用于回归)
或者,您可以将 scoring 参数设置为我上面链接的页面中列出的任何有效回归指标,像这样:
gs = GridSearchCV(estimator = reg, param_grid = parameters,
scoring='neg_mean_squared_error', cv =10)