Stratified Shuffle Split ValueError: The least populated class in y has only 1 member, which is too few
Stratified Shuffle Split ValueError: The least populated class in y has only 1 member, which is too few
我正在努力让我的分层洗牌拆分(StratifiedShuffleSplit)正常工作。我有两组数据,`features` 和 `labels`,我想得到一个名为 `results` 的结果,它应该包含所有 accuracy/precision/recall/f1 分数的列表。
但是,我对它应该如何把结果返回给我感到困惑。谁能发现我在这里做错了什么?
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import AdaBoostClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.metrics import make_scorer, accuracy_score, precision_score, recall_score, f1_score,confusion_matrix
# The snippet calls cross_validate() but never imported it; bring it into
# scope so the example runs on its own instead of raising NameError.
from sklearn.model_selection import cross_validate

# A single stratified shuffle split holding out 33% of the samples as the
# test set, seeded for reproducibility.
# NOTE: stratification needs at least 2 samples of every class in the labels;
# otherwise cv.split() raises "ValueError: The least populated class in y has
# only 1 member, which is too few."
sss = StratifiedShuffleSplit(n_splits=1, random_state=42, test_size=0.33)

clf_obj = RandomForestClassifier(n_estimators=10)

# Metric name -> scorer callable handed to cross_validate.
scoring = {
    'accuracy': make_scorer(accuracy_score),
    'precision': make_scorer(precision_score),
    'recall': make_scorer(recall_score),
    'f1_score': make_scorer(f1_score),
}

# `features` and `labels` are expected to be defined earlier (not shown here).
# Per the scikit-learn docs, cross_validate returns a dict of per-split score
# arrays (e.g. results['test_accuracy']) rather than a plain list.
results = cross_validate(
    estimator=clf_obj,
    X=features,
    y=labels,
    cv=sss,
    scoring=scoring,
)
我想这里让我感到困惑的是我收到了这个错误:
ValueError: The least populated class in y has only 1 member, which is too few. The minimum number of groups for any class cannot be less than 2.
但我不明白我的 x 和 y 值发生了什么。我看到的第一个错误似乎指向 `scoring` 参数:
---> 29 scoring=scoring)
...但是据我所见,我认为我已经正确填写了 `cross_validate()` 函数的参数?
完整错误跟踪:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-27-2af4c433ccc9> in <module>
27 y=labels,
28 cv=sss,
---> 29 scoring=scoring)
/anaconda3/lib/python3.7/site-packages/sklearn/model_selection/_validation.py in cross_validate(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch, return_train_score, return_estimator, error_score)
238 return_times=True, return_estimator=return_estimator,
239 error_score=error_score)
--> 240 for train, test in cv.split(X, y, groups))
241
242 zipped_scores = list(zip(*scores))
/anaconda3/lib/python3.7/site-packages/sklearn/externals/joblib/parallel.py in __call__(self, iterable)
915 # remaining jobs.
916 self._iterating = False
--> 917 if self.dispatch_one_batch(iterator):
918 self._iterating = self._original_iterator is not None
919
/anaconda3/lib/python3.7/site-packages/sklearn/externals/joblib/parallel.py in dispatch_one_batch(self, iterator)
752 tasks = BatchedCalls(itertools.islice(iterator, batch_size),
753 self._backend.get_nested_backend(),
--> 754 self._pickle_cache)
755 if len(tasks) == 0:
756 # No more tasks available in the iterator: tell caller to stop.
/anaconda3/lib/python3.7/site-packages/sklearn/externals/joblib/parallel.py in __init__(self, iterator_slice, backend_and_jobs, pickle_cache)
208
209 def __init__(self, iterator_slice, backend_and_jobs, pickle_cache=None):
--> 210 self.items = list(iterator_slice)
211 self._size = len(self.items)
212 if isinstance(backend_and_jobs, tuple):
/anaconda3/lib/python3.7/site-packages/sklearn/model_selection/_validation.py in <genexpr>(.0)
233 pre_dispatch=pre_dispatch)
234 scores = parallel(
--> 235 delayed(_fit_and_score)(
236 clone(estimator), X, y, scorers, train, test, verbose, None,
237 fit_params, return_train_score=return_train_score,
/anaconda3/lib/python3.7/site-packages/sklearn/model_selection/_split.py in split(self, X, y, groups)
1313 """
1314 X, y, groups = indexable(X, y, groups)
-> 1315 for train, test in self._iter_indices(X, y, groups):
1316 yield train, test
1317
/anaconda3/lib/python3.7/site-packages/sklearn/model_selection/_split.py in _iter_indices(self, X, y, groups)
1693 class_counts = np.bincount(y_indices)
1694 if np.min(class_counts) < 2:
-> 1695 raise ValueError("The least populated class in y has only 1"
1696 " member, which is too few. The minimum"
1697 " number of groups for any class cannot"
ValueError: The least populated class in y has only 1 member, which is too few. The minimum number of groups for any class cannot be less than 2.
错误消息实际上说明了一切:
ValueError: The least populated class in y has only 1 member, which is too few. The minimum number of groups for any class cannot be less than 2.
您的 `y` 中可能有一个类别(class)只有一个样本,因此实际上不可能进行任何分层拆分。
您可以做的是从数据中删除该(单个)样本——无论如何,仅由单个样本表示的类别对于分类没有任何用处……
我正在努力让我的分层洗牌拆分(StratifiedShuffleSplit)正常工作。我有两组数据,`features` 和 `labels`,我想得到一个名为 `results` 的结果,它应该包含所有 accuracy/precision/recall/f1 分数的列表。
但是,我对它应该如何把结果返回给我感到困惑。谁能发现我在这里做错了什么?
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import AdaBoostClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.metrics import make_scorer, accuracy_score, precision_score, recall_score, f1_score,confusion_matrix
# The snippet calls cross_validate() but never imported it; bring it into
# scope so the example runs on its own instead of raising NameError.
from sklearn.model_selection import cross_validate

# A single stratified shuffle split holding out 33% of the samples as the
# test set, seeded for reproducibility.
# NOTE: stratification needs at least 2 samples of every class in the labels;
# otherwise cv.split() raises "ValueError: The least populated class in y has
# only 1 member, which is too few."
sss = StratifiedShuffleSplit(n_splits=1, random_state=42, test_size=0.33)

clf_obj = RandomForestClassifier(n_estimators=10)

# Metric name -> scorer callable handed to cross_validate.
scoring = {
    'accuracy': make_scorer(accuracy_score),
    'precision': make_scorer(precision_score),
    'recall': make_scorer(recall_score),
    'f1_score': make_scorer(f1_score),
}

# `features` and `labels` are expected to be defined earlier (not shown here).
# Per the scikit-learn docs, cross_validate returns a dict of per-split score
# arrays (e.g. results['test_accuracy']) rather than a plain list.
results = cross_validate(
    estimator=clf_obj,
    X=features,
    y=labels,
    cv=sss,
    scoring=scoring,
)
我想这里让我感到困惑的是我收到了这个错误:
ValueError: The least populated class in y has only 1 member, which is too few. The minimum number of groups for any class cannot be less than 2.
但我不明白我的 x 和 y 值发生了什么。我看到的第一个错误似乎指向 `scoring` 参数:
---> 29 scoring=scoring)
...但是据我所见,我认为我已经正确填写了 `cross_validate()` 函数的参数?
完整错误跟踪:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-27-2af4c433ccc9> in <module>
27 y=labels,
28 cv=sss,
---> 29 scoring=scoring)
/anaconda3/lib/python3.7/site-packages/sklearn/model_selection/_validation.py in cross_validate(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch, return_train_score, return_estimator, error_score)
238 return_times=True, return_estimator=return_estimator,
239 error_score=error_score)
--> 240 for train, test in cv.split(X, y, groups))
241
242 zipped_scores = list(zip(*scores))
/anaconda3/lib/python3.7/site-packages/sklearn/externals/joblib/parallel.py in __call__(self, iterable)
915 # remaining jobs.
916 self._iterating = False
--> 917 if self.dispatch_one_batch(iterator):
918 self._iterating = self._original_iterator is not None
919
/anaconda3/lib/python3.7/site-packages/sklearn/externals/joblib/parallel.py in dispatch_one_batch(self, iterator)
752 tasks = BatchedCalls(itertools.islice(iterator, batch_size),
753 self._backend.get_nested_backend(),
--> 754 self._pickle_cache)
755 if len(tasks) == 0:
756 # No more tasks available in the iterator: tell caller to stop.
/anaconda3/lib/python3.7/site-packages/sklearn/externals/joblib/parallel.py in __init__(self, iterator_slice, backend_and_jobs, pickle_cache)
208
209 def __init__(self, iterator_slice, backend_and_jobs, pickle_cache=None):
--> 210 self.items = list(iterator_slice)
211 self._size = len(self.items)
212 if isinstance(backend_and_jobs, tuple):
/anaconda3/lib/python3.7/site-packages/sklearn/model_selection/_validation.py in <genexpr>(.0)
233 pre_dispatch=pre_dispatch)
234 scores = parallel(
--> 235 delayed(_fit_and_score)(
236 clone(estimator), X, y, scorers, train, test, verbose, None,
237 fit_params, return_train_score=return_train_score,
/anaconda3/lib/python3.7/site-packages/sklearn/model_selection/_split.py in split(self, X, y, groups)
1313 """
1314 X, y, groups = indexable(X, y, groups)
-> 1315 for train, test in self._iter_indices(X, y, groups):
1316 yield train, test
1317
/anaconda3/lib/python3.7/site-packages/sklearn/model_selection/_split.py in _iter_indices(self, X, y, groups)
1693 class_counts = np.bincount(y_indices)
1694 if np.min(class_counts) < 2:
-> 1695 raise ValueError("The least populated class in y has only 1"
1696 " member, which is too few. The minimum"
1697 " number of groups for any class cannot"
ValueError: The least populated class in y has only 1 member, which is too few. The minimum number of groups for any class cannot be less than 2.
错误消息实际上说明了一切:
ValueError: The least populated class in y has only 1 member, which is too few. The minimum number of groups for any class cannot be less than 2.
您的 `y` 中可能有一个类别(class)只有一个样本,因此实际上不可能进行任何分层拆分。
您可以做的是从数据中删除该(单个)样本——无论如何,仅由单个样本表示的类别对于分类没有任何用处……