使用 scikit-learn SVM 和 optunity 时出现 'bad input shape'
'bad input shape' when using scikit-learn SVM and optunity
我正在尝试使用 optunity 包来调整我的 SVM 模型。我直接复制粘贴了它最新的示例代码,只是改为导入自己的特征数组和数据数组:
import optunity
import optunity.metrics
import sklearn.svm
import numpy as np
# Directory that holds the saved .npy arrays, and the files to read from it.
data_path = '/python/Feature'
files = ['A.npy', 'B.npy', 'C.npy']
# Accumulators that are filled while the files are read.
array = []
labels = []
for i,name in enumerate(files):
# `data` is rebound on every pass, so after the loop it refers to the last file loaded only.
data = np.load('{}/{}'.format(data_path, name))
for j in range(0,len(data)):
# NOTE(review): this appends data[j] itself to `labels`, and the file index `i` is never used;
# confirm whether `i` was meant to be the class label.
labels.append(data[j])
# NOTE(review): indentation was lost in this paste, so it is unclear which loop this append belongs to.
array.append(data)
# Python 2 print statements; the trailing comments record the values the author reported.
print len(array) #=> 1247
print len(labels) #=> 1247
# score function: twice iterated 10-fold cross-validated AUC (the body returns roc_auc, not accuracy)
# NOTE(review): x=data passes the array from the last np.load call, not the accumulated `array` list;
# confirm this is intended.
@optunity.cross_validated(x=data, y=labels, num_folds=10, num_iter=2)
def svm_auc(x_train, y_train, x_test, y_test, C, gamma):
# Fit an SVC with the candidate C and gamma on the training fold.
model = sklearn.svm.SVC(C=C, gamma=gamma).fit(x_train, y_train)
# Score the held-out fold with the classifier's decision values.
decision_values = model.decision_function(x_test)
return optunity.metrics.roc_auc(y_test, decision_values)
# perform tuning: maximize svm_auc over 200 evaluations, with C given the range [0, 10] and gamma [0, 1]
optimal_pars, _, _ = optunity.maximize(svm_auc, num_evals=200, C=[0, 10], gamma=[0, 1])
# train model on the full training set with tuned hyperparameters
# NOTE(review): this fits on `data` (rebound for each loaded file) rather than the accumulated `array`; confirm.
optimal_model = sklearn.svm.SVC(**optimal_pars).fit(data, labels)
但是,编译器看起来很不高兴,我查看了 SVM class document 以仔细检查输入格式,但是我不明白 optunity 的编码语法.. 谁能帮我找出那里出了什么问题?非常感谢..(我正在使用 'rbf' 内核,我尝试添加但语法出错,在 optunity 的示例中很奇怪没有内核选择..)
Traceback (most recent call last):
File "python/SVM_turning.py", line 26, in <module>
optimal_pars, _, _ = optunity.maximize(svm_auc, num_evals=200, C=[0, 10], gamma=[0, 1])
File "/lib/python2.7/site-packages/optunity/api.py", line 181, in maximize
pmap=pmap)
File "/lib/python2.7/site-packages/optunity/api.py", line 245, in optimize
solution, report = solver.optimize(f, maximize, pmap=pmap)
File "/lib/python2.7/site-packages/optunity/solvers/ParticleSwarm.py", line 257, in optimize
fitnesses = pmap(evaluate, list(map(self.particle2dict, pop)))
File "/lib/python2.7/site-packages/optunity/solvers/ParticleSwarm.py", line 246, in evaluate
return f(**d)
File "/lib/python2.7/site-packages/optunity/functions.py", line 286, in wrapped_f
value = f(*args, **kwargs)
File "/lib/python2.7/site-packages/optunity/functions.py", line 341, in wrapped_f
return f(*args, **kwargs)
File "/lib/python2.7/site-packages/optunity/constraints.py", line 150, in wrapped_f
return f(*args, **kwargs)
File "/lib/python2.7/site-packages/optunity/constraints.py", line 128, in wrapped_f
return f(*args, **kwargs)
File "/lib/python2.7/site-packages/optunity/constraints.py", line 265, in func
return f(*args, **kwargs)
File "/lib/python2.7/site-packages/optunity/cross_validation.py", line 386, in __call__
scores.append(self.f(**kwargs))
File "/python/SVM_turning.py", line 21, in svm_auc
model = sklearn.svm.SVC(C=C, gamma=gamma).fit(x_train, y_train)
File "/lib/python2.7/site-packages/sklearn/svm/base.py", line 138, in fit
y = self._validate_targets(y)
File "/lib/python2.7/site-packages/sklearn/svm/base.py", line 441, in _validate_targets
y_ = column_or_1d(y, warn=True)
File "/lib/python2.7/site-packages/sklearn/utils/validation.py", line 319, in column_or_1d
raise ValueError("bad input shape {0}".format(shape))
ValueError: bad input shape (428, 600)
我没有看到optunity中默认的优化器是什么,但是如果你只是使用网格搜索,你可以使用scikit-learn中的GridSearchCV。
您的示例看起来很像 optunity 文档中的示例。您是否尝试过运行那里的原始示例?
我想我找到了问题所在。您在读取文件时准备了列表 `array` 和 `labels`,其中 `array` 依次由 `data` 填充。然而,稍后您却这样做:
@optunity.cross_validated(x=data, y=labels, num_folds=10, num_iter=2)
和
optimal_model = sklearn.svm.SVC(**optimal_pars).fit(data, labels)
因此,您使用的是 `data` 作为数据集,而不是您准备好的 `array`。我不知道您所读取文件的格式,所以无法确定具体发生了什么。但是,`data` 和 `labels` 的维度几乎肯定不匹配。
下面是一个玩具示例,其中使用 `array` 和 `labels`,确实可以正常工作:
import optunity
import optunity.metrics
import sklearn.svm
import numpy as np
#print len(array) #=> 1247
#print len(labels) #=> 1247
# make dummy data: 1247 samples with one integer feature each (0..1246);
# the first 100 samples are labelled True, the remaining 1147 False
array = np.arange(1247).reshape(-1, 1)
labels = [position < 100 for position in range(1247)]
# score function: twice iterated 10-fold cross-validated AUC
@optunity.cross_validated(x=array, y=labels, num_folds=10, num_iter=2)
def svm_auc(x_train, y_train, x_test, y_test, C, gamma):
    """Return the ROC AUC on the test fold of an SVC trained on the train fold.

    C and gamma are the candidate hyperparameters passed straight to
    sklearn.svm.SVC; the fold arguments are supplied by the decorator.
    """
    classifier = sklearn.svm.SVC(C=C, gamma=gamma)
    classifier.fit(x_train, y_train)
    fold_scores = classifier.decision_function(x_test)
    return optunity.metrics.roc_auc(y_test, fold_scores)
# perform tuning: maximize the cross-validated score over C and gamma
optimal_pars, _, _ = optunity.maximize(
    svm_auc,
    num_evals=200,
    C=[0, 10],
    gamma=[0, 1],
)
# train model on the full training set with tuned hyperparameters
optimal_model = sklearn.svm.SVC(**optimal_pars)
optimal_model.fit(array, labels)
# show the hyperparameters that were selected
print(optimal_pars)
其输出为(示例):
{'C': 8.0126953125, 'gamma': 0.35791015625}
抱歉这么久才回复。
不确定这是否就是您遇到的错误,但我在使用常规数组(而本应使用 NumPy 数组)时也遇到过这个问题。
我正在尝试使用 optunity 包来调整我的 SVM 模型。我直接复制粘贴了它最新的示例代码,只是改为导入自己的特征数组和数据数组:
import optunity
import optunity.metrics
import sklearn.svm
import numpy as np
# Directory that holds the saved .npy arrays, and the files to read from it.
data_path = '/python/Feature'
files = ['A.npy', 'B.npy', 'C.npy']
# Accumulators that are filled while the files are read.
array = []
labels = []
for i,name in enumerate(files):
# `data` is rebound on every pass, so after the loop it refers to the last file loaded only.
data = np.load('{}/{}'.format(data_path, name))
for j in range(0,len(data)):
# NOTE(review): this appends data[j] itself to `labels`, and the file index `i` is never used;
# confirm whether `i` was meant to be the class label.
labels.append(data[j])
# NOTE(review): indentation was lost in this paste, so it is unclear which loop this append belongs to.
array.append(data)
# Python 2 print statements; the trailing comments record the values the author reported.
print len(array) #=> 1247
print len(labels) #=> 1247
# score function: twice iterated 10-fold cross-validated AUC (the body returns roc_auc, not accuracy)
# NOTE(review): x=data passes the array from the last np.load call, not the accumulated `array` list;
# confirm this is intended.
@optunity.cross_validated(x=data, y=labels, num_folds=10, num_iter=2)
def svm_auc(x_train, y_train, x_test, y_test, C, gamma):
# Fit an SVC with the candidate C and gamma on the training fold.
model = sklearn.svm.SVC(C=C, gamma=gamma).fit(x_train, y_train)
# Score the held-out fold with the classifier's decision values.
decision_values = model.decision_function(x_test)
return optunity.metrics.roc_auc(y_test, decision_values)
# perform tuning: maximize svm_auc over 200 evaluations, with C given the range [0, 10] and gamma [0, 1]
optimal_pars, _, _ = optunity.maximize(svm_auc, num_evals=200, C=[0, 10], gamma=[0, 1])
# train model on the full training set with tuned hyperparameters
# NOTE(review): this fits on `data` (rebound for each loaded file) rather than the accumulated `array`; confirm.
optimal_model = sklearn.svm.SVC(**optimal_pars).fit(data, labels)
但是,编译器看起来很不高兴,我查看了 SVM class document 以仔细检查输入格式,但是我不明白 optunity 的编码语法.. 谁能帮我找出那里出了什么问题?非常感谢..(我正在使用 'rbf' 内核,我尝试添加但语法出错,在 optunity 的示例中很奇怪没有内核选择..)
Traceback (most recent call last):
File "python/SVM_turning.py", line 26, in <module>
optimal_pars, _, _ = optunity.maximize(svm_auc, num_evals=200, C=[0, 10], gamma=[0, 1])
File "/lib/python2.7/site-packages/optunity/api.py", line 181, in maximize
pmap=pmap)
File "/lib/python2.7/site-packages/optunity/api.py", line 245, in optimize
solution, report = solver.optimize(f, maximize, pmap=pmap)
File "/lib/python2.7/site-packages/optunity/solvers/ParticleSwarm.py", line 257, in optimize
fitnesses = pmap(evaluate, list(map(self.particle2dict, pop)))
File "/lib/python2.7/site-packages/optunity/solvers/ParticleSwarm.py", line 246, in evaluate
return f(**d)
File "/lib/python2.7/site-packages/optunity/functions.py", line 286, in wrapped_f
value = f(*args, **kwargs)
File "/lib/python2.7/site-packages/optunity/functions.py", line 341, in wrapped_f
return f(*args, **kwargs)
File "/lib/python2.7/site-packages/optunity/constraints.py", line 150, in wrapped_f
return f(*args, **kwargs)
File "/lib/python2.7/site-packages/optunity/constraints.py", line 128, in wrapped_f
return f(*args, **kwargs)
File "/lib/python2.7/site-packages/optunity/constraints.py", line 265, in func
return f(*args, **kwargs)
File "/lib/python2.7/site-packages/optunity/cross_validation.py", line 386, in __call__
scores.append(self.f(**kwargs))
File "/python/SVM_turning.py", line 21, in svm_auc
model = sklearn.svm.SVC(C=C, gamma=gamma).fit(x_train, y_train)
File "/lib/python2.7/site-packages/sklearn/svm/base.py", line 138, in fit
y = self._validate_targets(y)
File "/lib/python2.7/site-packages/sklearn/svm/base.py", line 441, in _validate_targets
y_ = column_or_1d(y, warn=True)
File "/lib/python2.7/site-packages/sklearn/utils/validation.py", line 319, in column_or_1d
raise ValueError("bad input shape {0}".format(shape))
ValueError: bad input shape (428, 600)
我没有看到optunity中默认的优化器是什么,但是如果你只是使用网格搜索,你可以使用scikit-learn中的GridSearchCV。
您的示例看起来很像 optunity 文档中的示例。您是否尝试过运行那里的原始示例?
我想我找到了问题所在。您在读取文件时准备了列表 `array` 和 `labels`,其中 `array` 依次由 `data` 填充。然而,稍后您却这样做:
@optunity.cross_validated(x=data, y=labels, num_folds=10, num_iter=2)
和
optimal_model = sklearn.svm.SVC(**optimal_pars).fit(data, labels)
因此,您使用的是 `data` 作为数据集,而不是您准备好的 `array`。我不知道您所读取文件的格式,所以无法确定具体发生了什么。但是,`data` 和 `labels` 的维度几乎肯定不匹配。
下面是一个玩具示例,其中使用 `array` 和 `labels`,确实可以正常工作:
import optunity
import optunity.metrics
import sklearn.svm
import numpy as np
#print len(array) #=> 1247
#print len(labels) #=> 1247
# make dummy data: 1247 samples with one integer feature each (0..1246);
# the first 100 samples are labelled True, the remaining 1147 False
array = np.arange(1247).reshape(-1, 1)
labels = [position < 100 for position in range(1247)]
# score function: twice iterated 10-fold cross-validated AUC
@optunity.cross_validated(x=array, y=labels, num_folds=10, num_iter=2)
def svm_auc(x_train, y_train, x_test, y_test, C, gamma):
    """Return the ROC AUC on the test fold of an SVC trained on the train fold.

    C and gamma are the candidate hyperparameters passed straight to
    sklearn.svm.SVC; the fold arguments are supplied by the decorator.
    """
    classifier = sklearn.svm.SVC(C=C, gamma=gamma)
    classifier.fit(x_train, y_train)
    fold_scores = classifier.decision_function(x_test)
    return optunity.metrics.roc_auc(y_test, fold_scores)
# perform tuning: maximize the cross-validated score over C and gamma
optimal_pars, _, _ = optunity.maximize(
    svm_auc,
    num_evals=200,
    C=[0, 10],
    gamma=[0, 1],
)
# train model on the full training set with tuned hyperparameters
optimal_model = sklearn.svm.SVC(**optimal_pars)
optimal_model.fit(array, labels)
# show the hyperparameters that were selected
print(optimal_pars)
其输出为(示例):
{'C': 8.0126953125, 'gamma': 0.35791015625}
抱歉这么久才回复。
不确定这是否就是您遇到的错误,但我在使用常规数组(而本应使用 NumPy 数组)时也遇到过这个问题。