使用 Pandas 和 Sklearn.Neighbors
Using Pandas and Sklearn.Neighbors
我正在尝试使用 Python 3.5/Pandas/Sklearn.neighbors 在数据帧上拟合 KNN 模型。我已经导入了数据,并将其拆分为训练集和测试集的数据与标签,但是当我尝试用模型进行预测时,出现了以下错误。我是 Pandas 的新手,如能提供任何帮助,我将不胜感激,谢谢!
import pandas as pd
from sklearn import cross_validation
import numpy as np
from sklearn.neighbors import KNeighborsRegressor
# Question snippet (reproduces the TypeError shown in the traceback that follows).
# Read the tab-separated file; column names are supplied via `names=`.
seeds = pd.read_csv('seeds.tsv',sep='\t',names=['Area','Perimeter','Compactness','Kern_len','Kern_width','Assymetry','Kern_groovlen','Species'])
# Feature matrix: columns 0-6 (everything except 'Species').
data = seeds.iloc[:,[0,1,2,3,4,5,6]]
# Target: column 7 ('Species'). Indexing with a list keeps it a one-column DataFrame.
labels = seeds.iloc[:,[7]]
# Hold out 40% of the rows for testing; random_state=1 fixes the shuffle.
x_train, x_test, y_train, y_test = cross_validation.train_test_split(data,labels, test_size=0.4, random_state=1 )
# A regressor is used here although 'Species' is a categorical label.
knn = KNeighborsRegressor(n_neighbors=30)
knn.fit(x_train,y_train)
# This call raises the TypeError: the regressor averages the neighbours'
# targets with np.mean, and the 'Species' values are strings.
knn.predict(x_test)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-121-2292e64e5ab8> in <module>()
----> 1 knn.predict(x_test)
C:\Anaconda3\lib\site-packages\sklearn\neighbors\regression.py in predict(self, X)
151
152 if weights is None:
--> 153 y_pred = np.mean(_y[neigh_ind], axis=1)
154 else:
155 y_pred = np.empty((X.shape[0], _y.shape[1]), dtype=np.float)
C:\Anaconda3\lib\site-packages\numpy\core\fromnumeric.py in mean(a, axis, dtype, out, keepdims)
2876
2877 return _methods._mean(a, axis=axis, dtype=dtype,
-> 2878 out=out, keepdims=keepdims)
2879
2880
C:\Anaconda3\lib\site-packages\numpy\core\_methods.py in _mean(a, axis, dtype, out, keepdims)
66 if isinstance(ret, mu.ndarray):
67 ret = um.true_divide(
---> 68 ret, rcount, out=ret, casting='unsafe', subok=False)
69 elif hasattr(ret, 'dtype'):
70 ret = ret.dtype.type(ret / rcount)
TypeError: unsupported operand type(s) for /: 'str' and 'int'
您应该在这里使用 KNeighborsClassifier。您要预测的 Species 是分类标签,而上面代码中的回归器(KNeighborsRegressor)是用来训练和预测连续数值变量的:它会对近邻的目标值求平均值,而字符串标签无法求平均,这就是出现该 TypeError 的原因。
from sklearn.neighbors import KNeighborsClassifier

# Answer snippet: same pipeline as the question, but with a classifier because
# 'Species' is a categorical label. `pd` and `cross_validation` are the names
# imported by the question's snippet.
# NOTE: sklearn.cross_validation was removed in scikit-learn 0.20; on newer
# versions import train_test_split from sklearn.model_selection instead.

# Read the tab-separated file; column names are supplied via `names=`.
seeds = pd.read_csv(
    'seeds.tsv',
    sep='\t',
    names=['Area', 'Perimeter', 'Compactness', 'Kern_len', 'Kern_width',
           'Assymetry', 'Kern_groovlen', 'Species'],
)

# Feature matrix: the first seven columns (everything except 'Species').
data = seeds.iloc[:, :7]
# Target as a 1-D Series (scalar column index) rather than a one-column
# DataFrame, which is the shape scikit-learn expects for `y`.
labels = seeds.iloc[:, 7]

# Hold out 40% of the rows for testing; random_state=1 fixes the shuffle.
x_train, x_test, y_train, y_test = cross_validation.train_test_split(
    data, labels, test_size=0.4, random_state=1)

# The classifier predicts a class from the neighbours' labels, so string
# labels are fine here.
knn = KNeighborsClassifier(n_neighbors=30)
knn.fit(x_train, y_train)
predictions = knn.predict(x_test)
http://scikit-learn.org/stable/auto_examples/neighbors/plot_classification.html
上面的链接展示了最近邻分类器(也就是您应该使用的模型)的绘图示例,可以与回归器的结果进行对比。
我正在尝试使用 Python 3.5/Pandas/Sklearn.neighbors 在数据帧上拟合 KNN 模型。我已经导入了数据,并将其拆分为训练集和测试集的数据与标签,但是当我尝试用模型进行预测时,出现了以下错误。我是 Pandas 的新手,如能提供任何帮助,我将不胜感激,谢谢!
import pandas as pd
from sklearn import cross_validation
import numpy as np
from sklearn.neighbors import KNeighborsRegressor
# Question snippet (reproduces the TypeError shown in the traceback that follows).
# Read the tab-separated file; column names are supplied via `names=`.
seeds = pd.read_csv('seeds.tsv',sep='\t',names=['Area','Perimeter','Compactness','Kern_len','Kern_width','Assymetry','Kern_groovlen','Species'])
# Feature matrix: columns 0-6 (everything except 'Species').
data = seeds.iloc[:,[0,1,2,3,4,5,6]]
# Target: column 7 ('Species'). Indexing with a list keeps it a one-column DataFrame.
labels = seeds.iloc[:,[7]]
# Hold out 40% of the rows for testing; random_state=1 fixes the shuffle.
x_train, x_test, y_train, y_test = cross_validation.train_test_split(data,labels, test_size=0.4, random_state=1 )
# A regressor is used here although 'Species' is a categorical label.
knn = KNeighborsRegressor(n_neighbors=30)
knn.fit(x_train,y_train)
# This call raises the TypeError: the regressor averages the neighbours'
# targets with np.mean, and the 'Species' values are strings.
knn.predict(x_test)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-121-2292e64e5ab8> in <module>()
----> 1 knn.predict(x_test)
C:\Anaconda3\lib\site-packages\sklearn\neighbors\regression.py in predict(self, X)
151
152 if weights is None:
--> 153 y_pred = np.mean(_y[neigh_ind], axis=1)
154 else:
155 y_pred = np.empty((X.shape[0], _y.shape[1]), dtype=np.float)
C:\Anaconda3\lib\site-packages\numpy\core\fromnumeric.py in mean(a, axis, dtype, out, keepdims)
2876
2877 return _methods._mean(a, axis=axis, dtype=dtype,
-> 2878 out=out, keepdims=keepdims)
2879
2880
C:\Anaconda3\lib\site-packages\numpy\core\_methods.py in _mean(a, axis, dtype, out, keepdims)
66 if isinstance(ret, mu.ndarray):
67 ret = um.true_divide(
---> 68 ret, rcount, out=ret, casting='unsafe', subok=False)
69 elif hasattr(ret, 'dtype'):
70 ret = ret.dtype.type(ret / rcount)
TypeError: unsupported operand type(s) for /: 'str' and 'int'
您应该在这里使用 KNeighborsClassifier。您要预测的 Species 是分类标签,而上面代码中的回归器(KNeighborsRegressor)是用来训练和预测连续数值变量的:它会对近邻的目标值求平均值,而字符串标签无法求平均,这就是出现该 TypeError 的原因。
from sklearn.neighbors import KNeighborsClassifier

# Answer snippet: same pipeline as the question, but with a classifier because
# 'Species' is a categorical label. `pd` and `cross_validation` are the names
# imported by the question's snippet.
# NOTE: sklearn.cross_validation was removed in scikit-learn 0.20; on newer
# versions import train_test_split from sklearn.model_selection instead.

# Read the tab-separated file; column names are supplied via `names=`.
seeds = pd.read_csv(
    'seeds.tsv',
    sep='\t',
    names=['Area', 'Perimeter', 'Compactness', 'Kern_len', 'Kern_width',
           'Assymetry', 'Kern_groovlen', 'Species'],
)

# Feature matrix: the first seven columns (everything except 'Species').
data = seeds.iloc[:, :7]
# Target as a 1-D Series (scalar column index) rather than a one-column
# DataFrame, which is the shape scikit-learn expects for `y`.
labels = seeds.iloc[:, 7]

# Hold out 40% of the rows for testing; random_state=1 fixes the shuffle.
x_train, x_test, y_train, y_test = cross_validation.train_test_split(
    data, labels, test_size=0.4, random_state=1)

# The classifier predicts a class from the neighbours' labels, so string
# labels are fine here.
knn = KNeighborsClassifier(n_neighbors=30)
knn.fit(x_train, y_train)
predictions = knn.predict(x_test)
http://scikit-learn.org/stable/auto_examples/neighbors/plot_classification.html
上面的链接展示了最近邻分类器(也就是您应该使用的模型)的绘图示例,可以与回归器的结果进行对比。