绘制 SVC 拉普拉斯核的决策边界时出错
Error in plotting the decision boundary for SVC Laplace kernel
我正在尝试使用预先计算的 Laplace 核(下面的代码)在此 scikit-learn post 的类似行上绘制 SVM 分类器的决策边界。我将测试点作为网格值 (xx, yy)
,就像 post 中提到的那样,并将训练点作为 X
和 y
。我可以使用训练点来拟合预先计算的内核。
import numpy as np
#from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.svm import SVC
from sklearn.metrics.pairwise import laplacian_kernel
#Load the iris data
iris_data = load_iris()
#Split the data and target
X = iris_data.data[:, :2]
y = iris_data.target
#Step size in mesh plot
h = 0.02
#Convert X and y to a numpy array
X = np.array(X)
y = np.array(y)
#Using Laplacian kernel - https://scikit-learn.org/stable/modules/metrics.html#laplacian-kernel
K = np.array(laplacian_kernel(X, gamma=.5))
svm = SVC(kernel='precomputed').fit(K, np.ravel(y))
# create a mesh to plot in
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
np.arange(y_min, y_max, h))
# Plot the decision boundary. For that, we will assign a color to each
# point in the mesh [x_min, x_max]x[y_min, y_max].
#plt.subplot(2, 2, i + 1)
#plt.subplots_adjust(wspace=0.4, hspace=0.4)
# Calculate the gram matrix for test points. Here is where the error is coming. xx- test, X-train.
K_test = np.array(laplacian_kernel(xx, X, gamma=.5))
#Predict using the gram matrix for test
Z = svm.predict(np.c_[K_test])
# Put the result into a color plot
Z = Z.reshape(xx.shape)
plt.contourf(xx, yy, Z, cmap=plt.cm.coolwarm, alpha=0.8)
# Plot also the training points
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.coolwarm)
plt.xlabel('Sepal length')
plt.ylabel('Sepal width')
plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max())
plt.xticks(())
plt.yticks(())
plt.title('SVC with Laplace kernel')
plt.show()
但是,当我尝试在图形上为网格点绘制决策边界时,出现以下错误。
Traceback (most recent call last):
File "/home/user/Src/laplce.py", line 37, in <module>
K_test = np.array(laplacian_kernel(xx, X, gamma=.5))
File "/home/user/.local/lib/python3.9/site-packages/sklearn/metrics/pairwise.py", line 1136, in laplacian_kernel
X, Y = check_pairwise_arrays(X, Y)
File "/home/user/.local/lib/python3.9/site-packages/sklearn/utils/validation.py", line 63, in inner_f
return f(*args, **kwargs)
File "/home/user/.local/lib/python3.9/site-packages/sklearn/metrics/pairwise.py", line 160, in check_pairwise_arrays
raise ValueError("Incompatible dimension for X and Y matrices: "
ValueError: Incompatible dimension for X and Y matrices: X.shape[1] == 280 while Y.shape[1] == 2
那么,如何解决错误并绘制 iris 数据的决策边界?提前致谢
问题是在应用拉普拉斯算子之前,让您的网格网格与训练矩阵具有相同的维度。因此,如果我们 运行 下面的代码适合 svm :
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.svm import SVC
from sklearn.metrics.pairwise import laplacian_kernel
iris_data = load_iris()
X = iris_data.data[:, :2]
y = iris_data.target
h = 0.02
K = laplacian_kernel(X,gamma=.5)
svm = SVC(kernel='precomputed').fit(K, y)
像您一样创建网格:
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
x_test = np.meshgrid(np.arange(x_min, x_max, h),
np.arange(y_min, y_max, h))
xx,yy = np.meshgrid(np.arange(x_min, x_max, h),np.arange(y_min, y_max, h))
您对拉普拉斯算子的原始输入是 (150,2) 所以您基本上需要将 xx,yy
放入 2 列:
x_test = np.vstack([xx.ravel(),yy.ravel()]).T
K_test = laplacian_kernel(x_test, X, gamma=.5)
Z = svm.predict(K_test)
Z = Z.reshape(xx.shape)
然后剧情:
plt.contourf(xx, yy, Z, cmap=plt.cm.coolwarm, alpha=0.8)
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.coolwarm)
plt.xlabel('Sepal length')
plt.ylabel('Sepal width')
plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max())
这些点或多或少是正确的,你可以看到它没有很好地解析1,2:
pd.crosstab(y,svm.predict(K))
col_0 0 1 2
row_0
0 49 1 0
1 0 35 15
2 0 11 39
我正在尝试使用预先计算的 Laplace 核(下面的代码)在此 scikit-learn post 的类似行上绘制 SVM 分类器的决策边界。我将测试点作为网格值 (xx, yy)
,就像 post 中提到的那样,并将训练点作为 X
和 y
。我可以使用训练点来拟合预先计算的内核。
import numpy as np
#from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.svm import SVC
from sklearn.metrics.pairwise import laplacian_kernel
#Load the iris data
iris_data = load_iris()
#Split the data and target
X = iris_data.data[:, :2]
y = iris_data.target
#Step size in mesh plot
h = 0.02
#Convert X and y to a numpy array
X = np.array(X)
y = np.array(y)
#Using Laplacian kernel - https://scikit-learn.org/stable/modules/metrics.html#laplacian-kernel
K = np.array(laplacian_kernel(X, gamma=.5))
svm = SVC(kernel='precomputed').fit(K, np.ravel(y))
# create a mesh to plot in
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
np.arange(y_min, y_max, h))
# Plot the decision boundary. For that, we will assign a color to each
# point in the mesh [x_min, x_max]x[y_min, y_max].
#plt.subplot(2, 2, i + 1)
#plt.subplots_adjust(wspace=0.4, hspace=0.4)
# Calculate the gram matrix for test points. Here is where the error is coming. xx- test, X-train.
K_test = np.array(laplacian_kernel(xx, X, gamma=.5))
#Predict using the gram matrix for test
Z = svm.predict(np.c_[K_test])
# Put the result into a color plot
Z = Z.reshape(xx.shape)
plt.contourf(xx, yy, Z, cmap=plt.cm.coolwarm, alpha=0.8)
# Plot also the training points
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.coolwarm)
plt.xlabel('Sepal length')
plt.ylabel('Sepal width')
plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max())
plt.xticks(())
plt.yticks(())
plt.title('SVC with Laplace kernel')
plt.show()
但是,当我尝试在图形上为网格点绘制决策边界时,出现以下错误。
Traceback (most recent call last):
File "/home/user/Src/laplce.py", line 37, in <module>
K_test = np.array(laplacian_kernel(xx, X, gamma=.5))
File "/home/user/.local/lib/python3.9/site-packages/sklearn/metrics/pairwise.py", line 1136, in laplacian_kernel
X, Y = check_pairwise_arrays(X, Y)
File "/home/user/.local/lib/python3.9/site-packages/sklearn/utils/validation.py", line 63, in inner_f
return f(*args, **kwargs)
File "/home/user/.local/lib/python3.9/site-packages/sklearn/metrics/pairwise.py", line 160, in check_pairwise_arrays
raise ValueError("Incompatible dimension for X and Y matrices: "
ValueError: Incompatible dimension for X and Y matrices: X.shape[1] == 280 while Y.shape[1] == 2
那么,如何解决错误并绘制 iris 数据的决策边界?提前致谢
问题是在应用拉普拉斯算子之前,让您的网格网格与训练矩阵具有相同的维度。因此,如果我们 运行 下面的代码适合 svm :
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.svm import SVC
from sklearn.metrics.pairwise import laplacian_kernel
iris_data = load_iris()
X = iris_data.data[:, :2]
y = iris_data.target
h = 0.02
K = laplacian_kernel(X,gamma=.5)
svm = SVC(kernel='precomputed').fit(K, y)
像您一样创建网格:
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
x_test = np.meshgrid(np.arange(x_min, x_max, h),
np.arange(y_min, y_max, h))
xx,yy = np.meshgrid(np.arange(x_min, x_max, h),np.arange(y_min, y_max, h))
您对拉普拉斯算子的原始输入是 (150,2) 所以您基本上需要将 xx,yy
放入 2 列:
x_test = np.vstack([xx.ravel(),yy.ravel()]).T
K_test = laplacian_kernel(x_test, X, gamma=.5)
Z = svm.predict(K_test)
Z = Z.reshape(xx.shape)
然后剧情:
plt.contourf(xx, yy, Z, cmap=plt.cm.coolwarm, alpha=0.8)
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.coolwarm)
plt.xlabel('Sepal length')
plt.ylabel('Sepal width')
plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max())
这些点或多或少是正确的,你可以看到它没有很好地解析1,2:
pd.crosstab(y,svm.predict(K))
col_0 0 1 2
row_0
0 49 1 0
1 0 35 15
2 0 11 39