将向量 w 投影到向量 v 上并绘制垂直线 - 为 PCA 做准备
Project vector w onto vector v and draw perpendicular line - preparation for PCA
我想做向量投影,作为学习 PCA 的准备。我按照这个教程来计算向量投影。
w 是指向各个数据点的向量,v 是张成 w 要投影到的那条直线的向量。
代码如下:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import style
style.use('fivethirtyeight')
from sklearn.preprocessing import StandardScaler
# Center the data: with_std=False only subtracts the per-column mean,
# the scale of the columns is left untouched.
A = np.array([
    [10, 8], [1, 2], [7, 5], [3, 5], [7, 6], [8, 7], [9, 9], [4, 5], [6, 5], [6, 8],
    [1, 9], [10, 2], [6, 3], [2, 5], [1, 14], [8, 8], [9, 5], [4, 4], [5, 6], [8, 8],
    [11, 9], [10, 12], [6, 4], [5, 2], [10, 2], [8, 3], [6, 9], [0, 4], [13, 6], [9, 6],
])
A = StandardScaler(with_std=False, copy=False).fit_transform(A)

fig = plt.figure(figsize=(15, 10))
ax0 = fig.add_subplot(111)
# NOTE: only the y-limits are set and the aspect ratio is left at its
# default, so x and y units are drawn at different scales and right angles
# may look skewed on screen even though the projection itself is correct.
ax0.set_ylim(bottom=A[:, 1].min() - 3, top=A[:, 1].max() + 3)
ax0.scatter(A[:, 0], A[:, 1])

# Direction vector spanning the line that the data points are projected onto.
v = np.array([1, 0.5])

# Draw the line through the origin spanned by v: y = x * v[1] / v[0].
xs = np.linspace(A[:, 0].min() - 3, A[:, 0].max() + 3)
ax0.plot(xs, xs * (v[1] / v[0]), 'k--', linewidth=1.5, zorder=0)

# Project every data point w (one row of A) onto the line spanned by v.
for w in A:
    # Scalar c such that c*v is the orthogonal projection of w onto v.
    c = np.dot(w, v) / np.dot(v, v)
    cv = c * v
    # Sanity check: the residual w - cv must be orthogonal to v, so this
    # prints zero up to floating-point rounding for every point.
    print(np.dot(w - cv, v))
    # Connect the data point with the tip of its projection cv.
    ax0.plot([w[0], cv[0]], [w[1], cv[1]],
             linewidth=1, color='red', linestyle='--', zorder=0)

plt.show()
结果如下:
2.22044604925e-16
-2.22044604925e-16
0.0
0.0
2.77555756156e-17
-5.55111512313e-17
1.11022302463e-16
2.22044604925e-16
0.0
0.0
0.0
0.0
0.0
-2.22044604925e-16
0.0
-2.22044604925e-16
0.0
1.11022302463e-16
0.0
-2.22044604925e-16
0.0
-4.4408920985e-16
0.0
0.0
0.0
0.0
0.0
-2.22044604925e-16
-4.4408920985e-16
-2.22044604925e-16
所以代码可以运行,而且对每个投影都必须为零的"校验"计算 np.dot((w-c*v),v) 也(在浮点误差范围内)为零……因此结果应该是正确的。但是,正如肉眼可见,红色虚线并不垂直于向量 v 所张成的直线。所以这只是可视化问题,还是代码中有错误?感谢任何帮助。
找到问题了……查看坐标轴的范围,发现两轴的比例不相等:x 轴的范围是 (-10,10),而 y 轴的范围是 (-6,10)。这会使图像变形,因此肉眼看到的红色虚线与 v 所张成的直线之间的夹角不是 90 度,而是取决于两轴比例的其他角度。这也解释了为什么 np.dot((w-c*v),v) 的计算返回零:投影结果本身是正确的。
下面是可以正常工作的代码:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import style
style.use('fivethirtyeight')
from sklearn.preprocessing import StandardScaler
# Center the data: with_std=False only subtracts the per-column mean,
# the scale of the columns is left untouched.
A = np.array([
    [10, 8], [1, 2], [7, 5], [3, 5], [7, 6], [8, 7], [9, 9], [4, 5], [6, 5], [6, 8],
    [1, 9], [10, 2], [6, 3], [2, 5], [1, 14], [8, 8], [9, 5], [4, 4], [5, 6], [8, 8],
    [11, 9], [10, 12], [6, 4], [5, 2], [10, 2], [8, 3], [6, 9], [0, 4], [13, 6], [9, 6],
])
A = StandardScaler(with_std=False, copy=False).fit_transform(A)

fig = plt.figure(figsize=(10, 10))
ax0 = fig.add_subplot(111)
# Equal aspect ratio plus identical x/y limits, so that one data unit has the
# same on-screen length on both axes and right angles are drawn as right angles.
ax0.set_aspect('equal')
ax0.set_xlim((-10, 10))
ax0.set_ylim((-10, 10))
ax0.scatter(A[:, 0], A[:, 1])

# Direction vector v and the line through the origin it spans. Both are the
# same for every data point, so they are created and drawn once, outside the loop.
v = np.array([3, 2])
xs = np.linspace(-10, 10)
ax0.plot(xs, xs * (v[1] / v[0]), color='black', linestyle='--', linewidth=1.5)

# Project every data point w (one row of A) onto the line spanned by v.
for w in A:
    # Orthogonal projection of w onto v: (w.v / v.v) * v
    cv = np.dot(w, v) / np.dot(v, v) * v
    print(cv)
    # Dashed line from the data point to its projection.
    ax0.plot([w[0], cv[0]], [w[1], cv[1]], 'r--', linewidth=1.5)
    # Check the result: the residual w - cv is orthogonal to cv (which is
    # parallel to v), so this prints zero up to floating-point rounding.
    print(np.dot(w - cv, cv))

plt.show()
我想做向量投影,作为学习 PCA 的准备。我按照这个教程来计算向量投影。
代码如下:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import style
style.use('fivethirtyeight')
from sklearn.preprocessing import StandardScaler
# Center the data: with_std=False only subtracts the per-column mean,
# the scale of the columns is left untouched.
A = np.array([
    [10, 8], [1, 2], [7, 5], [3, 5], [7, 6], [8, 7], [9, 9], [4, 5], [6, 5], [6, 8],
    [1, 9], [10, 2], [6, 3], [2, 5], [1, 14], [8, 8], [9, 5], [4, 4], [5, 6], [8, 8],
    [11, 9], [10, 12], [6, 4], [5, 2], [10, 2], [8, 3], [6, 9], [0, 4], [13, 6], [9, 6],
])
A = StandardScaler(with_std=False, copy=False).fit_transform(A)

fig = plt.figure(figsize=(15, 10))
ax0 = fig.add_subplot(111)
# NOTE: only the y-limits are set and the aspect ratio is left at its
# default, so x and y units are drawn at different scales and right angles
# may look skewed on screen even though the projection itself is correct.
ax0.set_ylim(bottom=A[:, 1].min() - 3, top=A[:, 1].max() + 3)
ax0.scatter(A[:, 0], A[:, 1])

# Direction vector spanning the line that the data points are projected onto.
v = np.array([1, 0.5])

# Draw the line through the origin spanned by v: y = x * v[1] / v[0].
xs = np.linspace(A[:, 0].min() - 3, A[:, 0].max() + 3)
ax0.plot(xs, xs * (v[1] / v[0]), 'k--', linewidth=1.5, zorder=0)

# Project every data point w (one row of A) onto the line spanned by v.
for w in A:
    # Scalar c such that c*v is the orthogonal projection of w onto v.
    c = np.dot(w, v) / np.dot(v, v)
    cv = c * v
    # Sanity check: the residual w - cv must be orthogonal to v, so this
    # prints zero up to floating-point rounding for every point.
    print(np.dot(w - cv, v))
    # Connect the data point with the tip of its projection cv.
    ax0.plot([w[0], cv[0]], [w[1], cv[1]],
             linewidth=1, color='red', linestyle='--', zorder=0)

plt.show()
结果如下:
2.22044604925e-16
-2.22044604925e-16
0.0
0.0
2.77555756156e-17
-5.55111512313e-17
1.11022302463e-16
2.22044604925e-16
0.0
0.0
0.0
0.0
0.0
-2.22044604925e-16
0.0
-2.22044604925e-16
0.0
1.11022302463e-16
0.0
-2.22044604925e-16
0.0
-4.4408920985e-16
0.0
0.0
0.0
0.0
0.0
-2.22044604925e-16
-4.4408920985e-16
-2.22044604925e-16
所以代码可以运行,而且对每个投影都必须为零的"校验"计算 np.dot((w-c*v),v) 也(在浮点误差范围内)为零……因此结果应该是正确的。但是,正如肉眼可见,红色虚线并不垂直于向量 v 所张成的直线。所以这只是可视化问题,还是代码中有错误?感谢任何帮助。
找到问题了……查看坐标轴的范围,发现两轴的比例不相等:x 轴的范围是 (-10,10),而 y 轴的范围是 (-6,10)。这会使图像变形,因此肉眼看到的红色虚线与 v 所张成的直线之间的夹角不是 90 度,而是取决于两轴比例的其他角度。这也解释了为什么 np.dot((w-c*v),v) 的计算返回零:投影结果本身是正确的。
下面是可以正常工作的代码:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import style
style.use('fivethirtyeight')
from sklearn.preprocessing import StandardScaler
# Center the data: with_std=False only subtracts the per-column mean,
# the scale of the columns is left untouched.
A = np.array([
    [10, 8], [1, 2], [7, 5], [3, 5], [7, 6], [8, 7], [9, 9], [4, 5], [6, 5], [6, 8],
    [1, 9], [10, 2], [6, 3], [2, 5], [1, 14], [8, 8], [9, 5], [4, 4], [5, 6], [8, 8],
    [11, 9], [10, 12], [6, 4], [5, 2], [10, 2], [8, 3], [6, 9], [0, 4], [13, 6], [9, 6],
])
A = StandardScaler(with_std=False, copy=False).fit_transform(A)

fig = plt.figure(figsize=(10, 10))
ax0 = fig.add_subplot(111)
# Equal aspect ratio plus identical x/y limits, so that one data unit has the
# same on-screen length on both axes and right angles are drawn as right angles.
ax0.set_aspect('equal')
ax0.set_xlim((-10, 10))
ax0.set_ylim((-10, 10))
ax0.scatter(A[:, 0], A[:, 1])

# Direction vector v and the line through the origin it spans. Both are the
# same for every data point, so they are created and drawn once, outside the loop.
v = np.array([3, 2])
xs = np.linspace(-10, 10)
ax0.plot(xs, xs * (v[1] / v[0]), color='black', linestyle='--', linewidth=1.5)

# Project every data point w (one row of A) onto the line spanned by v.
for w in A:
    # Orthogonal projection of w onto v: (w.v / v.v) * v
    cv = np.dot(w, v) / np.dot(v, v) * v
    print(cv)
    # Dashed line from the data point to its projection.
    ax0.plot([w[0], cv[0]], [w[1], cv[1]], 'r--', linewidth=1.5)
    # Check the result: the residual w - cv is orthogonal to cv (which is
    # parallel to v), so this prints zero up to floating-point rounding.
    print(np.dot(w - cv, cv))

plt.show()