超过 2 个维度的 Kmeans
Kmeans on more than 2 dimensions
我有自己的算法来分类 kmeans
# parameter c is for how many cluster do you want
def kmeans(data, c, iter, state):
np.random.seed(state)
m=data.shape[0] #number of training examples
n=data.shape[1] #number of features
Centroids=np.array([]).reshape(n,0)
for i in range(c):
rand=np.random.randint(0,m-1)
Centroids=np.c_[Centroids,data[rand]]
result = {}
for i in range(iter):
distance=np.array([]).reshape(m,0) #Euclidian distance
for k in range(c):
tempDist=np.sum((data-Centroids[:,k])**2,axis=1)
distance=np.c_[distance,tempDist]
C=np.argmin(distance,axis=1)+1
Y={}
for k in range(c):
Y[k+1]=np.array([]).reshape(2,0)
for i in range(m):
Y[C[i]]=np.c_[Y[C[i]],X[i]]
for k in range(c):
Y[k+1]=Y[k+1].T
for k in range(c):
Centroids[:,k]=np.mean(Y[k+1],axis=0)
result=Y
return result
我测试了我的代码以对二维数据的 kmeans 进行分类并且它成功了
# This 2 dimensional data is just for example
Xd = [[7,0],[0,3],[3,4],[4,6],[7,1],[2,4]]
Z=kmeans(Xd,3,500,0)
print(Z)
>>>Z = {1: [[7,0],[7,1]],
2: [[3,4],[2,4]],
3: [[0,3],[4,6]]}
但是当我用 784 维的变量替换 Xd 时,它在这一行显示错误:
Y[C[i]]=np.c_[Y[C[i]],X[i]]
>>>all the input array dimensions for the concatenation axis must match exactly,
but along dimension 0, the array at index 0 has size 2 and the array at index 1 has size 784
我该怎么办?
Y[k+1]=np.array([]).reshape(2,0)
而不是 2
,此数组应匹配
您正在连接的一个。
我有自己的算法来分类 kmeans
# parameter c is for how many cluster do you want
def kmeans(data, c, iter, state):
np.random.seed(state)
m=data.shape[0] #number of training examples
n=data.shape[1] #number of features
Centroids=np.array([]).reshape(n,0)
for i in range(c):
rand=np.random.randint(0,m-1)
Centroids=np.c_[Centroids,data[rand]]
result = {}
for i in range(iter):
distance=np.array([]).reshape(m,0) #Euclidian distance
for k in range(c):
tempDist=np.sum((data-Centroids[:,k])**2,axis=1)
distance=np.c_[distance,tempDist]
C=np.argmin(distance,axis=1)+1
Y={}
for k in range(c):
Y[k+1]=np.array([]).reshape(2,0)
for i in range(m):
Y[C[i]]=np.c_[Y[C[i]],X[i]]
for k in range(c):
Y[k+1]=Y[k+1].T
for k in range(c):
Centroids[:,k]=np.mean(Y[k+1],axis=0)
result=Y
return result
我测试了我的代码以对二维数据的 kmeans 进行分类并且它成功了
# This 2 dimensional data is just for example
Xd = [[7,0],[0,3],[3,4],[4,6],[7,1],[2,4]]
Z=kmeans(Xd,3,500,0)
print(Z)
>>>Z = {1: [[7,0],[7,1]],
2: [[3,4],[2,4]],
3: [[0,3],[4,6]]}
但是当我用 784 维的变量替换 Xd 时,它在这一行显示错误:
Y[C[i]]=np.c_[Y[C[i]],X[i]]
>>>all the input array dimensions for the concatenation axis must match exactly,
but along dimension 0, the array at index 0 has size 2 and the array at index 1 has size 784
我该怎么办?
Y[k+1]=np.array([]).reshape(2,0)
而不是 2
,此数组应匹配
您正在连接的一个。