K-Means 中的质心
Centroids in K-Means
import math, random, os, operator, matplotlib, matplotlib.pyplot
from string import split
def EuDist(vecA, vecB):
    """Return the Euclidean distance between two vectors.

    Args:
        vecA: Sequence of numbers (the first point).
        vecB: Sequence of numbers (the second point).

    Returns:
        The square root of the sum of squared coordinate differences, as a
        float.  Coordinates are paired with ``zip``, so if the sequences
        differ in length the extra trailing coordinates are ignored; two
        empty sequences give ``0.0``.
    """
    return math.sqrt(sum((a - b) * (a - b) for a, b in zip(vecA, vecB)))
# Path of the synthetic sample file; one "x<TAB>y" point per line.
filename = "points.txt"

# (number of points, x range, y range) for each synthetic blob.  Every
# coordinate is a uniform draw from its range plus uniform noise in [-1, 1].
blobs = [
    (33, (1, 2), (4, 5)),
    (33, (4, 6), (4, 6)),
    (34, (2, 3), (2, 3)),
]

# Write the sample points.  The `with` block guarantees the file is flushed
# and closed before it is read back below.
with open(filename, "w") as FILE:
    for count, (xLow, xHigh), (yLow, yHigh) in blobs:
        for _ in range(count):
            x = random.uniform(xLow, xHigh) + random.uniform(-1, 1)
            y = random.uniform(yLow, yHigh) + random.uniform(-1, 1)
            FILE.write(str(x) + "\t" + str(y) + "\n")

# Read the points back as a list of [x, y] float pairs.
dataset = []
with open(filename) as dataFile:
    for line in dataFile:
        # strip() removes only the line terminator/whitespace.  The previous
        # line[:-2] slice also cut off the last digit of the second value.
        line = line.strip()
        if not line:
            continue  # tolerate blank lines
        lineSplit = line.split("\t")
        dataset.append([float(value) for value in lineSplit])
# Clustering parameters.  int() is needed on Python 3, where input() returns
# a string; on Python 2 input() already evaluates the typed text (NOTE: that
# is an eval of user input -- only run this with trusted input on Python 2).
maxIters = int(input("Enter the maximum number of iterations: "))
center = int(input("Enter a number of clusters: "))

# Use `center` distinct data points as the initial centroids.
centoids = random.sample(dataset, center)
m = len(dataset)

# One colour per cluster; indices wrap around if there are more clusters
# than colours.  ("p" is not a valid matplotlib colour code.)
colorarr = ["b", "r", "y", "g", "purple"]

# cluster[t] holds the indices (into dataset) of the points assigned to
# centroid t.
cluster = [[] for i in range(len(centoids))]
for i in range(maxIters):
    # Assignment step: attach every point to its nearest centroid.
    cluster = [[] for v in range(len(centoids))]
    for j in range(m):
        minK = 0
        minDis = float("inf")
        for k in range(len(centoids)):
            dist = EuDist(dataset[j], centoids[k])
            # "<=" keeps the original tie-breaking: the later centroid wins.
            if dist <= minDis:
                minDis = dist
                minK = k
        cluster[minK].append(j)

    # Update step: move each centroid to the mean of its assigned points.
    for t in range(len(centoids)):
        if not cluster[t]:
            # No points assigned: keep the old centroid rather than
            # dividing by zero.
            continue
        x0 = sum(dataset[x][0] for x in cluster[t])
        y0 = sum(dataset[x][1] for x in cluster[t])
        # Index with t (the cluster being updated), not the stale loop
        # variable k left over from the assignment step.
        centoids[t] = [x0 / len(cluster[t]), y0 / len(cluster[t])]

    # Plot this iteration on a clean figure and save it.  clf() replaces the
    # removed hold(False)/hold(True) mechanism; no show() is needed to save.
    matplotlib.pyplot.clf()
    for k in range(len(cluster)):
        clusterPoint = [dataset[x] for x in cluster[k]]
        x0 = [x[0] for x in clusterPoint]
        y0 = [x[1] for x in clusterPoint]
        matplotlib.pyplot.scatter(x0, y0, c=colorarr[k % len(colorarr)])
    picname = "picture_number_" + str(i + 1) + ".png"
    matplotlib.pyplot.savefig(picname)
代码工作正常,但我遇到了一个问题:我不知道如何在这张图上显示各簇的质心。我知道需要用到变量 centoids,但不知道具体该怎么做。请给我一点提示。
我不能 100% 确定你想要什么,但我认为你只是想在各个簇的合并散点图上叠加绘制各簇的质心,全部画在同一张图中(每个簇都有自己的颜色)。
类似下面这样的写法应该可行:
from matplotlib import pyplot as plt
import numpy as np
# Random stand-in data for the demo: 4 clusters of 100 points each, with the
# x and y coordinates stored as separate (4, 100) arrays.
data = {
    'x': np.random.rand(4, 100),
    'y': np.random.rand(4, 100),
}
# One random centroid per cluster, again as separate x / y arrays.
centoids = {
    'x': np.random.rand(4),
    'y': np.random.rand(4),
}
# One colour per cluster.
colorarr = ["b", "r", "y", "g"]

# Draw each cluster into the same axes, one colour per cluster.
for i, (xs, ys) in enumerate(zip(data['x'], data['y'])):
    plt.scatter(xs, ys, s=50, c=colorarr[i])
plt.grid(True)

# Overlay all centroids in a single call.  Passing the full colour list via
# `color` gives centroid i the colour of cluster i.
plt.scatter(centoids['x'], centoids['y'], marker='+', color=colorarr, s=330)
plt.savefig("random.png")
只需使用此处显示的几行 `plt.` 调用;您不需要更多,当然也不需要 `hold` 变量或 `show`。基本上,您只是把每个簇叠加绘制在前一个簇之上,最后再在其上绘制各簇的质心。
在最后一次 `scatter` 调用中,我把完整的 `colorarr` 传给了 `color` 关键字参数:这样,每个质心都会获得对应簇的颜色。
在您的代码中,它看起来像这样:
# Plotting part of the K-Means script with the centroids drawn on top.
# It uses `cluster`, `dataset`, `centoids` and the iteration counter `i`
# from that script, so it presumably belongs inside the iteration loop.
# ("p" is not a valid matplotlib colour code, hence "purple".)
colorarr = ["b", "r", "y", "g", "purple"]
for k in range(len(cluster)):
    clusterPoint = [dataset[x] for x in cluster[k]]
    x0 = [x[0] for x in clusterPoint]
    y0 = [x[1] for x in clusterPoint]
    # scatter's third positional argument is the marker size, so the old
    # `center` list must not be passed there.
    matplotlib.pyplot.scatter(x0, y0, c=colorarr[k % len(colorarr)])

# Split the [x, y] centroid pairs into separate coordinate sequences.
xcentoids, ycentoids = zip(*centoids)
# Exactly one colour per centroid, matching the colour of its cluster; a
# colour list of a different length than the points is rejected by scatter.
centoidColors = [colorarr[k % len(colorarr)] for k in range(len(centoids))]
matplotlib.pyplot.scatter(xcentoids, ycentoids, marker='+', color=centoidColors, s=330)
picname = "picture_number_" + str(i + 1) + ".png"
matplotlib.pyplot.savefig(picname)
import math, random, os, operator, matplotlib, matplotlib.pyplot
from string import split
def EuDist(vecA, vecB):
    """Return the Euclidean distance between two vectors.

    Args:
        vecA: Sequence of numbers (the first point).
        vecB: Sequence of numbers (the second point).

    Returns:
        The square root of the sum of squared coordinate differences, as a
        float.  Coordinates are paired with ``zip``, so if the sequences
        differ in length the extra trailing coordinates are ignored; two
        empty sequences give ``0.0``.
    """
    return math.sqrt(sum((a - b) * (a - b) for a, b in zip(vecA, vecB)))
# Path of the synthetic sample file; one "x<TAB>y" point per line.
filename = "points.txt"

# (number of points, x range, y range) for each synthetic blob.  Every
# coordinate is a uniform draw from its range plus uniform noise in [-1, 1].
blobs = [
    (33, (1, 2), (4, 5)),
    (33, (4, 6), (4, 6)),
    (34, (2, 3), (2, 3)),
]

# Write the sample points.  The `with` block guarantees the file is flushed
# and closed before it is read back below.
with open(filename, "w") as FILE:
    for count, (xLow, xHigh), (yLow, yHigh) in blobs:
        for _ in range(count):
            x = random.uniform(xLow, xHigh) + random.uniform(-1, 1)
            y = random.uniform(yLow, yHigh) + random.uniform(-1, 1)
            FILE.write(str(x) + "\t" + str(y) + "\n")

# Read the points back as a list of [x, y] float pairs.
dataset = []
with open(filename) as dataFile:
    for line in dataFile:
        # strip() removes only the line terminator/whitespace.  The previous
        # line[:-2] slice also cut off the last digit of the second value.
        line = line.strip()
        if not line:
            continue  # tolerate blank lines
        lineSplit = line.split("\t")
        dataset.append([float(value) for value in lineSplit])
# Clustering parameters.  int() is needed on Python 3, where input() returns
# a string; on Python 2 input() already evaluates the typed text (NOTE: that
# is an eval of user input -- only run this with trusted input on Python 2).
maxIters = int(input("Enter the maximum number of iterations: "))
center = int(input("Enter a number of clusters: "))

# Use `center` distinct data points as the initial centroids.
centoids = random.sample(dataset, center)
m = len(dataset)

# One colour per cluster; indices wrap around if there are more clusters
# than colours.  ("p" is not a valid matplotlib colour code.)
colorarr = ["b", "r", "y", "g", "purple"]

# cluster[t] holds the indices (into dataset) of the points assigned to
# centroid t.
cluster = [[] for i in range(len(centoids))]
for i in range(maxIters):
    # Assignment step: attach every point to its nearest centroid.
    cluster = [[] for v in range(len(centoids))]
    for j in range(m):
        minK = 0
        minDis = float("inf")
        for k in range(len(centoids)):
            dist = EuDist(dataset[j], centoids[k])
            # "<=" keeps the original tie-breaking: the later centroid wins.
            if dist <= minDis:
                minDis = dist
                minK = k
        cluster[minK].append(j)

    # Update step: move each centroid to the mean of its assigned points.
    for t in range(len(centoids)):
        if not cluster[t]:
            # No points assigned: keep the old centroid rather than
            # dividing by zero.
            continue
        x0 = sum(dataset[x][0] for x in cluster[t])
        y0 = sum(dataset[x][1] for x in cluster[t])
        # Index with t (the cluster being updated), not the stale loop
        # variable k left over from the assignment step.
        centoids[t] = [x0 / len(cluster[t]), y0 / len(cluster[t])]

    # Plot this iteration on a clean figure and save it.  clf() replaces the
    # removed hold(False)/hold(True) mechanism; no show() is needed to save.
    matplotlib.pyplot.clf()
    for k in range(len(cluster)):
        clusterPoint = [dataset[x] for x in cluster[k]]
        x0 = [x[0] for x in clusterPoint]
        y0 = [x[1] for x in clusterPoint]
        matplotlib.pyplot.scatter(x0, y0, c=colorarr[k % len(colorarr)])
    picname = "picture_number_" + str(i + 1) + ".png"
    matplotlib.pyplot.savefig(picname)
代码工作正常,但我遇到了一个问题:我不知道如何在这张图上显示各簇的质心。我知道需要用到变量 centoids,但不知道具体该怎么做。请给我一点提示。
我不能 100% 确定你想要什么,但我认为你只是想在各个簇的合并散点图上叠加绘制各簇的质心,全部画在同一张图中(每个簇都有自己的颜色)。
类似下面这样的写法应该可行:
from matplotlib import pyplot as plt
import numpy as np
# Random stand-in data for the demo: 4 clusters of 100 points each, with the
# x and y coordinates stored as separate (4, 100) arrays.
data = {
    'x': np.random.rand(4, 100),
    'y': np.random.rand(4, 100),
}
# One random centroid per cluster, again as separate x / y arrays.
centoids = {
    'x': np.random.rand(4),
    'y': np.random.rand(4),
}
# One colour per cluster.
colorarr = ["b", "r", "y", "g"]

# Draw each cluster into the same axes, one colour per cluster.
for i, (xs, ys) in enumerate(zip(data['x'], data['y'])):
    plt.scatter(xs, ys, s=50, c=colorarr[i])
plt.grid(True)

# Overlay all centroids in a single call.  Passing the full colour list via
# `color` gives centroid i the colour of cluster i.
plt.scatter(centoids['x'], centoids['y'], marker='+', color=colorarr, s=330)
plt.savefig("random.png")
只需使用此处显示的几行 `plt.` 调用;您不需要更多,当然也不需要 `hold` 变量或 `show`。基本上,您只是把每个簇叠加绘制在前一个簇之上,最后再在其上绘制各簇的质心。
在最后一次 `scatter` 调用中,我把完整的 `colorarr` 传给了 `color` 关键字参数:这样,每个质心都会获得对应簇的颜色。
在您的代码中,它看起来像这样:
# Plotting part of the K-Means script with the centroids drawn on top.
# It uses `cluster`, `dataset`, `centoids` and the iteration counter `i`
# from that script, so it presumably belongs inside the iteration loop.
# ("p" is not a valid matplotlib colour code, hence "purple".)
colorarr = ["b", "r", "y", "g", "purple"]
for k in range(len(cluster)):
    clusterPoint = [dataset[x] for x in cluster[k]]
    x0 = [x[0] for x in clusterPoint]
    y0 = [x[1] for x in clusterPoint]
    # scatter's third positional argument is the marker size, so the old
    # `center` list must not be passed there.
    matplotlib.pyplot.scatter(x0, y0, c=colorarr[k % len(colorarr)])

# Split the [x, y] centroid pairs into separate coordinate sequences.
xcentoids, ycentoids = zip(*centoids)
# Exactly one colour per centroid, matching the colour of its cluster; a
# colour list of a different length than the points is rejected by scatter.
centoidColors = [colorarr[k % len(colorarr)] for k in range(len(centoids))]
matplotlib.pyplot.scatter(xcentoids, ycentoids, marker='+', color=centoidColors, s=330)
picname = "picture_number_" + str(i + 1) + ".png"
matplotlib.pyplot.savefig(picname)