Python:k-均值聚类
Python: k-means clustering
我正在尝试对 .csv 文件中的经度和纬度数据使用 k 均值聚类,但我不想绘制图表,只想获取并打印质心,以便在 Google 地图中查看这些位置。有人知道如何编写代码吗?
import pandas as pd
import numpy as np
import csv
# Read two numeric columns of the CSV into parallel lists and wrap them in a
# DataFrame with columns 'x' and 'y'.
# newline='' is what the csv module documentation recommends for file objects
# handed to csv.reader, so quoted fields with embedded newlines parse correctly.
with open('fileName.csv', 'r', newline='') as infile:
    csv_reader = csv.reader(infile, delimiter=',')
    x = []
    y = []
    for row in csv_reader:
        # Skip the header row, recognised by the 'LONGITUDE' label in column 3.
        if row[3] != 'LONGITUDE':
            x.append(float(row[3]))
            # Column 4 is presumably the latitude -- confirm against the file.
            y.append(float(row[4]))

df = pd.DataFrame({
    'x': x,
    'y': y,
})
# TODO: implement x and y in k-means and print the centroids
我建议查看 Shapely 库。
from shapely.geometry import MultiPoint
import pandas as pd
import numpy as np
import csv
# Easy way to read your csv file in
df = pd.read_csv('fileName.csv').rename(columns={'LONGITUDE': 'x', 'LATITUDE': 'y'})

# Assumes you have a column 'cluster_id' that references the cluster id for each coordinate
cluster_ids = df['cluster_id'].unique()

# Build kmeans_clusters: a list of clusters, each a list of (x, y) pairs.
kmeans_clusters = []
for cluster_id in cluster_ids:
    # Filtered df for each cluster id
    cluster_df = df.loc[df['cluster_id'] == cluster_id]
    x_values = cluster_df['x'].tolist()
    y_values = cluster_df['y'].tolist()
    kmeans_clusters.append(list(zip(x_values, y_values)))

# NOTE: the centroid of a convex hull is the geometric centre of the hull
# shape; it generally differs from the arithmetic mean of the points, which is
# what k-means itself uses as a cluster centroid.
centroids = []
for cluster in kmeans_clusters:
    if len(cluster) > 1:
        # Create a convex hull, find the centroid
        convex_hull = MultiPoint(cluster).convex_hull
        centroid = convex_hull.centroid
        # Unpack the single coordinate of the centroid point into a tuple
        centroids.append(list(centroid.coords)[0])
    else:
        # Single point cluster, it is the centroid
        centroids.append(cluster[0])

print(centroids)
我正在尝试对 .csv 文件中的经度和纬度数据使用 k 均值聚类,但我不想绘制图表,只想获取并打印质心,以便在 Google 地图中查看这些位置。有人知道如何编写代码吗?
import pandas as pd
import numpy as np
import csv
# Read two numeric columns of the CSV into parallel lists and wrap them in a
# DataFrame with columns 'x' and 'y'.
# newline='' is what the csv module documentation recommends for file objects
# handed to csv.reader, so quoted fields with embedded newlines parse correctly.
with open('fileName.csv', 'r', newline='') as infile:
    csv_reader = csv.reader(infile, delimiter=',')
    x = []
    y = []
    for row in csv_reader:
        # Skip the header row, recognised by the 'LONGITUDE' label in column 3.
        if row[3] != 'LONGITUDE':
            x.append(float(row[3]))
            # Column 4 is presumably the latitude -- confirm against the file.
            y.append(float(row[4]))

df = pd.DataFrame({
    'x': x,
    'y': y,
})
# TODO: implement x and y in k-means and print the centroids
我建议查看 Shapely 库。
from shapely.geometry import MultiPoint
import pandas as pd
import numpy as np
import csv
# Easy way to read your csv file in
df = pd.read_csv('fileName.csv').rename(columns={'LONGITUDE': 'x', 'LATITUDE': 'y'})

# Assumes you have a column 'cluster_id' that references the cluster id for each coordinate
cluster_ids = df['cluster_id'].unique()

# Build kmeans_clusters: a list of clusters, each a list of (x, y) pairs.
kmeans_clusters = []
for cluster_id in cluster_ids:
    # Filtered df for each cluster id
    cluster_df = df.loc[df['cluster_id'] == cluster_id]
    x_values = cluster_df['x'].tolist()
    y_values = cluster_df['y'].tolist()
    kmeans_clusters.append(list(zip(x_values, y_values)))

# NOTE: the centroid of a convex hull is the geometric centre of the hull
# shape; it generally differs from the arithmetic mean of the points, which is
# what k-means itself uses as a cluster centroid.
centroids = []
for cluster in kmeans_clusters:
    if len(cluster) > 1:
        # Create a convex hull, find the centroid
        convex_hull = MultiPoint(cluster).convex_hull
        centroid = convex_hull.centroid
        # Unpack the single coordinate of the centroid point into a tuple
        centroids.append(list(centroid.coords)[0])
    else:
        # Single point cluster, it is the centroid
        centroids.append(cluster[0])

print(centroids)