如何在 Kmeans 中获取中心点
How to get the centre point in Kmeans
下面是我使用的数据集示例:
id,product,store,revenue,store_capacity,state
1,Ball,AB,222,1000,CA
1,Pen,AB,234,1452,WD
2,Books,CD,543,888,MA
2,Ink,EF,123,9865,NY
代码如下
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from scipy.spatial.distance import euclidean
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler, StandardScaler
sns.set(rc={'figure.figsize':(11.7,8.27)})
df = pd.read_csv(r'1.csv',index_col=None)
dummies = pd.get_dummies(data = df)
km = KMeans(n_clusters=2).fit(dummies)
labels = km.predict(dummies)
dummies['cluster_id'] = km.labels_
def distance_to_centroid(row, centroid):
row = row[['id', 'product', 'store', 'revenue','store_capacity', 'state_AL', 'state_CA', 'state_CH',
'state_WD', 'country_India', 'country_Japan', 'country_USA']]
return euclidean(row, centroid)
dummies['distance_to_center0'] = dummies.apply(lambda r: distance_to_centroid(r,
km.cluster_centers_[0]),1)
dummies['distance_to_center1'] = dummies.apply(lambda r: distance_to_centroid(r,
km.cluster_centers_[1]),1)
dummies['distance_to_center2'] = dummies.apply(lambda r: distance_to_centroid(r,
km.cluster_centers_[2]),1)
dummies_df = dummies[['distance_to_center0','distance_to_center1','cluster_id']]
test = {0:"Blue", 1:"Red", 2:"Green"}
sns.scatterplot(x="distance_to_center0", y="distance_to_center1", data=dummies_df, hue="cluster_id", palette = test)
我需要获取每个簇的中心,下面的代码正在获取 centroid of each element
表示每个元素到簇中心点的距离是多少
centroids = km.cluster_centers_
centroid_labels = [centroids[i] for i in labels]
centroid_label
我想获取每个簇的中心点
礼貌@Isma
km = KMeans(n_clusters=7).fit(dummies)
closest, _ = pairwise_distances_argmin_min(km.cluster_centers_, dummies)
closest
下面是我使用的数据集示例:
id,product,store,revenue,store_capacity,state
1,Ball,AB,222,1000,CA
1,Pen,AB,234,1452,WD
2,Books,CD,543,888,MA
2,Ink,EF,123,9865,NY
代码如下
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from scipy.spatial.distance import euclidean
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler, StandardScaler
sns.set(rc={'figure.figsize':(11.7,8.27)})
df = pd.read_csv(r'1.csv',index_col=None)
dummies = pd.get_dummies(data = df)
km = KMeans(n_clusters=2).fit(dummies)
labels = km.predict(dummies)
dummies['cluster_id'] = km.labels_
def distance_to_centroid(row, centroid):
row = row[['id', 'product', 'store', 'revenue','store_capacity', 'state_AL', 'state_CA', 'state_CH',
'state_WD', 'country_India', 'country_Japan', 'country_USA']]
return euclidean(row, centroid)
dummies['distance_to_center0'] = dummies.apply(lambda r: distance_to_centroid(r,
km.cluster_centers_[0]),1)
dummies['distance_to_center1'] = dummies.apply(lambda r: distance_to_centroid(r,
km.cluster_centers_[1]),1)
dummies['distance_to_center2'] = dummies.apply(lambda r: distance_to_centroid(r,
km.cluster_centers_[2]),1)
dummies_df = dummies[['distance_to_center0','distance_to_center1','cluster_id']]
test = {0:"Blue", 1:"Red", 2:"Green"}
sns.scatterplot(x="distance_to_center0", y="distance_to_center1", data=dummies_df, hue="cluster_id", palette = test)
我需要获取每个簇的中心,下面的代码正在获取 centroid of each element
表示每个元素到簇中心点的距离是多少
centroids = km.cluster_centers_
centroid_labels = [centroids[i] for i in labels]
centroid_label
我想获取每个簇的中心点
礼貌@Isma
km = KMeans(n_clusters=7).fit(dummies)
closest, _ = pairwise_distances_argmin_min(km.cluster_centers_, dummies)
closest