ValueError: x and y must be the same size In Python while creating KMeans Model
ValueError: x and y must be the same size In Python while creating KMeans Model
我正在使用流失数据集构建 Kmeans 聚类模型,但在尝试创建聚类图时收到一条错误消息:ValueError: x 和 y 的大小必须相同。
下面我会贴出我的函数和绘图代码,但为了缩小范围,我认为问题可能与函数中的这两行代码有关:
x=kmeans.cluster_centers_[:,0]
, y=kmeans.cluster_centers_[:,1]
这是完整的代码
def Create_kmeans_cluster_graph(df_final, data, n_clusters, x_title, y_title, chart_title):
""" Fit K-means on ``data`` and plot the points together with the cluster centres.

The points come from the ``x_title + '_norm'`` and ``y_title + '_norm'``
columns. The return value is ``fit_predict`` run on the ``Churn == 1`` rows of
``df_final`` restricted to those same two columns. ``random_state`` and
``plotColor`` are not defined in this function and must exist in the
enclosing scope.
"""
kmeans = KMeans(n_clusters=n_clusters # number of clusters to form
, random_state = random_state # seed read from the enclosing scope
)
kmeans.fit(data)
# One colour per row, looked up in plotColor by cluster label.
kmean_colors = [plotColor[c] for c in kmeans.labels_]
fig = plt.figure(figsize=(12,8))
# x and y are passed as strings, to be resolved as column names of ``data``.
# NOTE(review): if a string is not a column of ``data``, matplotlib appears to
# plot the literal string instead, which would raise "x and y must be the
# same size" -- confirm against the caller's column names.
plt.scatter(x= x_title + '_norm'
, y= y_title + '_norm'
, data=data
, color=kmean_colors # per-point colour from the cluster label
, alpha=0.25 # transparency of the data points
)
plt.xlabel(x_title)
plt.ylabel(y_title)
# Overlay the centres, taking centre columns 0 and 1 as x and y.
plt.scatter(x=kmeans.cluster_centers_[:,0]
, y=kmeans.cluster_centers_[:,1]
, color='black'
, marker='X' # marker shape used for the centres
, s=100 # marker size
)
plt.title(chart_title,fontsize=15)
plt.show()
# Runs the estimator again on the churned rows of df_final rather than returning
# the labels from the fit on ``data`` above.
return kmeans.fit_predict(df_final[df_final.Churn==1][[x_title+'_norm', y_title +'_norm']])
//Graph
df_final['Cluster'] = -1 # default label; rows with Churn != 1 keep -1
# Cluster only the Churn == 1 rows and write the labels back into 'Cluster'.
# NOTE(review): .iloc is position-based; a boolean mask combined with the
# 'Cluster' label looks like it needs .loc instead -- confirm.
df_final.iloc[(df_final.Churn==1),'Cluster'] = Create_kmeans_cluster_graph(df_final
,df_final[df_final.Churn==1][['Tenure_norm','MonthlyCharge_norm']]
,3
,'Tenure'
# NOTE(review): the function appends '_norm', giving 'MonthlyCharges_norm',
# while the frame passed above has 'MonthlyCharge_norm' -- likely the source
# of the size-mismatch error.
,'MonthlyCharges'
,"Tenure vs Monthlycharges : Churn customer cluster")
df_final['Cluster'].unique()
你因为这一行而得到那个错误:
plt.scatter(x= x_title + '_norm'
, y= y_title + '_norm'
, data=data
, color=kmean_colors # color of data points
, alpha=0.25 # transparancy of data points
)
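`plt.scatter` 虽然接受 `data=` 参数,但当 `x`/`y` 给出的字符串不是 `data` 中的列名时,它会把该字符串本身当作数据;这里调用时传入的是 'MonthlyCharges',而实际列名是 'MonthlyCharge_norm',所以 x 和 y 的大小不一致(可以阅读 the help page)。更稳妥的做法是直接传入列,这样列名写错时会立刻抛出 KeyError: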
plt.scatter(data[x_title + '_norm'],data[y_title + '_norm'],...)
或者直接使用 pandas 数据框的 `plot.scatter` 方法,我在下面修改后的函数中就是这样做的:
def Create_kmeans_cluster_graph(df_final, data, n_clusters, x_title, y_title, chart_title):
    """Fit K-means on ``data`` and draw the clusters with their centres.

    Args:
        df_final: Full data frame; accepted for call compatibility, not used.
        data: DataFrame to cluster. Must contain the columns
            ``x_title + '_norm'`` and ``y_title + '_norm'``.
        n_clusters: Number of clusters passed to ``KMeans``.
        x_title: Base name of the x column; also used as the x-axis label.
        y_title: Base name of the y column; also used as the y-axis label.
        chart_title: Title drawn above the plot.

    Returns:
        ``kmeans.labels_``: the cluster label of each row of ``data``.

    Raises:
        KeyError: If one of the two ``*_norm`` columns is missing from
            ``data``.
    """
    x_col = x_title + '_norm'
    y_col = y_title + '_norm'

    # Fail early with a readable message; a misspelled title is the usual
    # cause of confusing size-mismatch errors further down.
    column_names = list(data.columns)
    missing = [col for col in (x_col, y_col) if col not in column_names]
    if missing:
        raise KeyError(
            "column(s) {} not found in data; available columns: {}".format(
                missing, column_names
            )
        )

    # First three entries keep the original colours; wrapping with modulo
    # avoids an IndexError when more clusters than colours are requested.
    palette = ['k', 'g', 'b', 'r', 'c', 'm', 'y']

    # ``random_state`` is read from the enclosing (module) scope.
    kmeans = KMeans(n_clusters=n_clusters, random_state=random_state)
    kmeans.fit(data)
    kmean_colors = [palette[c % len(palette)] for c in kmeans.labels_]

    # Keep the Axes returned by pandas so everything is drawn on the same
    # plot instead of relying on the implicit "current axes".
    ax = data.plot.scatter(x=x_col, y=y_col, color=kmean_colors, alpha=0.25)
    ax.set_xlabel(x_title)
    ax.set_ylabel(y_title)
    ax.set_title(chart_title, fontsize=15)

    # Centre coordinates follow the column order of ``data``, so look up the
    # plotted columns' positions rather than assuming they are 0 and 1.
    x_pos = column_names.index(x_col)
    y_pos = column_names.index(y_col)
    ax.scatter(
        x=kmeans.cluster_centers_[:, x_pos],
        y=kmeans.cluster_centers_[:, y_pos],
        color='black',
        marker='X',
        s=100,
    )
    return kmeans.labels_
在示例数据集上,它有效:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
# Reproducible toy data: fix both the K-means seed and NumPy's global RNG.
random_state = 42
np.random.seed(42)

# The three draws consume the seeded RNG in this order; keep it unchanged.
sample_columns = {
    'Tenure_norm': np.random.uniform(0, 1, 50),
    'MonthlyCharge_norm': np.random.uniform(0, 1, 50),
    'Churn': np.random.randint(0, 3, 50),
}
df_final = pd.DataFrame(sample_columns)

# Cluster only the rows with Churn == 1, using the two normalised features.
churn_mask = df_final.Churn == 1
churned_features = df_final[churn_mask][['Tenure_norm', 'MonthlyCharge_norm']]
Create_kmeans_cluster_graph(
    df_final,
    churned_features,
    3,
    'Tenure',
    'MonthlyCharge',
    "Tenure vs Monthlycharges : Churn customer cluster",
)
我正在使用流失数据集构建 Kmeans 聚类模型,但在尝试创建聚类图时收到一条错误消息:ValueError: x 和 y 的大小必须相同。
下面我会贴出我的函数和绘图代码,但为了缩小范围,我认为问题可能与函数中的这两行代码有关:
x=kmeans.cluster_centers_[:,0]
, y=kmeans.cluster_centers_[:,1]
这是完整的代码
def Create_kmeans_cluster_graph(df_final, data, n_clusters, x_title, y_title, chart_title):
""" Fit K-means on ``data`` and plot the points together with the cluster centres.

The points come from the ``x_title + '_norm'`` and ``y_title + '_norm'``
columns. The return value is ``fit_predict`` run on the ``Churn == 1`` rows of
``df_final`` restricted to those same two columns. ``random_state`` and
``plotColor`` are not defined in this function and must exist in the
enclosing scope.
"""
kmeans = KMeans(n_clusters=n_clusters # number of clusters to form
, random_state = random_state # seed read from the enclosing scope
)
kmeans.fit(data)
# One colour per row, looked up in plotColor by cluster label.
kmean_colors = [plotColor[c] for c in kmeans.labels_]
fig = plt.figure(figsize=(12,8))
# x and y are passed as strings, to be resolved as column names of ``data``.
# NOTE(review): if a string is not a column of ``data``, matplotlib appears to
# plot the literal string instead, which would raise "x and y must be the
# same size" -- confirm against the caller's column names.
plt.scatter(x= x_title + '_norm'
, y= y_title + '_norm'
, data=data
, color=kmean_colors # per-point colour from the cluster label
, alpha=0.25 # transparency of the data points
)
plt.xlabel(x_title)
plt.ylabel(y_title)
# Overlay the centres, taking centre columns 0 and 1 as x and y.
plt.scatter(x=kmeans.cluster_centers_[:,0]
, y=kmeans.cluster_centers_[:,1]
, color='black'
, marker='X' # marker shape used for the centres
, s=100 # marker size
)
plt.title(chart_title,fontsize=15)
plt.show()
# Runs the estimator again on the churned rows of df_final rather than returning
# the labels from the fit on ``data`` above.
return kmeans.fit_predict(df_final[df_final.Churn==1][[x_title+'_norm', y_title +'_norm']])
//Graph
df_final['Cluster'] = -1 # default label; rows with Churn != 1 keep -1
# Cluster only the Churn == 1 rows and write the labels back into 'Cluster'.
# NOTE(review): .iloc is position-based; a boolean mask combined with the
# 'Cluster' label looks like it needs .loc instead -- confirm.
df_final.iloc[(df_final.Churn==1),'Cluster'] = Create_kmeans_cluster_graph(df_final
,df_final[df_final.Churn==1][['Tenure_norm','MonthlyCharge_norm']]
,3
,'Tenure'
# NOTE(review): the function appends '_norm', giving 'MonthlyCharges_norm',
# while the frame passed above has 'MonthlyCharge_norm' -- likely the source
# of the size-mismatch error.
,'MonthlyCharges'
,"Tenure vs Monthlycharges : Churn customer cluster")
df_final['Cluster'].unique()
你因为这一行而得到那个错误:
plt.scatter(x= x_title + '_norm'
, y= y_title + '_norm'
, data=data
, color=kmean_colors # color of data points
, alpha=0.25 # transparancy of data points
)
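`plt.scatter` 虽然接受 `data=` 参数,但当 `x`/`y` 给出的字符串不是 `data` 中的列名时,它会把该字符串本身当作数据;这里调用时传入的是 'MonthlyCharges',而实际列名是 'MonthlyCharge_norm',所以 x 和 y 的大小不一致(可以阅读 the help page)。更稳妥的做法是直接传入列,这样列名写错时会立刻抛出 KeyError: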
plt.scatter(data[x_title + '_norm'],data[y_title + '_norm'],...)
或者直接使用 pandas 数据框的 `plot.scatter` 方法,我在下面修改后的函数中就是这样做的:
def Create_kmeans_cluster_graph(df_final, data, n_clusters, x_title, y_title, chart_title):
    """Fit K-means on ``data`` and draw the clusters with their centres.

    Args:
        df_final: Full data frame; accepted for call compatibility, not used.
        data: DataFrame to cluster. Must contain the columns
            ``x_title + '_norm'`` and ``y_title + '_norm'``.
        n_clusters: Number of clusters passed to ``KMeans``.
        x_title: Base name of the x column; also used as the x-axis label.
        y_title: Base name of the y column; also used as the y-axis label.
        chart_title: Title drawn above the plot.

    Returns:
        ``kmeans.labels_``: the cluster label of each row of ``data``.

    Raises:
        KeyError: If one of the two ``*_norm`` columns is missing from
            ``data``.
    """
    x_col = x_title + '_norm'
    y_col = y_title + '_norm'

    # Fail early with a readable message; a misspelled title is the usual
    # cause of confusing size-mismatch errors further down.
    column_names = list(data.columns)
    missing = [col for col in (x_col, y_col) if col not in column_names]
    if missing:
        raise KeyError(
            "column(s) {} not found in data; available columns: {}".format(
                missing, column_names
            )
        )

    # First three entries keep the original colours; wrapping with modulo
    # avoids an IndexError when more clusters than colours are requested.
    palette = ['k', 'g', 'b', 'r', 'c', 'm', 'y']

    # ``random_state`` is read from the enclosing (module) scope.
    kmeans = KMeans(n_clusters=n_clusters, random_state=random_state)
    kmeans.fit(data)
    kmean_colors = [palette[c % len(palette)] for c in kmeans.labels_]

    # Keep the Axes returned by pandas so everything is drawn on the same
    # plot instead of relying on the implicit "current axes".
    ax = data.plot.scatter(x=x_col, y=y_col, color=kmean_colors, alpha=0.25)
    ax.set_xlabel(x_title)
    ax.set_ylabel(y_title)
    ax.set_title(chart_title, fontsize=15)

    # Centre coordinates follow the column order of ``data``, so look up the
    # plotted columns' positions rather than assuming they are 0 and 1.
    x_pos = column_names.index(x_col)
    y_pos = column_names.index(y_col)
    ax.scatter(
        x=kmeans.cluster_centers_[:, x_pos],
        y=kmeans.cluster_centers_[:, y_pos],
        color='black',
        marker='X',
        s=100,
    )
    return kmeans.labels_
在示例数据集上,它有效:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
# Reproducible toy data: fix both the K-means seed and NumPy's global RNG.
random_state = 42
np.random.seed(42)

# The three draws consume the seeded RNG in this order; keep it unchanged.
sample_columns = {
    'Tenure_norm': np.random.uniform(0, 1, 50),
    'MonthlyCharge_norm': np.random.uniform(0, 1, 50),
    'Churn': np.random.randint(0, 3, 50),
}
df_final = pd.DataFrame(sample_columns)

# Cluster only the rows with Churn == 1, using the two normalised features.
churn_mask = df_final.Churn == 1
churned_features = df_final[churn_mask][['Tenure_norm', 'MonthlyCharge_norm']]
Create_kmeans_cluster_graph(
    df_final,
    churned_features,
    3,
    'Tenure',
    'MonthlyCharge',
    "Tenure vs Monthlycharges : Churn customer cluster",
)