如何更改 Python prince 绘图中数据点的标记(marker)样式

How to change dot types in Python prince

我正在使用 prince 进行对应分析(CA):

import pandas as pd
import matplotlib.pyplot as plt

# Print every float with six digits after the decimal point.
pd.set_option('display.float_format', '{:.6f}'.format)

# 58 x 18 table of 0/1 entries; each row and column carries its own label,
# and the two axes are named 'Rows' and 'Columns'.
X = pd.DataFrame(
    data=[
        [1,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0,0,0],
        [0,0,0,0,1,0,0,1,1,0,0,1,0,0,0,1,1,0],
        [0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0],
        [0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0],
        [0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,1,0,0],
        [0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1],
        [0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0],
        [1,0,0,0,0,0,0,1,0,1,1,0,0,1,0,0,0,0],
        [0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,1,0,0],
        [0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0],
        [0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0],
        [0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0],
        [0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0],
        [0,0,0,0,1,1,0,0,1,0,0,1,0,0,0,0,0,0],
        [0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,0],
        [0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,1,0,0],
        [0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0],
        [0,0,0,1,1,0,1,1,0,0,1,1,0,0,0,1,0,0],
        [0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0],
        [0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0],
        [0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0],
        [0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
        [0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0],
        [0,1,0,0,1,0,0,0,1,0,1,1,0,0,0,0,0,0],
        [0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0],
        [0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0],
        [0,0,1,0,0,1,0,0,0,0,0,1,1,0,0,0,1,0],
        [0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1],
        [0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
        [0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0],
        [0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0],
        [0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0],
        [1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0],
        [0,0,1,1,1,1,1,0,0,1,1,1,0,0,0,0,0,0],
        [0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1],
        [0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0],
        [0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0],
        [0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0],
        [1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0],
        [0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0],
        [0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0],
        [0,1,1,0,0,0,0,0,0,1,0,1,0,0,0,1,0,1],
        [0,0,1,0,0,1,0,0,1,0,0,1,0,0,0,1,1,1],
        [0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0],
        [0,1,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0],
        [1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0],
        [1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0],
        [0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,1,0,1],
        [1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0],
        [0,0,0,1,1,0,0,0,1,0,0,0,0,1,0,0,0,0],
        [1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0],
        [0,1,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0],
        [0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,0],
        [1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0],
        [0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,1,1],
        [0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0],
        [0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0],
        [1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0],
    ],
    columns=[
        'toilNnseC', 'iClRnroscde', 'DT', 'sERH', 'ioRgL', 'BN',
        'NN', 'OSP', 'hntoPy', 'R', 'FR', 'MSV',
        'Uwknnon', 'AcTESK', 'ebmndgied', 'emlma', 'arsngm', 'fitfd',
    ],
    index=[
        'Asaf_r9h1', '1vam_9anAbal', 't_t5ggoB1a', 'l9arrB1teohi_',
        'ie_Bd15', 'rd1ita9B_t', '5er1_aCllr', '_9orsCa1n',
        'stoC_5a1r', 'Chyr7a1_', 'C1_rah8y', 'a17k_lrC',
        'ko_61oC', 'mCehtpspo71_ri', 'p8motpr_Cis1eh', 'ncroro1C8a_',
        '71D_ia', '9seDa1_nelin', 'na1Do_7', 'De__1aL8uVny_',
        'rasdee8_nFn1', 'nla17inFn_io', 'e_ar9usGl1', 'Gdn7io_w1o',
        '1uu9Gt_knu', 'a20r_tH', 'e5_H1', 'nHoog7ndre_1oo',
        '_l1a5Iqb', 'a_c7J1nsok', 'yo8_1reL', '_16Luo',
        '1eungaM_8', 'et6rM_e1gz', 'M1zu9e_k', '_se18reMnos',
        'P1apai_9hnrt', 'yat_lPi5J_leeelktaa1', 'P1etll_o5Layd_', '2silP1er_',
        '_s6nPtaei1', '7oa_1Pdas', '_8o1Ry', '1ne5dRoys_',
        'ehesu_Sw1rcg7', '3ir_n1eSh', 'Sh11_on', 'Tankoa7_1',
        '_ana6aT1n', 'o_19T', 'o1aTz9_p', '_217rTn0a',
        '_1nuT6g', '1Veceu_ht9ar', '_aY1n9', 'a81aYvrdz_a',
        '_1Yu1', 'ogZh1n9_',
    ],
)
X.columns.name = 'Columns'
X.index.name = 'Rows'

import prince
# Build prince's CA model with two components and fit it on X in one step.
ca = prince.CA(
    n_components=2,
    n_iter=3,
    copy=True,
    check_input=True,
    engine='auto',
    random_state=42,
).fit(X)

# The coordinate tables are requested here but not stored in any variable.
ca.row_coordinates(X)
ca.column_coordinates(X)

# Draw rows and columns on components 0 and 1 without text labels; no axes
# object is supplied, and the call's return value is kept as `ax`.
ax = ca.plot_coordinates(
    X=X,
    ax=None,
    figsize=(6, 6),
    x_component=0,
    y_component=1,
    show_row_labels=False,
    show_col_labels=False,
)

# Write the figure that owns those axes to disk.
fig = ax.get_figure()
fig.savefig('figure01.png')

结果如下:

我的问题是:将要发表此图的期刊要求两类点使用不同的符号(我目前是用颜色来区分的)。我怎样才能改变点的形状(例如十字、菱形或正方形……)?

把行坐标和列坐标分开绘制之后,就可以分别为它们指定 marker(标记)参数:

# Fetch row and column coordinates separately so that each set can be drawn
# by its own scatter call, and therefore with its own marker.
row_coords = ca.row_coordinates(X)
col_coords = ca.column_coordinates(X)

# Axis captions: each component's share of inertia as a percentage rounded
# to two decimals.
inertia = ca.explained_inertia_
x_label, y_label = (
    'Component ' + str(k) + ' (' + str(round(inertia[k] * 100, 2)) + '% inertia)'
    for k in (0, 1)
)

plt.figure(num=None, figsize=(8, 8), dpi=100, facecolor='w', edgecolor='k')

# One scatter layer per set; columns 0 and 1 of each table are the two
# plotted components.
for coords, marker, label in ((row_coords, '^', 'Row'), (col_coords, 'x', 'Columns')):
    plt.scatter(coords[0], coords[1], marker=marker, label=label)

plt.xlabel(x_label)
plt.ylabel(y_label)
plt.grid()
plt.legend()

# Black reference lines through the origin.
plt.axhline(y=0, color='k')
plt.axvline(x=0, color='k')

我不确定这是不是最好的方法,但它确实有效。

您也可以在绘图完成之后再更改标记(此做法参考了另一个回答,原文中的链接已缺失;如果觉得有用,请为那个回答投票表示感谢):

...
from matplotlib.markers import MarkerStyle
from matplotlib.collections import PathCollection
...
# Fit the CA model (the coordinate calls below do not store their results).
ca = prince.CA(n_components=2,n_iter=3,copy=True,check_input=True,engine='auto',random_state=42)
ca = ca.fit(X)
ca.row_coordinates(X)
ca.column_coordinates(X)

fig, ax = plt.subplots(figsize=(6, 6))

# Let prince draw onto our axes; every point is black so that only the
# marker shape distinguishes the groups.
ca.plot_coordinates(X=X,
                    ax=ax,
                    x_component=0,
                    y_component=1,
                    show_row_labels=False,
                    show_col_labels=False,
                    color="black"
                  )

# One marker per scatter layer, in the order the layers were added.
# Marker codes are case-sensitive: the square is "s" ("S" is rejected by
# MarkerStyle with a ValueError).
markerlist = ["o", "v", "s", "H"]

# Select the scatter layers first so that a non-scatter collection cannot
# silently consume one of the markers.
scatter_layers = [item for item in ax.collections if isinstance(item, PathCollection)]

for item, marker in zip(scatter_layers, markerlist):
    new_marker = MarkerStyle(marker)
    # get_path() alone is the raw glyph; the marker's own transform supplies
    # its rotation, scale and centring (without it "v" points upward and the
    # square is drawn off-centre).
    marker_path = new_marker.get_path().transformed(new_marker.get_transform())
    item.set_paths((marker_path,))
    item.set_sizes([10])

ax.legend()

plt.show()