在散点图上添加颜色条标签作为文本
Add colorbar labels as text on scatter plot
我使用以下方法生成散点图:
# Build a frame holding the two UMAP coordinates and the cluster id of each point.
# `auto` and `communities` are defined outside this snippet.
x = list(auto['umap1'])
y = list(auto['umap2'])
final_df2 = pd.DataFrame(list(zip(x, y, communities)), columns=['x', 'y', 'cluster'])
no_clusters = max(communities)
cluster_list = list(range(min(communities), no_clusters + 1))
fig2, ax = plt.subplots(figsize=(20, 15))
# plt.cm.get_cmap was removed in Matplotlib 3.9; plt.get_cmap accepts the same
# (name, lut) arguments and also exists in older releases.
plt.scatter(x, y, c=final_df2['cluster'], cmap=plt.get_cmap('hsv', max(cluster_list)), s=0.5)
plt.title('Phenograph on UMAP - All Markers (auto)', fontsize=15)
plt.xlabel('umap_1', fontsize=15)
plt.ylabel('umap_2', fontsize=15)
# Tick on the actual cluster ids; range(max(cluster_list)) started at 0 and
# stopped one short of the largest id.
plt.colorbar(extend='both', ticks=cluster_list)
plt.show()
我想知道如何把颜色条上的标签(数字 1–31)以文本形式标注到图中各自对应的聚类上。之所以需要这样做,是因为颜色会循环回到红色,仅凭颜色很难区分各个聚类。
我试过了:
# Writes one annotation per data point (the text is that point's cluster id),
# so every point in the scatter plot gets its own label.
n = list(final_df2['cluster'])
for i, txt in enumerate(n):
    # annotate expects the position as (x, y); the coordinates were passed swapped.
    ax.annotate(txt, (x[i], y[i]))
但这并没有奏效。
您的标注代码为每一个点都写了一条标注,结果图上只会变成一片数字的海洋。
您应该设法为每个聚类找到一个中心点,例如对属于同一聚类的所有点的坐标取平均值。
然后用这个中心坐标来放置文本。您还可以给文本加上背景,使其更容易阅读。
由于我没有您的数据,下面的代码模拟了一些围绕若干中心分布的点。
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
# calculate some random points to serve as cluster centers; run a few steps of a relaxing algorithm to separate them a bit
def random_distibuted_centers(max_clust=None):
    """Return random 2-D center coordinates, nudged apart by a short relaxation.

    Args:
        max_clust: Highest center index to generate. Defaults to the
            module-level ``MAX_CLUST`` when omitted.

    Returns:
        A tuple ``(cx, cy)`` of NumPy arrays, each of length ``max_clust + 1``.
        Index 0 is drawn like the others but never moved by the relaxation,
        which only visits indices ``1..max_clust``.
    """
    if max_clust is None:
        max_clust = MAX_CLUST
    cx = np.random.uniform(-10, 10, max_clust + 1)
    cy = np.random.uniform(-10, 10, max_clust + 1)
    for _ in range(10):
        for i in range(1, max_clust + 1):
            for j in range(1, max_clust + 1):
                if i == j:
                    continue
                dist = np.linalg.norm([cx[i] - cx[j], cy[i] - cy[j]])
                # Skip coincident centers: dist == 0 would divide by zero and
                # there is no direction to push along anyway.
                if 0 < dist < 4:
                    # Move center i a fixed step of 0.4 directly away from center j.
                    cx[i] += 0.4 * (cx[i] - cx[j]) / dist
                    cy[i] += 0.4 * (cy[i] - cy[j]) / dist
    return cx, cy
N = 1000  # points generated per cluster
MAX_CLUST = 31  # cluster ids run from 1 to MAX_CLUST
cx, cy = random_distibuted_centers()
# for demonstration purposes, just generate some random points around the centers
x = np.concatenate([np.random.normal(cx[i], 2, N) for i in range(1, MAX_CLUST + 1)])
y = np.concatenate([np.random.normal(cy[i], 2, N) for i in range(1, MAX_CLUST + 1)])
communities = np.repeat(range(1, MAX_CLUST + 1), N)
final_df2 = pd.DataFrame({'x': x, 'y': y, 'cluster': communities})
no_clusters = max(communities)
cluster_list = list(range(min(communities), no_clusters + 1))
fig2, ax = plt.subplots(figsize=(20, 15))
# plt.cm.get_cmap was removed in Matplotlib 3.9; plt.get_cmap accepts the same
# (name, lut) arguments and also exists in older releases.
plt.scatter(x, y, c=final_df2['cluster'], cmap=plt.get_cmap('hsv', max(cluster_list)), s=0.5)
plt.title('Phenograph on UMAP - All Markers (auto)', fontsize=15)
plt.xlabel('umap_1', fontsize=15)
plt.ylabel('umap_2', fontsize=15)
plt.colorbar(extend='both', ticks=cluster_list)
bbox_props = dict(boxstyle="circle,pad=0.3", fc="white", ec="black", lw=2, alpha=0.9)
# Label each cluster at the mean position of its own points, so the same code
# works on real data where no generating centers are available.
centers = final_df2.groupby('cluster')[['x', 'y']].mean()
for row in centers.itertuples():
    ax.annotate(str(row.Index), xy=(row.x, row.y), ha='center', va='center', bbox=bbox_props)
plt.show()
我使用以下方法生成散点图:
# Build a frame holding the two UMAP coordinates and the cluster id of each point.
# `auto` and `communities` are defined outside this snippet.
x = list(auto['umap1'])
y = list(auto['umap2'])
final_df2 = pd.DataFrame(list(zip(x, y, communities)), columns=['x', 'y', 'cluster'])
no_clusters = max(communities)
cluster_list = list(range(min(communities), no_clusters + 1))
fig2, ax = plt.subplots(figsize=(20, 15))
# plt.cm.get_cmap was removed in Matplotlib 3.9; plt.get_cmap accepts the same
# (name, lut) arguments and also exists in older releases.
plt.scatter(x, y, c=final_df2['cluster'], cmap=plt.get_cmap('hsv', max(cluster_list)), s=0.5)
plt.title('Phenograph on UMAP - All Markers (auto)', fontsize=15)
plt.xlabel('umap_1', fontsize=15)
plt.ylabel('umap_2', fontsize=15)
# Tick on the actual cluster ids; range(max(cluster_list)) started at 0 and
# stopped one short of the largest id.
plt.colorbar(extend='both', ticks=cluster_list)
plt.show()
我想知道如何把颜色条上的标签(数字 1–31)以文本形式标注到图中各自对应的聚类上。之所以需要这样做,是因为颜色会循环回到红色,仅凭颜色很难区分各个聚类。
我试过了:
# Writes one annotation per data point (the text is that point's cluster id),
# so every point in the scatter plot gets its own label.
n = list(final_df2['cluster'])
for i, txt in enumerate(n):
    # annotate expects the position as (x, y); the coordinates were passed swapped.
    ax.annotate(txt, (x[i], y[i]))
但这并没有奏效。
您的标注代码为每一个点都写了一条标注,结果图上只会变成一片数字的海洋。
您应该设法为每个聚类找到一个中心点,例如对属于同一聚类的所有点的坐标取平均值。
然后用这个中心坐标来放置文本。您还可以给文本加上背景,使其更容易阅读。
由于我没有您的数据,下面的代码模拟了一些围绕若干中心分布的点。
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
# calculate some random points to serve as cluster centers; run a few steps of a relaxing algorithm to separate them a bit
def random_distibuted_centers(max_clust=None):
    """Return random 2-D center coordinates, nudged apart by a short relaxation.

    Args:
        max_clust: Highest center index to generate. Defaults to the
            module-level ``MAX_CLUST`` when omitted.

    Returns:
        A tuple ``(cx, cy)`` of NumPy arrays, each of length ``max_clust + 1``.
        Index 0 is drawn like the others but never moved by the relaxation,
        which only visits indices ``1..max_clust``.
    """
    if max_clust is None:
        max_clust = MAX_CLUST
    cx = np.random.uniform(-10, 10, max_clust + 1)
    cy = np.random.uniform(-10, 10, max_clust + 1)
    for _ in range(10):
        for i in range(1, max_clust + 1):
            for j in range(1, max_clust + 1):
                if i == j:
                    continue
                dist = np.linalg.norm([cx[i] - cx[j], cy[i] - cy[j]])
                # Skip coincident centers: dist == 0 would divide by zero and
                # there is no direction to push along anyway.
                if 0 < dist < 4:
                    # Move center i a fixed step of 0.4 directly away from center j.
                    cx[i] += 0.4 * (cx[i] - cx[j]) / dist
                    cy[i] += 0.4 * (cy[i] - cy[j]) / dist
    return cx, cy
N = 1000  # points generated per cluster
MAX_CLUST = 31  # cluster ids run from 1 to MAX_CLUST
cx, cy = random_distibuted_centers()
# for demonstration purposes, just generate some random points around the centers
x = np.concatenate([np.random.normal(cx[i], 2, N) for i in range(1, MAX_CLUST + 1)])
y = np.concatenate([np.random.normal(cy[i], 2, N) for i in range(1, MAX_CLUST + 1)])
communities = np.repeat(range(1, MAX_CLUST + 1), N)
final_df2 = pd.DataFrame({'x': x, 'y': y, 'cluster': communities})
no_clusters = max(communities)
cluster_list = list(range(min(communities), no_clusters + 1))
fig2, ax = plt.subplots(figsize=(20, 15))
# plt.cm.get_cmap was removed in Matplotlib 3.9; plt.get_cmap accepts the same
# (name, lut) arguments and also exists in older releases.
plt.scatter(x, y, c=final_df2['cluster'], cmap=plt.get_cmap('hsv', max(cluster_list)), s=0.5)
plt.title('Phenograph on UMAP - All Markers (auto)', fontsize=15)
plt.xlabel('umap_1', fontsize=15)
plt.ylabel('umap_2', fontsize=15)
plt.colorbar(extend='both', ticks=cluster_list)
bbox_props = dict(boxstyle="circle,pad=0.3", fc="white", ec="black", lw=2, alpha=0.9)
# Label each cluster at the mean position of its own points, so the same code
# works on real data where no generating centers are available.
centers = final_df2.groupby('cluster')[['x', 'y']].mean()
for row in centers.itertuples():
    ax.annotate(str(row.Index), xy=(row.x, row.y), ha='center', va='center', bbox=bbox_props)
plt.show()