如何避免散点图中标签重叠
How to avoid overlapping of labels in scatter plot
我的数据框如下所示:
Driving Power Dependence Power
F1 9.0 15.0
F2 14.0 14.0
F3 15.0 13.0
F4 16.0 1.0
F5 15.0 15.0
F6 15.0 15.0
F7 15.0 13.0
F8 12.0 15.0
F9 15.0 15.0
F10 15.0 15.0
F11 14.0 12.0
F12 11.0 15.0
F13 15.0 15.0
F14 15.0 10.0
F15 15.0 13.0
F16 1.0 16.0
我使用以下代码绘制了以上数据:
# Draw the MICMAC scatter chart: one red point per factor, each annotated
# with its factor name, plus quadrant divider lines and quadrant captions.
# Expects `prom` (the data frame shown above) and `plt` (matplotlib.pyplot)
# to be defined already.

# Columns used as x and y coordinates
x = prom['Dependence Power']
y = prom['Driving Power']
n = ['F1','F2','F3','F4','F5','F6','F7','F8','F9','F10','F11','F12','F13','F14','F15','F16']
##########################################
plt.scatter(x, y, color="red")
plt.xlim([0, 18])
plt.ylim([0, 18])
# Walk labels and coordinates in lockstep instead of indexing x[i] / y[i]:
# if the Series are indexed by factor name (as the table above suggests),
# integer keys only work through pandas' deprecated positional fallback.
for txt, xi, yi in zip(n, x, y):
    plt.annotate(txt, (xi, yi), fontsize=8, rotation=0)
plt.ylabel('Driving Power', fontweight='bold')
plt.xlabel('Dependence Power', fontweight='bold')
plt.title("MICMAC Analysis", fontsize = 13,fontweight='bold')
plt.grid()
# Quadrant divider lines. xmin/xmax (ymin/ymax) are fractions of the axes
# width (height) in [0, 1], not data coordinates, so 0..1 spans the full axes.
plt.axhline(y=8, xmin=0, xmax=1)
plt.axvline(x=9, ymin=0, ymax=1)
# Quadrant captions, positioned in data coordinates.
# NOTE(review): with dependence on x and driving on y, the upper-left
# quadrant is high-driving/low-dependence -- confirm whether the
# 'Driving Factors' and 'Linkage Factors' captions are swapped.
plt.text(10, 10, 'Driving Factors')
plt.text(2,10,'Linkage Factors')
plt.text(2,4, "Autonomous Factors")
plt.text(10,4,'Dependent Factors')
#plt.savefig('micmac.png')
plt.show()
我的图看起来不错,但有一些注释相互重叠。例如,请看第一象限中显示为 'F15' 和 'F18' 的标签:显示为 'F15' 的位置应当标注 'F3','F7','F15',显示为 'F18' 的位置应当标注 'F5','F6','F9','F10','F13'。
我需要这样的输出:
可能有几种方法。其中一种是:为注释单独创建一个数据框,按两列的值分组,并把落在同一坐标上的索引收集成列表,然后用这个新数据框来添加注释。在本例数据中,分组后仍有个别标签字符串相互重叠,因此只需单独调整这些索引的偏移值。
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import io
# MICMAC scatter chart with one annotation per distinct point: factors that
# share the same (Driving Power, Dependence Power) coordinates are grouped
# and their names are joined into a single comma-separated label.
data = '''
"Driving Power" "Dependence Power"
F1 9.0 15.0
F2 14.0 14.0
F3 15.0 13.0
F4 16.0 1.0
F5 15.0 15.0
F6 15.0 15.0
F7 15.0 13.0
F8 12.0 15.0
F9 15.0 15.0
F10 15.0 15.0
F11 14.0 12.0
F12 11.0 15.0
F13 15.0 15.0
F14 15.0 10.0
F15 15.0 13.0
F16 1.0 16.0
'''
# The header has two fields and each data row has three, so pandas uses the
# first column (F1..F16) as the index. sep=r'\s+' replaces the deprecated
# delim_whitespace=True.
prom = pd.read_csv(io.StringIO(data), sep=r'\s+')
x = prom['Dependence Power']
y = prom['Driving Power']
n = ['F1','F2','F3','F4','F5','F6','F7','F8','F9','F10','F11','F12','F13','F14','F15','F16']
# Collapse the frame to one row per distinct coordinate pair; the 'index'
# column then holds the list of factor names located at that point.
prom = prom.reset_index(drop=False).groupby(['Driving Power','Dependence Power'])['index'].apply(list).reset_index()
plt.scatter(x, y, color="red")
plt.xlim([0, 18])
plt.ylim([0, 18])
# Vertical distance (data units) between a point and its label. Exceptions
# are keyed by (Driving Power, Dependence Power) rather than by row position
# in the grouped frame, so they keep working if rows are added or reordered.
default_offset = 0.4
offset_overrides = {(15.0, 15.0): 0.2}
for _, row in prom.iterrows():
    point = (row['Dependence Power'], row['Driving Power'])
    offset = offset_overrides.get(
        (row['Driving Power'], row['Dependence Power']), default_offset)
    plt.annotate(','.join(row['index']),
                 point,
                 xytext=(point[0], point[1] + offset),
                 fontsize=8)
plt.ylabel('Driving Power', fontweight='bold')
plt.xlabel('Dependence Power', fontweight='bold')
plt.title("MICMAC Analysis", fontsize = 13,fontweight='bold')
plt.grid()
# Quadrant divider lines. xmin/xmax (ymin/ymax) are fractions of the axes
# width (height) in [0, 1], not data coordinates, so 0..1 spans the full axes.
plt.axhline(y=8, xmin=0, xmax=1)
plt.axvline(x=9, ymin=0, ymax=1)
# Quadrant captions, positioned in data coordinates.
# NOTE(review): with dependence on x and driving on y, the upper-left
# quadrant is high-driving/low-dependence -- confirm whether the
# 'Driving Factors' and 'Linkage Factors' captions are swapped.
plt.text(10, 10, 'Driving Factors')
plt.text(2,10,'Linkage Factors')
plt.text(2,4, "Autonomous Factors")
plt.text(10,4,'Dependent Factors')
#plt.savefig('micmac.png')
plt.show()
我的数据框如下所示:
Driving Power Dependence Power
F1 9.0 15.0
F2 14.0 14.0
F3 15.0 13.0
F4 16.0 1.0
F5 15.0 15.0
F6 15.0 15.0
F7 15.0 13.0
F8 12.0 15.0
F9 15.0 15.0
F10 15.0 15.0
F11 14.0 12.0
F12 11.0 15.0
F13 15.0 15.0
F14 15.0 10.0
F15 15.0 13.0
F16 1.0 16.0
我使用以下代码绘制了以上数据:
# Draw the MICMAC scatter chart: one red point per factor, each annotated
# with its factor name, plus quadrant divider lines and quadrant captions.
# Expects `prom` (the data frame shown above) and `plt` (matplotlib.pyplot)
# to be defined already.

# Columns used as x and y coordinates
x = prom['Dependence Power']
y = prom['Driving Power']
n = ['F1','F2','F3','F4','F5','F6','F7','F8','F9','F10','F11','F12','F13','F14','F15','F16']
##########################################
plt.scatter(x, y, color="red")
plt.xlim([0, 18])
plt.ylim([0, 18])
# Walk labels and coordinates in lockstep instead of indexing x[i] / y[i]:
# if the Series are indexed by factor name (as the table above suggests),
# integer keys only work through pandas' deprecated positional fallback.
for txt, xi, yi in zip(n, x, y):
    plt.annotate(txt, (xi, yi), fontsize=8, rotation=0)
plt.ylabel('Driving Power', fontweight='bold')
plt.xlabel('Dependence Power', fontweight='bold')
plt.title("MICMAC Analysis", fontsize = 13,fontweight='bold')
plt.grid()
# Quadrant divider lines. xmin/xmax (ymin/ymax) are fractions of the axes
# width (height) in [0, 1], not data coordinates, so 0..1 spans the full axes.
plt.axhline(y=8, xmin=0, xmax=1)
plt.axvline(x=9, ymin=0, ymax=1)
# Quadrant captions, positioned in data coordinates.
# NOTE(review): with dependence on x and driving on y, the upper-left
# quadrant is high-driving/low-dependence -- confirm whether the
# 'Driving Factors' and 'Linkage Factors' captions are swapped.
plt.text(10, 10, 'Driving Factors')
plt.text(2,10,'Linkage Factors')
plt.text(2,4, "Autonomous Factors")
plt.text(10,4,'Dependent Factors')
#plt.savefig('micmac.png')
plt.show()
我的图看起来不错,但有一些注释相互重叠。例如,请看第一象限中显示为 'F15' 和 'F18' 的标签:显示为 'F15' 的位置应当标注 'F3','F7','F15',显示为 'F18' 的位置应当标注 'F5','F6','F9','F10','F13'。
我需要这样的输出:
可能有几种方法。其中一种是:为注释单独创建一个数据框,按两列的值分组,并把落在同一坐标上的索引收集成列表,然后用这个新数据框来添加注释。在本例数据中,分组后仍有个别标签字符串相互重叠,因此只需单独调整这些索引的偏移值。
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import io
# MICMAC scatter chart with one annotation per distinct point: factors that
# share the same (Driving Power, Dependence Power) coordinates are grouped
# and their names are joined into a single comma-separated label.
data = '''
"Driving Power" "Dependence Power"
F1 9.0 15.0
F2 14.0 14.0
F3 15.0 13.0
F4 16.0 1.0
F5 15.0 15.0
F6 15.0 15.0
F7 15.0 13.0
F8 12.0 15.0
F9 15.0 15.0
F10 15.0 15.0
F11 14.0 12.0
F12 11.0 15.0
F13 15.0 15.0
F14 15.0 10.0
F15 15.0 13.0
F16 1.0 16.0
'''
# The header has two fields and each data row has three, so pandas uses the
# first column (F1..F16) as the index. sep=r'\s+' replaces the deprecated
# delim_whitespace=True.
prom = pd.read_csv(io.StringIO(data), sep=r'\s+')
x = prom['Dependence Power']
y = prom['Driving Power']
n = ['F1','F2','F3','F4','F5','F6','F7','F8','F9','F10','F11','F12','F13','F14','F15','F16']
# Collapse the frame to one row per distinct coordinate pair; the 'index'
# column then holds the list of factor names located at that point.
prom = prom.reset_index(drop=False).groupby(['Driving Power','Dependence Power'])['index'].apply(list).reset_index()
plt.scatter(x, y, color="red")
plt.xlim([0, 18])
plt.ylim([0, 18])
# Vertical distance (data units) between a point and its label. Exceptions
# are keyed by (Driving Power, Dependence Power) rather than by row position
# in the grouped frame, so they keep working if rows are added or reordered.
default_offset = 0.4
offset_overrides = {(15.0, 15.0): 0.2}
for _, row in prom.iterrows():
    point = (row['Dependence Power'], row['Driving Power'])
    offset = offset_overrides.get(
        (row['Driving Power'], row['Dependence Power']), default_offset)
    plt.annotate(','.join(row['index']),
                 point,
                 xytext=(point[0], point[1] + offset),
                 fontsize=8)
plt.ylabel('Driving Power', fontweight='bold')
plt.xlabel('Dependence Power', fontweight='bold')
plt.title("MICMAC Analysis", fontsize = 13,fontweight='bold')
plt.grid()
# Quadrant divider lines. xmin/xmax (ymin/ymax) are fractions of the axes
# width (height) in [0, 1], not data coordinates, so 0..1 spans the full axes.
plt.axhline(y=8, xmin=0, xmax=1)
plt.axvline(x=9, ymin=0, ymax=1)
# Quadrant captions, positioned in data coordinates.
# NOTE(review): with dependence on x and driving on y, the upper-left
# quadrant is high-driving/low-dependence -- confirm whether the
# 'Driving Factors' and 'Linkage Factors' captions are swapped.
plt.text(10, 10, 'Driving Factors')
plt.text(2,10,'Linkage Factors')
plt.text(2,4, "Autonomous Factors")
plt.text(10,4,'Dependent Factors')
#plt.savefig('micmac.png')
plt.show()