使用字典创建线图

Create a lineplot using a dictionary

我有一个字典,如下所示:

my_dict={2:((4, 0.56),(8, 0.75)), 6:((3, 0.05),(5, 0.46)), 7: ((4, 0.99),(1, 0.56))}

我想创建一个带有误差线的线图:x 轴为字典的键,y 轴为字典的值。此外,每个值中第一个元组的数据需要画在一条线上,第二个元组的数据需要画在另一条线上。

类似下面的内容

import matplotlib.pyplot as plt
# Sample input: every key maps to two (value, error) tuples.
my_dict = {
    2: ((4, 0.56), (8, 0.75)),
    6: ((3, 0.05), (5, 0.46)),
    7: ((4, 0.99), (1, 0.56)),
}

# Hand-written plot data showing the desired result for my_dict.
x_vals = [2, 6, 7]
line1 = [4, 3, 4]
line2 = [8, 5, 1]
errorbar1 = [0.56, 0.05, 0.99]
errorbar2 = [0.75, 0.46, 0.56]

# Draw both dotted lines first, then both sets of error bars, keeping the
# same call order as issuing the four calls one by one.
for y_vals in (line1, line2):
    plt.plot(x_vals, y_vals, linestyle='dotted')
for y_vals, y_err in ((line1, errorbar1), (line2, errorbar2)):
    plt.errorbar(x_vals, y_vals, yerr=y_err, fmt=' ')

plt.xlabel('x axis')
plt.ylabel('yaxis')
plt.show()

  • 您需要将 my_dict 解包为 keys 和 values
  • python >= 3.6 的 dict 是有序的,因此提取的值不需要排序。
  • 在 python 3.8、pandas 1.3.1、matplotlib 3.4.2、seaborn 0.11.1 中测试
# Sample input: every key maps to two (value, error) tuples, one per line.
my_dict = {
    21: ((0.667, 0.126), (0.63, 0.068)),
    52: ((0.679, 0.059), (0.637, 0.078)),
    73: ((0.612, 0.211), (0.519, 0.143)),
    94: ((0.709, 0.09), (0.711, 0.097)),
}

# Materialise the keys as a list: a dict view is not a sequence, and the
# plotting calls expect list-like x data of the same length as y.
x_vals = list(my_dict)

# Keys and values iterate in the same (insertion) order, so every list
# below lines up with x_vals element by element.
line1 = [first[0] for first, _second in my_dict.values()]
line2 = [second[0] for _first, second in my_dict.values()]

errorbar1 = [first[1] for first, _second in my_dict.values()]
errorbar2 = [second[1] for _first, second in my_dict.values()]

# Draw both dotted lines first, then both sets of error bars, keeping the
# same call order as issuing the four calls one by one.
for y_vals in (line1, line2):
    plt.plot(x_vals, y_vals, linestyle='dotted')
for y_vals, y_err in ((line1, errorbar1), (line2, errorbar2)):
    plt.errorbar(x_vals, y_vals, yerr=y_err, fmt=' ')

plt.xlabel('x axis')
plt.ylabel('yaxis')
plt.show()

使用pandas

  • 使用 pandas 1.3.1、matplotlib 3.4.2 进行了测试
  • 这将代码从 13 行减少到 7 行。
import pandas as pd

# sample data: key -> ((line1 value, line1 error), (line2 value, line2 error))
my_dict = {
    21: ((0.667, 0.126), (0.63, 0.068)),
    52: ((0.679, 0.059), (0.637, 0.078)),
    73: ((0.612, 0.211), (0.519, 0.143)),
    94: ((0.709, 0.09), (0.711, 0.097)),
}

# one row per dictionary key; each column holds the (value, error) tuples
# of one line
df = pd.DataFrame.from_dict(
    my_dict,
    orient='index',
    columns=['line1', 'line2'],
)

# display(df)
             line1           line2
21  (0.667, 0.126)   (0.63, 0.068)
52  (0.679, 0.059)  (0.637, 0.078)
73  (0.612, 0.211)  (0.519, 0.143)
94   (0.709, 0.09)  (0.711, 0.097)

# split every (value, error) tuple column in two: the value stays under the
# original column name and the error goes to a new errorbar<N> column
for idx, name in enumerate(df.columns, start=1):
    split_cols = pd.DataFrame(df[name].tolist(), index=df.index)
    df[[name, f'errorbar{idx}']] = split_cols

# display(df)
    line1  line2  errorbar1  errorbar2
21  0.667  0.630      0.126      0.068
52  0.679  0.637      0.059      0.078
73  0.612  0.519      0.211      0.143
94  0.709  0.711      0.090      0.097

# plot: dotted lines from the value columns, then error bars taken from the
# matching error columns
ax = df.plot(
    y=['line1', 'line2'],
    linestyle='dotted',
    ylabel='y-axis',
    xlabel='x-axis',
    title='title',
    figsize=(8, 6),
)
for line_col, err_col in (('line1', 'errorbar1'), ('line2', 'errorbar2')):
    ax.errorbar(df.index, line_col, yerr=err_col, data=df, fmt=' ')

更新

  • 我意识到之前的所有代码都是为了配合你的另一个问题。
  • 这两个问题中的这些处理其实都可以省略,绘图的关键就是把数据整理成适合绘图 API 的形状。
  • 如果使用另一个问题中的 rl,可以直接将其转换为长格式,并用 seaborn.pointplot 作图。
    • 正如您在前面的图中看到的那样,误差线重叠,使图更难以阅读。这里 dodge 用于稍微偏移点,因此误差线不会重叠。
import seaborn as sns
import pandas as pd  # alias required: the code below calls pd.DataFrame
import matplotlib.pyplot as plt

# rl comes from the other question: one dict per trial, mapping a sample
# size to a (train score, test score) tuple
rl = [
    {21: (0.5714285714285714, 0.6888888888888889), 52: (0.6153846153846154, 0.7111111111111111), 73: (0.7123287671232876, 0.6222222222222222), 94: (0.7127659574468085, 0.6)},
    {21: (0.6190476190476191, 0.6444444444444445), 52: (0.6923076923076923, 0.6444444444444445), 73: (0.3698630136986301, 0.35555555555555557), 94: (0.7978723404255319, 0.7777777777777778)},
    {21: (0.8095238095238095, 0.5555555555555556), 52: (0.7307692307692307, 0.5555555555555556), 73: (0.7534246575342466, 0.5777777777777777), 94: (0.6170212765957447, 0.7555555555555555)},
]

# first melt: one row per (trial, sample size); 'value' holds the tuple
df = pd.DataFrame(rl).melt(var_name='Sample Size')

# expand the tuples into Train/Test columns that replace the 'value' column
scores = pd.DataFrame(df['value'].tolist(), index=df.index, columns=['Train', 'Test'])
df = df.drop(columns='value').join(scores)

# second melt: stack Train/Test into a 'Score' label column plus its 'value'
df = df.melt(id_vars='Sample Size', var_name='Score')

# display(df)
   Sample Size  Score     value
0           21  Train  0.571429
1           21  Train  0.619048
2           21  Train  0.809524
3           52  Train  0.615385
4           52  Train  0.692308

# plot: one point per sample size and score type; dodge shifts the points
# slightly so that the error bars of the two series do not overlap
fig = plt.figure(figsize=(8, 5))
p = sns.pointplot(
    data=df,
    x='Sample Size',
    y='value',
    hue='Score',
    ci='sd',
    dodge=0.25,
    linestyles='dotted',
)
p.set(ylabel='Mean of Trials', title='Score Metrics')

# the mean and standard deviation per sample size and score type can still
# be computed directly from df
grouped = df.groupby(['Sample Size', 'Score'])
dfg = grouped.agg(['mean', 'std'])

# display(dfg)
                      value          
                       mean       std
Sample Size Score                    
21          Test   0.629630  0.067890
            Train  0.666667  0.125988
52          Test   0.637037  0.078042
            Train  0.679487  0.058751
73          Test   0.518519  0.142869
            Train  0.611872  0.210591
94          Test   0.711111  0.096864
            Train  0.709220  0.090478

我认为您只需用列表推导式替换硬编码的列表即可

# hard-coded lists from the question, in key order 2, 6, 7
x_vals = [2, 6, 7]
line1 = [4, 3, 4]
line2 = [8, 5, 1]
errorbar1 = [0.56, 0.05, 0.99]
# errors of the second tuples: my_dict[2][1][1], my_dict[6][1][1], my_dict[7][1][1]
errorbar2 = [0.75, 0.46, 0.56]

# derive the same lists from my_dict; sorting the keys fixes the x order and
# every list below follows that order
x_vals = sorted(my_dict)
line1 = [my_dict[k][0][0] for k in x_vals]  # [4, 3, 4]
line2 = [my_dict[k][1][0] for k in x_vals]  # [8, 5, 1]
errorbar1 = [my_dict[k][0][1] for k in x_vals]  # [0.56, 0.05, 0.99]
errorbar2 = [my_dict[k][1][1] for k in x_vals]  # [0.75, 0.46, 0.56]