Plotly - "grouped" 散点图

Plotly - "grouped" scatter plot

假设我有以下 pandas 数据框:

import pandas as pd
# Sample data: for each person and each time of day there are three
# colour-coded energy readings (18 rows in total).
d = {'Person': ['Bob']*9 + ['Alice']*9,
    'Time': ['Morning']*3 + ['Noon']*3 + ['Evening']*3 + ['Morning']*3 + ['Noon']*3 + ['Evening']*3,
    'Color': ['Red','Blue','Green']*6,
    'Energy': [1,5,4,7,3,6,8,4,2,9,8,5,2,6,7,3,8,1]}
df = pd.DataFrame(d)

如何创建下面这样的图?(请原谅我画得比较粗糙)

我试过将散点图、带状图和箱线图引入其中,但没有成功。

谢谢!

您可以使用 itertuples(性能比 iterrows 更好)遍历 DataFrame 的每一行,将 'Morning'、'Noon' 和 'Evening' 分别映射为 1、2、3,然后对 x 值做抖动(jitter):把 'Bob' 映射为 -0.05、'Alice' 映射为 0.05,并把该偏移量加到对应的 x 值上。您还可以将 'Color' 信息传递给 marker_color 参数。

然后把刻度值 1、2、3 映射回 'Morning'、'Noon' 和 'Evening',并使用 legendgroup,使图例中 Bob 和 Alice 各只显示一个标记(避免每条迹线都在图例中显示一个标记)。

import pandas as pd
import plotly.graph_objects as go

# Sample data: three colour-coded energy readings per person per time of day.
d = {'Person': ['Bob']*9 + ['Alice']*9,
    'Time': ['Morning']*3 + ['Noon']*3 + ['Evening']*3 + ['Morning']*3 + ['Noon']*3 + ['Evening']*3,
    'Color': ['Red','Blue','Green']*6,
    'Energy': [1,5,4,7,3,6,8,4,2,9,8,5,2,6,7,3,8,1]}
df = pd.DataFrame(d)

# Per-person marker symbol, numeric x position of each time of day, and a
# small per-person horizontal offset so the two people's markers sit side by
# side instead of on top of each other.
shapes = {'Bob': 'circle', 'Alice': 'diamond'}
time = {'Morning':1, 'Noon':2, 'Evening':3}
jitter = {'Bob': -0.05, 'Alice': 0.05}

fig = go.Figure()

# Index labels of the rows whose Person differs from the previous row, i.e.
# the first row of each run of the same person. Only those rows get a legend
# entry, so each person appears once in the legend.
s = df.Person.shift() != df.Person
name_changes = set(s[s].index)

# One single-point trace per row; itertuples yields named tuples, so the
# columns are read by name rather than by position.
for row in df.itertuples():
    fig.add_trace(go.Scatter(
        x=[time[row.Time] + jitter[row.Person]],
        y=[row.Energy],
        legendgroup=row.Person,
        name=row.Person,
        mode='markers',
        marker_symbol=shapes[row.Person],
        marker_color=row.Color,
        showlegend=row.Index in name_changes,
    ))

fig.update_traces(marker=dict(size=12,line=dict(width=2,color='DarkSlateGrey')))

# Label the numeric x positions with the time-of-day names they stand for.
fig.update_layout(
    xaxis=dict(
        tickmode='array',
        tickvals=list(time.values()),
        ticktext=list(time.keys())
    )
)
fig.show()

  • 为每个 Person 生成一个 scatter trace
  • 对 x 做一点处理,使每个人的点相互错开;因此还需要相应地设置 hovertext 和 xaxis ticks
import plotly.graph_objects as go

# Number the distinct Time labels in order of first appearance: the former
# index becomes the numeric "x" column, the values become "Time".
time_labels = pd.Series(df["Time"].unique())
xbase = time_labels.reset_index().rename(columns={"index": "x", 0: "Time"})

# Attach the numeric x position to every row and index the result by Person
# so that each person's rows can be selected with .loc.
dfp = df.merge(xbase, on="Time").set_index("Person")

# One trace per person; person number i is shifted right by i/10 and also
# selects the marker symbol.
people = dfp.index.get_level_values("Person").unique()
traces = []
for i, p in enumerate(people):
    rows = dfp.loc[p]
    traces.append(
        go.Scatter(
            name=p,
            x=rows["x"] + i/10,
            y=rows["Energy"],
            text=rows["Time"],
            mode="markers",
            marker={"color": rows["Color"], "symbol": i, "size": 10},
            # Hover shows the Time label rather than the shifted numeric x.
            hovertemplate="(%{text},%{y})",
        )
    )

# Label the un-shifted numeric positions with the Time names.
xaxis_ticks = {"tickmode": "array", "tickvals": xbase["x"], "ticktext": xbase["Time"]}
go.Figure(traces).update_layout(xaxis=xaxis_ticks)

如果您只想使用 matplotlib 而不需要任何额外的依赖项,这里有一个示例代码。 (Pandas groupbys等操作留给你优化)

import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.transforms as transforms
from matplotlib.lines import Line2D

# Sample data: three colour-coded energy readings per person per time of day.
times_of_day = (['Morning'] * 3 + ['Noon'] * 3 + ['Evening'] * 3) * 2
df = pd.DataFrame(
    {
        'Person': ['Bob'] * 9 + ['Alice'] * 9,
        'Time': times_of_day,
        'Color': ['Red', 'Blue', 'Green'] * 6,
        'Energy': [1, 5, 4, 7, 3, 6, 8, 4, 2, 9, 8, 5, 2, 6, 7, 3, 8, 1],
    }
)

plt.figure()

# Categories along the x axis, in plotting order.
x = ['Morning', 'Noon', 'Evening']


def offset(p):
    """Return a translation of p / 72 along x, scaled by the current
    figure's dpi_scale_trans (no vertical shift)."""
    return transforms.ScaledTranslation(
        p / 72.0, 0, plt.gcf().dpi_scale_trans
    )


# Data-coordinate transform of the current axes; the per-person shift is
# added on top of it.
trans = plt.gca().transData

# Offset index assigned to the first person (negative half of the number of
# people, floored); later people count up from it.
n_people = len(df['Person'].unique())
start_offset = (-n_people) // 2

# One marker per person -- extend this list if there are more people.
markers = ['o', '^']

# Proxy artists for a hand-built legend.
custom_legend = []

# Aggregate so that each (Person, Time, Color) combination has one value.
df = df.groupby(['Person', 'Time', 'Color'])['Energy'].sum().reset_index()
df = df.set_index('Time')

for i, [person, pgroup] in enumerate(df.groupby('Person')):
    pts = (i + start_offset) * 10
    marker = markers[i]
    transform = trans + offset(pts)

    # Legend-only handle: it is never added to the axes.
    legend_handle = Line2D(
        [0],
        [0],
        color='w',
        markerfacecolor='black',
        marker=marker,
        markersize=10,
        label=person,
    )
    custom_legend.append(legend_handle)

    for color, cgroup in pgroup.groupby('Color'):
        # First Energy value for each time of day, in x-axis order.
        energies = [
            cgroup.loc[cgroup.index == label, 'Energy'].values[0]
            for label in x
        ]

        scatter_kwargs = {'c': color.lower(), 's': 25, 'marker': marker}
        # Per the original author's note, at least one scatter has to be
        # drawn without a custom transform, so the zero-offset group is
        # plotted with the default one.
        if pts != 0:
            scatter_kwargs['transform'] = transform
        plt.scatter(x, energies, **scatter_kwargs)

plt.ylabel('Energy')
plt.xlabel('Time')
plt.legend(handles=custom_legend)
plt.margins(x=0.5)
plt.show()

您已经收到了一些很好的建议,但由于您仍然想知道:

What if I also want the colors to show in the legend?

我只想补充一点,px.scatter 真的非常接近开箱即用的最佳方案,唯一缺少的是 jitter。不过,下面的图只需这几行代码即可生成:

fig = px.scatter(df, x='Time', y='Energy', color='Color', symbol='Person')

# The code relies on each trace name having the form "<Color>,<Person>":
# the first part becomes the marker colour, the second part becomes the new
# trace name, and the x values are replaced by numeric positions.
for trace in fig.data:
    name_parts = trace.name.split(',')
    trace.update(marker_color=name_parts[0], name=name_parts[1], x=[1,2,3])

# Jitter: shift the trace(s) named ' Alice' (the leading space is left over
# from the split above) to the right by 0.2.
for trace in fig.data:
    if trace.name == ' Alice':
        trace.update(x=tuple([pos + 0.2 for pos in list(trace.x)]))

完整代码:

import pandas as pd
import plotly.express as px
import plotly.graph_objs as go

# data
d = {'Person': ['Bob']*9 + ['Alice']*9,
    'Time': ['Morning']*3 + ['Noon']*3 + ['Evening']*3 + ['Morning']*3 + ['Noon']*3 + ['Evening']*3,
    'Color': ['Red','Blue','Green']*6,
    'Energy': [1,5,4,7,3,6,8,4,2,9,8,5,2,6,7,3,8,1]}
df = pd.DataFrame(d)

# Distinct time labels in order of first appearance, and the numeric x
# position (1, 2, 3, ...) each label is drawn at. Deriving these from the
# data keeps tick values and tick labels the same length.
time_labels = list(df.Time.unique())
time_positions = {label: pos for pos, label in enumerate(time_labels, start=1)}

# figure setup
fig = px.scatter(df, x = 'Time', y = 'Energy', color = 'Color', symbol = 'Person')

# some customizations in order to get to the desired result:
# the code relies on each trace name having the form "<Color>,<Person>"; the
# first part is used as marker colour and the second part as the new name
# (it keeps its leading space, hence the comparison with ' Alice' below).
for trace in fig.data:
    name_parts = trace.name.split(',')
    color_name = name_parts[0]
    person_name = name_parts[1]

    # Replace the categorical x values by their numeric positions.
    x_positions = [time_positions[label] for label in trace.x]

    # jitter: shift Alice's markers to the right so they do not overlap Bob's
    if person_name == ' Alice':
        x_positions = [pos + 0.2 for pos in x_positions]

    trace.update(marker_color=color_name, name=person_name, x=x_positions)

# layout: label the un-shifted numeric positions with the time names
fig.update_layout(xaxis={"tickmode":"array",
                         "tickvals":list(time_positions.values()),
                         "ticktext":time_labels})

fig.show()

改进空间:

上面代码片段中的一些部分无疑可以写得更加动态,比如 x = [1,2,3] 应该能适应 x 轴上不同数量的类别;人数和 jitter 的参数也是如此。如果这个方案对您有用,我可以进一步研究这些改进。