Plotly - "grouped" 散点图
Plotly - "grouped" scatter plot
假设我有以下 pandas 数据框:
import pandas as pd
# Sample data: one Energy reading per (Person, Time, Color) combination.
# 'Time' uses the spelling 'Morning' (the original had the typo 'Morining'),
# so lookups keyed on 'Morning' work against this frame.
d = {
    'Person': ['Bob'] * 9 + ['Alice'] * 9,
    'Time': (['Morning'] * 3 + ['Noon'] * 3 + ['Evening'] * 3) * 2,
    'Color': ['Red', 'Blue', 'Green'] * 6,
    'Energy': [1, 5, 4, 7, 3, 6, 8, 4, 2, 9, 8, 5, 2, 6, 7, 3, 8, 1],
}
df = pd.DataFrame(d)
如何绘制出这样的图?
(请原谅粗略的绘图)
我试过将散点图、带状图和箱线图引入其中,但没有成功。
谢谢!
您可以使用 itertuples(性能优于 iterrows)遍历 DataFrame 的每一行,把 'Morning'、'Noon' 和 'Evening' 分别映射为 1、2、3,然后把 'Bob' 映射为 -0.05、'Alice' 映射为 0.05,并将该偏移量加到对应的 x 值上,从而实现抖动(jitter)。您还可以把 'Color' 列的值传给 marker_color 参数。
接着把刻度值(tickvals)1、2、3 映射回 'Morning'、'Noon' 和 'Evening',并使用 legendgroup,使图例中 Bob 和 Alice 各只显示一个标记(而不是每条迹线都显示一个图例标记)。
import pandas as pd
import plotly.graph_objects as go
# Sample data: one Energy reading per (Person, Time, Color) combination.
d = {
    'Person': ['Bob'] * 9 + ['Alice'] * 9,
    'Time': (['Morning'] * 3 + ['Noon'] * 3 + ['Evening'] * 3) * 2,
    'Color': ['Red', 'Blue', 'Green'] * 6,
    'Energy': [1, 5, 4, 7, 3, 6, 8, 4, 2, 9, 8, 5, 2, 6, 7, 3, 8, 1],
}
df = pd.DataFrame(d)

# Per-person marker symbol and horizontal offset (jitter), and the numeric
# x position assigned to each time of day.
shapes = {'Bob': 'circle', 'Alice': 'diamond'}
time = {'Morning': 1, 'Noon': 2, 'Evening': 3}
jitter = {'Bob': -0.05, 'Alice': 0.05}

fig = go.Figure()

# Index labels of the rows whose Person differs from the previous row, i.e.
# the first row of each person's run. Only those traces get a legend entry.
s = df.Person.shift() != df.Person
name_changes = s[s].index.values

# One single-point trace per row, so every point carries its own colour.
# Fields are read by name rather than by tuple position.
for row in df.itertuples():
    fig.add_trace(go.Scatter(
        x=[time[row.Time] + jitter[row.Person]],
        y=[row.Energy],
        legendgroup=row.Person,
        name=row.Person,
        mode='markers',
        marker_symbol=shapes[row.Person],
        marker_color=row.Color,
        showlegend=bool(row.Index in name_changes),
    ))

fig.update_traces(
    marker=dict(size=12, line=dict(width=2, color='DarkSlateGrey'))
)

# Label the numeric x positions with the time-of-day names.
fig.update_layout(
    xaxis=dict(
        tickmode='array',
        tickvals=list(time.values()),
        ticktext=list(time.keys()),
    )
)
fig.show()
- 为每个 Person 生成一个 scatter trace
- 对 x 做一点处理,使每个人的点相互错开;因此 hovertext 和 x 轴刻度也需要相应设置
import plotly.graph_objects as go
# Number the distinct Time values; that number is each category's base x.
time_values = pd.Series(df["Time"].unique())
xbase = time_values.reset_index().rename(columns={"index": "x", 0: "Time"})

# Attach the base x to every row and index by Person for per-person lookup.
dfp = df.merge(xbase, on="Time").set_index("Person")


def _person_trace(i, p):
    """Build the marker trace for person ``p``, the ``i``-th distinct person."""
    return go.Scatter(
        name=p,
        # The person's ordinal shifts the points sideways by i/10 ...
        x=dfp.loc[p, "x"] + i / 10,
        y=dfp.loc[p, "Energy"],
        text=dfp.loc[p, "Time"],
        mode="markers",
        # ... and also selects the marker symbol.
        marker={"color": dfp.loc[p, "Color"], "symbol": i, "size": 10},
        hovertemplate="(%{text},%{y})",
    )


# The figure is left as the final expression value, exactly as before.
go.Figure(
    [
        _person_trace(i, p)
        for i, p in enumerate(dfp.index.get_level_values("Person").unique())
    ]
).update_layout(
    xaxis={"tickmode": "array", "tickvals": xbase["x"], "ticktext": xbase["Time"]}
)
如果您只想使用 matplotlib 而不需要任何额外的依赖项,这里有一个示例代码。 (Pandas groupbys等操作留给你优化)
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.transforms as transforms
from matplotlib.lines import Line2D
# Sample data: one Energy reading per (Person, Time, Color) combination.
df = pd.DataFrame(
    {
        'Person': ['Bob'] * 9 + ['Alice'] * 9,
        'Time': ['Morning'] * 3
        + ['Noon'] * 3
        + ['Evening'] * 3
        + ['Morning'] * 3
        + ['Noon'] * 3
        + ['Evening'] * 3,
        'Color': ['Red', 'Blue', 'Green'] * 6,
        'Energy': [1, 5, 4, 7, 3, 6, 8, 4, 2, 9, 8, 5, 2, 6, 7, 3, 8, 1],
    }
)
plt.figure()

# Category order along the x axis.
x = ['Morning', 'Noon', 'Evening']


def offset(p):
    """Return a transform that shifts artists horizontally by ``p`` points."""
    return transforms.ScaledTranslation(
        p / 72.0, 0, plt.gcf().dpi_scale_trans
    )


trans = plt.gca().transData

# Starting multiplier for the per-person shift (floor of -n/2).
start_offset = -len(df['Person'].unique()) // 2

# Marker per person; reused cyclically if there are more people than markers.
markers = ['o', '^']

# Proxy artists for the legend (one per person); they are never drawn.
custom_legend = []

# Aggregate duplicates so each (Person, Time, Color) has one Energy value.
df = df.groupby(['Person', 'Time', 'Color'])['Energy'].sum().reset_index()
df = df.set_index('Time')

for i, (person, pgroup) in enumerate(df.groupby('Person')):
    pts = (i + start_offset) * 10
    marker = markers[i % len(markers)]

    # The group with a zero shift is plotted without an explicit transform;
    # the original notes at least one untransformed scatter is needed.
    scatter_kwargs = {} if pts == 0 else {'transform': trans + offset(pts)}

    custom_legend.append(
        Line2D(
            [0],
            [0],
            color='w',
            markerfacecolor='black',
            marker=marker,
            markersize=10,
            label=person,
        )
    )

    for color, cgroup in pgroup.groupby('Color'):
        # Energy values in x-axis order; a missing time becomes NaN instead
        # of raising IndexError.
        energies = cgroup['Energy'].reindex(x).to_numpy()
        plt.scatter(
            x,
            energies,
            c=color.lower(),
            s=25,
            marker=marker,
            **scatter_kwargs,
        )

plt.ylabel('Energy')
plt.xlabel('Time')
plt.legend(handles=custom_legend)
plt.margins(x=0.5)
plt.show()
您已经收到了一些很好的建议,但由于您仍然想知道:
What if I also want the colors to show in the legend?
我只想补充一点:px.scatter 已经非常接近开箱即用的最佳方案,唯一缺少的是抖动(jitter)。不过,下面这张图只需这几行代码即可生成:
fig = px.scatter(df, x='Time', y='Energy', color='Color', symbol='Person')


def _recolor_and_rename(t):
    """Split a '<color>, <person>' trace name into marker colour and name."""
    # Strip the parts so the legend shows 'Alice', not ' Alice'.
    parts = [part.strip() for part in t.name.split(',')]
    # x = [1, 2, 3] assumes three points per trace (one per time of day).
    t.update(marker_color=parts[0], name=parts[1], x=[1, 2, 3])


def _jitter_alice(t):
    """Shift Alice's points right by 0.2 so they do not overlap Bob's."""
    if t.name == 'Alice':
        t.update(x=tuple(x + 0.2 for x in t.x))


fig.for_each_trace(_recolor_and_rename)
fig.for_each_trace(_jitter_alice)
完整代码:
import pandas as pd
import plotly.express as px
import plotly.graph_objs as go
# data
# Both people use the same spelling 'Morning'; the original spelled Bob's
# rows 'Morining', which put four labels into df.Time.unique() for three ticks.
d = {
    'Person': ['Bob'] * 9 + ['Alice'] * 9,
    'Time': (['Morning'] * 3 + ['Noon'] * 3 + ['Evening'] * 3) * 2,
    'Color': ['Red', 'Blue', 'Green'] * 6,
    'Energy': [1, 5, 4, 7, 3, 6, 8, 4, 2, 9, 8, 5, 2, 6, 7, 3, 8, 1],
}
df = pd.DataFrame(d)

# figure setup
fig = px.scatter(df, x='Time', y='Energy', color='Color', symbol='Person')


# some customizations in order to get to the desired result:
def _recolor_and_rename(t):
    """Split a '<color>, <person>' trace name into marker colour and name."""
    # Strip the parts so the legend shows 'Alice', not ' Alice'.
    parts = [part.strip() for part in t.name.split(',')]
    # x = [1, 2, 3] assumes three points per trace (one per time of day).
    t.update(marker_color=parts[0], name=parts[1], x=[1, 2, 3])


# jitter
def _jitter_alice(t):
    """Shift Alice's points right by 0.2 so they do not overlap Bob's."""
    if t.name == 'Alice':
        t.update(x=tuple(x + 0.2 for x in t.x))


fig.for_each_trace(_recolor_and_rename)
fig.for_each_trace(_jitter_alice)

# layout
fig.update_layout(
    xaxis={
        "tickmode": "array",
        "tickvals": [1, 2, 3],
        "ticktext": df.Time.unique(),
    }
)
fig.show()
改进空间:
上面代码片段中的一些部分无疑可以写得更动态,例如 x = [1,2,3] 应当能适应 x 轴上不同数量的类别;人数和 jitter 的参数也是如此。如果这个方案对您有用,我可以进一步研究。
假设我有以下 pandas 数据框:
import pandas as pd
# Sample data: one Energy reading per (Person, Time, Color) combination.
# 'Time' uses the spelling 'Morning' (the original had the typo 'Morining'),
# so lookups keyed on 'Morning' work against this frame.
d = {
    'Person': ['Bob'] * 9 + ['Alice'] * 9,
    'Time': (['Morning'] * 3 + ['Noon'] * 3 + ['Evening'] * 3) * 2,
    'Color': ['Red', 'Blue', 'Green'] * 6,
    'Energy': [1, 5, 4, 7, 3, 6, 8, 4, 2, 9, 8, 5, 2, 6, 7, 3, 8, 1],
}
df = pd.DataFrame(d)
如何绘制出这样的图?
我试过将散点图、带状图和箱线图引入其中,但没有成功。
谢谢!
您可以使用 itertuples(性能优于 iterrows)遍历 DataFrame 的每一行,把 'Morning'、'Noon' 和 'Evening' 分别映射为 1、2、3,然后把 'Bob' 映射为 -0.05、'Alice' 映射为 0.05,并将该偏移量加到对应的 x 值上,从而实现抖动(jitter)。您还可以把 'Color' 列的值传给 marker_color 参数。
接着把刻度值(tickvals)1、2、3 映射回 'Morning'、'Noon' 和 'Evening',并使用 legendgroup,使图例中 Bob 和 Alice 各只显示一个标记(而不是每条迹线都显示一个图例标记)。
import pandas as pd
import plotly.graph_objects as go
# Sample data: one Energy reading per (Person, Time, Color) combination.
d = {
    'Person': ['Bob'] * 9 + ['Alice'] * 9,
    'Time': (['Morning'] * 3 + ['Noon'] * 3 + ['Evening'] * 3) * 2,
    'Color': ['Red', 'Blue', 'Green'] * 6,
    'Energy': [1, 5, 4, 7, 3, 6, 8, 4, 2, 9, 8, 5, 2, 6, 7, 3, 8, 1],
}
df = pd.DataFrame(d)

# Per-person marker symbol and horizontal offset (jitter), and the numeric
# x position assigned to each time of day.
shapes = {'Bob': 'circle', 'Alice': 'diamond'}
time = {'Morning': 1, 'Noon': 2, 'Evening': 3}
jitter = {'Bob': -0.05, 'Alice': 0.05}

fig = go.Figure()

# Index labels of the rows whose Person differs from the previous row, i.e.
# the first row of each person's run. Only those traces get a legend entry.
s = df.Person.shift() != df.Person
name_changes = s[s].index.values

# One single-point trace per row, so every point carries its own colour.
# Fields are read by name rather than by tuple position.
for row in df.itertuples():
    fig.add_trace(go.Scatter(
        x=[time[row.Time] + jitter[row.Person]],
        y=[row.Energy],
        legendgroup=row.Person,
        name=row.Person,
        mode='markers',
        marker_symbol=shapes[row.Person],
        marker_color=row.Color,
        showlegend=bool(row.Index in name_changes),
    ))

fig.update_traces(
    marker=dict(size=12, line=dict(width=2, color='DarkSlateGrey'))
)

# Label the numeric x positions with the time-of-day names.
fig.update_layout(
    xaxis=dict(
        tickmode='array',
        tickvals=list(time.values()),
        ticktext=list(time.keys()),
    )
)
fig.show()
- 为每个 Person 生成一个 scatter trace
- 对 x 做一点处理,使每个人的点相互错开;因此 hovertext 和 x 轴刻度也需要相应设置
import plotly.graph_objects as go
# Number the distinct Time values; that number is each category's base x.
time_values = pd.Series(df["Time"].unique())
xbase = time_values.reset_index().rename(columns={"index": "x", 0: "Time"})

# Attach the base x to every row and index by Person for per-person lookup.
dfp = df.merge(xbase, on="Time").set_index("Person")


def _person_trace(i, p):
    """Build the marker trace for person ``p``, the ``i``-th distinct person."""
    return go.Scatter(
        name=p,
        # The person's ordinal shifts the points sideways by i/10 ...
        x=dfp.loc[p, "x"] + i / 10,
        y=dfp.loc[p, "Energy"],
        text=dfp.loc[p, "Time"],
        mode="markers",
        # ... and also selects the marker symbol.
        marker={"color": dfp.loc[p, "Color"], "symbol": i, "size": 10},
        hovertemplate="(%{text},%{y})",
    )


# The figure is left as the final expression value, exactly as before.
go.Figure(
    [
        _person_trace(i, p)
        for i, p in enumerate(dfp.index.get_level_values("Person").unique())
    ]
).update_layout(
    xaxis={"tickmode": "array", "tickvals": xbase["x"], "ticktext": xbase["Time"]}
)
如果您只想使用 matplotlib 而不需要任何额外的依赖项,这里有一个示例代码。 (Pandas groupbys等操作留给你优化)
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.transforms as transforms
from matplotlib.lines import Line2D
# Sample data: one Energy reading per (Person, Time, Color) combination.
df = pd.DataFrame(
    {
        'Person': ['Bob'] * 9 + ['Alice'] * 9,
        'Time': ['Morning'] * 3
        + ['Noon'] * 3
        + ['Evening'] * 3
        + ['Morning'] * 3
        + ['Noon'] * 3
        + ['Evening'] * 3,
        'Color': ['Red', 'Blue', 'Green'] * 6,
        'Energy': [1, 5, 4, 7, 3, 6, 8, 4, 2, 9, 8, 5, 2, 6, 7, 3, 8, 1],
    }
)
plt.figure()

# Category order along the x axis.
x = ['Morning', 'Noon', 'Evening']


def offset(p):
    """Return a transform that shifts artists horizontally by ``p`` points."""
    return transforms.ScaledTranslation(
        p / 72.0, 0, plt.gcf().dpi_scale_trans
    )


trans = plt.gca().transData

# Starting multiplier for the per-person shift (floor of -n/2).
start_offset = -len(df['Person'].unique()) // 2

# Marker per person; reused cyclically if there are more people than markers.
markers = ['o', '^']

# Proxy artists for the legend (one per person); they are never drawn.
custom_legend = []

# Aggregate duplicates so each (Person, Time, Color) has one Energy value.
df = df.groupby(['Person', 'Time', 'Color'])['Energy'].sum().reset_index()
df = df.set_index('Time')

for i, (person, pgroup) in enumerate(df.groupby('Person')):
    pts = (i + start_offset) * 10
    marker = markers[i % len(markers)]

    # The group with a zero shift is plotted without an explicit transform;
    # the original notes at least one untransformed scatter is needed.
    scatter_kwargs = {} if pts == 0 else {'transform': trans + offset(pts)}

    custom_legend.append(
        Line2D(
            [0],
            [0],
            color='w',
            markerfacecolor='black',
            marker=marker,
            markersize=10,
            label=person,
        )
    )

    for color, cgroup in pgroup.groupby('Color'):
        # Energy values in x-axis order; a missing time becomes NaN instead
        # of raising IndexError.
        energies = cgroup['Energy'].reindex(x).to_numpy()
        plt.scatter(
            x,
            energies,
            c=color.lower(),
            s=25,
            marker=marker,
            **scatter_kwargs,
        )

plt.ylabel('Energy')
plt.xlabel('Time')
plt.legend(handles=custom_legend)
plt.margins(x=0.5)
plt.show()
您已经收到了一些很好的建议,但由于您仍然想知道:
What if I also want the colors to show in the legend?
我只想补充一点:px.scatter 已经非常接近开箱即用的最佳方案,唯一缺少的是抖动(jitter)。不过,下面这张图只需这几行代码即可生成:
fig = px.scatter(df, x='Time', y='Energy', color='Color', symbol='Person')


def _recolor_and_rename(t):
    """Split a '<color>, <person>' trace name into marker colour and name."""
    # Strip the parts so the legend shows 'Alice', not ' Alice'.
    parts = [part.strip() for part in t.name.split(',')]
    # x = [1, 2, 3] assumes three points per trace (one per time of day).
    t.update(marker_color=parts[0], name=parts[1], x=[1, 2, 3])


def _jitter_alice(t):
    """Shift Alice's points right by 0.2 so they do not overlap Bob's."""
    if t.name == 'Alice':
        t.update(x=tuple(x + 0.2 for x in t.x))


fig.for_each_trace(_recolor_and_rename)
fig.for_each_trace(_jitter_alice)
完整代码:
import pandas as pd
import plotly.express as px
import plotly.graph_objs as go
# data
# Both people use the same spelling 'Morning'; the original spelled Bob's
# rows 'Morining', which put four labels into df.Time.unique() for three ticks.
d = {
    'Person': ['Bob'] * 9 + ['Alice'] * 9,
    'Time': (['Morning'] * 3 + ['Noon'] * 3 + ['Evening'] * 3) * 2,
    'Color': ['Red', 'Blue', 'Green'] * 6,
    'Energy': [1, 5, 4, 7, 3, 6, 8, 4, 2, 9, 8, 5, 2, 6, 7, 3, 8, 1],
}
df = pd.DataFrame(d)

# figure setup
fig = px.scatter(df, x='Time', y='Energy', color='Color', symbol='Person')


# some customizations in order to get to the desired result:
def _recolor_and_rename(t):
    """Split a '<color>, <person>' trace name into marker colour and name."""
    # Strip the parts so the legend shows 'Alice', not ' Alice'.
    parts = [part.strip() for part in t.name.split(',')]
    # x = [1, 2, 3] assumes three points per trace (one per time of day).
    t.update(marker_color=parts[0], name=parts[1], x=[1, 2, 3])


# jitter
def _jitter_alice(t):
    """Shift Alice's points right by 0.2 so they do not overlap Bob's."""
    if t.name == 'Alice':
        t.update(x=tuple(x + 0.2 for x in t.x))


fig.for_each_trace(_recolor_and_rename)
fig.for_each_trace(_jitter_alice)

# layout
fig.update_layout(
    xaxis={
        "tickmode": "array",
        "tickvals": [1, 2, 3],
        "ticktext": df.Time.unique(),
    }
)
fig.show()
改进空间:
上面代码片段中的一些部分无疑可以写得更动态,例如 x = [1,2,3] 应当能适应 x 轴上不同数量的类别;人数和 jitter 的参数也是如此。如果这个方案对您有用,我可以进一步研究。