Plotly:如何在不循环遍历数据集的情况下突出显示周末?
Plotly: How to highlight weekends without looping through the dataset?
我正在尝试使用 plotly 绘制三个不同的时间序列数据帧(每个大约 60000 条记录),同时使用不同的背景颜色突出显示周末(和工作时间)。
有没有办法做到这一点,而不必像另一个回答中提到的那样遍历整个数据集?那种方法虽然可行,但在大型数据集上的性能可能很差。
您可以使用填充面积图同时突出显示所有周末,而无需使用循环且无需创建多个形状,请参见下面的代码示例。
import pandas as pd
import numpy as np
import plotly.graph_objects as go
# Example script: shade Saturdays and Sundays behind a daily time series by
# drawing ONE filled step trace, instead of adding one shape per weekend.

# Generate a small daily time series. The values come from NumPy's global
# random generator, which is not seeded here, so they differ between runs.
df = pd.DataFrame({
    'date': pd.date_range(start='2021-01-01', periods=18, freq='D'),
    'value': 100 * np.cumsum(np.random.normal(loc=0.01, scale=0.005, size=18)),
})

# Define the y-axis limits with a margin of 5 around the data; the same two
# numbers are reused below as the low/high levels of the weekend helper series.
ymin, ymax = df['value'].min() - 5, df['value'].max() + 5

# Create an auxiliary time series for highlighting the weekends, equal
# to "ymax" on Saturday and Sunday, and to "ymin" on the other days.
df['weekend'] = np.where(
    df['date'].dt.day_name().isin(['Saturday', 'Sunday']),
    ymax,
    ymin,
)

# Define the figure layout.
layout = dict(
    plot_bgcolor='white',
    paper_bgcolor='white',
    margin=dict(t=5, b=5, l=5, r=5, pad=0),
    yaxis=dict(
        # Fix the y-axis limits so the helper series spans the full height.
        range=[ymin, ymax],
        tickfont=dict(size=6),
        linecolor='#000000',
        color='#000000',
        showgrid=False,
        mirror=True,
    ),
    xaxis=dict(
        type='date',
        tickformat='%d-%b-%Y (%a)',
        tickfont=dict(size=6),
        nticks=20,
        linecolor='#000000',
        color='#000000',
        ticks='outside',
        mirror=True,
    ),
)

# Collect the figure traces; the weekend band goes first so that the data
# line is drawn on top of it.
data = []

# Plot the weekends as a filled area chart.
data.append(
    go.Scatter(
        x=df['date'],
        y=df['weekend'],
        # There is no earlier trace, so per the plotly reference 'tonext'
        # behaves like 'toself' and closes the step line into a filled shape.
        fill='tonext',
        fillcolor='#d9d9d9',
        mode='lines',
        # Zero-width line: only the fill is visible. 'hvh' puts each step
        # halfway between two dates, centring the band on the weekend points.
        line=dict(width=0, shape='hvh'),
        showlegend=False,
        # 'skip' removes this helper trace from hover interactions. The
        # previous value None only left the attribute unset (default hover).
        hoverinfo='skip',
    )
)

# Plot the time series as a line chart.
data.append(
    go.Scatter(
        x=df['date'],
        y=df['value'],
        mode='lines+markers',
        marker=dict(size=4, color='#cc503e'),
        line=dict(width=1, color='#cc503e'),
        showlegend=False,
    )
)

# Create the figure.
fig = go.Figure(data=data, layout=layout)

# Save the figure as a static image.
# NOTE(review): static export presumably needs an image engine such as
# kaleido installed — confirm for the plotly version in use.
fig.write_image('figure.png', scale=2, width=500, height=300)
我会考虑使用 make_subplots,并把一条 go.Scatter 轨迹放到次 y 轴上充当背景色,而不是用形状(shape)来标示周末。
基本代码元素:
# One subplot cell that also carries a secondary y-axis.
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Weekend indicator drawn on the secondary axis: a filled step trace with a
# fully transparent outline, so only the translucent fill is visible.
fig.add_trace(
    go.Scatter(
        x=df['date'],
        y=df['weekend'],
        fill='tonexty',
        fillcolor='rgba(99, 110, 250, 0.2)',
        line=dict(shape='hv', color='rgba(0,0,0,0)'),
        showlegend=False,
    ),
    row=1,
    col=1,
    secondary_y=True,
)
图表:
完整代码:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
import datetime
from plotly.subplots import make_subplots
# Complete example: a seeded random-walk signal on the primary y-axis with
# weekends shaded by a filled step trace on a secondary y-axis.

# Remove pandas' limit on the number of rows shown when printing frames.
pd.set_option('display.max_rows', None)

# Data sample: one column of integer steps drawn from {-1, 0, 1}
# (the upper bound of randint is exclusive), seeded for reproducibility.
cols = ['signal']
nperiods = 50
np.random.seed(2)
df = pd.DataFrame(
    np.random.randint(-1, 2, size=(nperiods, len(cols))),
    columns=cols,
)

# Daily dates starting on 2020-01-01, one per row.
datelist = pd.date_range(
    datetime.datetime(2020, 1, 1).strftime('%Y-%m-%d'),
    periods=nperiods,
).tolist()
df['date'] = datelist
df = df.set_index(['date'])
df.index = pd.to_datetime(df.index)

# Zero the first step so the cumulative sum starts at 0, then shift the
# whole walk up by 100.
df.iloc[0] = 0
df = df.cumsum().reset_index()
df['signal'] = df['signal'] + 100

# Weekend flag: 1 on Saturday (weekday 5) and Sunday (weekday 6), else 0.
df['weekend'] = np.where(df['date'].dt.weekday.isin([5, 6]), 1, 0)

fig = make_subplots(specs=[[{"secondary_y": True}]])

# Weekend band on the secondary axis; the outline is fully transparent so
# only the translucent fill shows.
fig.add_trace(
    go.Scatter(
        x=df['date'],
        y=df['weekend'],
        fill='tonexty',
        fillcolor='rgba(99, 110, 250, 0.2)',
        line=dict(shape='hv', color='rgba(0,0,0,0)'),
        showlegend=False,
    ),
    row=1,
    col=1,
    secondary_y=True,
)

fig.update_xaxes(showgrid=False)

# The secondary axis only spans 0..0.1 while the flag is 0 or 1, so weekend
# fills run past the top of the plot area; its grid and tick labels are hidden.
fig.update_layout(
    yaxis2=dict(
        range=[0, 0.1],
        showgrid=False,
        tickfont=dict(color='rgba(0,0,0,0)'),
    )
)

# The actual signal, drawn on the primary y-axis.
fig.add_trace(
    go.Scatter(x=df['date'], y=df['signal'], line=dict(color='blue')),
    secondary_y=False,
)
fig.show()
速度测试:
在我的系统上,把上面完整代码中的 nperiods 设为 2000 时,%%timeit 返回:
162 ms ± 1.59 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
而我最初建议的使用 fig.add_shape() 的方法则慢得多:
49.2 s ± 2.18 s per loop (mean ± std. dev. of 7 runs, 1 loop each)
对 @vestland 的回答做两处小修改(参见下面的代码块),为我解决了以下问题:
- 数据点落在阴影竖条的边缘,而不是明显位于竖条内部。把 line_shape 的取值由 hv 改为 hvh 即可修复。请参阅 plotly 文档中关于 line_shape 的说明。
- 最后一个周末的阴影竖条丢失了。把 fill 的取值设为 tozeroy 即可修复。请参阅 plotly 文档中关于 fill 的说明。
# Sketch only, not runnable as written: each `...` presumably stands for the
# arguments that stay unchanged; only `fill` and `line_shape` are modified.
fig.add_trace(go.Scatter(...,
fill = 'tozeroy', # Changed from 'tonexty'.
line_shape = 'hvh', # Changed from 'hv'.
...,
),
我正在尝试使用 plotly 绘制三个不同的时间序列数据帧(每个大约 60000 条记录),同时使用不同的背景颜色突出显示周末(和工作时间)。
有没有办法做到这一点,而不必像另一个回答中提到的那样遍历整个数据集?那种方法虽然可行,但在大型数据集上的性能可能很差。
您可以使用填充面积图同时突出显示所有周末,而无需使用循环且无需创建多个形状,请参见下面的代码示例。
import pandas as pd
import numpy as np
import plotly.graph_objects as go
# Example script: shade Saturdays and Sundays behind a daily time series by
# drawing ONE filled step trace, instead of adding one shape per weekend.

# Generate a small daily time series. The values come from NumPy's global
# random generator, which is not seeded here, so they differ between runs.
df = pd.DataFrame({
    'date': pd.date_range(start='2021-01-01', periods=18, freq='D'),
    'value': 100 * np.cumsum(np.random.normal(loc=0.01, scale=0.005, size=18)),
})

# Define the y-axis limits with a margin of 5 around the data; the same two
# numbers are reused below as the low/high levels of the weekend helper series.
ymin, ymax = df['value'].min() - 5, df['value'].max() + 5

# Create an auxiliary time series for highlighting the weekends, equal
# to "ymax" on Saturday and Sunday, and to "ymin" on the other days.
df['weekend'] = np.where(
    df['date'].dt.day_name().isin(['Saturday', 'Sunday']),
    ymax,
    ymin,
)

# Define the figure layout.
layout = dict(
    plot_bgcolor='white',
    paper_bgcolor='white',
    margin=dict(t=5, b=5, l=5, r=5, pad=0),
    yaxis=dict(
        # Fix the y-axis limits so the helper series spans the full height.
        range=[ymin, ymax],
        tickfont=dict(size=6),
        linecolor='#000000',
        color='#000000',
        showgrid=False,
        mirror=True,
    ),
    xaxis=dict(
        type='date',
        tickformat='%d-%b-%Y (%a)',
        tickfont=dict(size=6),
        nticks=20,
        linecolor='#000000',
        color='#000000',
        ticks='outside',
        mirror=True,
    ),
)

# Collect the figure traces; the weekend band goes first so that the data
# line is drawn on top of it.
data = []

# Plot the weekends as a filled area chart.
data.append(
    go.Scatter(
        x=df['date'],
        y=df['weekend'],
        # There is no earlier trace, so per the plotly reference 'tonext'
        # behaves like 'toself' and closes the step line into a filled shape.
        fill='tonext',
        fillcolor='#d9d9d9',
        mode='lines',
        # Zero-width line: only the fill is visible. 'hvh' puts each step
        # halfway between two dates, centring the band on the weekend points.
        line=dict(width=0, shape='hvh'),
        showlegend=False,
        # 'skip' removes this helper trace from hover interactions. The
        # previous value None only left the attribute unset (default hover).
        hoverinfo='skip',
    )
)

# Plot the time series as a line chart.
data.append(
    go.Scatter(
        x=df['date'],
        y=df['value'],
        mode='lines+markers',
        marker=dict(size=4, color='#cc503e'),
        line=dict(width=1, color='#cc503e'),
        showlegend=False,
    )
)

# Create the figure.
fig = go.Figure(data=data, layout=layout)

# Save the figure as a static image.
# NOTE(review): static export presumably needs an image engine such as
# kaleido installed — confirm for the plotly version in use.
fig.write_image('figure.png', scale=2, width=500, height=300)
我会考虑使用 make_subplots,并把一条 go.Scatter 轨迹放到次 y 轴上充当背景色,而不是用形状(shape)来标示周末。
基本代码元素:
# One subplot cell that also carries a secondary y-axis.
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Weekend indicator drawn on the secondary axis: a filled step trace with a
# fully transparent outline, so only the translucent fill is visible.
fig.add_trace(
    go.Scatter(
        x=df['date'],
        y=df['weekend'],
        fill='tonexty',
        fillcolor='rgba(99, 110, 250, 0.2)',
        line=dict(shape='hv', color='rgba(0,0,0,0)'),
        showlegend=False,
    ),
    row=1,
    col=1,
    secondary_y=True,
)
图表:
完整代码:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
import datetime
from plotly.subplots import make_subplots
# Complete example: a seeded random-walk signal on the primary y-axis with
# weekends shaded by a filled step trace on a secondary y-axis.

# Remove pandas' limit on the number of rows shown when printing frames.
pd.set_option('display.max_rows', None)

# Data sample: one column of integer steps drawn from {-1, 0, 1}
# (the upper bound of randint is exclusive), seeded for reproducibility.
cols = ['signal']
nperiods = 50
np.random.seed(2)
df = pd.DataFrame(
    np.random.randint(-1, 2, size=(nperiods, len(cols))),
    columns=cols,
)

# Daily dates starting on 2020-01-01, one per row.
datelist = pd.date_range(
    datetime.datetime(2020, 1, 1).strftime('%Y-%m-%d'),
    periods=nperiods,
).tolist()
df['date'] = datelist
df = df.set_index(['date'])
df.index = pd.to_datetime(df.index)

# Zero the first step so the cumulative sum starts at 0, then shift the
# whole walk up by 100.
df.iloc[0] = 0
df = df.cumsum().reset_index()
df['signal'] = df['signal'] + 100

# Weekend flag: 1 on Saturday (weekday 5) and Sunday (weekday 6), else 0.
df['weekend'] = np.where(df['date'].dt.weekday.isin([5, 6]), 1, 0)

fig = make_subplots(specs=[[{"secondary_y": True}]])

# Weekend band on the secondary axis; the outline is fully transparent so
# only the translucent fill shows.
fig.add_trace(
    go.Scatter(
        x=df['date'],
        y=df['weekend'],
        fill='tonexty',
        fillcolor='rgba(99, 110, 250, 0.2)',
        line=dict(shape='hv', color='rgba(0,0,0,0)'),
        showlegend=False,
    ),
    row=1,
    col=1,
    secondary_y=True,
)

fig.update_xaxes(showgrid=False)

# The secondary axis only spans 0..0.1 while the flag is 0 or 1, so weekend
# fills run past the top of the plot area; its grid and tick labels are hidden.
fig.update_layout(
    yaxis2=dict(
        range=[0, 0.1],
        showgrid=False,
        tickfont=dict(color='rgba(0,0,0,0)'),
    )
)

# The actual signal, drawn on the primary y-axis.
fig.add_trace(
    go.Scatter(x=df['date'], y=df['signal'], line=dict(color='blue')),
    secondary_y=False,
)
fig.show()
速度测试:
在我的系统上,把上面完整代码中的 nperiods 设为 2000 时,%%timeit 返回:
162 ms ± 1.59 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
而我最初建议的使用 fig.add_shape() 的方法则慢得多:
49.2 s ± 2.18 s per loop (mean ± std. dev. of 7 runs, 1 loop each)
对 @vestland 的回答做两处小修改(参见下面的代码块),为我解决了以下问题:
- 数据点落在阴影竖条的边缘,而不是明显位于竖条内部。把 line_shape 的取值由 hv 改为 hvh 即可修复。请参阅 plotly 文档中关于 line_shape 的说明。
- 最后一个周末的阴影竖条丢失了。把 fill 的取值设为 tozeroy 即可修复。请参阅 plotly 文档中关于 fill 的说明。
# Sketch only, not runnable as written: each `...` presumably stands for the
# arguments that stay unchanged; only `fill` and `line_shape` are modified.
fig.add_trace(go.Scatter(...,
fill = 'tozeroy', # Changed from 'tonexty'.
line_shape = 'hvh', # Changed from 'hv'.
...,
),