如何连接 python 中拆分小提琴图之间的各个数据点?
How can I connect the individual data points between split violin plots in python plotly?
如何在 python plotly 中连接拆分小提琴图之间的各个数据点?
示例代码:
from plotly import graph_objs as go
import pandas as pd
# Sample data: six measurements (index labels A-F) for every
# combination of date ("2021.01", "2021.02") and test ("test1", "test2").
df = pd.DataFrame(
    data=[
        ["2021.01", "test1", 1.1], ["2021.01", "test1", 1.2], ["2021.01", "test1", 1.3], ["2021.01", "test1", 1.4], ["2021.01", "test1", 1.5], ["2021.01", "test1", 1.6],
        ["2021.02", "test1", 1.2], ["2021.02", "test1", 1.3], ["2021.02", "test1", 1.4], ["2021.02", "test1", 1.5], ["2021.02", "test1", 1.6], ["2021.02", "test1", 1.7],
        ["2021.01", "test2", 1.5], ["2021.01", "test2", 1.6], ["2021.01", "test2", 1.7], ["2021.01", "test2", 1.8], ["2021.01", "test2", 1.9], ["2021.01", "test2", 2.0],
        ["2021.02", "test2", 1.7], ["2021.02", "test2", 1.8], ["2021.02", "test2", 1.9], ["2021.02", "test2", 2.0], ["2021.02", "test2", 2.1], ["2021.02", "test2", 2.2],
    ],
    columns=["date", "test", "values"],
    index=["A", "B", "C", "D", "E", "F"] * 4,
)

fig = go.Figure()

# One half-violin per date: the first date is drawn on the "negative" side,
# the second on the "positive" side, each with its points offset via pointpos
# in the matching direction.
for date_label, violin_side, point_offset in (
    ("2021.01", "negative", -1.5),
    ("2021.02", "positive", 1.5),
):
    # Filter once per trace (and avoid shadowing the builtin ``filter``).
    subset = df[df["date"] == date_label]
    fig.add_trace(go.Violin(
        x=subset["test"],
        y=subset["values"],
        text=subset.index,
        name=date_label,
        side=violin_side,
        pointpos=point_offset,
    ))

# Settings shared by both half-violins.
fig.update_traces(
    meanline_visible=True,
    points="all",
    jitter=0.05,
    scalemode="count",
)
fig.show()
Output:
我希望在原始输出的基础上,为 F 数据点画一条连接线,如下所示:
Desired output
我试图获取每个的最大值并在类别数据中添加折线图,但没有成功。 x轴的位置是手动设置的,所以需要找到最优值。 x轴可以根据图形区域设置,y轴可以根据y轴设置。
# Maximum of "values" for every (date, test) pair. groupby sorts its keys,
# so with the sample data the rows are ordered
# (2021.01, test1), (2021.01, test2), (2021.02, test1), (2021.02, test2).
lines = df.groupby(['date', 'test'])['values'].max().to_frame('values')
peaks = lines['values'].to_numpy()

# Each segment is (x0, x1, y0, y1). The x positions are hand-tuned fractions
# of the plotting area (xref='paper'); the y values are in data coordinates.
segments = (
    (0.055, 0.45, peaks[0], peaks[2]),
    (0.45, 0.55, peaks[2], peaks[1]),
    (0.55, 0.95, peaks[1], peaks[3]),
)
for x_start, x_end, y_start, y_end in segments:
    fig.add_shape(
        type='line',
        x0=x_start,
        x1=x_end,
        y0=y_start,
        y1=y_end,
        xref='paper',
        yref='y',
        line=dict(color='RoyalBlue', width=2),
    )
fig.show()
使用 r-beginners answer 我想出了一个使用 pointpos 的动态解决方案。如果只有 2 个小提琴图,它会起作用。
原代码略有改动(添加了pointpos作为变量,并为过滤器添加了索引):
from plotly import graph_objs as go
import pandas as pd
# Offset of the sample points from the violin centre; reused below to
# place the connecting lines.
pointpos = 1.5
# The two dates being compared (left and right half of each violin).
filter_1 = "2021.01"
filter_2 = "2021.02"

# Sample data: six measurements (index labels A-F) per (date, test) pair.
df = pd.DataFrame(
    data=[
        ["2021.01", "test1", 1.1], ["2021.01", "test1", 1.2], ["2021.01", "test1", 1.3], ["2021.01", "test1", 1.4], ["2021.01", "test1", 1.5], ["2021.01", "test1", 1.6],
        ["2021.02", "test1", 1.2], ["2021.02", "test1", 1.3], ["2021.02", "test1", 1.4], ["2021.02", "test1", 1.5], ["2021.02", "test1", 1.6], ["2021.02", "test1", 1.7],
        ["2021.01", "test2", 1.5], ["2021.01", "test2", 1.6], ["2021.01", "test2", 1.7], ["2021.01", "test2", 1.8], ["2021.01", "test2", 1.9], ["2021.01", "test2", 2.0],
        ["2021.02", "test2", 1.7], ["2021.02", "test2", 1.8], ["2021.02", "test2", 1.9], ["2021.02", "test2", 2.0], ["2021.02", "test2", 2.1], ["2021.02", "test2", 2.2],
    ],
    columns=["date", "test", "values"],
    index=["A", "B", "C", "D", "E", "F"] * 4,
)

fig = go.Figure()

# (date, violin side, signed point offset) for the left and right halves.
halves = [
    (filter_1, "negative", -pointpos),
    (filter_2, "positive", pointpos),
]
for half_date, half_side, half_offset in halves:
    rows = df[df["date"] == half_date]
    fig.add_trace(go.Violin(
        x=rows["test"],
        y=rows["values"],
        text=rows.index,
        name=half_date,
        side=half_side,
        pointpos=half_offset,
    ))

# Settings shared by both half-violins.
fig.update_traces(
    meanline_visible=True,
    points="all",
    jitter=0.05,
    scalemode="count",
)
fig.show()
对 r-beginner 的答案稍作修改(将 xref="paper" 改为 xref="x",并使连接线的位置更加动态):
# Index label of the data point to connect across all half-violins.
filter_letter = "F"
lines = df[df.index == filter_letter]
filter_test_1 = "test1"
filter_test_2 = "test2"


def _value_at(date, test):
    """Return the "values" entry of ``lines`` for one (date, test) pair.

    Raises ValueError when the pair does not select exactly one row.
    """
    selected = lines[(lines["date"] == date) & (lines["test"] == test)]["values"]
    # float(Series) is deprecated in recent pandas; .item() extracts the
    # scalar explicitly and still insists on exactly one matching row.
    return float(selected.item())


# (x, y) anchor of the selected point in each half-violin, left to right.
# The x offsets are taken from the original snippet: the points appear at
# roughly +/- pointpos/4 around the category positions 0 and 1 on the x axis
# (empirical: pointpos and x-axis units do not share a scale).
anchors = [
    (-pointpos / 4, _value_at(filter_1, filter_test_1)),
    (pointpos / 4, _value_at(filter_2, filter_test_1)),
    (1 - pointpos / 4, _value_at(filter_1, filter_test_2)),
    (1 + pointpos / 4, _value_at(filter_2, filter_test_2)),
]

# Draw one line segment between each pair of neighbouring anchors.
for (x_start, y_start), (x_end, y_end) in zip(anchors, anchors[1:]):
    fig.add_shape(
        type='line',
        x0=x_start,
        x1=x_end,
        y0=y_start,
        y1=y_end,
        xref='x',
        yref='y',
        line=dict(color='RoyalBlue', width=2),
    )
fig.show()
查看代码的输出:code's output
请注意,小提琴图的 pointpos 与 xref 所使用的刻度(比例)并不相同,见下图说明:
pointpos vs xref
很高兴您找到了适合您的解决方案。在您的代码中,您必须指定 `pointpos = 1.5`,这与直接指定 x 轴的值相差不远。如果您愿意指定 x 值而不是 pointpos,下面的代码使用 `fig = make_subplots()` 生成下图,其中这个小片段可以动态查找所有的 y 值:
# Find the y values for the connecting line: the maximum of "values" for
# every (test, date) pair, ordered by test and then by date, each in order
# of first appearance in df.
lineY = []
for t in df.test.unique():
    dfd = df[df.test == t]
    for d in dfd.date.unique():
        lineY.append(dfd[dfd.date == d]['values'].max())
图:
完整代码:
from plotly import graph_objs as go
import pandas as pd
# Sample data, grouped as (date, test, six measurements). Every group is
# expanded to one row per measurement and labelled A-F in order.
_groups = [
    ("2021.01", "test1", [1.1, 1.2, 1.3, 1.4, 1.5, 1.6]),
    ("2021.02", "test1", [1.2, 1.3, 1.4, 1.5, 1.6, 1.7]),
    ("2021.01", "test2", [1.5, 1.6, 1.7, 1.8, 1.9, 2.0]),
    ("2021.02", "test2", [1.7, 1.8, 1.9, 2.0, 2.1, 2.2]),
]
df = pd.DataFrame(
    data=[
        [date, test, value]
        for date, test, measurements in _groups
        for value in measurements
    ],
    columns=["date", "test", "values"],
    index=["A", "B", "C", "D", "E", "F"] * len(_groups),
)
# fig = go.Figure()
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
# Overlay a second x axis (x2) on top of the categorical one so that the
# connecting line can be positioned with plain numeric x values.
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.update_layout(xaxis2={'anchor': 'y', 'overlaying': 'x', 'side': 'top'})

# One half-violin per date, with the points offset via pointpos in the
# direction matching the side of the violin.
for date_label, violin_side, point_offset in (
    ("2021.01", "negative", -1.5),
    ("2021.02", "positive", 1.5),
):
    # Filter once per trace (and avoid shadowing the builtin ``filter``).
    subset = df[df["date"] == date_label]
    fig.add_trace(go.Violin(
        x=subset["test"],
        y=subset["values"],
        text=subset.index,
        name=date_label,
        side=violin_side,
        pointpos=point_offset,
    ))

# Applied while only the violins exist, so the line trace added further
# down keeps its own marker size.
fig.update_traces(
    meanline_visible=True,
    points="all",
    jitter=0.0,
    scalemode="count",
)
fig.update_traces(marker_size=10)
# Make the tick labels of the overlay axis fully transparent.
fig.update_layout(xaxis2_tickfont_color='rgba(0,0,0,0)')

# Find the y values for the connecting line: the maximum of "values" for
# every (test, date) pair, ordered by test and then by date, each in order
# of first appearance in df.
lineY = []
for t in df.test.unique():
    dfd = df[df.test == t]
    for d in dfd.date.unique():
        lineY.append(dfd[dfd.date == d]['values'].max())

# Hand-picked x positions on the overlay axis, one per entry of lineY.
lineX = [1, 2.27, 2.73, 4]
fig.add_trace(go.Scatter(
    x=lineX,
    y=lineY,
    mode='markers+lines',
    name='max',
    marker_size=4,
    line_color='black',
    # Bind the line to the overlay axis directly instead of patching
    # fig.data[2] after the fact.
    xaxis='x2',
))
fig.show()
如何在 python plotly 中连接拆分小提琴图之间的各个数据点?
示例代码:
from plotly import graph_objs as go
import pandas as pd
# Sample data: six measurements (index labels A-F) for every
# combination of date ("2021.01", "2021.02") and test ("test1", "test2").
df = pd.DataFrame(
    data=[
        ["2021.01", "test1", 1.1], ["2021.01", "test1", 1.2], ["2021.01", "test1", 1.3], ["2021.01", "test1", 1.4], ["2021.01", "test1", 1.5], ["2021.01", "test1", 1.6],
        ["2021.02", "test1", 1.2], ["2021.02", "test1", 1.3], ["2021.02", "test1", 1.4], ["2021.02", "test1", 1.5], ["2021.02", "test1", 1.6], ["2021.02", "test1", 1.7],
        ["2021.01", "test2", 1.5], ["2021.01", "test2", 1.6], ["2021.01", "test2", 1.7], ["2021.01", "test2", 1.8], ["2021.01", "test2", 1.9], ["2021.01", "test2", 2.0],
        ["2021.02", "test2", 1.7], ["2021.02", "test2", 1.8], ["2021.02", "test2", 1.9], ["2021.02", "test2", 2.0], ["2021.02", "test2", 2.1], ["2021.02", "test2", 2.2],
    ],
    columns=["date", "test", "values"],
    index=["A", "B", "C", "D", "E", "F"] * 4,
)

fig = go.Figure()

# One half-violin per date: the first date is drawn on the "negative" side,
# the second on the "positive" side, each with its points offset via pointpos
# in the matching direction.
for date_label, violin_side, point_offset in (
    ("2021.01", "negative", -1.5),
    ("2021.02", "positive", 1.5),
):
    # Filter once per trace (and avoid shadowing the builtin ``filter``).
    subset = df[df["date"] == date_label]
    fig.add_trace(go.Violin(
        x=subset["test"],
        y=subset["values"],
        text=subset.index,
        name=date_label,
        side=violin_side,
        pointpos=point_offset,
    ))

# Settings shared by both half-violins.
fig.update_traces(
    meanline_visible=True,
    points="all",
    jitter=0.05,
    scalemode="count",
)
fig.show()
Output:
我希望在原始输出的基础上,为 F 数据点画一条连接线,如下所示:
Desired output
我试图获取每个的最大值并在类别数据中添加折线图,但没有成功。 x轴的位置是手动设置的,所以需要找到最优值。 x轴可以根据图形区域设置,y轴可以根据y轴设置。
# Maximum of "values" for every (date, test) pair. groupby sorts its keys,
# so with the sample data the rows are ordered
# (2021.01, test1), (2021.01, test2), (2021.02, test1), (2021.02, test2).
lines = df.groupby(['date', 'test'])['values'].max().to_frame('values')
peaks = lines['values'].to_numpy()

# Each segment is (x0, x1, y0, y1). The x positions are hand-tuned fractions
# of the plotting area (xref='paper'); the y values are in data coordinates.
segments = (
    (0.055, 0.45, peaks[0], peaks[2]),
    (0.45, 0.55, peaks[2], peaks[1]),
    (0.55, 0.95, peaks[1], peaks[3]),
)
for x_start, x_end, y_start, y_end in segments:
    fig.add_shape(
        type='line',
        x0=x_start,
        x1=x_end,
        y0=y_start,
        y1=y_end,
        xref='paper',
        yref='y',
        line=dict(color='RoyalBlue', width=2),
    )
fig.show()
使用 r-beginners answer 我想出了一个使用 pointpos 的动态解决方案。如果只有 2 个小提琴图,它会起作用。
原代码略有改动(添加了pointpos作为变量,并为过滤器添加了索引):
from plotly import graph_objs as go
import pandas as pd
# Offset of the sample points from the violin centre; reused below to
# place the connecting lines.
pointpos = 1.5
# The two dates being compared (left and right half of each violin).
filter_1 = "2021.01"
filter_2 = "2021.02"

# Sample data: six measurements (index labels A-F) per (date, test) pair.
df = pd.DataFrame(
    data=[
        ["2021.01", "test1", 1.1], ["2021.01", "test1", 1.2], ["2021.01", "test1", 1.3], ["2021.01", "test1", 1.4], ["2021.01", "test1", 1.5], ["2021.01", "test1", 1.6],
        ["2021.02", "test1", 1.2], ["2021.02", "test1", 1.3], ["2021.02", "test1", 1.4], ["2021.02", "test1", 1.5], ["2021.02", "test1", 1.6], ["2021.02", "test1", 1.7],
        ["2021.01", "test2", 1.5], ["2021.01", "test2", 1.6], ["2021.01", "test2", 1.7], ["2021.01", "test2", 1.8], ["2021.01", "test2", 1.9], ["2021.01", "test2", 2.0],
        ["2021.02", "test2", 1.7], ["2021.02", "test2", 1.8], ["2021.02", "test2", 1.9], ["2021.02", "test2", 2.0], ["2021.02", "test2", 2.1], ["2021.02", "test2", 2.2],
    ],
    columns=["date", "test", "values"],
    index=["A", "B", "C", "D", "E", "F"] * 4,
)

fig = go.Figure()

# (date, violin side, signed point offset) for the left and right halves.
halves = [
    (filter_1, "negative", -pointpos),
    (filter_2, "positive", pointpos),
]
for half_date, half_side, half_offset in halves:
    rows = df[df["date"] == half_date]
    fig.add_trace(go.Violin(
        x=rows["test"],
        y=rows["values"],
        text=rows.index,
        name=half_date,
        side=half_side,
        pointpos=half_offset,
    ))

# Settings shared by both half-violins.
fig.update_traces(
    meanline_visible=True,
    points="all",
    jitter=0.05,
    scalemode="count",
)
fig.show()
对 r-beginner 的答案稍作修改(将 xref="paper" 改为 xref="x",并使连接线的位置更加动态):
# Index label of the data point to connect across all half-violins.
filter_letter = "F"
lines = df[df.index == filter_letter]
filter_test_1 = "test1"
filter_test_2 = "test2"


def _value_at(date, test):
    """Return the "values" entry of ``lines`` for one (date, test) pair.

    Raises ValueError when the pair does not select exactly one row.
    """
    selected = lines[(lines["date"] == date) & (lines["test"] == test)]["values"]
    # float(Series) is deprecated in recent pandas; .item() extracts the
    # scalar explicitly and still insists on exactly one matching row.
    return float(selected.item())


# (x, y) anchor of the selected point in each half-violin, left to right.
# The x offsets are taken from the original snippet: the points appear at
# roughly +/- pointpos/4 around the category positions 0 and 1 on the x axis
# (empirical: pointpos and x-axis units do not share a scale).
anchors = [
    (-pointpos / 4, _value_at(filter_1, filter_test_1)),
    (pointpos / 4, _value_at(filter_2, filter_test_1)),
    (1 - pointpos / 4, _value_at(filter_1, filter_test_2)),
    (1 + pointpos / 4, _value_at(filter_2, filter_test_2)),
]

# Draw one line segment between each pair of neighbouring anchors.
for (x_start, y_start), (x_end, y_end) in zip(anchors, anchors[1:]):
    fig.add_shape(
        type='line',
        x0=x_start,
        x1=x_end,
        y0=y_start,
        y1=y_end,
        xref='x',
        yref='y',
        line=dict(color='RoyalBlue', width=2),
    )
fig.show()
查看代码的输出:code's output
请注意,小提琴图的 pointpos 与 xref 所使用的刻度(比例)并不相同,见下图说明: pointpos vs xref
很高兴您找到了适合您的解决方案。在您的代码中,您必须指定 `pointpos = 1.5`,这与直接指定 x 轴的值相差不远。如果您愿意指定 x 值而不是 pointpos,下面的代码使用 `fig = make_subplots()` 生成下图,其中这个小片段可以动态查找所有的 y 值:
# Find the y values for the connecting line: the maximum of "values" for
# every (test, date) pair, ordered by test and then by date, each in order
# of first appearance in df.
lineY = []
for t in df.test.unique():
    dfd = df[df.test == t]
    for d in dfd.date.unique():
        lineY.append(dfd[dfd.date == d]['values'].max())
图:
完整代码:
from plotly import graph_objs as go
import pandas as pd
# Sample data, grouped as (date, test, six measurements). Every group is
# expanded to one row per measurement and labelled A-F in order.
_groups = [
    ("2021.01", "test1", [1.1, 1.2, 1.3, 1.4, 1.5, 1.6]),
    ("2021.02", "test1", [1.2, 1.3, 1.4, 1.5, 1.6, 1.7]),
    ("2021.01", "test2", [1.5, 1.6, 1.7, 1.8, 1.9, 2.0]),
    ("2021.02", "test2", [1.7, 1.8, 1.9, 2.0, 2.1, 2.2]),
]
df = pd.DataFrame(
    data=[
        [date, test, value]
        for date, test, measurements in _groups
        for value in measurements
    ],
    columns=["date", "test", "values"],
    index=["A", "B", "C", "D", "E", "F"] * len(_groups),
)
# fig = go.Figure()
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
# Overlay a second x axis (x2) on top of the categorical one so that the
# connecting line can be positioned with plain numeric x values.
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.update_layout(xaxis2={'anchor': 'y', 'overlaying': 'x', 'side': 'top'})

# One half-violin per date, with the points offset via pointpos in the
# direction matching the side of the violin.
for date_label, violin_side, point_offset in (
    ("2021.01", "negative", -1.5),
    ("2021.02", "positive", 1.5),
):
    # Filter once per trace (and avoid shadowing the builtin ``filter``).
    subset = df[df["date"] == date_label]
    fig.add_trace(go.Violin(
        x=subset["test"],
        y=subset["values"],
        text=subset.index,
        name=date_label,
        side=violin_side,
        pointpos=point_offset,
    ))

# Applied while only the violins exist, so the line trace added further
# down keeps its own marker size.
fig.update_traces(
    meanline_visible=True,
    points="all",
    jitter=0.0,
    scalemode="count",
)
fig.update_traces(marker_size=10)
# Make the tick labels of the overlay axis fully transparent.
fig.update_layout(xaxis2_tickfont_color='rgba(0,0,0,0)')

# Find the y values for the connecting line: the maximum of "values" for
# every (test, date) pair, ordered by test and then by date, each in order
# of first appearance in df.
lineY = []
for t in df.test.unique():
    dfd = df[df.test == t]
    for d in dfd.date.unique():
        lineY.append(dfd[dfd.date == d]['values'].max())

# Hand-picked x positions on the overlay axis, one per entry of lineY.
lineX = [1, 2.27, 2.73, 4]
fig.add_trace(go.Scatter(
    x=lineX,
    y=lineY,
    mode='markers+lines',
    name='max',
    marker_size=4,
    line_color='black',
    # Bind the line to the overlay axis directly instead of patching
    # fig.data[2] after the fact.
    xaxis='x2',
))
fig.show()