如何在 python 中使用 plotly 制作混合统计子图？

Question

我在 csv 文件中有一些数据集（总共 3 个），需要以不同的方式来展示它们：必须分别绘制成折线图、箱形图和带有 kde（核密度估计）的直方图。

我知道如何单独绘制它们，但为了更方便，我需要将它们合并到同一个输出中。查阅参考资料后，我写了下面这段代码，但它无法运行。

import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.figure_factory as ff
import numpy as np

y1 = np.random.randn(200) - 1
y2 = np.random.randn(200)
y3 = np.random.randn(200) + 1
x = np.linspace(0, 1, 200)

fig = make_subplots(
    rows=3, cols=2,
    column_widths=[0.6, 0.4],
    row_heights=[0.3, 0.6],
    specs=[[{"type": "scatter"}, {"type": "box"}],
           [{"type": "scatter"}, {"type": "dist", "rowspan": 2}]
           [{"type": "scatter"},            None           ]])

fig.add_trace(
    go.Scatter(x = x, 
                y = y1,
                hoverinfo = 'x+y',
                mode='lines',
                line=dict(color='rgb(0, 0, 0)',
                width=1),
                showlegend=False,
                )
    row=1, col=1
)

fig.add_trace(
    go.Scatter(x = x, 
                y = y2,
                hoverinfo = 'x+y',
                mode='lines',
                line=dict(color='rgb(246, 52, 16)',
                width=1),
                showlegend=False,
                )
    row=2, col=1
)

fig.add_trace(
    go.Scatter(x = x, 
                y = y3,
                hoverinfo = 'x+y',
                mode='lines',
                line=dict(color='rgb(16, 154, 246)',
                width=1),
                showlegend=False,
                )
    row=3, col=1
)

fig.add_trace(
    go.Box(x=y1)
    go.Box(x=y2)
    go.Box(x=y3)
    row=1, col=2
)

hist_data = [y1, y2, y3]

fig.add_trace(
    ff.create_distplot(hist_data,
                         bin_size=.02, show_rug=False)

    row=2, col=2
)

fig.show()

上面的代码有什么问题，或者我怎样才能用唯一的输出绘制这些图表？

P.S. 三条折线图需要分开显示（各占一个子图），以便更好地可视化。

Answer 1

如果你想使用 matplotlib 将所有图形合并到同一个输出中，你可以使用子图，我不知道它是否是你想要的。

import matplotlib.pyplot as plt
import numpy as np

# Shared x grid plus three noisy series of 200 samples each, shifted by -1, 0 and +1.
x = np.linspace(0, 1, 200)
y1, y2, y3 = (np.random.randn(200) + shift for shift in (-1, 0, 1))

def scatter_plot(ax, x, y):
    """Draw y against x as a scatter plot on ax and apply the demo labels.

    ax is used through its scatter(), locator_params() and label-setting
    methods; nothing is returned.
    """
    ax.scatter(x, y)
    ax.locator_params(nbins=3)
    # Axes.set() forwards each keyword to the matching set_<name>() method.
    ax.set(xlabel='x-label', ylabel='y-label', title='Title')

def box_plot(ax, y):
    """Draw a box plot of y on ax and apply the demo labels.

    ax is used through its boxplot(), locator_params() and label-setting
    methods; nothing is returned.
    """
    ax.boxplot(y)
    ax.locator_params(nbins=3)
    # Axes.set() forwards each keyword to the matching set_<name>() method.
    ax.set(xlabel='x-label', ylabel='y-label', title='Title')

def hist_plot(ax, y):
    """Draw a histogram of y on ax and apply the demo labels.

    ax is used through its hist(), locator_params() and label-setting
    methods; nothing is returned.
    """
    ax.hist(y)
    ax.locator_params(nbins=3)
    # Axes.set() forwards each keyword to the matching set_<name>() method.
    ax.set(xlabel='x-label', ylabel='y-label', title='Title')

# 3x2 grid of axes: scatter plots in ax1-ax3, a histogram in ax4 and a
# box plot in ax5, so that every chart type requested in the question appears.
fig, ((ax1, ax2), (ax3, ax4), (ax5, ax6)) = plt.subplots(nrows=3, ncols=2)
scatter_plot(ax1, x, y1)
scatter_plot(ax2, x, y2)
scatter_plot(ax3, x, y3)
hist_plot(ax4, y1)
# One box per series, drawn side by side for comparison.
box_plot(ax5, [y1, y2, y3])
# Nothing is drawn in the last cell; hide its empty frame.
ax6.axis('off')
# Keep titles and axis labels of neighbouring subplots from overlapping.
plt.tight_layout()
plt.show()

图：

Answer 2

评论中对话后的第二次尝试。

以下是我目前能做到的最好结果。这种方法是：像您的示例代码那样先构建一个 ff.create_distplot，然后“窃取”其中的数据，再用 go.Histogram、go.Scatter 和 go.Box 对象的组合来模拟前者的分布图和地毯图（rug plot）。

图：

代码：

import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.figure_factory as ff
import numpy as np
import pandas as pd

# data: three noisy series (shifted by -1, 0 and +1) sampled on a shared x grid
y1 = np.random.randn(200) - 1
y2 = np.random.randn(200)
y3 = np.random.randn(200) + 1
x = np.linspace(0, 1, 200)

series = [y1, y2, y3]
# One colour per series, reused in every subplot so that a series is
# recognisable across the whole figure.
colors = ['rgb(0, 0, 0)', 'rgb(246, 52, 16)', 'rgb(16, 154, 246)']

# subplot setup: 3 rows x 2 columns
fig = make_subplots(rows=3, cols=2)

# Lines [1,1], [2,1], [3,1]: one series per row in the left column
for row, (y, color) in enumerate(zip(series, colors), start=1):
    fig.add_trace(
        go.Scatter(x=x,
                   y=y,
                   hoverinfo='x+y',
                   mode='lines',
                   line=dict(color=color, width=1),
                   showlegend=False,
                   name=f'series {row}'),
        row=row, col=1,
    )

# Box plots [1,2]: all three series share one subplot. They are added in
# reverse order (y3, y2, y1), exactly as in the original trace sequence.
for y, color in zip(reversed(series), reversed(colors)):
    fig.add_trace(go.Box(x=y, marker_color=color), row=1, col=2)

# Density plots [2,2]
hist_data = [y1, y2, y3]
group_labels = ['Group 1', 'Group 2', 'Group 3']

# fig2 is only there to produce the numbers (bins and KDE curves) that are
# copied into the go.Histogram and go.Scatter traces below.
fig2 = ff.create_distplot(hist_data, group_labels)

# fig2 holds the histograms first, followed by the KDE curves (and then the
# rug traces, which are not copied).
n_series = len(hist_data)
for hist_trace, color in zip(fig2['data'][:n_series], colors):
    fig.add_trace(go.Histogram(hist_trace, marker_color=color), row=2, col=2)

for kde_trace, color in zip(fig2['data'][n_series:2 * n_series], colors):
    fig.add_trace(go.Scatter(kde_trace, line=dict(color=color, width=0.5)),
                  row=2, col=2)

# Sorry excuse for a rug plot [3,2]: every observation becomes a small
# vertical tick; each series sits on its own constant y level.
rug_levels = [1.2, 1.1, 1]
for y, level, color in zip(series, rug_levels, colors):
    fig.add_trace(
        go.Scatter(x=y,
                   y=np.full(len(y), level),
                   mode='markers',
                   marker=dict(color=color, symbol='line-ns-open')),
        row=3, col=2,
    )

# Some manual adjustments on the rug plot: zoom in on the three levels and
# make the (meaningless) y tick labels fully transparent.
fig.update_yaxes(range=[0.9, 1.3],
                 tickfont=dict(color='rgba(0,0,0,0)', size=14),
                 row=3, col=2)

# Only the traces of fig2 were copied, not its layout, so the overlay bar mode
# that create_distplot uses has to be set on this figure explicitly; otherwise
# the three histograms are drawn as grouped bars.
fig.update_layout(title='Mixed statistical subplots',
                  showlegend=False,
                  barmode='overlay')

fig.show()

编辑 - 第一次尝试：

我们将从这里开始：

What is wrong with the code above [...]?

这取决于您在这里的目的。但首先，你在这样的地方遗漏了很多逗号：

fig.add_trace(
    go.Scatter(x = x, 
                y = y3,
                hoverinfo = 'x+y',
                mode='lines',
                line=dict(color='rgb(16, 154, 246)',
                width=1),
                showlegend=False,
                ) # <==================== insert missing comma !!!
    row=3, col=1
)

下一个片段让我感到困惑：

fig.add_trace(
    go.Box(x=y1)
    go.Box(x=y2)
    go.Box(x=y3)
    row=1, col=2
)

这里至少少了一个逗号，但我还是不太清楚你想做什么。在我看来，你是想把所有箱形图放在同一个图表中，并将其绘制在第二列的顶部，但我认为其余的设置没有多大意义。而且你的设置中还用到了 ff.create_distplot()，这就更让人困惑了。

目前我能为您做的最好的事情是为每个系列设置一个图，第一列为 go.Scatter()，右列为相应的 go.Box()，如下所示：

图 1：

这可能并不 100% 符合您的需求，但至少我认为以这种方式查看您的数据是有意义的。

代码 1:

import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.figure_factory as ff
import numpy as np

# data: three noisy series (shifted by -1, 0 and +1) sampled on a shared x grid
y1 = np.random.randn(200) - 1
y2 = np.random.randn(200)
y3 = np.random.randn(200) + 1
x = np.linspace(0, 1, 200)

series = [y1, y2, y3]
line_colors = ['rgb(0, 0, 0)', 'rgb(246, 52, 16)', 'rgb(16, 154, 246)']

# subplot setup: one row per series, raw data on the left, box plot on the right
fig = make_subplots(rows=3, cols=2)

# raw data with go.Scatter, left column
for row, (y, color) in enumerate(zip(series, line_colors), start=1):
    fig.add_trace(
        go.Scatter(x=x,
                   y=y,
                   hoverinfo='x+y',
                   mode='lines',
                   line=dict(color=color, width=1),
                   showlegend=False),
        row=row, col=1,
    )

# box plots, right column: each row shows the box plot of the series drawn on
# its left (previously y1 was plotted in all three rows).
for row, y in enumerate(series, start=1):
    fig.add_trace(go.Box(x=y), row=row, col=2)

fig.show()

然后你可以像这样一起显示所有系列的分布：

图 2：

代码2：

# Stand-alone distplot of all three series; each group is labelled in the legend.
group_labels = [f'Group {i}' for i in (1, 2, 3)]
hist_data = [y1, y2, y3]
fig2 = ff.create_distplot(hist_data=hist_data, group_labels=group_labels)
fig2.show()

Answer 3

我在 plotly 论坛上发了同样的问题，用户 empet 给出了很好的回答。

正如我所怀疑的那样，make_subplots() 无法直接处理 Figure 对象，正确的做法是 "add figure data as a single trace at a time"（每次把图形数据作为单个 trace 添加）。

图： 代码：

import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.figure_factory as ff
import numpy as np

# data: three noisy series (shifted by -1, 0 and +1) sampled on a shared x grid
y1 = np.random.randn(200) - 1
y2 = np.random.randn(200)
y3 = np.random.randn(200) + 1
x = np.linspace(0, 1, 200)

series = [y1, y2, y3]
# One colour per series, shared by the line, box and distribution plots.
colors = ['#3f3f3f', '#00bfff', '#ff7f00']

# Layout: three line plots stacked in the left column; in the right column the
# box plots sit on top and the distribution plot spans the two rows below
# (the None entry marks the cell covered by that rowspan).
fig = make_subplots(
    rows=3, cols=2,
    column_widths=[0.55, 0.45],
    row_heights=[1., 1., 1.],
    specs=[[{"type": "scatter"}, {"type": "xy"}],
           [{"type": "scatter"}, {"type": "xy", "rowspan": 2}],
           [{"type": "scatter"},            None           ]])

# Line plots: one series per row in the left column.
for row, (y, color) in enumerate(zip(series, colors), start=1):
    fig.add_trace(
        go.Scatter(x=x,
                   y=y,
                   hoverinfo='x+y',
                   mode='lines',
                   line=dict(color=color, width=1),
                   showlegend=False),
        row=row, col=1,
    )

# Box plots: build them in a helper figure, then copy its traces one by one.
# NOTE(review): the names run '3', '2', '1' for y1, y2, y3 as in the original;
# presumably chosen for the resulting axis order - confirm.
box_names = ['3', '2', '1']
boxfig = go.Figure(data=[
    go.Box(x=y, showlegend=False, notched=True, marker_color=color, name=name)
    for y, color, name in zip(series, colors, box_names)
])

for trace in boxfig.data:
    fig.add_trace(trace, row=1, col=2)

group_labels = ['Group 1', 'Group 2', 'Group 3']
hist_data = [y1, y2, y3]

# make_subplots() cannot take a whole figure, so create the distplot
# separately and add its traces to the subplot grid one at a time.
distplfig = ff.create_distplot(hist_data, group_labels, colors=colors,
                               bin_size=.2, show_rug=False)

for trace in distplfig.data:
    fig.add_trace(trace, row=2, col=2)

# Draw the histograms on top of each other instead of as grouped bars.
fig.update_layout(barmode='overlay')
fig.show()

如何在 python 中使用 plotly 制作混合统计子图？

How to make mixed statistical subplots using plotly in python?

python

python-3.x

plotly

plotly-python

评论中对话后的第二次尝试。

编辑 - 第一次尝试：