如何在 seaborn 图形级箱线图上叠加数据点

How to overlay data points on seaborn figure-level boxplots

我有以下数据框和下面的图 - 我想为因子图中的每个箱线图添加数据点,但我无法将箱线图和带状图组合到同一个图中(即它们不重叠,它们出现在彼此下方)。有解决办法吗?

import datetime

import numpy as np
import pandas as pd
import seaborn as sns

# Build one row per calendar day from 1 to 25 January 2020.
idx = pd.date_range('01-01-2020', '01-25-2020')

d = pd.Series({'01-01-2020': 1,
               '01-25-2020': 1})

d.index = pd.DatetimeIndex(d.index)

# Reindexing against the full range inserts the missing days (filled with 0).
d = d.reindex(idx, fill_value=0)
df = pd.DataFrame(d).rename_axis("dt").reset_index()
# Only the dates are needed from here on; drop the placeholder value column.
df.drop(columns=df.columns[1], inplace=True)

# calculate week number
# `Series.dt.week` was removed in pandas 2.0; `dt.isocalendar().week` is its
# replacement. It yields a UInt32 column, so cast back to a plain int dtype.
df["week"] = df["dt"].dt.isocalendar().week.astype("int64")

# create column counts for 'nhsbt centres'
df["A"] = np.random.randint(0, 50, df.shape[0])
df["B"] = np.random.randint(0, 30, df.shape[0])
df["C"] = np.random.randint(0, 20, df.shape[0])

# melt dataframe
# `week` is moved into the index so that `ignore_index=False` carries it
# through the melt; it is then copied back into a regular column for plotting.
df1 = df[["A", "B", "C", "week"]]
df1 = df1.set_index("week")
df1 = df1.melt(ignore_index=False)
df1["week"] = df1.index

# make boxplot
# `factorplot` was renamed to `catplot` and is no longer available in current
# seaborn releases, which also require x/y to be passed by keyword.
sns.catplot(data=df1, x="week", y="value", col="variable", kind="box")

如果你有这样的数据框:

import pandas as pd
import matplotlib.pyplot as plt
import  seaborn as sns
import numpy as np


# Build one row per calendar day from 1 to 25 January 2020.
idx = pd.date_range('01-01-2020', '01-25-2020')

d = pd.Series({'01-01-2020': 1,
               '01-25-2020': 1})

d.index = pd.DatetimeIndex(d.index)

# Reindexing against the full range inserts the missing days (filled with 0).
d = d.reindex(idx, fill_value=0)
df = pd.DataFrame(d).rename_axis("dt").reset_index()
# Only the dates are needed from here on; drop the placeholder value column.
df.drop(columns=df.columns[1], inplace=True)

# `Series.dt.week` was removed in pandas 2.0; `dt.isocalendar().week` is its
# replacement. It yields a UInt32 column, so cast back to a plain int dtype.
df["week"] = df["dt"].dt.isocalendar().week.astype("int64")
df["groupA"] = np.random.randint(0, 50, df.shape[0])
df["groupB"] = np.random.randint(0, 30, df.shape[0])
df["groupC"] = np.random.randint(0, 20, df.shape[0])

# Reshape to long format. `week` is moved into the index so that
# `ignore_index=False` carries it through the melt; it is then copied back
# into a regular column for plotting.
df1 = df[["groupA", "groupB", "groupC", "week"]]
df1 = df1.set_index("week")
df1 = df1.melt(ignore_index=False)
df1["week"] = df1.index
     variable  value  week
week                      
1      groupA     24     1
1      groupA     30     1
1      groupA     38     1
1      groupA     41     1
1      groupA     42     1
2      groupA     47     2
2      groupA      9     2
2      groupA     16     2
2      groupA     24     2
2      groupA      3     2
2      groupA     27     2
2      groupA     48     2
3      groupA     46     3
3      groupA     29     3
3      groupA      2     3
3      groupA     46     3
3      groupA     48     3
3      groupA     26     3
3      groupA     36     3
4      groupA     48     4
4      groupA     38     4
4      groupA     19     4
4      groupA     13     4
4      groupA     38     4
4      groupA     34     4
1      groupB     11     1
1      groupB     15     1
1      groupB     14     1
1      groupB     29     1
1      groupB      6     1
2      groupB     20     2
2      groupB     14     2
2      groupB     26     2
2      groupB     11     2
2      groupB     14     2
2      groupB      0     2
2      groupB     11     2
3      groupB     20     3
3      groupB     17     3
3      groupB     16     3
3      groupB     24     3
3      groupB     24     3
3      groupB     16     3
3      groupB     22     3
4      groupB     10     4
4      groupB     26     4
4      groupB      3     4
4      groupB      7     4
4      groupB     16     4
4      groupB     18     4
1      groupC     12     1
1      groupC      4     1
1      groupC      1     1
1      groupC      9     1
1      groupC     16     1
2      groupC      6     2
2      groupC     12     2
2      groupC      6     2
2      groupC     14     2
2      groupC      2     2
2      groupC     18     2
2      groupC     10     2
3      groupC     13     3
3      groupC     11     3
3      groupC     15     3
3      groupC      9     3
3      groupC     18     3
3      groupC      7     3
3      groupC      4     3
4      groupC      8     4
4      groupC     13     4
4      groupC      3     4
4      groupC      1     4
4      groupC      5     4
4      groupC      4     4

然后,您可以在 for 循环中将 seaborn.boxplot 与 seaborn.swarmplot(或 seaborn.stripplot)结合使用:

# One subplot per group. `squeeze=False` makes `plt.subplots` always return a
# 2-D array of Axes, so the code also works when there is only a single group
# (where a bare Axes object would otherwise be returned and `ax[i]` would fail).
groups = df1['variable'].unique()
fig, ax = plt.subplots(1, len(groups), figsize=(15, 5), squeeze=False)
ax = ax.ravel()  # flatten back to the 1-D layout, one Axes per group

for axis, group in zip(ax, groups):
    # Filter once per group and reuse the subset for both layers.
    subset = df1[df1['variable'] == group]
    # Drawing both plots on the same Axes overlays the points on the boxes.
    sns.boxplot(ax=axis, data=subset, x='week', y='value')
    sns.swarmplot(ax=axis, data=subset, x='week', y='value', color='black')
    axis.set_title(group)

plt.show()
seaborn function plot
seaborn.swarmplot
seaborn.stripplot

注释

据我所知,您不能使用 seaborn.factorplot(现在是 seaborn.catplot),因为它是一个图形级函数,不允许您在同一张图中组合多种绘图。

import seaborn as sns

# Figure-level box plots (one facet per variable), using the data from the OP
week_order = sorted(df1["week"].unique())
g = sns.catplot(data=df1, x="week", y="value", col="variable", kind="box")
# Draw a swarm plot on top of every facet of the grid
g.map(sns.swarmplot, "week", "value", order=week_order, color="k")