如何在 matplotlib/pandas 中将数据框的值绘制为百分比堆积条形图

How to make stacked bar plot of dataframe values as percentage in matplotlib/pandas

我的数据框中有几个由 0 和 1 组成的列表。如何在 pandas 或 matplotlib 中绘制百分比条形图,使图例显示 1 和 0,并在图上用文字标注 1 和 0 各占整个列表的百分比?

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Three equally long samples of binary (0/1) observations.
list_1 = [1,0,1,1,1,0,0,1,0,1,1,1,0,0,1,0,1,1,1,0,0,1,0,1,1,1,0,0,1,0,1,1,1,0,0,]
list_2 = [1,1,1,1,1,0,0,1,1,1,1,1,0,0,1,0,1,1,1,0,1,1,0,1,1,1,0,1,1,0,1,1,1,1,0,]
list_3 = [1,0,1,1,1,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,1,1,0,0,]

df1 = pd.DataFrame({'Data1': list_1, 'Data2': list_2, 'Data3': list_3})

# The mean of a 0/1 column is the fraction of ones in it.  DataFrame.mean()
# returns a Series, and assigning `.columns` on a Series does not label
# anything, so the column name is given while converting to a one-column frame.
df1 = df1.mean().to_frame('1')

# The remaining fraction of each sample is the share of zeros.
df2 = 1 - df1
df2.columns = ['0']

# One row per sample, one column per value ('1' and '0'); each row sums to 1,
# so the stacked horizontal bars all have the same total length.
df = pd.concat([df1, df2], axis=1)
df.plot(kind='barh', stacked=True, mark_right=True)  # this is ok

# Writes a single literal '%' at data coordinates (1, 2); it does not yet
# label the individual bar segments with their percentages.
plt.text(1, 2, '%', va='center', ha='center')

plt.show()

我得到了这张图:

但我想得到这 3 个列表中 1 和 0 各自的百分比,就像这样:

您可以使用 seaborn 的 histplot,并设置 multiple='fill'。
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# Three equally long samples of binary (0/1) observations.
list_1 = [1,0,1,1,1,0,0,1,0,1,1,1,0,0,1,0,1,1,1,0,0,1,0,1,1,1,0,0,1,0,1,1,1,0,0]
list_2 = [1,1,1,1,1,0,0,1,1,1,1,1,0,0,1,0,1,1,1,0,1,1,0,1,1,1,0,1,1,0,1,1,1,1,0]
list_3 = [1,0,1,1,1,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,1,1,0,0]

df = pd.DataFrame({'Data1': list_1, 'Data2': list_2, 'Data3': list_3})
sns.set(style='white')

# The wide-form frame is passed directly, so the columns are drawn as separate
# colored series.  multiple='fill' stacks them and rescales every stack to a
# total height of 1; discrete=True centers one bar on each integer value.
ax = sns.histplot(data=df, stat='percent', multiple='fill', discrete=True, shrink=0.8)
sns.despine()

# Only the values 0 and 1 occur, so those are the only meaningful ticks.
ax.set_xticks([0, 1])

# Display the figure when the snippet is run as a plain script.
plt.show()

对于水平条形图和进一步的定制,将数据框转换为长格式会很有帮助。

import matplotlib.pyplot as plt
from matplotlib.ticker import PercentFormatter
import seaborn as sns
import pandas as pd

# Three equally long samples of binary (0/1) observations.
list_1 = [1,0,1,1,1,0,0,1,0,1,1,1,0,0,1,0,1,1,1,0,0,1,0,1,1,1,0,0,1,0,1,1,1,0,0]
list_2 = [1,1,1,1,1,0,0,1,1,1,1,1,0,0,1,0,1,1,1,0,1,1,0,1,1,1,0,1,1,0,1,1,1,1,0]
list_3 = [1,0,1,1,1,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,1,1,0,0]

df = pd.DataFrame({'Data1': list_1, 'Data2': list_2, 'Data3': list_3})
sns.set(style='white')
fig, ax = plt.subplots(figsize=(10, 4))

# Long form: one row per observation, with the originating column name in
# 'Dataset' and the observed 0/1 in 'Value'.
long_df = df.melt(var_name='Dataset', value_name='Value')

# Putting 'Value' on the y axis makes the bars horizontal; multiple='fill'
# stacks the datasets and rescales every stack to a total width of 1.
sns.histplot(
    data=long_df,
    y='Value',
    hue='Dataset',
    stat='percent',
    multiple='fill',
    discrete=True,
    shrink=0.8,
    palette=['tomato', 'limegreen', 'cornflowerblue'],
    alpha=1,
    ax=ax,
)
sns.despine()

# Place the legend just outside the upper-right corner of the axes.
sns.move_legend(ax, bbox_to_anchor=(1.01, 1.02), loc='upper left')
ax.set_yticks([0, 1])

# The x axis runs from 0 to 1, so 1 corresponds to 100 %.
ax.xaxis.set_major_formatter(PercentFormatter(1))

# Write each segment's width, as a percentage, at the segment's center.
for bar in ax.patches:
    width = bar.get_width()
    center = (bar.get_x() + width / 2, bar.get_y() + bar.get_height() / 2)
    ax.annotate(text=f'{width * 100:0.2f} %', xy=center,
                ha='center', va='center', color='white', size=20)

plt.tight_layout()
plt.show()