基于两个变量的堆叠柱形图
Stacked column bar chart over two variables
我有如下所示的一些数据:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Sample data: 40 rows (20 female followed by 20 male). Each row carries an
# age bracket plus a category label (M1-M5) at baseline and at endline.
data = {
    'gender': ['female'] * 20 + ['male'] * 20,
    'age': [
        # female rows
        '15 - 19', '20 - 24', '25 - 29', '30 - 34', '35 - 39', '> 39',
        '15 - 19', '20 - 24', '25 - 29', '30 - 34', '35 - 39', '> 39',
        '15 - 19', '20 - 24', '25 - 29', '30 - 34', '35 - 39', '> 39',
        '35 - 39', '> 39',
        # male rows
        '15 - 19', '20 - 24', '25 - 29', '30 - 34', '35 - 39', '> 39',
        '15 - 19', '20 - 24', '25 - 29', '30 - 34', '35 - 39', '> 39',
        '15 - 19', '20 - 24', '25 - 29', '30 - 34', '35 - 39', '> 39',
        '25 - 29', '30 - 34',
    ],
    'baseline': [
        # female rows
        'M1', 'M1', 'M1', 'M1', 'M1', 'M4', 'M4', 'M2', 'M2', 'M2',
        'M2', 'M2', 'M3', 'M3', 'M3', 'M3', 'M5', 'M5', 'M5', 'M5',
        # male rows
        'M1', 'M2', 'M3', 'M4', 'M5', 'M2', 'M2', 'M2', 'M3', 'M3',
        'M3', 'M3', 'M4', 'M4', 'M4', 'M5', 'M5', 'M5', 'M5', 'M5',
    ],
    'endline': [
        # female rows
        'M5', 'M3', 'M1', 'M1', 'M1', 'M4', 'M4', 'M5', 'M2', 'M5',
        'M5', 'M3', 'M3', 'M3', 'M4', 'M4', 'M4', 'M1', 'M1', 'M2',
        # male rows
        'M5', 'M5', 'M5', 'M1', 'M1', 'M1', 'M1', 'M4', 'M4', 'M4',
        'M4', 'M4', 'M3', 'M3', 'M3', 'M3', 'M2', 'M2', 'M2', 'M2',
    ],
}
df = pd.DataFrame(data)
df.head()  # notebook-style preview; the result is discarded when run as a script

# Share of each baseline category within each gender: normalize='index'
# divides every row of the contingency table by its row total.
gender_col = df['gender']
baseline_col = df['baseline']
cross_tab_prop = pd.crosstab(gender_col, baseline_col, normalize='index')
# Draw one stacked bar per row of cross_tab_prop (one per gender); each bar
# is split into the baseline-category proportions held in the columns.
ax = cross_tab_prop.plot(kind='bar',
                         stacked=True,
                         colormap='tab10',
                         figsize=(10, 6))
# Work on the returned Axes instead of pyplot's implicit "current axes".
ax.legend(loc="upper left", ncol=5)
ax.set_xlabel("Gender")
ax.set_ylabel("Proportion")
# Without an explicit show() no window appears when this runs as a plain
# script (outside a notebook).
plt.show()
我想制作如下所示的图表。
如果能就如何实现这一点给出任何提示,我将不胜感激。
提前致谢!
使用 seaborn 的做法是:
- 将数据帧转换为长格式(long form)
- 使用 multiple='fill' 和 col='gender' 调用 sns.displot()
import matplotlib.pyplot as plt
from matplotlib.ticker import PercentFormatter
import seaborn as sns
import pandas as pd
# Sample data: 40 rows (20 female followed by 20 male), each with a category
# label (M1-M5) at baseline and at endline.
data = {
    'gender': ['female'] * 20 + ['male'] * 20,
    'baseline': [
        # female rows
        'M1', 'M1', 'M1', 'M1', 'M1', 'M4', 'M4', 'M2', 'M2', 'M2',
        'M2', 'M2', 'M3', 'M3', 'M3', 'M3', 'M5', 'M5', 'M5', 'M5',
        # male rows
        'M1', 'M2', 'M3', 'M4', 'M5', 'M2', 'M2', 'M2', 'M3', 'M3',
        'M3', 'M3', 'M4', 'M4', 'M4', 'M5', 'M5', 'M5', 'M5', 'M5',
    ],
    'endline': [
        # female rows
        'M5', 'M3', 'M1', 'M1', 'M1', 'M4', 'M4', 'M5', 'M2', 'M5',
        'M5', 'M3', 'M3', 'M3', 'M4', 'M4', 'M4', 'M1', 'M1', 'M2',
        # male rows
        'M5', 'M5', 'M5', 'M1', 'M1', 'M1', 'M1', 'M4', 'M4', 'M4',
        'M4', 'M4', 'M3', 'M3', 'M3', 'M3', 'M2', 'M2', 'M2', 'M2',
    ],
}
df = pd.DataFrame(data)

# Wide -> long: stack the two measurement columns so that each row holds one
# (gender, which measurement, category) observation.
measurement_columns = ['baseline', 'endline']
df_long = pd.melt(
    df,
    id_vars='gender',
    value_vars=measurement_columns,
    var_name='which',
    value_name='property',
)
# One facet per gender; inside each facet there is one bar per value of
# 'which', stacked by 'property' with multiple='fill'.
facet_options = dict(x='which', hue='property', col='gender', multiple='fill')
g = sns.displot(data=df_long, **facet_options)
g.set(xlabel='', ylabel='')
# Show the y ticks as percentages, with 1.0 mapped to 100%.
first_panel = g.axes[0, 0]
first_panel.yaxis.set_major_formatter(PercentFormatter(xmax=1))
plt.show()
下面是同一张图的另一种样式:
import matplotlib.pyplot as plt
from matplotlib.ticker import PercentFormatter, MultipleLocator
import seaborn as sns
import pandas as pd
# df_long = ...  (the long-form frame built with melt in the previous snippet)
sns.set_style('whitegrid')
g = sns.displot(data=df_long, x='which', hue='property', col='gender',
                multiple='fill', shrink=0.7, palette='turbo')
g.set(xlabel='', ylabel='')
# Only the first panel's y axis is configured here.
# NOTE(review): presumably relies on the facets sharing their y axis
# (seaborn's default) - confirm.
y_axis = g.axes[0, 0].yaxis
y_axis.set_major_locator(MultipleLocator(.1))    # a tick every 0.1
y_axis.set_major_formatter(PercentFormatter(1))  # 1.0 is shown as 100%
g.axes[0, 0].set_xlim(-.6, 1.6)
sns.despine(left=True)
plt.subplots_adjust(wspace=0)  # no horizontal gap between the facets
# The first snippet ends with show(); without it nothing is displayed when
# this runs as a plain script.
plt.show()
我有如下所示的一些数据:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Sample data: 40 rows (20 female followed by 20 male). Each row carries an
# age bracket plus a category label (M1-M5) at baseline and at endline.
data = {
    'gender': ['female'] * 20 + ['male'] * 20,
    'age': [
        # female rows
        '15 - 19', '20 - 24', '25 - 29', '30 - 34', '35 - 39', '> 39',
        '15 - 19', '20 - 24', '25 - 29', '30 - 34', '35 - 39', '> 39',
        '15 - 19', '20 - 24', '25 - 29', '30 - 34', '35 - 39', '> 39',
        '35 - 39', '> 39',
        # male rows
        '15 - 19', '20 - 24', '25 - 29', '30 - 34', '35 - 39', '> 39',
        '15 - 19', '20 - 24', '25 - 29', '30 - 34', '35 - 39', '> 39',
        '15 - 19', '20 - 24', '25 - 29', '30 - 34', '35 - 39', '> 39',
        '25 - 29', '30 - 34',
    ],
    'baseline': [
        # female rows
        'M1', 'M1', 'M1', 'M1', 'M1', 'M4', 'M4', 'M2', 'M2', 'M2',
        'M2', 'M2', 'M3', 'M3', 'M3', 'M3', 'M5', 'M5', 'M5', 'M5',
        # male rows
        'M1', 'M2', 'M3', 'M4', 'M5', 'M2', 'M2', 'M2', 'M3', 'M3',
        'M3', 'M3', 'M4', 'M4', 'M4', 'M5', 'M5', 'M5', 'M5', 'M5',
    ],
    'endline': [
        # female rows
        'M5', 'M3', 'M1', 'M1', 'M1', 'M4', 'M4', 'M5', 'M2', 'M5',
        'M5', 'M3', 'M3', 'M3', 'M4', 'M4', 'M4', 'M1', 'M1', 'M2',
        # male rows
        'M5', 'M5', 'M5', 'M1', 'M1', 'M1', 'M1', 'M4', 'M4', 'M4',
        'M4', 'M4', 'M3', 'M3', 'M3', 'M3', 'M2', 'M2', 'M2', 'M2',
    ],
}
df = pd.DataFrame(data)
df.head()  # notebook-style preview; the result is discarded when run as a script

# Share of each baseline category within each gender: normalize='index'
# divides every row of the contingency table by its row total.
gender_col = df['gender']
baseline_col = df['baseline']
cross_tab_prop = pd.crosstab(gender_col, baseline_col, normalize='index')
# Draw one stacked bar per row of cross_tab_prop (one per gender); each bar
# is split into the baseline-category proportions held in the columns.
ax = cross_tab_prop.plot(kind='bar',
                         stacked=True,
                         colormap='tab10',
                         figsize=(10, 6))
# Work on the returned Axes instead of pyplot's implicit "current axes".
ax.legend(loc="upper left", ncol=5)
ax.set_xlabel("Gender")
ax.set_ylabel("Proportion")
# Without an explicit show() no window appears when this runs as a plain
# script (outside a notebook).
plt.show()
我想制作如下所示的图表。
如果能就如何实现这一点给出任何提示,我将不胜感激。
提前致谢!
使用 seaborn 的做法是:
- 将数据帧转换为长格式(long form)
- 使用 multiple='fill' 和 col='gender' 调用 sns.displot()
import matplotlib.pyplot as plt
from matplotlib.ticker import PercentFormatter
import seaborn as sns
import pandas as pd
# Sample data: 40 rows (20 female followed by 20 male), each with a category
# label (M1-M5) at baseline and at endline.
data = {
    'gender': ['female'] * 20 + ['male'] * 20,
    'baseline': [
        # female rows
        'M1', 'M1', 'M1', 'M1', 'M1', 'M4', 'M4', 'M2', 'M2', 'M2',
        'M2', 'M2', 'M3', 'M3', 'M3', 'M3', 'M5', 'M5', 'M5', 'M5',
        # male rows
        'M1', 'M2', 'M3', 'M4', 'M5', 'M2', 'M2', 'M2', 'M3', 'M3',
        'M3', 'M3', 'M4', 'M4', 'M4', 'M5', 'M5', 'M5', 'M5', 'M5',
    ],
    'endline': [
        # female rows
        'M5', 'M3', 'M1', 'M1', 'M1', 'M4', 'M4', 'M5', 'M2', 'M5',
        'M5', 'M3', 'M3', 'M3', 'M4', 'M4', 'M4', 'M1', 'M1', 'M2',
        # male rows
        'M5', 'M5', 'M5', 'M1', 'M1', 'M1', 'M1', 'M4', 'M4', 'M4',
        'M4', 'M4', 'M3', 'M3', 'M3', 'M3', 'M2', 'M2', 'M2', 'M2',
    ],
}
df = pd.DataFrame(data)

# Wide -> long: stack the two measurement columns so that each row holds one
# (gender, which measurement, category) observation.
measurement_columns = ['baseline', 'endline']
df_long = pd.melt(
    df,
    id_vars='gender',
    value_vars=measurement_columns,
    var_name='which',
    value_name='property',
)
# One facet per gender; inside each facet there is one bar per value of
# 'which', stacked by 'property' with multiple='fill'.
facet_options = dict(x='which', hue='property', col='gender', multiple='fill')
g = sns.displot(data=df_long, **facet_options)
g.set(xlabel='', ylabel='')
# Show the y ticks as percentages, with 1.0 mapped to 100%.
first_panel = g.axes[0, 0]
first_panel.yaxis.set_major_formatter(PercentFormatter(xmax=1))
plt.show()
下面是同一张图的另一种样式:
import matplotlib.pyplot as plt
from matplotlib.ticker import PercentFormatter, MultipleLocator
import seaborn as sns
import pandas as pd
# df_long = ...  (the long-form frame built with melt in the previous snippet)
sns.set_style('whitegrid')
g = sns.displot(data=df_long, x='which', hue='property', col='gender',
                multiple='fill', shrink=0.7, palette='turbo')
g.set(xlabel='', ylabel='')
# Only the first panel's y axis is configured here.
# NOTE(review): presumably relies on the facets sharing their y axis
# (seaborn's default) - confirm.
y_axis = g.axes[0, 0].yaxis
y_axis.set_major_locator(MultipleLocator(.1))    # a tick every 0.1
y_axis.set_major_formatter(PercentFormatter(1))  # 1.0 is shown as 100%
g.axes[0, 0].set_xlim(-.6, 1.6)
sns.despine(left=True)
plt.subplots_adjust(wspace=0)  # no horizontal gap between the facets
# The first snippet ends with show(); without it nothing is displayed when
# this runs as a plain script.
plt.show()