绘制堆积条形图
Plotting stacked bar
图的左侧是数据集,右侧是我想要绘制的堆积条形图的草图。我该如何用这些数据绘制出这样的堆积条形图,其中 X 轴表示年龄组,各堆积段(Y 轴方向)表示 Length 分组?
任何帮助都会很棒。提前致谢。
您可以将每一列切割成各自的 bin 并计算值:
# Bin ages into left-closed intervals so every label matches its range:
# [0, 50) -> '<50', [50, 60) -> '50-59', ..., [80, 1000) -> '80+'.
# (The default right-closed bins would put age 50 into '<50'.)
age_bins = pd.cut(
    df['age'],
    bins=[0, 50, 60, 70, 80, 1000],
    labels=['<50', '50-59', '60-69', '70-79', '80+'],
    right=False,
)
# Bin lengths as [0, 5], (5, 20], (20, inf) so that 21 falls into '21+ days';
# include_lowest=True keeps a length of exactly 0 instead of producing NaN.
length_bins = pd.cut(
    df['Length'],
    bins=[0, 5, 20, np.inf],
    labels=['<=5 days', '6-20 days', '21+ days'],
    include_lowest=True,
)
# Fraction of each length bin within every age bin, drawn as stacked bars.
(length_bins.groupby(age_bins)
            .value_counts(normalize=True)
            .unstack()
            .plot.bar(stacked=True)
)
你会得到这样的东西:
使用 pd.cut() 和 pandas.DataFrame.groupby()
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# data
# Seed NumPy's global generator, which is the one used below; the stdlib
# `random` module is not imported here and would not affect np.random.
np.random.seed(365)
data = {'age': [np.random.randint(90) for _ in range(100)],
        'length': [np.random.randint(30) for _ in range(100)]}

# dataframe
df = pd.DataFrame(data)

# age and days groups
# right=False makes the bins left-closed so the labels match their ranges:
# age 50 -> '50-59' (not '<50'), length 6 -> '6-20 days' (not '≤5 days'),
# and a value of 0 is kept in the first bin instead of becoming NaN.
df['age_group'] = pd.cut(df['age'], bins=[0, 50, 60, 70, 80, 1000], right=False,
                         labels=['<50', '50-59', '60-69', '70-79', '≥80'])
df['days'] = pd.cut(df['length'], bins=[0, 6, 20, 1000], right=False,
                    labels=['≤5 days', '6-20 days', '≥20 days'])
age length age_group days
72 22 70-79 ≥20 days
2 14 <50 6-20 days
14 12 <50 6-20 days
47 4 <50 ≤5 days
18 12 <50 6-20 days
# groupby plot
# Count rows per (age_group, days) cell; observed=False keeps categories
# that have no rows so every bin still appears in the table.
counts = df.groupby(['age_group', 'days'], observed=False).size().unstack()
# Turn each age group's counts into percentages of that group's total.
percentages = counts.div(counts.sum(axis=1), axis=0) * 100
# Pass figsize to the plot call itself: pandas opens its own figure, so a
# preceding plt.figure(figsize=...) would only leave an empty extra figure.
ax = percentages.plot.bar(stacked=True, figsize=(16, 10), rot=0)
ax.legend(loc='center right', bbox_to_anchor=(-0.05, 0.5))
ax.set_xlabel('Age Groups')
# A formatter labels whatever ticks end up on the axis, instead of freezing
# the labels computed from the ticks at this moment.
ax.yaxis.set_major_formatter('{x:.0f}%')
plt.show()
- 此解决方案与 Quang Hoang 提供的解决方案类似,不同之处在于它的 y 轴范围是 0% - 100%,而另一个方案的 y 轴被标准化为 0 - 1。
- 也可以使用另一个解决方案,然后只需用以下方式格式化 y 轴:
# Format 0-1 tick values as percentages with a formatter, so the labels stay
# correct if the ticks change (set_yticklabels would freeze the current text).
plt.gca().yaxis.set_major_formatter('{x:.0%}')
您可以使用 pd.cut
将连续变量转换为分类值。
然后,调用 pd.crosstab
将为您提供所需的信息。
import numpy as np
import pandas as pd
# Reproducible sample: 200 ages in [40, 90) and 200 lengths in [0, 30).
np.random.seed(42)
age = np.random.randint(40, 90, size=(200,))
length = np.random.randint(0, 30, size=(200,))

# Left-closed age bins so the labels match: [0, 50) -> '<50', [50, 60) -> '50-59', ...
age = pd.cut(age, bins=[0, 50, 60, 70, 80, np.inf], right=False,
             labels=['<50', '50-59', '60-69', '70-79', '80+'])
# include_lowest=True keeps length == 0 in the first bin; without it those
# samples become NaN and silently vanish from the crosstab.
length = pd.cut(length, bins=[0, 5, 20, 100], include_lowest=True,
                labels=['<=5 days', '6-20 days', '20+ days'])

# Row-wise percentages: each age group's row sums to 100.
df = pd.crosstab(age, length, rownames=['Age'], colnames=['Length'],
                 normalize='index') * 100
# Stacked bar chart of the row-percentage table, one bar per age group.
ax = df.plot(kind='bar', stacked=True, legend='reverse', figsize=(12, 8))
ax.tick_params(axis='x', rotation=0, labelsize=20)
ax.tick_params(axis='y', labelsize=20)
ax.set_xlabel('Age', fontsize='xx-large')
ax.legend(loc='center left', bbox_to_anchor=(-0.4, 0.5), fontsize=20)
ax.set_ylim(0, 100)
# Use a formatter for the percent labels so they follow the actual ticks,
# rather than fixing label text from the ticks present at this moment.
ax.yaxis.set_major_formatter('{x:.0f}%')
ax.spines['right'].set_visible(False)
ax.spines['left'].set_visible(False)
# Draw the horizontal grid lines behind the bars.
ax.set_axisbelow(True)
ax.yaxis.grid(color='gray')
图的左侧是数据集,右侧是我想要绘制的堆积条形图的草图。我该如何用这些数据绘制出这样的堆积条形图,其中 X 轴表示年龄组,各堆积段(Y 轴方向)表示 Length 分组?
任何帮助都会很棒。提前致谢。
您可以将每一列切割成各自的 bin 并计算值:
# Bin ages into left-closed intervals so every label matches its range:
# [0, 50) -> '<50', [50, 60) -> '50-59', ..., [80, 1000) -> '80+'.
# (The default right-closed bins would put age 50 into '<50'.)
age_bins = pd.cut(
    df['age'],
    bins=[0, 50, 60, 70, 80, 1000],
    labels=['<50', '50-59', '60-69', '70-79', '80+'],
    right=False,
)
# Bin lengths as [0, 5], (5, 20], (20, inf) so that 21 falls into '21+ days';
# include_lowest=True keeps a length of exactly 0 instead of producing NaN.
length_bins = pd.cut(
    df['Length'],
    bins=[0, 5, 20, np.inf],
    labels=['<=5 days', '6-20 days', '21+ days'],
    include_lowest=True,
)
# Fraction of each length bin within every age bin, drawn as stacked bars.
(length_bins.groupby(age_bins)
            .value_counts(normalize=True)
            .unstack()
            .plot.bar(stacked=True)
)
你会得到这样的东西:
使用 pd.cut() 和 pandas.DataFrame.groupby()
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# data
# Seed NumPy's global generator, which is the one used below; the stdlib
# `random` module is not imported here and would not affect np.random.
np.random.seed(365)
data = {'age': [np.random.randint(90) for _ in range(100)],
        'length': [np.random.randint(30) for _ in range(100)]}

# dataframe
df = pd.DataFrame(data)

# age and days groups
# right=False makes the bins left-closed so the labels match their ranges:
# age 50 -> '50-59' (not '<50'), length 6 -> '6-20 days' (not '≤5 days'),
# and a value of 0 is kept in the first bin instead of becoming NaN.
df['age_group'] = pd.cut(df['age'], bins=[0, 50, 60, 70, 80, 1000], right=False,
                         labels=['<50', '50-59', '60-69', '70-79', '≥80'])
df['days'] = pd.cut(df['length'], bins=[0, 6, 20, 1000], right=False,
                    labels=['≤5 days', '6-20 days', '≥20 days'])
age length age_group days
72 22 70-79 ≥20 days
2 14 <50 6-20 days
14 12 <50 6-20 days
47 4 <50 ≤5 days
18 12 <50 6-20 days
# groupby plot
# Count rows per (age_group, days) cell; observed=False keeps categories
# that have no rows so every bin still appears in the table.
counts = df.groupby(['age_group', 'days'], observed=False).size().unstack()
# Turn each age group's counts into percentages of that group's total.
percentages = counts.div(counts.sum(axis=1), axis=0) * 100
# Pass figsize to the plot call itself: pandas opens its own figure, so a
# preceding plt.figure(figsize=...) would only leave an empty extra figure.
ax = percentages.plot.bar(stacked=True, figsize=(16, 10), rot=0)
ax.legend(loc='center right', bbox_to_anchor=(-0.05, 0.5))
ax.set_xlabel('Age Groups')
# A formatter labels whatever ticks end up on the axis, instead of freezing
# the labels computed from the ticks at this moment.
ax.yaxis.set_major_formatter('{x:.0f}%')
plt.show()
- 此解决方案与 Quang Hoang 提供的解决方案类似,不同之处在于它的 y 轴范围是 0% - 100%,而另一个方案的 y 轴被标准化为 0 - 1。
- 也可以使用另一个解决方案,然后只需用以下方式格式化 y 轴:
# Format 0-1 tick values as percentages with a formatter, so the labels stay
# correct if the ticks change (set_yticklabels would freeze the current text).
plt.gca().yaxis.set_major_formatter('{x:.0%}')
您可以使用 pd.cut
将连续变量转换为分类值。
然后,调用 pd.crosstab
将为您提供所需的信息。
import numpy as np
import pandas as pd
# Reproducible sample: 200 ages in [40, 90) and 200 lengths in [0, 30).
np.random.seed(42)
age = np.random.randint(40, 90, size=(200,))
length = np.random.randint(0, 30, size=(200,))

# Left-closed age bins so the labels match: [0, 50) -> '<50', [50, 60) -> '50-59', ...
age = pd.cut(age, bins=[0, 50, 60, 70, 80, np.inf], right=False,
             labels=['<50', '50-59', '60-69', '70-79', '80+'])
# include_lowest=True keeps length == 0 in the first bin; without it those
# samples become NaN and silently vanish from the crosstab.
length = pd.cut(length, bins=[0, 5, 20, 100], include_lowest=True,
                labels=['<=5 days', '6-20 days', '20+ days'])

# Row-wise percentages: each age group's row sums to 100.
df = pd.crosstab(age, length, rownames=['Age'], colnames=['Length'],
                 normalize='index') * 100
# Stacked bar chart of the row-percentage table, one bar per age group.
ax = df.plot(kind='bar', stacked=True, legend='reverse', figsize=(12, 8))
ax.tick_params(axis='x', rotation=0, labelsize=20)
ax.tick_params(axis='y', labelsize=20)
ax.set_xlabel('Age', fontsize='xx-large')
ax.legend(loc='center left', bbox_to_anchor=(-0.4, 0.5), fontsize=20)
ax.set_ylim(0, 100)
# Use a formatter for the percent labels so they follow the actual ticks,
# rather than fixing label text from the ticks present at this moment.
ax.yaxis.set_major_formatter('{x:.0f}%')
ax.spines['right'].set_visible(False)
ax.spines['left'].set_visible(False)
# Draw the horizontal grid lines behind the bars.
ax.set_axisbelow(True)
ax.yaxis.grid(color='gray')