从数据框中自动制作多个条形图
Make multiple barplot automatically from a dataframe
我有一个数据框,其中有一个变量“Gender”(0 或 1)指示一个人是男性还是女性,另一个变量“Dis”表示疾病状态(0、1、2 或 3)。
> df.head()
Gender Dis
0 1 2
1 0 0
2 0 1
3 1 3
4 0 0
5 0 1
我想制作一个条形图,其中包含每个“Dis”值的计数值,但我希望它按性别分隔,即,我想为每种疾病状态设置两个条形图。我想要这个:
但是,如果不手动写入每个条形图的计数值,我无法自动绘制此条形图。我不得不检查每个组合的计数值。我用以下内容手动制作了这个图:
# Counts typed in by hand for each disease state (one list per gender).
X = ['0', '1', '2', '3']
M = [43, 9, 20, 11]
F = [118, 21, 168, 20]

X_axis = np.arange(len(X))

# Two bars per disease state, shifted left/right of each tick position.
plt.bar(X_axis - 0.2, M, 0.4, label='Male')
plt.bar(X_axis + 0.2, F, 0.4, label='Female')

plt.xticks(X_axis, X)
plt.xlabel("")
plt.ylabel("")
plt.legend()
plt.title("title")


def autolabel(rects):
    """Write each bar's integer height as text slightly above the bar.

    NOTE: this helper is defined but never called in this snippet.
    """
    # The snippet never defines `ax`, so take the current axes instead of
    # relying on an undefined global.
    ax = plt.gca()
    for rect in rects:
        h = rect.get_height()
        ax.text(rect.get_x() + rect.get_width() / 2., 1.05 * h, '%d' % int(h),
                ha='center', va='bottom')


plt.show()
我可以直接从数据框做一些更“自动”的事情吗?另外,我还可以在每个条形图的顶部显示计数值吗?
让我们试试 crosstab + DataFrame.plot:
# Cross-tabulate: one row per Dis value, one column per Gender value,
# each cell holding the number of matching rows.
plot_df = pd.crosstab(df['Dis'], df['Gender'])
# Replace the numeric Gender codes with readable column labels.
plot_df = plot_df.rename(columns={0: 'Male', 1: 'Female'})
ax = plot_df.plot.bar(rot=0, xlabel='', ylabel='', title='title')
plt.show()
crosstab 将根据 Dis 产生 Male/Female 的计数。
rename 用于将列名 0/1 转为 Male/Female。
plot_df:
Gender Male Female
Dis
0 119 128
1 140 121
2 124 120
3 112 136
将图例移到图外,并在条形顶部显示数值:
ax = plot_df.plot(kind='bar', rot=0, xlabel='', ylabel='', title='title')
# Label the bars of every container on the axes; with no explicit labels,
# bar_label annotates each bar with its value.
for container in ax.containers:
    ax.bar_label(container)
# Anchor the legend's upper-left corner just past the right edge of the axes.
plt.legend(title='Gender', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.show()
要将百分比添加到条形的顶部:
- 将 plot_df 除以列总数
- 根据需要格式化
- 与 containers 进行 zip,添加条形标签
plot_df = (
    pd.crosstab(df['Dis'], df['Gender'])
    .rename(columns={0: 'Male', 1: 'Female'})
)
# Calculate percentages (each cell as a share of its column total) and
# format them as text. Series.map is applied column by column because
# DataFrame.applymap is deprecated since pandas 2.1.
labels_df = (
    plot_df.div(plot_df.sum(axis=0))
    .mul(100)
    .apply(lambda column: column.map('{:.2f}%'.format))
)
ax = plot_df.plot(kind='bar', rot=0, figsize=(9, 6), width=0.8,
                  xlabel='', ylabel='', title='title')
# Iterating a DataFrame yields its column labels, so every container is
# paired with the column that holds its pre-formatted labels.
for container, col in zip(ax.containers, labels_df):
    ax.bar_label(container, labels=labels_df[col])
plt.legend(title='Gender', bbox_to_anchor=(1.01, 1), loc='upper left')
plt.tight_layout()
plt.show()
labels_df:
Gender Male Female
Dis
0 24.04% 25.35%
1 28.28% 23.96%
2 25.05% 23.76%
3 22.63% 26.93%
示例数据和使用的导入:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
# Fixed seed so the randomly drawn sample is reproducible.
np.random.seed(5)
SAMPLE_SIZE = 1000
# Gender is drawn before Dis, so the random stream is consumed in that order.
gender_codes = np.random.choice([0, 1], size=SAMPLE_SIZE)
dis_codes = np.random.choice([0, 1, 2, 3], size=SAMPLE_SIZE)
df = pd.DataFrame({'Gender': gender_codes, 'Dis': dis_codes})
如果您想使用 for 循环执行此操作:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Hand-written sample: parallel lists, one entry per person.
data = {
    'Gender': [1, 0, 0, 1, 0, 0, 1, 1],
    'Dis': [2, 0, 1, 3, 0, 1, 0, 1],
}
# Build the DataFrame with one column per dictionary key.
df = pd.DataFrame.from_dict(data)
# Show the resulting table.
print(df)
然后创建初始值为 0 的计数变量,并用循环统计每种组合:
from collections import Counter

# Tally every (Dis, Gender) pair in a single pass. A Counter returns 0 for
# combinations that never occur, so no explicit zero-initialisation is needed.
pair_counts = Counter(zip(data['Dis'], data['Gender']))

# Gender 0 is counted as male and Gender 1 as female.
# dis = 0
number_males_dis_0 = pair_counts[(0, 0)]
number_females_dis_0 = pair_counts[(0, 1)]
# dis = 1
number_males_dis_1 = pair_counts[(1, 0)]
number_females_dis_1 = pair_counts[(1, 1)]
# dis = 2
number_males_dis_2 = pair_counts[(2, 0)]
number_females_dis_2 = pair_counts[(2, 1)]
# dis = 3
number_males_dis_3 = pair_counts[(3, 0)]
number_females_dis_3 = pair_counts[(3, 1)]
然后绘图:
X = ['0', '1', '2', '3']
M = [number_males_dis_0, number_males_dis_1,
     number_males_dis_2, number_males_dis_3]
F = [number_females_dis_0, number_females_dis_1,
     number_females_dis_2, number_females_dis_3]

X_axis = np.arange(len(X))

# Two bars per disease state, shifted left/right of each tick position.
plt.bar(X_axis - 0.2, M, 0.4, label='Male')
plt.bar(X_axis + 0.2, F, 0.4, label='Female')

plt.xticks(X_axis, X)
plt.xlabel("")
plt.ylabel("")
# Upper limit sits 0.5 above the tallest bar (presumably headroom for the
# text labels drawn below).
plt.ylim(0, max(max(F), max(M)) + 0.5)
plt.legend()
plt.title("title")

# Text on the top of each bar; iterating the counts themselves keeps the
# loop in step with the lists instead of a hard-coded range.
for i, (male_count, female_count) in enumerate(zip(M, F)):
    plt.text(x=i - 0.25, y=male_count + 0.05, s=male_count, size=10)
    plt.text(x=i + 0.15, y=female_count + 0.05, s=female_count, size=10)

plt.show()
结果:
Result
我有一个数据框,其中有一个变量“Gender”(0 或 1)指示一个人是男性还是女性,另一个变量“Dis”表示疾病状态(0、1、2 或 3)。
> df.head()
Gender Dis
0 1 2
1 0 0
2 0 1
3 1 3
4 0 0
5 0 1
我想制作一个条形图,其中包含每个“Dis”值的计数值,但我希望它按性别分隔,即,我想为每种疾病状态设置两个条形图。我想要这个:
但是,如果不手动写入每个条形图的计数值,我无法自动绘制此条形图。我不得不检查每个组合的计数值。我用以下内容手动制作了这个图:
# Counts typed in by hand for each disease state (one list per gender).
X = ['0', '1', '2', '3']
M = [43, 9, 20, 11]
F = [118, 21, 168, 20]

X_axis = np.arange(len(X))

# Two bars per disease state, shifted left/right of each tick position.
plt.bar(X_axis - 0.2, M, 0.4, label='Male')
plt.bar(X_axis + 0.2, F, 0.4, label='Female')

plt.xticks(X_axis, X)
plt.xlabel("")
plt.ylabel("")
plt.legend()
plt.title("title")


def autolabel(rects):
    """Write each bar's integer height as text slightly above the bar.

    NOTE: this helper is defined but never called in this snippet.
    """
    # The snippet never defines `ax`, so take the current axes instead of
    # relying on an undefined global.
    ax = plt.gca()
    for rect in rects:
        h = rect.get_height()
        ax.text(rect.get_x() + rect.get_width() / 2., 1.05 * h, '%d' % int(h),
                ha='center', va='bottom')


plt.show()
我可以直接从数据框做一些更“自动”的事情吗?另外,我还可以在每个条形图的顶部显示计数值吗?
让我们试试 crosstab + DataFrame.plot:
# Cross-tabulate: one row per Dis value, one column per Gender value,
# each cell holding the number of matching rows.
plot_df = pd.crosstab(df['Dis'], df['Gender'])
# Replace the numeric Gender codes with readable column labels.
plot_df = plot_df.rename(columns={0: 'Male', 1: 'Female'})
ax = plot_df.plot.bar(rot=0, xlabel='', ylabel='', title='title')
plt.show()
crosstab 将根据 Dis 统计每个 Gender 值的计数。
rename 用于将列名 0/1 转为 Male/Female。
plot_df:
Gender Male Female
Dis
0 119 128
1 140 121
2 124 120
3 112 136
将图例移到图外,并在条形顶部显示数值:
ax = plot_df.plot(kind='bar', rot=0, xlabel='', ylabel='', title='title')
# Label the bars of every container on the axes; with no explicit labels,
# bar_label annotates each bar with its value.
for container in ax.containers:
    ax.bar_label(container)
# Anchor the legend's upper-left corner just past the right edge of the axes.
plt.legend(title='Gender', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.show()
要将百分比添加到条形的顶部:
- 将 plot_df 除以列总数
- 根据需要格式化
- 与 containers 进行 zip,添加条形标签
plot_df = (
    pd.crosstab(df['Dis'], df['Gender'])
    .rename(columns={0: 'Male', 1: 'Female'})
)
# Calculate percentages (each cell as a share of its column total) and
# format them as text. Series.map is applied column by column because
# DataFrame.applymap is deprecated since pandas 2.1.
labels_df = (
    plot_df.div(plot_df.sum(axis=0))
    .mul(100)
    .apply(lambda column: column.map('{:.2f}%'.format))
)
ax = plot_df.plot(kind='bar', rot=0, figsize=(9, 6), width=0.8,
                  xlabel='', ylabel='', title='title')
# Iterating a DataFrame yields its column labels, so every container is
# paired with the column that holds its pre-formatted labels.
for container, col in zip(ax.containers, labels_df):
    ax.bar_label(container, labels=labels_df[col])
plt.legend(title='Gender', bbox_to_anchor=(1.01, 1), loc='upper left')
plt.tight_layout()
plt.show()
labels_df:
Gender Male Female
Dis
0 24.04% 25.35%
1 28.28% 23.96%
2 25.05% 23.76%
3 22.63% 26.93%
示例数据和使用的导入:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
# Fixed seed so the randomly drawn sample is reproducible.
np.random.seed(5)
SAMPLE_SIZE = 1000
# Gender is drawn before Dis, so the random stream is consumed in that order.
gender_codes = np.random.choice([0, 1], size=SAMPLE_SIZE)
dis_codes = np.random.choice([0, 1, 2, 3], size=SAMPLE_SIZE)
df = pd.DataFrame({'Gender': gender_codes, 'Dis': dis_codes})
如果您想使用 for 循环执行此操作:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Hand-written sample: parallel lists, one entry per person.
data = {
    'Gender': [1, 0, 0, 1, 0, 0, 1, 1],
    'Dis': [2, 0, 1, 3, 0, 1, 0, 1],
}
# Build the DataFrame with one column per dictionary key.
df = pd.DataFrame.from_dict(data)
# Show the resulting table.
print(df)
然后创建初始值为 0 的计数变量,并用循环统计每种组合:
from collections import Counter

# Tally every (Dis, Gender) pair in a single pass. A Counter returns 0 for
# combinations that never occur, so no explicit zero-initialisation is needed.
pair_counts = Counter(zip(data['Dis'], data['Gender']))

# Gender 0 is counted as male and Gender 1 as female.
# dis = 0
number_males_dis_0 = pair_counts[(0, 0)]
number_females_dis_0 = pair_counts[(0, 1)]
# dis = 1
number_males_dis_1 = pair_counts[(1, 0)]
number_females_dis_1 = pair_counts[(1, 1)]
# dis = 2
number_males_dis_2 = pair_counts[(2, 0)]
number_females_dis_2 = pair_counts[(2, 1)]
# dis = 3
number_males_dis_3 = pair_counts[(3, 0)]
number_females_dis_3 = pair_counts[(3, 1)]
然后绘图:
X = ['0', '1', '2', '3']
M = [number_males_dis_0, number_males_dis_1,
     number_males_dis_2, number_males_dis_3]
F = [number_females_dis_0, number_females_dis_1,
     number_females_dis_2, number_females_dis_3]

X_axis = np.arange(len(X))

# Two bars per disease state, shifted left/right of each tick position.
plt.bar(X_axis - 0.2, M, 0.4, label='Male')
plt.bar(X_axis + 0.2, F, 0.4, label='Female')

plt.xticks(X_axis, X)
plt.xlabel("")
plt.ylabel("")
# Upper limit sits 0.5 above the tallest bar (presumably headroom for the
# text labels drawn below).
plt.ylim(0, max(max(F), max(M)) + 0.5)
plt.legend()
plt.title("title")

# Text on the top of each bar; iterating the counts themselves keeps the
# loop in step with the lists instead of a hard-coded range.
for i, (male_count, female_count) in enumerate(zip(M, F)):
    plt.text(x=i - 0.25, y=male_count + 0.05, s=male_count, size=10)
    plt.text(x=i + 0.15, y=female_count + 0.05, s=female_count, size=10)

plt.show()
结果: Result