从 csv 文件制作子图
Making subplot from csv file
这是我正在使用的 csv 文件的要点:
CODE AGEGROUP SEX CITY HEALTHSTATUS
---- --------- --- ---- ------------
E101 25 to 29 M Denver Recovered
E102 25 to 29 F Chicago Recovered
E105 45 to 49 M Denver Mild
我想用条形图来展示这些数据,但不太清楚该如何编写子图。用下面的代码,我目前只能显示受影响的男性和女性各有多少人,还无法显示健康状况和年龄组:
import matplotlib.pyplot as plt

plt.style.use("bmh")

# NOTE(review): df2 is not created in this snippet; it is assumed to be a
# DataFrame with a "SEX" column that was loaded earlier -- confirm.
#
# Count the rows for each sex and draw the counts as a bar chart. The plot
# call hands back the Axes it drew on, so the labels and title are set on
# that object instead of going through pyplot's implicit current axes.
ax = df2["SEX"].value_counts().plot(kind="bar")
ax.set_xlabel("Sex", fontsize=12)
ax.set_ylabel("Number of People", fontsize=12)
ax.set_title("Number of people affected based on sex", fontsize=12)
plt.show()
如何显示其他两个(健康状况和年龄段)?
这样做的一种方法是为每个条件创建一个单独的子图:
import pandas as pd
import matplotlib.pyplot as plt

# Small in-memory sample with the same columns as the CSV excerpt.
df = pd.DataFrame({
    "CODE": ["E101", "E102", "E105"],
    "AGEGROUP": ["25 to 29", "25 to 29", "45 to 49"],
    "SEX": ["M", "F", "M"],
    "CITY": ["Denver", "Chicago", "Denver"],
    "HEALTHSTATUS": ["Recovered", "Recovered", "Mild"],
})

fs = 6  # one font size shared by every label, tick and title
plt.style.use("bmh")

# Three stacked subplots (3 rows, 1 column), one per summarised column.
fig, (ax0, ax1, ax2) = plt.subplots(3, 1)

# (axes, column to count, x-axis label, rotate the x tick labels by 45 deg?)
panels = [
    (ax0, "SEX", "Sex", False),
    (ax1, "AGEGROUP", "AGEGROUP", True),
    (ax2, "HEALTHSTATUS", "HEALTHSTATUS", True),
]

for ax, column, xlabel, rotate in panels:
    # Bar chart of how many rows carry each distinct value of the column.
    df[column].value_counts().plot(kind="bar", ax=ax)
    ax.set_xlabel(xlabel, fontsize=fs)
    ax.set_ylabel("Number of People", fontsize=fs)
    ax.tick_params(axis="both", labelsize=fs)
    if rotate:
        ax.tick_params(axis="x", labelrotation=45)

# The title is attached to the top subplot so it heads the whole figure.
ax0.set_title("Number of people affected based on condition", fontsize=fs)

plt.tight_layout()
plt.show()
百分比条:
我期望的效果是这样的(用 PS 做的示意图):
但是 plt 实际画出来的是:
代码如下:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

df2 = pd.read_csv('data.csv')

people = ('F', 'M')
segments = 4  # NOTE(review): not referenced anywhere in this snippet

# multi-dimensional data: one row per segment named in `arr`,
# one column per entry in `people`.
arr = ['25to29Recovered', '25to29Mild', '30to44Recovered', '30to44Mild', '45to49Recovered', '45to49Mild']
data = np.asarray([
    [1, 2],    # 25to29Recovered
    [3, 4],    # 25to29Mild
    [5, 6],    # 30to44Recovered
    [7, 8],    # 30to44Mild
    [9, 10],   # 45to49Recovered
    [11, 12],  # 45to49Mild
])

# Share of every segment within its own column, in percent. Broadcasting
# divides each column by that column's total; the transpose gives the
# percentages[person, segment] layout used by the annotation loop below.
col_sum = np.sum(data, axis=0)
percentages = (data / col_sum * 100).T

y_pos = np.arange(len(people))
fig = plt.figure(figsize=(10, 8))
ax = fig.add_subplot(111)
colors = 'rgbm'
patch_handles = []

# Stack the segments: each new layer starts where the previous ones ended.
bottom = np.zeros(len(people))
for i, d in enumerate(data):
    patch_handles.append(ax.bar(y_pos, d,
                                color=colors[i % len(colors)], align='center',
                                bottom=bottom))
    bottom += d

# Label the bars with the entries of `people` rather than bare positions.
ax.set_xticks(y_pos)
ax.set_xticklabels(people)

# search all of the bar segments and annotate each one at its centre
for j, container in enumerate(patch_handles):
    for i, patch in enumerate(container.get_children()):
        bl = patch.get_xy()
        x = 0.5 * patch.get_width() + bl[0]
        y = 0.5 * patch.get_height() + bl[1]
        ax.text(x, y, "%s\n%d%%" % (arr[j], percentages[i, j]), ha='center')

plt.show()
这是我正在使用的 csv 文件的要点:
CODE AGEGROUP SEX CITY HEALTHSTATUS
---- --------- --- ---- ------------
E101 25 to 29 M Denver Recovered
E102 25 to 29 F Chicago Recovered
E105 45 to 49 M Denver Mild
我想用条形图来展示这些数据,但不太清楚该如何编写子图。用下面的代码,我目前只能显示受影响的男性和女性各有多少人,还无法显示健康状况和年龄组:
import matplotlib.pyplot as plt

plt.style.use("bmh")

# NOTE(review): df2 is not created in this snippet; it is assumed to be a
# DataFrame with a "SEX" column that was loaded earlier -- confirm.
#
# Count the rows for each sex and draw the counts as a bar chart. The plot
# call hands back the Axes it drew on, so the labels and title are set on
# that object instead of going through pyplot's implicit current axes.
ax = df2["SEX"].value_counts().plot(kind="bar")
ax.set_xlabel("Sex", fontsize=12)
ax.set_ylabel("Number of People", fontsize=12)
ax.set_title("Number of people affected based on sex", fontsize=12)
plt.show()
如何显示其他两个(健康状况和年龄段)?
这样做的一种方法是为每个条件创建一个单独的子图:
import pandas as pd
import matplotlib.pyplot as plt

# Small in-memory sample with the same columns as the CSV excerpt.
df = pd.DataFrame({
    "CODE": ["E101", "E102", "E105"],
    "AGEGROUP": ["25 to 29", "25 to 29", "45 to 49"],
    "SEX": ["M", "F", "M"],
    "CITY": ["Denver", "Chicago", "Denver"],
    "HEALTHSTATUS": ["Recovered", "Recovered", "Mild"],
})

fs = 6  # one font size shared by every label, tick and title
plt.style.use("bmh")

# Three stacked subplots (3 rows, 1 column), one per summarised column.
fig, (ax0, ax1, ax2) = plt.subplots(3, 1)

# (axes, column to count, x-axis label, rotate the x tick labels by 45 deg?)
panels = [
    (ax0, "SEX", "Sex", False),
    (ax1, "AGEGROUP", "AGEGROUP", True),
    (ax2, "HEALTHSTATUS", "HEALTHSTATUS", True),
]

for ax, column, xlabel, rotate in panels:
    # Bar chart of how many rows carry each distinct value of the column.
    df[column].value_counts().plot(kind="bar", ax=ax)
    ax.set_xlabel(xlabel, fontsize=fs)
    ax.set_ylabel("Number of People", fontsize=fs)
    ax.tick_params(axis="both", labelsize=fs)
    if rotate:
        ax.tick_params(axis="x", labelrotation=45)

# The title is attached to the top subplot so it heads the whole figure.
ax0.set_title("Number of people affected based on condition", fontsize=fs)

plt.tight_layout()
plt.show()
百分比条:
我期望的效果是这样的(用 PS 做的示意图):
但是 plt 实际画出来的是:
代码如下:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

df2 = pd.read_csv('data.csv')

people = ('F', 'M')
segments = 4  # NOTE(review): not referenced anywhere in this snippet

# multi-dimensional data: one row per segment named in `arr`,
# one column per entry in `people`.
arr = ['25to29Recovered', '25to29Mild', '30to44Recovered', '30to44Mild', '45to49Recovered', '45to49Mild']
data = np.asarray([
    [1, 2],    # 25to29Recovered
    [3, 4],    # 25to29Mild
    [5, 6],    # 30to44Recovered
    [7, 8],    # 30to44Mild
    [9, 10],   # 45to49Recovered
    [11, 12],  # 45to49Mild
])

# Share of every segment within its own column, in percent. Broadcasting
# divides each column by that column's total; the transpose gives the
# percentages[person, segment] layout used by the annotation loop below.
col_sum = np.sum(data, axis=0)
percentages = (data / col_sum * 100).T

y_pos = np.arange(len(people))
fig = plt.figure(figsize=(10, 8))
ax = fig.add_subplot(111)
colors = 'rgbm'
patch_handles = []

# Stack the segments: each new layer starts where the previous ones ended.
bottom = np.zeros(len(people))
for i, d in enumerate(data):
    patch_handles.append(ax.bar(y_pos, d,
                                color=colors[i % len(colors)], align='center',
                                bottom=bottom))
    bottom += d

# Label the bars with the entries of `people` rather than bare positions.
ax.set_xticks(y_pos)
ax.set_xticklabels(people)

# search all of the bar segments and annotate each one at its centre
for j, container in enumerate(patch_handles):
    for i, patch in enumerate(container.get_children()):
        bl = patch.get_xy()
        x = 0.5 * patch.get_width() + bl[0]
        y = 0.5 * patch.get_height() + bl[1]
        ax.text(x, y, "%s\n%d%%" % (arr[j], percentages[i, j]), ha='center')

plt.show()