从 csv 文件制作子图

Question

这是我正在使用的 csv 文件的要点：

CODE     AGEGROUP      SEX     CITY      HEALTHSTATUS 
----     ---------     ---     ----      ------------
E101      25 to 29      M      Denver    Recovered
E102      25 to 29      F      Chicago   Recovered
E105      45 to 49      M      Denver    Mild

我想用条形图来展示这些数据，但不清楚该如何编写子图。下面的代码可以显示受影响的男性和女性各有多少人，但我不知道如何把健康状况和年龄组也显示出来：

import matplotlib.pyplot as plt

plt.style.use("bmh")

# Tally the rows per sex and draw the tallies as a bar chart; pandas returns
# the Axes it drew on. `df2` is expected to be loaded before this snippet.
sex_column = df2["SEX"]
ax = sex_column.value_counts().plot(kind="bar")

# Annotate that Axes, using one size for all of the text.
label_size = 12
ax.set_xlabel("Sex", fontsize=label_size)
ax.set_ylabel("Number of People", fontsize=label_size)
ax.set_title("Number of people affected based on sex", fontsize=label_size)
plt.show()

如何把另外两项（健康状况和年龄组）也显示出来？

Answer 1

这样做的一种方法是为每个条件创建一个单独的子图：

import pandas as pd
import matplotlib.pyplot as plt

# Small in-memory stand-in for the CSV shown in the question, so the example
# runs without a data file.
df = pd.DataFrame({'CODE': ["E101", "E102", "E105"],
                   'AGEGROUP': ["25 to 29", "25 to 29", "45 to 49"],
                   'SEX': ["M", "F", "M"],
                   'CITY': ["Denver", "Chicago", "Denver"],
                   'HEALTHSTATUS': ["Recovered", "Recovered", "Mild"],
                   })

fs = 6  # single font size used for axis labels, tick labels and the title
plt.style.use("bmh")

# One entry per subplot: (column to count, x-axis label, tick rotation).
# A rotation of None leaves the x tick labels the way pandas draws them.
panels = [
    ("SEX", "Sex", None),
    ("AGEGROUP", "AGEGROUP", 45),
    ("HEALTHSTATUS", "HEALTHSTATUS", 45),
]

# Three vertically stacked axes, one per column of interest.
fig, axes = plt.subplots(len(panels), 1)
for ax, (column, xlabel, rotation) in zip(axes, panels):
    # value_counts() yields the number of rows per category; draw it as bars.
    df[column].value_counts().plot(kind="bar", ax=ax)
    ax.set_xlabel(xlabel, fontsize=fs)
    ax.set_ylabel("Number of People", fontsize=fs)
    ax.tick_params(axis='both', labelsize=fs)
    if rotation is not None:
        ax.tick_params(axis='x', labelrotation=rotation)

# The title is attached to the top subplot only.
axes[0].set_title("Number of people affected based on condition", fontsize=fs)
plt.tight_layout()
plt.show()

Answer 2

百分比堆叠条形图：
我设想的效果是这样的（用 Photoshop 制作的示意图）：

但用 matplotlib（plt）实际绘制出来的结果是：

代码如下：

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# NOTE: the chart below is drawn from the hard-coded sample counts, not from
# this frame; the read is kept as in the original example.
df2 = pd.read_csv('data.csv')

people = ('F', 'M')

# multi-dimensional data: one row per segment named in `arr`, one column per
# entry in `people`.
arr = ['25to29Recovered', '25to29Mild', '30to44Recovered', '30to44Mild', '45to49Recovered', '45to49Mild']
data = np.asarray([[1, 2],    # 25to29Recovered
                   [3, 4],    # 25to29Mild
                   [5, 6],    # 30to44Recovered
                   [7, 8],    # 30to44Mild
                   [9, 10],   # 45to49Recovered
                   [11, 12],  # 45to49Mild
                   ])

# Share of every segment within its own column, in percent. Broadcasting
# divides each row by the column totals; the transpose makes the result
# indexable as percentages[person, segment].
col_sum = np.sum(data, axis=0)
percentages = (data / col_sum * 100).T

y_pos = np.arange(len(people))

fig = plt.figure(figsize=(10, 8))
ax = fig.add_subplot(111)

colors = 'rgbm'
patch_handles = []

# Stack the segments: each layer of bars starts where the previous one ended.
bottom = np.zeros(len(people))
for i, d in enumerate(data):
    patch_handles.append(ax.bar(y_pos, d,
                                color=colors[i % len(colors)], align='center',
                                bottom=bottom))
    bottom += d

# Walk every bar segment and write its name and percentage at its centre.
for j, (label, container) in enumerate(zip(arr, patch_handles)):
    for i, patch in enumerate(container):
        left, base = patch.get_xy()
        x = left + 0.5 * patch.get_width()
        y = base + 0.5 * patch.get_height()
        # %d drops the fractional part of the percentage.
        ax.text(x, y, "%s\n%d%%" % (label, percentages[i, j]), ha='center')

plt.show()

从 csv 文件制作子图

Making subplot from csv file

python

csv

matplotlib

subplot