使用 matplotlib 的分组百分比堆叠条形图
Grouped percent stacked bar plot using matplotlib
我正在尝试创建一种图表,因为想不出更好的名字,暂且称之为 "grouped percent stacked bar plot"(分组百分比堆叠条形图)。下面是我的代码的一个简单可运行示例:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import rc
import pandas as pd
# Data: one row per sample (x position), one column per compound.
r = [0, 1, 2, 3, 4]
raw_data = {
    'PFHxA': [20, 1.5, 7, 10, 5],
    'PFHpA': [5, 15, 5, 10, 15],
    'PFOA': [2, 15, 18, 5, 10],
    'PFNA': [2, 15, 18, 5, 10],
    'PFDA': [2, 15, 18, 5, 10],
    'PFUnDA': [2, 15, 18, 5, 10],
    'PFHxS': [2, 15, 18, 5, 10],
    'PFOS': [2, 15, 18, 5, 10],
}
df = pd.DataFrame(raw_data)

# From raw value to percentage: divide every row by its own total so that
# each sample's stack adds up to 100.
percentages = df.div(df.sum(axis=1), axis=0) * 100

# plot
barWidth = 0.85
names = ('A', 'B', 'C', 'D', 'E')
# Fill colour per compound; the dict order is also the stacking order
# (first entry at the bottom of each bar).
colors = {
    'PFHxA': '#A20101',
    'PFHpA': '#F44E54',
    'PFOA': '#FF9904',
    'PFNA': '#FDDB5E',
    'PFDA': '#BAF1A1',
    'PFUnDA': '#76AD3B',
    'PFHxS': '#0A709A',
    'PFOS': '#5E3F99',
}

# Draw one layer per compound. `bottom` holds the running height of every
# sample's stack, so each new layer starts where the previous one ended.
bottom = np.zeros(len(df))
for compound, color in colors.items():
    plt.bar(r, percentages[compound], bottom=bottom, color=color,
            edgecolor='white', width=barWidth, label=compound)
    bottom = bottom + percentages[compound].to_numpy()

# Custom x axis
plt.xticks(r, names)
plt.xlabel("Sample ID")
# Custom y axis
plt.ylabel("Mass percentage")
# Add a legend (anchored just outside the top-right corner of the axes)
plt.legend(loc='upper left', bbox_to_anchor=(1, 1), ncol=1)
# Show graphic
plt.show()
这段代码运行良好,生成了如下的百分比堆叠条形图:
但是,我想在样本 B 和 C 之间以及样本 D 和 E 之间增加一些额外的空白,以突出这里有三个样本组(A 和 B 是第一组,C 和 D 是第二组,E 是第三组)。有没有一种简单的方法可以做到这一点(并且能够扩展到三个以上的组)?理想情况下,我还希望能在每个组的上方添加一个文本标签——这也可以做到吗?
对于"条形之间的间距"这个问题来说,这段示例代码显得过于复杂了。条形的位置(也就是间距)是由 r 决定的,因此您可以把 r 改成例如:
# Bar positions with a wider step (1.5 instead of 1) after the 2nd and 4th
# bars, which leaves extra white space between the groups.
r = [0, 1, 2.5, 3.5, 5]
可以通过 plt.text 添加文字:
# Write one label just above each bar (y=101, slightly over the 100 % top).
# NOTE: `texts` is not defined in this snippet; it is assumed to be a
# sequence of label strings with one entry per position in `r`.
for pos, text in zip(r, texts):
    plt.text(pos, 101, text, ha='center')
我正在尝试创建一种图表,因为想不出更好的名字,暂且称之为 "grouped percent stacked bar plot"(分组百分比堆叠条形图)。下面是我的代码的一个简单可运行示例:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import rc
import pandas as pd
# Data: one row per sample (x position), one column per compound.
r = [0, 1, 2, 3, 4]
raw_data = {
    'PFHxA': [20, 1.5, 7, 10, 5],
    'PFHpA': [5, 15, 5, 10, 15],
    'PFOA': [2, 15, 18, 5, 10],
    'PFNA': [2, 15, 18, 5, 10],
    'PFDA': [2, 15, 18, 5, 10],
    'PFUnDA': [2, 15, 18, 5, 10],
    'PFHxS': [2, 15, 18, 5, 10],
    'PFOS': [2, 15, 18, 5, 10],
}
df = pd.DataFrame(raw_data)

# From raw value to percentage: divide every row by its own total so that
# each sample's stack adds up to 100.
percentages = df.div(df.sum(axis=1), axis=0) * 100

# plot
barWidth = 0.85
names = ('A', 'B', 'C', 'D', 'E')
# Fill colour per compound; the dict order is also the stacking order
# (first entry at the bottom of each bar).
colors = {
    'PFHxA': '#A20101',
    'PFHpA': '#F44E54',
    'PFOA': '#FF9904',
    'PFNA': '#FDDB5E',
    'PFDA': '#BAF1A1',
    'PFUnDA': '#76AD3B',
    'PFHxS': '#0A709A',
    'PFOS': '#5E3F99',
}

# Draw one layer per compound. `bottom` holds the running height of every
# sample's stack, so each new layer starts where the previous one ended.
bottom = np.zeros(len(df))
for compound, color in colors.items():
    plt.bar(r, percentages[compound], bottom=bottom, color=color,
            edgecolor='white', width=barWidth, label=compound)
    bottom = bottom + percentages[compound].to_numpy()

# Custom x axis
plt.xticks(r, names)
plt.xlabel("Sample ID")
# Custom y axis
plt.ylabel("Mass percentage")
# Add a legend (anchored just outside the top-right corner of the axes)
plt.legend(loc='upper left', bbox_to_anchor=(1, 1), ncol=1)
# Show graphic
plt.show()
这很好用,我创建了以下百分比堆叠条形图:
但是,我想在样本 B 和 C 之间以及样本 D 和 E 之间增加一些额外的空白,以突出这里有三个样本组(A 和 B 是第一组,C 和 D 是第二组,E 是第三组)。有没有一种简单的方法可以做到这一点(并且能够扩展到三个以上的组)?理想情况下,我还希望能在每个组的上方添加一个文本标签——这也可以做到吗?
对于"条形之间的间距"这个问题来说,这段示例代码显得过于复杂了。条形的位置(也就是间距)是由 r 决定的,因此您可以把 r 改成例如:
# Bar positions with a wider step (1.5 instead of 1) after the 2nd and 4th
# bars, which leaves extra white space between the groups.
r = [0, 1, 2.5, 3.5, 5]
可以通过 plt.text 添加文字:
# Write one label just above each bar (y=101, slightly over the 100 % top).
# NOTE: `texts` is not defined in this snippet; it is assumed to be a
# sequence of label strings with one entry per position in `r`.
for pos, text in zip(r, texts):
    plt.text(pos, 101, text, ha='center')