将值添加到非堆叠条形图 - python
Adding values onto unstacked bar chart - python
这是我的堆叠条形图代码。我可以为第一段 (AA) 添加百分比值,但如何为所有 4 个段添加值?
# Question snippet: draw a stacked horizontal bar chart of shipment counts per
# country and annotate the bars with percentage labels.

# Load the records and pivot them to one row per country with one column per
# ClassWeight value; combinations that do not occur become 0.
df = pd.read_csv("123.csv")
df1 = (
    df.groupby(['Country', 'ClassWeight'])['Count']
    .sum()
    .unstack('ClassWeight')
    .fillna(0)
)

# Sort on the 'total' column, and then drop it to avoid double plotting.
# NOTE(review): iloc[:, :-1] assumes 'total' is the last column - confirm
# against the real CSV.
ax = df1.sort_values(['total']).iloc[:, :-1].plot(
    kind='barh', width=0.8, stacked=True, figsize=(15, 10),
    colormap=ListedColormap(sns.color_palette("Blues_d")))

# Axis labels, tick sizes and title
ax.set_xlabel('No.of Shipments', fontsize=15)
ax.set_ylabel('Country', fontsize=15)
plt.xticks(fontsize=15)
plt.yticks(fontsize=15)
plt.title('Total Shipments by Country and Customer Class', fontsize=15)

# Convert each class count into a percentage of the row total, using the same
# row order as the plotted frame so that row i matches bar i.
df2 = df1.sort_values(['total'], ascending=True)
df2['AA'] = 100 * df2['AA'] / df2['total']
df2['A'] = 100 * df2['A'] / df2['total']
df2['B'] = 100 * df2['B'] / df2['total']
df2['C'] = 100 * df2['C'] / df2['total']
df3 = df2.iloc[:, :-1]

# Can only enumerate on the AA column. How could we do all 4 columns?
# NOTE(review): v is a percentage while the x axis shows shipment counts, so
# the x position used here does not track the segment's actual right edge.
for i, v in enumerate(df3['AA']):
    ax.text(v - 1, i - 0.2, "{0:.1f}%".format(v), color='white',
            fontweight='bold', fontsize=15)
示例数据:
ClassWeight AA A B C
Country
Romania 17.142857 32.268908 28.235294 22.352941
Finland 60.325203 13.495935 12.682927 13.495935
{'Country': {0: 'France', 1: 'Poland', 2: 'Lithuania', 3: 'United Kingdom', 4: 'Denmark'}, 'Count': {0: 233, 1: 232, 2: 286, 3: 236, 4: 223}, 'SumWeight': {0: 8072469.5, 1: 6689511.05, 2: 5158305.25 , 3: 4675914.53, 4: 3536684.52}, 'AvgWeight': {0: 34645.79, 1: 28834.1, 2: 18036.03, 3: 19813.2, 4: 15859.57}, 'ClassWeight': {0: 'AA', 1: 'AA', 2: 'AA', 3: 'AA', 4: 'AA'}}
我稍微重新整理了一下您的代码——您不必每次都创建新的 DataFrame,而且实际上只需要按总数(total)排序一次。
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
import seaborn as sns
# Answer snippet: stacked horizontal bar chart in which every segment is
# labelled with its percentage of the row total.

# Small reproducible input: one country with a count per class plus a
# 'total' row, so the pivoted frame below gets a 'total' column.
df = pd.DataFrame(
    {'Country': {0: 'France', 1: 'France', 2: 'France', 3: 'France', 4: 'France'},
     'Count': {0: 100, 1: 232, 2: 286, 3: 236, 4: 854},
     'ClassWeight': {0: 'AA', 1: 'A', 2: 'B', 3: 'C', 4: 'total'}}
)

# Track which value columns we want to plot
VALUE_COLS = ['AA', 'A', 'B', 'C']

# We only need to sort_values once, so we might as well do it as we generate df1
df1 = (
    df.groupby(['Country', 'ClassWeight'])['Count']
    .sum()
    .unstack('ClassWeight')
    .fillna(0)
    .sort_values(by='total', ascending=False)
)

# Get percentage values; stored in separate '<col>_%' columns so the raw
# counts stay available for plotting.
for col in VALUE_COLS:
    df1[col + '_%'] = 100 * df1[col] / df1['total']

ax = df1[VALUE_COLS].plot(kind='barh', width=0.8, stacked=True,
                          figsize=(15, 10),
                          colormap=ListedColormap(sns.color_palette("Blues_d")))

# Set up labels and ticks
ax.set_xlabel('No.of Shipments', fontsize=15)
ax.set_ylabel('Country', fontsize=15)
plt.xticks(fontsize=15)
plt.yticks(fontsize=15)
plt.title('Total Shipments by Country and Customer Class', fontsize=15)

# Add in text labels.
# 'label_tot' is the running sum of the columns processed so far, i.e. the
# x coordinate of the right edge of the current segment in each stacked bar.
df1['label_tot'] = 0
for col in VALUE_COLS:
    df1['label_tot'] += df1[col]
    # The labelling loop must run once per column, while 'label_tot' still
    # holds this column's right edge; row i of df1 is bar i on the y axis.
    for i, (val, pos) in enumerate(
            df1[[col + '_%', 'label_tot']].itertuples(index=False, name=None)):
        # Right-align just inside the segment edge and centre on the bar.
        ax.text(pos - 1, i, "{0:.1f}%".format(val),
                color='white', fontweight='bold', fontsize=15,
                ha='right', va='center')
使用我对您的输入数据稍作修改后的版本,结果如下:
这是我的堆叠条形图代码。我可以为第一段 (AA) 添加百分比值,但如何为所有 4 个段添加值?
# Question snippet: draw a stacked horizontal bar chart of shipment counts per
# country and annotate the bars with percentage labels.

# Load the records and pivot them to one row per country with one column per
# ClassWeight value; combinations that do not occur become 0.
df = pd.read_csv("123.csv")
df1 = (
    df.groupby(['Country', 'ClassWeight'])['Count']
    .sum()
    .unstack('ClassWeight')
    .fillna(0)
)

# Sort on the 'total' column, and then drop it to avoid double plotting.
# NOTE(review): iloc[:, :-1] assumes 'total' is the last column - confirm
# against the real CSV.
ax = df1.sort_values(['total']).iloc[:, :-1].plot(
    kind='barh', width=0.8, stacked=True, figsize=(15, 10),
    colormap=ListedColormap(sns.color_palette("Blues_d")))

# Axis labels, tick sizes and title
ax.set_xlabel('No.of Shipments', fontsize=15)
ax.set_ylabel('Country', fontsize=15)
plt.xticks(fontsize=15)
plt.yticks(fontsize=15)
plt.title('Total Shipments by Country and Customer Class', fontsize=15)

# Convert each class count into a percentage of the row total, using the same
# row order as the plotted frame so that row i matches bar i.
df2 = df1.sort_values(['total'], ascending=True)
df2['AA'] = 100 * df2['AA'] / df2['total']
df2['A'] = 100 * df2['A'] / df2['total']
df2['B'] = 100 * df2['B'] / df2['total']
df2['C'] = 100 * df2['C'] / df2['total']
df3 = df2.iloc[:, :-1]

# Can only enumerate on the AA column. How could we do all 4 columns?
# NOTE(review): v is a percentage while the x axis shows shipment counts, so
# the x position used here does not track the segment's actual right edge.
for i, v in enumerate(df3['AA']):
    ax.text(v - 1, i - 0.2, "{0:.1f}%".format(v), color='white',
            fontweight='bold', fontsize=15)
示例数据:
ClassWeight AA A B C
Country
Romania 17.142857 32.268908 28.235294 22.352941
Finland 60.325203 13.495935 12.682927 13.495935
{'Country': {0: 'France', 1: 'Poland', 2: 'Lithuania', 3: 'United Kingdom', 4: 'Denmark'}, 'Count': {0: 233, 1: 232, 2: 286, 3: 236, 4: 223}, 'SumWeight': {0: 8072469.5, 1: 6689511.05, 2: 5158305.25 , 3: 4675914.53, 4: 3536684.52}, 'AvgWeight': {0: 34645.79, 1: 28834.1, 2: 18036.03, 3: 19813.2, 4: 15859.57}, 'ClassWeight': {0: 'AA', 1: 'AA', 2: 'AA', 3: 'AA', 4: 'AA'}}
我稍微重新整理了一下您的代码——您不必每次都创建新的 DataFrame,而且实际上只需要按总数(total)排序一次。
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
import seaborn as sns
# Answer snippet: stacked horizontal bar chart in which every segment is
# labelled with its percentage of the row total.

# Small reproducible input: one country with a count per class plus a
# 'total' row, so the pivoted frame below gets a 'total' column.
df = pd.DataFrame(
    {'Country': {0: 'France', 1: 'France', 2: 'France', 3: 'France', 4: 'France'},
     'Count': {0: 100, 1: 232, 2: 286, 3: 236, 4: 854},
     'ClassWeight': {0: 'AA', 1: 'A', 2: 'B', 3: 'C', 4: 'total'}}
)

# Track which value columns we want to plot
VALUE_COLS = ['AA', 'A', 'B', 'C']

# We only need to sort_values once, so we might as well do it as we generate df1
df1 = (
    df.groupby(['Country', 'ClassWeight'])['Count']
    .sum()
    .unstack('ClassWeight')
    .fillna(0)
    .sort_values(by='total', ascending=False)
)

# Get percentage values; stored in separate '<col>_%' columns so the raw
# counts stay available for plotting.
for col in VALUE_COLS:
    df1[col + '_%'] = 100 * df1[col] / df1['total']

ax = df1[VALUE_COLS].plot(kind='barh', width=0.8, stacked=True,
                          figsize=(15, 10),
                          colormap=ListedColormap(sns.color_palette("Blues_d")))

# Set up labels and ticks
ax.set_xlabel('No.of Shipments', fontsize=15)
ax.set_ylabel('Country', fontsize=15)
plt.xticks(fontsize=15)
plt.yticks(fontsize=15)
plt.title('Total Shipments by Country and Customer Class', fontsize=15)

# Add in text labels.
# 'label_tot' is the running sum of the columns processed so far, i.e. the
# x coordinate of the right edge of the current segment in each stacked bar.
df1['label_tot'] = 0
for col in VALUE_COLS:
    df1['label_tot'] += df1[col]
    # The labelling loop must run once per column, while 'label_tot' still
    # holds this column's right edge; row i of df1 is bar i on the y axis.
    for i, (val, pos) in enumerate(
            df1[[col + '_%', 'label_tot']].itertuples(index=False, name=None)):
        # Right-align just inside the segment edge and centre on the bar.
        ax.text(pos - 1, i, "{0:.1f}%".format(val),
                color='white', fontweight='bold', fontsize=15,
                ha='right', va='center')
使用我对您的输入数据稍作修改后的版本,结果如下: