如何突出显示 df.plot.barh 中的最大值?

How can I highlight the largest value(s) in df.plot.barh?

我有一个堆叠条形图,使用颜色显示 NBA 球队中年龄组的分布,代码如下所示:

import matplotlib.pyplot as mpl
import matplotlib.cm as mcm
import pandas as pd
import numpy as np
from typing import List, Tuple
def read_to_df(file_path: str) -> pd.DataFrame:
    """Load an Excel workbook into a DataFrame.

    Args:
        file_path: Path of the Excel file to read.

    Returns:
        The sheet contents, with the first column used as the row index.
    """
    frame = pd.read_excel(file_path, index_col=0)
    return frame
def color_to_hex(color: Tuple[float, ...]) -> str:
    """Convert a float color tuple to a ``#rrggbb`` hex string.

    Args:
        color: Color channels as floats where 1.0 maps to 255. The last
            element is dropped (the alpha channel of an RGBA tuple), and
            exactly three channels must remain.

    Returns:
        The color as a lower-case ``#rrggbb`` string.
    """
    # Scale to 0-255 and clamp: a channel slightly outside [0, 1] (float
    # overshoot) must not become negative or need a third hex digit.
    channels = [min(255, max(0, int(round(value * 255))))
                for value in color[:-1]]
    return "#%02x%02x%02x" % tuple(channels)
def cmap_to_colors(cmap: str,
                   amount: int) -> List[str]:
    """Sample evenly spaced colors from a named matplotlib colormap.

    Args:
        cmap: Name of the colormap to look up, e.g. "viridis".
        amount: Number of colors to sample between the two ends of the
            colormap (positions 0 and 1 inclusive).

    Returns:
        The sampled colors as hex strings, ordered from position 0 to 1.
    """
    # matplotlib.cm.get_cmap was deprecated in Matplotlib 3.7 and removed in
    # 3.9; pyplot.get_cmap performs the same lookup by name and remains
    # available.
    colormap = mpl.get_cmap(cmap)
    return [color_to_hex(colormap(position))
            for position in np.linspace(0, 1, amount)]
def main() -> None:
    """Plot the spreadsheet as a stacked horizontal bar chart and save it.

    Reads "age_dist_median_six.xlsx", removes the "median", "youngest" and
    "oldest" columns, draws the remaining columns as stacked horizontal bars
    colored from the viridis colormap, and writes the figure to
    "stacked_six_viridis.png".
    """
    raw = read_to_df("age_dist_median_six.xlsx")
    # Drop the summary columns, then reverse the row order (a flip, not a
    # transpose).
    age_groups = raw.drop(["median", "youngest", "oldest"], axis=1).iloc[::-1]
    # One hex color per stacked segment, sampled from the colormap.
    palette = cmap_to_colors("viridis", 6)
    plot_options = dict(
        stacked=True,
        color=palette,
        width=0.95,
        xticks=np.linspace(0, 100, 11),
        figsize=(10, 15),
    )
    axes = age_groups.plot.barh(**plot_options)
    axes.legend(bbox_to_anchor=(1, 1))
    axes.margins(x=0)
    mpl.savefig("stacked_six_viridis.png")


if __name__ == "__main__":
    main()

我总体上对结果很满意,但我还想突出显示最大的条形:我希望最大的那个条形(如果有多个条形并列最大百分比,则为这几个条形)以其实际颜色显示,而所有其他条形显示为灰色。我该怎么做?条形图目前看起来像这样: 用于绘制图表的 .xlsx 文件可以在此处查看:https://send-anywhere.com/web/downloads/RJN1IIPS

下面是一种直接使用 matplotlib 的 barh 函数来实现您需求的方法。其思路是逐段绘制水平堆叠条形,并在绘制的同时为每一段指定合适的颜色。以下是按此思路对您提供的代码所做的改编版:

import matplotlib.pyplot as mpl
import matplotlib.cm as mcm
import pandas as pd
import numpy as np
from typing import List, Tuple

def read_to_df(file_path: str) -> pd.DataFrame:
    """Read the Excel file at ``file_path``, indexing rows by its first column."""
    return pd.read_excel(
        file_path,
        index_col=0,
    )
def color_to_hex(color: Tuple[float, ...]) -> str:
    """Convert a float color tuple to a ``#rrggbb`` hex string.

    Args:
        color: Color channels as floats where 1.0 maps to 255. The last
            element is dropped (the alpha channel of an RGBA tuple), and
            exactly three channels must remain.

    Returns:
        The color as a lower-case ``#rrggbb`` string.
    """
    channels = []
    for value in color[:-1]:
        scaled = int(round(value * 255))
        # Clamp so a channel slightly outside [0, 1] cannot produce a
        # negative number or a three-digit hex component.
        channels.append(min(255, max(0, scaled)))
    return "#%02x%02x%02x" % tuple(channels)
def cmap_to_colors(cmap: str,
                   amount: int) -> List[str]:
    """Sample evenly spaced colors from a named matplotlib colormap.

    Args:
        cmap: Name of the colormap to look up, e.g. "viridis".
        amount: Number of colors to sample between the two ends of the
            colormap (positions 0 and 1 inclusive).

    Returns:
        The sampled colors as hex strings, ordered from position 0 to 1.
    """
    # matplotlib.cm.get_cmap was deprecated in Matplotlib 3.7 and removed in
    # 3.9; pyplot.get_cmap performs the same lookup by name and remains
    # available.
    colormap = mpl.get_cmap(cmap)
    return [color_to_hex(colormap(position))
            for position in np.linspace(0, 1, amount)]

def main() -> None:
    """Draw the stacked bar chart with only each row's largest segment colored.

    Reads "age_dist_median_six.xlsx", removes the "median", "youngest" and
    "oldest" columns, and draws one stacked horizontal bar per row with
    matplotlib's ``barh``. Segments equal to their row's maximum keep their
    colormap color; every other segment is drawn grey. The figure is saved
    to "stacked_six_viridis.png" and then shown.
    """
    df = read_to_df("age_dist_median_six.xlsx")
    df_age_only = df.drop(["median", "youngest", "oldest"], axis=1)
    # Reverse the row order (a flip, not a transpose).
    df_age_only = df_age_only.iloc[::-1]
    colors = cmap_to_colors("viridis", 6)

    mpl.figure(figsize=(12, 12))
    n_teams = len(df_age_only)

    for i in range(n_teams):
        # Positional access via iloc: the frame is indexed by labels, so a
        # plain integer key on a column would rely on deprecated fallback.
        row = df_age_only.iloc[i]
        # Computed once per row instead of once per segment.
        row_max = row.max()
        x_pos = 0
        for value, color in zip(row, colors):
            # Ties are all highlighted: every segment equal to the maximum
            # keeps its own color.
            bar_color = color if value == row_max else 'tab:grey'
            mpl.barh(i, value, color=bar_color, left=x_pos,
                     align='center', edgecolor='k')
            # The next segment starts where this one ends.
            x_pos += value

    mpl.yticks(np.arange(n_teams), df_age_only.index, fontsize=9)

    # Legend entries come from empty proxy lines so that each column is
    # listed in its real color even where its bars are drawn grey.
    for column, color in zip(df_age_only.columns, colors):
        mpl.plot([], [], color=color, lw=10, label=str(column))
    mpl.legend()

    # Save before show(): with an interactive backend the figure is gone once
    # the window is closed, and a later savefig would write an empty image.
    mpl.savefig("stacked_six_viridis.png")
    mpl.show()


if __name__ == "__main__":
    main()

并且输出给出:

或者,如果您想保留原始颜色但突出显示具有最大值的条,您可以通过改变 alpha 值来更改条的透明度。请参阅下面的代码:

import matplotlib.pyplot as mpl
import matplotlib.cm as mcm
import pandas as pd
import numpy as np
from typing import List, Tuple

def read_to_df(file_path: str) -> pd.DataFrame:
    """Return the Excel sheet at ``file_path`` as a DataFrame.

    The first column of the sheet becomes the row index.
    """
    read_options = {"index_col": 0}
    return pd.read_excel(file_path, **read_options)
def color_to_hex(color: Tuple[float, ...]) -> str:
    """Convert a float color tuple to a ``#rrggbb`` hex string.

    Args:
        color: Color channels as floats where 1.0 maps to 255. The last
            element is dropped (the alpha channel of an RGBA tuple), and
            exactly three channels must remain.

    Returns:
        The color as a lower-case ``#rrggbb`` string.
    """
    scaled = (int(round(value * 255)) for value in color[:-1])
    # Clamp to one byte: channels slightly outside [0, 1] would otherwise
    # format as a negative number or as three hex digits.
    channels = tuple(min(255, max(0, channel)) for channel in scaled)
    return "#%02x%02x%02x" % channels
def cmap_to_colors(cmap: str,
                   amount: int) -> List[str]:
    """Sample evenly spaced colors from a named matplotlib colormap.

    Args:
        cmap: Name of the colormap to look up, e.g. "viridis".
        amount: Number of colors to sample between the two ends of the
            colormap (positions 0 and 1 inclusive).

    Returns:
        The sampled colors as hex strings, ordered from position 0 to 1.
    """
    # matplotlib.cm.get_cmap was deprecated in Matplotlib 3.7 and removed in
    # 3.9; pyplot.get_cmap performs the same lookup by name and remains
    # available.
    colormap = mpl.get_cmap(cmap)
    return [color_to_hex(colormap(position))
            for position in np.linspace(0, 1, amount)]

def main() -> None:
    """Draw the stacked bar chart with each row's largest segment emphasized.

    Reads "age_dist_median_six.xlsx", removes the "median", "youngest" and
    "oldest" columns, and draws one stacked horizontal bar per row with
    matplotlib's ``barh``. Every segment keeps its colormap color; segments
    equal to their row's maximum are fully opaque and all others are drawn
    with alpha 0.3. The figure is saved to "stacked_six_viridis.png" and
    then shown.
    """
    df = read_to_df("age_dist_median_six.xlsx")
    df_age_only = df.drop(["median", "youngest", "oldest"], axis=1)
    # Reverse the row order (a flip, not a transpose).
    df_age_only = df_age_only.iloc[::-1]
    colors = cmap_to_colors("viridis", 6)

    mpl.figure(figsize=(12, 12))
    n_teams = len(df_age_only)

    for i in range(n_teams):
        # Positional access via iloc: the frame is indexed by labels, so a
        # plain integer key on a column would rely on deprecated fallback.
        row = df_age_only.iloc[i]
        # Computed once per row instead of once per segment.
        row_max = row.max()
        x_pos = 0
        for value, color in zip(row, colors):
            # Ties are all emphasized: every segment equal to the maximum
            # stays fully opaque.
            alpha = 1 if value == row_max else 0.3
            mpl.barh(i, value, color=color, left=x_pos, align='center',
                     edgecolor='tab:grey', alpha=alpha)
            # The next segment starts where this one ends.
            x_pos += value

    mpl.yticks(np.arange(n_teams), df_age_only.index, fontsize=9)

    # Legend entries come from empty proxy lines so that each column is
    # listed at full opacity even where its bars are faded.
    for column, color in zip(df_age_only.columns, colors):
        mpl.plot([], [], color=color, lw=10, label=str(column))
    mpl.legend()

    # Save before show(): with an interactive backend the figure is gone once
    # the window is closed, and a later savefig would write an empty image.
    mpl.savefig("stacked_six_viridis.png")
    mpl.show()


if __name__ == "__main__":
    main()

并且输出:

您还可以通过更改其他属性来突出显示条形,例如 edgecolor 和 hatch。

一个想法是遍历生成的条,并改变它们的透明度。

下面的示例代码使用 plt 作为 matplotlib.pyplot 的别名,使代码更容易与教程和网络上的示例进行比较。另请注意,pandas 的绘图函数通常会返回一个 ax。

import matplotlib.pyplot as plt
from matplotlib.ticker import PercentFormatter
import pandas as pd

# Load the sheet with its first column as the row index.
df = pd.read_excel("age_dist_median_six.xlsx", index_col=0)

# Remove the summary columns so only the stacked values remain.
df_age_only = df.drop(["median", "youngest", "oldest"], axis=1)

# pandas draws the stacked bars and returns the matplotlib Axes it drew on.
ax = df_age_only.plot.barh(cmap='viridis', stacked=True, edgecolor='black', width=1, clip_on=False, figsize=(12, 6))
ax.legend(bbox_to_anchor=(1.01, 1), loc='upper left')
# Put the first row at the top instead of reversing the DataFrame.
ax.invert_yaxis()
ax.margins(x=0, y=0)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.tick_params(axis='y', length=0)
ax.xaxis.set_major_formatter(PercentFormatter(100))

# Largest value of every row. DataFrame.max skips NaN by default, so no
# numpy import is needed (the snippet never imports numpy).
row_max = df_age_only.max(axis=1)

# ax.containers holds one bar container per column; each container holds one
# bar per row, in row order.
for bar_group in ax.containers:
    for bar, max_val in zip(bar_group, row_max):
        # The small tolerance keeps bars tied with the maximum highlighted.
        if bar.get_width() < max_val - 1e-6:
            bar.set_alpha(0.4)
            # Alternative: bar.set_facecolor('#DDDDDD') greys the bar out
            # instead of fading it.
plt.tight_layout()
plt.show()