如何突出显示 df.plot.barh 中的最大值?
How can I highlight the largest value(s) in df.plot.barh?
我有一个堆叠条形图,使用颜色显示 NBA 球队中年龄组的分布,代码如下所示:
import matplotlib.pyplot as mpl
import matplotlib.cm as mcm
import pandas as pd
import numpy as np
from typing import List, Tuple
def read_to_df(file_path: str) -> pd.DataFrame:
    """Load an Excel file into a DataFrame indexed by its first column.

    Args:
        file_path: Path of the Excel file to read.

    Returns:
        The frame produced by ``pandas.read_excel`` with column 0 as index.
    """
    frame = pd.read_excel(file_path, index_col=0)
    return frame
def color_to_hex(color: Tuple[float, ...]) -> str:
    """Convert an RGB(A) colour with float channels to a ``#rrggbb`` string.

    Args:
        color: Sequence of at least three channel values (red, green, blue),
            each nominally in the range 0-1. Any further entries, such as an
            alpha channel, are ignored.

    Returns:
        The colour as a lower-case hexadecimal string, e.g. ``"#ff0000"``.
    """
    channels = []
    for value in color[:3]:
        scaled = int(round(value * 255))
        # Clamp so that out-of-range input can never produce a malformed
        # string (three hex digits or a minus sign in one channel).
        channels.append(min(255, max(0, scaled)))
    return "#%02x%02x%02x" % tuple(channels)
def cmap_to_colors(cmap: str,
                   amount: int) -> List[str]:
    """Sample evenly spaced colours from a named matplotlib colormap.

    Args:
        cmap: Name of a registered colormap, e.g. ``"viridis"``.
        amount: Number of colours to sample across the colormap's range.

    Returns:
        ``amount`` hex colour strings, ordered from the start of the
        colormap to its end.
    """
    # ``matplotlib.cm.get_cmap`` was deprecated in matplotlib 3.7 and removed
    # in 3.9; ``pyplot.get_cmap`` performs the same lookup by name.
    colormap = mpl.get_cmap(cmap)
    return [color_to_hex(colormap(position))
            for position in np.linspace(0, 1, amount)]
def main() -> None:
    """Draw the stacked age-distribution chart and save it as a PNG."""
    raw = read_to_df("age_dist_median_six.xlsx")
    # Drop the summary columns so that only the age-group columns remain.
    df_age_only = raw.drop(["median", "youngest", "oldest"], axis=1)
    # Reverse the row order (this is a reversal, not a transpose).
    df_age_only = df_age_only.iloc[::-1]
    # One hex colour per stacked segment, sampled from the colormap.
    palette = cmap_to_colors("viridis", 6)
    ticks = np.linspace(0, 100, 11)
    ax = df_age_only.plot.barh(
        stacked=True,
        color=palette,
        width=0.95,
        xticks=ticks,
        figsize=(10, 15),
    )
    ax.legend(bbox_to_anchor=(1, 1))
    ax.margins(x=0)
    mpl.savefig("stacked_six_viridis.png")


if __name__ == "__main__":
    main()
我总体上对结果很满意,但我还想突出显示最大的条形:我希望最大的那一个(如果有多个条形并列最大百分比,则为多个)以其实际颜色显示,而所有其他条形显示为灰色。我该怎么做?条形图目前看起来像这样:
可以在此处查看用于图表的 .xlsx 文件:https://send-anywhere.com/web/downloads/RJN1IIPS
这里给出一种直接使用 matplotlib 的 barh 函数来实现您需求的方法。思路是逐段绘制水平堆叠条形,并在绘制时为每一段指定相应的颜色。以下是按照上述思路对您提供的代码所做的改编:
import matplotlib.pyplot as mpl
import matplotlib.cm as mcm
import pandas as pd
import numpy as np
from typing import List, Tuple
def read_to_df(file_path: str) -> pd.DataFrame:
    """Load an Excel file into a DataFrame indexed by its first column.

    Args:
        file_path: Path of the Excel file to read.

    Returns:
        The frame produced by ``pandas.read_excel`` with column 0 as index.
    """
    frame = pd.read_excel(file_path, index_col=0)
    return frame
def color_to_hex(color: Tuple[float, ...]) -> str:
    """Convert an RGB(A) colour with float channels to a ``#rrggbb`` string.

    Args:
        color: Sequence of at least three channel values (red, green, blue),
            each nominally in the range 0-1. Any further entries, such as an
            alpha channel, are ignored.

    Returns:
        The colour as a lower-case hexadecimal string, e.g. ``"#ff0000"``.
    """
    channels = []
    for value in color[:3]:
        scaled = int(round(value * 255))
        # Clamp so that out-of-range input can never produce a malformed
        # string (three hex digits or a minus sign in one channel).
        channels.append(min(255, max(0, scaled)))
    return "#%02x%02x%02x" % tuple(channels)
def cmap_to_colors(cmap: str,
                   amount: int) -> List[str]:
    """Sample evenly spaced colours from a named matplotlib colormap.

    Args:
        cmap: Name of a registered colormap, e.g. ``"viridis"``.
        amount: Number of colours to sample across the colormap's range.

    Returns:
        ``amount`` hex colour strings, ordered from the start of the
        colormap to its end.
    """
    # ``matplotlib.cm.get_cmap`` was deprecated in matplotlib 3.7 and removed
    # in 3.9; ``pyplot.get_cmap`` performs the same lookup by name.
    colormap = mpl.get_cmap(cmap)
    return [color_to_hex(colormap(position))
            for position in np.linspace(0, 1, amount)]
def main() -> None:
    """Plot the stacked age chart, colouring only each row's largest segment.

    Reads ``age_dist_median_six.xlsx``, draws one stacked horizontal bar per
    row, paints every segment equal to its row's maximum in its colormap
    colour and all other segments grey, then saves the figure to
    ``stacked_six_viridis.png`` and displays it.
    """
    df = read_to_df("age_dist_median_six.xlsx")
    df_age_only = df.drop(["median", "youngest", "oldest"], axis=1)
    # Reverse the row order (this is a reversal, not a transpose).
    df_age_only = df_age_only.iloc[::-1]
    colors = cmap_to_colors("viridis", 6)
    mpl.figure(figsize=(12, 12))
    n_teams = len(df_age_only)
    for y_pos in range(n_teams):
        # Select the row by position: integer ``[i]`` on a Series is a label
        # lookup (or a deprecated positional fallback) and is not reliable.
        row = df_age_only.iloc[y_pos]
        # Computed once per row rather than once per segment.
        row_max = row.max()
        x_pos = 0
        for value, color in zip(row, colors):
            # Every segment equal to the row maximum keeps its colour, so
            # ties are all highlighted.
            bar_color = color if value == row_max else 'tab:grey'
            mpl.barh(y_pos, value, color=bar_color, left=x_pos,
                     align='center', edgecolor='k')
            x_pos += value
    mpl.yticks(np.arange(n_teams), df_age_only.index, fontsize=9)
    # Empty line plots serve only as legend handles for the column colours.
    for color, column in zip(colors, df_age_only.columns):
        mpl.plot([], [], color=color, lw=10, label=str(column))
    mpl.legend()
    # Save before show(): once the window shown by show() is closed the
    # figure is gone, and savefig() would write an empty image.
    mpl.savefig("stacked_six_viridis.png")
    mpl.show()


if __name__ == "__main__":
    main()
输出如下:
或者,如果您想保留原始颜色但突出显示具有最大值的条,您可以通过改变 alpha 值来更改条的透明度。请参阅下面的代码:
import matplotlib.pyplot as mpl
import matplotlib.cm as mcm
import pandas as pd
import numpy as np
from typing import List, Tuple
def read_to_df(file_path: str) -> pd.DataFrame:
    """Load an Excel file into a DataFrame indexed by its first column.

    Args:
        file_path: Path of the Excel file to read.

    Returns:
        The frame produced by ``pandas.read_excel`` with column 0 as index.
    """
    frame = pd.read_excel(file_path, index_col=0)
    return frame
def color_to_hex(color: Tuple[float, ...]) -> str:
    """Convert an RGB(A) colour with float channels to a ``#rrggbb`` string.

    Args:
        color: Sequence of at least three channel values (red, green, blue),
            each nominally in the range 0-1. Any further entries, such as an
            alpha channel, are ignored.

    Returns:
        The colour as a lower-case hexadecimal string, e.g. ``"#ff0000"``.
    """
    channels = []
    for value in color[:3]:
        scaled = int(round(value * 255))
        # Clamp so that out-of-range input can never produce a malformed
        # string (three hex digits or a minus sign in one channel).
        channels.append(min(255, max(0, scaled)))
    return "#%02x%02x%02x" % tuple(channels)
def cmap_to_colors(cmap: str,
                   amount: int) -> List[str]:
    """Sample evenly spaced colours from a named matplotlib colormap.

    Args:
        cmap: Name of a registered colormap, e.g. ``"viridis"``.
        amount: Number of colours to sample across the colormap's range.

    Returns:
        ``amount`` hex colour strings, ordered from the start of the
        colormap to its end.
    """
    # ``matplotlib.cm.get_cmap`` was deprecated in matplotlib 3.7 and removed
    # in 3.9; ``pyplot.get_cmap`` performs the same lookup by name.
    colormap = mpl.get_cmap(cmap)
    return [color_to_hex(colormap(position))
            for position in np.linspace(0, 1, amount)]
def main() -> None:
    """Plot the stacked age chart, fading all but each row's largest segment.

    Reads ``age_dist_median_six.xlsx``, draws one stacked horizontal bar per
    row in the colormap colours, keeps every segment equal to its row's
    maximum fully opaque and draws all other segments with alpha 0.3, then
    saves the figure to ``stacked_six_viridis.png`` and displays it.
    """
    df = read_to_df("age_dist_median_six.xlsx")
    df_age_only = df.drop(["median", "youngest", "oldest"], axis=1)
    # Reverse the row order (this is a reversal, not a transpose).
    df_age_only = df_age_only.iloc[::-1]
    colors = cmap_to_colors("viridis", 6)
    mpl.figure(figsize=(12, 12))
    n_teams = len(df_age_only)
    for y_pos in range(n_teams):
        # Select the row by position: integer ``[i]`` on a Series is a label
        # lookup (or a deprecated positional fallback) and is not reliable.
        row = df_age_only.iloc[y_pos]
        # Computed once per row rather than once per segment.
        row_max = row.max()
        x_pos = 0
        for value, color in zip(row, colors):
            # Every segment equal to the row maximum stays opaque, so ties
            # are all highlighted.
            alpha = 1 if value == row_max else 0.3
            mpl.barh(y_pos, value, color=color, left=x_pos,
                     align='center', edgecolor='tab:grey', alpha=alpha)
            x_pos += value
    mpl.yticks(np.arange(n_teams), df_age_only.index, fontsize=9)
    # Empty line plots serve only as legend handles for the column colours.
    for color, column in zip(colors, df_age_only.columns):
        mpl.plot([], [], color=color, lw=10, label=str(column))
    mpl.legend()
    # Save before show(): once the window shown by show() is closed the
    # figure is gone, and savefig() would write an empty image.
    mpl.savefig("stacked_six_viridis.png")
    mpl.show()


if __name__ == "__main__":
    main()
输出如下:
除此之外,您还可以通过更改 edgecolor 和 hatch 等其他属性来突出显示条形。
另一种思路是遍历 pandas 已经生成的条形,并改变它们的透明度。下面的示例代码使用 plt 作为 matplotlib.pyplot 的别名,以便更容易与教程和网络上的示例进行比较。另请注意,pandas 的绘图函数通常会返回一个 ax。
import matplotlib.pyplot as plt
from matplotlib.ticker import PercentFormatter
import pandas as pd
# Load the data; the first column of the sheet becomes the index.
df = pd.read_excel("age_dist_median_six.xlsx", index_col=0)
# Drop the summary columns so that only the age-group columns are plotted.
df_age_only = df.drop(["median", "youngest", "oldest"], axis=1)
ax = df_age_only.plot.barh(cmap='viridis', stacked=True, edgecolor='black',
                           width=1, clip_on=False, figsize=(12, 6))
ax.legend(bbox_to_anchor=(1.01, 1), loc='upper left')
ax.invert_yaxis()
ax.margins(x=0, y=0)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.tick_params(axis='y', length=0)
ax.xaxis.set_major_formatter(PercentFormatter(100))
# Row-wise maxima, computed once with pandas (NaN is skipped by default), so
# the snippet needs no numpy import.
row_max = df_age_only.max(axis=1)
for i, max_val in enumerate(row_max):
    # Each container is indexed by row position here, so ``bar_group[i]`` is
    # the segment belonging to row ``i``.
    for bar_group in ax.containers:
        bar = bar_group[i]
        # The small tolerance keeps segments tied with the maximum highlighted.
        if bar.get_width() < max_val - 1e-6:
            bar.set_alpha(0.4)
            # Alternative: grey the segment out instead of fading it, e.g.
            # with the light grey '#DDDDDD' as face colour.
plt.tight_layout()
plt.show()
我有一个堆叠条形图,使用颜色显示 NBA 球队中年龄组的分布,代码如下所示:
import matplotlib.pyplot as mpl
import matplotlib.cm as mcm
import pandas as pd
import numpy as np
from typing import List, Tuple
def read_to_df(file_path: str) -> pd.DataFrame:
    """Load an Excel file into a DataFrame indexed by its first column.

    Args:
        file_path: Path of the Excel file to read.

    Returns:
        The frame produced by ``pandas.read_excel`` with column 0 as index.
    """
    frame = pd.read_excel(file_path, index_col=0)
    return frame
def color_to_hex(color: Tuple[float, ...]) -> str:
    """Convert an RGB(A) colour with float channels to a ``#rrggbb`` string.

    Args:
        color: Sequence of at least three channel values (red, green, blue),
            each nominally in the range 0-1. Any further entries, such as an
            alpha channel, are ignored.

    Returns:
        The colour as a lower-case hexadecimal string, e.g. ``"#ff0000"``.
    """
    channels = []
    for value in color[:3]:
        scaled = int(round(value * 255))
        # Clamp so that out-of-range input can never produce a malformed
        # string (three hex digits or a minus sign in one channel).
        channels.append(min(255, max(0, scaled)))
    return "#%02x%02x%02x" % tuple(channels)
def cmap_to_colors(cmap: str,
                   amount: int) -> List[str]:
    """Sample evenly spaced colours from a named matplotlib colormap.

    Args:
        cmap: Name of a registered colormap, e.g. ``"viridis"``.
        amount: Number of colours to sample across the colormap's range.

    Returns:
        ``amount`` hex colour strings, ordered from the start of the
        colormap to its end.
    """
    # ``matplotlib.cm.get_cmap`` was deprecated in matplotlib 3.7 and removed
    # in 3.9; ``pyplot.get_cmap`` performs the same lookup by name.
    colormap = mpl.get_cmap(cmap)
    return [color_to_hex(colormap(position))
            for position in np.linspace(0, 1, amount)]
def main() -> None:
    """Draw the stacked age-distribution chart and save it as a PNG."""
    raw = read_to_df("age_dist_median_six.xlsx")
    # Drop the summary columns so that only the age-group columns remain.
    df_age_only = raw.drop(["median", "youngest", "oldest"], axis=1)
    # Reverse the row order (this is a reversal, not a transpose).
    df_age_only = df_age_only.iloc[::-1]
    # One hex colour per stacked segment, sampled from the colormap.
    palette = cmap_to_colors("viridis", 6)
    ticks = np.linspace(0, 100, 11)
    ax = df_age_only.plot.barh(
        stacked=True,
        color=palette,
        width=0.95,
        xticks=ticks,
        figsize=(10, 15),
    )
    ax.legend(bbox_to_anchor=(1, 1))
    ax.margins(x=0)
    mpl.savefig("stacked_six_viridis.png")


if __name__ == "__main__":
    main()
我总体上对结果很满意,但我还想突出显示最大的条形:我希望最大的那一个(如果有多个条形并列最大百分比,则为多个)以其实际颜色显示,而所有其他条形显示为灰色。我该怎么做?条形图目前看起来像这样:
这里给出一种直接使用 matplotlib 的 barh 函数来实现您需求的方法。思路是逐段绘制水平堆叠条形,并在绘制时为每一段指定相应的颜色。以下是按照上述思路对您提供的代码所做的改编:
import matplotlib.pyplot as mpl
import matplotlib.cm as mcm
import pandas as pd
import numpy as np
from typing import List, Tuple
def read_to_df(file_path: str) -> pd.DataFrame:
    """Load an Excel file into a DataFrame indexed by its first column.

    Args:
        file_path: Path of the Excel file to read.

    Returns:
        The frame produced by ``pandas.read_excel`` with column 0 as index.
    """
    frame = pd.read_excel(file_path, index_col=0)
    return frame
def color_to_hex(color: Tuple[float, ...]) -> str:
    """Convert an RGB(A) colour with float channels to a ``#rrggbb`` string.

    Args:
        color: Sequence of at least three channel values (red, green, blue),
            each nominally in the range 0-1. Any further entries, such as an
            alpha channel, are ignored.

    Returns:
        The colour as a lower-case hexadecimal string, e.g. ``"#ff0000"``.
    """
    channels = []
    for value in color[:3]:
        scaled = int(round(value * 255))
        # Clamp so that out-of-range input can never produce a malformed
        # string (three hex digits or a minus sign in one channel).
        channels.append(min(255, max(0, scaled)))
    return "#%02x%02x%02x" % tuple(channels)
def cmap_to_colors(cmap: str,
                   amount: int) -> List[str]:
    """Sample evenly spaced colours from a named matplotlib colormap.

    Args:
        cmap: Name of a registered colormap, e.g. ``"viridis"``.
        amount: Number of colours to sample across the colormap's range.

    Returns:
        ``amount`` hex colour strings, ordered from the start of the
        colormap to its end.
    """
    # ``matplotlib.cm.get_cmap`` was deprecated in matplotlib 3.7 and removed
    # in 3.9; ``pyplot.get_cmap`` performs the same lookup by name.
    colormap = mpl.get_cmap(cmap)
    return [color_to_hex(colormap(position))
            for position in np.linspace(0, 1, amount)]
def main() -> None:
    """Plot the stacked age chart, colouring only each row's largest segment.

    Reads ``age_dist_median_six.xlsx``, draws one stacked horizontal bar per
    row, paints every segment equal to its row's maximum in its colormap
    colour and all other segments grey, then saves the figure to
    ``stacked_six_viridis.png`` and displays it.
    """
    df = read_to_df("age_dist_median_six.xlsx")
    df_age_only = df.drop(["median", "youngest", "oldest"], axis=1)
    # Reverse the row order (this is a reversal, not a transpose).
    df_age_only = df_age_only.iloc[::-1]
    colors = cmap_to_colors("viridis", 6)
    mpl.figure(figsize=(12, 12))
    n_teams = len(df_age_only)
    for y_pos in range(n_teams):
        # Select the row by position: integer ``[i]`` on a Series is a label
        # lookup (or a deprecated positional fallback) and is not reliable.
        row = df_age_only.iloc[y_pos]
        # Computed once per row rather than once per segment.
        row_max = row.max()
        x_pos = 0
        for value, color in zip(row, colors):
            # Every segment equal to the row maximum keeps its colour, so
            # ties are all highlighted.
            bar_color = color if value == row_max else 'tab:grey'
            mpl.barh(y_pos, value, color=bar_color, left=x_pos,
                     align='center', edgecolor='k')
            x_pos += value
    mpl.yticks(np.arange(n_teams), df_age_only.index, fontsize=9)
    # Empty line plots serve only as legend handles for the column colours.
    for color, column in zip(colors, df_age_only.columns):
        mpl.plot([], [], color=color, lw=10, label=str(column))
    mpl.legend()
    # Save before show(): once the window shown by show() is closed the
    # figure is gone, and savefig() would write an empty image.
    mpl.savefig("stacked_six_viridis.png")
    mpl.show()


if __name__ == "__main__":
    main()
输出如下:
或者,如果您想保留原始颜色但突出显示具有最大值的条,您可以通过改变 alpha 值来更改条的透明度。请参阅下面的代码:
import matplotlib.pyplot as mpl
import matplotlib.cm as mcm
import pandas as pd
import numpy as np
from typing import List, Tuple
def read_to_df(file_path: str) -> pd.DataFrame:
    """Load an Excel file into a DataFrame indexed by its first column.

    Args:
        file_path: Path of the Excel file to read.

    Returns:
        The frame produced by ``pandas.read_excel`` with column 0 as index.
    """
    frame = pd.read_excel(file_path, index_col=0)
    return frame
def color_to_hex(color: Tuple[float, ...]) -> str:
    """Convert an RGB(A) colour with float channels to a ``#rrggbb`` string.

    Args:
        color: Sequence of at least three channel values (red, green, blue),
            each nominally in the range 0-1. Any further entries, such as an
            alpha channel, are ignored.

    Returns:
        The colour as a lower-case hexadecimal string, e.g. ``"#ff0000"``.
    """
    channels = []
    for value in color[:3]:
        scaled = int(round(value * 255))
        # Clamp so that out-of-range input can never produce a malformed
        # string (three hex digits or a minus sign in one channel).
        channels.append(min(255, max(0, scaled)))
    return "#%02x%02x%02x" % tuple(channels)
def cmap_to_colors(cmap: str,
                   amount: int) -> List[str]:
    """Sample evenly spaced colours from a named matplotlib colormap.

    Args:
        cmap: Name of a registered colormap, e.g. ``"viridis"``.
        amount: Number of colours to sample across the colormap's range.

    Returns:
        ``amount`` hex colour strings, ordered from the start of the
        colormap to its end.
    """
    # ``matplotlib.cm.get_cmap`` was deprecated in matplotlib 3.7 and removed
    # in 3.9; ``pyplot.get_cmap`` performs the same lookup by name.
    colormap = mpl.get_cmap(cmap)
    return [color_to_hex(colormap(position))
            for position in np.linspace(0, 1, amount)]
def main() -> None:
    """Plot the stacked age chart, fading all but each row's largest segment.

    Reads ``age_dist_median_six.xlsx``, draws one stacked horizontal bar per
    row in the colormap colours, keeps every segment equal to its row's
    maximum fully opaque and draws all other segments with alpha 0.3, then
    saves the figure to ``stacked_six_viridis.png`` and displays it.
    """
    df = read_to_df("age_dist_median_six.xlsx")
    df_age_only = df.drop(["median", "youngest", "oldest"], axis=1)
    # Reverse the row order (this is a reversal, not a transpose).
    df_age_only = df_age_only.iloc[::-1]
    colors = cmap_to_colors("viridis", 6)
    mpl.figure(figsize=(12, 12))
    n_teams = len(df_age_only)
    for y_pos in range(n_teams):
        # Select the row by position: integer ``[i]`` on a Series is a label
        # lookup (or a deprecated positional fallback) and is not reliable.
        row = df_age_only.iloc[y_pos]
        # Computed once per row rather than once per segment.
        row_max = row.max()
        x_pos = 0
        for value, color in zip(row, colors):
            # Every segment equal to the row maximum stays opaque, so ties
            # are all highlighted.
            alpha = 1 if value == row_max else 0.3
            mpl.barh(y_pos, value, color=color, left=x_pos,
                     align='center', edgecolor='tab:grey', alpha=alpha)
            x_pos += value
    mpl.yticks(np.arange(n_teams), df_age_only.index, fontsize=9)
    # Empty line plots serve only as legend handles for the column colours.
    for color, column in zip(colors, df_age_only.columns):
        mpl.plot([], [], color=color, lw=10, label=str(column))
    mpl.legend()
    # Save before show(): once the window shown by show() is closed the
    # figure is gone, and savefig() would write an empty image.
    mpl.savefig("stacked_six_viridis.png")
    mpl.show()


if __name__ == "__main__":
    main()
输出如下:
除此之外,您还可以通过更改 edgecolor 和 hatch 等其他属性来突出显示条形。
另一种思路是遍历 pandas 已经生成的条形,并改变它们的透明度。下面的示例代码使用 plt 作为 matplotlib.pyplot 的别名,以便更容易与教程和网络上的示例进行比较。另请注意,pandas 的绘图函数通常会返回一个 ax。
import matplotlib.pyplot as plt
from matplotlib.ticker import PercentFormatter
import pandas as pd
# Load the data; the first column of the sheet becomes the index.
df = pd.read_excel("age_dist_median_six.xlsx", index_col=0)
# Drop the summary columns so that only the age-group columns are plotted.
df_age_only = df.drop(["median", "youngest", "oldest"], axis=1)
ax = df_age_only.plot.barh(cmap='viridis', stacked=True, edgecolor='black',
                           width=1, clip_on=False, figsize=(12, 6))
ax.legend(bbox_to_anchor=(1.01, 1), loc='upper left')
ax.invert_yaxis()
ax.margins(x=0, y=0)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.tick_params(axis='y', length=0)
ax.xaxis.set_major_formatter(PercentFormatter(100))
# Row-wise maxima, computed once with pandas (NaN is skipped by default), so
# the snippet needs no numpy import.
row_max = df_age_only.max(axis=1)
for i, max_val in enumerate(row_max):
    # Each container is indexed by row position here, so ``bar_group[i]`` is
    # the segment belonging to row ``i``.
    for bar_group in ax.containers:
        bar = bar_group[i]
        # The small tolerance keeps segments tied with the maximum highlighted.
        if bar.get_width() < max_val - 1e-6:
            bar.set_alpha(0.4)
            # Alternative: grey the segment out instead of fading it, e.g.
            # with the light grey '#DDDDDD' as face colour.
plt.tight_layout()
plt.show()