如何向 matplotlib/seaborn 条形图添加第二个轴并使次要点与正确的条形对齐?
How do I add a second axis to a matplotlib/seaborn bar chart and have the secondary points align with the correct bars?
我写了一个(新手)python 函数(如下)来绘制由主要维度和可能的次要维度分解的条形图。例如,下图显示了每个性别中达到特定教育水平的人的百分比。
问题:如何在每个条形上叠加该子组的家庭人数中位数,例如在 College/Female 柱上放置一个表示值“3”的点?我见过的示例都没有把点准确地叠加在正确的柱上。
我对此非常陌生,非常感谢您的帮助!
# Sample data: one row per student (name, education level, household size, gender).
df = pd.DataFrame(
    [
        ('Alice', 'HS', 4, 'F'),
        ('Bob', 'HS', 4, 'M'),
        ('Chris', 'HS', 3, 'M'),
        ('Dave', 'College', 3, 'M'),
        ('Edna', 'College', 3, 'F'),
        ('Frank', 'HS', 6, 'M'),
    ],
    columns=['Student', 'Education', 'Household Size', 'Gender'],
)
def MakePercentageFrequencyTable(dataFrame, primaryDimension, secondaryDimension=None, extraAggregatedField=None):
    """Draw a horizontal bar chart of the percentage share of each ``primaryDimension`` value.

    Args:
        dataFrame: Source data; must contain the columns named by the dimensions.
        primaryDimension: Column whose value frequencies are turned into percentages.
        secondaryDimension: Optional column to split by. When given, percentages
            are computed within each of its groups (each group sums to 100) and
            the bars are coloured by ``primaryDimension``.
        extraAggregatedField: Accepted for interface compatibility; this
            figure-level version does not use it.

    Returns:
        The object returned by ``sns.catplot`` (a new figure-level grid).
    """
    source = dataFrame if secondaryDimension is None else dataFrame.groupby([secondaryDimension])
    percentages = (
        source[primaryDimension]
        .value_counts(normalize=True)
        .rename('percentage')
        .mul(100)
        .reset_index(drop=False)
    )

    if secondaryDimension is None:
        # The label column produced by reset_index() is called 'index' on older
        # pandas but is named after the counted column on pandas >= 2.0, so name
        # it explicitly instead of hard-coding 'index'.
        percentages.columns = [primaryDimension, 'percentage']
        return sns.catplot(x='percentage', y=primaryDimension, kind='bar', data=percentages)

    # Sorting keeps the rows of each secondary group together on the category axis.
    percentages = percentages.sort_values(secondaryDimension)
    return sns.catplot(
        x='percentage',
        y=secondaryDimension,
        hue=primaryDimension,
        kind='bar',
        data=percentages,
    )
# Plot the share of each education level within every gender.
MakePercentageFrequencyTable(
    dataFrame=df,
    primaryDimension='Education',
    secondaryDimension='Gender',
)
# Question: I want to send in extraAggregatedField='Household Size' when I call the function such that
# it creates a secondary 'Household Size' axis at the top of the figure
# and aggregates/integrates the 'Household Size' column such that the following points are plotted
# against the secondary axis and positioned over the given bars:
#
# Female/College => 3
# Female/High School => 4
# Male/College => 3
# Male/High School => 4
Picture of what I have been able to achieve so far
您需要使用 axes-level(轴级)函数 sns.barplot() 和 sns.stripplot(),而不是 catplot();catplot() 是 figure-level 函数,它会自行创建一个新的图形和 FacetGrid。
像这样:
# Sample data: one row per student (name, education level, household size, gender).
df = pd.DataFrame(
    [
        ('Alice', 'HS', 4, 'F'),
        ('Bob', 'HS', 4, 'M'),
        ('Chris', 'HS', 3, 'M'),
        ('Dave', 'College', 3, 'M'),
        ('Edna', 'College', 3, 'F'),
        ('Frank', 'HS', 6, 'M'),
    ],
    columns=['Student', 'Education', 'Household Size', 'Gender'],
)
def MakePercentageFrequencyTable(dataFrame, primaryDimension, secondaryDimension=None, extraAggregatedField=None, ax=None):
    """Draw percentage bars and optionally overlay a per-bar aggregate on a second x-axis.

    Args:
        dataFrame: Source data; must contain the columns named by the other arguments.
        primaryDimension: Column whose value frequencies are turned into percentages.
        secondaryDimension: Optional column to split by. When given, percentages
            are computed within each of its groups and bars are coloured by
            ``primaryDimension``.
        extraAggregatedField: Optional numeric column. When given, its median per
            subgroup is drawn as one point per bar against a twin x-axis placed
            at the top of the plot.
        ax: Matplotlib axes to draw on; defaults to the current axes.

    Returns:
        The axes holding the bars.
    """
    ax = plt.gca() if ax is None else ax

    source = dataFrame if secondaryDimension is None else dataFrame.groupby([secondaryDimension])
    percentages = (
        source[primaryDimension]
        .value_counts(normalize=True)
        .rename('percentage')
        .mul(100)
        .reset_index(drop=False)
    )

    if secondaryDimension is None:
        # reset_index() names the label column 'index' on older pandas and after
        # the counted column on pandas >= 2.0; name it explicitly.
        percentages.columns = [primaryDimension, 'percentage']
        categoryColumn = primaryDimension
        groupKeys = [primaryDimension]
        hueOptions = {}
        dodgeOptions = {}
    else:
        percentages = percentages.sort_values(secondaryDimension)
        categoryColumn = secondaryDimension
        groupKeys = [primaryDimension, secondaryDimension]
        # Freeze the hue order used by the bars so the points can reuse it.
        hueOptions = {
            'hue': primaryDimension,
            'hue_order': list(dict.fromkeys(percentages[primaryDimension])),
        }
        dodgeOptions = {'dodge': True}
    # Freeze the category order as well: bars and points only line up when both
    # plots use the same category and hue order, which must not depend on the
    # row order of two differently sorted tables.
    categoryOrder = list(dict.fromkeys(percentages[categoryColumn]))

    sns.barplot(
        x='percentage',
        y=categoryColumn,
        order=categoryOrder,
        data=percentages,
        ax=ax,
        **hueOptions,
    )

    if extraAggregatedField is not None:
        # Twin axes: shares the category (y) axis, gets its own x-axis on top.
        ax2 = ax.twiny()
        # Aggregate only the requested column: aggregating the whole frame also
        # hits text columns, which raises on pandas >= 2.0. The median is used
        # because that is the statistic the chart is meant to show.
        extraDimension = (
            dataFrame.groupby(groupKeys)[extraAggregatedField]
            .median()
            .reset_index(drop=False)
        )
        sns.stripplot(
            data=extraDimension,
            x=extraAggregatedField,
            y=categoryColumn,
            order=categoryOrder,
            ax=ax2,
            edgecolor='k',  # seaborn's documented keyword (not 'edgecolors')
            linewidth=1,
            size=10,
            **hueOptions,
            **dodgeOptions,
        )
    return ax
# Open a fresh figure, then draw the education-by-gender bars with the
# household-size points overlaid.
plt.figure()
MakePercentageFrequencyTable(
    dataFrame=df,
    primaryDimension='Education',
    secondaryDimension='Gender',
    extraAggregatedField='Household Size',
)
我写了一个(新手)python 函数(如下)来绘制由主要维度和可能的次要维度分解的条形图。例如,下图显示了每个性别中达到特定教育水平的人的百分比。
问题:如何在每个条形上叠加该子组的家庭人数中位数,例如在 College/Female 柱上放置一个表示值“3”的点?我见过的示例都没有把点准确地叠加在正确的柱上。
我对此非常陌生,非常感谢您的帮助!
# Sample data: one row per student (name, education level, household size, gender).
df = pd.DataFrame(
    [
        ('Alice', 'HS', 4, 'F'),
        ('Bob', 'HS', 4, 'M'),
        ('Chris', 'HS', 3, 'M'),
        ('Dave', 'College', 3, 'M'),
        ('Edna', 'College', 3, 'F'),
        ('Frank', 'HS', 6, 'M'),
    ],
    columns=['Student', 'Education', 'Household Size', 'Gender'],
)
def MakePercentageFrequencyTable(dataFrame, primaryDimension, secondaryDimension=None, extraAggregatedField=None):
    """Draw a horizontal bar chart of the percentage share of each ``primaryDimension`` value.

    Args:
        dataFrame: Source data; must contain the columns named by the dimensions.
        primaryDimension: Column whose value frequencies are turned into percentages.
        secondaryDimension: Optional column to split by. When given, percentages
            are computed within each of its groups (each group sums to 100) and
            the bars are coloured by ``primaryDimension``.
        extraAggregatedField: Accepted for interface compatibility; this
            figure-level version does not use it.

    Returns:
        The object returned by ``sns.catplot`` (a new figure-level grid).
    """
    source = dataFrame if secondaryDimension is None else dataFrame.groupby([secondaryDimension])
    percentages = (
        source[primaryDimension]
        .value_counts(normalize=True)
        .rename('percentage')
        .mul(100)
        .reset_index(drop=False)
    )

    if secondaryDimension is None:
        # The label column produced by reset_index() is called 'index' on older
        # pandas but is named after the counted column on pandas >= 2.0, so name
        # it explicitly instead of hard-coding 'index'.
        percentages.columns = [primaryDimension, 'percentage']
        return sns.catplot(x='percentage', y=primaryDimension, kind='bar', data=percentages)

    # Sorting keeps the rows of each secondary group together on the category axis.
    percentages = percentages.sort_values(secondaryDimension)
    return sns.catplot(
        x='percentage',
        y=secondaryDimension,
        hue=primaryDimension,
        kind='bar',
        data=percentages,
    )
# Plot the share of each education level within every gender.
MakePercentageFrequencyTable(
    dataFrame=df,
    primaryDimension='Education',
    secondaryDimension='Gender',
)
# Question: I want to send in extraAggregatedField='Household Size' when I call the function such that
# it creates a secondary 'Household Size' axis at the top of the figure
# and aggregates/integrates the 'Household Size' column such that the following points are plotted
# against the secondary axis and positioned over the given bars:
#
# Female/College => 3
# Female/High School => 4
# Male/College => 3
# Male/High School => 4
Picture of what I have been able to achieve so far
您需要使用 axes-level(轴级)函数 sns.barplot() 和 sns.stripplot(),而不是 catplot();catplot() 是 figure-level 函数,它会自行创建一个新的图形和 FacetGrid。
像这样:
# Sample data: one row per student (name, education level, household size, gender).
df = pd.DataFrame(
    [
        ('Alice', 'HS', 4, 'F'),
        ('Bob', 'HS', 4, 'M'),
        ('Chris', 'HS', 3, 'M'),
        ('Dave', 'College', 3, 'M'),
        ('Edna', 'College', 3, 'F'),
        ('Frank', 'HS', 6, 'M'),
    ],
    columns=['Student', 'Education', 'Household Size', 'Gender'],
)
def MakePercentageFrequencyTable(dataFrame, primaryDimension, secondaryDimension=None, extraAggregatedField=None, ax=None):
    """Draw percentage bars and optionally overlay a per-bar aggregate on a second x-axis.

    Args:
        dataFrame: Source data; must contain the columns named by the other arguments.
        primaryDimension: Column whose value frequencies are turned into percentages.
        secondaryDimension: Optional column to split by. When given, percentages
            are computed within each of its groups and bars are coloured by
            ``primaryDimension``.
        extraAggregatedField: Optional numeric column. When given, its median per
            subgroup is drawn as one point per bar against a twin x-axis placed
            at the top of the plot.
        ax: Matplotlib axes to draw on; defaults to the current axes.

    Returns:
        The axes holding the bars.
    """
    ax = plt.gca() if ax is None else ax

    source = dataFrame if secondaryDimension is None else dataFrame.groupby([secondaryDimension])
    percentages = (
        source[primaryDimension]
        .value_counts(normalize=True)
        .rename('percentage')
        .mul(100)
        .reset_index(drop=False)
    )

    if secondaryDimension is None:
        # reset_index() names the label column 'index' on older pandas and after
        # the counted column on pandas >= 2.0; name it explicitly.
        percentages.columns = [primaryDimension, 'percentage']
        categoryColumn = primaryDimension
        groupKeys = [primaryDimension]
        hueOptions = {}
        dodgeOptions = {}
    else:
        percentages = percentages.sort_values(secondaryDimension)
        categoryColumn = secondaryDimension
        groupKeys = [primaryDimension, secondaryDimension]
        # Freeze the hue order used by the bars so the points can reuse it.
        hueOptions = {
            'hue': primaryDimension,
            'hue_order': list(dict.fromkeys(percentages[primaryDimension])),
        }
        dodgeOptions = {'dodge': True}
    # Freeze the category order as well: bars and points only line up when both
    # plots use the same category and hue order, which must not depend on the
    # row order of two differently sorted tables.
    categoryOrder = list(dict.fromkeys(percentages[categoryColumn]))

    sns.barplot(
        x='percentage',
        y=categoryColumn,
        order=categoryOrder,
        data=percentages,
        ax=ax,
        **hueOptions,
    )

    if extraAggregatedField is not None:
        # Twin axes: shares the category (y) axis, gets its own x-axis on top.
        ax2 = ax.twiny()
        # Aggregate only the requested column: aggregating the whole frame also
        # hits text columns, which raises on pandas >= 2.0. The median is used
        # because that is the statistic the chart is meant to show.
        extraDimension = (
            dataFrame.groupby(groupKeys)[extraAggregatedField]
            .median()
            .reset_index(drop=False)
        )
        sns.stripplot(
            data=extraDimension,
            x=extraAggregatedField,
            y=categoryColumn,
            order=categoryOrder,
            ax=ax2,
            edgecolor='k',  # seaborn's documented keyword (not 'edgecolors')
            linewidth=1,
            size=10,
            **hueOptions,
            **dodgeOptions,
        )
    return ax
# Open a fresh figure, then draw the education-by-gender bars with the
# household-size points overlaid.
plt.figure()
MakePercentageFrequencyTable(
    dataFrame=df,
    primaryDimension='Education',
    secondaryDimension='Gender',
    extraAggregatedField='Household Size',
)