在 pandas 中按降序排列数据透视表的值
Sort PivotTable values descending order in pandas
我有一个数据透视表(pivot table),把它绘制成垂直条形图时,条形之间会留有间隙。
我想在每个月份('DATE')内按数值降序排列各个类别,使绘出的条形之间不留空隙:
我在绘图之前和绘图时都尝试过 sort_values(),但没有效果。
它可能与多索引中的按级别排序(也尝试过)或重建索引有关,但我无法让它工作。
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Question script: build a month x category pivot table of summed counts and
# draw it as a grouped bar chart. Categories that are absent in a month are
# filled with 0, which is what leaves the visible gaps between the bars.
df = pd.DataFrame(
    {
        "CATEGORY": ["A", "B", "C", "C", "E", "F", "G", "D", "I", "B"],
        "DATE": ["2021-06-17", "2021-06-28", "2021-07-01", "2021-06-06", "2021-06-22",
                 "2021-07-15", "2021-06-12", "2021-06-02", "2021-07-24", "2021-06-21"],
        "COUNT": [200, 350, 150, 350, 400, 250, 200, 150, 325, 300],
    }
)
df['DATE'] = pd.to_datetime(df['DATE'])
# Format the Date in Months
df['DATE'] = df['DATE'].dt.strftime('%B')
# Pivot table with SUM and filling the NaN values with zero.
# The aggregation is given by name ('sum') rather than as np.sum, which
# recent pandas versions warn about; the result is the same.
test = pd.pivot_table(
    df, values='COUNT', index='DATE', columns='CATEGORY', aggfunc='sum'
).fillna(0)
print(test)
# Earlier sorting attempts, kept for reference; they did not give the wanted order.
#GetLvl=test[["A", "B"]].columns.get_level_values(0)
#SortLvl=test.sortlevel(["CATEGORY", "COUNT"], ascending=[True, False], sort_remaining=False)
# Create the figure and axes explicitly and hand the axes to pandas, so that
# no additional empty figure is opened next to the one holding the bars.
fig, ax = plt.subplots()
test.plot(kind='bar', legend=False, ax=ax)
plt.show()
plt.close(fig)
为了得到预期的结果,这个解决方案的思路是并排绘制多个子图('DATE' 的每个取值对应一个子图)。这里只针对 2 个子组(6 月和 7 月)实现,但很容易推广到更多子组。
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Answer script: draw one bar chart per month side by side, each with its
# categories sorted by descending summed count, so no empty slots appear.
# definition of df
df = pd.DataFrame(
    {
        "CATEGORY": ["A", "B", "C", "C", "E", "F", "G", "D", "I", "B"],
        "DATE": ["2021-06-17", "2021-06-28", "2021-07-01", "2021-06-06", "2021-06-22",
                 "2021-07-15", "2021-06-12", "2021-06-02", "2021-07-24", "2021-06-21"],
        "COUNT": [200, 350, 150, 350, 400, 250, 200, 150, 325, 300],
    }
)
df['DATE'] = pd.to_datetime(df['DATE'])
# Format the Date in Months
df['DATE'] = df['DATE'].dt.strftime('%B')
# group by Date & Category and add the values in Count for each (Date/Category)
grouped_df = df.groupby(['DATE', 'CATEGORY']).agg({'COUNT': ['sum']})
# sort by Count separately inside each subgroup of Date (multi-index)
grouped_df = grouped_df.sort_values(['DATE', ('COUNT', 'sum')], ascending=[False, False])
# drop the 'sum' label (2nd level of the column multi-index)
grouped_df.columns = grouped_df.columns.droplevel(1)
# extract the df for the subgroup concerning each value of Date
dfg = {}
dfg['June'] = grouped_df.loc['June'].reset_index()
dfg['July'] = grouped_df.loc['July'].reset_index()
# create an 'ax' for each subgroup (here July on the left, June on the right);
# the width of each plot is proportional to its number of bars, taken from
# the data instead of being hard-coded
fig, (ax1, ax2) = plt.subplots(
    1, 2, sharey=True, figsize=(5, 9),
    gridspec_kw={'width_ratios': [len(dfg['July']), len(dfg['June'])]},
)
# to have the same color for a given Category on all plots
palette = {category: 'C' + str(i) for i, category in enumerate('ABCDEFGHI')}
color1 = [palette[x] for x in dfg['July']["CATEGORY"]]
color2 = [palette[x] for x in dfg['June']["CATEGORY"]]
# draw the barplots in each ax; width=1.0 is given at creation time so the
# bars touch each other and stay centered on their tick labels (resizing the
# patches afterwards would only stretch them to the right)
ax1.bar(x=dfg['July']["CATEGORY"], height=dfg['July']["COUNT"], width=1.0, color=color1)
ax2.bar(x=dfg['June']["CATEGORY"], height=dfg['June']["COUNT"], width=1.0, color=color2)
# some cosmetics to get the plot expected
ax1.spines['top'].set_visible(False)
ax1.spines['right'].set_visible(False)
ax2.spines['top'].set_visible(False)
ax2.spines['left'].set_visible(False)
ax2.spines['right'].set_visible(False)
ax2.get_yaxis().set_visible(False)
ax2.set(ylabel=None)
ax1.set(xlabel='JULY')
ax2.set(xlabel='JUNE')
plt.show()
输出:
我有一个数据透视表(pivot table),把它绘制成垂直条形图时,条形之间会留有间隙。
我想在每个月份('DATE')内按数值降序排列各个类别,使绘出的条形之间不留空隙:
我在绘图之前和绘图时都尝试过 sort_values(),但没有效果。
它可能与多索引中的按级别排序(也尝试过)或重建索引有关,但我无法让它工作。
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Question script: build a month x category pivot table of summed counts and
# draw it as a grouped bar chart. Categories that are absent in a month are
# filled with 0, which is what leaves the visible gaps between the bars.
df = pd.DataFrame(
    {
        "CATEGORY": ["A", "B", "C", "C", "E", "F", "G", "D", "I", "B"],
        "DATE": ["2021-06-17", "2021-06-28", "2021-07-01", "2021-06-06", "2021-06-22",
                 "2021-07-15", "2021-06-12", "2021-06-02", "2021-07-24", "2021-06-21"],
        "COUNT": [200, 350, 150, 350, 400, 250, 200, 150, 325, 300],
    }
)
df['DATE'] = pd.to_datetime(df['DATE'])
# Format the Date in Months
df['DATE'] = df['DATE'].dt.strftime('%B')
# Pivot table with SUM and filling the NaN values with zero.
# The aggregation is given by name ('sum') rather than as np.sum, which
# recent pandas versions warn about; the result is the same.
test = pd.pivot_table(
    df, values='COUNT', index='DATE', columns='CATEGORY', aggfunc='sum'
).fillna(0)
print(test)
# Earlier sorting attempts, kept for reference; they did not give the wanted order.
#GetLvl=test[["A", "B"]].columns.get_level_values(0)
#SortLvl=test.sortlevel(["CATEGORY", "COUNT"], ascending=[True, False], sort_remaining=False)
# Create the figure and axes explicitly and hand the axes to pandas, so that
# no additional empty figure is opened next to the one holding the bars.
fig, ax = plt.subplots()
test.plot(kind='bar', legend=False, ax=ax)
plt.show()
plt.close(fig)
为了得到预期的结果,这个解决方案的思路是并排绘制多个子图('DATE' 的每个取值对应一个子图)。这里只针对 2 个子组(6 月和 7 月)实现,但很容易推广到更多子组。
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Answer script: draw one bar chart per month side by side, each with its
# categories sorted by descending summed count, so no empty slots appear.
# definition of df
df = pd.DataFrame(
    {
        "CATEGORY": ["A", "B", "C", "C", "E", "F", "G", "D", "I", "B"],
        "DATE": ["2021-06-17", "2021-06-28", "2021-07-01", "2021-06-06", "2021-06-22",
                 "2021-07-15", "2021-06-12", "2021-06-02", "2021-07-24", "2021-06-21"],
        "COUNT": [200, 350, 150, 350, 400, 250, 200, 150, 325, 300],
    }
)
df['DATE'] = pd.to_datetime(df['DATE'])
# Format the Date in Months
df['DATE'] = df['DATE'].dt.strftime('%B')
# group by Date & Category and add the values in Count for each (Date/Category)
grouped_df = df.groupby(['DATE', 'CATEGORY']).agg({'COUNT': ['sum']})
# sort by Count separately inside each subgroup of Date (multi-index)
grouped_df = grouped_df.sort_values(['DATE', ('COUNT', 'sum')], ascending=[False, False])
# drop the 'sum' label (2nd level of the column multi-index)
grouped_df.columns = grouped_df.columns.droplevel(1)
# extract the df for the subgroup concerning each value of Date
dfg = {}
dfg['June'] = grouped_df.loc['June'].reset_index()
dfg['July'] = grouped_df.loc['July'].reset_index()
# create an 'ax' for each subgroup (here July on the left, June on the right);
# the width of each plot is proportional to its number of bars, taken from
# the data instead of being hard-coded
fig, (ax1, ax2) = plt.subplots(
    1, 2, sharey=True, figsize=(5, 9),
    gridspec_kw={'width_ratios': [len(dfg['July']), len(dfg['June'])]},
)
# to have the same color for a given Category on all plots
palette = {category: 'C' + str(i) for i, category in enumerate('ABCDEFGHI')}
color1 = [palette[x] for x in dfg['July']["CATEGORY"]]
color2 = [palette[x] for x in dfg['June']["CATEGORY"]]
# draw the barplots in each ax; width=1.0 is given at creation time so the
# bars touch each other and stay centered on their tick labels (resizing the
# patches afterwards would only stretch them to the right)
ax1.bar(x=dfg['July']["CATEGORY"], height=dfg['July']["COUNT"], width=1.0, color=color1)
ax2.bar(x=dfg['June']["CATEGORY"], height=dfg['June']["COUNT"], width=1.0, color=color2)
# some cosmetics to get the plot expected
ax1.spines['top'].set_visible(False)
ax1.spines['right'].set_visible(False)
ax2.spines['top'].set_visible(False)
ax2.spines['left'].set_visible(False)
ax2.spines['right'].set_visible(False)
ax2.get_yaxis().set_visible(False)
ax2.set(ylabel=None)
ax1.set(xlabel='JULY')
ax2.set(xlabel='JUNE')
plt.show()
输出: