Python Pandas 分组,排名,然后根据自定义排名赋值
Python Pandas groupby, rank, then assign value based on custom rank
问题设置
pandas 数据框
# Sample data: nine keyword rows in a single 'Group', three per normalized
# keyword. 'CPC Rank' and 'Type Rank' are the precomputed per-keyword ranks.
df = pd.DataFrame({
    'Group': ['A'] * 9,
    'Subgroup': ['Group 1'] * 6 + ['Group 2'] * 3,
    'Keyword': ['kw 1'] * 3 + ['kw 2', '+kw +2', 'kw 2'] + ['kw 3'] * 3,
    'Normalized': ['kw 1'] * 3 + ['kw 2'] * 3 + ['kw 3'] * 3,
    'Criterion Type': [
        'Exact', 'Phrase', 'Broad',
        'Phrase', 'Broadified', 'Exact',
        'Broad', 'Exact', 'Phrase',
    ],
    'Max CPC': [
        1.62, 1.73, 0.87,
        1.70, 0.85, 1.60,
        0.99, 1.58, 1.68,
    ],
    'CPC Rank': [
        2, 1, 3,
        1, 3, 2,
        3, 2, 1,
    ],
    'Type Rank': [
        1, 2, 3,
        2, 3, 1,
        3, 1, 2,
    ],
})
这样可以使列位于正确的位置:
# Select the columns in the intended display order.
df = df.loc[:, [
    'Group',
    'Subgroup',
    'Keyword',
    'Normalized',
    'Criterion Type',
    'Max CPC',
    'CPC Rank',
    'Type Rank',
]]
目标
先按 ['Group', 'Subgroup', 'Normalized'] 进行 groupby,然后对 Max CPC 进行 rank(排名)。接下来,我想把与 CPC Rank 对应的 Max CPC 映射到 Type Rank 上;Type Rank 是根据 Criterion Type 和我自己的自定义排名确定的:
{'Exact':1, 'Phrase':2, 'Broadified':3, 'Broad':4}
结果将是一个 New CPC 列,其中包含相应的 Max CPC 值。
我已经对每个组内的值进行了排序,并使用索引分配了排序后的值。
这是你想要的吗?
# Placeholder column. It is a float so the CPC values written below do not
# force a dtype change; every row is overwritten inside the loop.
df['new CPC'] = -1.0
parts = []
grouped = df.groupby(['Group', 'Subgroup', 'Normalized'])
for _, group in grouped:
    # Work on an explicit copy so the assignment below never targets a
    # view of ``df``.
    group = group.copy()
    # Row labels of this group ordered by type priority and by CPC rank.
    # ``DataFrame.sort`` was removed from pandas; ``sort_values`` replaces it.
    type_rank_index = group.sort_values(by='Type Rank').index
    cpc_rank_index = group.sort_values(by='CPC Rank').index
    # ``.values`` strips the index so the assignment is positional: the row
    # with the best Type Rank receives the best-ranked Max CPC, and so on.
    # Assigning the Series itself would align on labels and simply copy each
    # row's own Max CPC back onto it.
    group.loc[type_rank_index, 'new CPC'] = group.loc[cpc_rank_index, 'Max CPC'].values
    parts.append(group)
# ``pd.concat`` raises on an empty list, so fall back to a copy of ``df``
# when there were no groups at all.
result = pd.concat(parts) if parts else df.copy()
import pandas as pd
import numpy as np
# Sample data: nine keyword rows, three per normalized keyword.
df = pd.DataFrame({
    'Group': ['A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A'],
    'Subgroup': ['Group 1', 'Group 1', 'Group 1', 'Group 1', 'Group 1', 'Group 1', 'Group 2', 'Group 2', 'Group 2'],
    'Keyword': ['kw 1', 'kw 1', 'kw 1', 'kw 2', '+kw +2', 'kw 2', 'kw 3', 'kw 3', 'kw 3'],
    'Normalized': ['kw 1', 'kw 1', 'kw 1', 'kw 2', 'kw 2', 'kw 2', 'kw 3', 'kw 3', 'kw 3'],
    'Criterion Type': ['Exact', 'Phrase', 'Broad', 'Phrase', 'Broadified', 'Exact', 'Broad', 'Exact', 'Phrase'],
    'Max CPC': [1.62, 1.73, 0.87, 1.70, 0.85, 1.60, 0.99, 1.58, 1.68],
    'CPC Rank': [2, 1, 3, 1, 3, 2, 3, 2, 1],
    'Type Rank': [1, 2, 3, 2, 3, 1, 3, 1, 2],
})
df = df[['Group', 'Subgroup', 'Keyword', 'Normalized', 'Criterion Type', 'Max CPC', 'CPC Rank', 'Type Rank']]
# Sort by custom priority based on their Criterion Type.
# ``DataFrame.sort`` was removed from pandas; ``sort_values`` replaces it.
df = df.sort_values(by=['Group', 'Subgroup', 'Normalized', 'Type Rank'])
# Reset index and drop the old one so rows are numbered in Type Rank order.
df = df.reset_index(drop=True)
print(df)
# Create df1, a Series of the Max CPC column ordered by CPC Rank within each
# group. Both sorts share the same leading group keys, so the groups occupy
# the same positions in df and df1.
df1 = df.sort_values(by=['Group', 'Subgroup', 'Normalized', 'CPC Rank'])['Max CPC']
# Reset index and drop the old one so df1 lines up with df by position.
df1 = df1.reset_index(drop=True)
print(df1)
# Add the df1 Series to df and name the column New CPC.
df['New CPC'] = df1
print(df)
这是迄今为止解决此问题最有效的方法。困难的部分在于意识到我可以先按 Type Rank 对 df 进行 sort,这样各个 Criterion Type 行就按它们的等级排好了序。这意味着我希望把最高的 Max CPC 应用于第一行,第二高的 Max CPC 应用于第二行,依此类推。
然后我所要做的就是创建一个按 CPC Rank 排序的 Max CPC Series。
最后,将此 Series 添加到现有的 df 中。
试试这个
def group_rank(df):
    """Give each row the ``Max CPC`` whose CPC rank equals the row's ``Type Rank``.

    Meant to be applied to one group at a time, e.g. through
    ``DataFrame.groupby(...).apply(group_rank)``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``'Max CPC'`` and ``'Type Rank'``.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` in which ``'CPC Rank'`` is recomputed from
        ``'Max CPC'`` (rank 1 is the highest value) and ``'New CPC'`` holds,
        for each row, the ``Max CPC`` whose rank equals that row's
        ``'Type Rank'``. The input frame is left unmodified.
    """
    # Work on a copy: pandas does not support functions passed to
    # ``groupby.apply`` that mutate the group they are handed.
    ranked = df.copy()
    # First of all, rank according to `Max CPC`. ``method='first'`` breaks
    # ties by order of appearance so every rank is unique; the default
    # 'average' method yields duplicate fractional ranks on ties, which makes
    # the mapping below ambiguous.
    ranked['CPC Rank'] = ranked['Max CPC'].rank(method='first', ascending=False)
    # Create the mapping: CPC rank -> Max CPC.
    mapping = pd.Series(
        data=ranked['Max CPC'].values,
        index=ranked['CPC Rank'].values,
    )
    # Create the new column by looking up each row's Type Rank in the mapping.
    ranked['New CPC'] = ranked['Type Rank'].map(mapping)
    return ranked
# Run group_rank once per (Group, Subgroup, Normalized) combination.
# The result is not assigned here; bind it to a name to keep it.
(
    df
    .groupby(['Group', 'Subgroup', 'Normalized'])
    .apply(group_rank)
)
问题设置
pandas 数据框
# Sample data: nine keyword rows in a single 'Group', three per normalized
# keyword. 'CPC Rank' and 'Type Rank' are the precomputed per-keyword ranks.
df = pd.DataFrame({
    'Group': ['A'] * 9,
    'Subgroup': ['Group 1'] * 6 + ['Group 2'] * 3,
    'Keyword': ['kw 1'] * 3 + ['kw 2', '+kw +2', 'kw 2'] + ['kw 3'] * 3,
    'Normalized': ['kw 1'] * 3 + ['kw 2'] * 3 + ['kw 3'] * 3,
    'Criterion Type': [
        'Exact', 'Phrase', 'Broad',
        'Phrase', 'Broadified', 'Exact',
        'Broad', 'Exact', 'Phrase',
    ],
    'Max CPC': [
        1.62, 1.73, 0.87,
        1.70, 0.85, 1.60,
        0.99, 1.58, 1.68,
    ],
    'CPC Rank': [
        2, 1, 3,
        1, 3, 2,
        3, 2, 1,
    ],
    'Type Rank': [
        1, 2, 3,
        2, 3, 1,
        3, 1, 2,
    ],
})
这样可以使列位于正确的位置:
# Select the columns in the intended display order.
df = df.loc[:, [
    'Group',
    'Subgroup',
    'Keyword',
    'Normalized',
    'Criterion Type',
    'Max CPC',
    'CPC Rank',
    'Type Rank',
]]
目标
先按 ['Group', 'Subgroup', 'Normalized'] 进行 groupby,然后对 Max CPC 进行 rank(排名)。接下来,我想把与 CPC Rank 对应的 Max CPC 映射到 Type Rank 上;Type Rank 是根据 Criterion Type 和我自己的自定义排名确定的:
{'Exact':1, 'Phrase':2, 'Broadified':3, 'Broad':4}
结果将是一个 New CPC 列,其中包含相应的 Max CPC 值。
我已经对每个组内的值进行了排序,并使用索引分配了排序后的值。 这是你想要的吗?
# Placeholder column. It is a float so the CPC values written below do not
# force a dtype change; every row is overwritten inside the loop.
df['new CPC'] = -1.0
parts = []
grouped = df.groupby(['Group', 'Subgroup', 'Normalized'])
for _, group in grouped:
    # Work on an explicit copy so the assignment below never targets a
    # view of ``df``.
    group = group.copy()
    # Row labels of this group ordered by type priority and by CPC rank.
    # ``DataFrame.sort`` was removed from pandas; ``sort_values`` replaces it.
    type_rank_index = group.sort_values(by='Type Rank').index
    cpc_rank_index = group.sort_values(by='CPC Rank').index
    # ``.values`` strips the index so the assignment is positional: the row
    # with the best Type Rank receives the best-ranked Max CPC, and so on.
    # Assigning the Series itself would align on labels and simply copy each
    # row's own Max CPC back onto it.
    group.loc[type_rank_index, 'new CPC'] = group.loc[cpc_rank_index, 'Max CPC'].values
    parts.append(group)
# ``pd.concat`` raises on an empty list, so fall back to a copy of ``df``
# when there were no groups at all.
result = pd.concat(parts) if parts else df.copy()
import pandas as pd
import numpy as np
# Sample data: nine keyword rows, three per normalized keyword.
df = pd.DataFrame({
    'Group': ['A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A'],
    'Subgroup': ['Group 1', 'Group 1', 'Group 1', 'Group 1', 'Group 1', 'Group 1', 'Group 2', 'Group 2', 'Group 2'],
    'Keyword': ['kw 1', 'kw 1', 'kw 1', 'kw 2', '+kw +2', 'kw 2', 'kw 3', 'kw 3', 'kw 3'],
    'Normalized': ['kw 1', 'kw 1', 'kw 1', 'kw 2', 'kw 2', 'kw 2', 'kw 3', 'kw 3', 'kw 3'],
    'Criterion Type': ['Exact', 'Phrase', 'Broad', 'Phrase', 'Broadified', 'Exact', 'Broad', 'Exact', 'Phrase'],
    'Max CPC': [1.62, 1.73, 0.87, 1.70, 0.85, 1.60, 0.99, 1.58, 1.68],
    'CPC Rank': [2, 1, 3, 1, 3, 2, 3, 2, 1],
    'Type Rank': [1, 2, 3, 2, 3, 1, 3, 1, 2],
})
df = df[['Group', 'Subgroup', 'Keyword', 'Normalized', 'Criterion Type', 'Max CPC', 'CPC Rank', 'Type Rank']]
# Sort by custom priority based on their Criterion Type.
# ``DataFrame.sort`` was removed from pandas; ``sort_values`` replaces it.
df = df.sort_values(by=['Group', 'Subgroup', 'Normalized', 'Type Rank'])
# Reset index and drop the old one so rows are numbered in Type Rank order.
df = df.reset_index(drop=True)
print(df)
# Create df1, a Series of the Max CPC column ordered by CPC Rank within each
# group. Both sorts share the same leading group keys, so the groups occupy
# the same positions in df and df1.
df1 = df.sort_values(by=['Group', 'Subgroup', 'Normalized', 'CPC Rank'])['Max CPC']
# Reset index and drop the old one so df1 lines up with df by position.
df1 = df1.reset_index(drop=True)
print(df1)
# Add the df1 Series to df and name the column New CPC.
df['New CPC'] = df1
print(df)
这是迄今为止解决此问题最有效的方法。困难的部分在于意识到我可以先按 Type Rank 对 df 进行 sort,这样各个 Criterion Type 行就按它们的等级排好了序。这意味着我希望把最高的 Max CPC 应用于第一行,第二高的 Max CPC 应用于第二行,依此类推。
然后我所要做的就是创建一个按 CPC Rank 排序的 Max CPC Series。
最后,将此 Series 添加到现有的 df 中。
试试这个
def group_rank(df):
    """Give each row the ``Max CPC`` whose CPC rank equals the row's ``Type Rank``.

    Meant to be applied to one group at a time, e.g. through
    ``DataFrame.groupby(...).apply(group_rank)``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``'Max CPC'`` and ``'Type Rank'``.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` in which ``'CPC Rank'`` is recomputed from
        ``'Max CPC'`` (rank 1 is the highest value) and ``'New CPC'`` holds,
        for each row, the ``Max CPC`` whose rank equals that row's
        ``'Type Rank'``. The input frame is left unmodified.
    """
    # Work on a copy: pandas does not support functions passed to
    # ``groupby.apply`` that mutate the group they are handed.
    ranked = df.copy()
    # First of all, rank according to `Max CPC`. ``method='first'`` breaks
    # ties by order of appearance so every rank is unique; the default
    # 'average' method yields duplicate fractional ranks on ties, which makes
    # the mapping below ambiguous.
    ranked['CPC Rank'] = ranked['Max CPC'].rank(method='first', ascending=False)
    # Create the mapping: CPC rank -> Max CPC.
    mapping = pd.Series(
        data=ranked['Max CPC'].values,
        index=ranked['CPC Rank'].values,
    )
    # Create the new column by looking up each row's Type Rank in the mapping.
    ranked['New CPC'] = ranked['Type Rank'].map(mapping)
    return ranked
# Run group_rank once per (Group, Subgroup, Normalized) combination.
# The result is not assigned here; bind it to a name to keep it.
(
    df
    .groupby(['Group', 'Subgroup', 'Normalized'])
    .apply(group_rank)
)