python pandas 自定义权重
python pandas custom weightages
如下所示,DataFrame 中保存了公司级别的数据,字典 (dict) 中保存了自定义权重。公司 'A' 有 7 行,因此我想从字典中取出键为 7 的权重列表,并将其作为新列 'custom_weights' 添加到数据框中。日期最新的行应获得最高的权重。
同样地,对于公司 'B' 和 'C',我需要分别按键 4 和 2(即各公司的行数)取出权重并附加到对应的行上。这些权重应逐行写入 'custom_weights' 列。
请问如何解决这个问题?提前谢谢你。
import numpy as np
import pandas as pd

# Sample data: one row per company and publication date, newest first within
# each company ('A' has 7 rows, 'B' has 4, 'C' has 2). The two numeric columns
# are filled with random placeholder values.
df = pd.DataFrame(
    columns=['CompanyName', 'Date_Published', 'Stand_Alone', 'Consolidated'],
    data=[
        ('A', '31-03-2017', np.random.rand(), np.random.rand()),
        ('A', '31-03-2016', np.random.rand(), np.random.rand()),
        ('A', '31-03-2015', np.random.rand(), np.random.rand()),
        ('A', '31-03-2014', np.random.rand(), np.random.rand()),
        ('A', '31-03-2013', np.random.rand(), np.random.rand()),
        ('A', '31-03-2012', np.random.rand(), np.random.rand()),
        ('A', '31-03-2011', np.random.rand(), np.random.rand()),
        ('B', '31-03-2017', np.random.rand(), np.random.rand()),
        ('B', '31-03-2016', np.random.rand(), np.random.rand()),
        ('B', '31-03-2015', np.random.rand(), np.random.rand()),
        ('B', '31-03-2014', np.random.rand(), np.random.rand()),
        ('C', '31-03-2017', np.random.rand(), np.random.rand()),
        ('C', '31-03-2016', np.random.rand(), np.random.rand()),
    ],
)

# Custom weights keyed by the number of rows a company has. The dict has to
# exist before the update() calls below, otherwise they raise NameError.
dict_wt = {}
dict_wt.update({2: [55.55, 44.45]})
# NOTE(review): key 3 holds five values and there is no key 4, although
# company 'B' has 4 rows -- this entry looks garbled; confirm the intended
# weights before relying on it.
dict_wt.update({3: [47.34, 31, 56, 21, 11]})
dict_wt.update({7: [21.63, 18.54, 15.89, 13.62, 11.68, 10.01, 8.63]})
如果我正确理解你的问题,这可能是一个解决方案:
import numpy as np
import pandas as pd

# Rebuild the sample frame: each company gets one 31 March row per year,
# counting back from 2017, with random placeholder figures.
rows_per_company = {'A': 7, 'B': 4, 'C': 2}
df = pd.DataFrame(
    [
        (company, f'31-03-{year}', np.random.rand(), np.random.rand())
        for company, n_rows in rows_per_company.items()
        for year in range(2017, 2017 - n_rows, -1)
    ],
    columns=['CompanyName', 'Date_Published', 'Stand_Alone', 'Consolidated'],
)

# Weight lists keyed by group size (the number of rows a company has).
dict_wt = {
    2: [55.55, 44.45],
    4: [11, 56, 21, 11],
    7: [21.63, 18.54, 15.89, 13.62, 11.68, 10.01, 8.63],
}

# Company name -> weight list, found via each company's row count; every row
# then receives the complete list belonging to its company.
company_names = df['CompanyName']
weights = company_names.value_counts().map(dict_wt)
df['CustomWeights'] = company_names.map(weights)
# CompanyName Date_Published Stand_Alone Consolidated \
# 0 A 31-03-2017 0.465561 0.449511
# 1 A 31-03-2016 0.096015 0.472931
# 2 A 31-03-2015 0.176293 0.520192
# 3 A 31-03-2014 0.814840 0.043019
# 4 A 31-03-2013 0.387406 0.709103
# 5 A 31-03-2012 0.790282 0.751466
# 6 A 31-03-2011 0.047402 0.788732
# 7 B 31-03-2017 0.275830 0.214845
# 8 B 31-03-2016 0.341561 0.861411
# 9 B 31-03-2015 0.800487 0.469386
# 10 B 31-03-2014 0.071154 0.454278
# 11 C 31-03-2017 0.712978 0.034975
# 12 C 31-03-2016 0.672991 0.158985
# CustomWeights
# 0 [21.63, 18.54, 15.89, 13.62, 11.68, 10.01, 8.63]
# 1 [21.63, 18.54, 15.89, 13.62, 11.68, 10.01, 8.63]
# 2 [21.63, 18.54, 15.89, 13.62, 11.68, 10.01, 8.63]
# 3 [21.63, 18.54, 15.89, 13.62, 11.68, 10.01, 8.63]
# 4 [21.63, 18.54, 15.89, 13.62, 11.68, 10.01, 8.63]
# 5 [21.63, 18.54, 15.89, 13.62, 11.68, 10.01, 8.63]
# 6 [21.63, 18.54, 15.89, 13.62, 11.68, 10.01, 8.63]
# 7 [11, 56, 21, 11]
# 8 [11, 56, 21, 11]
# 9 [11, 56, 21, 11]
# 10 [11, 56, 21, 11]
# 11 [55.55, 44.45]
# 12 [55.55, 44.45]
使用 groupby() 统计每个 'CompanyName' 的行数,再用 transform() 将字典中对应的权重列表逐行分配回数据框:
# Look up each company's weight list by its row count; transform() then spreads
# the list's entries over that company's rows in their existing order.
dates_by_company = df.groupby('CompanyName')['Date_Published']
df['CustomWeights'] = dates_by_company.transform(lambda dates: dict_wt.get(len(dates)))
CompanyName Date_Published Stand_Alone Consolidated CustomWeights
0 A 31-03-2017 0.116712 0.044908 21.63
1 A 31-03-2016 0.228525 0.553351 18.54
2 A 31-03-2015 0.476527 0.913417 15.89
3 A 31-03-2014 0.989796 0.716775 13.62
4 A 31-03-2013 0.702358 0.880009 11.68
5 A 31-03-2012 0.531666 0.013267 10.01
6 A 31-03-2011 0.896103 0.351544 8.63
7 B 31-03-2017 0.405370 0.701944 11.00
8 B 31-03-2016 0.858221 0.450118 56.00
9 B 31-03-2015 0.163273 0.613447 21.00
10 B 31-03-2014 0.635888 0.570327 11.00
11 C 31-03-2017 0.680992 0.488191 55.55
12 C 31-03-2016 0.083883 0.682186 44.45
如下所示,DataFrame 中保存了公司级别的数据,字典 (dict) 中保存了自定义权重。公司 'A' 有 7 行,因此我想从字典中取出键为 7 的权重列表,并将其作为新列 'custom_weights' 添加到数据框中。日期最新的行应获得最高的权重。
同样地,对于公司 'B' 和 'C',我需要分别按键 4 和 2(即各公司的行数)取出权重并附加到对应的行上。这些权重应逐行写入 'custom_weights' 列。
请问如何解决这个问题?提前谢谢你。
import numpy as np
import pandas as pd

# Sample data: one row per company and publication date, newest first within
# each company ('A' has 7 rows, 'B' has 4, 'C' has 2). The two numeric columns
# are filled with random placeholder values.
df = pd.DataFrame(
    columns=['CompanyName', 'Date_Published', 'Stand_Alone', 'Consolidated'],
    data=[
        ('A', '31-03-2017', np.random.rand(), np.random.rand()),
        ('A', '31-03-2016', np.random.rand(), np.random.rand()),
        ('A', '31-03-2015', np.random.rand(), np.random.rand()),
        ('A', '31-03-2014', np.random.rand(), np.random.rand()),
        ('A', '31-03-2013', np.random.rand(), np.random.rand()),
        ('A', '31-03-2012', np.random.rand(), np.random.rand()),
        ('A', '31-03-2011', np.random.rand(), np.random.rand()),
        ('B', '31-03-2017', np.random.rand(), np.random.rand()),
        ('B', '31-03-2016', np.random.rand(), np.random.rand()),
        ('B', '31-03-2015', np.random.rand(), np.random.rand()),
        ('B', '31-03-2014', np.random.rand(), np.random.rand()),
        ('C', '31-03-2017', np.random.rand(), np.random.rand()),
        ('C', '31-03-2016', np.random.rand(), np.random.rand()),
    ],
)

# Custom weights keyed by the number of rows a company has. The dict has to
# exist before the update() calls below, otherwise they raise NameError.
dict_wt = {}
dict_wt.update({2: [55.55, 44.45]})
# NOTE(review): key 3 holds five values and there is no key 4, although
# company 'B' has 4 rows -- this entry looks garbled; confirm the intended
# weights before relying on it.
dict_wt.update({3: [47.34, 31, 56, 21, 11]})
dict_wt.update({7: [21.63, 18.54, 15.89, 13.62, 11.68, 10.01, 8.63]})
如果我正确理解你的问题,这可能是一个解决方案:
import numpy as np
import pandas as pd

# Rebuild the sample frame: each company gets one 31 March row per year,
# counting back from 2017, with random placeholder figures.
rows_per_company = {'A': 7, 'B': 4, 'C': 2}
df = pd.DataFrame(
    [
        (company, f'31-03-{year}', np.random.rand(), np.random.rand())
        for company, n_rows in rows_per_company.items()
        for year in range(2017, 2017 - n_rows, -1)
    ],
    columns=['CompanyName', 'Date_Published', 'Stand_Alone', 'Consolidated'],
)

# Weight lists keyed by group size (the number of rows a company has).
dict_wt = {
    2: [55.55, 44.45],
    4: [11, 56, 21, 11],
    7: [21.63, 18.54, 15.89, 13.62, 11.68, 10.01, 8.63],
}

# Company name -> weight list, found via each company's row count; every row
# then receives the complete list belonging to its company.
company_names = df['CompanyName']
weights = company_names.value_counts().map(dict_wt)
df['CustomWeights'] = company_names.map(weights)
# CompanyName Date_Published Stand_Alone Consolidated \
# 0 A 31-03-2017 0.465561 0.449511
# 1 A 31-03-2016 0.096015 0.472931
# 2 A 31-03-2015 0.176293 0.520192
# 3 A 31-03-2014 0.814840 0.043019
# 4 A 31-03-2013 0.387406 0.709103
# 5 A 31-03-2012 0.790282 0.751466
# 6 A 31-03-2011 0.047402 0.788732
# 7 B 31-03-2017 0.275830 0.214845
# 8 B 31-03-2016 0.341561 0.861411
# 9 B 31-03-2015 0.800487 0.469386
# 10 B 31-03-2014 0.071154 0.454278
# 11 C 31-03-2017 0.712978 0.034975
# 12 C 31-03-2016 0.672991 0.158985
# CustomWeights
# 0 [21.63, 18.54, 15.89, 13.62, 11.68, 10.01, 8.63]
# 1 [21.63, 18.54, 15.89, 13.62, 11.68, 10.01, 8.63]
# 2 [21.63, 18.54, 15.89, 13.62, 11.68, 10.01, 8.63]
# 3 [21.63, 18.54, 15.89, 13.62, 11.68, 10.01, 8.63]
# 4 [21.63, 18.54, 15.89, 13.62, 11.68, 10.01, 8.63]
# 5 [21.63, 18.54, 15.89, 13.62, 11.68, 10.01, 8.63]
# 6 [21.63, 18.54, 15.89, 13.62, 11.68, 10.01, 8.63]
# 7 [11, 56, 21, 11]
# 8 [11, 56, 21, 11]
# 9 [11, 56, 21, 11]
# 10 [11, 56, 21, 11]
# 11 [55.55, 44.45]
# 12 [55.55, 44.45]
使用 groupby() 统计每个 'CompanyName' 的行数,再用 transform() 将字典中对应的权重列表逐行分配回数据框:
# Look up each company's weight list by its row count; transform() then spreads
# the list's entries over that company's rows in their existing order.
dates_by_company = df.groupby('CompanyName')['Date_Published']
df['CustomWeights'] = dates_by_company.transform(lambda dates: dict_wt.get(len(dates)))
CompanyName Date_Published Stand_Alone Consolidated CustomWeights
0 A 31-03-2017 0.116712 0.044908 21.63
1 A 31-03-2016 0.228525 0.553351 18.54
2 A 31-03-2015 0.476527 0.913417 15.89
3 A 31-03-2014 0.989796 0.716775 13.62
4 A 31-03-2013 0.702358 0.880009 11.68
5 A 31-03-2012 0.531666 0.013267 10.01
6 A 31-03-2011 0.896103 0.351544 8.63
7 B 31-03-2017 0.405370 0.701944 11.00
8 B 31-03-2016 0.858221 0.450118 56.00
9 B 31-03-2015 0.163273 0.613447 21.00
10 B 31-03-2014 0.635888 0.570327 11.00
11 C 31-03-2017 0.680992 0.488191 55.55
12 C 31-03-2016 0.083883 0.682186 44.45