数据框 - 合并
Dataframes - Combining
如何组合 df_1 和 df_2 - 以获得所需的数据帧?
彩色方块有望让您快速了解所需内容。即
努力解决这个问题 - 感谢所有帮助/建议。谢谢。
你可以尝试 df_total = df1.append(df2)(注意:DataFrame.append 已在 pandas 2.0 中移除,新版本请改用 df_total = pd.concat([df1, df2])),但这种方式可能会将列 'race_id_legs2' 放在 'race_id_leg1' 之后,所以你需要重新排列各列的顺序。
这是一个可能的解决方案。我确定还有更优雅(elegant)的解决方案——但这个可行。
import pandas as pd
from collections import Counter
from itertools import chain
def _pad_points_by_name(df, width=10):
    """Expand a name/pts frame into two flat, fixed-width lists.

    Rows of ``df`` are grouped by their ``'name'`` value, in order of first
    appearance.  Each group contributes exactly ``width`` entries to both
    returned lists: the name repeated ``width`` times, and the group's
    ``'pts'`` values followed by zeros up to ``width``.

    Args:
        df: DataFrame with a ``'name'`` column and a ``'pts'`` column.
        width: Number of slots reserved for every name (default 10).

    Returns:
        A ``(names, points)`` tuple of equally long lists; both are empty
        when ``df`` has no rows.

    Raises:
        ValueError: If any name has more than ``width`` rows, because the
            two lists could then no longer be kept the same length.
    """
    # Group explicitly by name instead of slicing by per-name counts, so the
    # result is also correct when a name's rows are not adjacent.
    grouped = {}
    for name, pts in zip(df['name'].tolist(), df['pts'].tolist()):
        grouped.setdefault(name, []).append(pts)

    names = []
    points = []
    for name, pts in grouped.items():
        if len(pts) > width:
            raise ValueError(
                f"{name!r} has {len(pts)} rows; at most {width} are supported"
            )
        names.extend([name] * width)
        points.extend(pts + [0] * (width - len(pts)))
    return names, points


df_1 = pd.DataFrame.from_dict({
    'name': ['fred', 'fred', 'fred', 'bill', 'bill',
             'ted', 'ted', 'ted', 'ted'],
    'pts': [8, 4, 5, 7, 2, 3, 9, 8, 5],
})
df_2 = pd.DataFrame.from_dict({
    'name': ['pam', 'pam', 'lou', 'lou', 'lou', 'lou',
             'sam', 'sam', 'sam', 'sam'],
    'pts': [5, 6, 5, 6, 5, 6, 5, 6, 5, 6],
})

############################################
# For each frame build two lists: the names (10 entries per person) and the
# points (each person's points padded with zeros to 10 entries).
df_1_total_list_of_names, df_1_total_list_of_pts = _pad_points_by_name(df_1)
df_2_total_list_of_names, df_2_total_list_of_pts = _pad_points_by_name(df_2)

############################################
# Combine the name and pts lists from df_1 and df_2 into one dataframe.
# All four lists must have the same length, i.e. both frames need the same
# number of distinct names; otherwise the DataFrame constructor raises.
df_3 = pd.DataFrame({
    'df_1_names': df_1_total_list_of_names,
    'df_1_pts': df_1_total_list_of_pts,
    'df_2_names': df_2_total_list_of_names,
    'df_2_pts': df_2_total_list_of_pts,
})

############################################
# Optional: drop the rows whose pts are zero for both df_1 and df_2, which
# removes the padding.  NOTE: a row where both sides really scored 0 is
# dropped as well, because padding and real zeros are indistinguishable.
df_4 = df_3[(df_3.df_1_pts != 0) | (df_3.df_2_pts != 0)]
print(df_4)
如何组合 df_1 和 df_2 - 以获得所需的数据帧?
彩色方块有望让您快速了解所需内容。即
努力解决这个问题 - 感谢所有帮助/建议。谢谢。
你可以尝试 df_total = df1.append(df2)(注意:DataFrame.append 已在 pandas 2.0 中移除,新版本请改用 df_total = pd.concat([df1, df2])),但这种方式可能会将列 'race_id_legs2' 放在 'race_id_leg1' 之后,所以你需要重新排列各列的顺序。
这是一个可能的解决方案。我确定还有更优雅(elegant)的解决方案——但这个可行。
import pandas as pd
from collections import Counter
from itertools import chain
def _pad_points_by_name(df, width=10):
    """Expand a name/pts frame into two flat, fixed-width lists.

    Rows of ``df`` are grouped by their ``'name'`` value, in order of first
    appearance.  Each group contributes exactly ``width`` entries to both
    returned lists: the name repeated ``width`` times, and the group's
    ``'pts'`` values followed by zeros up to ``width``.

    Args:
        df: DataFrame with a ``'name'`` column and a ``'pts'`` column.
        width: Number of slots reserved for every name (default 10).

    Returns:
        A ``(names, points)`` tuple of equally long lists; both are empty
        when ``df`` has no rows.

    Raises:
        ValueError: If any name has more than ``width`` rows, because the
            two lists could then no longer be kept the same length.
    """
    # Group explicitly by name instead of slicing by per-name counts, so the
    # result is also correct when a name's rows are not adjacent.
    grouped = {}
    for name, pts in zip(df['name'].tolist(), df['pts'].tolist()):
        grouped.setdefault(name, []).append(pts)

    names = []
    points = []
    for name, pts in grouped.items():
        if len(pts) > width:
            raise ValueError(
                f"{name!r} has {len(pts)} rows; at most {width} are supported"
            )
        names.extend([name] * width)
        points.extend(pts + [0] * (width - len(pts)))
    return names, points


df_1 = pd.DataFrame.from_dict({
    'name': ['fred', 'fred', 'fred', 'bill', 'bill',
             'ted', 'ted', 'ted', 'ted'],
    'pts': [8, 4, 5, 7, 2, 3, 9, 8, 5],
})
df_2 = pd.DataFrame.from_dict({
    'name': ['pam', 'pam', 'lou', 'lou', 'lou', 'lou',
             'sam', 'sam', 'sam', 'sam'],
    'pts': [5, 6, 5, 6, 5, 6, 5, 6, 5, 6],
})

############################################
# For each frame build two lists: the names (10 entries per person) and the
# points (each person's points padded with zeros to 10 entries).
df_1_total_list_of_names, df_1_total_list_of_pts = _pad_points_by_name(df_1)
df_2_total_list_of_names, df_2_total_list_of_pts = _pad_points_by_name(df_2)

############################################
# Combine the name and pts lists from df_1 and df_2 into one dataframe.
# All four lists must have the same length, i.e. both frames need the same
# number of distinct names; otherwise the DataFrame constructor raises.
df_3 = pd.DataFrame({
    'df_1_names': df_1_total_list_of_names,
    'df_1_pts': df_1_total_list_of_pts,
    'df_2_names': df_2_total_list_of_names,
    'df_2_pts': df_2_total_list_of_pts,
})

############################################
# Optional: drop the rows whose pts are zero for both df_1 and df_2, which
# removes the padding.  NOTE: a row where both sides really scored 0 is
# dropped as well, because padding and real zeros are indistinguishable.
df_4 = df_3[(df_3.df_1_pts != 0) | (df_3.df_2_pts != 0)]
print(df_4)