flat_table getting ValueError: cannot reindex from a duplicate axis , My problem is different for this error
flat_table getting ValueError: cannot reindex from a duplicate axis , My problem is different for this error
我有如下数据框
behaviour_attributes
0 {'className': 'behaviour', 'type': 'behaviour', 'verb': 'can_perform_stw_everything', 'bs': [{'bid': ObjectId('6050da979198a053c3a02484'), 'n': 'Can Perform Spin Wheel Everything', 'ao': datetime.datetime(2021, 4, 6, 0, 0, 0, 266000), 'bs': 'CountLimitException', 'tids': [ObjectId('605073cb9198a053c39d7a4d')], 'tags': [{'tid': ObjectId('605073cb9198a053c39d7a4d'), 'prsn': True}], 'prz': {'ch': False, 'pts': [{'pid': ObjectId('6050d99e9198a053c3a01bee'), 'pts': 0, 'eo': datetime.datetime(2021, 4, 8, 18, 0)}]}}]}
1 {'className': 'behaviour', 'type': 'behaviour', 'verb': 'game_escape_run', 'md': [{'n': 'total_score', 'v': '32'}, {'n': 'game_id', 'v': '3'}], 'bs': [{'bid': ObjectId('6050dba29198a053c3a02e4d'), 'n': 'Game Escape Run', 'ao': datetime.datetime(2021, 4, 5, 0, 0, 1, 230000), 'bs': 'OK', 'tids': [ObjectId('605073769198a053c39d77f1'), ObjectId('605071569198a053c39d6ab9')], 'tags': [{'tid': ObjectId('605071569198a053c39d6ab9'), 'prsn': True}, {'tid': ObjectId('605073769198a053c39d77f1'), 'prsn': True}], 'prz': {'ch': False, 'pts': [{'pid': ObjectId('6050d9689198a053c3a019f8'), 'pts': 1, 'eo': datetime.datetime(2021, 4, 5, 18, 0)}], 'at': {'tids': [ObjectId('605073769198a053c39d77f1'), ObjectId('605071569198a053c39d6ab9')], 'tags': [{'tid': ObjectId('605071569198a053c39d6ab9'), 'prsn': True}, {'tid': ObjectId('605073769198a053c39d77f1'), 'prsn': True}]}}}]}
import flat_table
# Flatten the nested 'behaviour_attributes' column, when it exists, and merge
# the flattened columns back into the main frame.
if 'behaviour_attributes' in getDataByDate_df.columns:
    # Hand flat_table a one-column frame holding just the nested records.
    df = pd.DataFrame(getDataByDate_df['behaviour_attributes'])
    getDataByDate_dfA = flat_table.normalize(df)
    # NOTE(review): normalize() appears to emit several rows per source row
    # (tracked in its 'index' column), so this column-wise concat may not
    # line rows up with their source records -- confirm before relying on it.
    getDataByDate_df = pd.concat([getDataByDate_df, getDataByDate_dfA], axis=1)
    # Drop the helper 'index' column and the original nested column.
    getDataByDate_df.drop('index', axis=1, inplace=True)
    getDataByDate_df.drop('behaviour_attributes', axis=1, inplace=True)
    # Release the temporaries.
    del getDataByDate_dfA
    del df
我尝试先删除索引再使用 flat_table,但在 getDataByDate_dfA = flat_table.normalize(df) 这一行仍然出现同样的错误。
在用字符串字面量替换 ObjectId() 和 datetime.datetime() 对象之后,我调用 flat_table.normalize() 时没有再遇到任何错误。不确定这是库的缺陷还是有意的设计。
数据
我假设你的数据是以 dict 类型存储的,所以我尝试用 ast.literal_eval() 把你粘贴的数据还原为 dict。由于该方法无法解析 ObjectId、datetime 这类对象,因此需要先把它们用引号括起来,变成字符串。
import pandas as pd
import io
import ast
import re
import flat_table
# Rebuild the sample frame from its printed form. The row label and the dict
# text must be separated by at least two spaces: the r"\s{2,}" separator only
# splits on runs of 2+ whitespace characters, which leaves the single spaces
# inside the dict text untouched. Because the header has one field fewer than
# the data rows, pandas uses the first field (0, 1) as the row index.
df = pd.read_csv(io.StringIO("""
behaviour_attributes
0  {'className': 'behaviour', 'type': 'behaviour', 'verb': 'can_perform_stw_everything', 'bs': [{'bid': ObjectId('6050da979198a053c3a02484'), 'n': 'Can Perform Spin Wheel Everything', 'ao': datetime.datetime(2021, 4, 6, 0, 0, 0, 266000), 'bs': 'CountLimitException', 'tids': [ObjectId('605073cb9198a053c39d7a4d')], 'tags': [{'tid': ObjectId('605073cb9198a053c39d7a4d'), 'prsn': True}], 'prz': {'ch': False, 'pts': [{'pid': ObjectId('6050d99e9198a053c3a01bee'), 'pts': 0, 'eo': datetime.datetime(2021, 4, 8, 18, 0)}]}}]}
1  {'className': 'behaviour', 'type': 'behaviour', 'verb': 'game_escape_run', 'md': [{'n': 'total_score', 'v': '32'}, {'n': 'game_id', 'v': '3'}], 'bs': [{'bid': ObjectId('6050dba29198a053c3a02e4d'), 'n': 'Game Escape Run', 'ao': datetime.datetime(2021, 4, 5, 0, 0, 1, 230000), 'bs': 'OK', 'tids': [ObjectId('605073769198a053c39d77f1'), ObjectId('605071569198a053c39d6ab9')], 'tags': [{'tid': ObjectId('605071569198a053c39d6ab9'), 'prsn': True}, {'tid': ObjectId('605073769198a053c39d77f1'), 'prsn': True}], 'prz': {'ch': False, 'pts': [{'pid': ObjectId('6050d9689198a053c3a019f8'), 'pts': 1, 'eo': datetime.datetime(2021, 4, 5, 18, 0)}], 'at': {'tids': [ObjectId('605073769198a053c39d77f1'), ObjectId('605071569198a053c39d6ab9')], 'tags': [{'tid': ObjectId('605071569198a053c39d6ab9'), 'prsn': True}, {'tid': ObjectId('605073769198a053c39d77f1'), 'prsn': True}]}}}]}
"""), sep=r"\s{2,}", engine='python')
def restore_dict(s: str):
    """Parse a printed record back into a dict, stubbing out non-literal objects.

    ``ast.literal_eval`` only accepts Python literals, so every
    ``ObjectId('...')`` and ``datetime.datetime(...)`` call in *s* is first
    replaced by a quoted placeholder string (``'ObjectId()'`` and
    ``'datetime.datetime()'``). The original ids and timestamps are discarded.

    Args:
        s: Text of a dict as it appears when printed.

    Returns:
        The object ``ast.literal_eval`` builds from the sanitized text.

    Raises:
        ValueError, SyntaxError: If the sanitized text is still not a valid
            Python literal.
    """
    # Quote out ObjectId(...) calls first, then datetime.datetime(...) calls.
    s1 = re.sub(r"ObjectId\('([^)]*)'\)", r"'ObjectId()'", s)
    s2 = re.sub(r"datetime\.datetime\(([^)]*)\)", r"'datetime.datetime()'", s1)
    return ast.literal_eval(s2)
# Turn each record's text back into a real dict before flattening.
parsed_records = df["behaviour_attributes"].apply(restore_dict)
df["behaviour_attributes"] = parsed_records
结果
# Flatten the nested column with flat_table.
df2 = flat_table.normalize(df)
# Strip the shared "behaviour_attributes." prefix so the printed header is shorter.
df2.columns = [
    column.replace("behaviour_attributes.", "")
    for column in df2.columns
]
print(df2)
index md.v md.n ... verb type className
0 0 NaN NaN ... can_perform_stw_everything behaviour behaviour
1 1 32 total_score ... game_escape_run behaviour behaviour
2 1 32 total_score ... game_escape_run behaviour behaviour
3 1 32 total_score ... game_escape_run behaviour behaviour
4 1 32 total_score ... game_escape_run behaviour behaviour
5 1 32 total_score ... game_escape_run behaviour behaviour
6 1 32 total_score ... game_escape_run behaviour behaviour
7 1 32 total_score ... game_escape_run behaviour behaviour
8 1 32 total_score ... game_escape_run behaviour behaviour
9 1 3 game_id ... game_escape_run behaviour behaviour
10 1 3 game_id ... game_escape_run behaviour behaviour
11 1 3 game_id ... game_escape_run behaviour behaviour
12 1 3 game_id ... game_escape_run behaviour behaviour
13 1 3 game_id ... game_escape_run behaviour behaviour
14 1 3 game_id ... game_escape_run behaviour behaviour
15 1 3 game_id ... game_escape_run behaviour behaviour
16 1 3 game_id ... game_escape_run behaviour behaviour
[17 rows x 20 columns]
我有如下数据框
behaviour_attributes
0 {'className': 'behaviour', 'type': 'behaviour', 'verb': 'can_perform_stw_everything', 'bs': [{'bid': ObjectId('6050da979198a053c3a02484'), 'n': 'Can Perform Spin Wheel Everything', 'ao': datetime.datetime(2021, 4, 6, 0, 0, 0, 266000), 'bs': 'CountLimitException', 'tids': [ObjectId('605073cb9198a053c39d7a4d')], 'tags': [{'tid': ObjectId('605073cb9198a053c39d7a4d'), 'prsn': True}], 'prz': {'ch': False, 'pts': [{'pid': ObjectId('6050d99e9198a053c3a01bee'), 'pts': 0, 'eo': datetime.datetime(2021, 4, 8, 18, 0)}]}}]}
1 {'className': 'behaviour', 'type': 'behaviour', 'verb': 'game_escape_run', 'md': [{'n': 'total_score', 'v': '32'}, {'n': 'game_id', 'v': '3'}], 'bs': [{'bid': ObjectId('6050dba29198a053c3a02e4d'), 'n': 'Game Escape Run', 'ao': datetime.datetime(2021, 4, 5, 0, 0, 1, 230000), 'bs': 'OK', 'tids': [ObjectId('605073769198a053c39d77f1'), ObjectId('605071569198a053c39d6ab9')], 'tags': [{'tid': ObjectId('605071569198a053c39d6ab9'), 'prsn': True}, {'tid': ObjectId('605073769198a053c39d77f1'), 'prsn': True}], 'prz': {'ch': False, 'pts': [{'pid': ObjectId('6050d9689198a053c3a019f8'), 'pts': 1, 'eo': datetime.datetime(2021, 4, 5, 18, 0)}], 'at': {'tids': [ObjectId('605073769198a053c39d77f1'), ObjectId('605071569198a053c39d6ab9')], 'tags': [{'tid': ObjectId('605071569198a053c39d6ab9'), 'prsn': True}, {'tid': ObjectId('605073769198a053c39d77f1'), 'prsn': True}]}}}]}
import flat_table
# Flatten the nested 'behaviour_attributes' column, when it exists, and merge
# the flattened columns back into the main frame.
if 'behaviour_attributes' in getDataByDate_df.columns:
    # Hand flat_table a one-column frame holding just the nested records.
    df = pd.DataFrame(getDataByDate_df['behaviour_attributes'])
    getDataByDate_dfA = flat_table.normalize(df)
    # NOTE(review): normalize() appears to emit several rows per source row
    # (tracked in its 'index' column), so this column-wise concat may not
    # line rows up with their source records -- confirm before relying on it.
    getDataByDate_df = pd.concat([getDataByDate_df, getDataByDate_dfA], axis=1)
    # Drop the helper 'index' column and the original nested column.
    getDataByDate_df.drop('index', axis=1, inplace=True)
    getDataByDate_df.drop('behaviour_attributes', axis=1, inplace=True)
    # Release the temporaries.
    del getDataByDate_dfA
    del df
我尝试先删除索引再使用 flat_table,但在 getDataByDate_dfA = flat_table.normalize(df) 这一行仍然出现同样的错误。
在用字符串字面量替换 ObjectId() 和 datetime.datetime() 对象之后,我调用 flat_table.normalize() 时没有再遇到任何错误。不确定这是库的缺陷还是有意的设计。
数据
我假设你的数据是以 dict 类型存储的,所以我尝试用 ast.literal_eval() 把你粘贴的数据还原为 dict。由于该方法无法解析 ObjectId、datetime 这类对象,因此需要先把它们用引号括起来,变成字符串。
import pandas as pd
import io
import ast
import re
import flat_table
# Rebuild the sample frame from its printed form. The row label and the dict
# text must be separated by at least two spaces: the r"\s{2,}" separator only
# splits on runs of 2+ whitespace characters, which leaves the single spaces
# inside the dict text untouched. Because the header has one field fewer than
# the data rows, pandas uses the first field (0, 1) as the row index.
df = pd.read_csv(io.StringIO("""
behaviour_attributes
0  {'className': 'behaviour', 'type': 'behaviour', 'verb': 'can_perform_stw_everything', 'bs': [{'bid': ObjectId('6050da979198a053c3a02484'), 'n': 'Can Perform Spin Wheel Everything', 'ao': datetime.datetime(2021, 4, 6, 0, 0, 0, 266000), 'bs': 'CountLimitException', 'tids': [ObjectId('605073cb9198a053c39d7a4d')], 'tags': [{'tid': ObjectId('605073cb9198a053c39d7a4d'), 'prsn': True}], 'prz': {'ch': False, 'pts': [{'pid': ObjectId('6050d99e9198a053c3a01bee'), 'pts': 0, 'eo': datetime.datetime(2021, 4, 8, 18, 0)}]}}]}
1  {'className': 'behaviour', 'type': 'behaviour', 'verb': 'game_escape_run', 'md': [{'n': 'total_score', 'v': '32'}, {'n': 'game_id', 'v': '3'}], 'bs': [{'bid': ObjectId('6050dba29198a053c3a02e4d'), 'n': 'Game Escape Run', 'ao': datetime.datetime(2021, 4, 5, 0, 0, 1, 230000), 'bs': 'OK', 'tids': [ObjectId('605073769198a053c39d77f1'), ObjectId('605071569198a053c39d6ab9')], 'tags': [{'tid': ObjectId('605071569198a053c39d6ab9'), 'prsn': True}, {'tid': ObjectId('605073769198a053c39d77f1'), 'prsn': True}], 'prz': {'ch': False, 'pts': [{'pid': ObjectId('6050d9689198a053c3a019f8'), 'pts': 1, 'eo': datetime.datetime(2021, 4, 5, 18, 0)}], 'at': {'tids': [ObjectId('605073769198a053c39d77f1'), ObjectId('605071569198a053c39d6ab9')], 'tags': [{'tid': ObjectId('605071569198a053c39d6ab9'), 'prsn': True}, {'tid': ObjectId('605073769198a053c39d77f1'), 'prsn': True}]}}}]}
"""), sep=r"\s{2,}", engine='python')
def restore_dict(s: str):
    """Parse a printed record back into a dict, stubbing out non-literal objects.

    ``ast.literal_eval`` only accepts Python literals, so every
    ``ObjectId('...')`` and ``datetime.datetime(...)`` call in *s* is first
    replaced by a quoted placeholder string (``'ObjectId()'`` and
    ``'datetime.datetime()'``). The original ids and timestamps are discarded.

    Args:
        s: Text of a dict as it appears when printed.

    Returns:
        The object ``ast.literal_eval`` builds from the sanitized text.

    Raises:
        ValueError, SyntaxError: If the sanitized text is still not a valid
            Python literal.
    """
    # Quote out ObjectId(...) calls first, then datetime.datetime(...) calls.
    s1 = re.sub(r"ObjectId\('([^)]*)'\)", r"'ObjectId()'", s)
    s2 = re.sub(r"datetime\.datetime\(([^)]*)\)", r"'datetime.datetime()'", s1)
    return ast.literal_eval(s2)
# Turn each record's text back into a real dict before flattening.
parsed_records = df["behaviour_attributes"].apply(restore_dict)
df["behaviour_attributes"] = parsed_records
结果
# Flatten the nested column with flat_table.
df2 = flat_table.normalize(df)
# Strip the shared "behaviour_attributes." prefix so the printed header is shorter.
df2.columns = [
    column.replace("behaviour_attributes.", "")
    for column in df2.columns
]
print(df2)
index md.v md.n ... verb type className
0 0 NaN NaN ... can_perform_stw_everything behaviour behaviour
1 1 32 total_score ... game_escape_run behaviour behaviour
2 1 32 total_score ... game_escape_run behaviour behaviour
3 1 32 total_score ... game_escape_run behaviour behaviour
4 1 32 total_score ... game_escape_run behaviour behaviour
5 1 32 total_score ... game_escape_run behaviour behaviour
6 1 32 total_score ... game_escape_run behaviour behaviour
7 1 32 total_score ... game_escape_run behaviour behaviour
8 1 32 total_score ... game_escape_run behaviour behaviour
9 1 3 game_id ... game_escape_run behaviour behaviour
10 1 3 game_id ... game_escape_run behaviour behaviour
11 1 3 game_id ... game_escape_run behaviour behaviour
12 1 3 game_id ... game_escape_run behaviour behaviour
13 1 3 game_id ... game_escape_run behaviour behaviour
14 1 3 game_id ... game_escape_run behaviour behaviour
15 1 3 game_id ... game_escape_run behaviour behaviour
16 1 3 game_id ... game_escape_run behaviour behaviour
[17 rows x 20 columns]