从嵌套字典到 python 数据框
From nested dictionary to python Dataframe
我有一个嵌套字典的示例,如下所示:
data = [{
'resultInfo': {
'load': None,
'unload': {
'weight': 59.0,
'unit': 'ton',
'tonsPerTeu': None,
'tonsPerFeu': None,
'freightId': None,
'showEmissionsAtResponse': True
},
'location': 'zip:63937',
'freightId': None,
'emissionPercentage': 1.0,
'directDistance': 767.71
},
'emissions': {
'primaryEnergy': {
'rail': None,
'sea': None,
'air': None,
'inlandWaterways': None,
'road': {
'_value_1': Decimal('70351.631210000000'),
'wellToTank': Decimal('13412'),
'tankToWheel': Decimal('56939')
},
'logisticsite': None,
'transfer': None,
'unit': 'MegaJoule'
},
'carbonDioxide': {
'rail': None,
'sea': None,
'air': None,
'inlandWaterways': None,
'road': {
'_value_1': Decimal('4.866239643000'),
'wellToTank': Decimal('0.902'),
'tankToWheel': Decimal('3.963')
}
}]
type(data) 返回 list,也就是说 data 是一个列表。
我想把它转换成数据框(DataFrame)格式,预期输出如下:
primaryEnergy_wellToTank primaryEnergy_tankToWheel carbonDioxide_wellToTank carbonDioxide_tankToWheel
13412 56939 0.902 3.963
我尝试了用 pd.DataFrame 函数进行一些转换:
df = pd.DataFrame(dict([ (k,pd.Series(v)) for k,v in mydict.items() ]))
但到目前为止结果都不太理想。
如何做到这一点?
下面是我在使用 df = pd.json_normalize(data) 时遇到的错误:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\zeep\xsd\valueobjects.py in __getattribute__(self, key)
142 try:
--> 143 return self.__values__[key]
144 except KeyError:
KeyError: 'values'
During handling of the above exception, another exception occurred:
AttributeError Traceback (most recent call last)
<ipython-input-180-cc2694b5448e> in <module>
----> 1 df = pd.json_normalize(result.result)
~\AppData\Roaming\Python\Python37\site-packages\pandas\io\json\_normalize.py in _json_normalize(data, record_path, meta, meta_prefix, record_prefix, errors, sep, max_level)
272
273 if record_path is None:
--> 274 if any([isinstance(x, dict) for x in y.values()] for y in data):
275 # naive normalization, this is idempotent for flat records
276 # and potentially will inflate the data considerably for
~\AppData\Roaming\Python\Python37\site-packages\pandas\io\json\_normalize.py in <genexpr>(.0)
272
273 if record_path is None:
--> 274 if any([isinstance(x, dict) for x in y.values()] for y in data):
275 # naive normalization, this is idempotent for flat records
276 # and potentially will inflate the data considerably for
~\AppData\Local\Continuum\anaconda3\lib\site-packages\zeep\xsd\valueobjects.py in __getattribute__(self, key)
144 except KeyError:
145 raise AttributeError(
--> 146 "%s instance has no attribute '%s'" % (self.__class__.__name__, key)
147 )
148
AttributeError: DistributionLoadResult instance has no attribute 'values'
- 我可以使用 zeep 的 serialize_object 函数解决问题(先把 zeep 返回的对象转换成普通的 dict)。
- 如果 list 看起来像本帖底部的 list of dicts(其中 resultInfo 重复出现),那么您可以使用 json_normalize。
- 创建 df 之后,使用 pandas.DataFrame.drop 删除不需要的列。
import pandas as pd
df = pd.json_normalize(data)
# display(df)
resultInfo.load resultInfo.unload.weight resultInfo.unload.unit resultInfo.unload.tonsPerTeu resultInfo.unload.tonsPerFeu resultInfo.unload.freightId resultInfo.unload.showEmissionsAtResponse resultInfo.location resultInfo.freightId resultInfo.emissionPercentage resultInfo.directDistance emissions.primaryEnergy.rail emissions.primaryEnergy.sea emissions.primaryEnergy.air emissions.primaryEnergy.inlandWaterways emissions.primaryEnergy.road._value_1 emissions.primaryEnergy.road.wellToTank emissions.primaryEnergy.road.tankToWheel emissions.primaryEnergy.logisticsite emissions.primaryEnergy.transfer emissions.primaryEnergy.unit emissions.carbonDioxide.rail emissions.carbonDioxide.sea emissions.carbonDioxide.air emissions.carbonDioxide.inlandWaterways emissions.carbonDioxide.road._value_1 emissions.carbonDioxide.road.wellToTank emissions.carbonDioxide.road.tankToWheel
0 None 59.0 ton None None None True zip:63937 None 1.0 767.71 None None None None 70351.63121 13412 5693 None None MegaJoule None None None None 4.86624 0.902 3.96
1 None 59.0 ton None None None True zip:63937 None 1.0 767.71 None None None None 70351.63121 13412 5693 None None MegaJoule None None None None 4.86624 0.902 3.96
2 None 59.0 ton None None None True zip:63937 None 1.0 767.71 None None None None 70351.63121 13412 5693 None None MegaJoule None None None None 4.86624 0.902 3.96
数据
data = [{
'resultInfo': {
'load': None,
'unload': {
'weight': 59.0,
'unit': 'ton',
'tonsPerTeu': None,
'tonsPerFeu': None,
'freightId': None,
'showEmissionsAtResponse': True
},
'location': 'zip:63937',
'freightId': None,
'emissionPercentage': 1.0,
'directDistance': 767.71
},
'emissions': {
'primaryEnergy': {
'rail': None,
'sea': None,
'air': None,
'inlandWaterways': None,
'road': {
'_value_1': 70351.631210000000,
'wellToTank': 13412,
'tankToWheel': 5693
},
'logisticsite': None,
'transfer': None,
'unit': 'MegaJoule'
},
'carbonDioxide': {
'rail': None,
'sea': None,
'air': None,
'inlandWaterways': None,
'road': {
'_value_1': 4.866239643000,
'wellToTank': 0.902,
'tankToWheel': 3.96
},
}
}
},
{
'resultInfo': {
'load': None,
'unload': {
'weight': 59.0,
'unit': 'ton',
'tonsPerTeu': None,
'tonsPerFeu': None,
'freightId': None,
'showEmissionsAtResponse': True
},
'location': 'zip:63937',
'freightId': None,
'emissionPercentage': 1.0,
'directDistance': 767.71
},
'emissions': {
'primaryEnergy': {
'rail': None,
'sea': None,
'air': None,
'inlandWaterways': None,
'road': {
'_value_1': 70351.631210000000,
'wellToTank': 13412,
'tankToWheel': 5693
},
'logisticsite': None,
'transfer': None,
'unit': 'MegaJoule'
},
'carbonDioxide': {
'rail': None,
'sea': None,
'air': None,
'inlandWaterways': None,
'road': {
'_value_1': 4.866239643000,
'wellToTank': 0.902,
'tankToWheel': 3.96
},
}
}
},
{
'resultInfo': {
'load': None,
'unload': {
'weight': 59.0,
'unit': 'ton',
'tonsPerTeu': None,
'tonsPerFeu': None,
'freightId': None,
'showEmissionsAtResponse': True
},
'location': 'zip:63937',
'freightId': None,
'emissionPercentage': 1.0,
'directDistance': 767.71
},
'emissions': {
'primaryEnergy': {
'rail': None,
'sea': None,
'air': None,
'inlandWaterways': None,
'road': {
'_value_1': 70351.631210000000,
'wellToTank': 13412,
'tankToWheel': 5693
},
'logisticsite': None,
'transfer': None,
'unit': 'MegaJoule'
},
'carbonDioxide': {
'rail': None,
'sea': None,
'air': None,
'inlandWaterways': None,
'road': {
'_value_1': 4.866239643000,
'wellToTank': 0.902,
'tankToWheel': 3.96
},
}
}
}
]
我有一个嵌套字典的示例,如下所示:
data = [{
'resultInfo': {
'load': None,
'unload': {
'weight': 59.0,
'unit': 'ton',
'tonsPerTeu': None,
'tonsPerFeu': None,
'freightId': None,
'showEmissionsAtResponse': True
},
'location': 'zip:63937',
'freightId': None,
'emissionPercentage': 1.0,
'directDistance': 767.71
},
'emissions': {
'primaryEnergy': {
'rail': None,
'sea': None,
'air': None,
'inlandWaterways': None,
'road': {
'_value_1': Decimal('70351.631210000000'),
'wellToTank': Decimal('13412'),
'tankToWheel': Decimal('56939')
},
'logisticsite': None,
'transfer': None,
'unit': 'MegaJoule'
},
'carbonDioxide': {
'rail': None,
'sea': None,
'air': None,
'inlandWaterways': None,
'road': {
'_value_1': Decimal('4.866239643000'),
'wellToTank': Decimal('0.902'),
'tankToWheel': Decimal('3.963')
}
}]
type(data) 返回 list,也就是说 data 是一个列表。
我想把它转换成数据框(DataFrame)格式,预期输出如下:
primaryEnergy_wellToTank primaryEnergy_tankToWheel carbonDioxide_wellToTank carbonDioxide_tankToWheel
13412 56939 0.902 3.963
我尝试了用 pd.DataFrame 函数进行一些转换:
df = pd.DataFrame(dict([ (k,pd.Series(v)) for k,v in mydict.items() ]))
但到目前为止结果都不太理想。
如何做到这一点?
下面是我在使用 df = pd.json_normalize(data) 时遇到的错误:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\zeep\xsd\valueobjects.py in __getattribute__(self, key)
142 try:
--> 143 return self.__values__[key]
144 except KeyError:
KeyError: 'values'
During handling of the above exception, another exception occurred:
AttributeError Traceback (most recent call last)
<ipython-input-180-cc2694b5448e> in <module>
----> 1 df = pd.json_normalize(result.result)
~\AppData\Roaming\Python\Python37\site-packages\pandas\io\json\_normalize.py in _json_normalize(data, record_path, meta, meta_prefix, record_prefix, errors, sep, max_level)
272
273 if record_path is None:
--> 274 if any([isinstance(x, dict) for x in y.values()] for y in data):
275 # naive normalization, this is idempotent for flat records
276 # and potentially will inflate the data considerably for
~\AppData\Roaming\Python\Python37\site-packages\pandas\io\json\_normalize.py in <genexpr>(.0)
272
273 if record_path is None:
--> 274 if any([isinstance(x, dict) for x in y.values()] for y in data):
275 # naive normalization, this is idempotent for flat records
276 # and potentially will inflate the data considerably for
~\AppData\Local\Continuum\anaconda3\lib\site-packages\zeep\xsd\valueobjects.py in __getattribute__(self, key)
144 except KeyError:
145 raise AttributeError(
--> 146 "%s instance has no attribute '%s'" % (self.__class__.__name__, key)
147 )
148
AttributeError: DistributionLoadResult instance has no attribute 'values'
- 我可以使用 zeep 的 serialize_object 函数解决问题(先把 zeep 返回的对象转换成普通的 dict)。
- 如果 list 看起来像本帖底部的 list of dicts(其中 resultInfo 重复出现),那么您可以使用 json_normalize。
- 创建 df 之后,使用 pandas.DataFrame.drop 删除不需要的列。
import pandas as pd
df = pd.json_normalize(data)
# display(df)
resultInfo.load resultInfo.unload.weight resultInfo.unload.unit resultInfo.unload.tonsPerTeu resultInfo.unload.tonsPerFeu resultInfo.unload.freightId resultInfo.unload.showEmissionsAtResponse resultInfo.location resultInfo.freightId resultInfo.emissionPercentage resultInfo.directDistance emissions.primaryEnergy.rail emissions.primaryEnergy.sea emissions.primaryEnergy.air emissions.primaryEnergy.inlandWaterways emissions.primaryEnergy.road._value_1 emissions.primaryEnergy.road.wellToTank emissions.primaryEnergy.road.tankToWheel emissions.primaryEnergy.logisticsite emissions.primaryEnergy.transfer emissions.primaryEnergy.unit emissions.carbonDioxide.rail emissions.carbonDioxide.sea emissions.carbonDioxide.air emissions.carbonDioxide.inlandWaterways emissions.carbonDioxide.road._value_1 emissions.carbonDioxide.road.wellToTank emissions.carbonDioxide.road.tankToWheel
0 None 59.0 ton None None None True zip:63937 None 1.0 767.71 None None None None 70351.63121 13412 5693 None None MegaJoule None None None None 4.86624 0.902 3.96
1 None 59.0 ton None None None True zip:63937 None 1.0 767.71 None None None None 70351.63121 13412 5693 None None MegaJoule None None None None 4.86624 0.902 3.96
2 None 59.0 ton None None None True zip:63937 None 1.0 767.71 None None None None 70351.63121 13412 5693 None None MegaJoule None None None None 4.86624 0.902 3.96
数据
data = [{
'resultInfo': {
'load': None,
'unload': {
'weight': 59.0,
'unit': 'ton',
'tonsPerTeu': None,
'tonsPerFeu': None,
'freightId': None,
'showEmissionsAtResponse': True
},
'location': 'zip:63937',
'freightId': None,
'emissionPercentage': 1.0,
'directDistance': 767.71
},
'emissions': {
'primaryEnergy': {
'rail': None,
'sea': None,
'air': None,
'inlandWaterways': None,
'road': {
'_value_1': 70351.631210000000,
'wellToTank': 13412,
'tankToWheel': 5693
},
'logisticsite': None,
'transfer': None,
'unit': 'MegaJoule'
},
'carbonDioxide': {
'rail': None,
'sea': None,
'air': None,
'inlandWaterways': None,
'road': {
'_value_1': 4.866239643000,
'wellToTank': 0.902,
'tankToWheel': 3.96
},
}
}
},
{
'resultInfo': {
'load': None,
'unload': {
'weight': 59.0,
'unit': 'ton',
'tonsPerTeu': None,
'tonsPerFeu': None,
'freightId': None,
'showEmissionsAtResponse': True
},
'location': 'zip:63937',
'freightId': None,
'emissionPercentage': 1.0,
'directDistance': 767.71
},
'emissions': {
'primaryEnergy': {
'rail': None,
'sea': None,
'air': None,
'inlandWaterways': None,
'road': {
'_value_1': 70351.631210000000,
'wellToTank': 13412,
'tankToWheel': 5693
},
'logisticsite': None,
'transfer': None,
'unit': 'MegaJoule'
},
'carbonDioxide': {
'rail': None,
'sea': None,
'air': None,
'inlandWaterways': None,
'road': {
'_value_1': 4.866239643000,
'wellToTank': 0.902,
'tankToWheel': 3.96
},
}
}
},
{
'resultInfo': {
'load': None,
'unload': {
'weight': 59.0,
'unit': 'ton',
'tonsPerTeu': None,
'tonsPerFeu': None,
'freightId': None,
'showEmissionsAtResponse': True
},
'location': 'zip:63937',
'freightId': None,
'emissionPercentage': 1.0,
'directDistance': 767.71
},
'emissions': {
'primaryEnergy': {
'rail': None,
'sea': None,
'air': None,
'inlandWaterways': None,
'road': {
'_value_1': 70351.631210000000,
'wellToTank': 13412,
'tankToWheel': 5693
},
'logisticsite': None,
'transfer': None,
'unit': 'MegaJoule'
},
'carbonDioxide': {
'rail': None,
'sea': None,
'air': None,
'inlandWaterways': None,
'road': {
'_value_1': 4.866239643000,
'wellToTank': 0.902,
'tankToWheel': 3.96
},
}
}
}
]