将复杂的 JSON 映射到 Pandas 数据框
Mapping complex JSON to Pandas Dataframe
背景
我有一个复杂的嵌套 JSON object,我正在尝试将其解压成 pandas df
以一种非常具体的方式。
JSON Object
这是 JSON object 的一段摘录(其中的数据为随机值),展示了 1 个家庭(即 'Falconer Family')的层次结构示例(包括 children)。完整的 JSON object 中总共有 100 个这样的家庭,而本摘录只包含其中 1 个 -
{
"meta": {
"columns": [{
"key": "value",
"display_name": "Adjusted Value (No Div, USD)",
"output_type": "Number",
"currency": "USD"
},
{
"key": "time_weighted_return",
"display_name": "Current Quarter TWR (USD)",
"output_type": "Percent",
"currency": "USD"
},
{
"key": "time_weighted_return_2",
"display_name": "YTD TWR (USD)",
"output_type": "Percent",
"currency": "USD"
},
{
"key": "_custom_twr_audit_note_911328",
"display_name": "TWR Audit Note",
"output_type": "Word"
}
],
"groupings": [{
"key": "_custom_name_747205",
"display_name": "* Reporting Client Name"
},
{
"key": "_custom_new_entity_group_453577",
"display_name": "NEW Entity Group"
},
{
"key": "_custom_level_2_624287",
"display_name": "* Level 2"
},
{
"key": "legal_entity",
"display_name": "Legal Entity"
}
]
},
"data": {
"type": "portfolio_views",
"attributes": {
"total": {
"name": "Total",
"columns": {
"time_weighted_return": -0.046732301295604683,
"time_weighted_return_2": -0.046732301295604683,
"_custom_twr_audit_note_911328": null,
"value": 23132492.905107163
},
"children": [{
"name": "Falconer Family",
"grouping": "_custom_name_747205",
"columns": {
"time_weighted_return": -0.046732301295604683,
"time_weighted_return_2": -0.046732301295604683,
"_custom_twr_audit_note_911328": null,
"value": 23132492.905107163
},
"children": [{
"name": "Wealth Bucket A",
"grouping": "_custom_new_entity_group_453577",
"columns": {
"time_weighted_return": -0.045960317420568164,
"time_weighted_return_2": -0.045960317420568164,
"_custom_twr_audit_note_911328": null,
"value": 13264448.506587159
},
"children": [{
"name": "Asset Class A",
"grouping": "_custom_level_2_624287",
"columns": {
"time_weighted_return": 0.000003434094574039648,
"time_weighted_return_2": 0.000003434094574039648,
"_custom_twr_audit_note_911328": null,
"value": 3337.99
},
"children": [{
"entity_id": 10604454,
"name": "HUDJ Trust",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": 0.000003434094574039648,
"time_weighted_return_2": 0.000003434094574039648,
"_custom_twr_audit_note_911328": null,
"value": 3337.99
},
"children": []
}]
},
{
"name": "Asset Class B",
"grouping": "_custom_level_2_624287",
"columns": {
"time_weighted_return": -0.025871339096964152,
"time_weighted_return_2": -0.025871339096964152,
"_custom_twr_audit_note_911328": null,
"value": 1017004.7192636987
},
"children": [{
"entity_id": 10604454,
"name": "HUDG Trust",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.025871339096964152,
"time_weighted_return_2": -0.025871339096964152,
"_custom_twr_audit_note_911328": null,
"value": 1017004.7192636987
},
"children": []
}]
},
{
"name": "Asset Class C",
"grouping": "_custom_level_2_624287",
"columns": {
"time_weighted_return": -0.030370376329670656,
"time_weighted_return_2": -0.030370376329670656,
"_custom_twr_audit_note_911328": null,
"value": 231142.67772000004
},
"children": [{
"entity_id": 10604454,
"name": "HKDJ Trust",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.030370376329670656,
"time_weighted_return_2": -0.030370376329670656,
"_custom_twr_audit_note_911328": null,
"value": 231142.67772000004
},
"children": []
}]
},
{
"name": "Asset Class D",
"grouping": "_custom_level_2_624287",
"columns": {
"time_weighted_return": -0.05382756475465478,
"time_weighted_return_2": -0.05382756475465478,
"_custom_twr_audit_note_911328": null,
"value": 9791282.570000006
},
"children": [{
"entity_id": 10604454,
"name": "HUDW Trust",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.05382756475465478,
"time_weighted_return_2": -0.05382756475465478,
"_custom_twr_audit_note_911328": null,
"value": 9791282.570000006
},
"children": []
}]
},
{
"name": "Asset Class E",
"grouping": "_custom_level_2_624287",
"columns": {
"time_weighted_return": -0.01351630404081805,
"time_weighted_return_2": -0.01351630404081805,
"_custom_twr_audit_note_911328": null,
"value": 2153366.6396034593
},
"children": [{
"entity_id": 10604454,
"name": "HJDJ Trust",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.01351630404081805,
"time_weighted_return_2": -0.01351630404081805,
"_custom_twr_audit_note_911328": null,
"value": 2153366.6396034593
},
"children": []
}]
},
{
"name": "Asset Class F",
"grouping": "_custom_level_2_624287",
"columns": {
"time_weighted_return": -0.002298190175237247,
"time_weighted_return_2": -0.002298190175237247,
"_custom_twr_audit_note_911328": null,
"value": 68313.90999999999
},
"children": [{
"entity_id": 10604454,
"name": "HADJ Trust",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.002298190175237247,
"time_weighted_return_2": -0.002298190175237247,
"_custom_twr_audit_note_911328": null,
"value": 68313.90999999999
},
"children": []
}]
}
]
},
{
"name": "Wealth Bucket B",
"grouping": "_custom_new_entity_group_453577",
"columns": {
"time_weighted_return": -0.04769870075659244,
"time_weighted_return_2": -0.04769870075659244,
"_custom_twr_audit_note_911328": null,
"value": 9868044.398519998
},
"children": [{
"name": "Asset Class A",
"grouping": "_custom_level_2_624287",
"columns": {
"time_weighted_return": 0.000028632718065191298,
"time_weighted_return_2": 0.000028632718065191298,
"_custom_twr_audit_note_911328": null,
"value": 10234.94
},
"children": [{
"entity_id": 10868778,
"name": "2012 Desc Tr HBO Thalia",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": 0.0000282679297198829,
"time_weighted_return_2": 0.0000282679297198829,
"_custom_twr_audit_note_911328": null,
"value": 244.28
},
"children": []
},
{
"entity_id": 10643052,
"name": "2013 Irrev Tr HBO Thalia",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": 0.000049373572795108345,
"time_weighted_return_2": 0.000049373572795108345,
"_custom_twr_audit_note_911328": null,
"value": 5081.08
},
"children": []
},
{
"entity_id": 10598341,
"name": "Cht 11th Tr HBO Shirley",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": 0.000006609603754315074,
"time_weighted_return_2": 0.000006609603754315074,
"_custom_twr_audit_note_911328": null,
"value": 1523.62
},
"children": []
},
{
"entity_id": 10598337,
"name": "Cht 11th Tr HBO Hannah",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": 0.000010999769004760296,
"time_weighted_return_2": 0.000010999769004760296,
"_custom_twr_audit_note_911328": null,
"value": 1828.9
},
"children": []
},
{
"entity_id": 10598334,
"name": "Cht 11th Tr HBO Lau",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": 0.000006466673995619843,
"time_weighted_return_2": 0.000006466673995619843,
"_custom_twr_audit_note_911328": null,
"value": 1557.06
},
"children": []
}
]
},
{
"name": "Asset Class B",
"grouping": "_custom_level_2_624287",
"columns": {
"time_weighted_return": -0.024645947842438676,
"time_weighted_return_2": -0.024645947842438676,
"_custom_twr_audit_note_911328": null,
"value": 674052.31962
},
"children": [{
"entity_id": 10868778,
"name": "2012 Desc Tr HBO Thalia",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.043304004172576405,
"time_weighted_return_2": -0.043304004172576405,
"_custom_twr_audit_note_911328": null,
"value": 52800.96
},
"children": []
},
{
"entity_id": 10643052,
"name": "2013 Irrev Tr HBO Thalia",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.022408434778798836,
"time_weighted_return_2": -0.022408434778798836,
"_custom_twr_audit_note_911328": null,
"value": 599594.11962
},
"children": []
},
{
"entity_id": 10598341,
"name": "Cht 11th Tr HBO Shirley",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.039799855483646174,
"time_weighted_return_2": -0.039799855483646174,
"_custom_twr_audit_note_911328": null,
"value": 7219.08
},
"children": []
},
{
"entity_id": 10598337,
"name": "Cht 11th Tr HBO Hannah",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.039799855483646174,
"time_weighted_return_2": -0.039799855483646174,
"_custom_twr_audit_note_911328": null,
"value": 7219.08
},
"children": []
},
{
"entity_id": 10598334,
"name": "Cht 11th Tr HBO Lau",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.039799855483646174,
"time_weighted_return_2": -0.039799855483646174,
"_custom_twr_audit_note_911328": null,
"value": 7219.08
},
"children": []
}
]
},
{
"name": "Asset Class C",
"grouping": "_custom_level_2_624287",
"columns": {
"time_weighted_return": -0.03037038746301135,
"time_weighted_return_2": -0.03037038746301135,
"_custom_twr_audit_note_911328": null,
"value": 114472.69744
},
"children": [{
"entity_id": 10868778,
"name": "2012 Desc Tr HBO Thalia",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.030370390035505124,
"time_weighted_return_2": -0.030370390035505124,
"_custom_twr_audit_note_911328": null,
"value": 114472.68744000001
},
"children": []
},
{
"entity_id": 10643052,
"name": "2013 Irrev Tr HBO Thalia",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": 0,
"time_weighted_return_2": 0,
"_custom_twr_audit_note_911328": null,
"value": 0.01
},
"children": []
}
]
},
{
"name": "Asset Class D",
"grouping": "_custom_level_2_624287",
"columns": {
"time_weighted_return": -0.06604362523792162,
"time_weighted_return_2": -0.06604362523792162,
"_custom_twr_audit_note_911328": null,
"value": 5722529.229999997
},
"children": [{
"entity_id": 10868778,
"name": "2012 Desc Tr HBO Thalia",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.06154960593668424,
"time_weighted_return_2": -0.06154960593668424,
"_custom_twr_audit_note_911328": null,
"value": 1191838.9399999995
},
"children": []
},
{
"entity_id": 10643052,
"name": "2013 Irrev Tr HBO Thalia",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.06750460387418267,
"time_weighted_return_2": -0.06750460387418267,
"_custom_twr_audit_note_911328": null,
"value": 4416618.520000002
},
"children": []
},
{
"entity_id": 10598341,
"name": "Cht 11th Tr HBO Shirley",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.05604507809250081,
"time_weighted_return_2": -0.05604507809250081,
"_custom_twr_audit_note_911328": null,
"value": 38190.33
},
"children": []
},
{
"entity_id": 10598337,
"name": "Cht 11th Tr HBO Hannah",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.05604507809250081,
"time_weighted_return_2": -0.05604507809250081,
"_custom_twr_audit_note_911328": null,
"value": 37940.72
},
"children": []
},
{
"entity_id": 10598334,
"name": "Cht 11th Tr HBO Lau",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.05604507809250081,
"time_weighted_return_2": -0.05604507809250081,
"_custom_twr_audit_note_911328": null,
"value": 37940.72
},
"children": []
}
]
},
{
"name": "Asset Class E",
"grouping": "_custom_level_2_624287",
"columns": {
"time_weighted_return": -0.017118805423322003,
"time_weighted_return_2": -0.017118805423322003,
"_custom_twr_audit_note_911328": null,
"value": 3148495.0914600003
},
"children": [{
"entity_id": 10868778,
"name": "2012 Desc Tr HBO Thalia",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.015251157805867277,
"time_weighted_return_2": -0.015251157805867277,
"_custom_twr_audit_note_911328": null,
"value": 800493.06146
},
"children": []
},
{
"entity_id": 10643052,
"name": "2013 Irrev Tr HBO Thalia",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.01739609576880241,
"time_weighted_return_2": -0.01739609576880241,
"_custom_twr_audit_note_911328": null,
"value": 2215511.2700000005
},
"children": []
},
{
"entity_id": 10598341,
"name": "Cht 11th Tr HBO Shirley",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.02085132265594647,
"time_weighted_return_2": -0.02085132265594647,
"_custom_twr_audit_note_911328": null,
"value": 44031.21
},
"children": []
},
{
"entity_id": 10598337,
"name": "Cht 11th Tr HBO Hannah",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.02089393244695803,
"time_weighted_return_2": -0.02089393244695803,
"_custom_twr_audit_note_911328": null,
"value": 44394.159999999996
},
"children": []
},
{
"entity_id": 10598334,
"name": "Cht 11th Tr HBO Lau",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.020607507059866248,
"time_weighted_return_2": -0.020607507059866248,
"_custom_twr_audit_note_911328": null,
"value": 44065.39000000001
},
"children": []
}
]
},
{
"name": "Asset Class F",
"grouping": "_custom_level_2_624287",
"columns": {
"time_weighted_return": -0.0014710489231547497,
"time_weighted_return_2": -0.0014710489231547497,
"_custom_twr_audit_note_911328": null,
"value": 198260.12
},
"children": [{
"entity_id": 10868778,
"name": "2012 Desc Tr HBO Thalia",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.0014477244560456848,
"time_weighted_return_2": -0.0014477244560456848,
"_custom_twr_audit_note_911328": null,
"value": 44612.33
},
"children": []
},
{
"entity_id": 10643052,
"name": "2013 Irrev Tr HBO Thalia",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.001477821083437858,
"time_weighted_return_2": -0.001477821083437858,
"_custom_twr_audit_note_911328": null,
"value": 153647.78999999998
},
"children": []
}
]
}
]
}
]
}]
}
},
"included": []
}
}
注释 JSON Object 摘录
data
- 此处的数据可以忽略,这些是底层 children 的汇总值。
meta
- columns
– 包含我想用于每个适用的 children
的 'columns' 键值对的列 header 值。
groupings
- 可以忽略。
children
层级 - children
有 4 个级别,可以通过它们的 name
识别如下 -
- 家庭
name
(即 'Falconer Family')
- 财富桶
name
(例如 'Wealth Bucket A')
- 资产类别
name
(例如 'Asset Class A')
- 基金
name
(例如 'HUDJ Trust')
目标输出
这是我试图实现的目标df
结构的摘录-
portfolio
name
entity_id
Adjusted Value (No Div, USD)
Current Quarter TWR (USD)
YTD TWR (USD)
TWR Audit Note
Falconer Family
Falconer Family
23132492.90510712
-0.046732301295604683
-0.046732301295604683
None
Falconer Family
Wealth Bucket A
13264448.506587146
-0.045960317420568164
-0.045960317420568164
None
Falconer Family
Asset Class A
3337.99
0.000003434094574039648
0.000003434094574039648
None
Falconer Family
HUDJ Trust
10604454
3337.99
0.000003434094574039648
0.000003434094574039648
None
Falconer Family
Asset Class B
1017004.7192636987
-0.025871339096964152
-0.025871339096964152
None
Falconer Family
HUDG Trust
10604454
1017004.7192636987
-0.025871339096964152
-0.025871339096964152
None
Falconer Family
Asset Class C
231142.67772000004
-0.030370376329670656
-0.030370376329670656
None
Falconer Family
HKDJ Trust
10604454
231142.67772000004
-0.030370376329670656
-0.030370376329670656
None
Falconer Family
Asset Class D
9791282.570000006
-0.05382756475465478
-0.05382756475465478
None
Falconer Family
HUDW Trust
10604454
9791282.570000006
-0.05382756475465478
-0.05382756475465478
None
关于目标输出的注释
- Portfolio header – 对于每一行,我想映射 top-level
children
name
值 [family name],例如 'Falconer Family'。
- 名称 header – 这应该只是每个
children
的 name
值。
- 实体 ID – 所有第 4 级
children
entity_id
值应映射到此列。
- 数据列 – 无论级别如何,所有
children
都具有相同的 time_weighted_return
、time_weighted_return_2
和 value
列,应分别映射。
- TWR 审计说明 – 这些
children
_custom_twr_audit_note_911328
值目前是空白的,但将来会用到。
目前的产出
我的主要问题是你可以看到我只能进入第一个[家庭]和第二个[财富桶] children
级。这让我错过了第三个 [资产 Class] 和第四个 [基金] -
portfolio
name
Adjusted Value (No Div, USD)
Current Quarter TWR (USD)
YTD TWR (USD)
TWR Audit Note
0
Falconer Family
Falconer Family
2.313249e+07
-0.046732
-0.046732
None
1
Falconer Family
Wealth Bucket A
1.326445e+07
-0.045960
-0.045960
None
2
Falconer Family
Wealth Bucket B
9.868044e+06
-0.047699
-0.047699
None
当前代码
下面这个函数能让我得到正确的 df
格式,但我的主要问题是:我找不到一种能返回所有层级的 children(而不是仅返回 top-level)的解决方案 -
def response_writer():
    """Load the saved API response from disk and return it as parsed JSON.

    Returns:
        The deserialized content of ``api_response_2022-02-13.json``,
        opened relative to the current working directory.

    Raises:
        FileNotFoundError: If the response file does not exist.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # JSON text is UTF-8 by specification; being explicit keeps decoding
    # independent of the platform's default locale encoding.
    with open('api_response_2022-02-13.json', encoding='utf-8') as f:
        return json.load(f)
def unpack_response(max_attempts=3):
    """Unpack the JSON response into a pandas DataFrame.

    One row is produced for every top-level child of ``total`` (a
    "portfolio"), followed by one row for each of that portfolio's *direct*
    children. Deeper descendants are not visited by this function.

    Each row is a copy of the entity's ``columns`` mapping plus a
    ``portfolio`` field (the enclosing top-level child's name) and a ``name``
    field (the entity's own name). The result is restricted to ``portfolio``,
    ``name`` and the keys listed in ``meta.columns``, and the latter are
    renamed to their ``display_name``.

    Args:
        max_attempts: How many times to load and unpack the response before
            giving up when an expected key is missing. Must be at least 1.

    Returns:
        The concatenation of one DataFrame per portfolio; each per-portfolio
        frame keeps its own 0-based row index.

    Raises:
        ValueError: If ``max_attempts`` is smaller than 1 (also raised by
            ``pd.concat`` when there are no top-level children).
        KeyError: If an expected key is still missing on the last attempt.
    """
    if max_attempts < 1:
        raise ValueError("max_attempts must be at least 1")

    # The previous implementation retried forever on KeyError, which never
    # terminates when the payload is consistently malformed. Retries are now
    # bounded and the final failure is surfaced to the caller.
    for attempt in range(1, max_attempts + 1):
        try:
            api_response = response_writer()
            portfolio_views_children = api_response['data']['attributes']['total']['children']
            portfolios = []

            for portfolio in portfolio_views_children:
                entity_columns = []
                # include portfolio itself within an iterable so the total is the header
                for entity in itertools.chain([portfolio], portfolio["children"]):
                    entity_data = entity["columns"].copy()  # don't mutate original response
                    entity_data["portfolio"] = portfolio["name"]  # from outer
                    entity_data["name"] = entity["name"]
                    entity_columns.append(entity_data)
                portfolios.append(pd.DataFrame(entity_columns))

            # combine dataframes
            df = pd.concat(portfolios)

            # reorder and rename
            column_ordering = {"portfolio": "portfolio", "name": "name"}
            column_ordering.update({c["key"]: c["display_name"] for c in api_response["meta"]["columns"]})
            df = df[list(column_ordering)]  # beware: un-named cols will be dropped
            return df.rename(columns=column_ordering)
        except KeyError:
            print("-----------------------------------\n","API TIMEOUT ERROR: TRY AGAIN...", "\n-----------------------------------\n")
            if attempt == max_attempts:
                raise
# Run the unpacking function; its return value is not assigned to a name here.
unpack_response()
帮助
简而言之,我正在寻找一些关于如何通过增强现有代码来利用剩余 children
的建议。虽然我已尽量完整地说明了问题,但如果有任何不清楚的地方,请随时提问。请注意 JSON 中可能包含多个家庭,因此所提供的解决方案/建议必须能处理这种情况。
我认为这让你很接近;可能只需要调整各种 name
列并删除额外的数据(我保留了 grouping
列)。
主要思想是对所有可用的 children
级别递归使用 pd.json_normalize with pd.concat。
编辑: 将所有内容放入一个函数中并添加部分以像预期输出那样折叠 name
列。
def process_json(api_response):
    """Flatten the nested ``children`` hierarchy of a response into one DataFrame.

    Args:
        api_response: Parsed JSON response. The function reads
            ``api_response['meta']['columns']`` (entries with ``key`` and
            ``display_name``) and
            ``api_response['data']['attributes']['total']['children']``
            (entries with ``name``, ``columns`` and ``children``).

    Returns:
        A DataFrame with one row per node found at any ``children`` level.
        ``portfolio`` holds the name of the top-level child the row belongs
        to, ``name`` holds the row's own name, and the columns listed in
        ``meta.columns`` are placed last and renamed to their display names.
    """
    def get_column_values(df):
        # Pop the 'columns' field, normalize it into a frame, give that frame
        # the same index as ``df`` and attach it side by side.
        return pd.concat([df, pd.json_normalize(df.pop('columns')).set_axis(df.index)], axis=1)

    def expand_children(df):
        # Appends one level of children below the rows of ``df``; returns the
        # resulting index level names together with the combined frame.
        if len(df.index) > 1:
            # Turn missing values and empty child lists into None so that
            # dropna() below discards rows without children.
            df['children'] = df['children'].fillna('').apply(lambda x: None if len(x) == 0 else x)
        # explode() yields one entry per child while repeating the parent's index.
        df_children = df.pop('children').dropna().explode()
        if len(df_children.index) == 0: # return df if no children to append
            return df.index.names, df
        # max_level=0 leaves the nested 'columns' value unflattened for
        # get_column_values(); the child's own name becomes an extra index level.
        df_children = pd.json_normalize(df_children, max_level=0).set_axis(df_children.index).set_index('name', append=True)
        df_children = get_column_values(df_children)
        idx_names = list(df_children.index.names)
        # Rename the newly added level to 'name_<number of index levels>'.
        idx_names[-1] = idx_names[-1] + '_' + str(len(idx_names))
        # Add the same (empty) level to the parent rows so both frames share
        # the same index levels before being stacked.
        df[idx_names[-1]] = None
        return idx_names, pd.concat([df.set_index(idx_names[-1], append=True), df_children], axis=0)

    columns_dict = pd.DataFrame(api_response['meta']['columns']).set_index('key').to_dict(orient='index') # save column definitions
    df = pd.DataFrame(api_response['data']['attributes']['total']['children']).set_index('name') # get initial dataframe
    df = get_column_values(df) # get columns for initial level
    # expand children, one level per iteration, until no 'children' column is left
    # NOTE(review): idx_names is only bound inside this loop; if the initial
    # frame has no 'children' column, the code after the loop raises NameError.
    while 'children' in df.columns:
        idx_names, df = expand_children(df)
    # reorder/replace column headers and sort index:
    # columns not listed in meta come first, then the meta columns in meta order
    df = (df.loc[:, [x for x in df.columns if x not in columns_dict.keys()] + list(columns_dict.keys())]
          .rename(columns={k:v['display_name'] for k,v in columns_dict.items()})
          .sort_index(na_position='first').reset_index())
    # collapse "name" columns (careful of potential duplicate rows):
    # walking the levels from last to first fills the last level's gaps from
    # the nearest populated earlier level, leaving each row's own name there
    for col in idx_names[::-1]:
        df[idx_names[-1]] = df[idx_names[-1]].fillna(df[col])
    # first level -> 'portfolio', collapsed last level -> 'name', drop the levels in between
    df = df.rename(columns={'name': 'portfolio', idx_names[-1]: 'name'}).drop(columns=idx_names[1:-1])
    return df
由于另一个答案使用 iterrows
,通常不建议这样做,因此认为快速比较是值得的。
process_json(api_response)
54.2 ms ± 7.12 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
unpack_response(api_response) # iterrows
84.3 ms ± 9.04 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
jsonpath-ng
甚至可以很容易地解析这样一个嵌套的 json 对象。您可以通过以下命令安装这个方便的库:
pip install --upgrade jsonpath-ng
代码:
import json
import jsonpath_ng as jp
import pandas as pd
def unpack_response(r):
    """Flatten every ``children`` node of the API response into one DataFrame.

    Args:
        r: Parsed API response (a dict). ``r['meta']['columns']`` lists the
            value columns as ``key``/``display_name`` pairs; the nodes to
            extract are found under any ``children`` list in ``r``.

    Returns:
        A DataFrame with one row per matched node and the columns
        ``full_path``, ``portfolio``, ``name``, ``entity_id`` followed by the
        display names from ``meta.columns``. Columns that no node provides are
        filled with NaN. An empty frame with these columns is returned when
        nothing matches.
    """
    def index_key(row):
        # 'a.children.[10].children.[2]' -> [10, 2]. Comparing the numeric
        # indices keeps sibling 10 after sibling 2; a plain string sort of the
        # path would put '[10]' before '[2]'.
        return [
            int(part[1:-1])
            for part in row['full_path'].split('.')
            if part.startswith('[') and part[1:-1].isdigit()
        ]

    # Map the flattened 'columns.<key>' names to their display names and
    # derive the output column order from the response metadata.
    trans = {'columns.' + c['key']: c['display_name'] for c in r['meta']['columns']}
    cols = ['full_path', 'portfolio', 'name', 'entity_id', *trans.values()]

    # Collect every element of every 'children' list, at any depth.
    expr = jp.parse('$..children.[*]')
    data = [{'full_path': str(m.full_path), **m.value} for m in expr.find(r)]
    if not data:
        return pd.DataFrame(columns=cols)

    # Order parents directly before their descendants.
    data.sort(key=index_key)
    df = pd.json_normalize(data)

    # Rows directly below 'total' are the portfolios; carry their name down to
    # all rows that follow until the next portfolio row.
    is_top_level = df['full_path'].str.contains(r'total\.children\.\[\d+]$')
    df['portfolio'] = df['name'].where(is_top_level).ffill()

    return df.rename(columns=trans).reindex(columns=cols)
# Load the sample data from file
# with open('api_response_2022-02-13.json', 'r') as f:
# api_response = json.load(f)
# Load the sample data from string
api_response = json.loads('{"meta": {"columns": [{"key": "value", "display_name": "Adjusted Value (No Div, USD)", "output_type": "Number", "currency": "USD"}, {"key": "time_weighted_return", "display_name": "Current Quarter TWR (USD)", "output_type": "Percent", "currency": "USD"}, {"key": "time_weighted_return_2", "display_name": "YTD TWR (USD)", "output_type": "Percent", "currency": "USD"}, {"key": "_custom_twr_audit_note_911328", "display_name": "TWR Audit Note", "output_type": "Word"}], "groupings": [{"key": "_custom_name_747205", "display_name": "* Reporting Client Name"}, {"key": "_custom_new_entity_group_453577", "display_name": "NEW Entity Group"}, {"key": "_custom_level_2_624287", "display_name": "* Level 2"}, {"key": "legal_entity", "display_name": "Legal Entity"}]}, "data": {"type": "portfolio_views", "attributes": {"total": {"name": "Total", "columns": {"time_weighted_return": -0.046732301295604683, "time_weighted_return_2": -0.046732301295604683, "_custom_twr_audit_note_911328": null, "value": 23132492.905107163}, "children": [{"name": "Falconer Family", "grouping": "_custom_name_747205", "columns": {"time_weighted_return": -0.046732301295604683, "time_weighted_return_2": -0.046732301295604683, "_custom_twr_audit_note_911328": null, "value": 23132492.905107163}, "children": [{"name": "Wealth Bucket A", "grouping": "_custom_new_entity_group_453577", "columns": {"time_weighted_return": -0.045960317420568164, "time_weighted_return_2": -0.045960317420568164, "_custom_twr_audit_note_911328": null, "value": 13264448.506587159}, "children": [{"name": "Asset Class A", "grouping": "_custom_level_2_624287", "columns": {"time_weighted_return": 3.434094574039648e-06, "time_weighted_return_2": 3.434094574039648e-06, "_custom_twr_audit_note_911328": null, "value": 3337.99}, "children": [{"entity_id": 10604454, "name": "HUDJ Trust", "grouping": "legal_entity", "columns": {"time_weighted_return": 3.434094574039648e-06, "time_weighted_return_2": 3.434094574039648e-06, 
"_custom_twr_audit_note_911328": null, "value": 3337.99}, "children": []}]}, {"name": "Asset Class B", "grouping": "_custom_level_2_624287", "columns": {"time_weighted_return": -0.025871339096964152, "time_weighted_return_2": -0.025871339096964152, "_custom_twr_audit_note_911328": null, "value": 1017004.7192636987}, "children": [{"entity_id": 10604454, "name": "HUDG Trust", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.025871339096964152, "time_weighted_return_2": -0.025871339096964152, "_custom_twr_audit_note_911328": null, "value": 1017004.7192636987}, "children": []}]}, {"name": "Asset Class C", "grouping": "_custom_level_2_624287", "columns": {"time_weighted_return": -0.030370376329670656, "time_weighted_return_2": -0.030370376329670656, "_custom_twr_audit_note_911328": null, "value": 231142.67772000004}, "children": [{"entity_id": 10604454, "name": "HKDJ Trust", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.030370376329670656, "time_weighted_return_2": -0.030370376329670656, "_custom_twr_audit_note_911328": null, "value": 231142.67772000004}, "children": []}]}, {"name": "Asset Class D", "grouping": "_custom_level_2_624287", "columns": {"time_weighted_return": -0.05382756475465478, "time_weighted_return_2": -0.05382756475465478, "_custom_twr_audit_note_911328": null, "value": 9791282.570000006}, "children": [{"entity_id": 10604454, "name": "HUDW Trust", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.05382756475465478, "time_weighted_return_2": -0.05382756475465478, "_custom_twr_audit_note_911328": null, "value": 9791282.570000006}, "children": []}]}, {"name": "Asset Class E", "grouping": "_custom_level_2_624287", "columns": {"time_weighted_return": -0.01351630404081805, "time_weighted_return_2": -0.01351630404081805, "_custom_twr_audit_note_911328": null, "value": 2153366.6396034593}, "children": [{"entity_id": 10604454, "name": "HJDJ Trust", "grouping": "legal_entity", "columns": 
{"time_weighted_return": -0.01351630404081805, "time_weighted_return_2": -0.01351630404081805, "_custom_twr_audit_note_911328": null, "value": 2153366.6396034593}, "children": []}]}, {"name": "Asset Class F", "grouping": "_custom_level_2_624287", "columns": {"time_weighted_return": -0.002298190175237247, "time_weighted_return_2": -0.002298190175237247, "_custom_twr_audit_note_911328": null, "value": 68313.90999999999}, "children": [{"entity_id": 10604454, "name": "HADJ Trust", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.002298190175237247, "time_weighted_return_2": -0.002298190175237247, "_custom_twr_audit_note_911328": null, "value": 68313.90999999999}, "children": []}]}]}, {"name": "Wealth Bucket B", "grouping": "_custom_new_entity_group_453577", "columns": {"time_weighted_return": -0.04769870075659244, "time_weighted_return_2": -0.04769870075659244, "_custom_twr_audit_note_911328": null, "value": 9868044.398519998}, "children": [{"name": "Asset Class A", "grouping": "_custom_level_2_624287", "columns": {"time_weighted_return": 2.8632718065191298e-05, "time_weighted_return_2": 2.8632718065191298e-05, "_custom_twr_audit_note_911328": null, "value": 10234.94}, "children": [{"entity_id": 10868778, "name": "2012 Desc Tr HBO Thalia", "grouping": "legal_entity", "columns": {"time_weighted_return": 2.82679297198829e-05, "time_weighted_return_2": 2.82679297198829e-05, "_custom_twr_audit_note_911328": null, "value": 244.28}, "children": []}, {"entity_id": 10643052, "name": "2013 Irrev Tr HBO Thalia", "grouping": "legal_entity", "columns": {"time_weighted_return": 4.9373572795108345e-05, "time_weighted_return_2": 4.9373572795108345e-05, "_custom_twr_audit_note_911328": null, "value": 5081.08}, "children": []}, {"entity_id": 10598341, "name": "Cht 11th Tr HBO Shirley", "grouping": "legal_entity", "columns": {"time_weighted_return": 6.609603754315074e-06, "time_weighted_return_2": 6.609603754315074e-06, "_custom_twr_audit_note_911328": null, "value": 
1523.62}, "children": []}, {"entity_id": 10598337, "name": "Cht 11th Tr HBO Hannah", "grouping": "legal_entity", "columns": {"time_weighted_return": 1.0999769004760296e-05, "time_weighted_return_2": 1.0999769004760296e-05, "_custom_twr_audit_note_911328": null, "value": 1828.9}, "children": []}, {"entity_id": 10598334, "name": "Cht 11th Tr HBO Lau", "grouping": "legal_entity", "columns": {"time_weighted_return": 6.466673995619843e-06, "time_weighted_return_2": 6.466673995619843e-06, "_custom_twr_audit_note_911328": null, "value": 1557.06}, "children": []}]}, {"name": "Asset Class B", "grouping": "_custom_level_2_624287", "columns": {"time_weighted_return": -0.024645947842438676, "time_weighted_return_2": -0.024645947842438676, "_custom_twr_audit_note_911328": null, "value": 674052.31962}, "children": [{"entity_id": 10868778, "name": "2012 Desc Tr HBO Thalia", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.043304004172576405, "time_weighted_return_2": -0.043304004172576405, "_custom_twr_audit_note_911328": null, "value": 52800.96}, "children": []}, {"entity_id": 10643052, "name": "2013 Irrev Tr HBO Thalia", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.022408434778798836, "time_weighted_return_2": -0.022408434778798836, "_custom_twr_audit_note_911328": null, "value": 599594.11962}, "children": []}, {"entity_id": 10598341, "name": "Cht 11th Tr HBO Shirley", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.039799855483646174, "time_weighted_return_2": -0.039799855483646174, "_custom_twr_audit_note_911328": null, "value": 7219.08}, "children": []}, {"entity_id": 10598337, "name": "Cht 11th Tr HBO Hannah", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.039799855483646174, "time_weighted_return_2": -0.039799855483646174, "_custom_twr_audit_note_911328": null, "value": 7219.08}, "children": []}, {"entity_id": 10598334, "name": "Cht 11th Tr HBO Lau", "grouping": "legal_entity", "columns": 
{"time_weighted_return": -0.039799855483646174, "time_weighted_return_2": -0.039799855483646174, "_custom_twr_audit_note_911328": null, "value": 7219.08}, "children": []}]}, {"name": "Asset Class C", "grouping": "_custom_level_2_624287", "columns": {"time_weighted_return": -0.03037038746301135, "time_weighted_return_2": -0.03037038746301135, "_custom_twr_audit_note_911328": null, "value": 114472.69744}, "children": [{"entity_id": 10868778, "name": "2012 Desc Tr HBO Thalia", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.030370390035505124, "time_weighted_return_2": -0.030370390035505124, "_custom_twr_audit_note_911328": null, "value": 114472.68744000001}, "children": []}, {"entity_id": 10643052, "name": "2013 Irrev Tr HBO Thalia", "grouping": "legal_entity", "columns": {"time_weighted_return": 0, "time_weighted_return_2": 0, "_custom_twr_audit_note_911328": null, "value": 0.01}, "children": []}]}, {"name": "Asset Class D", "grouping": "_custom_level_2_624287", "columns": {"time_weighted_return": -0.06604362523792162, "time_weighted_return_2": -0.06604362523792162, "_custom_twr_audit_note_911328": null, "value": 5722529.229999997}, "children": [{"entity_id": 10868778, "name": "2012 Desc Tr HBO Thalia", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.06154960593668424, "time_weighted_return_2": -0.06154960593668424, "_custom_twr_audit_note_911328": null, "value": 1191838.9399999995}, "children": []}, {"entity_id": 10643052, "name": "2013 Irrev Tr HBO Thalia", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.06750460387418267, "time_weighted_return_2": -0.06750460387418267, "_custom_twr_audit_note_911328": null, "value": 4416618.520000002}, "children": []}, {"entity_id": 10598341, "name": "Cht 11th Tr HBO Shirley", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.05604507809250081, "time_weighted_return_2": -0.05604507809250081, "_custom_twr_audit_note_911328": null, "value": 38190.33}, 
"children": []}, {"entity_id": 10598337, "name": "Cht 11th Tr HBO Hannah", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.05604507809250081, "time_weighted_return_2": -0.05604507809250081, "_custom_twr_audit_note_911328": null, "value": 37940.72}, "children": []}, {"entity_id": 10598334, "name": "Cht 11th Tr HBO Lau", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.05604507809250081, "time_weighted_return_2": -0.05604507809250081, "_custom_twr_audit_note_911328": null, "value": 37940.72}, "children": []}]}, {"name": "Asset Class E", "grouping": "_custom_level_2_624287", "columns": {"time_weighted_return": -0.017118805423322003, "time_weighted_return_2": -0.017118805423322003, "_custom_twr_audit_note_911328": null, "value": 3148495.0914600003}, "children": [{"entity_id": 10868778, "name": "2012 Desc Tr HBO Thalia", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.015251157805867277, "time_weighted_return_2": -0.015251157805867277, "_custom_twr_audit_note_911328": null, "value": 800493.06146}, "children": []}, {"entity_id": 10643052, "name": "2013 Irrev Tr HBO Thalia", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.01739609576880241, "time_weighted_return_2": -0.01739609576880241, "_custom_twr_audit_note_911328": null, "value": 2215511.2700000005}, "children": []}, {"entity_id": 10598341, "name": "Cht 11th Tr HBO Shirley", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.02085132265594647, "time_weighted_return_2": -0.02085132265594647, "_custom_twr_audit_note_911328": null, "value": 44031.21}, "children": []}, {"entity_id": 10598337, "name": "Cht 11th Tr HBO Hannah", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.02089393244695803, "time_weighted_return_2": -0.02089393244695803, "_custom_twr_audit_note_911328": null, "value": 44394.159999999996}, "children": []}, {"entity_id": 10598334, "name": "Cht 11th Tr HBO Lau", "grouping": "legal_entity", 
"columns": {"time_weighted_return": -0.020607507059866248, "time_weighted_return_2": -0.020607507059866248, "_custom_twr_audit_note_911328": null, "value": 44065.39000000001}, "children": []}]}, {"name": "Asset Class F", "grouping": "_custom_level_2_624287", "columns": {"time_weighted_return": -0.0014710489231547497, "time_weighted_return_2": -0.0014710489231547497, "_custom_twr_audit_note_911328": null, "value": 198260.12}, "children": [{"entity_id": 10868778, "name": "2012 Desc Tr HBO Thalia", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.0014477244560456848, "time_weighted_return_2": -0.0014477244560456848, "_custom_twr_audit_note_911328": null, "value": 44612.33}, "children": []}, {"entity_id": 10643052, "name": "2013 Irrev Tr HBO Thalia", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.001477821083437858, "time_weighted_return_2": -0.001477821083437858, "_custom_twr_audit_note_911328": null, "value": 153647.78999999998}, "children": []}]}]}]}]}}, "included": []}}')
df = unpack_response(api_response)
解释:
首先,您可以通过以下命令确认预期的输出:
print(df.iloc[:5:,1:])
portfolio
name
entity_id
Adjusted Value (No Div, USD)
Current Quarter TWR (USD)
YTD TWR (USD)
TWR Audit Note
Falconer Family
Falconer Family
nan
2.31325e+07
-0.0467323
-0.0467323
Falconer Family
Wealth Bucket A
nan
1.32644e+07
-0.0459603
-0.0459603
Falconer Family
Asset Class A
nan
3337.99
3.43409e-06
3.43409e-06
Falconer Family
HUDJ Trust
1.06045e+07
3337.99
3.43409e-06
3.43409e-06
Falconer Family
Asset Class B
nan
1.017e+06
-0.0258713
-0.0258713
随后,您可以通过以下命令查看jsonpath-ng
中的精彩功能之一:
print(df.iloc[:10,:3])
full_path
portfolio
name
data.attributes.total.children.[0]
Falconer Family
Falconer Family
data.attributes.total.children.[0].children.[0]
Falconer Family
Wealth Bucket A
data.attributes.total.children.[0].children.[0].children.[0]
Falconer Family
Asset Class A
data.attributes.total.children.[0].children.[0].children.[0].children.[0]
Falconer Family
HUDJ Trust
data.attributes.total.children.[0].children.[0].children.[1]
Falconer Family
Asset Class B
data.attributes.total.children.[0].children.[0].children.[1].children.[0]
Falconer Family
HUDG Trust
data.attributes.total.children.[0].children.[0].children.[2]
Falconer Family
Asset Class C
data.attributes.total.children.[0].children.[0].children.[2].children.[0]
Falconer Family
HKDJ Trust
data.attributes.total.children.[0].children.[0].children.[3]
Falconer Family
Asset Class D
data.attributes.total.children.[0].children.[0].children.[3].children.[0]
Falconer Family
HUDW Trust
多亏了 full_path 列,您可以一眼看出每行提取数据所处的嵌套层级。实际上,我正是利用这些路径来填充正确的 portfolio 值的。
在代码方面,重点是下面一行:
expr = jp.parse('$..children.[*]')
通过上面的表达式,您可以在json对象的任何级别搜索children
属性。 README.rst 告诉您每个语法代表什么。
Syntax
Meaning
$
The root object
jsonpath1 .. jsonpath2
All nodes matched by jsonpath2 that descend from any node matching jsonpath1
[*]
any array index
速度:
我比较了上述 jsonpath-ng
方法和下面显示的 nested-for-loop 方法的速度。
#比较:
Method
Duration
Speed ratio
jsonpath-ng
9.72 ms ± 342 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
5.7 (faster)
Nested-for-loop
55.4 ms ± 7.39 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
1
#nested-for-loop方法的代码:
def unpack_response(r):
    """Flatten the four-level ``children`` hierarchy with explicit nested loops.

    Args:
        r: Parsed API response (a dict). The top-level nodes are read from
            ``r['data']['attributes']['total']['children']`` and the value
            columns from ``r['meta']['columns']``.

    Returns:
        A DataFrame with one row per node, parents directly followed by their
        descendants, and the columns ``portfolio``, ``name``, ``entity_id``
        followed by the display names from ``meta.columns``. Columns that no
        node provides are filled with NaN.
    """
    # Collect plain dicts and build the frame once at the end;
    # DataFrame.append was removed in pandas 2.0 and copied the whole frame on
    # every call.
    rows = []
    top_level = pd.json_normalize(r, ['data', 'attributes', 'total', 'children'])
    for _, r1 in top_level.iterrows():
        # Only top-level rows get a portfolio value here; it is forward-filled
        # onto their descendants below.
        rows.append({**r1.to_dict(), 'portfolio': r1['name']})
        for _, r2 in pd.json_normalize(r1['children']).iterrows():
            rows.append(r2.to_dict())
            for _, r3 in pd.json_normalize(r2['children']).iterrows():
                rows.append(r3.to_dict())
                # Fourth level: these rows are added as-is, their own children
                # are not visited.
                rows.extend(pd.json_normalize(r3['children']).to_dict('records'))

    # Map the flattened 'columns.<key>' names to their display names and
    # derive the output column order from the response metadata.
    trans = {'columns.' + c['key']: c['display_name'] for c in r['meta']['columns']}
    cols = ['portfolio', 'name', 'entity_id', *trans.values()]

    df = pd.DataFrame(rows).rename(columns=trans).reindex(columns=cols)
    df['portfolio'] = df['portfolio'].ffill()
    return df
你的问题在于:JSON 的一层里面嵌套了 3 层 children。你希望把它们全部加载到 pandas 中,并在 pandas 内部展开(explode)它们,而不是在外部手动编写 for 循环。
挑战:展开(explode)时,每个 child 都有一个名为 name 的字段,而该字段在其 parent 中已经存在。因此,我们需要在逐层展开时对它们重命名(就像您在原始问题中所做的那样,例如,'portfolio' 在 json 中实际上叫做 'name')。最后,保存在 columns 字段中的所有其他值都可以被解包(unpack)。
完整的工作示例:
import pandas as pd
import json
# Load the API response; the context manager closes the file handle again and
# the encoding is explicit so the result does not depend on the locale.
with open('api_response_2022-02-13.json', encoding='utf-8') as f:
    json_dict = json.load(f)

# Build a (still nested) frame from the top-level children and rename their
# 'name' field to 'portfolio'.
packed_df = (
    pd.DataFrame.from_dict(json_dict['data']['attributes']['total']['children'])
    .rename(columns={'name': 'portfolio'})
)

# Expand the level-1 'children'; their 'name' field becomes 'grand_parent_name'.
# Only the first 'children' list of each group is expanded (x.values[0]).
unpacked_df = (
    packed_df.groupby('portfolio')['children']
    .apply(lambda x: pd.DataFrame(x.values[0]))
    .reset_index()
    .rename(columns={'name': 'grand_parent_name'})
)

# Expand the level-2 'children'; their 'name' field becomes 'parent_name'.
unpacked_df = (
    unpacked_df.groupby(['portfolio', 'grand_parent_name'])['children']
    .apply(lambda x: pd.DataFrame(x.values[0]))
    .reset_index()
    .rename(columns={'name': 'parent_name'})
)

# Expand the level-3 'children' and keep their 'name' field as is.
unpacked_df = (
    unpacked_df.groupby(['portfolio', 'grand_parent_name', 'parent_name'])['children']
    .apply(lambda x: pd.DataFrame(x.values[0]))
    .reset_index()
)

# Spread the per-row 'columns' dict into one column per key.
unpacked_df = pd.concat(
    [
        unpacked_df.drop('columns', axis=1),
        pd.DataFrame(unpacked_df['columns'].tolist()),
    ],
    axis=1,
)
当然,您可以将这 3 个递归的解包(un-packing)步骤放进一个单独的函数中,但为了清楚起见,这里将它们分开写(您实际上是在解包不同层级的 'children',需要分别处理每一层的 'name')。
最后,您就拥有了包含全部 3 个层级所有列的 df(整个 json 都在一个 df 中)- 可以随意重命名或删除其中的列。请注意,在您的最终输出中,屏幕截图里的 name 列已经混合了不同的层级(“Wealth Bucket A/B”与“HUDW/HADJ Trust”不在同一级别)。
背景
我有一个复杂的嵌套 JSON object,我正在尝试将其解压成 pandas df
以一种非常具体的方式。
JSON Object
这是一段摘录,其中的 JSON object 使用的是随机数据。它展示了 1 个家族(即 'Falconer Family')的层次结构示例(包括 children);这段摘录只包含 1 个家族,而完整的 JSON object 中有多个家族(总共有 100 个)-
{
"meta": {
"columns": [{
"key": "value",
"display_name": "Adjusted Value (No Div, USD)",
"output_type": "Number",
"currency": "USD"
},
{
"key": "time_weighted_return",
"display_name": "Current Quarter TWR (USD)",
"output_type": "Percent",
"currency": "USD"
},
{
"key": "time_weighted_return_2",
"display_name": "YTD TWR (USD)",
"output_type": "Percent",
"currency": "USD"
},
{
"key": "_custom_twr_audit_note_911328",
"display_name": "TWR Audit Note",
"output_type": "Word"
}
],
"groupings": [{
"key": "_custom_name_747205",
"display_name": "* Reporting Client Name"
},
{
"key": "_custom_new_entity_group_453577",
"display_name": "NEW Entity Group"
},
{
"key": "_custom_level_2_624287",
"display_name": "* Level 2"
},
{
"key": "legal_entity",
"display_name": "Legal Entity"
}
]
},
"data": {
"type": "portfolio_views",
"attributes": {
"total": {
"name": "Total",
"columns": {
"time_weighted_return": -0.046732301295604683,
"time_weighted_return_2": -0.046732301295604683,
"_custom_twr_audit_note_911328": null,
"value": 23132492.905107163
},
"children": [{
"name": "Falconer Family",
"grouping": "_custom_name_747205",
"columns": {
"time_weighted_return": -0.046732301295604683,
"time_weighted_return_2": -0.046732301295604683,
"_custom_twr_audit_note_911328": null,
"value": 23132492.905107163
},
"children": [{
"name": "Wealth Bucket A",
"grouping": "_custom_new_entity_group_453577",
"columns": {
"time_weighted_return": -0.045960317420568164,
"time_weighted_return_2": -0.045960317420568164,
"_custom_twr_audit_note_911328": null,
"value": 13264448.506587159
},
"children": [{
"name": "Asset Class A",
"grouping": "_custom_level_2_624287",
"columns": {
"time_weighted_return": 0.000003434094574039648,
"time_weighted_return_2": 0.000003434094574039648,
"_custom_twr_audit_note_911328": null,
"value": 3337.99
},
"children": [{
"entity_id": 10604454,
"name": "HUDJ Trust",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": 0.000003434094574039648,
"time_weighted_return_2": 0.000003434094574039648,
"_custom_twr_audit_note_911328": null,
"value": 3337.99
},
"children": []
}]
},
{
"name": "Asset Class B",
"grouping": "_custom_level_2_624287",
"columns": {
"time_weighted_return": -0.025871339096964152,
"time_weighted_return_2": -0.025871339096964152,
"_custom_twr_audit_note_911328": null,
"value": 1017004.7192636987
},
"children": [{
"entity_id": 10604454,
"name": "HUDG Trust",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.025871339096964152,
"time_weighted_return_2": -0.025871339096964152,
"_custom_twr_audit_note_911328": null,
"value": 1017004.7192636987
},
"children": []
}]
},
{
"name": "Asset Class C",
"grouping": "_custom_level_2_624287",
"columns": {
"time_weighted_return": -0.030370376329670656,
"time_weighted_return_2": -0.030370376329670656,
"_custom_twr_audit_note_911328": null,
"value": 231142.67772000004
},
"children": [{
"entity_id": 10604454,
"name": "HKDJ Trust",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.030370376329670656,
"time_weighted_return_2": -0.030370376329670656,
"_custom_twr_audit_note_911328": null,
"value": 231142.67772000004
},
"children": []
}]
},
{
"name": "Asset Class D",
"grouping": "_custom_level_2_624287",
"columns": {
"time_weighted_return": -0.05382756475465478,
"time_weighted_return_2": -0.05382756475465478,
"_custom_twr_audit_note_911328": null,
"value": 9791282.570000006
},
"children": [{
"entity_id": 10604454,
"name": "HUDW Trust",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.05382756475465478,
"time_weighted_return_2": -0.05382756475465478,
"_custom_twr_audit_note_911328": null,
"value": 9791282.570000006
},
"children": []
}]
},
{
"name": "Asset Class E",
"grouping": "_custom_level_2_624287",
"columns": {
"time_weighted_return": -0.01351630404081805,
"time_weighted_return_2": -0.01351630404081805,
"_custom_twr_audit_note_911328": null,
"value": 2153366.6396034593
},
"children": [{
"entity_id": 10604454,
"name": "HJDJ Trust",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.01351630404081805,
"time_weighted_return_2": -0.01351630404081805,
"_custom_twr_audit_note_911328": null,
"value": 2153366.6396034593
},
"children": []
}]
},
{
"name": "Asset Class F",
"grouping": "_custom_level_2_624287",
"columns": {
"time_weighted_return": -0.002298190175237247,
"time_weighted_return_2": -0.002298190175237247,
"_custom_twr_audit_note_911328": null,
"value": 68313.90999999999
},
"children": [{
"entity_id": 10604454,
"name": "HADJ Trust",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.002298190175237247,
"time_weighted_return_2": -0.002298190175237247,
"_custom_twr_audit_note_911328": null,
"value": 68313.90999999999
},
"children": []
}]
}
]
},
{
"name": "Wealth Bucket B",
"grouping": "_custom_new_entity_group_453577",
"columns": {
"time_weighted_return": -0.04769870075659244,
"time_weighted_return_2": -0.04769870075659244,
"_custom_twr_audit_note_911328": null,
"value": 9868044.398519998
},
"children": [{
"name": "Asset Class A",
"grouping": "_custom_level_2_624287",
"columns": {
"time_weighted_return": 0.000028632718065191298,
"time_weighted_return_2": 0.000028632718065191298,
"_custom_twr_audit_note_911328": null,
"value": 10234.94
},
"children": [{
"entity_id": 10868778,
"name": "2012 Desc Tr HBO Thalia",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": 0.0000282679297198829,
"time_weighted_return_2": 0.0000282679297198829,
"_custom_twr_audit_note_911328": null,
"value": 244.28
},
"children": []
},
{
"entity_id": 10643052,
"name": "2013 Irrev Tr HBO Thalia",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": 0.000049373572795108345,
"time_weighted_return_2": 0.000049373572795108345,
"_custom_twr_audit_note_911328": null,
"value": 5081.08
},
"children": []
},
{
"entity_id": 10598341,
"name": "Cht 11th Tr HBO Shirley",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": 0.000006609603754315074,
"time_weighted_return_2": 0.000006609603754315074,
"_custom_twr_audit_note_911328": null,
"value": 1523.62
},
"children": []
},
{
"entity_id": 10598337,
"name": "Cht 11th Tr HBO Hannah",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": 0.000010999769004760296,
"time_weighted_return_2": 0.000010999769004760296,
"_custom_twr_audit_note_911328": null,
"value": 1828.9
},
"children": []
},
{
"entity_id": 10598334,
"name": "Cht 11th Tr HBO Lau",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": 0.000006466673995619843,
"time_weighted_return_2": 0.000006466673995619843,
"_custom_twr_audit_note_911328": null,
"value": 1557.06
},
"children": []
}
]
},
{
"name": "Asset Class B",
"grouping": "_custom_level_2_624287",
"columns": {
"time_weighted_return": -0.024645947842438676,
"time_weighted_return_2": -0.024645947842438676,
"_custom_twr_audit_note_911328": null,
"value": 674052.31962
},
"children": [{
"entity_id": 10868778,
"name": "2012 Desc Tr HBO Thalia",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.043304004172576405,
"time_weighted_return_2": -0.043304004172576405,
"_custom_twr_audit_note_911328": null,
"value": 52800.96
},
"children": []
},
{
"entity_id": 10643052,
"name": "2013 Irrev Tr HBO Thalia",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.022408434778798836,
"time_weighted_return_2": -0.022408434778798836,
"_custom_twr_audit_note_911328": null,
"value": 599594.11962
},
"children": []
},
{
"entity_id": 10598341,
"name": "Cht 11th Tr HBO Shirley",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.039799855483646174,
"time_weighted_return_2": -0.039799855483646174,
"_custom_twr_audit_note_911328": null,
"value": 7219.08
},
"children": []
},
{
"entity_id": 10598337,
"name": "Cht 11th Tr HBO Hannah",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.039799855483646174,
"time_weighted_return_2": -0.039799855483646174,
"_custom_twr_audit_note_911328": null,
"value": 7219.08
},
"children": []
},
{
"entity_id": 10598334,
"name": "Cht 11th Tr HBO Lau",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.039799855483646174,
"time_weighted_return_2": -0.039799855483646174,
"_custom_twr_audit_note_911328": null,
"value": 7219.08
},
"children": []
}
]
},
{
"name": "Asset Class C",
"grouping": "_custom_level_2_624287",
"columns": {
"time_weighted_return": -0.03037038746301135,
"time_weighted_return_2": -0.03037038746301135,
"_custom_twr_audit_note_911328": null,
"value": 114472.69744
},
"children": [{
"entity_id": 10868778,
"name": "2012 Desc Tr HBO Thalia",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.030370390035505124,
"time_weighted_return_2": -0.030370390035505124,
"_custom_twr_audit_note_911328": null,
"value": 114472.68744000001
},
"children": []
},
{
"entity_id": 10643052,
"name": "2013 Irrev Tr HBO Thalia",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": 0,
"time_weighted_return_2": 0,
"_custom_twr_audit_note_911328": null,
"value": 0.01
},
"children": []
}
]
},
{
"name": "Asset Class D",
"grouping": "_custom_level_2_624287",
"columns": {
"time_weighted_return": -0.06604362523792162,
"time_weighted_return_2": -0.06604362523792162,
"_custom_twr_audit_note_911328": null,
"value": 5722529.229999997
},
"children": [{
"entity_id": 10868778,
"name": "2012 Desc Tr HBO Thalia",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.06154960593668424,
"time_weighted_return_2": -0.06154960593668424,
"_custom_twr_audit_note_911328": null,
"value": 1191838.9399999995
},
"children": []
},
{
"entity_id": 10643052,
"name": "2013 Irrev Tr HBO Thalia",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.06750460387418267,
"time_weighted_return_2": -0.06750460387418267,
"_custom_twr_audit_note_911328": null,
"value": 4416618.520000002
},
"children": []
},
{
"entity_id": 10598341,
"name": "Cht 11th Tr HBO Shirley",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.05604507809250081,
"time_weighted_return_2": -0.05604507809250081,
"_custom_twr_audit_note_911328": null,
"value": 38190.33
},
"children": []
},
{
"entity_id": 10598337,
"name": "Cht 11th Tr HBO Hannah",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.05604507809250081,
"time_weighted_return_2": -0.05604507809250081,
"_custom_twr_audit_note_911328": null,
"value": 37940.72
},
"children": []
},
{
"entity_id": 10598334,
"name": "Cht 11th Tr HBO Lau",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.05604507809250081,
"time_weighted_return_2": -0.05604507809250081,
"_custom_twr_audit_note_911328": null,
"value": 37940.72
},
"children": []
}
]
},
{
"name": "Asset Class E",
"grouping": "_custom_level_2_624287",
"columns": {
"time_weighted_return": -0.017118805423322003,
"time_weighted_return_2": -0.017118805423322003,
"_custom_twr_audit_note_911328": null,
"value": 3148495.0914600003
},
"children": [{
"entity_id": 10868778,
"name": "2012 Desc Tr HBO Thalia",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.015251157805867277,
"time_weighted_return_2": -0.015251157805867277,
"_custom_twr_audit_note_911328": null,
"value": 800493.06146
},
"children": []
},
{
"entity_id": 10643052,
"name": "2013 Irrev Tr HBO Thalia",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.01739609576880241,
"time_weighted_return_2": -0.01739609576880241,
"_custom_twr_audit_note_911328": null,
"value": 2215511.2700000005
},
"children": []
},
{
"entity_id": 10598341,
"name": "Cht 11th Tr HBO Shirley",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.02085132265594647,
"time_weighted_return_2": -0.02085132265594647,
"_custom_twr_audit_note_911328": null,
"value": 44031.21
},
"children": []
},
{
"entity_id": 10598337,
"name": "Cht 11th Tr HBO Hannah",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.02089393244695803,
"time_weighted_return_2": -0.02089393244695803,
"_custom_twr_audit_note_911328": null,
"value": 44394.159999999996
},
"children": []
},
{
"entity_id": 10598334,
"name": "Cht 11th Tr HBO Lau",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.020607507059866248,
"time_weighted_return_2": -0.020607507059866248,
"_custom_twr_audit_note_911328": null,
"value": 44065.39000000001
},
"children": []
}
]
},
{
"name": "Asset Class F",
"grouping": "_custom_level_2_624287",
"columns": {
"time_weighted_return": -0.0014710489231547497,
"time_weighted_return_2": -0.0014710489231547497,
"_custom_twr_audit_note_911328": null,
"value": 198260.12
},
"children": [{
"entity_id": 10868778,
"name": "2012 Desc Tr HBO Thalia",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.0014477244560456848,
"time_weighted_return_2": -0.0014477244560456848,
"_custom_twr_audit_note_911328": null,
"value": 44612.33
},
"children": []
},
{
"entity_id": 10643052,
"name": "2013 Irrev Tr HBO Thalia",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.001477821083437858,
"time_weighted_return_2": -0.001477821083437858,
"_custom_twr_audit_note_911328": null,
"value": 153647.78999999998
},
"children": []
}
]
}
]
}
]
}]
}
},
"included": []
}
}
注释 JSON Object 摘录
data
- 此处的数据可以忽略,这些是基础 children. 的汇总值
meta
-columns
– 包含我想用于每个适用的children
'column` key:pair 值的列 header 值。groupings
- 可以忽略。children
层级 -children
有 4 个级别,可以通过它们的name
识别如下 -- 家庭
name
(即“猎鹰家庭”) - 财富桶
name
(例如,“财富桶 A”) - 资产 Class
name
(例如,“资产 Class A”) - 基金
name
(例如,‘HUDJ Trust’)
- 家庭
目标输出
这是我试图实现的目标df
结构的摘录-
portfolio | name | entity_id | Adjusted Value (No Div, USD) | Current Quarter TWR (USD) | YTD TWR (USD) | TWR Audit Note |
---|---|---|---|---|---|---|
Falconer Family | Falconer Family | | 23132492.90510712 | -0.046732301295604683 | -0.046732301295604683 | None |
Falconer Family | Wealth Bucket A | | 13264448.506587146 | -0.045960317420568164 | -0.045960317420568164 | None |
Falconer Family | Asset Class A | | 3337.99 | 0.000003434094574039648 | 0.000003434094574039648 | None |
Falconer Family | HUDJ Trust | 10604454 | 3337.99 | 0.000003434094574039648 | 0.000003434094574039648 | None |
Falconer Family | Asset Class B | | 1017004.7192636987 | -0.025871339096964152 | -0.025871339096964152 | None |
Falconer Family | HUDG Trust | 10604454 | 1017004.7192636987 | -0.025871339096964152 | -0.025871339096964152 | None |
Falconer Family | Asset Class C | | 231142.67772000004 | -0.030370376329670656 | -0.030370376329670656 | None |
Falconer Family | HKDJ Trust | 10604454 | 231142.67772000004 | -0.030370376329670656 | -0.030370376329670656 | None |
Falconer Family | Asset Class D | | 9791282.570000006 | -0.05382756475465478 | -0.05382756475465478 | None |
Falconer Family | HUDW Trust | 10604454 | 9791282.570000006 | -0.05382756475465478 | -0.05382756475465478 | None |
关于目标输出的注释
- Portfolio header – 对于每一行,我想映射 top-level
children
name
值 [family name]。例如,‘猎鹰家族。 - 名称 header – 这应该只是每个
children
. 的 - 实体 ID – 所有第 4 级
children
entity_id
值应映射到此列。 - 数据列 – 无论级别如何,所有
children
都具有相同的time_weighted_return
、time_weighted_return_2
和value
列,应分别映射。 - TWR 审计说明 – 这些
children
_custom_twr_audit_note_911328
值目前是空白的,但将来会用到。
name
值
目前的产出
我的主要问题是你可以看到我只能进入第一个[家庭]和第二个[财富桶] children
级。这让我错过了第三个 [资产 Class] 和第四个 [基金] -
portfolio | name | Adjusted Value (No Div, USD) | Current Quarter TWR (USD) | YTD TWR (USD) | TWR Audit Note | |
---|---|---|---|---|---|---|
0 | Falconer Family | Falconer Family | 2.313249e+07 | -0.046732 | -0.046732 | None |
1 | Falconer Family | Wealth Bucket A | 1.326445e+07 | -0.045960 | -0.045960 | None |
2 | Falconer Family | Wealth Bucket B | 9.868044e+06 | -0.047699 | -0.047699 | None |
当前代码
下面这个函数能让我得到正确的 df 格式,但我的主要问题是:我找不到办法返回所有层级的 children,而不仅仅是 top-level 的 children -
# Function to read API response / JSON Object
def response_writer():
    """Load the saved API response from ``api_response_2022-02-13.json``.

    Returns:
        The parsed JSON document.

    Raises:
        FileNotFoundError: If the file is not in the current working directory.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Read as UTF-8 explicitly so the result does not depend on the platform's
    # default text encoding.
    with open('api_response_2022-02-13.json', encoding='utf-8') as f:
        api_response = json.load(f)
    return api_response
# Function to unpack JSON response into pandas dataframe.
def unpack_response(max_attempts=3, max_depth=1):
    """Flatten the saved API response into one DataFrame row per entity.

    Every top-level child of ``data.attributes.total`` is treated as a
    portfolio. The portfolio itself is emitted first (so its totals act as a
    header row), followed by its descendants down to ``max_depth`` levels.

    Args:
        max_attempts: How many times to load and unpack the response before
            giving up. The original implementation retried forever, which
            hangs when the KeyError is deterministic (e.g. a malformed file).
        max_depth: How many levels of ``children`` to descend below each
            portfolio. ``1`` (the default) reproduces the original behaviour
            of portfolio + direct children; ``None`` descends all levels.

    Returns:
        A DataFrame with ``portfolio`` and ``name`` columns followed by one
        column per entry of ``meta.columns``, renamed to its display name.
        Keys not listed in ``meta.columns`` are dropped.

    Raises:
        ValueError: If ``max_attempts`` is smaller than 1.
        KeyError: If the response is still missing an expected key after
            ``max_attempts`` attempts.
    """
    if max_attempts < 1:
        raise ValueError("max_attempts must be at least 1")

    def collect(entity, portfolio_name, depth, rows):
        # Copy the metrics so the decoded response is never mutated.
        row = dict(entity["columns"])
        row["portfolio"] = portfolio_name  # name of the owning top-level child
        row["name"] = entity["name"]
        rows.append(row)
        if max_depth is None or depth < max_depth:
            for child in entity["children"]:
                collect(child, portfolio_name, depth + 1, rows)

    for attempt in range(1, max_attempts + 1):
        try:
            api_response = response_writer()
            portfolio_views_children = api_response['data']['attributes']['total']['children']

            # Final column order and display names; dict order is the output order.
            column_ordering = {"portfolio": "portfolio", "name": "name"}
            column_ordering.update({c["key"]: c["display_name"] for c in api_response["meta"]["columns"]})

            portfolios = []
            for portfolio in portfolio_views_children:
                entity_columns = []
                collect(portfolio, portfolio["name"], 0, entity_columns)
                portfolios.append(pd.DataFrame(entity_columns))

            if not portfolios:
                # pd.concat() rejects an empty list; return an empty, correctly shaped frame.
                return pd.DataFrame(columns=list(column_ordering.values()))

            # combine dataframes (each keeps its own 0..n index, as before)
            df = pd.concat(portfolios)
            # reorder and rename -- beware: un-named cols will be dropped
            df = df[list(column_ordering)]
            return df.rename(columns=column_ordering)
        except KeyError:
            print("-----------------------------------\n","API TIMEOUT ERROR: TRY AGAIN...", "\n-----------------------------------\n")
            if attempt == max_attempts:
                # Surface the real problem instead of spinning forever.
                raise
unpack_response()
帮助
简而言之,我正在寻找一些关于如何通过增强现有代码来利用剩余 children
的建议。虽然我花了很多时间来充分解释我的问题,但请询问是否有任何不清楚的地方。请注意 JSON 可能有多个家庭,因此提供的解决方案/建议必须遵守此约束。
我认为这让你很接近;可能只需要调整各种 name
列并删除额外的数据(我保留了 grouping
列)。
主要思想是对所有可用的 children
级别递归使用 pd.json_normalize with pd.concat。
编辑: 将所有内容放入一个函数中并添加部分以像预期输出那样折叠 name
列。
# Flatten the nested 'children' hierarchy found under
# api_response['data']['attributes']['total'] into a single DataFrame and
# return it. Metric columns are renamed to the display names declared in
# api_response['meta']['columns'].
# NOTE(review): the indentation of this snippet was lost in extraction; the
# comments below describe the statements in order and assume the two nested
# helpers end at their `return` lines -- confirm before reuse.
def process_json(api_response):
# Helper: replace the dict-valued 'columns' column with one column per key,
# keeping the rows aligned with df's existing index.
def get_column_values(df):
return pd.concat([df, pd.json_normalize(df.pop('columns')).set_axis(df.index)], axis=1)
# Helper: expand one level of 'children'. Returns (index level names, frame).
def expand_children(df):
# For frames with more than one row, turn missing/empty 'children' entries
# into None so that dropna() below discards them.
# NOTE(review): presumably only the next statement is guarded by this `if`.
if len(df.index) > 1:
df['children'] = df['children'].fillna('').apply(lambda x: None if len(x) == 0 else x)
# One entry per child record; each entry keeps the index of its parent row.
df_children = df.pop('children').dropna().explode()
if len(df_children.index) == 0: # return df if no children to append
return df.index.names, df
# max_level=0 leaves the nested 'columns' dict intact so get_column_values()
# can expand it; the child's own 'name' becomes an extra index level.
df_children = pd.json_normalize(df_children, max_level=0).set_axis(df_children.index).set_index('name', append=True)
df_children = get_column_values(df_children)
idx_names = list(df_children.index.names)
# Suffix the newest 'name' level with the level count so index level names
# stay distinct.
idx_names[-1] = idx_names[-1] + '_' + str(len(idx_names))
# Parent rows get an empty value for the new level so parents and children
# share the same index levels when concatenated.
df[idx_names[-1]] = None
return idx_names, pd.concat([df.set_index(idx_names[-1], append=True), df_children], axis=0)
columns_dict = pd.DataFrame(api_response['meta']['columns']).set_index('key').to_dict(orient='index') # save column definitions
df = pd.DataFrame(api_response['data']['attributes']['total']['children']).set_index('name') # get initial dataframe
df = get_column_values(df) # get columns for initial level
# expand children
# Repeats for as long as the frame still carries a 'children' column.
# NOTE(review): idx_names is only assigned inside this loop; if the initial
# frame has no 'children' column the collapse step below would hit an
# unbound name -- confirm inputs always have at least one level.
while 'children' in df.columns:
idx_names, df = expand_children(df)
# reorder/replace column headers and sort index
# Non-metric columns come first, then the metric columns in meta order,
# renamed to their display names; rows are sorted by index with missing
# level values first, and the index levels become ordinary columns.
df = (df.loc[:, [x for x in df.columns if x not in columns_dict.keys()] + list(columns_dict.keys())]
.rename(columns={k:v['display_name'] for k,v in columns_dict.items()})
.sort_index(na_position='first').reset_index())
#collapse "name" columns (careful of potential duplicate rows)
# Walk the name levels from last to first, filling gaps in the last name
# column from each one in turn.
for col in idx_names[::-1]:
df[idx_names[-1]] = df[idx_names[-1]].fillna(df[col])
# The first level becomes 'portfolio', the collapsed column becomes 'name',
# and the intermediate name levels are dropped.
df = df.rename(columns={'name': 'portfolio', idx_names[-1]: 'name'}).drop(columns=idx_names[1:-1])
return df
由于另一个答案使用 iterrows
,通常不建议这样做,因此认为快速比较是值得的。
process_json(api_response)
54.2 ms ± 7.12 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
unpack_response(api_response) # iterrows
84.3 ms ± 9.04 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
jsonpath-ng
甚至可以很容易地解析这样一个嵌套的 json 对象。您可以通过以下命令安装这个方便的库:
pip install --upgrade jsonpath-ng
代码:
import json
import jsonpath_ng as jp
import pandas as pd
def unpack_response(r):
    """Flatten every ``children`` entry of the response into a DataFrame.

    Args:
        r: Decoded API response. ``r['meta']['columns']`` supplies the metric
            keys and their display names; entities are found with the
            JSONPath expression ``$..children.[*]``.

    Returns:
        A DataFrame with ``full_path``, ``portfolio``, ``name``,
        ``entity_id`` and one column per metric (named by its display name),
        ordered so that each entity is followed by its own descendants.
    """
    def sibling_indices(path):
        # 'total.children.[10].children.[2]' -> (10, 2)
        return tuple(
            int(token[1:-1])
            for token in path.split('.')
            if token.startswith('[') and token.endswith(']') and token[1:-1].isdigit()
        )

    # Create a dataframe from extracted data
    expr = jp.parse('$..children.[*]')
    found = [(str(m.full_path), m.value) for m in expr.find(r)]
    # Order by the numeric array indices along each path. A plain string sort
    # on full_path would place '[10]' before '[2]' as soon as any level has
    # more than ten siblings.
    found.sort(key=lambda item: sibling_indices(item[0]))
    data = [{'full_path': path, **value} for path, value in found]
    df = pd.json_normalize(data)

    # Append a portfolio column: top-level entries carry their own name and
    # every descendant inherits the name of the nearest preceding one.
    is_top_level = df.full_path.str.contains(r'total\.children\.\[\d+]$')
    df['portfolio'] = df['name'].where(is_top_level).ffill()

    # Deal with columns: derive the display names from the response metadata
    # instead of hard-coding them.
    trans = {'columns.' + c['key']: c['display_name'] for c in r['meta']['columns']}
    cols = ['full_path', 'portfolio', 'name', 'entity_id', *trans.values()]
    df = df.rename(columns=trans)[cols]
    return df
# Load the sample data from file
# with open('api_response_2022-02-13.json', 'r') as f:
# api_response = json.load(f)
# Load the sample data from string
api_response = json.loads('{"meta": {"columns": [{"key": "value", "display_name": "Adjusted Value (No Div, USD)", "output_type": "Number", "currency": "USD"}, {"key": "time_weighted_return", "display_name": "Current Quarter TWR (USD)", "output_type": "Percent", "currency": "USD"}, {"key": "time_weighted_return_2", "display_name": "YTD TWR (USD)", "output_type": "Percent", "currency": "USD"}, {"key": "_custom_twr_audit_note_911328", "display_name": "TWR Audit Note", "output_type": "Word"}], "groupings": [{"key": "_custom_name_747205", "display_name": "* Reporting Client Name"}, {"key": "_custom_new_entity_group_453577", "display_name": "NEW Entity Group"}, {"key": "_custom_level_2_624287", "display_name": "* Level 2"}, {"key": "legal_entity", "display_name": "Legal Entity"}]}, "data": {"type": "portfolio_views", "attributes": {"total": {"name": "Total", "columns": {"time_weighted_return": -0.046732301295604683, "time_weighted_return_2": -0.046732301295604683, "_custom_twr_audit_note_911328": null, "value": 23132492.905107163}, "children": [{"name": "Falconer Family", "grouping": "_custom_name_747205", "columns": {"time_weighted_return": -0.046732301295604683, "time_weighted_return_2": -0.046732301295604683, "_custom_twr_audit_note_911328": null, "value": 23132492.905107163}, "children": [{"name": "Wealth Bucket A", "grouping": "_custom_new_entity_group_453577", "columns": {"time_weighted_return": -0.045960317420568164, "time_weighted_return_2": -0.045960317420568164, "_custom_twr_audit_note_911328": null, "value": 13264448.506587159}, "children": [{"name": "Asset Class A", "grouping": "_custom_level_2_624287", "columns": {"time_weighted_return": 3.434094574039648e-06, "time_weighted_return_2": 3.434094574039648e-06, "_custom_twr_audit_note_911328": null, "value": 3337.99}, "children": [{"entity_id": 10604454, "name": "HUDJ Trust", "grouping": "legal_entity", "columns": {"time_weighted_return": 3.434094574039648e-06, "time_weighted_return_2": 3.434094574039648e-06, 
"_custom_twr_audit_note_911328": null, "value": 3337.99}, "children": []}]}, {"name": "Asset Class B", "grouping": "_custom_level_2_624287", "columns": {"time_weighted_return": -0.025871339096964152, "time_weighted_return_2": -0.025871339096964152, "_custom_twr_audit_note_911328": null, "value": 1017004.7192636987}, "children": [{"entity_id": 10604454, "name": "HUDG Trust", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.025871339096964152, "time_weighted_return_2": -0.025871339096964152, "_custom_twr_audit_note_911328": null, "value": 1017004.7192636987}, "children": []}]}, {"name": "Asset Class C", "grouping": "_custom_level_2_624287", "columns": {"time_weighted_return": -0.030370376329670656, "time_weighted_return_2": -0.030370376329670656, "_custom_twr_audit_note_911328": null, "value": 231142.67772000004}, "children": [{"entity_id": 10604454, "name": "HKDJ Trust", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.030370376329670656, "time_weighted_return_2": -0.030370376329670656, "_custom_twr_audit_note_911328": null, "value": 231142.67772000004}, "children": []}]}, {"name": "Asset Class D", "grouping": "_custom_level_2_624287", "columns": {"time_weighted_return": -0.05382756475465478, "time_weighted_return_2": -0.05382756475465478, "_custom_twr_audit_note_911328": null, "value": 9791282.570000006}, "children": [{"entity_id": 10604454, "name": "HUDW Trust", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.05382756475465478, "time_weighted_return_2": -0.05382756475465478, "_custom_twr_audit_note_911328": null, "value": 9791282.570000006}, "children": []}]}, {"name": "Asset Class E", "grouping": "_custom_level_2_624287", "columns": {"time_weighted_return": -0.01351630404081805, "time_weighted_return_2": -0.01351630404081805, "_custom_twr_audit_note_911328": null, "value": 2153366.6396034593}, "children": [{"entity_id": 10604454, "name": "HJDJ Trust", "grouping": "legal_entity", "columns": 
{"time_weighted_return": -0.01351630404081805, "time_weighted_return_2": -0.01351630404081805, "_custom_twr_audit_note_911328": null, "value": 2153366.6396034593}, "children": []}]}, {"name": "Asset Class F", "grouping": "_custom_level_2_624287", "columns": {"time_weighted_return": -0.002298190175237247, "time_weighted_return_2": -0.002298190175237247, "_custom_twr_audit_note_911328": null, "value": 68313.90999999999}, "children": [{"entity_id": 10604454, "name": "HADJ Trust", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.002298190175237247, "time_weighted_return_2": -0.002298190175237247, "_custom_twr_audit_note_911328": null, "value": 68313.90999999999}, "children": []}]}]}, {"name": "Wealth Bucket B", "grouping": "_custom_new_entity_group_453577", "columns": {"time_weighted_return": -0.04769870075659244, "time_weighted_return_2": -0.04769870075659244, "_custom_twr_audit_note_911328": null, "value": 9868044.398519998}, "children": [{"name": "Asset Class A", "grouping": "_custom_level_2_624287", "columns": {"time_weighted_return": 2.8632718065191298e-05, "time_weighted_return_2": 2.8632718065191298e-05, "_custom_twr_audit_note_911328": null, "value": 10234.94}, "children": [{"entity_id": 10868778, "name": "2012 Desc Tr HBO Thalia", "grouping": "legal_entity", "columns": {"time_weighted_return": 2.82679297198829e-05, "time_weighted_return_2": 2.82679297198829e-05, "_custom_twr_audit_note_911328": null, "value": 244.28}, "children": []}, {"entity_id": 10643052, "name": "2013 Irrev Tr HBO Thalia", "grouping": "legal_entity", "columns": {"time_weighted_return": 4.9373572795108345e-05, "time_weighted_return_2": 4.9373572795108345e-05, "_custom_twr_audit_note_911328": null, "value": 5081.08}, "children": []}, {"entity_id": 10598341, "name": "Cht 11th Tr HBO Shirley", "grouping": "legal_entity", "columns": {"time_weighted_return": 6.609603754315074e-06, "time_weighted_return_2": 6.609603754315074e-06, "_custom_twr_audit_note_911328": null, "value": 
1523.62}, "children": []}, {"entity_id": 10598337, "name": "Cht 11th Tr HBO Hannah", "grouping": "legal_entity", "columns": {"time_weighted_return": 1.0999769004760296e-05, "time_weighted_return_2": 1.0999769004760296e-05, "_custom_twr_audit_note_911328": null, "value": 1828.9}, "children": []}, {"entity_id": 10598334, "name": "Cht 11th Tr HBO Lau", "grouping": "legal_entity", "columns": {"time_weighted_return": 6.466673995619843e-06, "time_weighted_return_2": 6.466673995619843e-06, "_custom_twr_audit_note_911328": null, "value": 1557.06}, "children": []}]}, {"name": "Asset Class B", "grouping": "_custom_level_2_624287", "columns": {"time_weighted_return": -0.024645947842438676, "time_weighted_return_2": -0.024645947842438676, "_custom_twr_audit_note_911328": null, "value": 674052.31962}, "children": [{"entity_id": 10868778, "name": "2012 Desc Tr HBO Thalia", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.043304004172576405, "time_weighted_return_2": -0.043304004172576405, "_custom_twr_audit_note_911328": null, "value": 52800.96}, "children": []}, {"entity_id": 10643052, "name": "2013 Irrev Tr HBO Thalia", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.022408434778798836, "time_weighted_return_2": -0.022408434778798836, "_custom_twr_audit_note_911328": null, "value": 599594.11962}, "children": []}, {"entity_id": 10598341, "name": "Cht 11th Tr HBO Shirley", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.039799855483646174, "time_weighted_return_2": -0.039799855483646174, "_custom_twr_audit_note_911328": null, "value": 7219.08}, "children": []}, {"entity_id": 10598337, "name": "Cht 11th Tr HBO Hannah", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.039799855483646174, "time_weighted_return_2": -0.039799855483646174, "_custom_twr_audit_note_911328": null, "value": 7219.08}, "children": []}, {"entity_id": 10598334, "name": "Cht 11th Tr HBO Lau", "grouping": "legal_entity", "columns": 
{"time_weighted_return": -0.039799855483646174, "time_weighted_return_2": -0.039799855483646174, "_custom_twr_audit_note_911328": null, "value": 7219.08}, "children": []}]}, {"name": "Asset Class C", "grouping": "_custom_level_2_624287", "columns": {"time_weighted_return": -0.03037038746301135, "time_weighted_return_2": -0.03037038746301135, "_custom_twr_audit_note_911328": null, "value": 114472.69744}, "children": [{"entity_id": 10868778, "name": "2012 Desc Tr HBO Thalia", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.030370390035505124, "time_weighted_return_2": -0.030370390035505124, "_custom_twr_audit_note_911328": null, "value": 114472.68744000001}, "children": []}, {"entity_id": 10643052, "name": "2013 Irrev Tr HBO Thalia", "grouping": "legal_entity", "columns": {"time_weighted_return": 0, "time_weighted_return_2": 0, "_custom_twr_audit_note_911328": null, "value": 0.01}, "children": []}]}, {"name": "Asset Class D", "grouping": "_custom_level_2_624287", "columns": {"time_weighted_return": -0.06604362523792162, "time_weighted_return_2": -0.06604362523792162, "_custom_twr_audit_note_911328": null, "value": 5722529.229999997}, "children": [{"entity_id": 10868778, "name": "2012 Desc Tr HBO Thalia", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.06154960593668424, "time_weighted_return_2": -0.06154960593668424, "_custom_twr_audit_note_911328": null, "value": 1191838.9399999995}, "children": []}, {"entity_id": 10643052, "name": "2013 Irrev Tr HBO Thalia", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.06750460387418267, "time_weighted_return_2": -0.06750460387418267, "_custom_twr_audit_note_911328": null, "value": 4416618.520000002}, "children": []}, {"entity_id": 10598341, "name": "Cht 11th Tr HBO Shirley", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.05604507809250081, "time_weighted_return_2": -0.05604507809250081, "_custom_twr_audit_note_911328": null, "value": 38190.33}, 
"children": []}, {"entity_id": 10598337, "name": "Cht 11th Tr HBO Hannah", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.05604507809250081, "time_weighted_return_2": -0.05604507809250081, "_custom_twr_audit_note_911328": null, "value": 37940.72}, "children": []}, {"entity_id": 10598334, "name": "Cht 11th Tr HBO Lau", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.05604507809250081, "time_weighted_return_2": -0.05604507809250081, "_custom_twr_audit_note_911328": null, "value": 37940.72}, "children": []}]}, {"name": "Asset Class E", "grouping": "_custom_level_2_624287", "columns": {"time_weighted_return": -0.017118805423322003, "time_weighted_return_2": -0.017118805423322003, "_custom_twr_audit_note_911328": null, "value": 3148495.0914600003}, "children": [{"entity_id": 10868778, "name": "2012 Desc Tr HBO Thalia", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.015251157805867277, "time_weighted_return_2": -0.015251157805867277, "_custom_twr_audit_note_911328": null, "value": 800493.06146}, "children": []}, {"entity_id": 10643052, "name": "2013 Irrev Tr HBO Thalia", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.01739609576880241, "time_weighted_return_2": -0.01739609576880241, "_custom_twr_audit_note_911328": null, "value": 2215511.2700000005}, "children": []}, {"entity_id": 10598341, "name": "Cht 11th Tr HBO Shirley", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.02085132265594647, "time_weighted_return_2": -0.02085132265594647, "_custom_twr_audit_note_911328": null, "value": 44031.21}, "children": []}, {"entity_id": 10598337, "name": "Cht 11th Tr HBO Hannah", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.02089393244695803, "time_weighted_return_2": -0.02089393244695803, "_custom_twr_audit_note_911328": null, "value": 44394.159999999996}, "children": []}, {"entity_id": 10598334, "name": "Cht 11th Tr HBO Lau", "grouping": "legal_entity", 
"columns": {"time_weighted_return": -0.020607507059866248, "time_weighted_return_2": -0.020607507059866248, "_custom_twr_audit_note_911328": null, "value": 44065.39000000001}, "children": []}]}, {"name": "Asset Class F", "grouping": "_custom_level_2_624287", "columns": {"time_weighted_return": -0.0014710489231547497, "time_weighted_return_2": -0.0014710489231547497, "_custom_twr_audit_note_911328": null, "value": 198260.12}, "children": [{"entity_id": 10868778, "name": "2012 Desc Tr HBO Thalia", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.0014477244560456848, "time_weighted_return_2": -0.0014477244560456848, "_custom_twr_audit_note_911328": null, "value": 44612.33}, "children": []}, {"entity_id": 10643052, "name": "2013 Irrev Tr HBO Thalia", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.001477821083437858, "time_weighted_return_2": -0.001477821083437858, "_custom_twr_audit_note_911328": null, "value": 153647.78999999998}, "children": []}]}]}]}]}}, "included": []}}')
df = unpack_response(api_response)
解释:
首先,您可以通过以下命令确认预期的输出:
print(df.iloc[:5:,1:])
portfolio | name | entity_id | Adjusted Value (No Div, USD) | Current Quarter TWR (USD) | YTD TWR (USD) | TWR Audit Note |
---|---|---|---|---|---|---|
Falconer Family | Falconer Family | nan | 2.31325e+07 | -0.0467323 | -0.0467323 | |
Falconer Family | Wealth Bucket A | nan | 1.32644e+07 | -0.0459603 | -0.0459603 | |
Falconer Family | Asset Class A | nan | 3337.99 | 3.43409e-06 | 3.43409e-06 | |
Falconer Family | HUDJ Trust | 1.06045e+07 | 3337.99 | 3.43409e-06 | 3.43409e-06 | |
Falconer Family | Asset Class B | nan | 1.017e+06 | -0.0258713 | -0.0258713 |
随后,您可以通过以下命令查看jsonpath-ng
中的精彩功能之一:
print(df.iloc[:10,:3])
full_path | portfolio | name |
---|---|---|
data.attributes.total.children.[0] | Falconer Family | Falconer Family |
data.attributes.total.children.[0].children.[0] | Falconer Family | Wealth Bucket A |
data.attributes.total.children.[0].children.[0].children.[0] | Falconer Family | Asset Class A |
data.attributes.total.children.[0].children.[0].children.[0].children.[0] | Falconer Family | HUDJ Trust |
data.attributes.total.children.[0].children.[0].children.[1] | Falconer Family | Asset Class B |
data.attributes.total.children.[0].children.[0].children.[1].children.[0] | Falconer Family | HUDG Trust |
data.attributes.total.children.[0].children.[0].children.[2] | Falconer Family | Asset Class C |
data.attributes.total.children.[0].children.[0].children.[2].children.[0] | Falconer Family | HKDJ Trust |
data.attributes.total.children.[0].children.[0].children.[3] | Falconer Family | Asset Class D |
data.attributes.total.children.[0].children.[0].children.[3].children.[0] | Falconer Family | HUDW Trust |
多亏了full_path
栏目,您可以瞬间掌握每行提取数据的嵌套层次。实际上,我使用这些路径附加了正确的 portfolio
值。
在代码方面,重点是下面一行:
expr = jp.parse('$..children.[*]')
通过上面的表达式,您可以在json对象的任何级别搜索children
属性。 README.rst 告诉您每个语法代表什么。
Syntax | Meaning |
---|---|
$ |
The root object |
jsonpath1 .. jsonpath2 |
All nodes matched by jsonpath2 that descend from any node matching jsonpath1 |
[*] |
any array index |
速度:
我比较了上述 jsonpath-ng
方法和下面显示的 nested-for-loop 方法的速度。
#比较:
Method | Duration | Speed ratio |
---|---|---|
jsonpath-ng |
9.72 ms ± 342 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) | 5.7 (faster) |
Nested-for-loop | 55.4 ms ± 7.39 ms per loop (mean ± std. dev. of 7 runs, 10 loops each) | 1 |
#nested-for-loop方法的代码:
def unpack_response(r):
    """Flatten the nested ``children`` hierarchy into one row per entity.

    Rows appear in document order: each top-level child of
    ``data.attributes.total`` (a portfolio) is followed by all of its
    descendants, depth first.

    Args:
        r: Decoded API response. ``r['meta']['columns']`` supplies the metric
            keys and their display names.

    Returns:
        A DataFrame indexed 0..n-1 with the columns ``portfolio``, ``name``,
        ``entity_id`` and one column per metric, named by its display name.
        Entities without an ``entity_id`` get NaN in that column; an empty
        hierarchy yields an empty frame with the same columns.
    """
    def flatten(node, portfolio, rows):
        # Keep the node's own fields; 'children' is walked, not stored.
        record = {key: value for key, value in node.items() if key != 'children'}
        record['portfolio'] = portfolio
        rows.append(record)
        for child in node.get('children') or []:
            flatten(child, portfolio, rows)

    # Collect plain dicts and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and re-copied the whole frame on every call.
    rows = []
    for family in r['data']['attributes']['total']['children']:
        flatten(family, family['name'], rows)

    trans = {'columns.' + c['key']: c['display_name'] for c in r['meta']['columns']}
    cols = ['portfolio', 'name', 'entity_id', *trans.values()]
    df = pd.json_normalize(rows).rename(columns=trans)
    # reindex (rather than df[cols]) so a response without any entity_id or
    # without any rows still returns the full column layout.
    return df.reindex(columns=cols)
你的问题是一层json里面有3层'nested'children。你想在 pandas
中将它们全部装入 pandas
和 un-nest 它们(explode
它们),而不是手动在外部 for
循环中。
挑战:爆炸时,每个 child 依次有一个名为 name
的字段,该字段已存在于 parent child 中。因此,我们需要在进行连续爆炸时重命名它们(就像您在原始问题中所做的那样,例如,'portfolio' 实际上在 json 中称为 'name')。最终保存在 columns
信息中的所有其他值都可以 unpack
ed.
完整的工作示例:
import pandas as pd
import json
# get the json file
# A context manager closes the file handle deterministically, and the explicit
# encoding avoids depending on the platform's locale default.
with open('api_response_2022-02-13.json', encoding='utf-8') as f:
    json_dict = json.load(f)


def _expand_children(df, group_keys, child_name=None):
    """Replace each group's nested 'children' list with one row per child.

    For every group defined by ``group_keys`` the first 'children' value is
    turned into a DataFrame; the group keys come back as ordinary columns.
    Only the children are kept -- the parent rows' own fields are not carried
    over. If ``child_name`` is given, the children's 'name' column is renamed
    to it so it does not clash with the next level's 'name'.
    """
    expanded = (
        df.groupby(group_keys)['children']
        .apply(lambda x: pd.DataFrame(x.values[0]))
        .reset_index()
    )
    if child_name is not None:
        expanded = expanded.rename(columns={'name': child_name})
    return expanded


# create a (nested) df out of it, and rename the 'top-level' name field to 'portfolio'
packed_df = pd.DataFrame.from_dict(
    json_dict['data']['attributes']['total']['children']
).rename(columns={'name': 'portfolio'})

# expand the level-1 'children' (and call their 'name' field 'grand-parent')
unpacked_df = _expand_children(packed_df, 'portfolio', 'grand_parent_name')

# expand the level-2 'children' (and call their 'name' field 'parent')
unpacked_df = _expand_children(
    unpacked_df, ['portfolio', 'grand_parent_name'], 'parent_name'
)

# expand the level-3 'children' (and keep their name as is)
unpacked_df = _expand_children(
    unpacked_df, ['portfolio', 'grand_parent_name', 'parent_name']
)

# expand the column field info from 'dict' to multiple columns
unpacked_df = pd.concat(
    [
        unpacked_df.drop('columns', axis=1),
        pd.DataFrame(unpacked_df['columns'].tolist()),
    ],
    axis=1,
)
当然,您可以将 3 个递归 un-packing 步骤放在一个单独的函数中,但为了清楚起见,在这里将它们分开(您实际上是 un-packing 不同层次结构的 'children', 需要分别处理每个 'name')
最后,您拥有所有 3 个层级的所有列(一个 df 中的所有 json)- 重命名它们,删除其中一些,如您所愿。对于您的最终输出,您的屏幕截图已经混合了不同的 name
级别(“Wealth Bucket A/B”与“HUDW/HADJ Trust”不在同一级别)