flatten_json 列表的递归展平函数
flatten_json recursive flattening function for lists
我想逐级展平以下 JSON,并为每个级别创建一个 pandas 数据框。我使用 flatten_json
来做到这一点,但为此我需要逐级遍历,这就产生了多层嵌套的 for 循环:
{
"metadata": {
"name": "abc",
"time": "2020-04-01"
},
"data": [
{
"identifiers": [
{
"type": "abc",
"scheme": "def",
"value": "123"
},
{
"type": "abc",
"scheme": "def",
"value": "123"
}
],
"name": "qwer",
"type": "abd",
"level1": [
{
"identifiers": [
{
"type": "abc",
"scheme": "def",
"value": "123"
},
{
"type": "abc",
"scheme": "def",
"value": "123"
}
],
"name": "asd",
"type": "abd",
"level2": [
{
"identifiers": [
{
"type": "abc",
"scheme": "def",
"value": "123"
},
{
"type": "abc",
"scheme": "def",
"value": "123"
}
],
"name": "abs",
"type": "abd"
},
{
"identifiers": [
{
"type": "abc",
"scheme": "def",
"value": "123"
},
{
"type": "abc",
"scheme": "def",
"value": "123"
}
],
"name": "abs",
"type": "abd"
}
]
}
]
}
]
}
我正在尝试使用下面的代码 flatten_json (Flatten JSON in Python) 将这个 json 展平:
import pandas as pd
import flatten_json as fj
import json
# Collect one flattened record per innermost "level2" entry.
level2 = []
keys = {'data', 'level1', 'level2'}  # names of the nested-list keys; not used below
# Read the file as UTF-8 explicitly instead of relying on the platform's
# default text encoding.
with open('test_lh.json', encoding='utf-8') as f:
    data = json.load(f)
# One loop per nesting level: data -> level1 -> level2.
for x in data['data']:
    for y in x['level1']:
        for z in y['level2']:
            dic = fj.flatten(z)
            level2.append(dic)
df = pd.DataFrame(level2)
print(df)
输出如下:
identifiers_0_type identifiers_0_scheme identifiers_0_value identifiers_1_type identifiers_1_scheme identifiers_1_value name type
0 abc def 123 abc def 123 abs abd
1 abc def 123 abc def 123 abs abd
我如何编写一个递归函数来获得相同的输出,而不必手写 n 层 for 循环?嵌套的层级可能有很多层。我已经尝试为此使用 json_normalize,
但我还需要在最终输出中包含父级标识符,
而 json_normalize 不支持多个记录路径。
我用递归解决了它,这是我的代码:
import json
import pandas as pd
import flatten_json as fj
# Keys passed to fj.flatten as root_keys_to_ignore, so that a record's nested
# child lists are left out of its flattened row.
keys = {'data', 'level1', 'level2', 'level3'}
# Read the file as UTF-8 explicitly instead of relying on the platform's
# default text encoding.
with open('test_lh.json', encoding='utf-8') as f:
    data = json.load(f)
# Dotted paths from the document root to each list of records to extract.
levels = ['data.level1.level2.level3', 'data.level1.level2', 'data.level1', 'data']
# Maps a level name (last component of a path) to its flattened records.
recs_dict = {}
def do_step(data_dict, level, depth, path):
    """Walk ``path`` recursively and collect flattened records of its last level.

    Args:
        data_dict: Mapping that holds the list stored under ``level``.
        level: Key in ``data_dict`` whose list is iterated at this depth.
        depth: Zero-based index of ``level`` within ``path``.
        path: Dot-separated chain of keys, e.g. ``'data.level1.level2'``.

    Every item found under the final component of ``path`` is flattened with
    ``fj.flatten(..., root_keys_to_ignore=keys)`` and appended to the
    module-level ``recs_dict`` under that component's name. Nothing is
    returned.
    """
    parts = path.split('.')  # split once instead of on every iteration
    at_last_level = depth >= len(parts) - 1
    # A branch that lacks this level (missing key or null) contributes no
    # records instead of aborting the whole walk.
    for item in data_dict.get(level) or []:
        if at_last_level:
            # Accumulate across all parents: assigning a per-call list would
            # keep only the records of the last parent visited.
            recs_dict.setdefault(level, []).append(
                fj.flatten(item, root_keys_to_ignore=keys)
            )
        else:
            do_step(item, parts[depth + 1], depth + 1, path)
# Collect the records for every configured path, starting at its first key.
for path in levels:
    do_step(data, path.split('.')[0], 0, path)
# Print one DataFrame per collected level.
for key, value in recs_dict.items():
    print(key)
    df = pd.DataFrame(value)
    print(df)
这是输出:
level3
identifiers_0_type identifiers_0_scheme identifiers_0_value identifiers_1_type identifiers_1_scheme identifiers_1_value name type
0 abc def 123 abc def 123 abs level3
1 abc def 123 abc def 123 abs level3
level2
identifiers_0_type identifiers_0_scheme identifiers_0_value identifiers_1_type identifiers_1_scheme identifiers_1_value name type
0 abc def 123 abc def 123 abs level2
1 abc def 123 abc def 123 abs abd
level1
identifiers_0_type identifiers_0_scheme identifiers_0_value identifiers_1_type identifiers_1_scheme identifiers_1_value name type
0 abc def 123 abc def 123 asd level1
data
identifiers_0_type identifiers_0_scheme identifiers_0_value identifiers_1_type identifiers_1_scheme identifiers_1_value name type
0 abc def 123 abc def 123 qwer abd
我想逐级展平以下 JSON,并为每个级别创建一个 pandas 数据框。我使用 flatten_json
来做到这一点,但为此我需要逐级遍历,这就产生了多层嵌套的 for 循环:
{
"metadata": {
"name": "abc",
"time": "2020-04-01"
},
"data": [
{
"identifiers": [
{
"type": "abc",
"scheme": "def",
"value": "123"
},
{
"type": "abc",
"scheme": "def",
"value": "123"
}
],
"name": "qwer",
"type": "abd",
"level1": [
{
"identifiers": [
{
"type": "abc",
"scheme": "def",
"value": "123"
},
{
"type": "abc",
"scheme": "def",
"value": "123"
}
],
"name": "asd",
"type": "abd",
"level2": [
{
"identifiers": [
{
"type": "abc",
"scheme": "def",
"value": "123"
},
{
"type": "abc",
"scheme": "def",
"value": "123"
}
],
"name": "abs",
"type": "abd"
},
{
"identifiers": [
{
"type": "abc",
"scheme": "def",
"value": "123"
},
{
"type": "abc",
"scheme": "def",
"value": "123"
}
],
"name": "abs",
"type": "abd"
}
]
}
]
}
]
}
我正在尝试使用下面的代码 flatten_json (Flatten JSON in Python) 将这个 json 展平:
import pandas as pd
import flatten_json as fj
import json
# Collect one flattened record per innermost "level2" entry.
level2 = []
keys = {'data', 'level1', 'level2'}  # names of the nested-list keys; not used below
# Read the file as UTF-8 explicitly instead of relying on the platform's
# default text encoding.
with open('test_lh.json', encoding='utf-8') as f:
    data = json.load(f)
# One loop per nesting level: data -> level1 -> level2.
for x in data['data']:
    for y in x['level1']:
        for z in y['level2']:
            dic = fj.flatten(z)
            level2.append(dic)
df = pd.DataFrame(level2)
print(df)
输出如下:
identifiers_0_type identifiers_0_scheme identifiers_0_value identifiers_1_type identifiers_1_scheme identifiers_1_value name type
0 abc def 123 abc def 123 abs abd
1 abc def 123 abc def 123 abs abd
我如何编写一个递归函数来获得相同的输出,而不必手写 n 层 for 循环?嵌套的层级可能有很多层。我已经尝试为此使用 json_normalize,
但我还需要在最终输出中包含父级标识符,
而 json_normalize 不支持多个记录路径。
我用递归解决了它,这是我的代码:
import json
import pandas as pd
import flatten_json as fj
# Keys passed to fj.flatten as root_keys_to_ignore, so that a record's nested
# child lists are left out of its flattened row.
keys = {'data', 'level1', 'level2', 'level3'}
# Read the file as UTF-8 explicitly instead of relying on the platform's
# default text encoding.
with open('test_lh.json', encoding='utf-8') as f:
    data = json.load(f)
# Dotted paths from the document root to each list of records to extract.
levels = ['data.level1.level2.level3', 'data.level1.level2', 'data.level1', 'data']
# Maps a level name (last component of a path) to its flattened records.
recs_dict = {}
def do_step(data_dict, level, depth, path):
    """Walk ``path`` recursively and collect flattened records of its last level.

    Args:
        data_dict: Mapping that holds the list stored under ``level``.
        level: Key in ``data_dict`` whose list is iterated at this depth.
        depth: Zero-based index of ``level`` within ``path``.
        path: Dot-separated chain of keys, e.g. ``'data.level1.level2'``.

    Every item found under the final component of ``path`` is flattened with
    ``fj.flatten(..., root_keys_to_ignore=keys)`` and appended to the
    module-level ``recs_dict`` under that component's name. Nothing is
    returned.
    """
    parts = path.split('.')  # split once instead of on every iteration
    at_last_level = depth >= len(parts) - 1
    # A branch that lacks this level (missing key or null) contributes no
    # records instead of aborting the whole walk.
    for item in data_dict.get(level) or []:
        if at_last_level:
            # Accumulate across all parents: assigning a per-call list would
            # keep only the records of the last parent visited.
            recs_dict.setdefault(level, []).append(
                fj.flatten(item, root_keys_to_ignore=keys)
            )
        else:
            do_step(item, parts[depth + 1], depth + 1, path)
# Collect the records for every configured path, starting at its first key.
for path in levels:
    do_step(data, path.split('.')[0], 0, path)
# Print one DataFrame per collected level.
for key, value in recs_dict.items():
    print(key)
    df = pd.DataFrame(value)
    print(df)
这是输出:
level3
identifiers_0_type identifiers_0_scheme identifiers_0_value identifiers_1_type identifiers_1_scheme identifiers_1_value name type
0 abc def 123 abc def 123 abs level3
1 abc def 123 abc def 123 abs level3
level2
identifiers_0_type identifiers_0_scheme identifiers_0_value identifiers_1_type identifiers_1_scheme identifiers_1_value name type
0 abc def 123 abc def 123 abs level2
1 abc def 123 abc def 123 abs abd
level1
identifiers_0_type identifiers_0_scheme identifiers_0_value identifiers_1_type identifiers_1_scheme identifiers_1_value name type
0 abc def 123 abc def 123 asd level1
data
identifiers_0_type identifiers_0_scheme identifiers_0_value identifiers_1_type identifiers_1_scheme identifiers_1_value name type
0 abc def 123 abc def 123 qwer abd