Flatten 函数检索错误的值
Flatten function retrieving the wrong value
我目前有一个 Python 脚本,它使用一个展平(flatten)函数来展平 JSON 对象,然后我再使用 json_normalize 将展平后的 JSON 转换为 DataFrame。
import pandas as pd
from pandas import json_normalize
from collections.abc import MutableMapping as mm
def flatten(dictionary, p_key=None, parent_key=False, separator='.'):
    """Collapse a nested mapping (or a list of mappings) into one flat dict.

    Nested mapping keys are joined with ``separator``. Elements of a list
    value are all stored under the key of the list itself (no index is
    added), so when several elements produce the same flattened key the
    last one wins. The result is therefore always a single dict, i.e. a
    single row once it is passed to ``json_normalize``.

    Args:
        dictionary: A mapping, or a list of mappings, to flatten.
        p_key: Key prefix inherited from the enclosing level, if any.
        parent_key: When true, each key is prefixed with ``p_key`` and
            ``separator``; when false, ``p_key`` (if truthy) replaces the
            key outright.
        separator: String placed between the parts of a flattened key.

    Returns:
        A flat dict mapping flattened keys to leaf values.
    """
    items = []

    if isinstance(dictionary, list):
        for listval in dictionary:
            # Forward the separator so a custom one is honoured for
            # top-level lists as well.
            items.extend(flatten(listval, separator=separator).items())
        return dict(items)

    for key, value in dictionary.items():
        if parent_key:
            new_key = f"{p_key!s}{separator}{key}"
        else:
            new_key = p_key if p_key else key

        if isinstance(value, mm):
            items.extend(flatten(
                dictionary=value,
                p_key=new_key,
                parent_key=True,
                separator=separator).items())
        elif isinstance(value, list):
            for k, v in enumerate(value):
                # Wrap each element in a one-entry dict; with
                # parent_key=False the index key is replaced by new_key,
                # so every element lands under the same flattened key.
                items.extend(flatten(
                    dictionary={str(k): v},
                    p_key=new_key,
                    parent_key=False,
                    separator=separator).items())
        else:
            items.append((new_key, value))

    # dict() keeps the last value for duplicate keys.
    return dict(items)
# Sample document: a single record whose "Levels" list holds two entries,
# each carrying its own "actions" and "users" lists.
jfile = [
    {
        "id": 1,
        "labelId": 169,
        "indicators": [62],
        "Wait": 6,
        "Levels": [
            {
                "isActive": "true",
                "pressure": 3,
                "actions": [{"isActive": "true", "description": "Place"}],
                "users": [5467, 5469, 5],
            },
            {
                "isActive": "true",
                "pressure": 2,
                "actions": [{"isActive": "true", "description": "Test"}],
                "users": [3253, 6903],
            },
        ],
    }
]

# Flatten every record, then hand the flat dicts to pandas.
flatdoc = list(map(flatten, jfile))
flatdoc = json_normalize(flatdoc)
print(flatdoc)
当前输出:
id labelId indicators Wait Levels.isActive Levels.pressure Levels.actions.isActive Levels.actions.description Levels.users
1 169 62 6 true 2 true Test 6903
不知为何,我目前得到的输出取的是 JSON 对象中最后一个同名键的最后一个值。我需要的是从字典中的第一个键取值;并且当列表不含嵌套、只包含数字/整数(numbers/integers)时,应当直接将其展开(explode),使结果如下所示:
预期输出:
id labelId indicators Wait Levels.isActive Levels.pressure Levels.actions.isActive Levels.actions.description Levels.users
1 169 62 6 true 3 true Place 5467
1 169 62 6 true 3 true Place 5469
1 169 62 6 true 3 true Place 5
我将如何着手修改函数,以便它找到正确的键来生成适当的值?
你的展平函数是错误的:对于给定的 jfile,它只返回一行,而应该返回 5 行。
这是正确的版本:
def flatten(data):
    """Explode every list found in ``data`` into separate records.

    Lists and tuples are flattened element by element into one flat list.
    A mapping that contains a list-valued entry is replaced by one shallow
    copy per list element, with that entry set to the element; the copies
    are then processed again until no list values remain. Nested mappings
    are kept nested (they are not turned into dotted keys here).

    The input is not modified, but returned records are shallow copies and
    may share nested objects with it. A mapping holding an empty list
    yields no records at all.

    Args:
        data: A scalar, a mapping, or a list/tuple of those.

    Returns:
        ``data`` itself for scalars and for mappings without list values,
        otherwise a list of records.
    """
    if isinstance(data, (list, tuple)):
        newdata = []
        for elt in data:
            elt = flatten(elt)
            if isinstance(elt, list):
                newdata.extend(elt)
            else:
                newdata.append(elt)
        return newdata
    elif isinstance(data, mm):
        for k, v in data.items():
            v = flatten(v)
            if isinstance(v, list):
                # One copy of the record per element, then start over on
                # the copies so the remaining list values get exploded too.
                newdata = [data.copy() for _ in v]
                for i, elt in enumerate(v):
                    newdata[i][k] = elt
                return flatten(newdata)
            # A non-list result is the very object already stored under k
            # (scalar, or a mapping without list values), so nothing needs
            # to be written back into the caller's mapping.
    return data
然后你可以直接做:
# Explode the lists with flatten() first, then let pandas build the frame
# from the resulting records.
flatdoc = pd.json_normalize(flatten(jfile))
获得:
id labelId indicators Wait Levels.isActive Levels.pressure Levels.actions.isActive Levels.actions.description Levels.users
0 1 169 62 6 true 3 true Place 5467
1 1 169 62 6 true 3 true Place 5469
2 1 169 62 6 true 3 true Place 5
3 1 169 62 6 true 2 true Test 3253
4 1 169 62 6 true 2 true Test 6903
您可以对生成器使用递归:
from itertools import product
import pandas as pd
# Sample nested input: one record whose 'Levels' list holds two entries, each with its own 'actions' and 'users' lists.
jfile = [{'id': 1, 'labelId': 169, 'indicators': [62], 'Wait': 6, 'Levels': [{'isActive': 'true', 'pressure': 3, 'actions': [{'isActive': 'true', 'description': 'Place'}], 'users': [5467, 5469, 5]}, {'isActive': 'true', 'pressure': 2, 'actions': [{'isActive': 'true', 'description': 'Test'}], 'users': [3253, 6903]}]}]
def get_vals(d, c=None):
    """Yield flattened rows for a nested structure of dicts and lists.

    Args:
        d: A scalar, a dict, or a list of those.
        c: Key path (list of key strings) leading to ``d``; defaults to an
            empty path.

    Yields:
        For a scalar, one ``(dotted_key, value)`` pair. For a dict, one
        list of ``(dotted_key, value)`` pairs per combination of its
        values' alternatives (the Cartesian product across keys). For a
        list, whatever each element yields, in order.
    """
    # Use a fresh list per call rather than a shared mutable default.
    c = [] if c is None else c

    if not isinstance(d, (dict, list)):
        yield ('.'.join(c), d)
    elif isinstance(d, list):
        # Each list element is an alternative at the same key path.
        for b in d:
            yield from get_vals(b, c=c)
    else:
        # One generator of alternatives per key; non-list values are
        # wrapped so every key goes through the list branch.
        per_key = [
            get_vals(b if isinstance(b, list) else [b], c + [a])
            for a, b in d.items()
        ]
        for combo in product(*per_key):
            # A part is either a single (key, value) pair or an already
            # flattened list of pairs from a nested dict; splice the
            # latter so the row stays one flat list of pairs.
            yield [
                pair
                for part in combo
                for pair in (
                    [part]
                    if all(not isinstance(x, (tuple, list)) for x in part)
                    else part
                )
            ]
# Each row yielded by get_vals is a list of (dotted_key, value) pairs;
# dict() turns it into one record for the DataFrame.
df = pd.DataFrame([dict(i) for i in get_vals(jfile)])
输出:
id labelId indicators Wait Levels.isActive Levels.pressure Levels.actions.isActive Levels.actions.description Levels.users
0 1 169 62 6 true 3 true Place 5467
1 1 169 62 6 true 3 true Place 5469
2 1 169 62 6 true 3 true Place 5
3 1 169 62 6 true 2 true Test 3253
4 1 169 62 6 true 2 true Test 6903
我目前有一个 Python 脚本,它使用一个展平(flatten)函数来展平 JSON 对象,然后我再使用 json_normalize 将展平后的 JSON 转换为 DataFrame。
import pandas as pd
from pandas import json_normalize
from collections.abc import MutableMapping as mm
def flatten(dictionary, p_key=None, parent_key=False, separator='.'):
    """Collapse a nested mapping (or a list of mappings) into one flat dict.

    Nested mapping keys are joined with ``separator``. Elements of a list
    value are all stored under the key of the list itself (no index is
    added), so when several elements produce the same flattened key the
    last one wins. The result is therefore always a single dict, i.e. a
    single row once it is passed to ``json_normalize``.

    Args:
        dictionary: A mapping, or a list of mappings, to flatten.
        p_key: Key prefix inherited from the enclosing level, if any.
        parent_key: When true, each key is prefixed with ``p_key`` and
            ``separator``; when false, ``p_key`` (if truthy) replaces the
            key outright.
        separator: String placed between the parts of a flattened key.

    Returns:
        A flat dict mapping flattened keys to leaf values.
    """
    items = []

    if isinstance(dictionary, list):
        for listval in dictionary:
            # Forward the separator so a custom one is honoured for
            # top-level lists as well.
            items.extend(flatten(listval, separator=separator).items())
        return dict(items)

    for key, value in dictionary.items():
        if parent_key:
            new_key = f"{p_key!s}{separator}{key}"
        else:
            new_key = p_key if p_key else key

        if isinstance(value, mm):
            items.extend(flatten(
                dictionary=value,
                p_key=new_key,
                parent_key=True,
                separator=separator).items())
        elif isinstance(value, list):
            for k, v in enumerate(value):
                # Wrap each element in a one-entry dict; with
                # parent_key=False the index key is replaced by new_key,
                # so every element lands under the same flattened key.
                items.extend(flatten(
                    dictionary={str(k): v},
                    p_key=new_key,
                    parent_key=False,
                    separator=separator).items())
        else:
            items.append((new_key, value))

    # dict() keeps the last value for duplicate keys.
    return dict(items)
# Sample document: a single record whose "Levels" list holds two entries,
# each carrying its own "actions" and "users" lists.
jfile = [
    {
        "id": 1,
        "labelId": 169,
        "indicators": [62],
        "Wait": 6,
        "Levels": [
            {
                "isActive": "true",
                "pressure": 3,
                "actions": [{"isActive": "true", "description": "Place"}],
                "users": [5467, 5469, 5],
            },
            {
                "isActive": "true",
                "pressure": 2,
                "actions": [{"isActive": "true", "description": "Test"}],
                "users": [3253, 6903],
            },
        ],
    }
]

# Flatten every record, then hand the flat dicts to pandas.
flatdoc = list(map(flatten, jfile))
flatdoc = json_normalize(flatdoc)
print(flatdoc)
当前输出:
id labelId indicators Wait Levels.isActive Levels.pressure Levels.actions.isActive Levels.actions.description Levels.users
1 169 62 6 true 2 true Test 6903
不知为何,我目前得到的输出取的是 JSON 对象中最后一个同名键的最后一个值。我需要的是从字典中的第一个键取值;并且当列表不含嵌套、只包含数字/整数(numbers/integers)时,应当直接将其展开(explode),使结果如下所示:
预期输出:
id labelId indicators Wait Levels.isActive Levels.pressure Levels.actions.isActive Levels.actions.description Levels.users
1 169 62 6 true 3 true Place 5467
1 169 62 6 true 3 true Place 5469
1 169 62 6 true 3 true Place 5
我将如何着手修改函数,以便它找到正确的键来生成适当的值?
你的展平函数是错误的:对于给定的 jfile,它只返回一行,而应该返回 5 行。
这是正确的版本:
def flatten(data):
    """Explode every list found in ``data`` into separate records.

    Lists and tuples are flattened element by element into one flat list.
    A mapping that contains a list-valued entry is replaced by one shallow
    copy per list element, with that entry set to the element; the copies
    are then processed again until no list values remain. Nested mappings
    are kept nested (they are not turned into dotted keys here).

    The input is not modified, but returned records are shallow copies and
    may share nested objects with it. A mapping holding an empty list
    yields no records at all.

    Args:
        data: A scalar, a mapping, or a list/tuple of those.

    Returns:
        ``data`` itself for scalars and for mappings without list values,
        otherwise a list of records.
    """
    if isinstance(data, (list, tuple)):
        newdata = []
        for elt in data:
            elt = flatten(elt)
            if isinstance(elt, list):
                newdata.extend(elt)
            else:
                newdata.append(elt)
        return newdata
    elif isinstance(data, mm):
        for k, v in data.items():
            v = flatten(v)
            if isinstance(v, list):
                # One copy of the record per element, then start over on
                # the copies so the remaining list values get exploded too.
                newdata = [data.copy() for _ in v]
                for i, elt in enumerate(v):
                    newdata[i][k] = elt
                return flatten(newdata)
            # A non-list result is the very object already stored under k
            # (scalar, or a mapping without list values), so nothing needs
            # to be written back into the caller's mapping.
    return data
然后你可以直接做:
# Explode the lists with flatten() first, then let pandas build the frame
# from the resulting records.
flatdoc = pd.json_normalize(flatten(jfile))
获得:
id labelId indicators Wait Levels.isActive Levels.pressure Levels.actions.isActive Levels.actions.description Levels.users
0 1 169 62 6 true 3 true Place 5467
1 1 169 62 6 true 3 true Place 5469
2 1 169 62 6 true 3 true Place 5
3 1 169 62 6 true 2 true Test 3253
4 1 169 62 6 true 2 true Test 6903
您可以对生成器使用递归:
from itertools import product
import pandas as pd
# Sample nested input: one record whose 'Levels' list holds two entries, each with its own 'actions' and 'users' lists.
jfile = [{'id': 1, 'labelId': 169, 'indicators': [62], 'Wait': 6, 'Levels': [{'isActive': 'true', 'pressure': 3, 'actions': [{'isActive': 'true', 'description': 'Place'}], 'users': [5467, 5469, 5]}, {'isActive': 'true', 'pressure': 2, 'actions': [{'isActive': 'true', 'description': 'Test'}], 'users': [3253, 6903]}]}]
def get_vals(d, c=None):
    """Yield flattened rows for a nested structure of dicts and lists.

    Args:
        d: A scalar, a dict, or a list of those.
        c: Key path (list of key strings) leading to ``d``; defaults to an
            empty path.

    Yields:
        For a scalar, one ``(dotted_key, value)`` pair. For a dict, one
        list of ``(dotted_key, value)`` pairs per combination of its
        values' alternatives (the Cartesian product across keys). For a
        list, whatever each element yields, in order.
    """
    # Use a fresh list per call rather than a shared mutable default.
    c = [] if c is None else c

    if not isinstance(d, (dict, list)):
        yield ('.'.join(c), d)
    elif isinstance(d, list):
        # Each list element is an alternative at the same key path.
        for b in d:
            yield from get_vals(b, c=c)
    else:
        # One generator of alternatives per key; non-list values are
        # wrapped so every key goes through the list branch.
        per_key = [
            get_vals(b if isinstance(b, list) else [b], c + [a])
            for a, b in d.items()
        ]
        for combo in product(*per_key):
            # A part is either a single (key, value) pair or an already
            # flattened list of pairs from a nested dict; splice the
            # latter so the row stays one flat list of pairs.
            yield [
                pair
                for part in combo
                for pair in (
                    [part]
                    if all(not isinstance(x, (tuple, list)) for x in part)
                    else part
                )
            ]
# Each row yielded by get_vals is a list of (dotted_key, value) pairs;
# dict() turns it into one record for the DataFrame.
df = pd.DataFrame([dict(i) for i in get_vals(jfile)])
输出:
id labelId indicators Wait Levels.isActive Levels.pressure Levels.actions.isActive Levels.actions.description Levels.users
0 1 169 62 6 true 3 true Place 5467
1 1 169 62 6 true 3 true Place 5469
2 1 169 62 6 true 3 true Place 5
3 1 169 62 6 true 2 true Test 3253
4 1 169 62 6 true 2 true Test 6903