Python 将简单的字典列表转换为层次结构树
Python Convert flat list of dicts to hierarchy tree
我在尝试转换以下列表时遇到问题:
lst = [
{"id": 0, "job": "CEO", "ManagerID": 0, "name": "John Smith"},
{"id": 1, "job": "Medical Manager", "ManagerID": 0, "name": "Medic 1"},
{"id": 2, "job": "Medical Assist", "ManagerID": 1, "name": "Medic 2"},
{"id": 3, "job": "ICT Manager", "ManagerID": 0, "name": "ICT 1"},
{"id": 4, "job": "ICT Assist", "ManagerID": 3, "name": "ICT 2"},
{"id": 5, "job": "ICT Junior", "ManagerID": 4, "name": "ICT 3"}
]
变成
这样的格式
output = [
{"id": 0, "job": "CEO", "ManagerID": 0, "name": "John Smith", "children" : [
{ "id":1, "job": "Medical Manager", "name": "Medic 1", "children" : [
{"id": 2, "job": "Medical Assist", "name": "Medic 2"}
]
},
{"id": 3, "job": "ICT Manager", "name": "ICT 1", "children":[
{"id": 4, "job": "ICT Assist", "name": "ICT 2", "children" : [
{"id": 5, "job": "ICT Junior", "name": "ICT 3"}
]}
]}
],
}]
如果只有一个根节点 (ManagerID = 0),则所有其他分支都会断开。
我尝试改编 中的代码,但无法生成所需的格式
我一直在使用的代码如下,但是仍然有父节点的重复
classes = [] #everyones id
for item in lst:
name = item['id']
if name not in classes:
classes.append(name)
treenodes = {}
root_node = None
for item in lst: # Create tree nodes
item['children'] = []
name = item['id']
treenodes[name] = item
parent = item['ManagerID']
if parent not in classes: # parent is root node, create
if parent not in treenodes:
node = {}
node['ManagerID'] = 0 #set manager to root
node['children'] = []
node['id'] = parent
root_node = node
treenodes[parent] = node
# Connect parents and children
for item in lst: # Create tree nodes
parent = item['ManagerID']
parent_node = treenodes[parent]
parent_node['children'].append(item)
output = treenodes
非常感谢任何帮助。
您的代码确实有效,但您需要使用 treenodes[0]
条目(CEO)。 treenodes
中的其余键值对仅用于簿记,以便轻松找到给定员工条目的给定经理。
如果你不能指望0
是根节点的ID,那么你可以使用CEO被标记为管理自己的事实;根节点是经理 ID 指向他们自己的 ID 的节点。更常见的情况是根节点根本没有父ID。
您还将 CEO 添加到他们自己的 children
列表中(CEO 的经理 ID 是他们自己的 ID),因此您的树中有一个递归引用。
您找到的代码不是最清晰或最有效的。我会构建一个从 id
到复制对象的字典(所以你原来的 lst
字典没有改变),然后遍历 那个结构 并添加条目到他们的管理员 ID 条目。我正在使用 'root nodes self-reference' 规则(因此经理 ID 等于他们自己的 ID):
employees = {}
managers = set()
root_id = None
for emp in lst:
id, mid = emp['id'], emp['ManagerID']
# create a copy of emp, and add a "children" list
employees[id] = {**emp, 'children': []}
managers.add(mid)
if id == mid:
# the root of the tree references itself as the manager
root_id = id
# add empty manager entries for missing manager IDs, reporting to root ID.
for id in managers - employees.keys():
employees[id] = {
'id': id, 'ManagerID': root_id, 'children': [],
'job': None, 'name': None
}
for id, emp in employees.items():
manager = employees[emp.pop('ManagerID')]
if id != root_id: # don't add the root to anything
manager['children'].append(emp)
output = employees[root_id]
上面使用一个集合来跟踪哪些经理 ID 已被查看,因此您可以简单地添加缺少的经理条目(在这种情况下向 CEO 报告)。
对于您的输入,产生:
{'id': 0, 'job': 'CEO', 'name': 'John Smith', 'children':
[{'id': 1, 'job': 'Medical Manager', 'name': 'Medic 1', 'children':
[{'id': 2, 'job': 'Medical Assist', 'name': 'Medic 2', 'children': []}],
},
{'id': 3, 'job': 'ICT Manager', 'name': 'ICT 1', 'children':
[{'id': 4, 'job': 'ICT Assist', 'name': 'ICT 2', 'children':
[{'id': 5, 'job': 'ICT Junior', 'name': 'ICT 3', 'children': []}]
}]
}]
}
这是构建层次结构的递归版本。
递归版本
from pprint import pprint
def to_lookup(employees):
employee_lookup = dict()
for employee in employees:
if employee["id"] != employee["ManagerID"]:
manager_id = employee["ManagerID"]
children = employee_lookup.get(manager_id)
if not children:
children = employee_lookup[manager_id] = list()
children.append(employee.copy())
else:
manager = employee.copy()
return manager, employee_lookup
def build_hierarchy(manager, employee_lookup):
employees = employee_lookup.get(manager["id"], list())
for employee in employees:
build_hierarchy(employee, employee_lookup)
if employees:
manager['children'] = employees
return manager
employees = [
{"id": 0, "job": "CEO", "ManagerID": 0, "name": "John Smith"},
{"id": 1, "job": "Medical Manager", "ManagerID": 0, "name": "Medic 1"},
{"id": 2, "job": "Medical Assist", "ManagerID": 1, "name": "Medic 2"},
{"id": 3, "job": "ICT Manager", "ManagerID": 0, "name": "ICT 1"},
{"id": 4, "job": "ICT Assist", "ManagerID": 3, "name": "ICT 2"},
{"id": 5, "job": "ICT Junior", "ManagerID": 4, "name": "ICT 3"}
]
manager, employee_lookup = to_lookup(employees)
hierarchy = build_hierarchy(manager, employee_lookup)
pprint(hierarchy)
输出
{'ManagerID': 0,
'children': [{'ManagerID': 0,
'children': [{'ManagerID': 1,
'id': 2,
'job': 'Medical Assist',
'name': 'Medic 2'}],
'id': 1,
'job': 'Medical Manager',
'name': 'Medic 1'},
{'ManagerID': 0,
'children': [{'ManagerID': 3,
'children': [{'ManagerID': 4,
'id': 5,
'job': 'ICT Junior',
'name': 'ICT 3'}],
'id': 4,
'job': 'ICT Assist',
'name': 'ICT 2'}],
'id': 3,
'job': 'ICT Manager',
'name': 'ICT 1'}],
'id': 0,
'job': 'CEO',
'name': 'John Smith'}
性能测试
hierarchy_size = 2000000
employees = [
{"id": 0, "ManagerID": 0},
]
for idx in range(1, hierarchy_size):
manager_id = random.randint(0, idx - 1)
employees.append({"id": idx, "ManagerID": manager_id})
start = datetime.datetime.now()
manager, employee_lookup = to_lookup(employees)
hierarchy = build_hierarchy(manager, employee_lookup)
print(datetime.datetime.now() - start)
我在尝试转换以下列表时遇到问题:
lst = [
{"id": 0, "job": "CEO", "ManagerID": 0, "name": "John Smith"},
{"id": 1, "job": "Medical Manager", "ManagerID": 0, "name": "Medic 1"},
{"id": 2, "job": "Medical Assist", "ManagerID": 1, "name": "Medic 2"},
{"id": 3, "job": "ICT Manager", "ManagerID": 0, "name": "ICT 1"},
{"id": 4, "job": "ICT Assist", "ManagerID": 3, "name": "ICT 2"},
{"id": 5, "job": "ICT Junior", "ManagerID": 4, "name": "ICT 3"}
]
变成
这样的格式output = [
{"id": 0, "job": "CEO", "ManagerID": 0, "name": "John Smith", "children" : [
{ "id":1, "job": "Medical Manager", "name": "Medic 1", "children" : [
{"id": 2, "job": "Medical Assist", "name": "Medic 2"}
]
},
{"id": 3, "job": "ICT Manager", "name": "ICT 1", "children":[
{"id": 4, "job": "ICT Assist", "name": "ICT 2", "children" : [
{"id": 5, "job": "ICT Junior", "name": "ICT 3"}
]}
]}
],
}]
如果只有一个根节点 (ManagerID = 0),则所有其他分支都会断开。
我尝试改编
我一直在使用的代码如下,但是仍然有父节点的重复
classes = [] #everyones id
for item in lst:
name = item['id']
if name not in classes:
classes.append(name)
treenodes = {}
root_node = None
for item in lst: # Create tree nodes
item['children'] = []
name = item['id']
treenodes[name] = item
parent = item['ManagerID']
if parent not in classes: # parent is root node, create
if parent not in treenodes:
node = {}
node['ManagerID'] = 0 #set manager to root
node['children'] = []
node['id'] = parent
root_node = node
treenodes[parent] = node
# Connect parents and children
for item in lst: # Create tree nodes
parent = item['ManagerID']
parent_node = treenodes[parent]
parent_node['children'].append(item)
output = treenodes
非常感谢任何帮助。
您的代码确实有效,但您需要使用 treenodes[0]
条目(CEO)。 treenodes
中的其余键值对仅用于簿记,以便轻松找到给定员工条目的给定经理。
如果你不能指望0
是根节点的ID,那么你可以使用CEO被标记为管理自己的事实;根节点是经理 ID 指向他们自己的 ID 的节点。更常见的情况是根节点根本没有父ID。
您还将 CEO 添加到他们自己的 children
列表中(CEO 的经理 ID 是他们自己的 ID),因此您的树中有一个递归引用。
您找到的代码不是最清晰或最有效的。我会构建一个从 id
到复制对象的字典(所以你原来的 lst
字典没有改变),然后遍历 那个结构 并添加条目到他们的管理员 ID 条目。我正在使用 'root nodes self-reference' 规则(因此经理 ID 等于他们自己的 ID):
employees = {}
managers = set()
root_id = None
for emp in lst:
id, mid = emp['id'], emp['ManagerID']
# create a copy of emp, and add a "children" list
employees[id] = {**emp, 'children': []}
managers.add(mid)
if id == mid:
# the root of the tree references itself as the manager
root_id = id
# add empty manager entries for missing manager IDs, reporting to root ID.
for id in managers - employees.keys():
employees[id] = {
'id': id, 'ManagerID': root_id, 'children': [],
'job': None, 'name': None
}
for id, emp in employees.items():
manager = employees[emp.pop('ManagerID')]
if id != root_id: # don't add the root to anything
manager['children'].append(emp)
output = employees[root_id]
上面使用一个集合来跟踪哪些经理 ID 已被查看,因此您可以简单地添加缺少的经理条目(在这种情况下向 CEO 报告)。
对于您的输入,产生:
{'id': 0, 'job': 'CEO', 'name': 'John Smith', 'children':
[{'id': 1, 'job': 'Medical Manager', 'name': 'Medic 1', 'children':
[{'id': 2, 'job': 'Medical Assist', 'name': 'Medic 2', 'children': []}],
},
{'id': 3, 'job': 'ICT Manager', 'name': 'ICT 1', 'children':
[{'id': 4, 'job': 'ICT Assist', 'name': 'ICT 2', 'children':
[{'id': 5, 'job': 'ICT Junior', 'name': 'ICT 3', 'children': []}]
}]
}]
}
这是构建层次结构的递归版本。
递归版本
from pprint import pprint
def to_lookup(employees):
employee_lookup = dict()
for employee in employees:
if employee["id"] != employee["ManagerID"]:
manager_id = employee["ManagerID"]
children = employee_lookup.get(manager_id)
if not children:
children = employee_lookup[manager_id] = list()
children.append(employee.copy())
else:
manager = employee.copy()
return manager, employee_lookup
def build_hierarchy(manager, employee_lookup):
employees = employee_lookup.get(manager["id"], list())
for employee in employees:
build_hierarchy(employee, employee_lookup)
if employees:
manager['children'] = employees
return manager
employees = [
{"id": 0, "job": "CEO", "ManagerID": 0, "name": "John Smith"},
{"id": 1, "job": "Medical Manager", "ManagerID": 0, "name": "Medic 1"},
{"id": 2, "job": "Medical Assist", "ManagerID": 1, "name": "Medic 2"},
{"id": 3, "job": "ICT Manager", "ManagerID": 0, "name": "ICT 1"},
{"id": 4, "job": "ICT Assist", "ManagerID": 3, "name": "ICT 2"},
{"id": 5, "job": "ICT Junior", "ManagerID": 4, "name": "ICT 3"}
]
manager, employee_lookup = to_lookup(employees)
hierarchy = build_hierarchy(manager, employee_lookup)
pprint(hierarchy)
输出
{'ManagerID': 0,
'children': [{'ManagerID': 0,
'children': [{'ManagerID': 1,
'id': 2,
'job': 'Medical Assist',
'name': 'Medic 2'}],
'id': 1,
'job': 'Medical Manager',
'name': 'Medic 1'},
{'ManagerID': 0,
'children': [{'ManagerID': 3,
'children': [{'ManagerID': 4,
'id': 5,
'job': 'ICT Junior',
'name': 'ICT 3'}],
'id': 4,
'job': 'ICT Assist',
'name': 'ICT 2'}],
'id': 3,
'job': 'ICT Manager',
'name': 'ICT 1'}],
'id': 0,
'job': 'CEO',
'name': 'John Smith'}
性能测试
hierarchy_size = 2000000
employees = [
{"id": 0, "ManagerID": 0},
]
for idx in range(1, hierarchy_size):
manager_id = random.randint(0, idx - 1)
employees.append({"id": idx, "ManagerID": manager_id})
start = datetime.datetime.now()
manager, employee_lookup = to_lookup(employees)
hierarchy = build_hierarchy(manager, employee_lookup)
print(datetime.datetime.now() - start)