从位置/名称空间列表构建 python 中的树的惯用方法?
An idiomatic way to construct a tree in python from a list of locations / namespaces?
在 python 中完成以下任务的惯用方法是什么?该算法不必高效或使用常量内存。
给定元组列表[("location", data)]
,例如:
from_this = [
("global.module1.submodule1.function1", "some data"),
("global.module1.func1", "other data"),
("global.module1.submodule1.func2", "data again"),
("global.module1.submodule1.func3", "data"),
("global.module2.func3", "data"),
("global.module4.submodule1.func3", "data"),
("global.module4.submodule2.func4", "daata"),
("global.module4.submodule2", "da_ta")
]
目标是构建字典的分层树(基于位置)。例如:
{"name": "global", "children": [
{"name": "module1", "children": [
{"name": "submodule1", "children": [
{"name": "function1", "data": "some data"},
{"name": "func2", "data": "data again"},
{"name": "func3", "data": "data"}
]},
{"name": "func1", "data": "other data"}
]
},
{"name": "module2", "children": [{"name": "func3", "data": "data"}]},
{"name": "module4", "children": [
{"name": "submodule1", "children": [{"name": "func3", "data": "data"}]},
{"name": "submodule2", "data": "da_ta", "children": [{"name": "func4", "data": "daata"}]}
]}
] }
这是一种解决方案。我没有优化它,但我认为它应该相当高效并且使用常量内存。
def process(entries):
root = {'name': None}
for entry in entries:
node = root
for name in entry[0].split('.'):
try:
node = next(child for child in node['children'] if child['name'] == name)
except KeyError:
node['children'] = [{'name': name}]
node = node['children'][-1]
except StopIteration:
node['children'].append({'name': name})
node = node['children'][-1]
node['data'] = entry[1]
return root
请注意,return 值将是树的根,看起来像 {'name': None, 'children': [ ... ]}
,因为可能有多个顶级命名空间。在您给出的示例中,只有一个顶级命名空间 global
但通常情况下可能并非如此。
示例:使用示例数据from_this
>>> pprint(process(from_this))
{'children': [{'children': [{'children': [{'children': [{'data': 'some data',
'name': 'function1'},
{'data': 'data again',
'name': 'func2'},
{'data': 'data',
'name': 'func3'}],
'name': 'submodule1'},
{'data': 'other data',
'name': 'func1'}],
'name': 'module1'},
{'children': [{'data': 'data',
'name': 'func3'}],
'name': 'module2'},
{'children': [{'children': [{'data': 'data',
'name': 'func3'}],
'name': 'submodule1'},
{'children': [{'data': 'daata',
'name': 'func4'}],
'data': 'da_ta',
'name': 'submodule2'}],
'name': 'module4'}],
'name': 'global'}],
'name': None}
所以第一个解决方案类似于Reximus的答案,一个交互式解决方案。请注意,该树以名为 root 的节点为根,允许多个顶级名称空间:
#! /usr/bin/env python
import pprint
from_this = [
("global.module1.submodule1.function1", "some data"),
("global.module1.func1", "other data"),
("global.module1.submodule1.func2", "data again"),
("global.module1.submodule1.func3", "data"),
("global.module2.func3", "data"),
("global.module4.submodule1.func3", "data"),
("global.module4.submodule2.func4", "daata"),
("global.module4.submodule2", "da_ta")
]
def make_node():
d = {
"name": "",
"data": "",
"children": []
}
return d
path_sep = "."
tree = make_node()
tree["name"] = "root"
for path, data in from_this:
current = tree
path_list = path.split(path_sep)
for name in path_list:
next_node = None
for child in current["children"]:
if child["name"] == name:
next_node = child
break
if next_node is None:
n = make_node()
n["name"] = name
current["children"].append(n)
current = n
else:
current = next_node
current["data"] = data
pprint.pprint(tree, indent=1)
但也许使用递归是解决任何树相关问题的更 "idiomatic" 方法:
def insert(path, data, node):
if len(path) == 0:
node["data"] = data
return
for child in node["children"]:
if child["name"] == path[0]:
insert(path[1:], data, child)
return
child = make_node()
child["name"] = path[0]
node["children"].append(child)
insert(path[1:], data, child)
tree = make_node()
tree["name"] = "root"
for path, data in from_this:
insert(path.split(path_sep), data, tree)
pprint.pprint(tree)
在 python 中完成以下任务的惯用方法是什么?该算法不必高效或使用常量内存。
给定元组列表[("location", data)]
,例如:
from_this = [
("global.module1.submodule1.function1", "some data"),
("global.module1.func1", "other data"),
("global.module1.submodule1.func2", "data again"),
("global.module1.submodule1.func3", "data"),
("global.module2.func3", "data"),
("global.module4.submodule1.func3", "data"),
("global.module4.submodule2.func4", "daata"),
("global.module4.submodule2", "da_ta")
]
目标是构建字典的分层树(基于位置)。例如:
{"name": "global", "children": [
{"name": "module1", "children": [
{"name": "submodule1", "children": [
{"name": "function1", "data": "some data"},
{"name": "func2", "data": "data again"},
{"name": "func3", "data": "data"}
]},
{"name": "func1", "data": "other data"}
]
},
{"name": "module2", "children": [{"name": "func3", "data": "data"}]},
{"name": "module4", "children": [
{"name": "submodule1", "children": [{"name": "func3", "data": "data"}]},
{"name": "submodule2", "data": "da_ta", "children": [{"name": "func4", "data": "daata"}]}
]}
] }
这是一种解决方案。我没有优化它,但我认为它应该相当高效并且使用常量内存。
def process(entries):
root = {'name': None}
for entry in entries:
node = root
for name in entry[0].split('.'):
try:
node = next(child for child in node['children'] if child['name'] == name)
except KeyError:
node['children'] = [{'name': name}]
node = node['children'][-1]
except StopIteration:
node['children'].append({'name': name})
node = node['children'][-1]
node['data'] = entry[1]
return root
请注意,return 值将是树的根,看起来像 {'name': None, 'children': [ ... ]}
,因为可能有多个顶级命名空间。在您给出的示例中,只有一个顶级命名空间 global
但通常情况下可能并非如此。
示例:使用示例数据from_this
>>> pprint(process(from_this))
{'children': [{'children': [{'children': [{'children': [{'data': 'some data',
'name': 'function1'},
{'data': 'data again',
'name': 'func2'},
{'data': 'data',
'name': 'func3'}],
'name': 'submodule1'},
{'data': 'other data',
'name': 'func1'}],
'name': 'module1'},
{'children': [{'data': 'data',
'name': 'func3'}],
'name': 'module2'},
{'children': [{'children': [{'data': 'data',
'name': 'func3'}],
'name': 'submodule1'},
{'children': [{'data': 'daata',
'name': 'func4'}],
'data': 'da_ta',
'name': 'submodule2'}],
'name': 'module4'}],
'name': 'global'}],
'name': None}
所以第一个解决方案类似于Reximus的答案,一个交互式解决方案。请注意,该树以名为 root 的节点为根,允许多个顶级名称空间:
#! /usr/bin/env python
import pprint
from_this = [
("global.module1.submodule1.function1", "some data"),
("global.module1.func1", "other data"),
("global.module1.submodule1.func2", "data again"),
("global.module1.submodule1.func3", "data"),
("global.module2.func3", "data"),
("global.module4.submodule1.func3", "data"),
("global.module4.submodule2.func4", "daata"),
("global.module4.submodule2", "da_ta")
]
def make_node():
d = {
"name": "",
"data": "",
"children": []
}
return d
path_sep = "."
tree = make_node()
tree["name"] = "root"
for path, data in from_this:
current = tree
path_list = path.split(path_sep)
for name in path_list:
next_node = None
for child in current["children"]:
if child["name"] == name:
next_node = child
break
if next_node is None:
n = make_node()
n["name"] = name
current["children"].append(n)
current = n
else:
current = next_node
current["data"] = data
pprint.pprint(tree, indent=1)
但也许使用递归是解决任何树相关问题的更 "idiomatic" 方法:
def insert(path, data, node):
if len(path) == 0:
node["data"] = data
return
for child in node["children"]:
if child["name"] == path[0]:
insert(path[1:], data, child)
return
child = make_node()
child["name"] = path[0]
node["children"].append(child)
insert(path[1:], data, child)
tree = make_node()
tree["name"] = "root"
for path, data in from_this:
insert(path.split(path_sep), data, tree)
pprint.pprint(tree)