从位置/名称空间列表构建 python 中的树的惯用方法?

An idiomatic way to construct a tree in python from a list of locations / namespaces?

在 python 中完成以下任务的惯用方法是什么?该算法不必高效或使用常量内存。

给定元组列表[("location", data)],例如:

from_this = [
    ("global.module1.submodule1.function1", "some data"),
    ("global.module1.func1", "other data"),
    ("global.module1.submodule1.func2", "data again"),
    ("global.module1.submodule1.func3", "data"),
    ("global.module2.func3", "data"),
    ("global.module4.submodule1.func3", "data"),
    ("global.module4.submodule2.func4", "daata"),
    ("global.module4.submodule2", "da_ta")
]

目标是构建字典的分层树(基于位置)。例如:

{"name": "global", "children": [
    {"name": "module1", "children": [ 
        {"name": "submodule1", "children": [
            {"name": "function1", "data": "some data"},
            {"name": "func2", "data": "data again"},
            {"name": "func3", "data": "data"}
            ]},
        {"name": "func1", "data": "other data"}
        ]
     },
    {"name": "module2", "children": [{"name": "func3", "data": "data"}]},
    {"name": "module4", "children": [
        {"name": "submodule1", "children": [{"name": "func3", "data": "data"}]},
        {"name": "submodule2", "data": "da_ta", "children": [{"name": "func4", "data": "daata"}]}
    ]}
] }

这是一种解决方案。我没有优化它,但我认为它应该相当高效并且使用常量内存。

def process(entries):
    root = {'name': None}
    for entry in entries:
        node = root
        for name in entry[0].split('.'):
            try:
                node = next(child for child in node['children'] if child['name'] == name)
            except KeyError:
                node['children'] = [{'name': name}]
                node = node['children'][-1]
            except StopIteration:
                node['children'].append({'name': name})
                node = node['children'][-1]
        node['data'] = entry[1]
    return root

请注意,return 值将是树的根,看起来像 {'name': None, 'children': [ ... ]},因为可能有多个顶级命名空间。在您给出的示例中,只有一个顶级命名空间 global 但通常情况下可能并非如此。

示例:使用示例数据from_this

>>> pprint(process(from_this))
{'children': [{'children': [{'children': [{'children': [{'data': 'some data',
                                                         'name': 'function1'},
                                                        {'data': 'data again',
                                                         'name': 'func2'},
                                                        {'data': 'data',
                                                         'name': 'func3'}],
                                           'name': 'submodule1'},
                                          {'data': 'other data',
                                           'name': 'func1'}],
                             'name': 'module1'},
                            {'children': [{'data': 'data',
                                           'name': 'func3'}],
                             'name': 'module2'},
                            {'children': [{'children': [{'data': 'data',
                                                         'name': 'func3'}],
                                           'name': 'submodule1'},
                                          {'children': [{'data': 'daata',
                                                         'name': 'func4'}],
                                           'data': 'da_ta',
                                           'name': 'submodule2'}],
                             'name': 'module4'}],
               'name': 'global'}],
 'name': None}

所以第一个解决方案类似于Reximus的答案,一个交互式解决方案。请注意,该树以名为 root 的节点为根,允许多个顶级名称空间:

#! /usr/bin/env python
import pprint

from_this = [
        ("global.module1.submodule1.function1", "some data"),
        ("global.module1.func1", "other data"),
        ("global.module1.submodule1.func2", "data again"),
        ("global.module1.submodule1.func3", "data"),
        ("global.module2.func3", "data"),
        ("global.module4.submodule1.func3", "data"),
        ("global.module4.submodule2.func4", "daata"),
        ("global.module4.submodule2", "da_ta")
]


def make_node():
    d = {
         "name": "",
         "data": "",
         "children": []
         }
    return d

path_sep = "."
tree = make_node()
tree["name"] = "root"
for path, data in from_this:
    current = tree
    path_list = path.split(path_sep)
    for name in path_list:
        next_node = None
        for child in current["children"]:
            if child["name"] == name:
                next_node = child
                break
        if next_node is None:
            n = make_node()
            n["name"] = name
            current["children"].append(n)
            current = n
        else:
            current = next_node
    current["data"] = data
pprint.pprint(tree, indent=1)

但也许使用递归是解决任何树相关问题的更 "idiomatic" 方法:

def insert(path, data, node):
    if len(path) == 0:
        node["data"] = data
        return
    for child in node["children"]:
        if child["name"] == path[0]:
            insert(path[1:], data, child)
            return
    child = make_node()
    child["name"] = path[0]
    node["children"].append(child)
    insert(path[1:], data, child)

tree = make_node()
tree["name"] = "root"
for path, data in from_this:
    insert(path.split(path_sep), data, tree)
pprint.pprint(tree)