Python 通过多个键对字典列表进行单向分组和聚合
Python Group and aggregate unidirectionally a list of dictionaries by multiple keys
我正在构建一个树选择器,我需要将我的数据结构化为一棵分组项目的树。我有下面的输入,这是一个字典列表。
# Example input: a flat list of records, one dict per item. Every record
# carries the item's region, group and category alongside the item itself.
data = [
{'region': 'R1', 'group': 'G1', 'category': 'C1', 'item': 'I2'},
{'region': 'R1', 'group': 'G1', 'category': 'C1', 'item': 'I1'},
{'region': 'R1', 'group': 'G2', 'category': 'C2', 'item': 'I3'},
{'region': 'R2', 'group': 'G1', 'category': 'C1', 'item': 'I1'},
{'region': 'R2', 'group': 'G2', 'category': 'C2', 'item': 'I3'},
{'region': 'R2', 'group': 'G2', 'category': 'C2', 'item': 'I4'},
{'region': 'R2', 'group': 'G2', 'category': 'C3', 'item': 'I5'},
]
我想获得以下输出
# Desired output: a tree nested as regions -> groups -> categories -> items.
# Every node is a dict with a "name" key; each non-leaf node also holds the
# list of its children under the plural name of the next level.
result = {
"regions": [
{
"name": "R1",
"groups": [
{
"name": "G1",
"categories": [
{"name": "C1","items": [{ "name": "I2"},{"name": "I1"}]}
]
},
{
"name": "G2",
"categories": [
{"name": "C2", "items": [{"name": "I3"}]}
]
}
]
},
{
"name": "R2",
"groups": [
{
"name": "G1",
"categories": [
{"name": "C1","items": [{"name": "I1"}]}
]
},
{
"name": "G2",
"categories": [
{"name": "C2","items": [{"name": "I3"},{"name": "I4"}]},
{"name": "C3", "items": [{"name": "I5"}]}
]
}
]
}
]
}
经过一些研究,我想出了这个解决方案
from collections import OrderedDict
# Build the tree bottom-up in three grouping passes, each one level higher.
# OrderedDict keeps keys in first-seen order, so every level of the output
# follows the order in which its values first appear in `data`.

# Pass 1: (region, group, category) -> list of item nodes.
d = OrderedDict()
for aggr in data:
    d.setdefault(
        (aggr['region'], aggr['group'], aggr['category']), []
    ).append({"name": aggr['item']})

# Pass 2: (region, group) -> list of category nodes, each owning its items.
d1 = OrderedDict()
for (region, group, category), items in d.items():
    d1.setdefault((region, group), []).append(
        {"name": category, "items": items}
    )

# Pass 3: region -> list of group nodes, each owning its categories.
d2 = OrderedDict()
for (region, group), categories in d1.items():
    d2.setdefault(region, []).append(
        {"name": group, "categories": categories}
    )

# Wrap the per-region group lists into the final tree.
result = {
    "regions": [
        {"name": region, "groups": groups} for region, groups in d2.items()
    ]
}
它可以正常工作,但我认为它不是最 Pythonic 的解决方案,而且我没能进一步简化它。
如果有人能提出其他解决方案或改进上述代码,我将不胜感激。
只要记录已经排好序(就像您示例中的那样),您就可以在递归函数中使用 itertools 中的 groupby,例如:
from itertools import groupby
from operator import itemgetter
def plural(word):
    """Return a simple English plural of *word*.

    A trailing "y" becomes "ies" ("category" -> "categories") unless it
    follows a vowel, in which case only "s" is appended ("key" -> "keys").
    Every other word, including the empty string, just gets an "s".
    """
    # endswith() and slicing never raise on an empty string, unlike word[-1].
    if word.endswith("y") and word[-2:-1].lower() not in {"a", "e", "i", "o", "u"}:
        return f"{word[:-1]}ies"
    return f"{word}s"
def grouping(records, *keys):
    """Nest flat, already-sorted records into a tree with one level per key.

    ``records`` is an iterable of mappings and ``keys`` are the field names
    to group by, outermost first; the last key names the leaf field. Each
    node is ``{"name": value}``, and every non-leaf node also carries the
    list of its children under the pluralized name of the next key.

    ``itertools.groupby`` only merges *adjacent* records with equal keys, so
    equal values must already be contiguous at every level; otherwise the
    same name shows up as several sibling nodes.
    """
    if len(keys) == 1:
        # Leaf level: one node per record, in input order.
        return [{"name": record[keys[0]]} for record in records]
    return [
        # The recursive call consumes each group iterator before groupby
        # advances to the next group, which would invalidate it.
        {"name": key, plural(keys[1]): grouping(group, *keys[1:])}
        for key, group in groupby(records, itemgetter(keys[0]))
    ]
# Build the tree from `data`: nest by region, then group, then category,
# with the items as leaves. The top-level "regions" key is added by hand.
result = {"regions": grouping(data, "region", "group", "category", "item")}
如果无法保证输入已排序,可以按如下方式调整 grouping,让它在每一层分组之前先排序:
def grouping(records, *keys):
    """Nest flat records into a tree with one level per key, sorting as it goes.

    ``records`` is an iterable of mappings and ``keys`` are the field names
    to group by, outermost first; the last key names the leaf field. Each
    node is ``{"name": value}``, and every non-leaf node also carries the
    list of its children under the pluralized name of the next key.

    Unlike a plain ``groupby`` pass, the input does not need to be ordered:
    every non-leaf level sorts its records by that level's key first, so
    the values of each grouping key must be mutually comparable.
    """
    if len(keys) == 1:
        # Leaf level: one node per record. Leaves are not sorted; because
        # sorted() is stable they keep their relative input order.
        return [{"name": record[keys[0]]} for record in records]
    key_func = itemgetter(keys[0])
    # groupby only merges adjacent equal keys, so make them adjacent first.
    records = sorted(records, key=key_func)
    return [
        {"name": key, plural(keys[1]): grouping(group, *keys[1:])}
        for key, group in groupby(records, key_func)
    ]
或者预先对 data 排序:
# Alternative: sort once up front so the groupby-based grouping sees equal
# keys next to each other at every level.
keys = ["region", "group", "category", "item"]
# itemgetter(*keys) yields a tuple of the four field values, so records are
# ordered by region, then group, then category, then item (leaves included).
data = sorted(data, key=itemgetter(*keys))
result = {"regions": grouping(data, *keys)}
对于问题中提供的 data,第一个版本得到的结果是:
# Output of the first grouping version for the question's data. Items keep
# their input order inside each category (I2 before I1 under R1/G1/C1).
result = {
"regions": [
{
"name": "R1",
"groups": [
{
"name": "G1",
"categories": [
{"name": "C1", "items": [{"name": "I2"}, {"name": "I1"}]
}
]
},
{
"name": "G2",
"categories": [
{"name": "C2", "items": [{"name": "I3"}]}
]
}
]
},
{
"name": "R2",
"groups": [
{
"name": "G1",
"categories": [
{"name": "C1", "items": [{"name": "I1"}]}
]
},
{
"name": "G2",
"categories": [
{"name": "C2", "items": [{"name": "I3"}, {"name": "I4"}]},
{"name": "C3", "items": [{"name": "I5"}]}
]
}
]
}
]
}
我正在构建一个树选择器,我需要将我的数据结构化为一棵分组项目的树。我有下面的输入,这是一个字典列表。
# Example input: a flat list of records, one dict per item. Every record
# carries the item's region, group and category alongside the item itself.
data = [
{'region': 'R1', 'group': 'G1', 'category': 'C1', 'item': 'I2'},
{'region': 'R1', 'group': 'G1', 'category': 'C1', 'item': 'I1'},
{'region': 'R1', 'group': 'G2', 'category': 'C2', 'item': 'I3'},
{'region': 'R2', 'group': 'G1', 'category': 'C1', 'item': 'I1'},
{'region': 'R2', 'group': 'G2', 'category': 'C2', 'item': 'I3'},
{'region': 'R2', 'group': 'G2', 'category': 'C2', 'item': 'I4'},
{'region': 'R2', 'group': 'G2', 'category': 'C3', 'item': 'I5'},
]
我想获得以下输出
# Desired output: a tree nested as regions -> groups -> categories -> items.
# Every node is a dict with a "name" key; each non-leaf node also holds the
# list of its children under the plural name of the next level.
result = {
"regions": [
{
"name": "R1",
"groups": [
{
"name": "G1",
"categories": [
{"name": "C1","items": [{ "name": "I2"},{"name": "I1"}]}
]
},
{
"name": "G2",
"categories": [
{"name": "C2", "items": [{"name": "I3"}]}
]
}
]
},
{
"name": "R2",
"groups": [
{
"name": "G1",
"categories": [
{"name": "C1","items": [{"name": "I1"}]}
]
},
{
"name": "G2",
"categories": [
{"name": "C2","items": [{"name": "I3"},{"name": "I4"}]},
{"name": "C3", "items": [{"name": "I5"}]}
]
}
]
}
]
}
经过一些研究,我想出了这个解决方案
from collections import OrderedDict
# Build the tree bottom-up in three grouping passes, each one level higher.
# OrderedDict keeps keys in first-seen order, so every level of the output
# follows the order in which its values first appear in `data`.

# Pass 1: (region, group, category) -> list of item nodes.
d = OrderedDict()
for aggr in data:
    d.setdefault(
        (aggr['region'], aggr['group'], aggr['category']), []
    ).append({"name": aggr['item']})

# Pass 2: (region, group) -> list of category nodes, each owning its items.
d1 = OrderedDict()
for (region, group, category), items in d.items():
    d1.setdefault((region, group), []).append(
        {"name": category, "items": items}
    )

# Pass 3: region -> list of group nodes, each owning its categories.
d2 = OrderedDict()
for (region, group), categories in d1.items():
    d2.setdefault(region, []).append(
        {"name": group, "categories": categories}
    )

# Wrap the per-region group lists into the final tree.
result = {
    "regions": [
        {"name": region, "groups": groups} for region, groups in d2.items()
    ]
}
它可以正常工作,但我认为它不是最 Pythonic 的解决方案,而且我没能进一步简化它。
如果有人能提出其他解决方案或改进上述代码,我将不胜感激。
只要记录已经排好序(就像您示例中的那样),您就可以在递归函数中使用 itertools 中的 groupby,例如:
from itertools import groupby
from operator import itemgetter
def plural(word):
    """Return a simple English plural of *word*.

    A trailing "y" becomes "ies" ("category" -> "categories") unless it
    follows a vowel, in which case only "s" is appended ("key" -> "keys").
    Every other word, including the empty string, just gets an "s".
    """
    # endswith() and slicing never raise on an empty string, unlike word[-1].
    if word.endswith("y") and word[-2:-1].lower() not in {"a", "e", "i", "o", "u"}:
        return f"{word[:-1]}ies"
    return f"{word}s"
def grouping(records, *keys):
    """Nest flat, already-sorted records into a tree with one level per key.

    ``records`` is an iterable of mappings and ``keys`` are the field names
    to group by, outermost first; the last key names the leaf field. Each
    node is ``{"name": value}``, and every non-leaf node also carries the
    list of its children under the pluralized name of the next key.

    ``itertools.groupby`` only merges *adjacent* records with equal keys, so
    equal values must already be contiguous at every level; otherwise the
    same name shows up as several sibling nodes.
    """
    if len(keys) == 1:
        # Leaf level: one node per record, in input order.
        return [{"name": record[keys[0]]} for record in records]
    return [
        # The recursive call consumes each group iterator before groupby
        # advances to the next group, which would invalidate it.
        {"name": key, plural(keys[1]): grouping(group, *keys[1:])}
        for key, group in groupby(records, itemgetter(keys[0]))
    ]
# Build the tree from `data`: nest by region, then group, then category,
# with the items as leaves. The top-level "regions" key is added by hand.
result = {"regions": grouping(data, "region", "group", "category", "item")}
如果无法保证输入已排序,可以按如下方式调整 grouping,让它在每一层分组之前先排序:
def grouping(records, *keys):
    """Nest flat records into a tree with one level per key, sorting as it goes.

    ``records`` is an iterable of mappings and ``keys`` are the field names
    to group by, outermost first; the last key names the leaf field. Each
    node is ``{"name": value}``, and every non-leaf node also carries the
    list of its children under the pluralized name of the next key.

    Unlike a plain ``groupby`` pass, the input does not need to be ordered:
    every non-leaf level sorts its records by that level's key first, so
    the values of each grouping key must be mutually comparable.
    """
    if len(keys) == 1:
        # Leaf level: one node per record. Leaves are not sorted; because
        # sorted() is stable they keep their relative input order.
        return [{"name": record[keys[0]]} for record in records]
    key_func = itemgetter(keys[0])
    # groupby only merges adjacent equal keys, so make them adjacent first.
    records = sorted(records, key=key_func)
    return [
        {"name": key, plural(keys[1]): grouping(group, *keys[1:])}
        for key, group in groupby(records, key_func)
    ]
或者预先对 data 排序:
# Alternative: sort once up front so the groupby-based grouping sees equal
# keys next to each other at every level.
keys = ["region", "group", "category", "item"]
# itemgetter(*keys) yields a tuple of the four field values, so records are
# ordered by region, then group, then category, then item (leaves included).
data = sorted(data, key=itemgetter(*keys))
result = {"regions": grouping(data, *keys)}
对于问题中提供的 data,第一个版本得到的结果是:
# Output of the first grouping version for the question's data. Items keep
# their input order inside each category (I2 before I1 under R1/G1/C1).
result = {
"regions": [
{
"name": "R1",
"groups": [
{
"name": "G1",
"categories": [
{"name": "C1", "items": [{"name": "I2"}, {"name": "I1"}]
}
]
},
{
"name": "G2",
"categories": [
{"name": "C2", "items": [{"name": "I3"}]}
]
}
]
},
{
"name": "R2",
"groups": [
{
"name": "G1",
"categories": [
{"name": "C1", "items": [{"name": "I1"}]}
]
},
{
"name": "G2",
"categories": [
{"name": "C2", "items": [{"name": "I3"}, {"name": "I4"}]},
{"name": "C3", "items": [{"name": "I5"}]}
]
}
]
}
]
}