如何遍历Python中的N级嵌套字典?
How to iterate through an N-level nested dictionary in Python?
我发现自己经常创建多级嵌套字典,而且总是不得不编写非常冗长的代码、借助大量临时变量来遍历字典的所有层级。
有没有一种方法可以把下面这个函数泛化,使其能够遍历任意多个层级,而不是硬编码并手动指定层级数?
def iterate_multilevel_dictionary(d, number_of_levels=None):
    """Yield one flat tuple ``(k1, ..., kN, value)`` per entry of a nested dict.

    Args:
        d: The outermost dictionary.
        number_of_levels: How many levels of keys to descend.  ``1`` yields
            the ``(key, value)`` pairs of ``d`` itself, ``2`` also descends
            into every value of ``d``, and so on for any positive integer.
            With the default ``None`` the depth is detected automatically:
            every value that is a ``dict`` is descended into and anything
            else is treated as a leaf.  Values below 1 yield nothing.

    Yields:
        Tuples holding the key path followed by the value found at that path,
        in the insertion order of the dictionaries.

    Raises:
        AttributeError: If an explicit ``number_of_levels`` is deeper than the
            data, i.e. a value that has to be descended into has no
            ``items()`` method.
    """
    if number_of_levels is not None and number_of_levels < 1:
        return
    # An explicit stack of (items iterator, key path, levels left) frames
    # replaces the hand-written nested loops and works for any depth without
    # relying on recursion.
    stack = [(iter(d.items()), (), number_of_levels)]
    while stack:
        entries, path, remaining = stack[-1]
        for key, value in entries:
            if remaining is None:
                descend = isinstance(value, dict)
            else:
                descend = remaining > 1
            if descend:
                next_remaining = None if remaining is None else remaining - 1
                stack.append(
                    (iter(value.items()), path + (key,), next_remaining)
                )
                # Continue with the child; the partially consumed parent
                # iterator stays on the stack and is resumed afterwards.
                break
            yield path + (key, value)
        else:
            # The iterator on top of the stack is exhausted.
            stack.pop()
# Usage examples: one sample dictionary per nesting level.  The comment lines
# after each loop show the tuples that the loop prints.

# Level 1
d_level1 = {"a": 1, "b": 2, "c": 3}
for items in iterate_multilevel_dictionary(d_level1, number_of_levels=1):
    print(items)
# ('a', 1)
# ('b', 2)
# ('c', 3)

# Level 2
d_level2 = {"group_1": {"a": 1}, "group_2": {"b": 2, "c": 3}}
for items in iterate_multilevel_dictionary(d_level2, number_of_levels=2):
    print(items)
# ('group_1', 'a', 1)
# ('group_2', 'b', 2)
# ('group_2', 'c', 3)

# Level 3 (d_level3 wraps the very same d_level2 object, it is not a copy)
d_level3 = {"collection_1": d_level2}
for items in iterate_multilevel_dictionary(d_level3, number_of_levels=3):
    print(items)
# ('collection_1', 'group_1', 'a', 1)
# ('collection_1', 'group_2', 'b', 2)
# ('collection_1', 'group_2', 'c', 3)
这里有一个快速而粗略的解决方案:
# Sample inputs with one, two and three levels of nesting.
d_level1 = dict(a=1, b=2, c=3)
d_level2 = dict(group_1=dict(a=1), group_2=dict(b=2, c=3))
d_level3 = dict(collection_1=d_level2)  # wraps d_level2 itself, not a copy
def flatten(d_in, base=()):
    """Print one flat tuple ``(k1, ..., kN, value)`` per leaf of a nested dict.

    Args:
        d_in: The dictionary to walk.
        base: Tuple of keys that lead to ``d_in``; callers normally leave it
            at the default and only the recursive calls pass it.

    The function recurses once per nesting level and returns ``None``; the
    tuples are written to standard output.
    """
    for key, value in d_in.items():
        # isinstance (rather than an exact type comparison) also descends
        # into dict subclasses such as OrderedDict or defaultdict.
        if isinstance(value, dict):
            flatten(value, base + (key,))
        else:
            print(base + (key, value))
# Flatten every sample in turn; the printed tuples are listed below.
for sample in (d_level1, d_level2, d_level3):
    flatten(sample)
# d_level1:
# ('a', 1)
# ('b', 2)
# ('c', 3)
# d_level2:
# ('group_1', 'a', 1)
# ('group_2', 'b', 2)
# ('group_2', 'c', 3)
# d_level3:
# ('collection_1', 'group_1', 'a', 1)
# ('collection_1', 'group_2', 'b', 2)
# ('collection_1', 'group_2', 'c', 3)
注意!Python 的默认递归深度限制约为 1000(可通过 sys.getrecursionlimit() 查看)。因此,在 Python 中使用递归时,请仔细考虑数据可能嵌套多深,并准备好在调用这类递归函数时捕获 RecursionError(它是 RuntimeError 的子类)。
编辑:
通过评论,我意识到自己犯了一个错误:我没有把键添加到 level1 字典的输出中,而且我使用了可变结构作为默认参数。我已经修正了这些问题,并给 print 语句加上了括号,然后重新发布。现在的输出与 OP 期望的输出一致,并且使用了更好、更现代的 Python 写法。
试试这个代码
它还支持不同嵌套层级混合在一起的字典
from typing import List, Tuple
def iterate_multilevel_dictionary(d: dict):
    """Yield ``[key_1, ..., key_n, value]`` for every non-dict value in ``d``.

    The walk is iterative: dictionaries still to be visited are kept on a
    LIFO stack together with the keys that lead to them.  As a consequence a
    nested dictionary is only visited after all plain values of its parent
    have been yielded, and sibling dictionaries are visited in reverse
    insertion order.

    Args:
        d: The (possibly nested) dictionary to walk.

    Yields:
        A new list holding the key path followed by the leaf value.
    """
    # Each stack entry pairs a dictionary with the list of keys leading to it.
    dicts_to_iterate: List[Tuple[dict, list]] = [(d, [])]
    while dicts_to_iterate:
        current_dict, prefix = dicts_to_iterate.pop()
        for k, v in current_dict.items():
            if isinstance(v, dict):
                dicts_to_iterate.append((v, prefix + [k]))
            else:
                yield prefix + [k, v]
if __name__ == '__main__':
    # Walk three samples of increasing depth, then one that mixes all depths.
    d_level1 = {"a": 1, "b": 2, "c": 3}
    print(f"test for {d_level1}")
    for items in iterate_multilevel_dictionary(d_level1):
        print(items)

    d_level2 = {"group_1": {"a": 1}, "group_2": {"b": 2, "c": 3}}
    print(f"test for {d_level2}")
    for items in iterate_multilevel_dictionary(d_level2):
        print(items)

    d_level3 = {"collection_1": d_level2}
    print(f"test for {d_level3}")
    for items in iterate_multilevel_dictionary(d_level3):
        print(items)

    # Merge the samples with a plain loop; a comprehension would only build
    # a throwaway list of None values.
    d_level123 = {}
    for sample in (d_level1, d_level2, d_level3):
        d_level123.update(sample)
    print(f"test for {d_level123}")
    for items in iterate_multilevel_dictionary(d_level123):
        print(items)
输出是:
test for {'a': 1, 'b': 2, 'c': 3}
['a', 1]
['b', 2]
['c', 3]
test for {'group_1': {'a': 1}, 'group_2': {'b': 2, 'c': 3}}
['group_2', 'b', 2]
['group_2', 'c', 3]
['group_1', 'a', 1]
test for {'collection_1': {'group_1': {'a': 1}, 'group_2': {'b': 2, 'c': 3}}}
['collection_1', 'group_2', 'b', 2]
['collection_1', 'group_2', 'c', 3]
['collection_1', 'group_1', 'a', 1]
test for {'a': 1, 'b': 2, 'c': 3, 'group_1': {'a': 1}, 'group_2': {'b': 2, 'c': 3}, 'collection_1': {'group_1': {'a': 1}, 'group_2': {'b': 2, 'c': 3}}}
['a', 1]
['b', 2]
['c', 3]
['collection_1', 'group_2', 'b', 2]
['collection_1', 'group_2', 'c', 3]
['collection_1', 'group_1', 'a', 1]
['group_2', 'b', 2]
['group_2', 'c', 3]
['group_1', 'a', 1]
使用递归是另一种方法,但我认为不使用递归编写更具挑战性和效率更高:)
看到 @VoNWooDSoN 的回答后,我写了这个答案。我把它改成了一个生成器(迭代器),而不是在函数内部打印,并做了一些改动以提高可读性。所以也请看看他的回答。
def flatten(d, base=()):
    """Yield one flat tuple ``(k1, ..., kN, value)`` per leaf of a nested dict.

    Args:
        d: The dictionary to walk.
        base: Tuple of keys that lead to ``d``; callers normally leave it at
            the default and only the recursive calls pass it.

    Yields:
        The key path followed by the first value on that path that is not a
        ``dict`` (dict subclasses are descended into as well).
    """
    for k, v in d.items():
        if isinstance(v, dict):
            yield from flatten(v, base + (k,))
        else:
            yield base + (k, v)
1- 使用 yield 生成结果,而不是打印。
2- 使用 isinstance() 而不是 type,这样 dict 的子类也可以工作。您还可以使用 typing(或 collections.abc)模块中的 MutableMapping 代替 dict,使其更通用。
3- 依我之见(IMO),从 .items() 得到 (k, v) 比使用 k 和 d[k] 更具可读性。
更通用?
如果想把它扩展得更通用,让它可以(但不像 OP 的方案那样必须)接受一个深度参数 depth,以备不时之需呢?
考虑这些例子:
# Sample data; only d_level3 is walked below, once without a depth limit and
# then with depth limits 0, 1 and 2.
d_level1 = {"a": 1, "b": 2, "c": 3}
d_level2 = {"group_1": {"a": 1}, "group_2": {"b": 2, "c": 3}}
d_level3 = {"collection_1": d_level2}

for items in flatten(d_level3):
    print(items)
print('------------------------------')
for items in flatten(d_level3, depth=0):
    print(items)
print('------------------------------')
for items in flatten(d_level3, depth=1):
    print(items)
print('------------------------------')
for items in flatten(d_level3, depth=2):
    print(items)
输出:
('collection_1', 'group_1', 'a', 1)
('collection_1', 'group_2', 'b', 2)
('collection_1', 'group_2', 'c', 3)
------------------------------
('collection_1', {'group_1': {'a': 1}, 'group_2': {'b': 2, 'c': 3}})
------------------------------
('collection_1', 'group_1', {'a': 1})
('collection_1', 'group_2', {'b': 2, 'c': 3})
------------------------------
('collection_1', 'group_1', 'a', 1)
('collection_1', 'group_2', 'b', 2)
('collection_1', 'group_2', 'c', 3)
depth=None 表示不限制深度(和最初的版本一样,会完全展开)。而现在通过指定 0 到 2 之间的深度,可以看到我们能够只迭代到想要的深度。代码如下:
def flatten(d, base=(), depth=None):
    """Yield flat tuples for a nested dict, optionally stopping at ``depth``.

    Args:
        d: The dictionary to walk.
        base: Tuple of keys that lead to ``d``; callers normally leave it at
            the default and only the recursive calls pass it.
        depth: How many levels of nested dictionaries may be entered below
            ``d``.  ``None`` (the default) means no limit.  ``0`` yields the
            ``(key, value)`` pairs of ``d`` itself, leaving nested
            dictionaries intact as values.  A negative value never reaches
            zero and therefore behaves like ``None``.

    Yields:
        The key path followed by the value found there: a non-dict leaf, or
        a whole nested dictionary once the depth budget is used up.
    """
    for k, v in d.items():
        if not isinstance(v, dict) or depth == 0:
            yield base + (k, v)
        else:
            # Entering a nested dictionary consumes one level of the budget;
            # an unlimited budget stays unlimited.
            remaining = None if depth is None else depth - 1
            yield from flatten(v, base + (k,), remaining)
用于识别字典深度的线性解决方案。
# Measure the nesting depth of a dictionary with a single pass over its
# printed form: every unquoted "{" opens a level and every unquoted "}"
# closes one.
# NOTE: the scan sees text only, so unquoted braces that belong to other
# objects (for example a set printed as {1, 2}) are counted as levels too.
d = {'a': {1: 1, 2: 2}, "b": 0, 'c': "{}"}
print(d)
s = str(d)
dictionary_stack, dictionary_depth = 0, 0


def push():
    """Enter one brace level and record the deepest level reached so far."""
    global dictionary_depth
    global dictionary_stack
    dictionary_stack += 1
    dictionary_depth = max(dictionary_depth, dictionary_stack)


def pop():
    """Leave the current brace level."""
    global dictionary_stack
    dictionary_stack -= 1


string_safety = False  # True while the scan is inside a quoted string
quote_char = ""  # the quote character that opened the current string
escaped = False  # True when the previous character was a backslash
for c in s:
    if string_safety:
        # Inside a string only its own closing quote matters; a backslash
        # protects the character that follows it, and the other kind of
        # quote is ordinary text (repr switches to double quotes for strings
        # that contain an apostrophe).
        if escaped:
            escaped = False
        elif c == "\\":
            escaped = True
        elif c == quote_char:
            string_safety = False
    elif c in "'\"":
        string_safety = True
        quote_char = c
    elif c == '{':
        push()
    elif c == '}':
        pop()
print(dictionary_depth)
输出:
{'a': {1: 1, 2: 2}, 'b': 0, 'c': '{}'}
2
我发现自己经常创建多级嵌套字典,而且总是不得不编写非常冗长的代码、借助大量临时变量来遍历字典的所有层级。
有没有一种方法可以把下面这个函数泛化,使其能够遍历任意多个层级,而不是硬编码并手动指定层级数?
def iterate_multilevel_dictionary(d, number_of_levels=None):
    """Yield one flat tuple ``(k1, ..., kN, value)`` per entry of a nested dict.

    Args:
        d: The outermost dictionary.
        number_of_levels: How many levels of keys to descend.  ``1`` yields
            the ``(key, value)`` pairs of ``d`` itself, ``2`` also descends
            into every value of ``d``, and so on for any positive integer.
            With the default ``None`` the depth is detected automatically:
            every value that is a ``dict`` is descended into and anything
            else is treated as a leaf.  Values below 1 yield nothing.

    Yields:
        Tuples holding the key path followed by the value found at that path,
        in the insertion order of the dictionaries.

    Raises:
        AttributeError: If an explicit ``number_of_levels`` is deeper than the
            data, i.e. a value that has to be descended into has no
            ``items()`` method.
    """
    if number_of_levels is not None and number_of_levels < 1:
        return
    # An explicit stack of (items iterator, key path, levels left) frames
    # replaces the hand-written nested loops and works for any depth without
    # relying on recursion.
    stack = [(iter(d.items()), (), number_of_levels)]
    while stack:
        entries, path, remaining = stack[-1]
        for key, value in entries:
            if remaining is None:
                descend = isinstance(value, dict)
            else:
                descend = remaining > 1
            if descend:
                next_remaining = None if remaining is None else remaining - 1
                stack.append(
                    (iter(value.items()), path + (key,), next_remaining)
                )
                # Continue with the child; the partially consumed parent
                # iterator stays on the stack and is resumed afterwards.
                break
            yield path + (key, value)
        else:
            # The iterator on top of the stack is exhausted.
            stack.pop()
# Usage examples: one sample dictionary per nesting level.  The comment lines
# after each loop show the tuples that the loop prints.

# Level 1
d_level1 = {"a": 1, "b": 2, "c": 3}
for items in iterate_multilevel_dictionary(d_level1, number_of_levels=1):
    print(items)
# ('a', 1)
# ('b', 2)
# ('c', 3)

# Level 2
d_level2 = {"group_1": {"a": 1}, "group_2": {"b": 2, "c": 3}}
for items in iterate_multilevel_dictionary(d_level2, number_of_levels=2):
    print(items)
# ('group_1', 'a', 1)
# ('group_2', 'b', 2)
# ('group_2', 'c', 3)

# Level 3 (d_level3 wraps the very same d_level2 object, it is not a copy)
d_level3 = {"collection_1": d_level2}
for items in iterate_multilevel_dictionary(d_level3, number_of_levels=3):
    print(items)
# ('collection_1', 'group_1', 'a', 1)
# ('collection_1', 'group_2', 'b', 2)
# ('collection_1', 'group_2', 'c', 3)
这里有一个快速而粗略的解决方案:
# Sample inputs with one, two and three levels of nesting.
d_level1 = dict(a=1, b=2, c=3)
d_level2 = dict(group_1=dict(a=1), group_2=dict(b=2, c=3))
d_level3 = dict(collection_1=d_level2)  # wraps d_level2 itself, not a copy
def flatten(d_in, base=()):
    """Print one flat tuple ``(k1, ..., kN, value)`` per leaf of a nested dict.

    Args:
        d_in: The dictionary to walk.
        base: Tuple of keys that lead to ``d_in``; callers normally leave it
            at the default and only the recursive calls pass it.

    The function recurses once per nesting level and returns ``None``; the
    tuples are written to standard output.
    """
    for key, value in d_in.items():
        # isinstance (rather than an exact type comparison) also descends
        # into dict subclasses such as OrderedDict or defaultdict.
        if isinstance(value, dict):
            flatten(value, base + (key,))
        else:
            print(base + (key, value))
# Flatten every sample in turn; the printed tuples are listed below.
for sample in (d_level1, d_level2, d_level3):
    flatten(sample)
# d_level1:
# ('a', 1)
# ('b', 2)
# ('c', 3)
# d_level2:
# ('group_1', 'a', 1)
# ('group_2', 'b', 2)
# ('group_2', 'c', 3)
# d_level3:
# ('collection_1', 'group_1', 'a', 1)
# ('collection_1', 'group_2', 'b', 2)
# ('collection_1', 'group_2', 'c', 3)
注意!Python 的默认递归深度限制约为 1000(可通过 sys.getrecursionlimit() 查看)。因此,在 Python 中使用递归时,请仔细考虑数据可能嵌套多深,并准备好在调用这类递归函数时捕获 RecursionError(它是 RuntimeError 的子类)。
编辑: 通过评论,我意识到自己犯了一个错误:我没有把键添加到 level1 字典的输出中,而且我使用了可变结构作为默认参数。我已经修正了这些问题,并给 print 语句加上了括号,然后重新发布。现在的输出与 OP 期望的输出一致,并且使用了更好、更现代的 Python 写法。
试试这个代码
它还支持不同嵌套层级混合在一起的字典
from typing import List, Tuple
def iterate_multilevel_dictionary(d: dict):
    """Yield ``[key_1, ..., key_n, value]`` for every non-dict value in ``d``.

    The walk is iterative: dictionaries still to be visited are kept on a
    LIFO stack together with the keys that lead to them.  As a consequence a
    nested dictionary is only visited after all plain values of its parent
    have been yielded, and sibling dictionaries are visited in reverse
    insertion order.

    Args:
        d: The (possibly nested) dictionary to walk.

    Yields:
        A new list holding the key path followed by the leaf value.
    """
    # Each stack entry pairs a dictionary with the list of keys leading to it.
    dicts_to_iterate: List[Tuple[dict, list]] = [(d, [])]
    while dicts_to_iterate:
        current_dict, prefix = dicts_to_iterate.pop()
        for k, v in current_dict.items():
            if isinstance(v, dict):
                dicts_to_iterate.append((v, prefix + [k]))
            else:
                yield prefix + [k, v]
if __name__ == '__main__':
    # Walk three samples of increasing depth, then one that mixes all depths.
    d_level1 = {"a": 1, "b": 2, "c": 3}
    print(f"test for {d_level1}")
    for items in iterate_multilevel_dictionary(d_level1):
        print(items)

    d_level2 = {"group_1": {"a": 1}, "group_2": {"b": 2, "c": 3}}
    print(f"test for {d_level2}")
    for items in iterate_multilevel_dictionary(d_level2):
        print(items)

    d_level3 = {"collection_1": d_level2}
    print(f"test for {d_level3}")
    for items in iterate_multilevel_dictionary(d_level3):
        print(items)

    # Merge the samples with a plain loop; a comprehension would only build
    # a throwaway list of None values.
    d_level123 = {}
    for sample in (d_level1, d_level2, d_level3):
        d_level123.update(sample)
    print(f"test for {d_level123}")
    for items in iterate_multilevel_dictionary(d_level123):
        print(items)
输出是:
test for {'a': 1, 'b': 2, 'c': 3}
['a', 1]
['b', 2]
['c', 3]
test for {'group_1': {'a': 1}, 'group_2': {'b': 2, 'c': 3}}
['group_2', 'b', 2]
['group_2', 'c', 3]
['group_1', 'a', 1]
test for {'collection_1': {'group_1': {'a': 1}, 'group_2': {'b': 2, 'c': 3}}}
['collection_1', 'group_2', 'b', 2]
['collection_1', 'group_2', 'c', 3]
['collection_1', 'group_1', 'a', 1]
test for {'a': 1, 'b': 2, 'c': 3, 'group_1': {'a': 1}, 'group_2': {'b': 2, 'c': 3}, 'collection_1': {'group_1': {'a': 1}, 'group_2': {'b': 2, 'c': 3}}}
['a', 1]
['b', 2]
['c', 3]
['collection_1', 'group_2', 'b', 2]
['collection_1', 'group_2', 'c', 3]
['collection_1', 'group_1', 'a', 1]
['group_2', 'b', 2]
['group_2', 'c', 3]
['group_1', 'a', 1]
使用递归是另一种方法,但我认为不使用递归编写更具挑战性和效率更高:)
看到 @VoNWooDSoN 的回答后,我写了这个答案。我把它改成了一个生成器(迭代器),而不是在函数内部打印,并做了一些改动以提高可读性。所以也请看看他的回答。
def flatten(d, base=()):
    """Yield one flat tuple ``(k1, ..., kN, value)`` per leaf of a nested dict.

    Args:
        d: The dictionary to walk.
        base: Tuple of keys that lead to ``d``; callers normally leave it at
            the default and only the recursive calls pass it.

    Yields:
        The key path followed by the first value on that path that is not a
        ``dict`` (dict subclasses are descended into as well).
    """
    for k, v in d.items():
        if isinstance(v, dict):
            yield from flatten(v, base + (k,))
        else:
            yield base + (k, v)
1- 使用 yield 生成结果,而不是打印。
2- 使用 isinstance() 而不是 type,这样 dict 的子类也可以工作。您还可以使用 typing(或 collections.abc)模块中的 MutableMapping 代替 dict,使其更通用。
3- 依我之见(IMO),从 .items() 得到 (k, v) 比使用 k 和 d[k] 更具可读性。
更通用?
如果想把它扩展得更通用,让它可以(但不像 OP 的方案那样必须)接受一个深度参数 depth,以备不时之需呢?
考虑这些例子:
# Sample data; only d_level3 is walked below, once without a depth limit and
# then with depth limits 0, 1 and 2.
d_level1 = {"a": 1, "b": 2, "c": 3}
d_level2 = {"group_1": {"a": 1}, "group_2": {"b": 2, "c": 3}}
d_level3 = {"collection_1": d_level2}

for items in flatten(d_level3):
    print(items)
print('------------------------------')
for items in flatten(d_level3, depth=0):
    print(items)
print('------------------------------')
for items in flatten(d_level3, depth=1):
    print(items)
print('------------------------------')
for items in flatten(d_level3, depth=2):
    print(items)
输出:
('collection_1', 'group_1', 'a', 1)
('collection_1', 'group_2', 'b', 2)
('collection_1', 'group_2', 'c', 3)
------------------------------
('collection_1', {'group_1': {'a': 1}, 'group_2': {'b': 2, 'c': 3}})
------------------------------
('collection_1', 'group_1', {'a': 1})
('collection_1', 'group_2', {'b': 2, 'c': 3})
------------------------------
('collection_1', 'group_1', 'a', 1)
('collection_1', 'group_2', 'b', 2)
('collection_1', 'group_2', 'c', 3)
depth=None 表示不限制深度(和最初的版本一样,会完全展开)。而现在通过指定 0 到 2 之间的深度,可以看到我们能够只迭代到想要的深度。代码如下:
def flatten(d, base=(), depth=None):
    """Yield flat tuples for a nested dict, optionally stopping at ``depth``.

    Args:
        d: The dictionary to walk.
        base: Tuple of keys that lead to ``d``; callers normally leave it at
            the default and only the recursive calls pass it.
        depth: How many levels of nested dictionaries may be entered below
            ``d``.  ``None`` (the default) means no limit.  ``0`` yields the
            ``(key, value)`` pairs of ``d`` itself, leaving nested
            dictionaries intact as values.  A negative value never reaches
            zero and therefore behaves like ``None``.

    Yields:
        The key path followed by the value found there: a non-dict leaf, or
        a whole nested dictionary once the depth budget is used up.
    """
    for k, v in d.items():
        if not isinstance(v, dict) or depth == 0:
            yield base + (k, v)
        else:
            # Entering a nested dictionary consumes one level of the budget;
            # an unlimited budget stays unlimited.
            remaining = None if depth is None else depth - 1
            yield from flatten(v, base + (k,), remaining)
用于识别字典深度的线性解决方案。
# Measure the nesting depth of a dictionary with a single pass over its
# printed form: every unquoted "{" opens a level and every unquoted "}"
# closes one.
# NOTE: the scan sees text only, so unquoted braces that belong to other
# objects (for example a set printed as {1, 2}) are counted as levels too.
d = {'a': {1: 1, 2: 2}, "b": 0, 'c': "{}"}
print(d)
s = str(d)
dictionary_stack, dictionary_depth = 0, 0


def push():
    """Enter one brace level and record the deepest level reached so far."""
    global dictionary_depth
    global dictionary_stack
    dictionary_stack += 1
    dictionary_depth = max(dictionary_depth, dictionary_stack)


def pop():
    """Leave the current brace level."""
    global dictionary_stack
    dictionary_stack -= 1


string_safety = False  # True while the scan is inside a quoted string
quote_char = ""  # the quote character that opened the current string
escaped = False  # True when the previous character was a backslash
for c in s:
    if string_safety:
        # Inside a string only its own closing quote matters; a backslash
        # protects the character that follows it, and the other kind of
        # quote is ordinary text (repr switches to double quotes for strings
        # that contain an apostrophe).
        if escaped:
            escaped = False
        elif c == "\\":
            escaped = True
        elif c == quote_char:
            string_safety = False
    elif c in "'\"":
        string_safety = True
        quote_char = c
    elif c == '{':
        push()
    elif c == '}':
        pop()
print(dictionary_depth)
输出:
{'a': {1: 1, 2: 2}, 'b': 0, 'c': '{}'}
2