python 中的三向字典深度合并
Three way dictionary deep merge in python
我想合并两个字典 A 和 B,并且已知它们共同的先前状态 C。子字典中也需要递归地进行合并。在发生真正冲突的情况下,需要抛出异常。
1 - 在下面的示例中,合并方法应该理解 A 和 B 编辑了不同的项目,因此合并不应引发冲突
C = {"x": 0, "y": 0}
A = {"x": 1, "y": 0} # Edit x, but not y
B = {"x": 0, "y": 1} # Edit y, but not x
# merge(A, B, C) => {"x": 1, "y": 1}
2 - 该函数需要能够处理新项目和已删除项目
C = {"x": 0}
A = {"x": 0, "y": 0} # Add y, keep x untouched
B = {} # Delete x
# merge(A, B, C) => {"y": 0}
3 - 当真正的冲突发生时函数应该抛出异常
C = {"x": 0}
A = {"x": 1} # Edit x
B = {"x": 2} # Also edit x
# merge(A, B, C) => raise Exception
C = {"x": 0}
A = {"x": 1} # Edit x
B = {} # Delete x
# merge(A, B, C) => raise Exception
4 - 该函数应该递归工作
C = {"deeper": {"x": 0, "y": 0}}
A = {"deeper": {"x": 1, "y": 0}} # Edit deeper["x"], but not deeper["y"]
B = {"deeper": {"x": 0, "y": 1}} # Edit deeper["y"], but not deeper["x"]
# merge(A, B, C) => {"deeper": {"x": 1, "y": 1}}
实现这种合并功能的最佳方法是什么?
可以尝试写一个递归函数,逐一检查你列出的各种情况,并在所有情况都不满足时失败(抛出 ValueError)。
我相信这可以满足你的情况
def three_way_merge(Origin, Dict_A, Dict_B):
    """Three-way merge of two dicts against their common ancestor.

    A key counts as "changed" on one side when its value differs from the
    ancestor's value, or when it was added or deleted on that side. A change
    made on only one side is taken as-is; the same change made on both sides
    is taken once; different changes to the same key are a conflict.
    Sub-dicts present in all three inputs are merged recursively.

    Args:
        Origin: the common ancestor dict.
        Dict_A: first modified version of ``Origin``.
        Dict_B: second modified version of ``Origin``.

    Returns:
        A new dict holding the merged result. The inputs are not modified;
        non-dict values are shared with the inputs, not copied.

    Raises:
        ValueError: if both sides changed the same key in different ways
            (including edit-vs-delete).
    """
    absent = object()  # stands for "key not present"; never equals real data

    def same(left, right):
        # Two slots match when both are absent or both hold equal values.
        if left is absent or right is absent:
            return left is right
        return left == right

    def shown(slot):
        # Render a missing slot readably in the conflict message.
        return '<deleted>' if slot is absent else slot

    # Ancestor keys first, then keys added by A, then keys added only by B.
    keys = list(Origin)
    keys.extend(k for k in Dict_A if k not in Origin)
    keys.extend(k for k in Dict_B if k not in Origin and k not in Dict_A)

    newdict = {}
    for key in keys:
        base = Origin.get(key, absent)
        in_a = Dict_A.get(key, absent)
        in_b = Dict_B.get(key, absent)

        if isinstance(base, dict) and isinstance(in_a, dict) and isinstance(in_b, dict):
            # Only recurse when all three sides still hold a dict; otherwise
            # the value is compared as a whole below.
            merged = three_way_merge(base, in_a, in_b)
        else:
            a_changed = not same(in_a, base)
            b_changed = not same(in_b, base)
            if a_changed and b_changed and not same(in_a, in_b):
                raise ValueError('conflict occured with {} \n {} and {} both changed'.format(key, shown(in_a), shown(in_b)))
            if a_changed:
                merged = in_a
            elif b_changed:
                merged = in_b
            else:
                merged = base

        # An absent result means the key was deleted (or never existed).
        if merged is not absent:
            newdict[key] = merged
    return newdict
def add_missing_keys(Dict_A, Origin, Dict_B):
    """Collect the keys that ``Dict_A`` added relative to ``Origin``.

    Args:
        Dict_A: the modified dict whose new keys are collected.
        Origin: the common ancestor; keys present here are skipped.
        Dict_B: the other modified dict, used only to detect conflicts.

    Returns:
        A new dict with every key of ``Dict_A`` that is not in ``Origin``.

    Raises:
        ValueError: if ``Dict_B`` also has the new key but with a value that
            is not equal to the one in ``Dict_A``.
    """
    newdict = {}
    for key, value in Dict_A.items():
        if key in Origin:
            # Not a newly added key.
            continue
        if key in Dict_B and not (Dict_B[key] == value):
            # Both sides added the key, with different values.
            raise ValueError('conflict occured with {} \n {} and {} both changed'.format(key, Dict_A[key], Dict_B[key]))
        newdict[key] = value
    return newdict
# Usage examples; arguments are (Origin, Dict_A, Dict_B).
# NOTE: the last two calls raise, and an uncaught exception stops a script,
# so run them one at a time to observe each result.
print(three_way_merge({'x':0, 'y':0}, {'x':1, 'y':0}, {'x':0, 'y':2})) # independent edits: returns {'x':1, 'y':2}
print(three_way_merge({'x':0}, {'x':0, 'y':0}, {})) # add on one side, delete on the other: returns {'y':0}
print(three_way_merge({'x':0}, {'x':1}, {'x':1})) # identical edit on both sides: returns {'x':1}
print(three_way_merge({"deeper": {"x": 0, "y": 0}},{"deeper": {"x": 1, "y": 0}},{"deeper": {"x": 0, "y": 2}})) # nested merge: returns {'deeper': {'x': 1, 'y': 2}}
print(three_way_merge({'x':0}, {'x':1}, {'x':2})) # conflicting edits: raises ValueError
print(three_way_merge({'x':0}, {'x':1}, {})) # edit vs. delete conflict: raises instead of returning a merged dict
你可以把每个 dict 的所有项转换为集合:先分别求 A、B 与 C 的对称差集,再对两者涉及的键取交集来查找冲突;然后把三个集合的交集(公共项)与 A、B 各自相对于 C 的差集取并集,得到合并结果。对于 A、B、C 共有的子 dict 进行递归合并;其余子 dict 先转换为由键值对组成的元组,使其可哈希、可以放入集合,合并之后再转换回 dict。
编辑:如果 dict 的值是不可哈希的对象(例如集合),则必须在把 dict 的项转换为集合之前先序列化这些值
(我建议使用 pickle 作为序列化工具,因为 Python 原生支持它),并在合并之后再将它们反序列化:
import pickle
def merge(a, b, c):
    """Three-way merge of dicts ``a`` and ``b`` against common ancestor ``c``.

    A key counts as "changed" on one side when its value differs from the
    value in ``c``, or when it was added or deleted on that side. A change
    made on only one side is taken as-is; the same change made on both sides
    is taken once; different changes to the same key are a conflict.
    Sub-dicts present in all three inputs are merged recursively.

    Values are compared with ``==`` rather than through serialized bytes or
    item tuples, so unhashable values (sets, lists, dicts) work and equal
    dicts compare equal regardless of their key insertion order.

    Args:
        a: first modified version of ``c``.
        b: second modified version of ``c``.
        c: the common ancestor dict.

    Returns:
        A new dict holding the merged result. None of the inputs is modified;
        non-dict values are shared with the inputs, not copied.

    Raises:
        ValueError: if ``a`` and ``b`` changed the same key in different ways
            (including edit-vs-delete and dict-vs-scalar replacements).
    """
    absent = object()  # stands for "key not present"; never equals real data

    def same(left, right):
        # Two slots match when both are absent or both hold equal values.
        if left is absent or right is absent:
            return left is right
        return left == right

    merged = {}
    # dict.fromkeys de-duplicates while keeping a deterministic key order:
    # ancestor keys first, then keys new in a, then keys new in b.
    for k in dict.fromkeys(list(c) + list(a) + list(b)):
        base = c.get(k, absent)
        in_a = a.get(k, absent)
        in_b = b.get(k, absent)

        if isinstance(base, dict) and isinstance(in_a, dict) and isinstance(in_b, dict):
            # Recurse only into sub-dicts common to a, b and c.
            value = merge(in_a, in_b, base)
        else:
            a_changed = not same(in_a, base)
            b_changed = not same(in_b, base)
            # It isn't really a conflict if a and b made the same change.
            if a_changed and b_changed and not same(in_a, in_b):
                # Wrap k in a tuple so that tuple keys format correctly.
                raise ValueError("Conflict found in key %s" % (k,))
            if a_changed:
                value = in_a
            elif b_changed:
                value = in_b
            else:
                value = base

        # An absent result means the key was deleted on one or both sides.
        if value is not absent:
            merged[k] = value
    return merged
这样:
# Usage examples for merge(a, b, c): A and B are the edited versions and C is
# their common ancestor. The "->" comments give the expected printed result.

# Independent edits to different keys -> {'x': 1, 'y': 1}
C = {"x": 0, "y": 0}
A = {"x": 1, "y": 0} # Edit x, but not y
B = {"x": 0, "y": 1} # Edit y, but not x
print(merge(A, B, C))

# One side adds a key, the other deletes one -> {'y': 0}
C = {"x": 0}
A = {"x": 0, "y": 0} # Add y, keep x untouched
B = {} # Delete x
print(merge(A, B, C))

# Both sides make the same edit, which is not a conflict -> {'x': 1}
C = {"x": 0}
A = {"x": 1} # Edit x
B = {"x": 1} # Edit x with the same value
print(merge(A, B, C))

# Nested dicts with unhashable (set) values -> {'deeper': {'x': {1, 2}, 'y': 1}}
C = {"deeper": {"x": 0, "y": {3, 4}}}
A = {"deeper": {"x": {1, 2}, "y": {4, 3}}} # Edit deeper["x"], but not deeper["y"]
B = {"deeper": {"x": 0, "y": 1}} # Edit deeper["y"], but not deeper["x"]
print(merge(A, B, C))

# A scalar replaced by a dict on one side only -> {'deeper': {'x': 0, 'y': 1}, 'x': 2}
C = {"deeper": 1}
A = {"deeper": {"x": 0, "y": 1}} # Edit deeper and turn it into a dict
B = {"deeper": 1, "x": 2} # Add x, keep deeper untouched
print(merge(A, B, C))

# A dict replaced by a scalar on one side only -> {'deeper': 1}
C = {"deeper": {"x": 0, "y": 1}}
A = {"deeper": {"x": 0, "y": 1}} # Keep deeper untouched
B = {"deeper": 1} # Turn deeper into a scalar
print(merge(A, B, C))
会输出:
{'x': 1, 'y': 1}
{'y': 0}
{'x': 1}
{'deeper': {'x': {1, 2}, 'y': 1}}
{'deeper': {'x': 0, 'y': 1}, 'x': 2}
{'deeper': 1}
同时:
# Both sides edit the same key with different values
# -> ValueError: Conflict found in key x
C = {"x": 0}
A = {"x": 1} # Edit x
B = {"x": 2} # Edit x with a different value
print(merge(A, B, C))
会引发:
ValueError: Conflict found in key x
和:
# One side edits inside the sub-dict while the other replaces it with a scalar
# -> ValueError: Conflict found in key deeper
C = {"deeper": {"x": 0, "y": 1}}
A = {"deeper": {"x": 0, "y": 2}} # Edit deeper["y"], but not deeper["x"]
B = {"deeper": 1} # Turn deeper into a scalar
print(merge(A, B, C))
会引发:
ValueError: Conflict found in key deeper
我想合并两个字典 A 和 B,并且已知它们共同的先前状态 C。子字典中也需要递归地进行合并。在发生真正冲突的情况下,需要抛出异常。
1 - 在下面的示例中,合并方法应该理解 A 和 B 编辑了不同的项目,因此合并不应引发冲突
C = {"x": 0, "y": 0}
A = {"x": 1, "y": 0} # Edit x, but not y
B = {"x": 0, "y": 1} # Edit y, but not x
# merge(A, B, C) => {"x": 1, "y": 1}
2 - 该函数需要能够处理新项目和已删除项目
C = {"x": 0}
A = {"x": 0, "y": 0} # Add y, keep x untouched
B = {} # Delete x
# merge(A, B, C) => {"y": 0}
3 - 当真正的冲突发生时函数应该抛出异常
C = {"x": 0}
A = {"x": 1} # Edit x
B = {"x": 2} # Also edit x
# merge(A, B, C) => raise Exception
C = {"x": 0}
A = {"x": 1} # Edit x
B = {} # Delete x
# merge(A, B, C) => raise Exception
4 - 该函数应该递归工作
C = {"deeper": {"x": 0, "y": 0}}
A = {"deeper": {"x": 1, "y": 0}} # Edit deeper["x"], but not deeper["y"]
B = {"deeper": {"x": 0, "y": 1}} # Edit deeper["y"], but not deeper["x"]
# merge(A, B, C) => {"deeper": {"x": 1, "y": 1}}
实现这种合并功能的最佳方法是什么?
可以尝试写一个递归函数,逐一检查你列出的各种情况,并在所有情况都不满足时失败(抛出 ValueError)。
我相信这可以满足你的情况
def three_way_merge(Origin, Dict_A, Dict_B):
    """Three-way merge of two dicts against their common ancestor.

    A key counts as "changed" on one side when its value differs from the
    ancestor's value, or when it was added or deleted on that side. A change
    made on only one side is taken as-is; the same change made on both sides
    is taken once; different changes to the same key are a conflict.
    Sub-dicts present in all three inputs are merged recursively.

    Args:
        Origin: the common ancestor dict.
        Dict_A: first modified version of ``Origin``.
        Dict_B: second modified version of ``Origin``.

    Returns:
        A new dict holding the merged result. The inputs are not modified;
        non-dict values are shared with the inputs, not copied.

    Raises:
        ValueError: if both sides changed the same key in different ways
            (including edit-vs-delete).
    """
    absent = object()  # stands for "key not present"; never equals real data

    def same(left, right):
        # Two slots match when both are absent or both hold equal values.
        if left is absent or right is absent:
            return left is right
        return left == right

    def shown(slot):
        # Render a missing slot readably in the conflict message.
        return '<deleted>' if slot is absent else slot

    # Ancestor keys first, then keys added by A, then keys added only by B.
    keys = list(Origin)
    keys.extend(k for k in Dict_A if k not in Origin)
    keys.extend(k for k in Dict_B if k not in Origin and k not in Dict_A)

    newdict = {}
    for key in keys:
        base = Origin.get(key, absent)
        in_a = Dict_A.get(key, absent)
        in_b = Dict_B.get(key, absent)

        if isinstance(base, dict) and isinstance(in_a, dict) and isinstance(in_b, dict):
            # Only recurse when all three sides still hold a dict; otherwise
            # the value is compared as a whole below.
            merged = three_way_merge(base, in_a, in_b)
        else:
            a_changed = not same(in_a, base)
            b_changed = not same(in_b, base)
            if a_changed and b_changed and not same(in_a, in_b):
                raise ValueError('conflict occured with {} \n {} and {} both changed'.format(key, shown(in_a), shown(in_b)))
            if a_changed:
                merged = in_a
            elif b_changed:
                merged = in_b
            else:
                merged = base

        # An absent result means the key was deleted (or never existed).
        if merged is not absent:
            newdict[key] = merged
    return newdict
def add_missing_keys(Dict_A, Origin, Dict_B):
    """Collect the keys that ``Dict_A`` added relative to ``Origin``.

    Args:
        Dict_A: the modified dict whose new keys are collected.
        Origin: the common ancestor; keys present here are skipped.
        Dict_B: the other modified dict, used only to detect conflicts.

    Returns:
        A new dict with every key of ``Dict_A`` that is not in ``Origin``.

    Raises:
        ValueError: if ``Dict_B`` also has the new key but with a value that
            is not equal to the one in ``Dict_A``.
    """
    newdict = {}
    for key, value in Dict_A.items():
        if key in Origin:
            # Not a newly added key.
            continue
        if key in Dict_B and not (Dict_B[key] == value):
            # Both sides added the key, with different values.
            raise ValueError('conflict occured with {} \n {} and {} both changed'.format(key, Dict_A[key], Dict_B[key]))
        newdict[key] = value
    return newdict
# Usage examples; arguments are (Origin, Dict_A, Dict_B).
# NOTE: the last two calls raise, and an uncaught exception stops a script,
# so run them one at a time to observe each result.
print(three_way_merge({'x':0, 'y':0}, {'x':1, 'y':0}, {'x':0, 'y':2})) # independent edits: returns {'x':1, 'y':2}
print(three_way_merge({'x':0}, {'x':0, 'y':0}, {})) # add on one side, delete on the other: returns {'y':0}
print(three_way_merge({'x':0}, {'x':1}, {'x':1})) # identical edit on both sides: returns {'x':1}
print(three_way_merge({"deeper": {"x": 0, "y": 0}},{"deeper": {"x": 1, "y": 0}},{"deeper": {"x": 0, "y": 2}})) # nested merge: returns {'deeper': {'x': 1, 'y': 2}}
print(three_way_merge({'x':0}, {'x':1}, {'x':2})) # conflicting edits: raises ValueError
print(three_way_merge({'x':0}, {'x':1}, {})) # edit vs. delete conflict: raises instead of returning a merged dict
你可以把每个 dict 的所有项转换为集合:先分别求 A、B 与 C 的对称差集,再对两者涉及的键取交集来查找冲突;然后把三个集合的交集(公共项)与 A、B 各自相对于 C 的差集取并集,得到合并结果。对于 A、B、C 共有的子 dict 进行递归合并;其余子 dict 先转换为由键值对组成的元组,使其可哈希、可以放入集合,合并之后再转换回 dict。
编辑:如果 dict 的值是不可哈希的对象(例如集合),则必须在把 dict 的项转换为集合之前先序列化这些值
(我建议使用 pickle 作为序列化工具,因为 Python 原生支持它),并在合并之后再将它们反序列化:
import pickle
def merge(a, b, c):
    """Three-way merge of dicts ``a`` and ``b`` against common ancestor ``c``.

    A key counts as "changed" on one side when its value differs from the
    value in ``c``, or when it was added or deleted on that side. A change
    made on only one side is taken as-is; the same change made on both sides
    is taken once; different changes to the same key are a conflict.
    Sub-dicts present in all three inputs are merged recursively.

    Values are compared with ``==`` rather than through serialized bytes or
    item tuples, so unhashable values (sets, lists, dicts) work and equal
    dicts compare equal regardless of their key insertion order.

    Args:
        a: first modified version of ``c``.
        b: second modified version of ``c``.
        c: the common ancestor dict.

    Returns:
        A new dict holding the merged result. None of the inputs is modified;
        non-dict values are shared with the inputs, not copied.

    Raises:
        ValueError: if ``a`` and ``b`` changed the same key in different ways
            (including edit-vs-delete and dict-vs-scalar replacements).
    """
    absent = object()  # stands for "key not present"; never equals real data

    def same(left, right):
        # Two slots match when both are absent or both hold equal values.
        if left is absent or right is absent:
            return left is right
        return left == right

    merged = {}
    # dict.fromkeys de-duplicates while keeping a deterministic key order:
    # ancestor keys first, then keys new in a, then keys new in b.
    for k in dict.fromkeys(list(c) + list(a) + list(b)):
        base = c.get(k, absent)
        in_a = a.get(k, absent)
        in_b = b.get(k, absent)

        if isinstance(base, dict) and isinstance(in_a, dict) and isinstance(in_b, dict):
            # Recurse only into sub-dicts common to a, b and c.
            value = merge(in_a, in_b, base)
        else:
            a_changed = not same(in_a, base)
            b_changed = not same(in_b, base)
            # It isn't really a conflict if a and b made the same change.
            if a_changed and b_changed and not same(in_a, in_b):
                # Wrap k in a tuple so that tuple keys format correctly.
                raise ValueError("Conflict found in key %s" % (k,))
            if a_changed:
                value = in_a
            elif b_changed:
                value = in_b
            else:
                value = base

        # An absent result means the key was deleted on one or both sides.
        if value is not absent:
            merged[k] = value
    return merged
这样:
# Usage examples for merge(a, b, c): A and B are the edited versions and C is
# their common ancestor. The "->" comments give the expected printed result.

# Independent edits to different keys -> {'x': 1, 'y': 1}
C = {"x": 0, "y": 0}
A = {"x": 1, "y": 0} # Edit x, but not y
B = {"x": 0, "y": 1} # Edit y, but not x
print(merge(A, B, C))

# One side adds a key, the other deletes one -> {'y': 0}
C = {"x": 0}
A = {"x": 0, "y": 0} # Add y, keep x untouched
B = {} # Delete x
print(merge(A, B, C))

# Both sides make the same edit, which is not a conflict -> {'x': 1}
C = {"x": 0}
A = {"x": 1} # Edit x
B = {"x": 1} # Edit x with the same value
print(merge(A, B, C))

# Nested dicts with unhashable (set) values -> {'deeper': {'x': {1, 2}, 'y': 1}}
C = {"deeper": {"x": 0, "y": {3, 4}}}
A = {"deeper": {"x": {1, 2}, "y": {4, 3}}} # Edit deeper["x"], but not deeper["y"]
B = {"deeper": {"x": 0, "y": 1}} # Edit deeper["y"], but not deeper["x"]
print(merge(A, B, C))

# A scalar replaced by a dict on one side only -> {'deeper': {'x': 0, 'y': 1}, 'x': 2}
C = {"deeper": 1}
A = {"deeper": {"x": 0, "y": 1}} # Edit deeper and turn it into a dict
B = {"deeper": 1, "x": 2} # Add x, keep deeper untouched
print(merge(A, B, C))

# A dict replaced by a scalar on one side only -> {'deeper': 1}
C = {"deeper": {"x": 0, "y": 1}}
A = {"deeper": {"x": 0, "y": 1}} # Keep deeper untouched
B = {"deeper": 1} # Turn deeper into a scalar
print(merge(A, B, C))
会输出:
{'x': 1, 'y': 1}
{'y': 0}
{'x': 1}
{'deeper': {'x': {1, 2}, 'y': 1}}
{'deeper': {'x': 0, 'y': 1}, 'x': 2}
{'deeper': 1}
同时:
# Both sides edit the same key with different values
# -> ValueError: Conflict found in key x
C = {"x": 0}
A = {"x": 1} # Edit x
B = {"x": 2} # Edit x with a different value
print(merge(A, B, C))
会引发:
ValueError: Conflict found in key x
和:
# One side edits inside the sub-dict while the other replaces it with a scalar
# -> ValueError: Conflict found in key deeper
C = {"deeper": {"x": 0, "y": 1}}
A = {"deeper": {"x": 0, "y": 2}} # Edit deeper["y"], but not deeper["x"]
B = {"deeper": 1} # Turn deeper into a scalar
print(merge(A, B, C))
会引发:
ValueError: Conflict found in key deeper