__deepcopy__ 具有循环引用的对象

__deepcopy__ object with cyclic references

from copy import deepcopy

class DoubleLinkedListNeedsDeepCopy:
    """Linked-list node with a value, a forward link and a back-reference.

    A node created without an explicit ``tail`` references itself through
    ``_tail``; a node created by ``append`` inherits the ``_tail`` of the
    node it was appended to.  In a list grown only through ``append`` every
    node therefore references the same object, which makes the object graph
    cyclic.

    NOTE: ``__deepcopy__`` is intentionally kept as the failing example this
    class exists to demonstrate.  For nodes built with the default ``tail``
    or through ``append`` it recurses until ``RecursionError`` is raised,
    because the copy under construction is not stored in ``memo`` before
    the nested ``deepcopy`` calls are made.
    """

    def __init__(self, val, tail=None):
        """Create a node.

        Args:
            val: Payload stored on the node.
            tail: Object to store as the back-reference; when ``None`` the
                node references itself.
        """
        self.val = val
        self._next_node = None
        # Identity check rather than truthiness, so an explicitly supplied
        # tail is kept even if it evaluates as false.
        self._tail = tail if tail is not None else self

    def append(self, new_val):
        """Link a new node holding ``new_val`` after this one and return it.

        The new node receives this node's ``_tail``.  Any node previously
        linked after this one is replaced.
        """
        next_node = type(self)(new_val, self._tail)
        self._next_node = next_node
        return next_node

    def __deepcopy__(self, memo):
        """Copy this node and, recursively, its ``_next_node`` and ``_tail``.

        ``memo`` is only forwarded to the nested ``deepcopy`` calls; the new
        node is never added to it here (see the class-level NOTE).
        """
        new_copy = type(self)(self.val)
        new_copy._next_node = deepcopy(self._next_node, memo)
        new_copy._tail = deepcopy(self._tail, memo)
        return new_copy

    @property
    def next(self):
        """The node linked after this one, or ``None``."""
        return self._next_node

    @property
    def tail(self):
        """The back-reference stored on this node."""
        return self._tail

    @property
    def is_last(self):
        """``True`` when no node is linked after this one."""
        # ``is`` avoids dispatching to a user-defined ``__eq__``.
        return self._next_node is None

# Build the list 1 -> 2 -> 3 -> 4.  `linked_list` keeps referring to the first
# node while `head` moves on to each newly appended node.
linked_list = DoubleLinkedListNeedsDeepCopy(1)
head = linked_list
for i in range(2, 5):
    head = head.append(i)

def print_list(linked_list):
    """Print the values of at most 20 nodes on one line, then a newline.

    Starting at ``linked_list``, each node's ``val`` is printed followed by
    a space.  Traversal follows ``next`` and stops at the first node whose
    ``is_last`` is true, or after 20 nodes, so a cyclic chain still
    terminates.
    """
    node = linked_list
    remaining = 20
    while remaining > 0:
        print(node.val, end=' ')
        if node.is_last:
            break
        node = node.next
        remaining -= 1
    print()

import sys
# Raise the interpreter's recursion limit before running the demo below.
sys.setrecursionlimit(10000)

# Print the original list, change the value of its third node, print again.
print_list(linked_list)
linked_list.next.next.val = 5
print_list(linked_list)
# Deep-copy the list and change the third node of the copy only.
list_copy = deepcopy(linked_list)
list_copy.next.next.val = 8
print_list(list_copy)
# Print the original once more to show whether the change to the copy leaked into it.
print_list(linked_list)

预期输出为:

1 2 3 4 
1 2 5 4 
1 2 8 4 
1 2 5 4 

然而,在遵循递归路径后,它以 RecursionError 失败:linked_list.next.next.next.tail.next.next.next...

(这当然只是一个玩具例子;在实际项目中,我需要复制的是一个复杂的树状结构)

事实证明,在大多数情况下(如果您不需要从副本中明确排除某些字段),直接调用 deepcopy(obj) 就可以了,即使 obj 有自引用或其他令人讨厌的属性。

虽然您决定完全避免覆盖 __deepcopy__,但实际问题仍未得到解答。我在谷歌上搜索解决方案,但没有找到任何结果,所以经过反复试验后,我找到了答案,想把它发布在这里。

您编写的代码因 RecursionError 而失败的原因在于执行顺序:memo 字典仅在 __deepcopy__ 返回之后才会更新。您可以在 copy.py 的源代码中看到这一点。下面是其中最重要的部分,省略了与我们的案例无关的片段:

# Abridged excerpt: each `...` line stands for code that was left out.
def deepcopy(x, memo=None, _nil=[]):
    ...

    if memo is None:
        memo = {}

    # memo is keyed by id(x); an object already recorded there is returned
    # directly instead of being copied again.
    d = id(x)
    y = memo.get(d, _nil)
    if y is not _nil:
        return y

    ...

    # Hand the work to the object's own __deepcopy__ hook when it has one.
    copier = getattr(x, "__deepcopy__", None)
    if copier:
        y = copier(memo)

    ...

    # If is its own copy, don't memoize.
    # memo[d] is assigned only here, i.e. after copier(memo) above has
    # already returned.
    if y is not x:
        memo[d] = y
        _keep_alive(x, memo) # Make sure x lives at least as long as d
    return y

所以,问题在于:__deepcopy__ 在对同一个对象再次(递归地)触发 __deepcopy__ 之前,没有先把正在构建的副本写入 memo。知道了这一点,只需一行代码就可以轻松修复您的代码:

def __deepcopy__(self, memo):
    """Deep-copy this node, recording the copy in ``memo`` before recursing.

    Args:
        memo: The ``id -> copy`` dictionary that ``deepcopy`` passes along.

    Returns:
        A new node of the same type whose ``_next_node`` and ``_tail`` are
        deep copies of this node's.
    """
    node_type = type(self)
    new_copy = node_type(self.val)

    # THIS LINE is the fix: update memo before re-entering the deep-copy
    # machinery, so a nested deepcopy() that reaches this node again gets
    # `new_copy` back from memo.
    memo[id(self)] = new_copy

    new_copy._next_node = deepcopy(self._next_node, memo)
    new_copy._tail = deepcopy(self._tail, memo)
    return new_copy