如何最有效地使用 collections.deque(popleft 与 appendleft)

How to use collections.deque most effectively (popleft vs. appendleft)

我在学习Python中的数据结构时一直在学习队列,想问一个关于它的使用的问题。

我想队列中有两种方法 append/pop。第一种方法是使用 deque.append()deque.popleft()。另一种方法是使用 deque.appendleft()deque.pop()。两者之间有性能差异吗?如果不是,根据您的经验,哪个更常用?您是否出于其他原因推荐其中一个?

从我的角度来看,它们本质上做的是相同的事情,因为它们都实现了先进先出。非常感谢您的意见!

使用哪一端并不重要。两者都同样是最优的。与仅在列表的 right/top 上提供 O(1) 追加和弹出操作的列表不同,追加和弹出操作在双端队列的两端是 guaranteed 是 O(1)。如果不是,它的存在就没有意义,我们不妨使用一个列表。

查看CPython 3.8的源代码,方法poppopleft几乎相同:

static PyObject *
deque_pop(dequeobject *deque, PyObject *unused)
{
    PyObject *item;
    block *prevblock;

    if (Py_SIZE(deque) == 0) {
        PyErr_SetString(PyExc_IndexError, "pop from an empty deque");
        return NULL;
    }
    item = deque->rightblock->data[deque->rightindex];
    deque->rightindex--;
    Py_SIZE(deque)--;
    deque->state++;

    if (deque->rightindex < 0) {
        if (Py_SIZE(deque)) {
            prevblock = deque->rightblock->leftlink;
            assert(deque->leftblock != deque->rightblock);
            freeblock(deque->rightblock);
            CHECK_NOT_END(prevblock);
            MARK_END(prevblock->rightlink);
            deque->rightblock = prevblock;
            deque->rightindex = BLOCKLEN - 1;
        } else {
            assert(deque->leftblock == deque->rightblock);
            assert(deque->leftindex == deque->rightindex+1);
            /* re-center instead of freeing a block */
            deque->leftindex = CENTER + 1;
            deque->rightindex = CENTER;
        }
    }
    return item;
}

static PyObject *
deque_popleft(dequeobject *deque, PyObject *unused)
{
    PyObject *item;
    block *prevblock;

    if (Py_SIZE(deque) == 0) {
        PyErr_SetString(PyExc_IndexError, "pop from an empty deque");
        return NULL;
    }
    assert(deque->leftblock != NULL);
    item = deque->leftblock->data[deque->leftindex];
    deque->leftindex++;
    Py_SIZE(deque)--;
    deque->state++;

    if (deque->leftindex == BLOCKLEN) {
        if (Py_SIZE(deque)) {
            assert(deque->leftblock != deque->rightblock);
            prevblock = deque->leftblock->rightlink;
            freeblock(deque->leftblock);
            CHECK_NOT_END(prevblock);
            MARK_END(prevblock->leftlink);
            deque->leftblock = prevblock;
            deque->leftindex = 0;
        } else {
            assert(deque->leftblock == deque->rightblock);
            assert(deque->leftindex == deque->rightindex+1);
            /* re-center instead of freeing a block */
            deque->leftindex = CENTER + 1;
            deque->rightindex = CENTER;
        }
    }
    return item;
}

追加也是如此:

static inline int
deque_append_internal(dequeobject *deque, PyObject *item, Py_ssize_t maxlen)
{
    if (deque->rightindex == BLOCKLEN - 1) {
        block *b = newblock();
        if (b == NULL)
            return -1;
        b->leftlink = deque->rightblock;
        CHECK_END(deque->rightblock->rightlink);
        deque->rightblock->rightlink = b;
        deque->rightblock = b;
        MARK_END(b->rightlink);
        deque->rightindex = -1;
    }
    Py_SIZE(deque)++;
    deque->rightindex++;
    deque->rightblock->data[deque->rightindex] = item;
    if (NEEDS_TRIM(deque, maxlen)) {
        PyObject *olditem = deque_popleft(deque, NULL);
        Py_DECREF(olditem);
    } else {
        deque->state++;
    }
    return 0;
}

static inline int
deque_appendleft_internal(dequeobject *deque, PyObject *item, Py_ssize_t maxlen)
{
    if (deque->leftindex == 0) {
        block *b = newblock();
        if (b == NULL)
            return -1;
        b->rightlink = deque->leftblock;
        CHECK_END(deque->leftblock->leftlink);
        deque->leftblock->leftlink = b;
        deque->leftblock = b;
        MARK_END(b->leftlink);
        deque->leftindex = BLOCKLEN;
    }
    Py_SIZE(deque)++;
    deque->leftindex--;
    deque->leftblock->data[deque->leftindex] = item;
    if (NEEDS_TRIM(deque, deque->maxlen)) {
        PyObject *olditem = deque_pop(deque, NULL);
        Py_DECREF(olditem);
    } else {
        deque->state++;
    }
    return 0;
}

CPython 中的双端队列以块为单位分配和释放内存,以最大限度地减少堆访问并改善缓存局部性(数组中的顺序元素可能属于同一缓存块)。这些方法都根据需要分配或取消分配内存和 increment/decrement 一些索引。所有恒定时间。

根据the docs

Deques support thread-safe, memory efficient appends and pops from either side of the deque with approximately the same O(1) performance in either direction.

所以两个选项之间没有渐近差异;无论哪种方式,您的 "enqueue" 和 "poll" 操作都是在恒定时间内完成的。这是意料之中的,因为双端队列(双端队列)的全部意义在于在两侧进行有效的添加和删除操作。

使用 timeit 的经验结果确认基本上没有区别:

from collections import deque

def append_popleft():
    d = deque()
    for i in range(10000):
        d.append(i)
    for j in range(10000):
        d.popleft()

def appendleft_pop():
    d = deque()
    for i in range(10000):
        d.appendleft(i)
    for j in range(10000):
        d.pop()

import timeit
t = timeit.timeit(append_popleft, number=10000)
print('append / popleft:', t)
t = timeit.timeit(appendleft_pop, number=10000)
print('appendleft / pop:', t)

输出:

append / popleft: 12.000681700999849
appendleft / pop: 11.937629571999423