如何最有效地使用 collections.deque(popleft 与 appendleft)
How to use collections.deque most effectively (popleft vs. appendleft)
我在学习Python中的数据结构时一直在学习队列,想问一个关于它的使用的问题。
我想队列中有两种方法 append/pop。第一种方法是使用 deque.append()
和 deque.popleft()
。另一种方法是使用 deque.appendleft()
和 deque.pop()
。两者之间有性能差异吗?如果不是,根据您的经验,哪个更常用?您是否出于其他原因推荐其中一个?
从我的角度来看,它们本质上做的是相同的事情,因为它们都实现了先进先出。非常感谢您的意见!
使用哪一端并不重要。两者都同样是最优的。与仅在列表的 right/top 上提供 O(1) 追加和弹出操作的列表不同,追加和弹出操作在双端队列的两端是 guaranteed 是 O(1)。如果不是,它的存在就没有意义,我们不妨使用一个列表。
查看CPython 3.8的源代码,方法pop
和popleft
几乎相同:
static PyObject *
deque_pop(dequeobject *deque, PyObject *unused)
{
PyObject *item;
block *prevblock;
if (Py_SIZE(deque) == 0) {
PyErr_SetString(PyExc_IndexError, "pop from an empty deque");
return NULL;
}
item = deque->rightblock->data[deque->rightindex];
deque->rightindex--;
Py_SIZE(deque)--;
deque->state++;
if (deque->rightindex < 0) {
if (Py_SIZE(deque)) {
prevblock = deque->rightblock->leftlink;
assert(deque->leftblock != deque->rightblock);
freeblock(deque->rightblock);
CHECK_NOT_END(prevblock);
MARK_END(prevblock->rightlink);
deque->rightblock = prevblock;
deque->rightindex = BLOCKLEN - 1;
} else {
assert(deque->leftblock == deque->rightblock);
assert(deque->leftindex == deque->rightindex+1);
/* re-center instead of freeing a block */
deque->leftindex = CENTER + 1;
deque->rightindex = CENTER;
}
}
return item;
}
static PyObject *
deque_popleft(dequeobject *deque, PyObject *unused)
{
PyObject *item;
block *prevblock;
if (Py_SIZE(deque) == 0) {
PyErr_SetString(PyExc_IndexError, "pop from an empty deque");
return NULL;
}
assert(deque->leftblock != NULL);
item = deque->leftblock->data[deque->leftindex];
deque->leftindex++;
Py_SIZE(deque)--;
deque->state++;
if (deque->leftindex == BLOCKLEN) {
if (Py_SIZE(deque)) {
assert(deque->leftblock != deque->rightblock);
prevblock = deque->leftblock->rightlink;
freeblock(deque->leftblock);
CHECK_NOT_END(prevblock);
MARK_END(prevblock->leftlink);
deque->leftblock = prevblock;
deque->leftindex = 0;
} else {
assert(deque->leftblock == deque->rightblock);
assert(deque->leftindex == deque->rightindex+1);
/* re-center instead of freeing a block */
deque->leftindex = CENTER + 1;
deque->rightindex = CENTER;
}
}
return item;
}
追加也是如此:
static inline int
deque_append_internal(dequeobject *deque, PyObject *item, Py_ssize_t maxlen)
{
if (deque->rightindex == BLOCKLEN - 1) {
block *b = newblock();
if (b == NULL)
return -1;
b->leftlink = deque->rightblock;
CHECK_END(deque->rightblock->rightlink);
deque->rightblock->rightlink = b;
deque->rightblock = b;
MARK_END(b->rightlink);
deque->rightindex = -1;
}
Py_SIZE(deque)++;
deque->rightindex++;
deque->rightblock->data[deque->rightindex] = item;
if (NEEDS_TRIM(deque, maxlen)) {
PyObject *olditem = deque_popleft(deque, NULL);
Py_DECREF(olditem);
} else {
deque->state++;
}
return 0;
}
static inline int
deque_appendleft_internal(dequeobject *deque, PyObject *item, Py_ssize_t maxlen)
{
if (deque->leftindex == 0) {
block *b = newblock();
if (b == NULL)
return -1;
b->rightlink = deque->leftblock;
CHECK_END(deque->leftblock->leftlink);
deque->leftblock->leftlink = b;
deque->leftblock = b;
MARK_END(b->leftlink);
deque->leftindex = BLOCKLEN;
}
Py_SIZE(deque)++;
deque->leftindex--;
deque->leftblock->data[deque->leftindex] = item;
if (NEEDS_TRIM(deque, deque->maxlen)) {
PyObject *olditem = deque_pop(deque, NULL);
Py_DECREF(olditem);
} else {
deque->state++;
}
return 0;
}
CPython 中的双端队列以块为单位分配和释放内存,以最大限度地减少堆访问并改善缓存局部性(数组中的顺序元素可能属于同一缓存块)。这些方法都根据需要分配或取消分配内存和 increment/decrement 一些索引。所有恒定时间。
根据the docs:
Deques support thread-safe, memory efficient appends and pops from either side of the deque with approximately the same O(1) performance in either direction.
所以两个选项之间没有渐近差异;无论哪种方式,您的 "enqueue" 和 "poll" 操作都是在恒定时间内完成的。这是意料之中的,因为双端队列(双端队列)的全部意义在于在两侧进行有效的添加和删除操作。
使用 timeit
的经验结果确认基本上没有区别:
from collections import deque
def append_popleft():
d = deque()
for i in range(10000):
d.append(i)
for j in range(10000):
d.popleft()
def appendleft_pop():
d = deque()
for i in range(10000):
d.appendleft(i)
for j in range(10000):
d.pop()
import timeit
t = timeit.timeit(append_popleft, number=10000)
print('append / popleft:', t)
t = timeit.timeit(appendleft_pop, number=10000)
print('appendleft / pop:', t)
输出:
append / popleft: 12.000681700999849
appendleft / pop: 11.937629571999423
我在学习Python中的数据结构时一直在学习队列,想问一个关于它的使用的问题。
我想队列中有两种方法 append/pop。第一种方法是使用 deque.append()
和 deque.popleft()
。另一种方法是使用 deque.appendleft()
和 deque.pop()
。两者之间有性能差异吗?如果不是,根据您的经验,哪个更常用?您是否出于其他原因推荐其中一个?
从我的角度来看,它们本质上做的是相同的事情,因为它们都实现了先进先出。非常感谢您的意见!
使用哪一端并不重要。两者都同样是最优的。与仅在列表的 right/top 上提供 O(1) 追加和弹出操作的列表不同,追加和弹出操作在双端队列的两端是 guaranteed 是 O(1)。如果不是,它的存在就没有意义,我们不妨使用一个列表。
查看CPython 3.8的源代码,方法pop
和popleft
几乎相同:
static PyObject *
deque_pop(dequeobject *deque, PyObject *unused)
{
PyObject *item;
block *prevblock;
if (Py_SIZE(deque) == 0) {
PyErr_SetString(PyExc_IndexError, "pop from an empty deque");
return NULL;
}
item = deque->rightblock->data[deque->rightindex];
deque->rightindex--;
Py_SIZE(deque)--;
deque->state++;
if (deque->rightindex < 0) {
if (Py_SIZE(deque)) {
prevblock = deque->rightblock->leftlink;
assert(deque->leftblock != deque->rightblock);
freeblock(deque->rightblock);
CHECK_NOT_END(prevblock);
MARK_END(prevblock->rightlink);
deque->rightblock = prevblock;
deque->rightindex = BLOCKLEN - 1;
} else {
assert(deque->leftblock == deque->rightblock);
assert(deque->leftindex == deque->rightindex+1);
/* re-center instead of freeing a block */
deque->leftindex = CENTER + 1;
deque->rightindex = CENTER;
}
}
return item;
}
static PyObject *
deque_popleft(dequeobject *deque, PyObject *unused)
{
PyObject *item;
block *prevblock;
if (Py_SIZE(deque) == 0) {
PyErr_SetString(PyExc_IndexError, "pop from an empty deque");
return NULL;
}
assert(deque->leftblock != NULL);
item = deque->leftblock->data[deque->leftindex];
deque->leftindex++;
Py_SIZE(deque)--;
deque->state++;
if (deque->leftindex == BLOCKLEN) {
if (Py_SIZE(deque)) {
assert(deque->leftblock != deque->rightblock);
prevblock = deque->leftblock->rightlink;
freeblock(deque->leftblock);
CHECK_NOT_END(prevblock);
MARK_END(prevblock->leftlink);
deque->leftblock = prevblock;
deque->leftindex = 0;
} else {
assert(deque->leftblock == deque->rightblock);
assert(deque->leftindex == deque->rightindex+1);
/* re-center instead of freeing a block */
deque->leftindex = CENTER + 1;
deque->rightindex = CENTER;
}
}
return item;
}
追加也是如此:
static inline int
deque_append_internal(dequeobject *deque, PyObject *item, Py_ssize_t maxlen)
{
if (deque->rightindex == BLOCKLEN - 1) {
block *b = newblock();
if (b == NULL)
return -1;
b->leftlink = deque->rightblock;
CHECK_END(deque->rightblock->rightlink);
deque->rightblock->rightlink = b;
deque->rightblock = b;
MARK_END(b->rightlink);
deque->rightindex = -1;
}
Py_SIZE(deque)++;
deque->rightindex++;
deque->rightblock->data[deque->rightindex] = item;
if (NEEDS_TRIM(deque, maxlen)) {
PyObject *olditem = deque_popleft(deque, NULL);
Py_DECREF(olditem);
} else {
deque->state++;
}
return 0;
}
static inline int
deque_appendleft_internal(dequeobject *deque, PyObject *item, Py_ssize_t maxlen)
{
if (deque->leftindex == 0) {
block *b = newblock();
if (b == NULL)
return -1;
b->rightlink = deque->leftblock;
CHECK_END(deque->leftblock->leftlink);
deque->leftblock->leftlink = b;
deque->leftblock = b;
MARK_END(b->leftlink);
deque->leftindex = BLOCKLEN;
}
Py_SIZE(deque)++;
deque->leftindex--;
deque->leftblock->data[deque->leftindex] = item;
if (NEEDS_TRIM(deque, deque->maxlen)) {
PyObject *olditem = deque_pop(deque, NULL);
Py_DECREF(olditem);
} else {
deque->state++;
}
return 0;
}
CPython 中的双端队列以块为单位分配和释放内存,以最大限度地减少堆访问并改善缓存局部性(数组中的顺序元素可能属于同一缓存块)。这些方法都根据需要分配或取消分配内存和 increment/decrement 一些索引。所有恒定时间。
根据the docs:
Deques support thread-safe, memory efficient appends and pops from either side of the deque with approximately the same O(1) performance in either direction.
所以两个选项之间没有渐近差异;无论哪种方式,您的 "enqueue" 和 "poll" 操作都是在恒定时间内完成的。这是意料之中的,因为双端队列(双端队列)的全部意义在于在两侧进行有效的添加和删除操作。
使用 timeit
的经验结果确认基本上没有区别:
from collections import deque
def append_popleft():
d = deque()
for i in range(10000):
d.append(i)
for j in range(10000):
d.popleft()
def appendleft_pop():
d = deque()
for i in range(10000):
d.appendleft(i)
for j in range(10000):
d.pop()
import timeit
t = timeit.timeit(append_popleft, number=10000)
print('append / popleft:', t)
t = timeit.timeit(appendleft_pop, number=10000)
print('appendleft / pop:', t)
输出:
append / popleft: 12.000681700999849
appendleft / pop: 11.937629571999423