PyLongObject 内存泄漏
PyLongObject memory leak
我的 encode
方法中存在内存泄漏。
static const size_t _bits_per_digit = sizeof(digit) * CHAR_BIT;
/**
* Returns a Python int storing the bitstream of the Elias gamma encoded input list.
*/
static PyObject *
encode(PyObject *self, PyObject *obj)
{
if (!PyList_Check(obj)) {
PyErr_SetString(PyExc_TypeError, "Input to encode must be a list");
return NULL;
}
const Py_ssize_t list_length = PyList_GET_SIZE(obj);
size_t size = _ceil_div((size_t) PyList_GET_SIZE(obj), _bits_per_digit);
size_t current_digit = 0;
size_t current_bit = 0;
PyLongObject *bstream = _PyLong_New(size);
if (bstream == NULL) {
return NULL; // _PyLong_New sets errors
}
for (Py_ssize_t i = 0; i < list_length; i++)
{
PyObject *item = PyList_GET_ITEM(obj, i);
unsigned long number = PyLong_AsUnsignedLong(item);
if (PyErr_Occurred()) { // number overflowed or was negative
PyErr_Format(PyExc_ValueError, "Number at list index %zd was negative or too large", i);
return NULL;
} else if (number == 0) { // unencodable
PyErr_Format(PyExc_ValueError, "Zero at list index %zd", i);
return NULL;
}
size_t N = _fast_floor_log2(number);
size_t required_size = _ceil_div(current_digit * _bits_per_digit + current_bit + N * 2 + 1 + list_length - i - 1, _bits_per_digit);
if (size < required_size) {
size = required_size;
PyLongObject *new = (PyLongObject *) PyObject_Realloc(bstream, offsetof(PyLongObject, ob_digit) + size * _bits_per_digit);
if(new == NULL) {
PyObject_Free(bstream);
return PyErr_NoMemory();
}
bstream = new;
PyObject_InitVar((PyVarObject*)bstream, &PyLong_Type, size);
}
// write leading zeroes
size_t zeroes_left = N;
while (zeroes_left > 0) {
if (PyErr_CheckSignals()) {
return NULL;
}
if (current_bit == 0) {
bstream->ob_digit[current_digit] = (digit) 0;
if (zeroes_left > _bits_per_digit) {
zeroes_left -= _bits_per_digit;
++current_digit;
} else {
current_bit += zeroes_left;
if (current_bit == _bits_per_digit) {
current_bit = 0;
++current_digit;
}
zeroes_left = 0;
}
} else {
digit mask = ~((digit)-1 >> current_bit); // enable the current_bit most significant bits
bstream->ob_digit[current_digit] &= mask; // set remainder of this digit to zeroes
digit zeroes_written = _bits_per_digit - current_bit;
if (zeroes_left > zeroes_written) {
zeroes_left -= zeroes_written;
current_bit = 0;
++current_digit;
} else {
current_bit += zeroes_left;
if (current_bit == _bits_per_digit) {
current_bit = 0;
++current_digit;
}
zeroes_left = 0;
}
}
}
// write the binary representation of number
size_t bits_left = N + 1;
while (bits_left > 0) {
if (PyErr_CheckSignals()) {
return NULL;
}
unsigned long mask = 1UL << (bits_left - 1);
if (number & mask) {
bstream->ob_digit[current_digit] |= ~((digit) -1 >> 1) >> current_bit;
} else {
bstream->ob_digit[current_digit] &= ~(~((digit) -1 >> 1) >> current_bit);
}
++current_bit;
if (current_bit == _bits_per_digit) {
current_bit = 0;
++current_digit;
}
--bits_left;
mask >>= 1;
}
}
// remove unused digits
if ((size - current_digit) > 1) {
size = current_digit + 1;
PyLongObject *new = (PyLongObject *) PyObject_Realloc(bstream, offsetof(PyLongObject, ob_digit) + size * _bits_per_digit);
if(new == NULL) {
PyObject_Free(bstream);
return PyErr_NoMemory();
}
bstream = new;
PyObject_InitVar((PyVarObject*)bstream, &PyLong_Type, size);
}
// fill leftover with zeroes, overwrite malloc randomness
digit mask = ~((digit) -1 >> current_bit);
bstream->ob_digit[current_digit] &= mask;
return (PyObject *) bstream;
}
在Python
tracemalloc.start()
encoded = encode(numbers)
sleep(3) # give garbage collection time to run
size, peak = tracemalloc.get_traced_memory()
tracemalloc.stop()
print("size, peak", size, peak)
print(sys.getsizeof(encoded))
打印
size, peak 17053496 17053496
2131708
而不是预期的
size, peak 2131708 ...
2131708
使用我自己的类型而不是 PyLongObject
,它看起来与我的眼睛相同,导致根本没有内存泄漏和输出
size, peak 2131309 2131309
2131309
这是具有该输出的类型
typedef struct {
PyObject_VAR_HEAD
uint8_t data[1];
} BitStream;
static PyTypeObject BitStreamType = {
PyVarObject_HEAD_INIT(NULL, 0)
.tp_name = "elias_gamma_code.BitStream",
.tp_dealloc = 0,
.tp_free = PyObject_Del,
.tp_basicsize = offsetof(BitStream, data),
.tp_itemsize = sizeof(uint8_t),
.tp_flags = Py_TPFLAGS_DEFAULT,
};
因此,使用 PyLongObject 有一些特殊之处,特别是会导致大量内存使用。
我猜是因为PyObject_Realloc,所以才过来问的:
但显然
if a non-null pointer was returned, then obj was already freed
并且确实使用 PyObject_Free(previous_obj)
导致了 pointer being freed was not allocated
malloc 错误。
Elias 伽马编码将非零正二进制整数表示为 N 个前导零,N 是数字的位长度 - 1(与 floor(log2(num)) 相同),后跟数字本身。
+--------+---------------+
| Number | γ encoding |
+--------+---------------+
| 1 | 1 |
| 2 | 0 10 |
| 3 | 0 11 |
| 4 | 00 100 |
| 5 | 00 101 |
| ... | |
| 15 | 000 1111 |
| 16 | 0000 1 0000 |
| ... | |
| 31 | 0000 1 1111 |
| 32 | 00000 10 0000 |
列表中的所有数字都可以这样编码并连接起来。生成的比特流可以明确地解码为原始列表。
在错误路径中,给定
PyLongObject *bstream = _PyLong_New(size);
您必须减少对它的引用。但是你在这里没有这样做:
if (PyErr_Occurred()) { // number overflowed or was negative
PyErr_Format(PyExc_ValueError, "Number at list index %zd was negative or too large", i);
return NULL;
} else if (number == 0) { // unencodable
PyErr_Format(PyExc_ValueError, "Zero at list index %zd", i);
return NULL;
}
正确的 C 习语是 goto
错误处理标签:
PyLongObject *bstream = NULL;
if (!(bstream = _PyLong_New(size))) {
goto error;
}
if (! some other check) {
goto error;
}
...
return bstream;
error:
// xdecref requires that the pointer is initialized in all
// code paths, either to null or pointing to a valid live PyObject
Py_XDECREF(bstream);
return NULL;
}
另一个问题是是否允许在 Python 中完全重新分配一个 longobject...它将来可能会中断。
最后这个看起来有点奇怪:
size * _bits_per_digit
PyObject_Realloc
将新大小设为 字节 ,那么为什么要乘以每个数字 位 - 而不是必须乘以数字类型的 width (sizeof).
我的 encode
方法中存在内存泄漏。
static const size_t _bits_per_digit = sizeof(digit) * CHAR_BIT;
/**
* Returns a Python int storing the bitstream of the Elias gamma encoded input list.
*/
static PyObject *
encode(PyObject *self, PyObject *obj)
{
if (!PyList_Check(obj)) {
PyErr_SetString(PyExc_TypeError, "Input to encode must be a list");
return NULL;
}
const Py_ssize_t list_length = PyList_GET_SIZE(obj);
size_t size = _ceil_div((size_t) PyList_GET_SIZE(obj), _bits_per_digit);
size_t current_digit = 0;
size_t current_bit = 0;
PyLongObject *bstream = _PyLong_New(size);
if (bstream == NULL) {
return NULL; // _PyLong_New sets errors
}
for (Py_ssize_t i = 0; i < list_length; i++)
{
PyObject *item = PyList_GET_ITEM(obj, i);
unsigned long number = PyLong_AsUnsignedLong(item);
if (PyErr_Occurred()) { // number overflowed or was negative
PyErr_Format(PyExc_ValueError, "Number at list index %zd was negative or too large", i);
return NULL;
} else if (number == 0) { // unencodable
PyErr_Format(PyExc_ValueError, "Zero at list index %zd", i);
return NULL;
}
size_t N = _fast_floor_log2(number);
size_t required_size = _ceil_div(current_digit * _bits_per_digit + current_bit + N * 2 + 1 + list_length - i - 1, _bits_per_digit);
if (size < required_size) {
size = required_size;
PyLongObject *new = (PyLongObject *) PyObject_Realloc(bstream, offsetof(PyLongObject, ob_digit) + size * _bits_per_digit);
if(new == NULL) {
PyObject_Free(bstream);
return PyErr_NoMemory();
}
bstream = new;
PyObject_InitVar((PyVarObject*)bstream, &PyLong_Type, size);
}
// write leading zeroes
size_t zeroes_left = N;
while (zeroes_left > 0) {
if (PyErr_CheckSignals()) {
return NULL;
}
if (current_bit == 0) {
bstream->ob_digit[current_digit] = (digit) 0;
if (zeroes_left > _bits_per_digit) {
zeroes_left -= _bits_per_digit;
++current_digit;
} else {
current_bit += zeroes_left;
if (current_bit == _bits_per_digit) {
current_bit = 0;
++current_digit;
}
zeroes_left = 0;
}
} else {
digit mask = ~((digit)-1 >> current_bit); // enable the current_bit most significant bits
bstream->ob_digit[current_digit] &= mask; // set remainder of this digit to zeroes
digit zeroes_written = _bits_per_digit - current_bit;
if (zeroes_left > zeroes_written) {
zeroes_left -= zeroes_written;
current_bit = 0;
++current_digit;
} else {
current_bit += zeroes_left;
if (current_bit == _bits_per_digit) {
current_bit = 0;
++current_digit;
}
zeroes_left = 0;
}
}
}
// write the binary representation of number
size_t bits_left = N + 1;
while (bits_left > 0) {
if (PyErr_CheckSignals()) {
return NULL;
}
unsigned long mask = 1UL << (bits_left - 1);
if (number & mask) {
bstream->ob_digit[current_digit] |= ~((digit) -1 >> 1) >> current_bit;
} else {
bstream->ob_digit[current_digit] &= ~(~((digit) -1 >> 1) >> current_bit);
}
++current_bit;
if (current_bit == _bits_per_digit) {
current_bit = 0;
++current_digit;
}
--bits_left;
mask >>= 1;
}
}
// remove unused digits
if ((size - current_digit) > 1) {
size = current_digit + 1;
PyLongObject *new = (PyLongObject *) PyObject_Realloc(bstream, offsetof(PyLongObject, ob_digit) + size * _bits_per_digit);
if(new == NULL) {
PyObject_Free(bstream);
return PyErr_NoMemory();
}
bstream = new;
PyObject_InitVar((PyVarObject*)bstream, &PyLong_Type, size);
}
// fill leftover with zeroes, overwrite malloc randomness
digit mask = ~((digit) -1 >> current_bit);
bstream->ob_digit[current_digit] &= mask;
return (PyObject *) bstream;
}
在Python
tracemalloc.start()
encoded = encode(numbers)
sleep(3) # give garbage collection time to run
size, peak = tracemalloc.get_traced_memory()
tracemalloc.stop()
print("size, peak", size, peak)
print(sys.getsizeof(encoded))
打印
size, peak 17053496 17053496
2131708
而不是预期的
size, peak 2131708 ...
2131708
使用我自己的类型而不是 PyLongObject
,它看起来与我的眼睛相同,导致根本没有内存泄漏和输出
size, peak 2131309 2131309
2131309
这是具有该输出的类型
typedef struct {
PyObject_VAR_HEAD
uint8_t data[1];
} BitStream;
static PyTypeObject BitStreamType = {
PyVarObject_HEAD_INIT(NULL, 0)
.tp_name = "elias_gamma_code.BitStream",
.tp_dealloc = 0,
.tp_free = PyObject_Del,
.tp_basicsize = offsetof(BitStream, data),
.tp_itemsize = sizeof(uint8_t),
.tp_flags = Py_TPFLAGS_DEFAULT,
};
因此,使用 PyLongObject 有一些特殊之处,特别是会导致大量内存使用。
我猜是因为PyObject_Realloc,所以才过来问的:
但显然
if a non-null pointer was returned, then obj was already freed
并且确实使用 PyObject_Free(previous_obj)
导致了 pointer being freed was not allocated
malloc 错误。
Elias 伽马编码将非零正二进制整数表示为 N 个前导零,N 是数字的位长度 - 1(与 floor(log2(num)) 相同),后跟数字本身。
+--------+---------------+
| Number | γ encoding |
+--------+---------------+
| 1 | 1 |
| 2 | 0 10 |
| 3 | 0 11 |
| 4 | 00 100 |
| 5 | 00 101 |
| ... | |
| 15 | 000 1111 |
| 16 | 0000 1 0000 |
| ... | |
| 31 | 0000 1 1111 |
| 32 | 00000 10 0000 |
列表中的所有数字都可以这样编码并连接起来。生成的比特流可以明确地解码为原始列表。
在错误路径中,给定
PyLongObject *bstream = _PyLong_New(size);
您必须减少对它的引用。但是你在这里没有这样做:
if (PyErr_Occurred()) { // number overflowed or was negative
PyErr_Format(PyExc_ValueError, "Number at list index %zd was negative or too large", i);
return NULL;
} else if (number == 0) { // unencodable
PyErr_Format(PyExc_ValueError, "Zero at list index %zd", i);
return NULL;
}
正确的 C 习语是 goto
错误处理标签:
PyLongObject *bstream = NULL;
if (!(bstream = _PyLong_New(size))) {
goto error;
}
if (! some other check) {
goto error;
}
...
return bstream;
error:
// xdecref requires that the pointer is initialized in all
// code paths, either to null or pointing to a valid live PyObject
Py_XDECREF(bstream);
return NULL;
}
另一个问题是是否允许在 Python 中完全重新分配一个 longobject...它将来可能会中断。
最后这个看起来有点奇怪:
size * _bits_per_digit
PyObject_Realloc
将新大小设为 字节 ,那么为什么要乘以每个数字 位 - 而不是必须乘以数字类型的 width (sizeof).