如何在 C 中正确存储 PyObject*?
How to store PyObject* correctly in C?
我正在编写一个用于缓存的小型库。 Python dict 不适合我,已经尝试过 std::map,得到的 SIGSEGV 有非常相似的错误。无论如何,整个要点在下面的日志中进行了描述。我究竟做错了什么?有没有其他方法可以在 C 中存储对象?
问题:
(gdb) run
The program being debugged has been started already.
Start it from the beginning? (y or n) y
Starting program: /usr/bin/python3
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/usr/lib/libthread_db.so.1".
Python 3.9.2 (default, Feb 20 2021, 18:40:11)
[GCC 10.2.0] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> from syncached import cache
>>> cache.push(1, object())
>>> cache.get(1) == object()
True
>>> cache.get(1) == object()
Program received signal SIGSEGV, Segmentation fault.
0x00007ffff7d049d0 in PyMem_Calloc () from /usr/lib/libpython3.9.so.1.0
(gdb) bt
#0 0x00007ffff7d049d0 in PyMem_Calloc () from /usr/lib/libpython3.9.so.1.0
#1 0x00007ffff7cfb27d in PyList_New () from /usr/lib/libpython3.9.so.1.0
#2 0x00007ffff7d6f4e3 in ?? () from /usr/lib/libpython3.9.so.1.0
#3 0x00007ffff7de2e37 in PyAST_CompileObject () from /usr/lib/libpython3.9.so.1.0
#4 0x00007ffff7de2c3b in ?? () from /usr/lib/libpython3.9.so.1.0
#5 0x00007ffff7cf68ab in ?? () from /usr/lib/libpython3.9.so.1.0
#6 0x00007ffff7cf6a63 in PyRun_InteractiveLoopFlags () from /usr/lib/libpython3.9.so.1.0
#7 0x00007ffff7c84f6b in PyRun_AnyFileExFlags () from /usr/lib/libpython3.9.so.1.0
#8 0x00007ffff7c7965c in ?? () from /usr/lib/libpython3.9.so.1.0
#9 0x00007ffff7dc9fa9 in Py_BytesMain () from /usr/lib/libpython3.9.so.1.0
#10 0x00007ffff7a46b25 in __libc_start_main () from /usr/lib/libc.so.6
#11 0x000055555555504e in _start ()
pyhashmap.c:
#include "Python.h"
#include <stdlib.h>
typedef struct {
Py_hash_t key;
PyObject *val;
} hashmap_member;
typedef struct {
size_t cache_size;
size_t currsize;
hashmap_member *list;
} pyhashmap;
pyhashmap *new_map(size_t size){
pyhashmap *map = PyMem_Malloc(sizeof(pyhashmap));
map->cache_size = size;
map->currsize = 0;
map->list = PyMem_Malloc(size*sizeof(hashmap_member));
return map;
}
void map_insert(pyhashmap *map, Py_hash_t key, PyObject *val){
if (map->currsize == map->cache_size){
return;
}
for (size_t i = 0; i < map->currsize; i++){
if (map->list[i].key == key){
return;
}
}
map->list[map->currsize] = (hashmap_member) {.key = key, .val = val};
map->currsize++;
}
PyObject *map_get(pyhashmap *map, Py_hash_t key){
for (size_t i = 0; i < map->currsize; i++){
if (map->list[i].key == key){
return map->list[i].val;
}
}
return Py_None;
}
ipyhashmap.pxd:
cdef extern from "pyhashmap.c":
ctypedef struct pyhashmap
pyhashmap *new_map(size_t)
void map_insert(pyhashmap *, int, object)
object map_get(pyhashmap *, int)
cache.pyx:
from syncached.ipyhashmap cimport pyhashmap, new_map, map_insert, map_get
cdef pyhashmap *map = new_map(5)
cpdef push(int key, object val):
map_insert(map, key, val)
cpdef get(key):
return map_get(map, key)
另外,第二个问题:
>>> cache.push(3, {"a": "B"})
>>> cache.get(3)
{3: 3, ((<NULL>, 'get'), ('cache', 'get')): ((((((...), ()), None), (3, None)), 'get'), ('cache', 'get')), ((((((...), None), None), ((((...), 'get'), ((...), 'get')), None)), 'get'), ()): ((((((...), 'get'), None), (((...), 'get'), None)), 'get'), ()), ((((...), 'get'), None), (((...), 'get'), None)): ((((...), 'get'), None), (((...), 'get'), None)), 'Py_Repr': [{...}, [...]]}
>>> cache.get(3)
KeyError: 'unknown symbol table entry'
>>> cache.get(3)
[1] 21720 segmentation fault (core dumped) python3
选项 1:让另一个 python 对象引用存储的 python 对象。
我推荐的最佳方法是通过将这些对象存储在 python 列表、字典或集合中来防止存储的 python 对象在自定义地图中时被垃圾回收.这将确保在存储对象时引用计数不会降为零。
选项 2:手动管理存储的 python 对象引用计数。
您可以尝试在处理指向 Python 对象 (PyObject*) 的指针时手动管理引用计数。如果您增加引用计数但不减少相同的次数,则该对象在不再使用时将永远不会从内存中删除。被占用的不能被应用程序回收,即内存会泄漏。但是,如果您不增加引用计数,当您仍在 C 代码中引用该对象时,该对象可能会被删除。
您可以尝试使用 Python's Reference Counting API (see answer to similar question here 中的 Py_INCREF 和 Py_DECREF 直接在 C 中管理内存。如果允许你使用C++而不是C,那么RAII可以让引用计数管理更简单。
我正在编写一个用于缓存的小型库。 Python dict 不适合我,已经尝试过 std::map,得到的 SIGSEGV 有非常相似的错误。无论如何,整个要点在下面的日志中进行了描述。我究竟做错了什么?有没有其他方法可以在 C 中存储对象?
问题:
(gdb) run
The program being debugged has been started already.
Start it from the beginning? (y or n) y
Starting program: /usr/bin/python3
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/usr/lib/libthread_db.so.1".
Python 3.9.2 (default, Feb 20 2021, 18:40:11)
[GCC 10.2.0] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> from syncached import cache
>>> cache.push(1, object())
>>> cache.get(1) == object()
True
>>> cache.get(1) == object()
Program received signal SIGSEGV, Segmentation fault.
0x00007ffff7d049d0 in PyMem_Calloc () from /usr/lib/libpython3.9.so.1.0
(gdb) bt
#0 0x00007ffff7d049d0 in PyMem_Calloc () from /usr/lib/libpython3.9.so.1.0
#1 0x00007ffff7cfb27d in PyList_New () from /usr/lib/libpython3.9.so.1.0
#2 0x00007ffff7d6f4e3 in ?? () from /usr/lib/libpython3.9.so.1.0
#3 0x00007ffff7de2e37 in PyAST_CompileObject () from /usr/lib/libpython3.9.so.1.0
#4 0x00007ffff7de2c3b in ?? () from /usr/lib/libpython3.9.so.1.0
#5 0x00007ffff7cf68ab in ?? () from /usr/lib/libpython3.9.so.1.0
#6 0x00007ffff7cf6a63 in PyRun_InteractiveLoopFlags () from /usr/lib/libpython3.9.so.1.0
#7 0x00007ffff7c84f6b in PyRun_AnyFileExFlags () from /usr/lib/libpython3.9.so.1.0
#8 0x00007ffff7c7965c in ?? () from /usr/lib/libpython3.9.so.1.0
#9 0x00007ffff7dc9fa9 in Py_BytesMain () from /usr/lib/libpython3.9.so.1.0
#10 0x00007ffff7a46b25 in __libc_start_main () from /usr/lib/libc.so.6
#11 0x000055555555504e in _start ()
pyhashmap.c:
#include "Python.h"
#include <stdlib.h>
typedef struct {
Py_hash_t key;
PyObject *val;
} hashmap_member;
typedef struct {
size_t cache_size;
size_t currsize;
hashmap_member *list;
} pyhashmap;
pyhashmap *new_map(size_t size){
pyhashmap *map = PyMem_Malloc(sizeof(pyhashmap));
map->cache_size = size;
map->currsize = 0;
map->list = PyMem_Malloc(size*sizeof(hashmap_member));
return map;
}
void map_insert(pyhashmap *map, Py_hash_t key, PyObject *val){
if (map->currsize == map->cache_size){
return;
}
for (size_t i = 0; i < map->currsize; i++){
if (map->list[i].key == key){
return;
}
}
map->list[map->currsize] = (hashmap_member) {.key = key, .val = val};
map->currsize++;
}
PyObject *map_get(pyhashmap *map, Py_hash_t key){
for (size_t i = 0; i < map->currsize; i++){
if (map->list[i].key == key){
return map->list[i].val;
}
}
return Py_None;
}
ipyhashmap.pxd:
cdef extern from "pyhashmap.c":
ctypedef struct pyhashmap
pyhashmap *new_map(size_t)
void map_insert(pyhashmap *, int, object)
object map_get(pyhashmap *, int)
cache.pyx:
from syncached.ipyhashmap cimport pyhashmap, new_map, map_insert, map_get
cdef pyhashmap *map = new_map(5)
cpdef push(int key, object val):
map_insert(map, key, val)
cpdef get(key):
return map_get(map, key)
另外,第二个问题:
>>> cache.push(3, {"a": "B"})
>>> cache.get(3)
{3: 3, ((<NULL>, 'get'), ('cache', 'get')): ((((((...), ()), None), (3, None)), 'get'), ('cache', 'get')), ((((((...), None), None), ((((...), 'get'), ((...), 'get')), None)), 'get'), ()): ((((((...), 'get'), None), (((...), 'get'), None)), 'get'), ()), ((((...), 'get'), None), (((...), 'get'), None)): ((((...), 'get'), None), (((...), 'get'), None)), 'Py_Repr': [{...}, [...]]}
>>> cache.get(3)
KeyError: 'unknown symbol table entry'
>>> cache.get(3)
[1] 21720 segmentation fault (core dumped) python3
选项 1:让另一个 python 对象引用存储的 python 对象。
我推荐的最佳方法是通过将这些对象存储在 python 列表、字典或集合中来防止存储的 python 对象在自定义地图中时被垃圾回收.这将确保在存储对象时引用计数不会降为零。
选项 2:手动管理存储的 python 对象引用计数。
您可以尝试在处理指向 Python 对象 (PyObject*) 的指针时手动管理引用计数。如果您增加引用计数但不减少相同的次数,则该对象在不再使用时将永远不会从内存中删除。被占用的不能被应用程序回收,即内存会泄漏。但是,如果您不增加引用计数,当您仍在 C 代码中引用该对象时,该对象可能会被删除。
您可以尝试使用 Python's Reference Counting API (see answer to similar question here 中的 Py_INCREF 和 Py_DECREF 直接在 C 中管理内存。如果允许你使用C++而不是C,那么RAII可以让引用计数管理更简单。