使用 Python Api 在 C 中实现的二叉树将 PyObject 转换为字节

Binary Tree implemented in C using Python Api converts PyObject into Bytes

我的 PyObject 在获取它的值时变成了一个 Bytes 对象

所以最近,我正在用 C 做一个项目,我在其中实现了几种类型的树,以便能够在 python 中使用它们,因为 C btree 实现比 Python一个。 (Ikr 有一些可用的库,但由于在锁定期间我有更多的空闲时间,我想,我可以做我自己的库。)

一切正常,直到我想在同一行找到一个元素并打印它的值。当我这样做时,我的 Node 对象变成了 Bytes 对象,没有任何进一步的赋值。

更多信息

OS: Ubuntu 16.04.
Python版本:Python3.5.2
海湾合作委员会:5.4.0-6 Ubuntu

Python代码:

import mylib
import random

maxv = 10

def addValues(tree, values):
    for value in values:
        tree.insert(value)

def main():
    my_list = [i for i in range(maxv)]
    #random.shuffle(my_list)
    tree    = mylib.BinaryTree('test')
    addValues(tree, my_list)
    print(tree.find(3).getValue())
    print(tree.sort(False))
    
main()

预期输出(如果主函数中最后一行之前的行是 print(tree.find(3)) 则有效):

3
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

我用上面的测试代码得到的输出:

3
Segmentation fault

发生分段错误,因为包含值 3 的节点在打印其值时变成了 Bytes 对象。下面的代码将打印新的 Node.

import mylib
import random

maxv = 10

def addValues(tree, values):
    for value in values:
        tree.insert(value)

def main():
    my_list = [i for i in range(maxv)]
    #random.shuffle(my_list)
    tree    = mylib.BinaryTree('test')
    addValues(tree, my_list)
    print(tree.find(0).getValue()) #0 is going to be the root node's value, since random.shuffle is not happening.
    print(tree.root)
    print(tree.sort(False))
    
main()

输出:

0
b'0'
Segmentation fault

我花了几天时间调试它,而且由于我绝对不是 C 编程高手,(您将在下面看到),我找不到错误。我想继续,并实现更多功能,但我无法找到错误。我有可能错过了一些如此微不足道的事情,或者我不知道的事情。遗憾的是,我不是经验丰富的 C 程序员。 :/

我的模块包含更多代码,我不会post这个问题不需要什么。如果您认为应该多看一些以理解我的代码,请随时告诉我!我也希望我的代码可读性好!

有人能解释一下究竟发生了什么吗?
谢谢!

此处相关的C代码:

递归查找函数:

/** find node by value */
/*static*/ PyObject *find(Node *root, int value) {
    if((PyObject *) root == Py_None) {
        return NULL;
    }
    if(root->value == value) {
        return (PyObject *) root;
    }
    if(value < root->value) {
        return find((Node *) root->left,  value);
    }
    if(value > root->value) {
        return find((Node *) root->right, value);
    }
}

控制器,正在从Python调用此方法,这将调用上面的递归函数:

/** Find node by value */
/*static*/ PyObject *BinaryTree_Find(BinaryTree *self, PyObject *args, PyObject *kwds) {
    int value;
    PyObject *result;
    
    if(!PyArg_ParseTuple(args, "i", &value)) {
        return NULL;
    }
    
    result = find((Node *) self->root, value);
    if(result == NULL) {
        result = Py_None;
    }
    
    return (PyObject *) result;
}

最小的、可重现的例子

您可以使用上面的Python代码来驱动这个模块,下面的代码来创建库。

from distutils.core import setup, Extension
setup(
    name = 'mylib', version = '1.0',  \
    ext_modules = [Extension('mylib',
        [
            'custom.c',
        ])])

和 运行 通过:sudo python3 build.py install

#include <Python.h>
#include "structmember.h"

/* Header Block */
#define MODULE_NAME "mylib"
#define MODULE_DOC  "Custom Binary Tree Implementations in C for Python"

/** Node */
typedef struct {
    PyObject_HEAD
    int value;
    PyObject *left;
    PyObject *right;
} Node;

/** BinaryTree */
typedef struct {
    PyObject_HEAD
    int elem;
    PyObject *name;
    PyObject *root;
} BinaryTree;

/** NODE */

/** creating node, set inital values */
/*static*/ PyObject *newNode(PyTypeObject *type, PyObject *args, PyObject *kwds) {
    Node *self;
    
    self = (Node *) type->tp_alloc(type, 0);
    if(self == NULL) {
        return NULL;
    }
    
    self->value = 0;
    self->left  = NULL;
    self->right = NULL;
    
    return (PyObject *) self;
}

/** init function of node, set parameter as value */
/*static*/ int Node_Init(Node *self, PyObject *args, PyObject *kwds) {
    int value;
    
    if(!PyArg_ParseTuple(args, "i", &value)) {
        return -1;
    }
    
    self->value = value;
    self->left  = Py_None;
    self->right = Py_None;
    
    return 0;
}

/** deallocation of node */
/*static*/ void Node_Dealloc(Node *self) {
    if(self->left  != Py_None) Py_DECREF(self->left);
    if(self->right != Py_None) Py_DECREF(self->right);
    Py_TYPE(self)->tp_free((PyObject *) self);
}

/** return value of node */
/*static*/ PyObject *Node_GetValue(Node *self, PyObject *args, PyObject *kwds) {
    return Py_BuildValue("i", self->value);
}

/** node variables */
/*static*/ PyMemberDef Node_Members[] = {
    {
        "left",
        T_OBJECT_EX,
        offsetof(Node, left),
        0,
        "Left Child"
    },
    {
        "right",
        T_OBJECT_EX,
        offsetof(Node, right),
        0,
        "Right Child"
    }
};

/** node methods */
/*static*/ PyMethodDef Node_Methods[] = {
    {
        "getValue",
        (PyCFunction) Node_GetValue,
        METH_NOARGS,
        "Get value of node"
    },
    {NULL}
};

/** node type def */
/*static*/ PyTypeObject Node_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    .tp_name      = "BinaryTree Node",
    .tp_doc       = "Custom Binary Tree Node Object",
    .tp_basicsize = sizeof(Node),
    .tp_itemsize  = 0,
    .tp_flags     = Py_TPFLAGS_DEFAULT,
    .tp_new       = newNode,
    .tp_init      = (initproc) Node_Init,
    .tp_dealloc   = (destructor) Node_Dealloc,
    .tp_members   = Node_Members,
    .tp_methods   = Node_Methods,
};

/** CONTROLLER */

/** insert new node into the tree */
/*static*/ PyObject *insert(Node *root, int value, PyObject *args) {
    if((PyObject *) root == Py_None) {
        return PyObject_CallObject((PyObject *) &Node_Type, args);
    }
    if(root->value == value) {
        return NULL;
    }
    if(value < root->value) {
        root->left  = insert((Node *) root->left, value, args);
    }
    if(value > root->value) {
        root->right = insert((Node *) root->right, value, args);
    }
    return (PyObject *) root;
}

/** find node by value */
/*static*/ PyObject *find(Node *root, int value) {
    if((PyObject *) root == Py_None) {
        return NULL;
    }
    if(root->value == value) {
        return (PyObject *) root;
    }
    if(value < root->value) {
        return find((Node *) root->left,  value);
    }
    if(value > root->value) {
        return find((Node *) root->right, value);
    }
}

/** sort tree asc / inorder traversal */
/*static*/ void sortAsc(Node *root, PyObject *list) {
    if((PyObject *) root == Py_None) {
        return;
    }
    sortAsc((Node *) root->left,  list);
    PyList_Append(list, Py_BuildValue("i", root->value));
    sortAsc((Node *) root->right, list);
}

/** sort tree desc */
/*static*/ void sortDesc(Node *root, PyObject *list) {
    if((PyObject *) root == Py_None) {
        return;
    }
    sortDesc((Node *) root->right,  list);
    PyList_Append(list, Py_BuildValue("i", root->value));
    sortDesc((Node *) root->left, list);
}

/** BinaryTree */

/** creating binary tree, set inital values */
/*static*/ PyObject *newBinaryTree(PyTypeObject *type, PyObject *args, PyObject *kwds) {
    BinaryTree *self;
    
    self = (BinaryTree *) type->tp_alloc(type, 0);
    if(self == NULL) {
        return NULL;
    }
    
    self->name = PyUnicode_FromString("");
    if(self->name == NULL) {
        Py_DECREF(self);
        return NULL;
    }
    
    self->elem = 0;
    self->root = Py_None;
    
    return (PyObject *) self;
}

/** set parameters as variables */
/*static*/ int BinaryTree_Init(BinaryTree *self, PyObject *args, PyObject *kwds) {
    PyObject *name, *temp;
    
    if(!PyArg_ParseTuple(args, "O", &name)) {
        return -1;
    }
    
    if(name) {
        temp = self->name;
        Py_INCREF(name);
        self->name = name;
        Py_XDECREF(temp);
    }
    
    self->elem = 0;
    
    return 0;
}

/** deallocation of binary tree */
/*static*/ void BinaryTree_Dealloc(BinaryTree *self) {
    Py_XDECREF(self->name);
    Py_XDECREF(self->root);
    Py_TYPE(self)->tp_free((PyObject *) self);
}

/** insert controller, set parameter as value, drive inserting, return true */
/*static*/ PyObject *BinaryTree_Insert(BinaryTree *self, PyObject *args, PyObject *kwds) {
    int value;
    PyObject *result;
    
    if(!PyArg_ParseTuple(args, "i", &value)) {
        return NULL;
    }
    
    result = insert((Node *) self->root, value, args);
    if(result == NULL) {
        Py_XDECREF(result);
        PyErr_SetString(PyExc_TypeError, "Already existing value!");
        return NULL;
    }
    
    self->root = result;
    self->elem++;
    
    Py_RETURN_TRUE;
}

/** Find node by value */
/*static*/ PyObject *BinaryTree_Find(BinaryTree *self, PyObject *args, PyObject *kwds) {
    int value;
    PyObject *result;
    
    if(!PyArg_ParseTuple(args, "i", &value)) {
        return NULL;
    }
    
    result = find((Node *) self->root, value);
    if(result == NULL) {
        result = Py_None;
    }
    
    return (PyObject *) result;
}

/** sort binary tree asc */
/*static*/ PyObject *BinaryTree_Sort(BinaryTree *self, PyObject *args, PyObject *kwds) {
    int rev;
    PyObject *list = PyList_New(0);
    
    if(!PyArg_ParseTuple(args, "p", &rev)) {
        Py_XDECREF(list);
        return NULL;
    }
    
    switch(rev) {
        case 0:
            sortAsc( (Node *) self->root, list);
            break;
        case 1:
            sortDesc((Node *) self->root, list);
            break;
        default:
            sortAsc( (Node *) self->root, list);
    }
    
    return list;
}

/** binary tree variables */
/*static*/ PyMemberDef BinaryTree_Members[] = {
    {
        "name",
        T_OBJECT_EX,
        offsetof(BinaryTree, name),
        0,
        "BinaryTree's unique name"
    },
    {
        "root",
        T_OBJECT_EX,
        offsetof(BinaryTree, root),
        0,
        "BinaryTree's root"
    },
    {NULL}
};

/** binary tree methods */
/*static*/ PyMethodDef BinaryTree_Methods[] = {
    {
        "insert",
        (PyCFunction) BinaryTree_Insert,
        METH_VARARGS,
        "Insert new node."
    },
    {
        "find",
        (PyCFunction) BinaryTree_Find,
        METH_VARARGS,
        "Find node by value."
    },
    {
        "sort",
        (PyCFunction) BinaryTree_Sort,
        METH_VARARGS,
        "Sort tree."
    },
    {NULL}
};

/** binary tree type def */
/*static*/ PyTypeObject BinaryTree_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    .tp_name      = "BinaryTree",
    .tp_doc       = "Custom Binary Tree Object",
    .tp_basicsize = sizeof(BinaryTree),
    .tp_itemsize  = 0,
    .tp_flags     = Py_TPFLAGS_DEFAULT,
    .tp_new       = newBinaryTree,
    .tp_init      = (initproc) BinaryTree_Init,
    .tp_dealloc   = (destructor) BinaryTree_Dealloc,
    .tp_members   = BinaryTree_Members,
    .tp_methods   = BinaryTree_Methods,
};

/** MODULE */

/** Module def */
static struct PyModuleDef mylib_module = {
    PyModuleDef_HEAD_INIT,
    MODULE_NAME, /* name of module */
    MODULE_DOC,  /* module documentation, may be NULL */
    -1,          /* size of per-interpreter state of the module */
};

/** Module init */
PyMODINIT_FUNC PyInit_mylib(void) {
    PyObject *mod;
    
    if(PyType_Ready(&BinaryTree_Type) < 0) {
        return NULL;
    }
    
    if(PyType_Ready(&Node_Type) < 0) {
        return NULL;
    }
    
    mod = PyModule_Create(&mylib_module);
    if(mod == NULL) {
        return NULL;
    }
    
    Py_INCREF(&BinaryTree_Type);
    if(PyModule_AddObject(mod, "BinaryTree", (PyObject *) &BinaryTree_Type) < 0) {
        Py_DECREF(&BinaryTree_Type);
        Py_DECREF(mod);
        return NULL;
    }
    
    Py_INCREF(&Node_Type);
    if(PyModule_AddObject(mod, "Node", (PyObject *) &Node_Type) < 0) {
        Py_DECREF(&Node_Type);
        Py_DECREF(mod);
        return NULL;
    }
    
    return mod;
}

编辑

print(tree.root.getValue())
print(tree.find(tree.root.value).getValue())

工作正常!错误出在查找函数上!

感谢DavidW,他指出这一定是某处的引用错误,所以我数了数,在打印它的值之前和之后有多少引用引用了那个 Node 对象。结果:

2                  # references before
56                 # value before, this line accessed the Node.
b'56\n'            # value after 
139690028005977    # references after
Segmentation fault # segfault while sorting

因此,在使用 Find 时,节点的引用计数会减少,因此所有 Node 个对象都是 deallocated/deleted。返回前递增,自定义的setter/getter方法和INCREF/DECREF-s也是无效的。

完成

按照DavidW的建议,我找到了答案!我没有增加引用计数,因此 Nodes 在搜索后立即释放。当我没有请求节点的值时,或者当我没有打印值时,对象的行为方式不同 out.I 建议打印函数以某种方式减少了引用计数。

右键找到控制器:

/** Find nood by value */
/*static*/ PyObject *BinaryTree_Find(BinaryTree *self, PyObject *args, PyObject *kwds) {
    int value;
    PyObject *result;
    
    if(!PyArg_ParseTuple(args, "i", &value)) {
        return NULL;
    }
    
    result = find((Node *) self->root, value);
    if(result == NULL) {
        result = Py_None;
    }
    
    Py_INCREF(result); // Increasing the Node object's reference count here
    
    return (PyObject *) result;
}

正如评论中所讨论的那样,这是一个引用计数错误。 C API 函数必须 return Python 所谓的“新引用”。这意味着要么 return 在函数内部有意创建的东西(例如 PyList_New 的结果)增加现有对象的引用计数。

BinaryTree_Find 具体来说,您并不是 return 新参考。结果 Python 最终会释放一个仍然构成二叉树一部分的对象。一旦发生这种情况,您可能会遇到各种奇怪且令人困惑的行为。我建议添加 Py_INCREF(result).

为了帮助诊断此类问题,值得向对象构造函数和析构函数添加 printf 语句(作为临时调试措施),这样您就可以检查有关何时分配和释放它们的假设。