Python 无法调用绑定 C++ 虚拟成员函数

Python binding C++ virtual member function cannot be called

最近用C++写了一个Python3的扩展,但是在python中调用C++的时候遇到了一些麻烦,也不打算使用第三方库。

我在 Python 中调用绑定的 C++ 虚成员函数时无法调用成功,去掉 virtual 关键字之后就可以正常调用了。

程序在执行 `return PyObject_CallObject(pFunction, args);` 这一行时崩溃了,但我没有找到原因。

这是我的代码:

// Base class of the Python-exposed type B.
// PyObject_HEAD is declared as the first member so that an instance can be
// handed to CPython as a PyObject.
// NOTE(review): test() is virtual, which makes this class polymorphic; the
// compiler then typically stores a hidden vtable pointer in every instance,
// which may move the PyObject header away from offset 0 — confirm against
// the target ABI before relying on pointer casts to PyObject*.
class A 
{
    PyObject_HEAD
public:
    // Logs construction to stdout.
    A()
    {
        std::cout << "A::A()" << std::endl;
    }

    // Logs destruction to stdout.
    ~A()
    {
        std::cout << "A::~A()" << std::endl;
    }

    // Logs the call to stdout; virtual, so it is dispatched dynamically.
    virtual void test()
    {
        std::cout << "A::test()" << std::endl;
    }
};

// C++ class behind the Python type "example.B".
// Instances are constructed in place by B_new and destroyed by B_dealloc.
// NOTE(review): B inherits the virtual test() from A, so instances carry
// whatever hidden data the compiler needs for dynamic dispatch — confirm that
// the object layout still starts with the PyObject header CPython expects.
class B : public A
{
public:
    // Logs construction to stdout.
    B()
    {
        std::cout << "B::B()" << std::endl;
    }

    // Logs destruction to stdout.
    ~B()
    {
        std::cout << "B::~B()" << std::endl;
    }

    // Implementation of the Python method "test".
    //
    // The function is registered with METH_NOARGS, and CPython invokes such
    // callables with two arguments (self, NULL). The second parameter is
    // therefore declared, although unused, so that the call made through the
    // PyCFunction pointer matches the real signature. It is defaulted so that
    // existing one-argument C++ calls keep compiling.
    //
    // self: the instance the method was called on.
    // Returns a new reference to the Python int 123456.
    static PyObject *py(B *self, PyObject * /*unused*/ = nullptr) {
        self->test();
        return PyLong_FromLong((long)123456);
    }
};

// tp_dealloc slot: runs the C++ destructor in place, then returns the
// storage to the allocator of the object's type.
static void B_dealloc(B *self)
{
    PyObject *const object = (PyObject *)self;

    self->~B();
    Py_TYPE(object)->tp_free(object);
}

// tp_new slot: allocates storage through the type's tp_alloc slot and
// constructs a B in that storage with placement new.
//
// type: the type object being instantiated.
// args, kwds: constructor arguments; not used here.
// Returns the new object, or nullptr if the allocation failed.
static PyObject *B_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
    B *self = (B*)type->tp_alloc(type, 0);
    if (self == nullptr)
    {
        // Allocation failed; constructing into a null pointer would be
        // undefined behaviour, so report the failure to the caller instead.
        return nullptr;
    }

    // NOTE(review): the constructor runs on memory that tp_alloc has already
    // prepared — confirm that it cannot disturb the PyObject header.
    new (self)B;
    return (PyObject*)self;
}

// Method table: exposes B::py under the Python name "test".
// The entry is flagged METH_NOARGS and B::py is cast to PyCFunction.
static PyMethodDef B_methods[] = {
    {"test", (PyCFunction)(B::py), METH_NOARGS, nullptr},   // no docstring
    {nullptr}                                               // sentinel terminating the table
};

// Definition of the built-in module "example" created in PyInit_example.
// NOTE(review): B_methods is installed here as the module-level function
// table and is also used as tp_methods of ClassyType, so "test" is registered
// both as a module function and as a method of B. B::py treats its first
// argument as a B instance — confirm the module-level registration is intended.
static struct PyModuleDef example_definition = {
    PyModuleDef_HEAD_INIT,
    "example",              /* m_name */
    "example",              /* m_doc */
    -1,                     /* m_size */
    B_methods               /* m_methods */
};

// Type object for the Python type "example.B".
// Slots are filled positionally; the trailing comment on each line names the
// slot. Only these slots are non-zero: tp_name, tp_basicsize (sizeof(B)),
// tp_dealloc (B_dealloc), tp_flags, tp_doc, tp_methods (B_methods) and
// tp_new (B_new). tp_init is left empty, so construction happens in B_new.
// NOTE(review): the positional order must match the PyTypeObject layout of
// the Python version being built against — confirm.
static PyTypeObject ClassyType = {
    PyVarObject_HEAD_INIT(NULL, 0) "example.B", /* tp_name */
    sizeof(B),                                  /* tp_basicsize */
    0,                                          /* tp_itemsize */
    (destructor)B_dealloc,                      /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash  */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    0,                                          /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,   /* tp_flags */
    "B objects",                                /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    B_methods,                                  /* tp_methods */
    nullptr,                                    /* tp_members */
    0,                                          /* tp_getset */
    0,                                          /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    nullptr,                                    /* tp_init */
    0,                                          /* tp_alloc */
    B_new,                                      /* tp_new */
};

// Module initialisation function for the built-in module "example".
//
// Finalises ClassyType, creates the module and publishes the type under the
// attribute name "B".
// Returns the new module, or NULL (with a Python error set) if any step fails.
PyMODINIT_FUNC PyInit_example(void)
{
    // Prepare the type first, so a failure here cannot leak a module object.
    if (PyType_Ready(&ClassyType) < 0)
        return NULL;

    PyObject *m = PyModule_Create(&example_definition);
    if (m == NULL)
        return NULL;

    // PyModule_AddObject takes over our reference only when it succeeds, so
    // on failure both the extra type reference and the module are released.
    Py_INCREF(&ClassyType);
    if (PyModule_AddObject(m, "B", (PyObject*)&ClassyType) < 0)
    {
        Py_DECREF(&ClassyType);
        Py_DECREF(m);
        return NULL;
    }

    return m;
}

// Imports the Python module called `name`.
// Returns the module object on success; on failure prints a message to
// stdout and returns nullptr.
PyObject* importModule(std::string name)
{
    PyObject* const loaded = PyImport_ImportModule(name.c_str());    // module name
    if (loaded != nullptr)
    {
        return loaded;
    }

    std::cout << "load module error!" << std::endl;
    return nullptr;
}

// Looks up the attribute `name` on `pModule` and calls it.
//
// pModule: object to look the callable up on.
// name:    attribute name of the callable.
// args:    argument tuple, or nullptr to call without arguments.
// Returns the result of the call, or nullptr if the lookup or the call
// failed. A failed lookup is also reported on stdout.
PyObject* callFunction(PyObject* pModule, std::string name, PyObject* args = nullptr)
{
    PyObject* pFunction = PyObject_GetAttrString(pModule, name.c_str());    // function name
    if (pFunction == nullptr)
    {
        std::cout << "call function error!" << std::endl;
        return nullptr;
    }

    PyObject* pResult = PyObject_CallObject(pFunction, args);

    // PyObject_GetAttrString returned a new reference; release it now that
    // the call has finished, otherwise it leaks on every invocation.
    Py_DECREF(pFunction);
    return pResult;
}

int main()
{
    // add module
    PyImport_AppendInittab("example", PyInit_example);

    // init python
    Py_Initialize();
    {
        PyRun_SimpleString("import sys");
        PyRun_SimpleString("import os");
        PyRun_SimpleString("sys.path.append(os.getcwd() + '\script')");    // add script path
    }

    // import module
    PyImport_ImportModule("example");

    PyObject* pModule = importModule("Test");
    if (pModule != nullptr)
    {
        PyObject* pReturn = callFunction(pModule, "main");
    }

    PyErr_Print();

    Py_Finalize();

    system("pause");
    return 0;
}

我假设 OP 使用的是 CPython API。(我们也在使用 CPython,而且部分代码看起来惊人地相似/眼熟。)

顾名思义,它是用 C 语言编写的。

因此,在用它为 C++ class 编写 Python 绑定时,开发人员必须意识到:CPython 及其 C API 对 C++ 一无所知。必须仔细考虑这一点(这与为 C++ class 库编写 C 绑定的情况类似)。

当我编写 Python 包装器 class 时,我总是使用 struct 来写(以便提醒自己记住这一事实)。在 CPython 的包装器中可以使用 C++ 继承,使其与被包装的 C++ class 的继承关系相对应(但这是我上述规则的唯一例外)。

struct 和 class 在 C++ 中是一回事,唯一的区别是:struct 中的所有成员默认是 public,而 class 中默认是 private。参见 SO: Class vs Struct for data only? 顺便说一句,CPython 是通过 C 指针转换(即 reinterpret 转换)来访问这些成员变量(或者说结构体成员,例如 ob_base)的,它根本不会理会 private 这类保护措施。

恕我直言,这里值得一提的是术语 POD(plain old data,也称为 passive data structure),因为正是它使 C++ 包装器 class 能够与 C 兼容。SO: What are Aggregates and PODs and how/why are they special? 对此给出了全面的概述。

在 CPython 包装器 class 中引入至少一个 virtual 成员函数会产生致命的后果。仔细阅读上面的 link 可以清楚地了解这一点。但是,我决定通过一些示例代码来说明这一点:

#include <iomanip>
#include <iostream>

// a little experimentation framework:

// Simplified stand-ins for the CPython declarations used by this experiment.
struct _typeobject { };      // empty placeholder for CPython's type object struct
using Py_ssize_t = size_t;   // simplified alias, just to keep the experiment small

// copied from object.h of CPython:
// Expands to two member declarations, _ob_next and _ob_prev, which become
// the first two fields of struct _object in this experiment.
// NOTE(review): in CPython's own object.h this macro appears to be non-empty
// only in special reference-tracing builds — confirm for the build in use.
/* Define pointers to support a doubly-linked list of all live heap objects. */
#define _PyObject_HEAD_EXTRA            \
    struct _object *_ob_next;           \
    struct _object *_ob_prev;

// copied from object.h of CPython:
// Member order in this experiment: _ob_next, _ob_prev (from
// _PyObject_HEAD_EXTRA), then ob_refcnt, then ob_type.
/* Nothing is actually declared to be a PyObject, but every pointer to
 * a Python object can be cast to a PyObject*.  This is inheritance built
 * by hand.  Similarly every pointer to a variable-size Python object can,
 * in addition, be cast to PyVarObject*.
 */
typedef struct _object {
  _PyObject_HEAD_EXTRA
  Py_ssize_t ob_refcnt;               // reference count field
  struct _typeobject *ob_type;        // pointer to the object's type
} PyObject;

/* PyObject_HEAD defines the initial segment of every PyObject. */
// Expands to a single member named ob_base of type PyObject. For a pointer to
// the enclosing object to be usable as a PyObject*, that member has to sit at
// the very start of the object.
#define PyObject_HEAD                   PyObject ob_base;

// Writes a hex dump of `size` bytes starting at `p` to `out`.
//
// Each row starts with the zero-padded address of its first byte followed by
// ": ", then up to 16 bytes, each printed as a space and two hex digits.
// Every row ends with a newline; nothing is written when `size` is 0.
//
// The stream's format flags and fill character are restored before
// returning, so later output on `out` is not left in hex mode with a '0' fill.
void dump(std::ostream &out, const char *p, size_t size)
{
  const size_t bytesPerRow = 16;

  // Remember the caller's formatting so that it can be restored afterwards.
  const std::ios_base::fmtflags savedFlags = out.flags();
  const char savedFill = out.fill();

  out << std::hex << std::setfill('0');
  for (size_t i = 0; i < size; ++i) {
    if (i % bytesPerRow == 0) {
      // Row header: address of the first byte of this row.
      out << std::setw(2 * sizeof p) << reinterpret_cast<size_t>(p + i) << ": ";
    }
    // Go through unsigned char so that bytes >= 0x80 are not sign-extended.
    out << ' ' << std::setw(2)
      << static_cast<unsigned>(static_cast<unsigned char>(p[i]));
    if ((i + 1) % bytesPerRow == 0) out << '\n';
  }
  // Terminate a trailing, partially filled row.
  if (size % bytesPerRow != 0) out << '\n';

  out.flags(savedFlags);
  out.fill(savedFill);
}

// the experiment:

// Plain PyObject with static storage; main() dumps it as the reference layout.
static PyObject pyObj;

// This is correct:
// a plain struct whose first member is the PyObject header (PyObject_HEAD),
// followed by the wrapper's own data.
struct Wrapper1 {
  PyObject_HEAD
  int myExt;   // extension data stored after the header
};
// Instance with static storage; dumped by main().
static Wrapper1 wrap1;

// This is possible:
// plain (non-virtual) inheritance from Wrapper1 that only adds another data
// member; no virtual functions are introduced.
struct Wrapper1Derived: Wrapper1 {
  double myExtD;   // additional extension data of the derived wrapper
};
// Instance with static storage; dumped by main().
static Wrapper1Derived wrap1D;

// This is effectively not different from struct Wrapper1
// but things are private in Wrapper2
// ...and Python will just ignore this (using C pointer casts).
// Same members in the same order as Wrapper1; only the default access differs.
class Wrapper2 {
  PyObject_HEAD
  int myExt;   // extension data stored after the header
};
// Instance with static storage; dumped by main().
static Wrapper2 wrap2;

// This is FATAL - introduces a virtual method table.
// The virtual destructor makes the class polymorphic. In the sample output
// shown with this program, the dump of wrap3 begins with a non-zero
// pointer-sized value in front of the otherwise zero PyObject fields.
class Wrapper3 {
  private:
    PyObject_HEAD
    int myExt;   // extension data, set by the constructor
  public:
    // Stores `value` in myExt.
    Wrapper3(int value): myExt(value) { }
    // Virtual destructor; resets myExt to 0.
    virtual ~Wrapper3() { myExt = 0; }
};
// Instance with static storage, constructed with 123 (0x7b); dumped by main().
static Wrapper3 wrap3{123};

// Prints a captioned hex dump of each experiment object to stdout.
int main()
{
  // Writes the caption line, then the raw bytes of one object.
  const auto show = [](const char *caption, const void *object, size_t size) {
    std::cout << "Dump of " << caption << ":\n";
    dump(std::cout, static_cast<const char*>(object), size);
  };

  show("PyObject pyObj", &pyObj, sizeof pyObj);
  show("Wrapper1 wrap1", &wrap1, sizeof wrap1);
  show("Wrapper1Derived wrap1D", &wrap1D, sizeof wrap1D);
  show("Wrapper2 wrap2", &wrap2, sizeof wrap2);
  show("Wrapper3 wrap3", &wrap3, sizeof wrap3);
  return 0;
}

编译和运行:

Dump of PyObject pyObj:
0000000000601640:  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
0000000000601650:  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
Dump of Wrapper1 wrap1:
0000000000601600:  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
0000000000601610:  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
0000000000601620:  00 00 00 00 00 00 00 00
Dump of Wrapper1Derived wrap1D:
00000000006015c0:  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
00000000006015d0:  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
00000000006015e0:  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
Dump of Wrapper2 wrap2:
0000000000601580:  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
0000000000601590:  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
00000000006015a0:  00 00 00 00 00 00 00 00
Dump of Wrapper3 wrap3:
0000000000601540:  d8 0e 40 00 00 00 00 00 00 00 00 00 00 00 00 00
0000000000601550:  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
0000000000601560:  00 00 00 00 00 00 00 00 7b 00 00 00 00 00 00 00

Live Demo on coliru

pyObj、wrap1、wrap1D 和 wrap2 的转储全部由 00 组成——这并不奇怪,因为我把它们声明成了 static。wrap3 看起来有些不同,部分原因是构造函数(7b == 123),部分原因是 C++ 编译器把 VMT 指针放进了 class 实例中,d8 0e 40 很可能就属于这个指针。(我假设 VMT 指针的大小与任意函数指针相同,但我其实并不清楚编译器在内部是如何组织这些内容的。)

想象一下会发生什么:CPython 获取 wrap3 的地址,将其转换为 PyObject*,然后写入 _ob_next 指针——该指针位于偏移量 0 处,用于把 Python 对象链接成一个双向链表。(运气好的话会直接崩溃,否则会出现让情况变得更糟的其他问题。)

再想象一下,在 OP 的创建函数中会发生什么:

// tp_new slot: allocates storage through the type's tp_alloc slot and
// constructs a B in that storage with placement new.
//
// type: the type object being instantiated.
// args, kwds: constructor arguments; not used here.
// Returns the new object, or nullptr if the allocation failed.
static PyObject *B_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
    B *self = (B*)type->tp_alloc(type, 0);
    if (self == nullptr)
    {
        // Allocation failed; constructing into a null pointer would be
        // undefined behaviour, so report the failure to the caller instead.
        return nullptr;
    }

    // NOTE(review): the constructor runs on memory that tp_alloc has already
    // prepared — confirm that it cannot disturb the PyObject header.
    new (self)B;
    return (PyObject*)self;
}

B 的 placement 构造函数会覆盖 PyObject 内部字段的初始化结果,而这些初始化很可能是在 tp_alloc() 中完成的。