使用 lxml 和 Jupyter notebook 的 Cython 编译错误('lxml\includes.pxd' 未找到)

Cython compile error('lxml\includes.pxd' not found) with lxml and Jupyter notebook

我已经在我的 ipython 笔记本中加载了 Cython 魔法命令:

%load_ext Cython

以上步骤成功。

然后我参考了 http://lxml.de/capi.html#writing-external-modules-in-cython ,并在我的 ipython 笔记本中运行了如下代码:

%%cython

cimport etreepublic as cetree
cdef object etree
from lxml import etree
cetree.import_lxml__etree()

以上步骤报错:

Error compiling Cython file:
------------------------------------------------------------
...


cimport etreepublic as cetree
       ^
------------------------------------------------------------

C:\Users\swaga\.ipython\cython\_cython_magic_c8f46d6a60c08cbbc40ceaeafaf39062.pyx:3:8: 'etreepublic.pxd' not found

然后我更正了它:

%%cython


from lxml.includes cimport etreepublic as cetree
cdef object etree
from lxml import etree
cetree.import_lxml__etree()

现在它给我这样的错误:

Error compiling Cython file:
------------------------------------------------------------
...


from lxml.includes cimport etreepublic as cetree
^
------------------------------------------------------------

C:\Users\swaga\.ipython\cython\_cython_magic_00532cf9159d976bfb24010b76ff3dfc.pyx:3:0: 'lxml\includes.pxd' not found

其他详情: lxml 版本为 3.4.4,Cython 版本为 0.22,Python 为 2.7.9 [MSC v.1500 32 bit (Intel)]

编辑(2019 年 4 月):Cython 的 Jupyter Lab 扩展(%%cython)的参数列表位于:https://cython.readthedocs.io/en/latest/src/userguide/source_files_and_compilation.html#compiling-with-a-jupyter-notebook

对于 lxml,你需要以 C++ 模式编译(即给 %%cython 加上 --cplus 参数)。

在一个笔记本单元格中

import os
import lxml
def find_libxml2_include():
    """Return the well-known libxml2 include directories that exist here.

    A candidate directory is kept only when it contains ``libxml/tree.h``.
    The result is a (possibly empty) list in candidate order.
    """
    candidates = ('/usr/include/libxml2', '/usr/local/include/libxml2')
    return [
        path for path in candidates
        if os.path.exists(os.path.join(path, 'libxml/tree.h'))
    ]

# Emit one ``--include=...`` flag per directory: the libxml2 headers first,
# then the directories reported by lxml itself.
include_dirs = find_libxml2_include() + lxml.get_include()
for include_dir in include_dirs:
    print("--include={0}".format(include_dir))

它给了我:

--include=/usr/include/libxml2
--include=/home/nk/miniconda2/envs/py35/lib/python3.5/site-packages/lxml/includes
--include=/home/nk/miniconda2/envs/py35/lib/python3.5/site-packages/lxml
--include=/home/nk/miniconda2/envs/py35/lib/python3.5/site-packages/lxml/includes/__pycache__

然后,在另一个使用 lxml 的笔记本单元格中:

%%cython --cplus --lib=xml2 --include=/usr/include/libxml2 --include=/home/nk/miniconda2/envs/py35/lib/python3.5/site-packages/lxml/includes --include=/home/nk/miniconda2/envs/py35/lib/python3.5/site-packages/lxml --include=/home/nk/miniconda2/envs/py35/lib/python3.5/site-packages/lxml/includes/__pycache__

from cpython.list cimport PyList_Append
from lxml.includes.etreepublic cimport _Element, _Document
cimport lxml.includes.etreepublic as cetree

# Module-level variable that holds the Python-level lxml.etree module object.
cdef object etree

from lxml import etree
# Set up access to lxml.etree's C-API. NOTE(review): per the lxml C-API docs
# this call is expected to run before any other cetree function is used.
cetree.import_lxml__etree()

# Sample XML input (as bytes): a <Document> root containing two identical
# <Header> records, each with Title/Creator/Subject/Description/Date children.
cdef bytes xmldoc

xmldoc = b"""<?xml version="1.0"?>
<Document>
    <Header>
        <Title>USEPA Geospatial Data</Title>
        <Creator>Environmental Protection Agency</Creator>
        <Subject>USEPA Geospatial Data</Subject>
        <Description>This XML file was produced by US EPA</Description>
        <Date>MAR-08-2013</Date>
    </Header>
    <Header>
        <Title>USEPA Geospatial Data</Title>
        <Creator>Environmental Protection Agency</Creator>
        <Subject>USEPA Geospatial Data</Subject>
        <Description>This XML file was produced by US EPA</Description>
        <Date>MAR-08-2013</Date>
    </Header>    
</Document>
"""

cdef void my_iterator(cetree._Document mydoc, cetree.tree.xmlNode* subnode, list result) except *:
    """Append every element below ``subnode`` to ``result``, depth-first.

    Each child is wrapped with ``cetree.elementFactory`` and appended before
    its own children are visited, so ``result`` ends up in pre-order.

    Parameters:
        mydoc: the document passed to ``elementFactory`` for each node.
        subnode: the C node whose descendants are collected; ``subnode``
            itself is not appended.
        result: list that receives the element proxies (mutated in place).

    Declared ``except *`` so that a Python exception raised while creating
    or appending an element propagates to the caller instead of being
    printed and discarded, which is what a plain ``cdef void`` function
    does on older Cython releases.
    """
    cdef cetree.tree.xmlNode *node

    if cetree.hasChild(subnode):
        node = cetree.findChild(subnode, 0)
    else:
        node = NULL

    while node != NULL:
        PyList_Append(result, cetree.elementFactory(mydoc, node))
        # Descend into this child's subtree before moving to its sibling.
        my_iterator(mydoc, node, result)
        node = cetree.nextElement(node)


cpdef test():
    """Parse ``xmldoc``, print every descendant element, and return the root.

    The descendants are gathered by ``my_iterator`` starting from the root's
    C node; the root element itself is returned but not printed.
    """
    cdef _Element root = etree.fromstring(xmldoc)
    cdef list descendants = []

    my_iterator(root._doc, root._c_node, descendants)
    for element in descendants:
        print(element)
    return root

现在,当我运行 test 函数时:

>>> test()
<Element Header at 0x7fb370e4ab88>
<Element Title at 0x7fb370e7f7c8>
<Element Creator at 0x7fb370f13d08>
<Element Subject at 0x7fb370030848>
<Element Description at 0x7fb370030708>
<Element Date at 0x7fb370030748>
<Element Header at 0x7fb370f03148>
<Element Title at 0x7fb370f03588>
<Element Creator at 0x7fb370f03708>
<Element Subject at 0x7fb370603508>
<Element Description at 0x7fb370e8abc8>
<Element Date at 0x7fb370e8ab08>