在 Jupyter notebook 中配合 lxml 使用 Cython 时出现编译错误(找不到 'lxml\includes.pxd')
Cython compile error('lxml\includes.pxd' not found) with lxml and Jupyter notebook
我已经将 Cython 魔法加载为(在我的 ipython 笔记本中):
%load_ext Cython
以上步骤成功。
然后我参照“http://lxml.de/capi.html#writing-external-modules-in-cython”,在我的 ipython 笔记本中写了如下单元格:
%%cython
cimport etreepublic as cetree
cdef object etree
from lxml import etree
cetree.import_lxml__etree()
以上步骤报错:
Error compiling Cython file:
------------------------------------------------------------
...
cimport etreepublic as cetree
^
------------------------------------------------------------
C:\Users\swaga\.ipython\cython\_cython_magic_c8f46d6a60c08cbbc40ceaeafaf39062.pyx:3:8: 'etreepublic.pxd' not found
然后我更正了它:
%%cython
from lxml.includes cimport etreepublic as cetree
cdef object etree
from lxml import etree
cetree.import_lxml__etree()
现在它给我这样的错误:
Error compiling Cython file:
------------------------------------------------------------
...
from lxml.includes cimport etreepublic as cetree
^
------------------------------------------------------------
C:\Users\swaga\.ipython\cython\_cython_magic_00532cf9159d976bfb24010b76ff3dfc.pyx:3:0: 'lxml\includes.pxd' not found
其他详情:
lxml 版本为 3.4.4,Cython 版本为 0.22,Python 版本为 2.7.9 [MSC v.1500 32 bit (Intel)]
编辑:2019 年 4 月
Cython 的 Jupyter 扩展(%%cython 魔法命令)的参数列表见:https://cython.readthedocs.io/en/latest/src/userguide/source_files_and_compilation.html#compiling-with-a-jupyter-notebook
对于 lxml,你需要将代码编译为 C++(即给 %%cython 加上 --cplus 选项)。
先在一个笔记本单元格中运行:
import os
import lxml
def find_libxml2_include(candidates=('/usr/include/libxml2',
                                     '/usr/local/include/libxml2')):
    """Return the libxml2 include directories that exist on this machine.

    A directory counts as a libxml2 include directory when it contains
    ``libxml/tree.h``.

    Args:
        candidates: Directories to probe, in priority order. Defaults to
            the two usual system-wide install locations on Unix-like
            systems.

    Returns:
        A list with every candidate that contains ``libxml/tree.h``, in
        the same order as ``candidates``. Empty when none match.
    """
    return [
        d for d in candidates
        if os.path.exists(os.path.join(d, 'libxml', 'tree.h'))
    ]
# Print one ``--include=<dir>`` option per directory so the output can be
# pasted onto a ``%%cython`` magic line. The libxml2 headers come first,
# followed by the directories reported by ``lxml.get_include()``.
for include_dir in find_libxml2_include() + lxml.get_include():
    print("--include={0}".format(include_dir))
它给了我:
--include=/usr/include/libxml2
--include=/home/nk/miniconda2/envs/py35/lib/python3.5/site-packages/lxml/includes
--include=/home/nk/miniconda2/envs/py35/lib/python3.5/site-packages/lxml
--include=/home/nk/miniconda2/envs/py35/lib/python3.5/site-packages/lxml/includes/__pycache__
然后,在另一个要使用 lxml 的笔记本单元格中:
%%cython --cplus --lib=xml2 --include=/usr/include/libxml2 --include=/home/nk/miniconda2/envs/py35/lib/python3.5/site-packages/lxml/includes --include=/home/nk/miniconda2/envs/py35/lib/python3.5/site-packages/lxml --include=/home/nk/miniconda2/envs/py35/lib/python3.5/site-packages/lxml/includes/__pycache__
from cpython.list cimport PyList_Append
from lxml.includes.etreepublic cimport _Element, _Document
cimport lxml.includes.etreepublic as cetree
cdef object etree
from lxml import etree
cetree.import_lxml__etree()
cdef bytes xmldoc
xmldoc = b"""<?xml version="1.0"?>
<Document>
<Header>
<Title>USEPA Geospatial Data</Title>
<Creator>Environmental Protection Agency</Creator>
<Subject>USEPA Geospatial Data</Subject>
<Description>This XML file was produced by US EPA</Description>
<Date>MAR-08-2013</Date>
</Header>
<Header>
<Title>USEPA Geospatial Data</Title>
<Creator>Environmental Protection Agency</Creator>
<Subject>USEPA Geospatial Data</Subject>
<Description>This XML file was produced by US EPA</Description>
<Date>MAR-08-2013</Date>
</Header>
</Document>
"""
cdef void my_iterator(cetree._Document mydoc, cetree.tree.xmlNode* subnode, list result):
    """Append a Python element for every node below ``subnode`` to ``result``.

    Each child is appended before its own children are visited, and
    siblings are visited in the order ``cetree.nextElement`` yields them,
    so ``result`` ends up in depth-first, parent-before-child order.

    mydoc   -- the document that owns ``subnode``; passed to
               ``cetree.elementFactory`` for every wrapped node.
    subnode -- C-level node whose descendants are collected (the node
               itself is not appended).
    result  -- list that receives the elements; modified in place.
    """
    cdef cetree.tree.xmlNode *node = NULL

    # Start at the first child, if there is one; otherwise the loop below
    # is skipped entirely.
    if cetree.hasChild(subnode):
        node = cetree.findChild(subnode, 0)

    while node != NULL:
        # Wrap the C node in a Python-level element and record it.
        PyList_Append(result, cetree.elementFactory(mydoc, node))
        # Descend into this node before moving on to its next sibling.
        my_iterator(mydoc, node, result)
        node = cetree.nextElement(node)
cpdef test():
    """Parse ``xmldoc``, print every element below the root, and return the root.

    The elements are collected by ``my_iterator`` and printed one per
    line in the order it appended them.
    """
    cdef _Element root = etree.fromstring(xmldoc)
    cdef list found = []

    # Hand the C-level document and root node of the parsed tree to the
    # recursive collector.
    my_iterator(root._doc, root._c_node, found)
    for elem in found:
        print(elem)
    return root
现在,当我运行 test 函数时:
>>> test()
<Element Header at 0x7fb370e4ab88>
<Element Title at 0x7fb370e7f7c8>
<Element Creator at 0x7fb370f13d08>
<Element Subject at 0x7fb370030848>
<Element Description at 0x7fb370030708>
<Element Date at 0x7fb370030748>
<Element Header at 0x7fb370f03148>
<Element Title at 0x7fb370f03588>
<Element Creator at 0x7fb370f03708>
<Element Subject at 0x7fb370603508>
<Element Description at 0x7fb370e8abc8>
<Element Date at 0x7fb370e8ab08>
我已经将 Cython 魔法加载为(在我的 ipython 笔记本中):
%load_ext Cython
以上步骤成功。
然后我参照“http://lxml.de/capi.html#writing-external-modules-in-cython”,在我的 ipython 笔记本中写了如下单元格:
%%cython
cimport etreepublic as cetree
cdef object etree
from lxml import etree
cetree.import_lxml__etree()
以上步骤报错:
Error compiling Cython file:
------------------------------------------------------------
...
cimport etreepublic as cetree
^
------------------------------------------------------------
C:\Users\swaga\.ipython\cython\_cython_magic_c8f46d6a60c08cbbc40ceaeafaf39062.pyx:3:8: 'etreepublic.pxd' not found
然后我更正了它:
%%cython
from lxml.includes cimport etreepublic as cetree
cdef object etree
from lxml import etree
cetree.import_lxml__etree()
现在它给我这样的错误:
Error compiling Cython file:
------------------------------------------------------------
...
from lxml.includes cimport etreepublic as cetree
^
------------------------------------------------------------
C:\Users\swaga\.ipython\cython\_cython_magic_00532cf9159d976bfb24010b76ff3dfc.pyx:3:0: 'lxml\includes.pxd' not found
其他详情:lxml 版本为 3.4.4,Cython 版本为 0.22,Python 版本为 2.7.9 [MSC v.1500 32 bit (Intel)]
编辑(2019 年 4 月):Cython 的 Jupyter 扩展(%%cython 魔法命令)的参数列表见:https://cython.readthedocs.io/en/latest/src/userguide/source_files_and_compilation.html#compiling-with-a-jupyter-notebook
对于 lxml,你需要将代码编译为 C++(即给 %%cython 加上 --cplus 选项)。
先在一个笔记本单元格中运行:
import os
import lxml
def find_libxml2_include(candidates=('/usr/include/libxml2',
                                     '/usr/local/include/libxml2')):
    """Return the libxml2 include directories that exist on this machine.

    A directory counts as a libxml2 include directory when it contains
    ``libxml/tree.h``.

    Args:
        candidates: Directories to probe, in priority order. Defaults to
            the two usual system-wide install locations on Unix-like
            systems.

    Returns:
        A list with every candidate that contains ``libxml/tree.h``, in
        the same order as ``candidates``. Empty when none match.
    """
    return [
        d for d in candidates
        if os.path.exists(os.path.join(d, 'libxml', 'tree.h'))
    ]
# Print one ``--include=<dir>`` option per directory so the output can be
# pasted onto a ``%%cython`` magic line. The libxml2 headers come first,
# followed by the directories reported by ``lxml.get_include()``.
for include_dir in find_libxml2_include() + lxml.get_include():
    print("--include={0}".format(include_dir))
它给了我:
--include=/usr/include/libxml2
--include=/home/nk/miniconda2/envs/py35/lib/python3.5/site-packages/lxml/includes
--include=/home/nk/miniconda2/envs/py35/lib/python3.5/site-packages/lxml
--include=/home/nk/miniconda2/envs/py35/lib/python3.5/site-packages/lxml/includes/__pycache__
然后,在另一个要使用 lxml 的笔记本单元格中:
%%cython --cplus --lib=xml2 --include=/usr/include/libxml2 --include=/home/nk/miniconda2/envs/py35/lib/python3.5/site-packages/lxml/includes --include=/home/nk/miniconda2/envs/py35/lib/python3.5/site-packages/lxml --include=/home/nk/miniconda2/envs/py35/lib/python3.5/site-packages/lxml/includes/__pycache__
from cpython.list cimport PyList_Append
from lxml.includes.etreepublic cimport _Element, _Document
cimport lxml.includes.etreepublic as cetree
cdef object etree
from lxml import etree
cetree.import_lxml__etree()
cdef bytes xmldoc
xmldoc = b"""<?xml version="1.0"?>
<Document>
<Header>
<Title>USEPA Geospatial Data</Title>
<Creator>Environmental Protection Agency</Creator>
<Subject>USEPA Geospatial Data</Subject>
<Description>This XML file was produced by US EPA</Description>
<Date>MAR-08-2013</Date>
</Header>
<Header>
<Title>USEPA Geospatial Data</Title>
<Creator>Environmental Protection Agency</Creator>
<Subject>USEPA Geospatial Data</Subject>
<Description>This XML file was produced by US EPA</Description>
<Date>MAR-08-2013</Date>
</Header>
</Document>
"""
cdef void my_iterator(cetree._Document mydoc, cetree.tree.xmlNode* subnode, list result):
    """Append a Python element for every node below ``subnode`` to ``result``.

    Each child is appended before its own children are visited, and
    siblings are visited in the order ``cetree.nextElement`` yields them,
    so ``result`` ends up in depth-first, parent-before-child order.

    mydoc   -- the document that owns ``subnode``; passed to
               ``cetree.elementFactory`` for every wrapped node.
    subnode -- C-level node whose descendants are collected (the node
               itself is not appended).
    result  -- list that receives the elements; modified in place.
    """
    cdef cetree.tree.xmlNode *node = NULL

    # Start at the first child, if there is one; otherwise the loop below
    # is skipped entirely.
    if cetree.hasChild(subnode):
        node = cetree.findChild(subnode, 0)

    while node != NULL:
        # Wrap the C node in a Python-level element and record it.
        PyList_Append(result, cetree.elementFactory(mydoc, node))
        # Descend into this node before moving on to its next sibling.
        my_iterator(mydoc, node, result)
        node = cetree.nextElement(node)
cpdef test():
    """Parse ``xmldoc``, print every element below the root, and return the root.

    The elements are collected by ``my_iterator`` and printed one per
    line in the order it appended them.
    """
    cdef _Element root = etree.fromstring(xmldoc)
    cdef list found = []

    # Hand the C-level document and root node of the parsed tree to the
    # recursive collector.
    my_iterator(root._doc, root._c_node, found)
    for elem in found:
        print(elem)
    return root
现在,当我运行 test 函数时:
>>> test()
<Element Header at 0x7fb370e4ab88>
<Element Title at 0x7fb370e7f7c8>
<Element Creator at 0x7fb370f13d08>
<Element Subject at 0x7fb370030848>
<Element Description at 0x7fb370030708>
<Element Date at 0x7fb370030748>
<Element Header at 0x7fb370f03148>
<Element Title at 0x7fb370f03588>
<Element Creator at 0x7fb370f03708>
<Element Subject at 0x7fb370603508>
<Element Description at 0x7fb370e8abc8>
<Element Date at 0x7fb370e8ab08>