saxonc 的 transform_to_file(),在循环中执行,不转换但给出无意义的错误或部分输出
saxonc's transform_to_file(), executed in a loop, doesn't transform but gives non-sensical errors or partial output
我的转换样式表文件包含:
<?xml version="1.0" encoding="UTF-8"?>
<!-- Builds a <cities> document containing one <city> element per cities/country element of the input. -->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
<!-- Serialize the result as indented, UTF-8 encoded XML. -->
<xsl:output method="xml" encoding="UTF-8" indent="yes"/>
<!-- Single template, matched against the document node. -->
<xsl:template match="/">
<cities>
<!-- Only country elements directly below a cities root element are selected. -->
<xsl:for-each select="cities/country">
<!-- The capital attribute becomes the city name; isCapital is always "true". -->
<city name="{@capital}" isCapital="true"/>
</xsl:for-each>
</cities>
</xsl:template>
</xsl:stylesheet>
我的python代码:
import os
import xml.etree.ElementTree as ET
from saxonpy import PySaxonProcessor
def main():
    """Split the sample XML per country and transform every piece to a file.

    Each <country> child of the inline document is wrapped in a fresh
    <cities> root, written to a temporary file and run through
    transformer.xsl into output_<n>.xml. A new PySaxonProcessor is opened
    on every iteration; an exception is printed and the loop moves on to
    the next child.
    """
    print('starting code...')
    source_XML = '''
<data>
<country name="Denmark" capital="Copenhagen"/>
<country name="Germany" capital="Berlin"/>
<country name="France" capital="Paris"/>
</data>
'''
    children = list(ET.fromstring(source_XML))

    # create individual raw xmls
    for cnt, child in enumerate(children, start=1):
        childroot = ET.Element("cities")
        childroot.append(child)
        tempfile_tree = ET.ElementTree(childroot)

        # Alternative locations that were tried for the temporary file:
        # tempfile = "C:\pythonProject\Whosebug\tmp.xml"
        # tempfile = "C:\gaga\tmp.xml"
        # tempfile = os.path.abspath("tmp.xml")
        tempfile = "tmp.xml"
        transformedfile = f"output_{cnt}.xml"

        with open(tempfile, 'wb') as f:
            tempfile_tree.write(f, encoding='utf-8', xml_declaration=True)

        try:
            # A separate processor is created for every single child here.
            with PySaxonProcessor(license=False) as proc:
                proc.set_cwd(os.getcwd())
                xsltproc = proc.new_xslt30_processor()
                xsltproc.transform_to_file(
                    source_file=tempfile,
                    stylesheet_file="transformer.xsl",
                    output_file=transformedfile,
                )
                print(f"{transformedfile} has been created.")
        except Exception as e:
            print(e)


if __name__ == "__main__":
    main()
我的问题
我导入了 saxonpy。
我试图在循环中运行 saxonc 的 transform_to_file(),但无法得到转换后的输出文件。
根据我为临时文件(tempfile)设置的路径不同,我会得到:
# I/O error reported by XML parser processing C:\pythonProject\Whosebug\░╚╒E ⌂:
# unknown protocol: c. Caused by java.net.MalformedURLException: unknown protocol: c
或
Content is not allowed in prolog
(this is definitely not the case, I checked the tempfile with a hexeditor)
或没有错误但输出文件仅包含:
# <?xml version="1.0" encoding="UTF-8"?>
# <cities/>
有时也能得到正确的输出(但我无法再重现)。
注意:我在安装 python 3.10 时禁用了 MAX_PATH。
注意:使用 Pycharm 与 poetry venv
使用 SaxonC 11.3,我只需对上面的 Python 脚本稍作改动就能运行:
from saxonpy import PySaxonProcessor
替换为:
from saxonc import *
我得到了输出:
starting code...
output_1.xml has been created.
output_2.xml has been created.
output_3.xml has been created.
这些文件都有以下内容:
<?xml version="1.0" encoding="UTF-8"?>
<cities/>
作为解决方法,我将 transform_to_file 替换为 transform_to_string:
valueStr = xsltproc.transform_to_string(source_file=tempfile,
stylesheet_file="transformer.xsl")
#output_file=transformedfile)
print(valueStr)
这确实产生了正确的输出:
starting code...
source in transformFiletoString=tmp.xml stylsheet=transformer.xsl
<?xml version="1.0" encoding="UTF-8"?>
<cities>
<city name="Copenhagen" isCapital="true"/>
</cities>
output_1.xml has been created.
<?xml version="1.0" encoding="UTF-8"?>
<cities>
<city name="Berlin" isCapital="true"/>
</cities>
output_2.xml has been created.
<?xml version="1.0" encoding="UTF-8"?>
<cities>
<city name="Paris" isCapital="true"/>
</cities>
output_3.xml has been created.
我现在已经安装了 SaxonC 1.2.1
我使用以下 python 脚本得到了正确的输出:
import os
import xml.etree.ElementTree as ET
from saxonpy import *
def main():
    """Transform each <country> of the sample XML with one shared processor.

    A single PySaxonProcessor and a single XSLT 3.0 processor are created
    up front and reused for every iteration. Each <country> element is
    wrapped in a new <cities> root, saved to tmp.xml and transformed with
    transformer.xsl into output_<n>.xml. Because the try block encloses the
    whole loop, the first exception is printed and ends the run.
    """
    print('starting code...')
    source_XML = '''
<data>
<country name="Denmark" capital="Copenhagen"/>
<country name="Germany" capital="Berlin"/>
<country name="France" capital="Paris"/>
</data>
'''
    children = list(ET.fromstring(source_XML))

    # create individual raw xmls
    try:
        with PySaxonProcessor(license=False) as proc:
            proc.set_cwd(os.getcwd())
            xsltproc = proc.new_xslt30_processor()

            for cnt, child in enumerate(children, start=1):
                childroot = ET.Element("cities")
                childroot.append(child)
                tempfile_tree = ET.ElementTree(childroot)

                # Alternative locations that were tried for the temporary file:
                # tempfile = "C:\pythonProject\Whosebug\tmp.xml"
                # tempfile = "C:\gaga\tmp.xml"
                # tempfile = os.path.abspath("tmp.xml")
                tempfile = "tmp.xml"
                transformedfile = f"output_{cnt}.xml"

                with open(tempfile, 'wb') as f:
                    tempfile_tree.write(f, encoding='utf-8', xml_declaration=True)

                # NOTE(review): presumably "s" names the source document for
                # the processor; confirm against the SaxonC documentation.
                xsltproc.set_property("s", tempfile)
                xsltproc.transform_to_file(
                    source_file=tempfile,
                    stylesheet_file="transformer.xsl",
                    output_file=transformedfile,
                )
                print(f"{transformedfile} has been created.")
    except Exception as e:
        print(e)


if __name__ == "__main__":
    main()
先用 compile_stylesheet() 编译样式表,再调用 apply_templates_returning_file() 也有效:
import os
import xml.etree.ElementTree as ET
from saxonpy import PySaxonProcessor
def main():
    """Transform each <country> of the sample XML with a precompiled stylesheet.

    transformer.xsl is compiled once; the resulting executable is then
    applied to every temporary source file via
    apply_templates_returning_file(), producing output_<n>.xml. Because the
    try block encloses the whole loop, the first exception is printed and
    ends the run.
    """
    print('starting code...')
    source_XML = '''
<data>
<country name="Denmark" capital="Copenhagen"/>
<country name="Germany" capital="Berlin"/>
<country name="France" capital="Paris"/>
</data>
'''
    children = list(ET.fromstring(source_XML))

    try:
        with PySaxonProcessor(license=False) as proc:
            proc.set_cwd(os.getcwd())
            xsltproc = proc.new_xslt30_processor()
            # Compile the stylesheet a single time, outside the loop.
            xslt30_transformer = xsltproc.compile_stylesheet(
                stylesheet_file="transformer.xsl"
            )

            for cnt, child in enumerate(children, start=1):
                # Wrap the country in the <cities> root the stylesheet selects from.
                childroot = ET.Element("cities")
                childroot.append(child)
                tempfile_tree = ET.ElementTree(childroot)

                tempfile = "tmp_1234567890ABCDEFGHIJKLMNOP.xml"
                transformedfile = f"output_{cnt}.xml"

                with open(tempfile, 'wb') as f:
                    tempfile_tree.write(f, xml_declaration=True)

                xslt30_transformer.apply_templates_returning_file(
                    source_file=tempfile,
                    output_file=transformedfile,
                )
                print(f"{transformedfile} has been created.")
    except Exception as e:
        print(e)


if __name__ == "__main__":
    main()
我的转换样式表文件包含:
<?xml version="1.0" encoding="UTF-8"?>
<!-- Builds a <cities> document containing one <city> element per cities/country element of the input. -->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
<!-- Serialize the result as indented, UTF-8 encoded XML. -->
<xsl:output method="xml" encoding="UTF-8" indent="yes"/>
<!-- Single template, matched against the document node. -->
<xsl:template match="/">
<cities>
<!-- Only country elements directly below a cities root element are selected. -->
<xsl:for-each select="cities/country">
<!-- The capital attribute becomes the city name; isCapital is always "true". -->
<city name="{@capital}" isCapital="true"/>
</xsl:for-each>
</cities>
</xsl:template>
</xsl:stylesheet>
我的python代码:
import os
import xml.etree.ElementTree as ET
from saxonpy import PySaxonProcessor
def main():
    """Split the sample XML per country and transform every piece to a file.

    Each <country> child of the inline document is wrapped in a fresh
    <cities> root, written to a temporary file and run through
    transformer.xsl into output_<n>.xml. A new PySaxonProcessor is opened
    on every iteration; an exception is printed and the loop moves on to
    the next child.
    """
    print('starting code...')
    source_XML = '''
<data>
<country name="Denmark" capital="Copenhagen"/>
<country name="Germany" capital="Berlin"/>
<country name="France" capital="Paris"/>
</data>
'''
    children = list(ET.fromstring(source_XML))

    # create individual raw xmls
    for cnt, child in enumerate(children, start=1):
        childroot = ET.Element("cities")
        childroot.append(child)
        tempfile_tree = ET.ElementTree(childroot)

        # Alternative locations that were tried for the temporary file:
        # tempfile = "C:\pythonProject\Whosebug\tmp.xml"
        # tempfile = "C:\gaga\tmp.xml"
        # tempfile = os.path.abspath("tmp.xml")
        tempfile = "tmp.xml"
        transformedfile = f"output_{cnt}.xml"

        with open(tempfile, 'wb') as f:
            tempfile_tree.write(f, encoding='utf-8', xml_declaration=True)

        try:
            # A separate processor is created for every single child here.
            with PySaxonProcessor(license=False) as proc:
                proc.set_cwd(os.getcwd())
                xsltproc = proc.new_xslt30_processor()
                xsltproc.transform_to_file(
                    source_file=tempfile,
                    stylesheet_file="transformer.xsl",
                    output_file=transformedfile,
                )
                print(f"{transformedfile} has been created.")
        except Exception as e:
            print(e)


if __name__ == "__main__":
    main()
我的问题
我导入了 saxonpy。我试图在循环中运行 saxonc 的 transform_to_file(),但无法得到转换后的输出文件。根据我为临时文件(tempfile)设置的路径不同,我会得到:
# I/O error reported by XML parser processing C:\pythonProject\Whosebug\░╚╒E ⌂:
# unknown protocol: c. Caused by java.net.MalformedURLException: unknown protocol: c
或
Content is not allowed in prolog
(this is definitely not the case, I checked the tempfile with a hexeditor)
或没有错误但输出文件仅包含:
# <?xml version="1.0" encoding="UTF-8"?>
# <cities/>
有时也能得到正确的输出(但我无法再重现)。
注意:我在安装 python 3.10 时禁用了 MAX_PATH。 注意:使用 Pycharm 与 poetry venv
使用 SaxonC 11.3,我只需对上面的 Python 脚本稍作改动就能运行:
from saxonpy import PySaxonProcessor
替换为:
from saxonc import *
我得到了输出:
starting code...
output_1.xml has been created.
output_2.xml has been created.
output_3.xml has been created.
这些文件都有以下内容:
<?xml version="1.0" encoding="UTF-8"?>
<cities/>
作为解决方法,我将 transform_to_file 替换为 transform_to_string:
valueStr = xsltproc.transform_to_string(source_file=tempfile,
stylesheet_file="transformer.xsl")
#output_file=transformedfile)
print(valueStr)
这确实产生了正确的输出:
starting code...
source in transformFiletoString=tmp.xml stylsheet=transformer.xsl
<?xml version="1.0" encoding="UTF-8"?>
<cities>
<city name="Copenhagen" isCapital="true"/>
</cities>
output_1.xml has been created.
<?xml version="1.0" encoding="UTF-8"?>
<cities>
<city name="Berlin" isCapital="true"/>
</cities>
output_2.xml has been created.
<?xml version="1.0" encoding="UTF-8"?>
<cities>
<city name="Paris" isCapital="true"/>
</cities>
output_3.xml has been created.
我现在已经安装了 SaxonC 1.2.1
我使用以下 python 脚本得到了正确的输出:
import os
import xml.etree.ElementTree as ET
from saxonpy import *
def main():
    """Transform each <country> of the sample XML with one shared processor.

    A single PySaxonProcessor and a single XSLT 3.0 processor are created
    up front and reused for every iteration. Each <country> element is
    wrapped in a new <cities> root, saved to tmp.xml and transformed with
    transformer.xsl into output_<n>.xml. Because the try block encloses the
    whole loop, the first exception is printed and ends the run.
    """
    print('starting code...')
    source_XML = '''
<data>
<country name="Denmark" capital="Copenhagen"/>
<country name="Germany" capital="Berlin"/>
<country name="France" capital="Paris"/>
</data>
'''
    children = list(ET.fromstring(source_XML))

    # create individual raw xmls
    try:
        with PySaxonProcessor(license=False) as proc:
            proc.set_cwd(os.getcwd())
            xsltproc = proc.new_xslt30_processor()

            for cnt, child in enumerate(children, start=1):
                childroot = ET.Element("cities")
                childroot.append(child)
                tempfile_tree = ET.ElementTree(childroot)

                # Alternative locations that were tried for the temporary file:
                # tempfile = "C:\pythonProject\Whosebug\tmp.xml"
                # tempfile = "C:\gaga\tmp.xml"
                # tempfile = os.path.abspath("tmp.xml")
                tempfile = "tmp.xml"
                transformedfile = f"output_{cnt}.xml"

                with open(tempfile, 'wb') as f:
                    tempfile_tree.write(f, encoding='utf-8', xml_declaration=True)

                # NOTE(review): presumably "s" names the source document for
                # the processor; confirm against the SaxonC documentation.
                xsltproc.set_property("s", tempfile)
                xsltproc.transform_to_file(
                    source_file=tempfile,
                    stylesheet_file="transformer.xsl",
                    output_file=transformedfile,
                )
                print(f"{transformedfile} has been created.")
    except Exception as e:
        print(e)


if __name__ == "__main__":
    main()
先用 compile_stylesheet() 编译样式表,再调用 apply_templates_returning_file() 也有效:
import os
import xml.etree.ElementTree as ET
from saxonpy import PySaxonProcessor
def main():
    """Transform each <country> of the sample XML with a precompiled stylesheet.

    transformer.xsl is compiled once; the resulting executable is then
    applied to every temporary source file via
    apply_templates_returning_file(), producing output_<n>.xml. Because the
    try block encloses the whole loop, the first exception is printed and
    ends the run.
    """
    print('starting code...')
    source_XML = '''
<data>
<country name="Denmark" capital="Copenhagen"/>
<country name="Germany" capital="Berlin"/>
<country name="France" capital="Paris"/>
</data>
'''
    children = list(ET.fromstring(source_XML))

    try:
        with PySaxonProcessor(license=False) as proc:
            proc.set_cwd(os.getcwd())
            xsltproc = proc.new_xslt30_processor()
            # Compile the stylesheet a single time, outside the loop.
            xslt30_transformer = xsltproc.compile_stylesheet(
                stylesheet_file="transformer.xsl"
            )

            for cnt, child in enumerate(children, start=1):
                # Wrap the country in the <cities> root the stylesheet selects from.
                childroot = ET.Element("cities")
                childroot.append(child)
                tempfile_tree = ET.ElementTree(childroot)

                tempfile = "tmp_1234567890ABCDEFGHIJKLMNOP.xml"
                transformedfile = f"output_{cnt}.xml"

                with open(tempfile, 'wb') as f:
                    tempfile_tree.write(f, xml_declaration=True)

                xslt30_transformer.apply_templates_returning_file(
                    source_file=tempfile,
                    output_file=transformedfile,
                )
                print(f"{transformedfile} has been created.")
    except Exception as e:
        print(e)


if __name__ == "__main__":
    main()