Python 解析 xml 文件并对 uuid 进行排序的脚本
Python Script to parse xml files and sort the uuids
下面是我用来对多个 .xml 文件中的 uuid 进行排序的 Python 脚本:
import os
import lxml.etree as ET
# Script as posted: walks inputpath, applies the XSLT stylesheet to every
# .xml file found and writes the result under outpath.
# NOTE(review): the three path values below are blank in the post and the
# original indentation of the loop bodies was lost, so this listing does not
# run as shown.
inputpath =
xsltfile =
outpath =
# NOTE(review): "dir" shadows the built-in of the same name; it starts as an
# empty list and is rebound to a path string inside the loop.
dir = []
for dirpath, dirnames, filenames in os.walk(inputpath):
# Destination folder: outpath joined with whatever follows the inputpath
# prefix in dirpath.
structure = os.path.join(outpath, dirpath[len(inputpath):])
if not os.path.isdir(structure):
os.mkdir(structure)
for filename in filenames:
# ('.xml') is a parenthesised string, not a one-element tuple.
if filename.endswith(('.xml')):
dir = os.path.join(dirpath, filename)
print(dir)
dom = ET.parse(dir)
# The stylesheet is parsed and compiled again for every input file.
xslt = ET.parse(xsltfile)
transform = ET.XSLT(xslt)
newdom = transform(dom)
# NOTE(review): "\" escapes its closing quote, so this literal is
# unterminated as written; presumably the post lost a second backslash.
# The file is opened in append mode and no close() call is visible.
outfile = open(structure + "\" + filename, 'a', encoding="utf-8")
outfile.write(ET.tostring(newdom,pretty_print=True,xml_declaration=True,standalone='yes').decode())
在尝试解析多个 .xml 文件时,我遇到了如下错误:
Traceback (most recent call last):
File "python.py", line 23, in <module>
outfile.write(ET.tostring(newdom,pretty_print=True,xml_declaration=True,encoding='UTF-8',standalone='yes').decode())
File "C:\Program Files\Python38\lib\encodings\cp1252.py", line 19, in encode
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
UnicodeEncodeError: 'charmap' codec can't encode characters in position 19935-19939: character maps to <undefined>
只有当我在 py 文件中传入 encoding='UTF-8' 时,才会出现上述错误。
下面这段 Python 代码运行正常:
import os
import lxml.etree as ET
import sys
# Apply one XSLT stylesheet to every .xml file below inputpath and write the
# transformed documents to a mirrored folder tree below outpath.

# Raw strings keep the Windows backslashes literal. In a normal string "\u"
# starts a Unicode escape and a trailing "\" escapes the closing quote; both
# are syntax errors. A raw string cannot end in a backslash either, so the
# trailing separator of the input folder is dropped.
inputpath = r"C:\projects\xmlformat\uuid"
xsltfile = r"C:\projects\xmlformat\uuid\uuid.xslt"
outpath = r"C:\projects\xmlformat\output"

# Compile the stylesheet once; it is identical for every input file.
transform = ET.XSLT(ET.parse(xsltfile))

for dirpath, dirnames, filenames in os.walk(inputpath):
    # relpath yields the sub-folder relative to inputpath whether or not
    # inputpath ends with a separator; normpath removes the "." produced for
    # the top-level folder.
    structure = os.path.normpath(
        os.path.join(outpath, os.path.relpath(dirpath, inputpath))
    )
    # makedirs also creates missing parents and tolerates existing folders.
    os.makedirs(structure, exist_ok=True)

    for filename in filenames:
        if not filename.endswith('.xml'):
            continue

        source_file = os.path.join(dirpath, filename)
        print(source_file)

        newdom = transform(ET.parse(source_file))
        serialized = ET.tostring(
            newdom,
            pretty_print=True,
            xml_declaration=True,
            standalone=True,
            encoding="UTF-8",
        )

        # tostring() already returns UTF-8 bytes matching the XML
        # declaration, so write them unchanged in binary mode; no locale
        # codec (e.g. cp1252) is involved. 'wb' replaces earlier output
        # instead of appending a second document on re-runs, and the
        # with-block closes the file.
        with open(os.path.join(structure, filename), 'wb') as outfile:
            outfile.write(serialized)
下面是我用来对多个 .xml 文件中的 uuid 进行排序的 Python 脚本:
import os
import lxml.etree as ET
# Script as posted: walks inputpath, applies the XSLT stylesheet to every
# .xml file found and writes the result under outpath.
# NOTE(review): the three path values below are blank in the post and the
# original indentation of the loop bodies was lost, so this listing does not
# run as shown.
inputpath =
xsltfile =
outpath =
# NOTE(review): "dir" shadows the built-in of the same name; it starts as an
# empty list and is rebound to a path string inside the loop.
dir = []
for dirpath, dirnames, filenames in os.walk(inputpath):
# Destination folder: outpath joined with whatever follows the inputpath
# prefix in dirpath.
structure = os.path.join(outpath, dirpath[len(inputpath):])
if not os.path.isdir(structure):
os.mkdir(structure)
for filename in filenames:
# ('.xml') is a parenthesised string, not a one-element tuple.
if filename.endswith(('.xml')):
dir = os.path.join(dirpath, filename)
print(dir)
dom = ET.parse(dir)
# The stylesheet is parsed and compiled again for every input file.
xslt = ET.parse(xsltfile)
transform = ET.XSLT(xslt)
newdom = transform(dom)
# NOTE(review): "\" escapes its closing quote, so this literal is
# unterminated as written; presumably the post lost a second backslash.
# The file is opened in append mode and no close() call is visible.
outfile = open(structure + "\" + filename, 'a', encoding="utf-8")
outfile.write(ET.tostring(newdom,pretty_print=True,xml_declaration=True,standalone='yes').decode())
在尝试解析多个 .xml 文件时,我遇到了如下错误:
Traceback (most recent call last):
File "python.py", line 23, in <module>
outfile.write(ET.tostring(newdom,pretty_print=True,xml_declaration=True,encoding='UTF-8',standalone='yes').decode())
File "C:\Program Files\Python38\lib\encodings\cp1252.py", line 19, in encode
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
UnicodeEncodeError: 'charmap' codec can't encode characters in position 19935-19939: character maps to <undefined>
只有当我在 py 文件中传入 encoding='UTF-8' 时,才会出现上述错误。
下面这段 Python 代码运行正常:
import os
import lxml.etree as ET
import sys
# Apply one XSLT stylesheet to every .xml file below inputpath and write the
# transformed documents to a mirrored folder tree below outpath.

# Raw strings keep the Windows backslashes literal. In a normal string "\u"
# starts a Unicode escape and a trailing "\" escapes the closing quote; both
# are syntax errors. A raw string cannot end in a backslash either, so the
# trailing separator of the input folder is dropped.
inputpath = r"C:\projects\xmlformat\uuid"
xsltfile = r"C:\projects\xmlformat\uuid\uuid.xslt"
outpath = r"C:\projects\xmlformat\output"

# Compile the stylesheet once; it is identical for every input file.
transform = ET.XSLT(ET.parse(xsltfile))

for dirpath, dirnames, filenames in os.walk(inputpath):
    # relpath yields the sub-folder relative to inputpath whether or not
    # inputpath ends with a separator; normpath removes the "." produced for
    # the top-level folder.
    structure = os.path.normpath(
        os.path.join(outpath, os.path.relpath(dirpath, inputpath))
    )
    # makedirs also creates missing parents and tolerates existing folders.
    os.makedirs(structure, exist_ok=True)

    for filename in filenames:
        if not filename.endswith('.xml'):
            continue

        source_file = os.path.join(dirpath, filename)
        print(source_file)

        newdom = transform(ET.parse(source_file))
        serialized = ET.tostring(
            newdom,
            pretty_print=True,
            xml_declaration=True,
            standalone=True,
            encoding="UTF-8",
        )

        # tostring() already returns UTF-8 bytes matching the XML
        # declaration, so write them unchanged in binary mode; no locale
        # codec (e.g. cp1252) is involved. 'wb' replaces earlier output
        # instead of appending a second document on re-runs, and the
        # with-block closes the file.
        with open(os.path.join(structure, filename), 'wb') as outfile:
            outfile.write(serialized)