Python 解析 xml 文件并对 uuid 进行排序的脚本

Python Script to parse xml files and sort the uuids

这是 python 脚本,我必须对多个 .xml 文件的 uuid 进行排序:

import os
import lxml.etree as ET

inputpath = 
xsltfile = 
outpath = 

dir = []

for dirpath, dirnames, filenames in os.walk(inputpath):
    structure = os.path.join(outpath, dirpath[len(inputpath):])
    if not os.path.isdir(structure):
        os.mkdir(structure)
    for filename in filenames:
        if filename.endswith(('.xml')):
            dir = os.path.join(dirpath, filename)
            print(dir)
            dom = ET.parse(dir)
            xslt = ET.parse(xsltfile)
            transform = ET.XSLT(xslt)
            newdom = transform(dom)
            outfile = open(structure + "\" + filename, 'a', encoding="utf-8")
            outfile.write(ET.tostring(newdom,pretty_print=True,xml_declaration=True,standalone='yes').decode())

我在尝试解析多个 .xml 文件时看到此错误

Traceback (most recent call last):
  File "python.py", line 23, in <module>
outfile.write(ET.tostring(newdom,pretty_print=True,xml_declaration=True,encoding='UTF-8',standalone='yes').decode())
  File "C:\Program Files\Python38\lib\encodings\cp1252.py", line 19, in encode
    return codecs.charmap_encode(input,self.errors,encoding_table)[0]
UnicodeEncodeError: 'charmap' codec can't encode characters in position 19935-19939: character maps to <undefined>

只有当我在 py 文件 encoding='UTF-8' 中传递

时才会出现上述错误

这是 python 代码,运行 很好

import os
import lxml.etree as ET
import sys


inputpath = "C:\projects\xmlformat\uuid\"
xsltfile = "C:\projects\xmlformat\uuid\uuid.xslt"
outpath = "C:\projects\xmlformat\output"

dir = []

if sys.version_info[0] >= 3:
    unicode = str

for dirpath, dirnames, filenames in os.walk(inputpath):
    structure = os.path.join(outpath, dirpath[len(inputpath):])
    if not os.path.isdir(structure):
        os.mkdir(structure)
    for filename in filenames:
        if filename.endswith(('.xml')):
            dir = os.path.join(dirpath, filename)
            print(dir)
            dom = ET.parse(dir)
            xslt = ET.parse(xsltfile)
            transform = ET.XSLT(xslt)
            newdom = transform(dom)
            infile = unicode(ET.tostring(newdom, pretty_print=True,xml_declaration=True,standalone='yes',encoding="UTF-8").decode())
            outfile = open(structure + "\" + filename, 'a',encoding="UTF-8")
            outfile.write(infile)