Python xml - 删除空格以对齐 xml 文档
Python xml - remove spaces to get aligned xml document
我有一个 MyXml.xml 结构:
<?xml version='1.0' encoding='utf-8'?>
<tag1 atrib1='bla' atrib1='bla' atrib1='bla' atrib1='bla'>
<tag2 atrib = 'something'>
<tag3 atrib = 'something'>
<tag4 atrib = '..'>
</tag4>
</tag3>
<tag5 atrib = 'important'><div><h1>ContentFrom **OldXml.xml** </h1></div>
...
</tag5>
</tag2>
</tag1>
有没有人知道如何使它成为这种形式(删除所有空格):
<?xml version='1.0' encoding='utf-8'?>
<tag1 atrib1='bla' atrib1='bla' atrib1='bla' atrib1='bla'>
<tag2 atrib = 'something'>
<tag3 atrib = 'something'>
<tag4 atrib = '..'>
<tag5 atrib = 'important'><div><h1>ContentFrom **OldXml.xml** </h1></div>
...
我试过这个但是不行:
# Read in the file to a DOM data structure.
original_document = minidom.parse("MyXml.xml")
# Open a UTF-8 encoded file, because it's fairly standard for XML.
stripped_file = codecs.open("New_MyXml.xml", "w", encoding="utf8")
# Ask minidom to add no indentation and no newlines of its own while writing.
# NOTE(review): indent/addindent/newl only control the whitespace that
# writexml inserts; whitespace-only text nodes that minidom.parse() read from
# the input are presumably written back unchanged, which would explain why
# the output is not stripped -- confirm against the minidom documentation.
original_document.writexml(stripped_file, indent="", addindent="", newl="")
stripped_file.close()
编辑:
通过FOR循环创建文件,其中创建元素,最后是这样写的:
# Serialize the ElementTree root and re-parse it with minidom so that
# toprettyxml() can be called on it.
dom = xml.dom.minidom.parseString(ET.tostring(root))
xml_string = dom.toprettyxml()
# Split at the end of the XML declaration so that an encoding attribute can be
# inserted in front of the closing '?>'.
part1, part2 = xml_string.split('?>')
with open("MyXml.xml", 'w') as xfile:
# m_encoding is defined outside this snippet.
xfile.write(part1 + 'encoding=\"{}\"?>\n'.format(m_encoding) + part2)
# NOTE(review): the with-statement already closes the file, so this explicit
# close() looks redundant.
xfile.close()
编辑:下面是最新的代码,它把整个文档输出在了同一行中:
# Serialize the ElementTree root and re-parse it with minidom.
dom = xml.dom.minidom.parseString(ET.tostring(root))
# NOTE(review): toxml() apparently adds no newlines, so the only line break in
# xmlstring is the "\n" inserted by the f-string below and the whole document
# body ends up on a single line.
xml_string = dom.toxml()
# Split at the end of the XML declaration to insert the encoding attribute.
part1, part2 = xml_string.split('?>')
xmlstring = f'{part1} encoding="{m_encoding}"?>\n {part2}'
with open("MyXml.xml", 'w') as xfile:
for line in xmlstring.split("\n"):
# Drop leading/trailing whitespace from each line before writing it.
xfile.write(line.strip() + "\n")
如果您真的只是想去除每行首尾的空白,那么根本不需要(也不应该使用)XML 解析器:
from pathlib import Path

# Input document; the stripped copy is written next to it as "stripped-<name>".
inf = Path("my-input.xml")
outf = inf.with_name(f"stripped-{inf.name}")

# The sample document declares encoding='utf-8', so read and write with that
# encoding explicitly instead of relying on the platform default.
with inf.open(encoding="utf-8") as f, outf.open("w", encoding="utf-8") as g:
    for line in f:
        # strip() removes the indentation, trailing blanks and the newline;
        # add one newline back so the line structure is preserved.
        g.write(line.strip() + "\n")
Pathlib 在这里只是充当 os.path
、open
等的角色:如果你碰巧不喜欢它,也可以不用它来重写(不过 pathlib
远比把路径当作文本字符串来修改要好用,我相信你不会想那样做……)
如果您确实需要用解析器加载文档,那么在写出文件时可以使用完全相同的技巧,只是改为逐行迭代解析器生成的输出。
示范:
from tempfile import TemporaryFile

# Sample document used by the demonstration below.
data = """<?xml version='1.0' encoding='utf-8'?>
<tag1 atrib1='bla' atrib1='bla' atrib1='bla' atrib1='bla'>
<tag2 atrib = 'something'>
<tag3 atrib = 'something'>
<tag4 atrib = '..'>
</tag4>
</tag3>
<tag5 atrib = 'important'><div><h1>ContentFrom **OldXml.xml** </h1></div>
...
</tag5>
</tag2>
</tag1>"""

# f holds the original text and g receives the stripped copy.
with TemporaryFile(mode="w+") as f, TemporaryFile(mode="w+") as g:
    f.write(data)
    f.seek(0)  # rewind so the text just written can be read back
    print("Before:")
    for line in f:
        print(line, end="")
        # Remove surrounding whitespace, then restore a single newline.
        g.write(line.strip() + "\n")
    print("\n\nAfter:")
    g.seek(0)  # rewind the stripped copy before printing it
    for line in g:
        print(line, end="")
编辑:
在你的情况下,有一个更简单的解决方案:根本不使用 toprettyxml
,改用 toxml
。(更新:显然 toxml 输出的结果完全没有换行符。)但即使不这样做,我们也可以用同样的办法处理:
# Pretty-print the DOM, then insert the encoding into the XML declaration.
# dom and m_encoding are defined outside this snippet.
xml_string = dom.toprettyxml()
# maxsplit=1: only the first '?>' ends the declaration. A later '?>' (for
# example in a processing instruction) stays in the body instead of breaking
# the two-name unpacking with a ValueError.
part1, part2 = xml_string.split('?>', 1)
xmlstring = f'{part1} encoding="{m_encoding}"?>\n {part2}'
# Encode the file with the same encoding that the declaration announces
# (assumes m_encoding is a codec name Python recognizes).
with open("MyXml.xml", 'w', encoding=m_encoding) as xfile:
    for line in xmlstring.split("\n"):
        stripped = line.strip()
        # Skip lines that are empty after stripping so that no blank lines
        # end up in the output file.
        if stripped:
            xfile.write(stripped + "\n")
但是我怀疑 toprettyxml(indent="")
会做同样的事情:
# indent="" asks toprettyxml() for newlines between nodes but no indentation.
# dom is defined outside this snippet.
xml_string = dom.toprettyxml(indent="")
# Placeholder left by the author (presumably for the declaration handling).
...
with open("MyFile.xml", "w") as f:
    f.write(xml_string)
我有一个 MyXml.xml 结构:
<?xml version='1.0' encoding='utf-8'?>
<tag1 atrib1='bla' atrib1='bla' atrib1='bla' atrib1='bla'>
<tag2 atrib = 'something'>
<tag3 atrib = 'something'>
<tag4 atrib = '..'>
</tag4>
</tag3>
<tag5 atrib = 'important'><div><h1>ContentFrom **OldXml.xml** </h1></div>
...
</tag5>
</tag2>
</tag1>
有没有人知道如何使它成为这种形式(删除所有空格):
<?xml version='1.0' encoding='utf-8'?>
<tag1 atrib1='bla' atrib1='bla' atrib1='bla' atrib1='bla'>
<tag2 atrib = 'something'>
<tag3 atrib = 'something'>
<tag4 atrib = '..'>
<tag5 atrib = 'important'><div><h1>ContentFrom **OldXml.xml** </h1></div>
...
我试过这个但是不行:
# Read in the file to a DOM data structure.
original_document = minidom.parse("MyXml.xml")
# Open a UTF-8 encoded file, because it's fairly standard for XML.
stripped_file = codecs.open("New_MyXml.xml", "w", encoding="utf8")
# Ask minidom to add no indentation and no newlines of its own while writing.
# NOTE(review): indent/addindent/newl only control the whitespace that
# writexml inserts; whitespace-only text nodes that minidom.parse() read from
# the input are presumably written back unchanged, which would explain why
# the output is not stripped -- confirm against the minidom documentation.
original_document.writexml(stripped_file, indent="", addindent="", newl="")
stripped_file.close()
编辑:
通过FOR循环创建文件,其中创建元素,最后是这样写的:
# Serialize the ElementTree root and re-parse it with minidom so that
# toprettyxml() can be called on it.
dom = xml.dom.minidom.parseString(ET.tostring(root))
xml_string = dom.toprettyxml()
# Split at the end of the XML declaration so that an encoding attribute can be
# inserted in front of the closing '?>'.
part1, part2 = xml_string.split('?>')
with open("MyXml.xml", 'w') as xfile:
# m_encoding is defined outside this snippet.
xfile.write(part1 + 'encoding=\"{}\"?>\n'.format(m_encoding) + part2)
# NOTE(review): the with-statement already closes the file, so this explicit
# close() looks redundant.
xfile.close()
编辑:下面是最新的代码,它把整个文档输出在了同一行中:
# Serialize the ElementTree root and re-parse it with minidom.
dom = xml.dom.minidom.parseString(ET.tostring(root))
# NOTE(review): toxml() apparently adds no newlines, so the only line break in
# xmlstring is the "\n" inserted by the f-string below and the whole document
# body ends up on a single line.
xml_string = dom.toxml()
# Split at the end of the XML declaration to insert the encoding attribute.
part1, part2 = xml_string.split('?>')
xmlstring = f'{part1} encoding="{m_encoding}"?>\n {part2}'
with open("MyXml.xml", 'w') as xfile:
for line in xmlstring.split("\n"):
# Drop leading/trailing whitespace from each line before writing it.
xfile.write(line.strip() + "\n")
如果您真的只是想去除每行首尾的空白,那么根本不需要(也不应该使用)XML 解析器:
from pathlib import Path

# Input document; the stripped copy is written next to it as "stripped-<name>".
inf = Path("my-input.xml")
outf = inf.with_name(f"stripped-{inf.name}")

# The sample document declares encoding='utf-8', so read and write with that
# encoding explicitly instead of relying on the platform default.
with inf.open(encoding="utf-8") as f, outf.open("w", encoding="utf-8") as g:
    for line in f:
        # strip() removes the indentation, trailing blanks and the newline;
        # add one newline back so the line structure is preserved.
        g.write(line.strip() + "\n")
Pathlib 在这里只是充当 os.path
、open
等的角色:如果你碰巧不喜欢它,也可以不用它来重写(不过 pathlib
远比把路径当作文本字符串来修改要好用,我相信你不会想那样做……)
如果您确实需要用解析器加载文档,那么在写出文件时可以使用完全相同的技巧,只是改为逐行迭代解析器生成的输出。
示范:
from tempfile import TemporaryFile

# Sample document used by the demonstration below.
data = """<?xml version='1.0' encoding='utf-8'?>
<tag1 atrib1='bla' atrib1='bla' atrib1='bla' atrib1='bla'>
<tag2 atrib = 'something'>
<tag3 atrib = 'something'>
<tag4 atrib = '..'>
</tag4>
</tag3>
<tag5 atrib = 'important'><div><h1>ContentFrom **OldXml.xml** </h1></div>
...
</tag5>
</tag2>
</tag1>"""

# f holds the original text and g receives the stripped copy.
with TemporaryFile(mode="w+") as f, TemporaryFile(mode="w+") as g:
    f.write(data)
    f.seek(0)  # rewind so the text just written can be read back
    print("Before:")
    for line in f:
        print(line, end="")
        # Remove surrounding whitespace, then restore a single newline.
        g.write(line.strip() + "\n")
    print("\n\nAfter:")
    g.seek(0)  # rewind the stripped copy before printing it
    for line in g:
        print(line, end="")
编辑:
在你的情况下,有一个更简单的解决方案:根本不使用 toprettyxml
,改用 toxml
。(更新:显然 toxml 输出的结果完全没有换行符。)但即使不这样做,我们也可以用同样的办法处理:
# Pretty-print the DOM, then insert the encoding into the XML declaration.
# dom and m_encoding are defined outside this snippet.
xml_string = dom.toprettyxml()
# maxsplit=1: only the first '?>' ends the declaration. A later '?>' (for
# example in a processing instruction) stays in the body instead of breaking
# the two-name unpacking with a ValueError.
part1, part2 = xml_string.split('?>', 1)
xmlstring = f'{part1} encoding="{m_encoding}"?>\n {part2}'
# Encode the file with the same encoding that the declaration announces
# (assumes m_encoding is a codec name Python recognizes).
with open("MyXml.xml", 'w', encoding=m_encoding) as xfile:
    for line in xmlstring.split("\n"):
        stripped = line.strip()
        # Skip lines that are empty after stripping so that no blank lines
        # end up in the output file.
        if stripped:
            xfile.write(stripped + "\n")
但是我怀疑 toprettyxml(indent="")
会做同样的事情:
# indent="" asks toprettyxml() for newlines between nodes but no indentation.
# dom is defined outside this snippet.
xml_string = dom.toprettyxml(indent="")
# Placeholder left by the author (presumably for the declaration handling).
...
with open("MyFile.xml", "w") as f:
    f.write(xml_string)