如何在 Python 中按属性(数字前的字符串)对 XML 进行排序
How to sort XML by attribute (strings before numbers) in Python
我想根据 entry 标签的 value 属性对下面的 XML 进行排序,并让字符串(字母)排在数字之前。
<test>
<entry value="-12" />
<entry value="0" />
<entry value="043" />
<entry value="14" />
<entry value="6" />
<entry value="_null" />
<entry value="abc" />
<entry value="abcd" />
<entry value="empty" />
<entry value="false" />
<entry value="test1" />
<entry value="test2" />
<entry value="true" />
</test>
我写了一些 Python 代码来对 XML 进行排序,但它总是先排数字,再排字符串。
我查阅过相关的问答,但没能把其中任何一种排序方案应用到 XML 上。
import xml.etree.ElementTree as ElT
import os
from os.path import sep
def sort_xml(directory, xml_file, level1_tag, attribute, mode=0):
    """Sort elements of an XML file by an attribute and rewrite the file in place.

    The file ``directory + sep + xml_file`` is parsed, the root's children
    matching ``level1_tag`` are sorted, each of those elements' own children
    are sorted with the same key, and the result is written back to the same
    path with its first line removed.

    Args:
        directory: Directory containing the XML file.
        xml_file: File name of the XML document inside ``directory``.
        level1_tag: Tag (path) passed to ``findall`` on the root element.
        attribute: Name of the attribute whose value is used as the sort key.
        mode: Intended ordering; 0 = numbers before letters,
            1 = letters before numbers.

    NOTE(review): ``new_els`` is only assigned when ``mode`` is 0 or 1; any
    other value reaches the ``for`` loop with ``new_els`` unbound.
    """
    # mode 0 - numbers before letters
    # mode 1 - letters before numbers
    file = directory + sep + xml_file
    tree = ElT.parse(file)
    data = tree.getroot()
    els = data.findall(level1_tag)
    if mode == 0:
        # Key is (tag, attribute value); the attribute values are compared as strings.
        new_els = sorted(els, key=lambda e: (e.tag, e.attrib[attribute]))
    if mode == 1:
        # NOTE(review): this tests the element's *tag*, not the attribute value,
        # and the discussion on this page points out that the values involved
        # are strings, so the isinstance() check is always False and the order
        # is decided by the attribute string alone.
        new_els = sorted(els, key=lambda e: (isinstance(e.tag, (float, int)), e.attrib[attribute]))
    for el in new_els:
        # Apply the same ordering to each matched element's own children.
        if mode == 0:
            el[:] = sorted(el, key=lambda e: (e.tag, e.attrib[attribute]))
        if mode == 1:
            el[:] = sorted(el, key=lambda e: (isinstance(e.tag, (float, int)), e.attrib[attribute]))
    # Replace the root's children with the sorted elements returned by findall().
    data[:] = new_els
    tree.write(file, xml_declaration=True, encoding='utf-8')
    # Re-read the file just written and rewrite it without its first line
    # (presumably the XML declaration emitted by the write above).
    # From here on `data` holds the list of lines, no longer the root element.
    with open(file, 'r') as fin:
        data = fin.read().splitlines(True)
    with open(file, 'w') as fout:
        fout.writelines(data[1:])
# Example call: sort the "entry" elements of ./test.xml by "value" using mode 1.
sort_xml(os.getcwd(), "test.xml", "entry", "value", 1)
知道如何做到这一点吗?
Edit1:期望的输出
<test>
<entry value="_null" />
<entry value="abc" />
<entry value="abcd" />
<entry value="empty" />
<entry value="false" />
<entry value="test1" />
<entry value="test2" />
<entry value="true" />
<entry value="-12" />
<entry value="0" />
<entry value="043" />
<entry value="14" />
<entry value="6" />
</test>
I took the part where the letters start and moved it to the top. The actual requirement is only that the letters come first; I don't care about the order of the rest.
请看下面的代码:
import xml.etree.ElementTree as ET
xml = '''<test>
<entry value="-12" />
<entry value="/this" />
<entry value="0" />
<entry value="043" />
<entry value="14" />
<entry value="6" />
<entry value="_null" />
<entry value="abc" />
<entry value="abcd" />
<entry value="empty" />
<entry value="false" />
<entry value="test1" />
<entry value="test2" />
<entry value="true" />
</test>'''
root = ET.fromstring(xml)
numeric = []
non_numeric = []
for entry in root.findall('.//entry'):
try:
x = int(entry.attrib['value'])
numeric.append((x, entry.attrib['value']))
except ValueError as e:
non_numeric.append(entry.attrib['value'])
sorted(numeric, key=lambda x: x[0])
sorted(non_numeric)
root = ET.Element('test')
for value in non_numeric:
entry = ET.SubElement(root, 'entry')
entry.attrib['value'] = value
for value in numeric:
entry = ET.SubElement(root, 'entry')
entry.attrib['value'] = str(value[1])
ET.dump(root)
输出
<?xml version="1.0" encoding="UTF-8"?>
<test>
<entry value="/this" />
<entry value="_null" />
<entry value="abc" />
<entry value="abcd" />
<entry value="empty" />
<entry value="false" />
<entry value="test1" />
<entry value="test2" />
<entry value="true" />
<entry value="-12" />
<entry value="0" />
<entry value="043" />
<entry value="14" />
<entry value="6" />
</test>
我认为你的问题在于:排序时你检查的是值是否为 int 或 float。而实际上所有的值都是字符串,因此 isinstance(e.tag, (float, int)) 永远为假。
像这样的排序函数可以满足您的需求
def sorter(x):
    """Return a sort key that orders non-integer values before integer ones.

    The key is ``(is_integer, value)`` where ``value`` is the element's
    "value" attribute and ``is_integer`` tells whether ``int(value)``
    succeeds. ``False`` sorts before ``True``, and ties are broken by
    comparing the attribute text as a string.
    """
    text = x.get("value")
    try:
        int(text)
    except ValueError:
        parses_as_int = False
    else:
        parses_as_int = True
    return parses_as_int, text
可以这样使用(这里用 StringIO 代替文件):
from xml.etree import ElementTree
from io import StringIO
xml = """<test>
<entry value="-12" />
<entry value="0" />
<entry value="043" />
<entry value="14" />
<entry value="6" />
<entry value="_null" />
<entry value="abc" />
<entry value="abcd" />
<entry value="empty" />
<entry value="false" />
<entry value="test1" />
<entry value="test2" />
<entry value="true" />
</test>"""
# Parse the sample document from an in-memory text buffer instead of a file.
document = StringIO(xml)
tree = ElementTree.parse(document)
root = tree.getroot()
# Reorder the root's children with the sorter key, then save the tree.
entries = list(root)
entries.sort(key=sorter)
root[:] = entries
tree.write("output.xml")
output.xml 的内容是:
<test>
<entry value="_null" />
<entry value="abc" />
<entry value="abcd" />
<entry value="empty" />
<entry value="false" />
<entry value="test1" />
<entry value="test2" />
<entry value="true" />
<entry value="-12" />
<entry value="0" />
<entry value="043" />
<entry value="14" />
<entry value="6" />
</test>
我想根据 entry 标签的 value 属性对下面的 XML 进行排序,并让字符串(字母)排在数字之前。
<test>
<entry value="-12" />
<entry value="0" />
<entry value="043" />
<entry value="14" />
<entry value="6" />
<entry value="_null" />
<entry value="abc" />
<entry value="abcd" />
<entry value="empty" />
<entry value="false" />
<entry value="test1" />
<entry value="test2" />
<entry value="true" />
</test>
我写了一些 Python 代码来对 XML 进行排序,但它总是先排数字,再排字符串。
我查阅过相关的问答,但没能把其中任何一种排序方案应用到 XML 上。
import xml.etree.ElementTree as ElT
import os
from os.path import sep
def sort_xml(directory, xml_file, level1_tag, attribute, mode=0):
    """Sort elements of an XML file by an attribute and rewrite the file in place.

    The file ``directory + sep + xml_file`` is parsed, the root's children
    matching ``level1_tag`` are sorted, each of those elements' own children
    are sorted with the same key, and the result is written back to the same
    path with its first line removed.

    Args:
        directory: Directory containing the XML file.
        xml_file: File name of the XML document inside ``directory``.
        level1_tag: Tag (path) passed to ``findall`` on the root element.
        attribute: Name of the attribute whose value is used as the sort key.
        mode: Intended ordering; 0 = numbers before letters,
            1 = letters before numbers.

    NOTE(review): ``new_els`` is only assigned when ``mode`` is 0 or 1; any
    other value reaches the ``for`` loop with ``new_els`` unbound.
    """
    # mode 0 - numbers before letters
    # mode 1 - letters before numbers
    file = directory + sep + xml_file
    tree = ElT.parse(file)
    data = tree.getroot()
    els = data.findall(level1_tag)
    if mode == 0:
        # Key is (tag, attribute value); the attribute values are compared as strings.
        new_els = sorted(els, key=lambda e: (e.tag, e.attrib[attribute]))
    if mode == 1:
        # NOTE(review): this tests the element's *tag*, not the attribute value,
        # and the discussion on this page points out that the values involved
        # are strings, so the isinstance() check is always False and the order
        # is decided by the attribute string alone.
        new_els = sorted(els, key=lambda e: (isinstance(e.tag, (float, int)), e.attrib[attribute]))
    for el in new_els:
        # Apply the same ordering to each matched element's own children.
        if mode == 0:
            el[:] = sorted(el, key=lambda e: (e.tag, e.attrib[attribute]))
        if mode == 1:
            el[:] = sorted(el, key=lambda e: (isinstance(e.tag, (float, int)), e.attrib[attribute]))
    # Replace the root's children with the sorted elements returned by findall().
    data[:] = new_els
    tree.write(file, xml_declaration=True, encoding='utf-8')
    # Re-read the file just written and rewrite it without its first line
    # (presumably the XML declaration emitted by the write above).
    # From here on `data` holds the list of lines, no longer the root element.
    with open(file, 'r') as fin:
        data = fin.read().splitlines(True)
    with open(file, 'w') as fout:
        fout.writelines(data[1:])
# Example call: sort the "entry" elements of ./test.xml by "value" using mode 1.
sort_xml(os.getcwd(), "test.xml", "entry", "value", 1)
知道如何做到这一点吗?
Edit1:期望的输出
<test>
<entry value="_null" />
<entry value="abc" />
<entry value="abcd" />
<entry value="empty" />
<entry value="false" />
<entry value="test1" />
<entry value="test2" />
<entry value="true" />
<entry value="-12" />
<entry value="0" />
<entry value="043" />
<entry value="14" />
<entry value="6" />
</test>
I took the part where the letters start and moved it to the top. The actual requirement is only that the letters come first; I don't care about the order of the rest.
请看下面的代码:
import xml.etree.ElementTree as ET
xml = '''<test>
<entry value="-12" />
<entry value="/this" />
<entry value="0" />
<entry value="043" />
<entry value="14" />
<entry value="6" />
<entry value="_null" />
<entry value="abc" />
<entry value="abcd" />
<entry value="empty" />
<entry value="false" />
<entry value="test1" />
<entry value="test2" />
<entry value="true" />
</test>'''
root = ET.fromstring(xml)
numeric = []
non_numeric = []
for entry in root.findall('.//entry'):
try:
x = int(entry.attrib['value'])
numeric.append((x, entry.attrib['value']))
except ValueError as e:
non_numeric.append(entry.attrib['value'])
sorted(numeric, key=lambda x: x[0])
sorted(non_numeric)
root = ET.Element('test')
for value in non_numeric:
entry = ET.SubElement(root, 'entry')
entry.attrib['value'] = value
for value in numeric:
entry = ET.SubElement(root, 'entry')
entry.attrib['value'] = str(value[1])
ET.dump(root)
输出
<?xml version="1.0" encoding="UTF-8"?>
<test>
<entry value="/this" />
<entry value="_null" />
<entry value="abc" />
<entry value="abcd" />
<entry value="empty" />
<entry value="false" />
<entry value="test1" />
<entry value="test2" />
<entry value="true" />
<entry value="-12" />
<entry value="0" />
<entry value="043" />
<entry value="14" />
<entry value="6" />
</test>
我认为你的问题在于:排序时你检查的是值是否为 int 或 float。而实际上所有的值都是字符串,因此 isinstance(e.tag, (float, int)) 永远为假。
像这样的排序函数可以满足您的需求
def sorter(x):
    """Return a sort key that orders non-integer values before integer ones.

    The key is ``(is_integer, value)`` where ``value`` is the element's
    "value" attribute and ``is_integer`` tells whether ``int(value)``
    succeeds. ``False`` sorts before ``True``, and ties are broken by
    comparing the attribute text as a string.
    """
    text = x.get("value")
    try:
        int(text)
    except ValueError:
        parses_as_int = False
    else:
        parses_as_int = True
    return parses_as_int, text
可以这样使用(这里用 StringIO 代替文件):
from xml.etree import ElementTree
from io import StringIO
xml = """<test>
<entry value="-12" />
<entry value="0" />
<entry value="043" />
<entry value="14" />
<entry value="6" />
<entry value="_null" />
<entry value="abc" />
<entry value="abcd" />
<entry value="empty" />
<entry value="false" />
<entry value="test1" />
<entry value="test2" />
<entry value="true" />
</test>"""
# Parse the sample document from an in-memory text buffer instead of a file.
document = StringIO(xml)
tree = ElementTree.parse(document)
root = tree.getroot()
# Reorder the root's children with the sorter key, then save the tree.
entries = list(root)
entries.sort(key=sorter)
root[:] = entries
tree.write("output.xml")
output.xml 的内容是:
<test>
<entry value="_null" />
<entry value="abc" />
<entry value="abcd" />
<entry value="empty" />
<entry value="false" />
<entry value="test1" />
<entry value="test2" />
<entry value="true" />
<entry value="-12" />
<entry value="0" />
<entry value="043" />
<entry value="14" />
<entry value="6" />
</test>