Python 2.7 lxml:如何用注释替换标签
Python 2.7 lxml: How to replace a tag with a comment
我有如下 XML:
<?xml version='1.0' encoding='UTF-8'?>
<A xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="http://obe.nce.amadeus.net/bms/metadata/1-0/">
<B name="NAME" version="VERSION">
<AA>SOME NAME</AA>
<CC>SOME OTHER NAME</CC>
</B>
<C>
<SOME1>
<TAG_3 name="NAME_1" path="path_1"/>
<TAG_3 name="NAME_2" path="path_2"/>
<TAG_3 name="NAME_3" path="path_3"/>
</SOME1>
<SOME2>
<TAG_3 name="NAME_4" path="path_1"/>
<TAG_3 name="NAME_5" path="path_2"/>
<TAG_3 name="NAME_6" path="path_3"/>
</SOME2>
</C>
<D>
<TAG_3 type="type" name="NAME_1" version="version_1"/>
<TAG_3 type="type" name="NAME_2" version="version_2"/>
<TAG_3 type="type" name="NAME_3" version="version_3"/>
</D>
</A>
我必须把它改成这样:
<?xml version='1.0' encoding='UTF-8'?>
<A xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="http://obe.nce.amadeus.net/bms/metadata/1-0/">
<B name="NAME" version="VERSION">
<AA>SOME NAME</AA>
<CC>SOME OTHER NAME</CC>
</B>
<C>
<!-- SOME1 -->
<TAG_3 name="NAME_1" path="path_1"/>
<TAG_3 name="NAME_3" path="path_3"/>
<TAG_3 name="NAME_2" path="path_2"/>
<!-- SOME2 -->
<TAG_3 name="NAME_5" path="path_2"/>
<TAG_3 name="NAME_4" path="path_1"/>
<TAG_3 name="NAME_6" path="path_3"/>
</C>
<D>
<TAG_3 type="type" name="NAME_1" version="version_1"/>
<TAG_3 type="type" name="NAME_2" version="version_2"/>
<TAG_3 type="type" name="NAME_3" version="version_3"/>
</D>
</A>
你们有谁知道我怎样才能达到这样的效果吗?我尝试遍历 SOME1 和 SOME2 的所有子项并删除该标记,但我不知道如何:
- 去掉TAG本身,比如SOME1和SOME2
- 在该标签原来的位置添加一行注释来代替它
有人能给我一些提示,告诉我该怎么做吗?
将元素移动到 SOME1 的父元素
from lxml import etree
# Parse the input document from disk.
tree = etree.parse('tmp.xml')
root = tree.getroot()

# Every <C> under <A> that directly contains a <SOME1> or <SOME2> child.
c = tree.xpath('/A/C[SOME1 | SOME2]')
print(c)

# Move each element child of the <SOME1>/<SOME2> wrappers into the first
# matching <C>.  append() re-parents the node, so it leaves its wrapper.
for s in tree.xpath('//C[SOME1 | SOME2]/SOME1/* | //C[SOME1 | SOME2]/SOME2/*'):
    print(s)
    c[0].append(s)

# The wrappers have been emptied above; remove them from <C>.
for d in tree.xpath('//C[SOME1 | SOME2]/SOME1 | //C[SOME1 | SOME2]/SOME2'):
    c[0].remove(d)

# Serialize the modified tree and print it as text.
print(etree.tostring(tree, encoding="utf-8", method="xml").decode("utf-8"))
结果
<A xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="http://obe.nce.amadeus.net/bms/metadata/1-0/">
<B name="NAME" version="VERSION">
<AA>SOME NAME</AA>
<CC>SOME OTHER NAME</CC>
</B>
<C>
<TAG_3 name="NAME_1" path="path_1"/>
<TAG_3 name="NAME_2" path="path_2"/>
<TAG_3 name="NAME_3" path="path_3"/>
<TAG_3 name="NAME_4" path="path_1"/>
<TAG_3 name="NAME_5" path="path_2"/>
<TAG_3 name="NAME_6" path="path_3"/>
</C>
<D>
<TAG_3 type="type" name="NAME_1" version="version_1"/>
<TAG_3 type="type" name="NAME_2" version="version_2"/>
<TAG_3 type="type" name="NAME_3" version="version_3"/>
</D>
</A>
用注释替换被删除的元素
# Replace each <SOME1>/<SOME2> wrapper with a comment carrying its tag name,
# followed by the wrapper's element children, at the wrapper's own position.
# The position is read from the live tree instead of being hard-coded, so
# other siblings inside <C>, or several matching <C> elements, keep their
# relative order.
for wrapper in tree.xpath('//C[SOME1 | SOME2]/SOME1 | //C[SOME1 | SOME2]/SOME2'):
    parent = wrapper.getparent()
    position = parent.index(wrapper)

    # The comment takes the wrapper's slot; the wrapper shifts one to the right.
    marker = etree.Comment(wrapper.tag)
    marker.tail = "\n"  # put the following element on a new line
    parent.insert(position, marker)

    # xpath('*') yields the element children as a separate list, so
    # re-parenting them while looping does not disturb the iteration.
    for offset, child in enumerate(wrapper.xpath('*'), start=1):
        parent.insert(position + offset, child)

    # The wrapper no longer holds any element children; drop it.
    parent.remove(wrapper)
结果
<A xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="http://obe.nce.amadeus.net/bms/metadata/1-0/">
<B name="NAME" version="VERSION">
<AA>SOME NAME</AA>
<CC>SOME OTHER NAME</CC>
</B>
<C>
<!--SOME1-->
<TAG_3 name="NAME_1" path="path_1"/>
<TAG_3 name="NAME_2" path="path_2"/>
<TAG_3 name="NAME_3" path="path_3"/>
<!--SOME2-->
<TAG_3 name="NAME_4" path="path_1"/>
<TAG_3 name="NAME_5" path="path_2"/>
<TAG_3 name="NAME_6" path="path_3"/>
</C>
<D>
<TAG_3 type="type" name="NAME_1" version="version_1"/>
<TAG_3 type="type" name="NAME_2" version="version_2"/>
<TAG_3 type="type" name="NAME_3" version="version_3"/>
</D>
</A>
我有如下 XML:
<?xml version='1.0' encoding='UTF-8'?>
<A xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="http://obe.nce.amadeus.net/bms/metadata/1-0/">
<B name="NAME" version="VERSION">
<AA>SOME NAME</AA>
<CC>SOME OTHER NAME</CC>
</B>
<C>
<SOME1>
<TAG_3 name="NAME_1" path="path_1"/>
<TAG_3 name="NAME_2" path="path_2"/>
<TAG_3 name="NAME_3" path="path_3"/>
</SOME1>
<SOME2>
<TAG_3 name="NAME_4" path="path_1"/>
<TAG_3 name="NAME_5" path="path_2"/>
<TAG_3 name="NAME_6" path="path_3"/>
</SOME2>
</C>
<D>
<TAG_3 type="type" name="NAME_1" version="version_1"/>
<TAG_3 type="type" name="NAME_2" version="version_2"/>
<TAG_3 type="type" name="NAME_3" version="version_3"/>
</D>
</A>
我必须把它改成这样:
<?xml version='1.0' encoding='UTF-8'?>
<A xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="http://obe.nce.amadeus.net/bms/metadata/1-0/">
<B name="NAME" version="VERSION">
<AA>SOME NAME</AA>
<CC>SOME OTHER NAME</CC>
</B>
<C>
<!-- SOME1 -->
<TAG_3 name="NAME_1" path="path_1"/>
<TAG_3 name="NAME_3" path="path_3"/>
<TAG_3 name="NAME_2" path="path_2"/>
<!-- SOME2 -->
<TAG_3 name="NAME_5" path="path_2"/>
<TAG_3 name="NAME_4" path="path_1"/>
<TAG_3 name="NAME_6" path="path_3"/>
</C>
<D>
<TAG_3 type="type" name="NAME_1" version="version_1"/>
<TAG_3 type="type" name="NAME_2" version="version_2"/>
<TAG_3 type="type" name="NAME_3" version="version_3"/>
</D>
</A>
你们有谁知道我怎样才能达到这样的效果吗?我尝试遍历 SOME1 和 SOME2 的所有子项并删除该标记,但我不知道如何:
- 去掉TAG本身,比如SOME1和SOME2
- 在该标签原来的位置添加一行注释来代替它
有人能给我一些提示,告诉我该怎么做吗?
将元素移动到 SOME1 的父元素
from lxml import etree
# Parse the input document from disk.
tree = etree.parse('tmp.xml')
root = tree.getroot()

# Every <C> under <A> that directly contains a <SOME1> or <SOME2> child.
c = tree.xpath('/A/C[SOME1 | SOME2]')
print(c)

# Move each element child of the <SOME1>/<SOME2> wrappers into the first
# matching <C>.  append() re-parents the node, so it leaves its wrapper.
for s in tree.xpath('//C[SOME1 | SOME2]/SOME1/* | //C[SOME1 | SOME2]/SOME2/*'):
    print(s)
    c[0].append(s)

# The wrappers have been emptied above; remove them from <C>.
for d in tree.xpath('//C[SOME1 | SOME2]/SOME1 | //C[SOME1 | SOME2]/SOME2'):
    c[0].remove(d)

# Serialize the modified tree and print it as text.
print(etree.tostring(tree, encoding="utf-8", method="xml").decode("utf-8"))
结果
<A xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="http://obe.nce.amadeus.net/bms/metadata/1-0/">
<B name="NAME" version="VERSION">
<AA>SOME NAME</AA>
<CC>SOME OTHER NAME</CC>
</B>
<C>
<TAG_3 name="NAME_1" path="path_1"/>
<TAG_3 name="NAME_2" path="path_2"/>
<TAG_3 name="NAME_3" path="path_3"/>
<TAG_3 name="NAME_4" path="path_1"/>
<TAG_3 name="NAME_5" path="path_2"/>
<TAG_3 name="NAME_6" path="path_3"/>
</C>
<D>
<TAG_3 type="type" name="NAME_1" version="version_1"/>
<TAG_3 type="type" name="NAME_2" version="version_2"/>
<TAG_3 type="type" name="NAME_3" version="version_3"/>
</D>
</A>
用注释替换被删除的元素
# Replace each <SOME1>/<SOME2> wrapper with a comment carrying its tag name,
# followed by the wrapper's element children, at the wrapper's own position.
# The position is read from the live tree instead of being hard-coded, so
# other siblings inside <C>, or several matching <C> elements, keep their
# relative order.
for wrapper in tree.xpath('//C[SOME1 | SOME2]/SOME1 | //C[SOME1 | SOME2]/SOME2'):
    parent = wrapper.getparent()
    position = parent.index(wrapper)

    # The comment takes the wrapper's slot; the wrapper shifts one to the right.
    marker = etree.Comment(wrapper.tag)
    marker.tail = "\n"  # put the following element on a new line
    parent.insert(position, marker)

    # xpath('*') yields the element children as a separate list, so
    # re-parenting them while looping does not disturb the iteration.
    for offset, child in enumerate(wrapper.xpath('*'), start=1):
        parent.insert(position + offset, child)

    # The wrapper no longer holds any element children; drop it.
    parent.remove(wrapper)
结果
<A xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="http://obe.nce.amadeus.net/bms/metadata/1-0/">
<B name="NAME" version="VERSION">
<AA>SOME NAME</AA>
<CC>SOME OTHER NAME</CC>
</B>
<C>
<!--SOME1-->
<TAG_3 name="NAME_1" path="path_1"/>
<TAG_3 name="NAME_2" path="path_2"/>
<TAG_3 name="NAME_3" path="path_3"/>
<!--SOME2-->
<TAG_3 name="NAME_4" path="path_1"/>
<TAG_3 name="NAME_5" path="path_2"/>
<TAG_3 name="NAME_6" path="path_3"/>
</C>
<D>
<TAG_3 type="type" name="NAME_1" version="version_1"/>
<TAG_3 type="type" name="NAME_2" version="version_2"/>
<TAG_3 type="type" name="NAME_3" version="version_3"/>
</D>
</A>