Python 2.7 lxml:如何用注释替换标签

Python 2.7 lxml: How to replace a tag with a comment

我有以下 XML:

<?xml version='1.0' encoding='UTF-8'?>
<A xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="http://obe.nce.amadeus.net/bms/metadata/1-0/">
  <B name="NAME" version="VERSION">
    <AA>SOME NAME</AA>
    <CC>SOME OTHER NAME</CC>
  </B>
  <C>
    <SOME1>
      <TAG_3 name="NAME_1" path="path_1"/>
      <TAG_3 name="NAME_2" path="path_2"/>
      <TAG_3 name="NAME_3" path="path_3"/>
    </SOME1>
    <SOME2>
      <TAG_3 name="NAME_4" path="path_1"/>
      <TAG_3 name="NAME_5" path="path_2"/>
      <TAG_3 name="NAME_6" path="path_3"/>
    </SOME2>
  </C>
  <D>
    <TAG_3 type="type" name="NAME_1" version="version_1"/>
    <TAG_3 type="type" name="NAME_2" version="version_2"/>
    <TAG_3 type="type" name="NAME_3" version="version_3"/>
  </D>
</A>

我必须把它改成这样:

<?xml version='1.0' encoding='UTF-8'?>
<A xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="http://obe.nce.amadeus.net/bms/metadata/1-0/">
    <B name="NAME" version="VERSION">
        <AA>SOME NAME</AA>
        <CC>SOME OTHER NAME</CC>
    </B>
    <C>
        <!-- SOME1 -->
        <TAG_3 name="NAME_1" path="path_1"/>
        <TAG_3 name="NAME_3" path="path_3"/>
        <TAG_3 name="NAME_2" path="path_2"/>
        <!-- SOME2 -->
        <TAG_3 name="NAME_5" path="path_2"/>
        <TAG_3 name="NAME_4" path="path_1"/>
        <TAG_3 name="NAME_6" path="path_3"/>
    </C>
    <D>
        <TAG_3 type="type" name="NAME_1" version="version_1"/>
        <TAG_3 type="type" name="NAME_2" version="version_2"/>
        <TAG_3 type="type" name="NAME_3" version="version_3"/>
    </D>
</A>

你们有谁知道我怎样才能达到这样的效果吗?我尝试遍历 SOME1 和 SOME2 的所有子项并删除该标记,但我不知道如何:

  1. 去掉标签本身,比如 SOME1 和 SOME2
  2. 在该标签原来的位置添加一行注释来代替它

有谁能给我一些提示,告诉我该怎么做吗?

将元素移动到 SOME1 / SOME2 的父元素中

from lxml import etree 

# Load the document; every step below mutates this tree in place.
tree = etree.parse('tmp.xml')
root = tree.getroot()

# <C> elements directly under the root <A> that have a SOME1 or SOME2 child.
c = tree.xpath('/A/C[SOME1 | SOME2]')
print(c)

# Re-parent each child of SOME1/SOME2 onto the first matching <C>.
# append() on an element that already lives in the tree moves it.
moved_children = tree.xpath('//C[SOME1 | SOME2]/SOME1/* | //C[SOME1 | SOME2]/SOME2/*')
for child in moved_children:
    print(child)
    c[0].append(child)

# The wrappers are empty now; drop them from <C>.
emptied_wrappers = tree.xpath('//C[SOME1 | SOME2]/SOME1 | //C[SOME1 | SOME2]/SOME2')
for wrapper in emptied_wrappers:
    c[0].remove(wrapper)

# Serialize to UTF-8 bytes, then decode so print() shows text.
serialized = etree.tostring(tree, encoding="utf-8", method="xml")
print(serialized.decode("utf-8"))

结果

<A xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="http://obe.nce.amadeus.net/bms/metadata/1-0/">
  <B name="NAME" version="VERSION">
    <AA>SOME NAME</AA>
    <CC>SOME OTHER NAME</CC>
  </B>
  <C>
    <TAG_3 name="NAME_1" path="path_1"/>
      <TAG_3 name="NAME_2" path="path_2"/>
      <TAG_3 name="NAME_3" path="path_3"/>
      <TAG_3 name="NAME_4" path="path_1"/>
      <TAG_3 name="NAME_5" path="path_2"/>
      <TAG_3 name="NAME_6" path="path_3"/>
    </C>
  <D>
    <TAG_3 type="type" name="NAME_1" version="version_1"/>
    <TAG_3 type="type" name="NAME_2" version="version_2"/>
    <TAG_3 type="type" name="NAME_3" version="version_3"/>
  </D>
</A>

用注释替换被删除的元素

# Replace every SOME1 / SOME2 wrapper under a <C> element with an XML comment
# holding the wrapper's tag name, followed by the wrapper's former children,
# all at the position the wrapper occupied in its parent.
#
# Positions are taken from the parent's real child list (parent.index), so
# this works wherever the wrappers sit inside <C> and in whatever order,
# and needs no per-tag special cases.
wrappers = tree.xpath('//C/SOME1 | //C/SOME2')
for wrapper in wrappers:
    parent = wrapper.getparent()
    # Looked up on every iteration: earlier iterations have already
    # changed the parent's child list.
    pos = parent.index(wrapper)

    marker = etree.Comment(wrapper.tag)
    # Without a tail the next element would be serialized on the same
    # line as the comment.
    marker.tail = "\n"
    parent.insert(pos, marker)

    # Snapshot the children first: insert() moves each one out of the
    # wrapper, which would otherwise disturb the iteration.
    for offset, child in enumerate(list(wrapper), start=1):
        parent.insert(pos + offset, child)

    # The wrapper is empty now; removing it also discards its tail text.
    parent.remove(wrapper)

结果

<A xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="http://obe.nce.amadeus.net/bms/metadata/1-0/">
  <B name="NAME" version="VERSION">
    <AA>SOME NAME</AA>
    <CC>SOME OTHER NAME</CC>
  </B>
  <C>
    <!--SOME1-->
<TAG_3 name="NAME_1" path="path_1"/>
      <TAG_3 name="NAME_2" path="path_2"/>
      <TAG_3 name="NAME_3" path="path_3"/>
    <!--SOME2-->
<TAG_3 name="NAME_4" path="path_1"/>
      <TAG_3 name="NAME_5" path="path_2"/>
      <TAG_3 name="NAME_6" path="path_3"/>
    </C>
  <D>
    <TAG_3 type="type" name="NAME_1" version="version_1"/>
    <TAG_3 type="type" name="NAME_2" version="version_2"/>
    <TAG_3 type="type" name="NAME_3" version="version_3"/>
  </D>
</A>