lxml - 如何删除元素而不是它的内容?
lxml - how to remove element but not its content?
假设我有以下代码:
<div id="first">
<div id="second">
<a></a>
<ul>...</ul>
</div>
</div>
这是我的代码:
# Select every div that has at least one div child.
div_parents = root_element.xpath('//div[div]')
for div in reversed(div_parents):
if len(div.getchildren()) == 1:
# remove the second (inner) div and replace it with its content
我先定位到包含子 div 的 div,如果这个子 div 是其父元素唯一的子元素,我想删除这个子 div(但保留它的内容)。结果应该是:
<div id="first">
<a></a>
<ul>...</ul>
</div>
我想这样做:
div.replace(div.getchildren()[0], div.getchildren()[0].getchildren())
但不幸的是,replace 的两个参数都必须是单个 element。有没有比“先把第一个 div 的所有属性重新赋给第二个 div,再用第二个 div 替换第一个 div”更简单的办法? - 因为后者我可以轻松做到:
div.getparent().replace(div, div.getchildren()[0])
可以考虑使用 copy.deepcopy,正如文档中所建议的:
例如:
# Select every <div> that has at least one <div> child (document order).
div_parents = root_element.xpath('//div[div]')
# Walk the matches in reverse so the innermost wrappers are unwrapped first;
# in a chain of nested single-child divs a forward pass would detach a later
# match from the tree before it is processed, leaving one wrapper behind.
for outer_div in reversed(div_parents):
    # Only unwrap when the inner div is the sole child of the outer div.
    # len(element) is the supported replacement for the deprecated
    # element.getchildren().
    if len(outer_div) == 1:
        inner_div = outer_div[0]
        # Copy the children of inner_div to outer_div. Appending the
        # originals would move them out of inner_div while it is being
        # iterated, hence the deep copies.
        for e in inner_div:
            outer_div.append(copy.deepcopy(e))
        # Remove inner_div from outer_div
        outer_div.remove(inner_div)
用于测试的完整代码:
import copy

import lxml.etree


def pprint(e):
    """Pretty-print element *e* to stdout as XML text."""
    # encoding="unicode" makes tostring() return text instead of bytes, so
    # print() shows the markup itself rather than a b'...' literal on Python 3.
    print(lxml.etree.tostring(e, pretty_print=True, encoding="unicode"))


xml = '''
<body>
<div id="first">
<div id="second">
<a>...</a>
<ul>...</ul>
</div>
</div>
</body>
'''
root_element = lxml.etree.fromstring(xml)

# Select every <div> that has at least one <div> child (document order).
div_parents = root_element.xpath('//div[div]')
# Walk the matches in reverse so the innermost wrappers are unwrapped first;
# in a chain of nested single-child divs a forward pass would detach a later
# match from the tree before it is processed, leaving one wrapper behind.
for outer_div in reversed(div_parents):
    # Only unwrap when the inner div is the sole child of the outer div.
    # len(element) is the supported replacement for the deprecated
    # element.getchildren().
    if len(outer_div) == 1:
        inner_div = outer_div[0]
        # Copy the children of inner_div to outer_div. Appending the
        # originals would move them out of inner_div while it is being
        # iterated, hence the deep copies.
        for e in inner_div:
            outer_div.append(copy.deepcopy(e))
        # Remove inner_div from outer_div
        outer_div.remove(inner_div)

pprint(root_element)
输出:
<body>
<div id="first">
<a>...</a>
<ul>...</ul>
</div>
</body>
注意:测试代码中外层的 <body> 标签并不是必需的,我只是用它来测试多种情况。这段测试代码对您的输入同样适用。
我会直接使用列表(切片)替换:
from lxml.etree import fromstring, tostring

xml = """<div id="first">
<div id="second">
<a></a>
<ul>...</ul>
</div>
</div>"""
doc = fromstring(xml)

# Select every <div> that has at least one <div> child (document order).
outer_divs = doc.xpath("//div[div]")
# Process the innermost wrappers first so that chains of nested single-child
# divs collapse completely.
for outer in reversed(outer_divs):
    # Only unwrap when the inner div is the sole child; without this guard a
    # div whose first child is not a div would have its content replaced by
    # that child's children.
    if len(outer) == 1:
        # Slice assignment replaces all children of `outer` with the children
        # of its only child, which drops the inner div itself.
        outer[:] = list(outer[0])

# print() as a function works on Python 2 and 3; encoding="unicode" yields
# text rather than a bytes literal on Python 3.
print(tostring(doc, encoding="unicode"))
假设我有以下代码:
<div id="first">
<div id="second">
<a></a>
<ul>...</ul>
</div>
</div>
这是我的代码:
# Select every div that has at least one div child.
div_parents = root_element.xpath('//div[div]')
for div in reversed(div_parents):
if len(div.getchildren()) == 1:
# remove the second (inner) div and replace it with its content
我先定位到包含子 div 的 div,如果这个子 div 是其父元素唯一的子元素,我想删除这个子 div(但保留它的内容)。结果应该是:
<div id="first">
<a></a>
<ul>...</ul>
</div>
我想这样做:
div.replace(div.getchildren()[0], div.getchildren()[0].getchildren())
但不幸的是,replace 的两个参数都必须是单个 element。有没有比“先把第一个 div 的所有属性重新赋给第二个 div,再用第二个 div 替换第一个 div”更简单的办法? - 因为后者我可以轻松做到:
div.getparent().replace(div, div.getchildren()[0])
可以考虑使用 copy.deepcopy,正如文档中所建议的:
例如:
# Select every <div> that has at least one <div> child (document order).
div_parents = root_element.xpath('//div[div]')
# Walk the matches in reverse so the innermost wrappers are unwrapped first;
# in a chain of nested single-child divs a forward pass would detach a later
# match from the tree before it is processed, leaving one wrapper behind.
for outer_div in reversed(div_parents):
    # Only unwrap when the inner div is the sole child of the outer div.
    # len(element) is the supported replacement for the deprecated
    # element.getchildren().
    if len(outer_div) == 1:
        inner_div = outer_div[0]
        # Copy the children of inner_div to outer_div. Appending the
        # originals would move them out of inner_div while it is being
        # iterated, hence the deep copies.
        for e in inner_div:
            outer_div.append(copy.deepcopy(e))
        # Remove inner_div from outer_div
        outer_div.remove(inner_div)
用于测试的完整代码:
import copy

import lxml.etree


def pprint(e):
    """Pretty-print element *e* to stdout as XML text."""
    # encoding="unicode" makes tostring() return text instead of bytes, so
    # print() shows the markup itself rather than a b'...' literal on Python 3.
    print(lxml.etree.tostring(e, pretty_print=True, encoding="unicode"))


xml = '''
<body>
<div id="first">
<div id="second">
<a>...</a>
<ul>...</ul>
</div>
</div>
</body>
'''
root_element = lxml.etree.fromstring(xml)

# Select every <div> that has at least one <div> child (document order).
div_parents = root_element.xpath('//div[div]')
# Walk the matches in reverse so the innermost wrappers are unwrapped first;
# in a chain of nested single-child divs a forward pass would detach a later
# match from the tree before it is processed, leaving one wrapper behind.
for outer_div in reversed(div_parents):
    # Only unwrap when the inner div is the sole child of the outer div.
    # len(element) is the supported replacement for the deprecated
    # element.getchildren().
    if len(outer_div) == 1:
        inner_div = outer_div[0]
        # Copy the children of inner_div to outer_div. Appending the
        # originals would move them out of inner_div while it is being
        # iterated, hence the deep copies.
        for e in inner_div:
            outer_div.append(copy.deepcopy(e))
        # Remove inner_div from outer_div
        outer_div.remove(inner_div)

pprint(root_element)
输出:
<body>
<div id="first">
<a>...</a>
<ul>...</ul>
</div>
</body>
注意:测试代码中外层的 <body> 标签并不是必需的,我只是用它来测试多种情况。这段测试代码对您的输入同样适用。
我会直接使用列表(切片)替换:
from lxml.etree import fromstring, tostring

xml = """<div id="first">
<div id="second">
<a></a>
<ul>...</ul>
</div>
</div>"""
doc = fromstring(xml)

# Select every <div> that has at least one <div> child (document order).
outer_divs = doc.xpath("//div[div]")
# Process the innermost wrappers first so that chains of nested single-child
# divs collapse completely.
for outer in reversed(outer_divs):
    # Only unwrap when the inner div is the sole child; without this guard a
    # div whose first child is not a div would have its content replaced by
    # that child's children.
    if len(outer) == 1:
        # Slice assignment replaces all children of `outer` with the children
        # of its only child, which drops the inner div itself.
        outer[:] = list(outer[0])

# print() as a function works on Python 2 and 3; encoding="unicode" yields
# text rather than a bytes literal on Python 3.
print(tostring(doc, encoding="unicode"))