Python XML:使用 xml.etree.ElementTree,仅在标签有值时读取对应条目
Python XML: read an entry only if a tag has a value, using xml.etree.ElementTree
我正在尝试读取下面这个文件:只有当某个条目的“TypeOfVessel”有值(不为空)时,才读取该条目。我的代码见下方,如有任何建议,请告诉我。谢谢。
<ArrayOfConsolidatedList xmlns:i="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://schemas.datacontract.org/2004/07/">
<ConsolidatedList>
<RegimeName>Test1</RegimeName>
<Subsidiaries i:nil="true" />
<TonnageOfVessel i:nil="true" />
<TownOfBirth i:nil="true" />
<TypeOfVessel i:nil="true" />
</ConsolidatedList>
<ConsolidatedList>
<RegimeName>Test2</RegimeName>
<Subsidiaries i:nil="true"/>
<TonnageOfVessel>841</TonnageOfVessel>
<TownOfBirth i:nil="true"/>
<TypeOfVessel>Bunkering Vessel</TypeOfVessel>
</ConsolidatedList>
</ArrayOfConsolidatedList>
Python代码:
import xml.etree.ElementTree as ET
import inspect
def ListParse():
    """Question's original attempt at printing each entry's TypeOfVessel.

    Kept exactly as posted (only indentation restored); it does not work on
    the sample XML shown above, for the reasons noted inline.
    """
    tree = ET.parse('ListRead.xml')
    root = tree.getroot()
    # NOTE(review): in the sample document the root element itself is
    # ArrayOfConsolidatedList, and find() looks for a matching *subelement*,
    # so this lookup returns None.
    all_entity_entries = root.find("{http://schemas.datacontract.org/2004/07/}ArrayOfConsolidatedList")
    # NOTE(review): with all_entity_entries being None, this loop raises
    # TypeError before any entry is read.
    for entry in all_entity_entries:
        # NOTE(review): the sample declares a default namespace, so the child
        # tags are "{http://schemas.datacontract.org/2004/07/}RegimeName" etc.;
        # the bare names used here would not match.
        RegimeName = entry.find('RegimeName').text
        TonnageOfVessel = entry.find('TonnageOfVessel')
        TypeOfVessel = entry.find('TypeOfVessel')
        print(TypeOfVessel)
ListParse()
import xml.etree.ElementTree as ET
def ListParse(path='ListRead.xml'):
    """Print each vessel entry's regime name and whether its type is present.

    For every ``ConsolidatedList`` element directly under the document root,
    two lines are printed: the text of its ``RegimeName`` child, then
    ``"missing"`` or ``"available"`` for its ``TypeOfVessel`` child.

    ``TypeOfVessel`` counts as missing when the element is absent altogether
    or carries ``i:nil`` set to ``"true"`` or ``"1"`` (both are valid
    spellings of an XML Schema boolean). An absent ``RegimeName`` is printed
    as ``None`` instead of raising ``AttributeError``.

    Args:
        path: XML file to read. Defaults to ``ListRead.xml`` in the current
            working directory.

    Raises:
        OSError: if the file cannot be opened.
        xml.etree.ElementTree.ParseError: if the file is not well-formed XML.
    """
    ns = "{http://schemas.datacontract.org/2004/07/}"
    nil_attr = "{http://www.w3.org/2001/XMLSchema-instance}nil"

    # ET.parse() returns an ElementTree; its findall() searches relative to
    # the root element, so this selects the root's direct children.
    tree = ET.parse(path)
    for vessel_entry in tree.findall(ns + "ConsolidatedList"):
        regime_name = vessel_entry.find(ns + "RegimeName")
        type_of_vessel = vessel_entry.find(ns + "TypeOfVessel")
        # Guard against the element being absent before touching .attrib.
        is_missing = (
            type_of_vessel is None
            or type_of_vessel.attrib.get(nil_attr, "false") in ("true", "1")
        )
        print(regime_name.text if regime_name is not None else None)
        print("missing" if is_missing else "available")
ListParse()  # run the example; ListRead.xml is looked up relative to the current working directory
输出:
Test1
missing
Test2
available
编辑:您在评论中表示不想把所有数据都保存在内存中。因此,应当使用基于事件的解析(而不是树解析),并配合 Python 生成器。下面是一个例子:
import xml.etree.ElementTree as ET
def get_vessels_with_non_null_type(path="ListRead.xml", chunk_size=64 * 1024,
                                   clear_after_use=True):
    """Yield each ``ConsolidatedList`` element whose ``TypeOfVessel`` has a value.

    The file is read in binary chunks and fed to an ``XMLPullParser``, so
    entries are produced as soon as their closing tag has been parsed rather
    than after the whole document has been loaded.

    An entry is yielded when it has a ``TypeOfVessel`` child whose ``i:nil``
    attribute is absent, ``"false"`` or ``"0"``. Entries without a
    ``TypeOfVessel`` child are skipped instead of raising ``AttributeError``.

    Args:
        path: XML file to read. Defaults to ``ListRead.xml`` in the current
            working directory.
        chunk_size: number of bytes read per iteration. The result does not
            depend on it; a tiny value such as 10 is only useful to watch the
            incremental parsing at work.
        clear_after_use: when true (the default), every ``ConsolidatedList``
            element is emptied with ``Element.clear()`` once the consumer has
            moved on to the next one. This is what keeps memory bounded, and
            it means a yielded element must be used *during* its iteration
            step; pass ``False`` to keep elements intact (the parser then
            retains the full tree). Emptied element shells still remain
            attached to the root.

    Yields:
        ``xml.etree.ElementTree.Element`` objects for qualifying entries.

    Raises:
        ValueError: if ``chunk_size`` is smaller than 1.
        OSError: if the file cannot be opened.
        xml.etree.ElementTree.ParseError: if the document is malformed or
            ends prematurely.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive number of bytes")

    ns = "{http://schemas.datacontract.org/2004/07/}"
    entry_tag = ns + "ConsolidatedList"
    type_tag = ns + "TypeOfVessel"
    nil_attr = "{http://www.w3.org/2001/XMLSchema-instance}nil"

    # Only "end" events are requested: an element and all of its children are
    # complete once its closing tag has been seen.
    parser = ET.XMLPullParser(["end"])

    def drain():
        """Yield the qualifying entries among the elements completed so far."""
        for _event, element in parser.read_events():
            if element.tag != entry_tag:
                continue
            type_of_vessel = element.find(type_tag)
            if (type_of_vessel is not None
                    and type_of_vessel.attrib.get(nil_attr, "false") in ("false", "0")):
                yield element
            # Execution resumes here only after the consumer asks for the next
            # entry, so the element is no longer in use and can be released.
            if clear_after_use:
                element.clear()

    with open(path, "rb") as xml_file:
        # iter(callable, sentinel) stops at the empty read that marks EOF.
        for chunk in iter(lambda: xml_file.read(chunk_size), b""):
            parser.feed(chunk)
            yield from drain()

    # close() reports truncated documents and forces the parser to process
    # anything it was still buffering (recent Expat versions may defer parsing
    # of small chunks), so events can still show up here.
    parser.close()
    yield from drain()
def do_something_with_a_vessel(vessel_entry):
    """Print the regime name and vessel type of one ``ConsolidatedList`` element.

    Both values are printed on a single line separated by a space. A child
    element that is absent is printed as ``None`` (the same text an empty
    child produces) instead of raising ``AttributeError``.

    Args:
        vessel_entry: the ``xml.etree.ElementTree.Element`` of one
            ``ConsolidatedList`` entry.
    """
    ns = "{http://schemas.datacontract.org/2004/07/}"

    def text_of(child_name):
        """Return the text of the named child, or None if it is absent."""
        child = vessel_entry.find(ns + child_name)
        return child.text if child is not None else None

    print(text_of("RegimeName"), text_of("TypeOfVessel"))
# Consume the generator one entry at a time, handling each entry as it is yielded.
for vessel_entry in get_vessels_with_non_null_type():
    do_something_with_a_vessel(vessel_entry)
输出只有一行:Test2 Bunkering Vessel
这种方式按块读取文件,并在每个条目解析完成后立即处理它。但要注意:解析器仍会把已解析的元素保留在树中,只有在处理完每个元素后调用 element.clear() 释放其内容,内存占用才能真正降到接近最小值。
我正在尝试读取下面这个文件:只有当某个条目的“TypeOfVessel”有值(不为空)时,才读取该条目。我的代码见下方,如有任何建议,请告诉我。谢谢。
<ArrayOfConsolidatedList xmlns:i="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://schemas.datacontract.org/2004/07/">
<ConsolidatedList>
<RegimeName>Test1</RegimeName>
<Subsidiaries i:nil="true" />
<TonnageOfVessel i:nil="true" />
<TownOfBirth i:nil="true" />
<TypeOfVessel i:nil="true" />
</ConsolidatedList>
<ConsolidatedList>
<RegimeName>Test2</RegimeName>
<Subsidiaries i:nil="true"/>
<TonnageOfVessel>841</TonnageOfVessel>
<TownOfBirth i:nil="true"/>
<TypeOfVessel>Bunkering Vessel</TypeOfVessel>
</ConsolidatedList>
</ArrayOfConsolidatedList>
Python代码:
import xml.etree.ElementTree as ET
import inspect
def ListParse():
    """Question's original attempt at printing each entry's TypeOfVessel.

    Kept exactly as posted (only indentation restored); it does not work on
    the sample XML shown above, for the reasons noted inline.
    """
    tree = ET.parse('ListRead.xml')
    root = tree.getroot()
    # NOTE(review): in the sample document the root element itself is
    # ArrayOfConsolidatedList, and find() looks for a matching *subelement*,
    # so this lookup returns None.
    all_entity_entries = root.find("{http://schemas.datacontract.org/2004/07/}ArrayOfConsolidatedList")
    # NOTE(review): with all_entity_entries being None, this loop raises
    # TypeError before any entry is read.
    for entry in all_entity_entries:
        # NOTE(review): the sample declares a default namespace, so the child
        # tags are "{http://schemas.datacontract.org/2004/07/}RegimeName" etc.;
        # the bare names used here would not match.
        RegimeName = entry.find('RegimeName').text
        TonnageOfVessel = entry.find('TonnageOfVessel')
        TypeOfVessel = entry.find('TypeOfVessel')
        print(TypeOfVessel)
ListParse()
import xml.etree.ElementTree as ET
def ListParse(path='ListRead.xml'):
    """Print each vessel entry's regime name and whether its type is present.

    For every ``ConsolidatedList`` element directly under the document root,
    two lines are printed: the text of its ``RegimeName`` child, then
    ``"missing"`` or ``"available"`` for its ``TypeOfVessel`` child.

    ``TypeOfVessel`` counts as missing when the element is absent altogether
    or carries ``i:nil`` set to ``"true"`` or ``"1"`` (both are valid
    spellings of an XML Schema boolean). An absent ``RegimeName`` is printed
    as ``None`` instead of raising ``AttributeError``.

    Args:
        path: XML file to read. Defaults to ``ListRead.xml`` in the current
            working directory.

    Raises:
        OSError: if the file cannot be opened.
        xml.etree.ElementTree.ParseError: if the file is not well-formed XML.
    """
    ns = "{http://schemas.datacontract.org/2004/07/}"
    nil_attr = "{http://www.w3.org/2001/XMLSchema-instance}nil"

    # ET.parse() returns an ElementTree; its findall() searches relative to
    # the root element, so this selects the root's direct children.
    tree = ET.parse(path)
    for vessel_entry in tree.findall(ns + "ConsolidatedList"):
        regime_name = vessel_entry.find(ns + "RegimeName")
        type_of_vessel = vessel_entry.find(ns + "TypeOfVessel")
        # Guard against the element being absent before touching .attrib.
        is_missing = (
            type_of_vessel is None
            or type_of_vessel.attrib.get(nil_attr, "false") in ("true", "1")
        )
        print(regime_name.text if regime_name is not None else None)
        print("missing" if is_missing else "available")
ListParse()  # run the example; ListRead.xml is looked up relative to the current working directory
输出:
Test1
missing
Test2
available
编辑:您在评论中表示不想把所有数据都保存在内存中。因此,应当使用基于事件的解析(而不是树解析),并配合 Python 生成器。下面是一个例子:
import xml.etree.ElementTree as ET
def get_vessels_with_non_null_type(path="ListRead.xml", chunk_size=64 * 1024,
                                   clear_after_use=True):
    """Yield each ``ConsolidatedList`` element whose ``TypeOfVessel`` has a value.

    The file is read in binary chunks and fed to an ``XMLPullParser``, so
    entries are produced as soon as their closing tag has been parsed rather
    than after the whole document has been loaded.

    An entry is yielded when it has a ``TypeOfVessel`` child whose ``i:nil``
    attribute is absent, ``"false"`` or ``"0"``. Entries without a
    ``TypeOfVessel`` child are skipped instead of raising ``AttributeError``.

    Args:
        path: XML file to read. Defaults to ``ListRead.xml`` in the current
            working directory.
        chunk_size: number of bytes read per iteration. The result does not
            depend on it; a tiny value such as 10 is only useful to watch the
            incremental parsing at work.
        clear_after_use: when true (the default), every ``ConsolidatedList``
            element is emptied with ``Element.clear()`` once the consumer has
            moved on to the next one. This is what keeps memory bounded, and
            it means a yielded element must be used *during* its iteration
            step; pass ``False`` to keep elements intact (the parser then
            retains the full tree). Emptied element shells still remain
            attached to the root.

    Yields:
        ``xml.etree.ElementTree.Element`` objects for qualifying entries.

    Raises:
        ValueError: if ``chunk_size`` is smaller than 1.
        OSError: if the file cannot be opened.
        xml.etree.ElementTree.ParseError: if the document is malformed or
            ends prematurely.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive number of bytes")

    ns = "{http://schemas.datacontract.org/2004/07/}"
    entry_tag = ns + "ConsolidatedList"
    type_tag = ns + "TypeOfVessel"
    nil_attr = "{http://www.w3.org/2001/XMLSchema-instance}nil"

    # Only "end" events are requested: an element and all of its children are
    # complete once its closing tag has been seen.
    parser = ET.XMLPullParser(["end"])

    def drain():
        """Yield the qualifying entries among the elements completed so far."""
        for _event, element in parser.read_events():
            if element.tag != entry_tag:
                continue
            type_of_vessel = element.find(type_tag)
            if (type_of_vessel is not None
                    and type_of_vessel.attrib.get(nil_attr, "false") in ("false", "0")):
                yield element
            # Execution resumes here only after the consumer asks for the next
            # entry, so the element is no longer in use and can be released.
            if clear_after_use:
                element.clear()

    with open(path, "rb") as xml_file:
        # iter(callable, sentinel) stops at the empty read that marks EOF.
        for chunk in iter(lambda: xml_file.read(chunk_size), b""):
            parser.feed(chunk)
            yield from drain()

    # close() reports truncated documents and forces the parser to process
    # anything it was still buffering (recent Expat versions may defer parsing
    # of small chunks), so events can still show up here.
    parser.close()
    yield from drain()
def do_something_with_a_vessel(vessel_entry):
    """Print the regime name and vessel type of one ``ConsolidatedList`` element.

    Both values are printed on a single line separated by a space. A child
    element that is absent is printed as ``None`` (the same text an empty
    child produces) instead of raising ``AttributeError``.

    Args:
        vessel_entry: the ``xml.etree.ElementTree.Element`` of one
            ``ConsolidatedList`` entry.
    """
    ns = "{http://schemas.datacontract.org/2004/07/}"

    def text_of(child_name):
        """Return the text of the named child, or None if it is absent."""
        child = vessel_entry.find(ns + child_name)
        return child.text if child is not None else None

    print(text_of("RegimeName"), text_of("TypeOfVessel"))
# Consume the generator one entry at a time, handling each entry as it is yielded.
for vessel_entry in get_vessels_with_non_null_type():
    do_something_with_a_vessel(vessel_entry)
输出只有一行:Test2 Bunkering Vessel
这种方式按块读取文件,并在每个条目解析完成后立即处理它。但要注意:解析器仍会把已解析的元素保留在树中,只有在处理完每个元素后调用 element.clear() 释放其内容,内存占用才能真正降到接近最小值。