如何用iterparse写?
How to write with iterparse?
我正在尝试遍历一个 XML 文档,找到其中的某些标签,把它们组合成一个新标签,然后使用 Python 中的 ElementTree 模块把结果写回 XML 文档。
我已经有了我认为可以工作的代码,但是当我开始写文件的部分时,我收到一个错误:
AttributeError: '_IterParseIterator' object has no attribute 'write'
我要解析的文件有 120 MB,所以我认为使用 iterparse 会更高效,而且这也是我比较熟悉的方式。
import xml.etree.ElementTree as ET #imports the ElementTree module for working with XML
import pprint
from collections import defaultdict
def is_tigerbase(elem):
    """Return True when *elem* is a ``<tag>`` whose ``k`` is ``tiger:name_base``.

    Args:
        elem: An ``xml.etree.ElementTree.Element``.

    Returns:
        bool: True only for an element named ``tag`` whose ``k`` attribute
        equals ``"tiger:name_base"``. A ``<tag>`` without a ``k`` attribute
        yields False instead of raising ``KeyError``.
    """
    return elem.tag == "tag" and elem.attrib.get('k') == "tiger:name_base"
def is_tigertype(elem):
    """Return True when *elem* is a ``<tag>`` whose ``k`` is ``tiger:name_type``.

    Args:
        elem: An ``xml.etree.ElementTree.Element``.

    Returns:
        bool: True only for an element named ``tag`` whose ``k`` attribute
        equals ``"tiger:name_type"``. A ``<tag>`` without a ``k`` attribute
        yields False instead of raising ``KeyError``.
    """
    return elem.tag == "tag" and elem.attrib.get('k') == "tiger:name_type"
# audit: iterates over the XML file 'map' via ET.iterparse and, for every
# <node>/<way> element, tries to combine the values of its tiger:name_base and
# tiger:name_type tags into a new <tag k="addr:street"> child, then tries to
# write the document back to 'map'.
# NOTE(review): the indentation of this snippet has been lost; the intended
# nesting has to be inferred from the inline comments below.
def audit():
# `tree` is whatever ET.iterparse returns; it is consumed below as an iterable
# of (event, elem) pairs.
tree = ET.iterparse('map')
# Flags meant to record whether a base name / a name type was found.
base = 0
t_type = 0
for event, elem in tree:
#look for all nodes and ways
if elem.tag == "node" or elem.tag == "way":
#loop through all the tags
for tag in elem.iter('tag'):
#if the tag is a tiger base then change the base value to 1
#also get the v attribute and put it in the basedetail var
#then stop the loop
if is_tigerbase(tag):
# NOTE(review): `==` only compares and discards the result, so `base` is not
# changed here; an assignment (`base = 1`) was presumably intended.
base == 1
# NOTE(review): this inspects the attributes of `elem` (the node/way), not of
# the `tag` child that was just tested; presumably `tag.attrib` was intended.
if 'v' in elem.attrib:
basedetail = elem.attrib['v']
break
#loop through all the tags again
for tag in elem.iter('tag'):
#look for the tiger type tag, if there is one change the t_type
#value to 1 and get the v attribute for the detail
#stop the loop
if is_tigertype(tag):
# NOTE(review): same as above, a comparison whose result is discarded;
# `t_type` keeps its previous value.
t_type == 1
# NOTE(review): again reads `elem.attrib` rather than `tag.attrib`.
if 'v' in elem.attrib:
t_typedetail = elem.attrib['v']
break
#look to see if you had a base and a type and get ready to create
#the new tag
if base == 1 and t_type == 1:
new = basedetail + " " + t_typedetail
ET.SubElement(elem, "tag", k="addr:street", v=new)
print(new)
# NOTE(review): `ttype` is a different name from the `t_type` flag initialised
# above; the only assignment to `ttype` in this function is the reset below.
elif base == 1 and ttype == 0:
new = basedetail
ET.SubElement(elem, "tag", k="addr:street", v=new)
print(new)
# Reset the flags for the next element (note: resets `ttype`, not `t_type`).
base = 0
ttype = 0
# This is the call that fails with
# "AttributeError: '_IterParseIterator' object has no attribute 'write'":
# `tree` is the object returned by ET.iterparse above.
tree.write('map')
audit()
我正在解析的 XML 文件的一个小样本:
<?xml version="1.0" encoding="UTF-8"?>
<osm version="0.6" generator="Overpass API 0.7.55.3 9da5e7ae">
<note>The data included in this document is from www.openstreetmap.org. The data is made available under ODbL.</note>
<meta osm_base="2018-06-22T21:32:02Z"/>
<bounds minlat="28.3156000" minlon="-81.6952000" maxlat="28.4497000" maxlon="-81.4257000"/>
<node id="26794208" lat="28.3306444" lon="-81.5475040" version="14" timestamp="2014-07-07T10:17:59Z" changeset="24000940" uid="14293" user="KindredCoda"/>
<node id="26794209" lat="28.3612495" lon="-81.5194078" version="17" timestamp="2014-07-05T01:17:25Z" changeset="23960255" uid="14293" user="KindredCoda"/>
<node id="26794210" lat="28.3822849" lon="-81.5005573" version="25" timestamp="2018-02-26T21:48:01Z" changeset="56704055" uid="4018842" user="Stephen214">
<tag k="highway" v="motorway_junction"/>
<tag k="old_ref" v="27"/>
<tag k="ref" v="68"/>
</node>
<way id="596852739" version="1" timestamp="2018-06-12T09:57:29Z" changeset="59771511" uid="5659851" user="marthaleena">
<nd ref="5289076747"/>
<nd ref="5126801577"/>
<tag k="HFCS" v="Urban Collector"/>
<tag k="highway" v="unclassified"/>
<tag k="lanes" v="2"/>
<tag k="name" v="Polynesian Isles Boulevard"/>
<tag k="tiger:cfcc" v="A41"/>
<tag k="tiger:county" v="Osceola, FL"/>
<tag k="tiger:name_base" v="Polynesian Isles"/>
<tag k="tiger:name_type" v="Blvd"/>
<tag k="tiger:reviewed" v="no"/>
<tag k="tiger:zip_left" v="34746"/>
<tag k="tiger:zip_right" v="34746"/>
</way>
iterparse() 返回的是一个逐个产生 (event, elem) 元组的迭代器,而不是 ElementTree 对象,因此它没有 write() 方法,不能像使用 .parse() 那样把文档写回文件。我把代码改为使用 parse() 之后,问题就解决了。
# Load the whole document with ET.parse(): the ElementTree it returns has a
# write() method, which the iterator returned by ET.iterparse() does not.
tree = ET.parse('map')
root = tree.getroot()

for way in root.findall(".//way"):
    # Per-way state; None means "this <way> has no such tiger tag", so values
    # from a previous <way> can never leak into the current one.
    base = None
    ttype = None
    for key in way.iterfind(".//tag"):
        name = key.attrib.get('k')
        if name == "tiger:name_base":
            base = key.attrib.get('v')
        elif name == "tiger:name_type":
            ttype = key.attrib.get('v')

    # Without a base name there is nothing to build a street name from.
    if base is None:
        continue

    street = base if ttype is None else "{} {}".format(base, ttype)
    # Pass k/v as real attributes. Embedding them in the tag-name string
    # creates an element whose *name* is 'tag k="..." v="..."' and whose
    # values are not escaped when the tree is serialised.
    ET.SubElement(way, 'tag', {'k': 'addr:street', 'v': street})

# Written to 'maps' so the input file 'map' is left untouched.
tree.write('maps')
我正在尝试遍历一个 XML 文档,找到其中的某些标签,把它们组合成一个新标签,然后使用 Python 中的 ElementTree 模块把结果写回 XML 文档。
我已经有了我认为可以工作的代码,但是当我开始写文件的部分时,我收到一个错误:
AttributeError: '_IterParseIterator' object has no attribute 'write'
我要解析的文件有 120 MB,所以我认为使用 iterparse 会更高效,而且这也是我比较熟悉的方式。
import xml.etree.ElementTree as ET #imports the ElementTree module for working with XML
import pprint
from collections import defaultdict
def is_tigerbase(elem):
    """Return True when *elem* is a ``<tag>`` whose ``k`` is ``tiger:name_base``.

    Args:
        elem: An ``xml.etree.ElementTree.Element``.

    Returns:
        bool: True only for an element named ``tag`` whose ``k`` attribute
        equals ``"tiger:name_base"``. A ``<tag>`` without a ``k`` attribute
        yields False instead of raising ``KeyError``.
    """
    return elem.tag == "tag" and elem.attrib.get('k') == "tiger:name_base"
def is_tigertype(elem):
    """Return True when *elem* is a ``<tag>`` whose ``k`` is ``tiger:name_type``.

    Args:
        elem: An ``xml.etree.ElementTree.Element``.

    Returns:
        bool: True only for an element named ``tag`` whose ``k`` attribute
        equals ``"tiger:name_type"``. A ``<tag>`` without a ``k`` attribute
        yields False instead of raising ``KeyError``.
    """
    return elem.tag == "tag" and elem.attrib.get('k') == "tiger:name_type"
# audit: iterates over the XML file 'map' via ET.iterparse and, for every
# <node>/<way> element, tries to combine the values of its tiger:name_base and
# tiger:name_type tags into a new <tag k="addr:street"> child, then tries to
# write the document back to 'map'.
# NOTE(review): the indentation of this snippet has been lost; the intended
# nesting has to be inferred from the inline comments below.
def audit():
# `tree` is whatever ET.iterparse returns; it is consumed below as an iterable
# of (event, elem) pairs.
tree = ET.iterparse('map')
# Flags meant to record whether a base name / a name type was found.
base = 0
t_type = 0
for event, elem in tree:
#look for all nodes and ways
if elem.tag == "node" or elem.tag == "way":
#loop through all the tags
for tag in elem.iter('tag'):
#if the tag is a tiger base then change the base value to 1
#also get the v attribute and put it in the basedetail var
#then stop the loop
if is_tigerbase(tag):
# NOTE(review): `==` only compares and discards the result, so `base` is not
# changed here; an assignment (`base = 1`) was presumably intended.
base == 1
# NOTE(review): this inspects the attributes of `elem` (the node/way), not of
# the `tag` child that was just tested; presumably `tag.attrib` was intended.
if 'v' in elem.attrib:
basedetail = elem.attrib['v']
break
#loop through all the tags again
for tag in elem.iter('tag'):
#look for the tiger type tag, if there is one change the t_type
#value to 1 and get the v attribute for the detail
#stop the loop
if is_tigertype(tag):
# NOTE(review): same as above, a comparison whose result is discarded;
# `t_type` keeps its previous value.
t_type == 1
# NOTE(review): again reads `elem.attrib` rather than `tag.attrib`.
if 'v' in elem.attrib:
t_typedetail = elem.attrib['v']
break
#look to see if you had a base and a type and get ready to create
#the new tag
if base == 1 and t_type == 1:
new = basedetail + " " + t_typedetail
ET.SubElement(elem, "tag", k="addr:street", v=new)
print(new)
# NOTE(review): `ttype` is a different name from the `t_type` flag initialised
# above; the only assignment to `ttype` in this function is the reset below.
elif base == 1 and ttype == 0:
new = basedetail
ET.SubElement(elem, "tag", k="addr:street", v=new)
print(new)
# Reset the flags for the next element (note: resets `ttype`, not `t_type`).
base = 0
ttype = 0
# This is the call that fails with
# "AttributeError: '_IterParseIterator' object has no attribute 'write'":
# `tree` is the object returned by ET.iterparse above.
tree.write('map')
audit()
我正在解析的 XML 文件的一个小样本:
<?xml version="1.0" encoding="UTF-8"?>
<osm version="0.6" generator="Overpass API 0.7.55.3 9da5e7ae">
<note>The data included in this document is from www.openstreetmap.org. The data is made available under ODbL.</note>
<meta osm_base="2018-06-22T21:32:02Z"/>
<bounds minlat="28.3156000" minlon="-81.6952000" maxlat="28.4497000" maxlon="-81.4257000"/>
<node id="26794208" lat="28.3306444" lon="-81.5475040" version="14" timestamp="2014-07-07T10:17:59Z" changeset="24000940" uid="14293" user="KindredCoda"/>
<node id="26794209" lat="28.3612495" lon="-81.5194078" version="17" timestamp="2014-07-05T01:17:25Z" changeset="23960255" uid="14293" user="KindredCoda"/>
<node id="26794210" lat="28.3822849" lon="-81.5005573" version="25" timestamp="2018-02-26T21:48:01Z" changeset="56704055" uid="4018842" user="Stephen214">
<tag k="highway" v="motorway_junction"/>
<tag k="old_ref" v="27"/>
<tag k="ref" v="68"/>
</node>
<way id="596852739" version="1" timestamp="2018-06-12T09:57:29Z" changeset="59771511" uid="5659851" user="marthaleena">
<nd ref="5289076747"/>
<nd ref="5126801577"/>
<tag k="HFCS" v="Urban Collector"/>
<tag k="highway" v="unclassified"/>
<tag k="lanes" v="2"/>
<tag k="name" v="Polynesian Isles Boulevard"/>
<tag k="tiger:cfcc" v="A41"/>
<tag k="tiger:county" v="Osceola, FL"/>
<tag k="tiger:name_base" v="Polynesian Isles"/>
<tag k="tiger:name_type" v="Blvd"/>
<tag k="tiger:reviewed" v="no"/>
<tag k="tiger:zip_left" v="34746"/>
<tag k="tiger:zip_right" v="34746"/>
</way>
iterparse() 返回的是一个逐个产生 (event, elem) 元组的迭代器,而不是 ElementTree 对象,因此它没有 write() 方法,不能像使用 .parse() 那样把文档写回文件。我把代码改为使用 parse() 之后,问题就解决了。
# Load the whole document with ET.parse(): the ElementTree it returns has a
# write() method, which the iterator returned by ET.iterparse() does not.
tree = ET.parse('map')
root = tree.getroot()

for way in root.findall(".//way"):
    # Per-way state; None means "this <way> has no such tiger tag", so values
    # from a previous <way> can never leak into the current one.
    base = None
    ttype = None
    for key in way.iterfind(".//tag"):
        name = key.attrib.get('k')
        if name == "tiger:name_base":
            base = key.attrib.get('v')
        elif name == "tiger:name_type":
            ttype = key.attrib.get('v')

    # Without a base name there is nothing to build a street name from.
    if base is None:
        continue

    street = base if ttype is None else "{} {}".format(base, ttype)
    # Pass k/v as real attributes. Embedding them in the tag-name string
    # creates an element whose *name* is 'tag k="..." v="..."' and whose
    # values are not escaped when the tree is serialised.
    ET.SubElement(way, 'tag', {'k': 'addr:street', 'v': street})

# Written to 'maps' so the input file 'map' is left untouched.
tree.write('maps')