使用 Python 3 将 xml 数据转换为 csv 表格

Convert xml data to csv table using Python 3

我正在使用 Python 3.8,我想将以下(示例 xml)数据从 xml 转换为 csv 格式的 table。网上有很多示例,但我还没有找到任何从格式如下的 xml 数据生成 table 的示例。 (我是 Python 3 的新手,甚至不知道从哪里开始)。

示例xml:

<?xml version="1.0"?>
<observations>
<station tz="Australia/Brisbane" description="Brisbane" forecast-district-id="QLD_PW015" lon="153.0389" lat="-27.4808" type="AWS" stn-height="8.13" stn-name="BRISBANE" bom-id="040913" wmo-id="94576">
<period wind-src="metar_10" time-local="2021-07-11T09:30:00+10:00" time-utc="2021-07-10T23:30:00+00:00" index="0">
<level type="surface" index="0">
<element type="apparent_temp" units="Celsius">14.0</element>
<element type="delta_t" units="Celsius">4.4</element>
<element type="gust_kmh" units="km/h">19</element>
<element type="wind_gust_spd" units="knots">10</element>
<element type="air_temperature" units="Celsius">16.6</element>
<element type="dew_point" units="Celsius">7.8</element>
<element type="pres" units="hPa">1022.0</element>
<element type="msl_pres" units="hPa">1022.0</element>
<element type="qnh_pres" units="hPa">1022.0</element>
<element type="rain_hour" units="mm">0.0</element>
<element type="rain_ten" units="mm">0.0</element>
<element type="rel-humidity" units="%">56</element>
<element type="wind_dir">WSW</element>
<element type="wind_dir_deg" units="deg">244</element>
<element type="wind_spd_kmh" units="km/h">11</element>
<element type="wind_spd" units="knots">6</element>
</level>
</period>
</station>
<station tz="Australia/Brisbane" description="Brisbane Airport" forecast-district-id="QLD_PW015" lon="153.1292" lat="-27.3917" type="AWS" stn-height="4.51" stn-name="BRISBANE AERO" bom-id="040842" wmo-id="94578">
<period wind-src="metar_10" time-local="2021-07-11T09:30:00+10:00" time-utc="2021-07-10T23:30:00+00:00" index="0">
<level type="surface" index="0">
<element type="apparent_temp" units="Celsius">11.5</element>
<element type="cloud">Clear</element>
<element type="cloud_oktas">0</element>
<element type="delta_t" units="Celsius">4.1</element>
<element type="gust_kmh" units="km/h">33</element>
<element type="wind_gust_spd" units="knots">18</element>
<element type="air_temperature" units="Celsius">16.7</element>
<element type="dew_point" units="Celsius">8.7</element>
<element type="pres" units="hPa">1021.8</element>
<element type="msl_pres" units="hPa">1021.8</element>
<element type="qnh_pres" units="hPa">1021.9</element>
<element type="rain_hour" units="mm">0.0</element>
<element type="rain_ten" units="mm">0.0</element>
<element type="rel-humidity" units="%">59</element>
<element type="vis_km" units="km">10</element>
<element type="wind_dir">SW</element>
<element type="wind_dir_deg" units="deg">218</element>
<element type="wind_spd_kmh" units="km/h">26</element>
<element type="wind_spd" units="knots">14</element>
</level>
</period>
</station>
</observations>

理想Table:

description          pres      dew_point    rel-humidity    wind_spd_kmh
Brisbane             1022      7.8          56              11
Brisbane Airport     1021.8    8.7          59              26

工作脚本

from xml.dom import minidom
import csv

# Element types exported for every station, in CSV column order (the station
# name is always written as the first column).
FIELDS = (
    'air_temperature',
    'pres',
    'dew_point',
    'rel-humidity',
    'wind_spd_kmh',
    'cloud',
)


def station_row(station, fields=FIELDS):
    """Build one CSV row for a minidom <station> element.

    Args:
        station: a minidom ``Element`` for one <station>.
        fields: ``type`` attribute values of the <element> nodes to export,
            in column order.

    Returns:
        A list starting with the station's ``stn-name`` attribute, followed
        by one text value per entry in ``fields``. A field whose <element>
        is absent or has no text yields ``''`` so that every row has the
        same number of columns.
    """
    values = {}
    for element in station.getElementsByTagName('element'):
        kind = element.getAttribute('type')
        text_node = element.firstChild
        # Keep the first occurrence of each type; firstChild is None for an
        # empty element such as <element type="cloud"/>.
        if kind not in values and text_node is not None:
            values[kind] = text_node.nodeValue
    return [station.getAttribute('stn-name')] + [
        values.get(name, '') for name in fields
    ]


def main(xml_path='.xml', csv_path='.csv'):
    """Append one CSV row per <station> found in ``xml_path`` to ``csv_path``.

    No header row is written, because the output file is opened in append
    mode and may already contain rows from an earlier run.
    """
    document = minidom.parse(xml_path)
    # Open the output once for all stations instead of once per station.
    with open(csv_path, mode='a', newline='') as csv_file:
        # Default quoting: a space as quotechar would wrap names such as
        # "BRISBANE AERO" in spaces and double the space inside them.
        writer = csv.writer(csv_file, delimiter=',')
        for station in document.getElementsByTagName('station'):
            writer.writerow(station_row(station))


if __name__ == '__main__':
    main()

好吧,这只是一个起点。我是在等会议开始的间隙很快写出来的,而且我相当确定 minidom 并不是最好的方法。它在我的机器上似乎可以运行,欢迎你试一试。请注意,我并没有实现你想要的全部功能,剩下的部分需要你自己完成——提问时如果没有附上自己的代码,这里就不是索取现成答案的地方。

from xml.dom import minidom

# Parse the whole document up front; 'something.xml' is a placeholder path.
document = minidom.parse('something.xml')

stations = document.getElementsByTagName('station')

# Header row; ';' is the column separator.
print('description', 'pres', sep=';', end='\n')

for station in stations:
    # Text of the first <element type="pres"> under this station. It stays
    # empty when the station has no such element (or the element has no
    # text), so the row is still terminated and never merges with the next.
    pres = ''
    for element in station.getElementsByTagName('element'):
        if element.getAttribute('type') == 'pres' and element.firstChild is not None:
            pres = element.firstChild.nodeValue
            break
    # Print the complete row in one call so it always ends with a newline.
    print(station.attributes['description'].value, pres, sep=';')

输出(写入最后两行时):

description;pres
Brisbane;1022.0
Brisbane Airport;1021.8

使用 ElementTree(https://docs.python.org/3/library/xml.etree.elementtree.html)的写法基本相同(我认为它比 minidom 好得多):

import xml.etree.ElementTree as ElemTree

# 'yourfile.xml' is a placeholder path; <station> elements are expected to be
# direct children of the document root.
root = ElemTree.parse('yourfile.xml').getroot()

# Header row; ';' is the column separator.
print('description', 'pres', sep=';', end='\n')

for station in root.findall("./station"):
    # findtext() returns the default instead of raising when a station has no
    # pressure reading, so such a station still gets a complete row.
    pres = station.findtext("./period/level/element[@type='pres']", default='')
    # Print the complete row in one call so it always ends with a newline.
    print(station.attrib['description'], pres, sep=';')