Python - 使用 minidom 读取 XML

Python - Read an XML using minidom

我是 Python 新手,有一个问题。我正在尝试解析下面这个 XML(该 XML 包含多条记录,这里给出的是我需要读取的第一条数据):

<![CDATA[<?xml version="1.0" encoding="UTF-8"?><UDSObjectList>
<UDSObject>
<Handle>cr:908715</Handle>
<Attributes>
<Attribute DataType="2002">
<AttrName>ref_num</AttrName>
<AttrValue>497131</AttrValue>
</Attribute>
<Attribute DataType="2002">
<AttrName>support_lev.sym</AttrName>
<AttrValue/>
</Attribute>
<Attribute DataType="2004">
<AttrName>open_date</AttrName>
<AttrValue>1516290907</AttrValue>
</Attribute>
<Attribute DataType="58814636">
<AttrName>agt.id</AttrName>
<AttrValue/>
</Attribute>
<Attribute DataType="2005">
<AttrName>priority</AttrName>
<AttrValue>3</AttrValue>
</Attribute>
<Attribute DataType="2009">
<AttrName>tenant.id</AttrName>
<AttrValue>F3CA8B5A2A456742B21EF8F3B5538623</AttrValue>
</Attribute>
<Attribute DataType="2002">
<AttrName>tenant.name</AttrName>
<AttrValue>Ripley</AttrValue>
</Attribute>
<Attribute DataType="2005">
<AttrName>log_agent</AttrName>
<AttrValue>088966043F4D2944AA90067C52DA454F</AttrValue>
</Attribute>
<Attribute DataType="58826268">
<AttrName>request_by.first_name</AttrName>
<AttrValue/>
</Attribute>
<Attribute DataType="58826268">
<AttrName>request_by.first_name</AttrName>
<AttrValue/>
</Attribute>
<Attribute DataType="2002">
<AttrName>customer.first_name</AttrName>
<AttrValue>Juan Guillermo</AttrValue>
</Attribute>
<Attribute DataType="2002">
<AttrName>customer.last_name</AttrName>
<AttrValue>Mendoza Montero</AttrValue>
</Attribute>
<Attribute DataType="2009">
<AttrName>customer.id</AttrName>
<AttrValue>8C020EBAD32035419D7654CDE510D312</AttrValue>
</Attribute>
<Attribute DataType="2001">
<AttrName>category.id</AttrName>
<AttrValue>1121021012</AttrValue>
</Attribute>
<Attribute DataType="2002">
<AttrName>category.sym</AttrName>
<AttrValue>Ripley.Sistemas Financieros.Terminal Financiero.Mensaje de 
 Error</AttrValue>
</Attribute>
<Attribute DataType="2002">
<AttrName>status.sym</AttrName>
<AttrValue>Suspended</AttrValue>
</Attribute>
<Attribute DataType="2009">
<AttrName>group.id</AttrName>
<AttrValue>099621F7BD77C545B65FB65BFE466550</AttrValue>
</Attribute>
<Attribute DataType="2002">
<AttrName>group.last_name</AttrName>
<AttrValue>EUS_Zona V Region</AttrValue>
</Attribute>
<Attribute DataType="2001">
<AttrName>zreporting_met.id</AttrName>
<AttrValue>7300</AttrValue>
</Attribute>
<Attribute DataType="2002">
<AttrName>zreporting_met.sym</AttrName>
<AttrValue>E-Mail</AttrValue>
</Attribute>
<Attribute DataType="2002">
<AttrName>assignee.combo_name</AttrName>
<AttrValue/>
</Attribute>
<Attribute DataType="2004">
<AttrName>open_date</AttrName>
<AttrValue>1516290907</AttrValue>
</Attribute>
<Attribute DataType="2004">
<AttrName>close_date</AttrName>
<AttrValue/>
</Attribute>
<Attribute DataType="2002">
<AttrName>description</AttrName>
<AttrValue>Asunto       :Valaparaiso / Terminal Financiero Error
 Nombre Completo    :JUAN MENDOZA MONTERO
 Ubicación  :CCSS VALPARAISO Plaza victoria 1646, VALPARAISO
 País       :Chile
 Telefono   :ANEXO 2541
 Correo     :jmendozam@ripley.cl
 Descripción    :Error Terminal Financiero
 Descartes  :N/A</AttrValue>
 </Attribute>
 <Attribute DataType="2002">
 <AttrName>summary</AttrName>
 <AttrValue>Santiago / Modificación </AttrValue>
 </Attribute>
 </Attributes>
 </UDSObject>

但是当我用这种方法读取文件时:

from zeep import Client
import xml.dom.minidom
from xml.dom.minidom import Node

def select():
    """Query 'cr' objects from the web service and print each AttrValue text.

    Calls ``login``, ``doSelect`` and ``logout`` on ``_client.service``, parses
    the string returned by ``doSelect`` with minidom, and appends the text of
    every ``AttrValue`` element to ``resultado``, printing each one.

    ``_client`` is not defined in this snippet; presumably it is a zeep
    ``Client`` created elsewhere (TODO confirm). Returns None.
    """
    resultado = []
    sid = _client.service.login("User","password")
    objectType = 'cr'
    # A single WHERE clause; adjacent literals are concatenated by Python so
    # the statement stays valid while the lines stay short.
    whereClause = (
        "group.last_name LIKE 'EUS_ZONA%' AND open_date > 1517454000 "
        "AND open_date < "
        "1519786800"
    )
    maxRows = -1
    attributes = ["ref_num"
          ,"agt.id"
          ,"priority"
          ,"pcat.id"
          ,"tenant.id"
          ,"tenant.name"
          ,"log_agent"
          ,"request_by.first_name"
          ,"request_by.last_name"
          ,"customer.first_name"
          ,"customer.last_name"
          ,"customer.id"
          ,"category.id"
          ,"category.sym"
          ,"status.sym"
          ,"group.id"
          ,"group.last_name"
          ,"zreporting_met.id"
          ,"zreporting_met.sym"
          ,"assignee.combo_name"
          ,"open_date"
          ,"close_date"
          ,"description"
          ,"summary"]
    minim = _client.service.doSelect(sid=sid, objectType=objectType,
                                     whereClause=whereClause, maxRows= maxRows,
                                     attributes= attributes)
    dom = xml.dom.minidom.parseString(minim)
    nodeList = dom.getElementsByTagName('AttrValue')
    for j in range(len(nodeList)):
        # NOTE: firstChild is None for an empty element such as <AttrValue/>,
        # so this line raises AttributeError there. The logic is left as the
        # question posted it, since that behaviour is what is being asked about.
        resultado.append(dom.getElementsByTagName('AttrValue')[j].firstChild.wholeText)
        print(resultado[j])

    logout = _client.service.logout(sid)

这只会打印第一个 AttrValue(即 ref_num 的值)。我需要做的是把 XML 文件中的每个字段都添加到 resultado 数组中,并把 XML 文件的每个字段都打印出来,有人可以帮我吗?

请阅读并遵循 How to create a Minimal, Complete, and Verifiable example。您应该删除所有与服务器相关的内容,并缩减示例数据的大小。


下面的代码段沿用了您的代码思路:先获取所有 attribute 元素,然后遍历这些元素。注意:此示例 XML 中的标签名全部为小写,而您的实际 XML 使用的是 AttrValue 这类大小写混合的标签名;XML 区分大小写,使用时请相应调整标签名:

import xml.dom.minidom
from xml.dom.minidom import Node

# Reduced sample document: three <attribute> entries, one with an empty value.
minim = """<?xml version="1.0" encoding="UTF-8"?>
<udsobjectlist>
    <udsobject>
        <handle>cr:908715</handle>
        <attributes>
            <attribute datatype="2002">
                <attrname>ref_num</attrname>
                <attrvalue>497131</attrvalue>
            </attribute>
            <attribute datatype="2002">
                <attrname>support_lev.sym</attrname>
                <attrvalue/>
            </attribute>
            <attribute datatype="2004">
                <attrname>open_date</attrname>
                <attrvalue>1516290907</attrvalue>
            </attribute>
        </attributes>
    </udsobject>
</udsobjectlist>
"""


def _text_or_blank(parent, tag):
    """Return the text of the first ``tag`` element under ``parent``.

    Yields "" when the attribute lookup fails, e.g. because the element is
    empty (``<attrvalue/>``) and therefore has no first child.
    """
    try:
        return parent.getElementsByTagName(tag)[0].firstChild.wholeText
    except AttributeError:
        return ""


dom = xml.dom.minidom.parseString(minim)
nodeList = dom.getElementsByTagName('attribute')

# Child tags to read from each <attribute>, in output order.
attributes = ["attrname", "attrvalue"]

# One [name, value] pair per <attribute> element.
resultado = [
    [_text_or_blank(entry, tag) for tag in attributes]
    for entry in nodeList
]
print(resultado)

打印

[['ref_num', '497131'], ['support_lev.sym', ''], ['open_date', '1516290907']]

更接近您的代码:

# Collect and print the text of every non-empty <attrvalue> element.
nodeList = dom.getElementsByTagName('attrvalue')
# Start from an empty list so the final print shows only the values found here.
resultado = []
for node in nodeList:
    try:
        v = node.firstChild.wholeText
    except AttributeError:
        # firstChild is None for an empty element such as <attrvalue/> (and a
        # non-text child has no wholeText); skip those instead of hiding every
        # possible error behind a bare except.
        continue
    resultado.append(v)
    print(v)
print(resultado)

打印

497131
1516290907
['497131', '1516290907']

正如评论中所建议的,也可以使用 ElementTree(ET)(通常不建议通过索引访问元素,但这足以帮助您入门):

import xml.etree.ElementTree as ET
# Parse the sample document; root is the <udsobjectlist> element.
root = ET.fromstring(minim)

# root[0] is <udsobject>; its second child, root[0][1], is <attributes>.
for child in root[0][1]:
    try:
        name, value = child[0], child[1]
    except IndexError:
        # An <attribute> without both a name and a value child is skipped.
        continue
    print(name.text)
    # .text is None for an empty element such as <attrvalue/>.
    print(value.text)

打印

ref_num
497131
support_lev.sym
None
open_date
1516290907