如何将此 XML 解析为 python 数组或字典?

How to parse this XML to python array or dict?

这是我正在尝试解析的 XML 的片段:

<DSMs>
<DSM class="ACE" order="320"/>
<DSM class="ACS" order="1900"/>
<DSM class="Aironet" order="1050"/>
<DSM class="Apache" order="4700"/>
<DSM class="AppSecDbProtect" order="1477"/>
<DSM class="ArborNetworksPravail" order="1554">
  <Thresholds>
    <Threshold name="MinNumEvents" value="5"/>
    <Threshold name="AbandonAfterSuccessiveFailures" value="3"/>
  </Thresholds>
  <Templates>
    <Template name="DeviceName" value="Arbor Networks Pravail @ $$SOURCE_ADDRESS$$"/>
  </Templates>
</DSM>
<DSM class="ARN" order="2000"/>
<DSM class="ArpeggioSIFTIT" order="1553"/>
<DSM class="ArubaClearPass" order="545">
  <Thresholds>
    <Threshold name="MinNumEvents" value="5"/>
    <Threshold name="AbandonAfterSuccessiveFailures" value="3"/>
  </Thresholds>
  <Templates>
    <Template name="DeviceName" value="Aruba ClearPass Policy Manager @ $$SOURCE_ADDRESS$$"/>
    <Template name="DeviceDescription" value="Aruba ClearPass Policy Manager Device"/>
  </Templates>
 </DSM>
</DSMs>  

到目前为止我做了什么(部分代码):

# Collect the attribute dict of every element nested under a <DSMs> child of
# `root` (`root` is the already-parsed document root, defined before this
# snippet).
ta_dsms = []
for level1 in root:
    if level1.tag == 'DSMs':
        for level2 in level1:
            # Only the element's own attributes are captured here; nested
            # child elements of level2 are not visited.
            ta_dsm = level2.attrib
            ta_dsms.append(ta_dsm)
# Call form of print: the bare `print x` statement is a syntax error on
# Python 3, while print(x) with a single argument works on Python 2 and 3.
print(ta_dsms)

ta_dsms 的当前输出如下:

 [{'class': 'ACE', 'order': '320'}, 
  {'class': 'ACS', 'order': '1900'}, 
 ...]

我的问题是:有什么优雅的方法可以获取 Thresholds 和 Templates 的信息,并把它们添加到我的数组中?只有部分 DSM 有子元素 (children)。我整天都被困在这个问题上。谢谢你救了我的命!

这是你想要的结果吗?

import xml.etree.ElementTree as ET
# Parse the document. The loop below treats the children of `root` as the
# <DSM> entries, so `root` is expected to be the <DSMs> container element.
tree = ET.parse('data.xml')
root = tree.getroot()

# Container tags whose children are collected as a list of attribute dicts.
CHILD_GROUPS = ("Thresholds", "Templates")

ta_dsms = []
for level1 in root:
    if level1.tag != 'DSM':
        continue

    # Start from the DSM's own attributes (e.g. class, order).
    d = dict(level1.attrib)

    # Walk the children exactly once per DSM. Doing this inside the
    # attribute loop would repeat the work for every attribute and skip the
    # children entirely for a DSM that has no attributes.
    for level2 in level1:
        if level2.tag in CHILD_GROUPS:
            # Iterate the element directly: Element.getchildren() was
            # removed in Python 3.9.
            d[level2.tag] = [dict(c.attrib) for c in level2]

    ta_dsms.append(d)


print(ta_dsms)

结果是:

[  
   {  
      "class":"ACE",
      "order":"320"
   },
   {  
      "class":"ACS",
      "order":"1900"
   },
   {  
      "class":"Aironet",
      "order":"1050"
   },
   {  
      "class":"Apache",
      "order":"4700"
   },
   {  
      "class":"AppSecDbProtect",
      "order":"1477"
   },
   {  
      "class":"ArborNetworksPravail",
      "Thresholds":[  
         {  
            "name":"MinNumEvents",
            "value":"5"
         },
         {  
            "name":"AbandonAfterSuccessiveFailures",
            "value":"3"
         }
      ],
      "Templates":[  
         {  
            "name":"DeviceName",
            "value":"Arbor Networks Pravail @ $$SOURCE_ADDRESS$$"
         }
      ],
      "order":"1554"
   },
   {  
      "class":"ARN",
      "order":"2000"
   },
   {  
      "class":"ArpeggioSIFTIT",
      "order":"1553"
   },
   {  
      "class":"ArubaClearPass",
      "Thresholds":[  
         {  
            "name":"MinNumEvents",
            "value":"5"
         },
         {  
            "name":"AbandonAfterSuccessiveFailures",
            "value":"3"
         }
      ],
      "Templates":[  
         {  
            "name":"DeviceName",
            "value":"Aruba ClearPass Policy Manager @ $$SOURCE_ADDRESS$$"
         },
         {  
            "name":"DeviceDescription",
            "value":"Aruba ClearPass Policy Manager Device"
         }
      ],
      "order":"545"
   }
]
from lxml import etree

class XmlParser(object):
    """Load an XML file and convert its top-level elements to plain data.

    Typical use::

        XmlParser(path).get_result_dict()
    """

    def __init__(self, filename, **kwargs):
        """Remember *filename* and read its contents immediately.

        Args:
            filename: Path of the XML file to read.
            **kwargs: Extra values stored verbatim as instance attributes.
        """
        # Per-instance list: a class-level list would be shared by every
        # parser instance and keep growing across them.
        self.results = []
        self.__dict__.update(kwargs)
        self.filename = filename
        self._process()

    def _process(self):
        """Read the whole file into ``self.data``."""
        # The context manager closes the handle even if read() raises.
        with open(self.filename, "r") as f:
            self.data = f.read()

    def get_result_dict(self):
        """Parse the loaded text and return the mapped top-level elements.

        Returns:
            A list with one entry per child of the document root, each
            produced by :meth:`map_by_keys`.
        """
        self._parse()
        return self._map_to_dict()

    def _map_to_dict(self):
        """Map every child of ``self.root`` and return the resulting list."""
        # Rebuild from scratch so that calling this more than once does not
        # append the same rows again.
        self.results = [self.map_by_keys(row) for row in self.root]
        return self.results

    def _parse(self):
        """Parse ``self.data`` and store the root element on ``self.root``."""
        self.root = etree.fromstring(self.data)

    def map_by_keys(self, row):
        """Convert one element to plain Python data, recursing into children.

        The element's attributes decide how it is mapped:

        * It has a ``name`` attribute (e.g. a Threshold or Template entry):
          returns ``(tag, {'name': ..., 'value': ...})``.
        * It has neither ``name`` nor ``class`` (e.g. a Thresholds or
          Templates container): returns ``(tag, [{child_tag: child_value},
          ...])`` with one single-key dict per child.
        * Otherwise (it has ``class``, i.e. a DSM record): returns a dict
          with ``class`` and ``order`` plus one key per child tag.

        Args:
            row: The element to convert.

        Returns:
            A ``(tag, value)`` tuple for the first two cases, a dict for the
            last one.
        """
        if row.get('name') is not None:
            # Leaf entry: only its name/value pair is kept.
            return (row.tag, {'name': row.get('name'), 'value': row.get('value')})

        if row.get('class') is None:
            # Container element: wrap each mapped child in its own dict.
            # Iterating the element replaces the deprecated getchildren().
            children = []
            for child in row:
                key, values = self.map_by_keys(child)
                children.append({key: values})
            return (row.tag, children)

        # DSM record: children are expected to map to (tag, value) tuples,
        # which become extra keys on the record.
        unit = {'class': row.get('class'), 'order': row.get('order')}
        for child in row:
            key, values = self.map_by_keys(child)
            unit[key] = values
        return unit


# Run the parser on the sample document and show the mapped result.
file = './x.xml'
parser = XmlParser(filename=file)
result = parser.get_result_dict()
print(result)

打印:

[{'class': 'ACE', 'order': '320'}, {'class': 'ACS', 'order': '1900'}, {'class': 'Aironet', 'order': '1050'}, {'class': 'Apache', 'order': '4700'}, {'class': 'AppSecDbProtect', 'order': '1477'}, {'class': 'ArborNetworksPravail', 'order': '1554', 'Thresholds': [{'Threshold': {'value': '5', 'name': 'MinNumEvents'}}, {'Threshold': {'value': '3', 'name': 'AbandonAfterSuccessiveFailures'}}], 'Templates': [{'Template': {'value': 'Arbor Networks Pravail @ $$SOURCE_ADDRESS$$', 'name': 'DeviceName'}}]}, {'class': 'ARN', 'order': '2000'}, {'class': 'ArpeggioSIFTIT', 'order': '1553'}, {'class': 'ArubaClearPass', 'order': '545', 'Thresholds': [{'Threshold': {'value': '5', 'name': 'MinNumEvents'}}, {'Threshold': {'value': '3', 'name': 'AbandonAfterSuccessiveFailures'}}], 'Templates': [{'Template': {'value': 'Aruba ClearPass Policy Manager @ $$SOURCE_ADDRESS$$', 'name': 'DeviceName'}}, {'Template': {'value': 'Aruba ClearPass Policy Manager Device', 'name': 'DeviceDescription'}}]}]

要理解递归,你必须先理解递归。