通过 python 和 pandas 从 xml 获取特定属性名称及其值到 csv

getting specific attribute name and it's value from xml to csv via python and pandas

我正在通过 python 和 pandas 库将嵌套的 xml 文件转换为 csv。 xml 中有很多属性,但我只想导出特定属性名称及其在 csv 文件中的值。

    <TestReport xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" id="33357fcc-edf2-3ad4-a6f2-588a9492f804" source="OI Stand" converter="irs_xml.xsl" version="1.6">
      <FileType>Test Report</FileType>
      <CreationTime>2020-09-28T09:42:07.3875529+02:00</CreationTime>
      <TestExecution start="2265.595799" time="68.619646" id="009b96f0-9d26-4226-9c9a-7364dcab0467" ts="2020-09-28T09:40:58">                     
        <Attr name="ProductNumber" type="String" value="11266" />        
        <Attr name="SerialNumber" type="Number" value="324" />      
        <TestSteps>
          <TestStep measid="CNID_0210" measname="UART Read Node ID" status="Passed" datatype="Boolean" value="True" stepname="[CNID_0210] UART Read Node ID" steptype="PassFailTest" start="2277.5219649" time="0.0280892" stepid="ID#:Ua1/2g1/6hGrTAABBUH5EC" group="[CNID] Config Node ID">
            <Attr name="ReadNodeId Cmd" type="String" value="RAC" />
            <Attr name="NodeId" type="String" value=" 255" />
          </TestStep>
          <TestStep measid="MOCV_1510" measname="Check OCV" status="Passed" datatype="Number" limlo="49.000" value="49.429" limhi="51.400" unit="V" stepname="[MOCV_1510] Check OCV" steptype="Test" start="2309.5348324" time="5.72E-05" stepid="ID#:pIldp+t98hG/wTz4Yu5AjB" group="[MOCV] Measure OCV" />
          <TestStep>
          .
          .
          </TestSteps>
          <PartNr />
      </TestExecution>
   </TestReport>

我的代码(显示所有属性)

import xml.etree.ElementTree as ET
import pandas as pd
tree = ET.parse('324.irp')
root = tree.getroot()
rows = []
df_columns = ['CreationTime', 'FileType', 'SerialNumber']
CreationTime = root.find("CreationTime").text
FileType = root.find("FileType").text
for child in root.iter('Attr'):
        name = child.attrib.get('name')
        value = child.attrib.get('value')
        print (name, value)
for elem in root.iter('TestStep'):
        measname = elem.attrib.get('measname')
        value = elem.attrib.get('value')
        print (measname, value)
if CreationTime and FileType and value:
        rows.append({"CreationTime": CreationTime, "FileType": FileType, "value": value})
        print(CreationTime, FileType, value)
df = pd.DataFrame(rows, columns= df_columns)
df.to_csv(r'C:\Users\PycharmProjects\pythonProject\output.csv', index = False)

预期结果:

CreationTime,FileType,SerialNumber,Check OCV
2020-09-28T09:42:07.3875529+02:00,Test Report,324,49.429V

你能请任何人帮助我如何打印特定属性名称(SerialNumber,Check OCV)及其值(324,49.429V)。出许多属性?

试试下面的方法

import xml.etree.ElementTree as ET
import pandas as pd

xml = '''<?xml version="1.0" encoding="UTF-8"?>
<TestReport xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" id="33357fcc-edf2-3ad4-a6f2-588a9492f804" source="OI Stand" converter="irs_xml.xsl" version="1.6">
   <FileType>Test Report</FileType>
   <CreationTime>2020-09-28T09:42:07.3875529+02:00</CreationTime>
   <TestExecution start="2265.595799" time="68.619646" id="009b96f0-9d26-4226-9c9a-7364dcab0467" ts="2020-09-28T09:40:58">
      <Attr name="ProductNumber" type="String" value="11266" />
      <Attr name="SerialNumber" type="Number" value="324" />
      <TestSteps>
         <TestStep measid="CNID_0210" measname="UART Read Node ID" status="Passed" datatype="Boolean" value="True" stepname="[CNID_0210] UART Read Node ID" steptype="PassFailTest" start="2277.5219649" time="0.0280892" stepid="ID#:Ua1/2g1/6hGrTAABBUH5EC" group="[CNID] Config Node ID">
            <Attr name="ReadNodeId Cmd" type="String" value="RAC" />
            <Attr name="NodeId" type="String" value=" 255" />
         </TestStep>
         <TestStep measid="MOCV_1510" measname="Check OCV" status="Passed" datatype="Number" limlo="49.000" value="49.429" limhi="51.400" unit="V" stepname="[MOCV_1510] Check OCV" steptype="Test" start="2309.5348324" time="5.72E-05" stepid="ID#:pIldp+t98hG/wTz4Yu5AjB" group="[MOCV] Measure OCV" />
         <TestStep />
      </TestSteps>
      <PartNr />
   </TestExecution>
</TestReport>'''

def get_ele(name):
  expr = f'.//{name}'
  return root.find(expr).text

def get_attr(name):
  expr = f'.//Attr[@name="{name}"]'
  return root.find(expr).attrib['value']  

df_columns = {'CreationTime':get_ele, 'FileType':get_ele, 'SerialNumber':get_attr}

root = ET.fromstring(xml)
data = [{name :func(name) for name,func in df_columns.items()}]

df = pd.DataFrame(data)
print(df)

输出

                      CreationTime     FileType SerialNumber
0  2020-09-28T09:42:07.3875529+02:00  Test Report          324