python 程序无法读取以下 xml 文件
python program can't read following xml file
这是 xml 文件:
<Record name="AlarmRecord">
<column name="Log Serial Number">XXxxXX</column>
<column name="Object Identity">XXxxXXX</column>
</Record>
我拿到的 XML 文件就是这个样子,但我不太明白:我原以为 XML 中每个元素的标签名应该就是列名本身,而不是 `<column name="...">` 这种写法。当我用 Python 读取这个文件时,它识别不出列名。有没有办法改变这种输出?
我使用这个函数读取 xml 文件并解析它
import pandas as pd
import xml.etree.ElementTree as et
def parse_XML(xml_file, df_cols) -> pd.DataFrame:
    """Parse an XML file into a DataFrame with one row per child of the root.

    For every direct child element of the document root, the first name in
    ``df_cols`` is read from that element's attributes, and each remaining
    name is looked up as a child element whose *tag* equals the name; the
    text of that child becomes the cell value.

    Note that child elements are matched by tag only. Children that all share
    one tag and carry the column label in an attribute (for example
    ``<column name="...">``) are therefore not found and produce ``None``.

    Args:
        xml_file: Path or file object accepted by ``ElementTree.parse``.
        df_cols: Column names; ``df_cols[0]`` is an attribute name, the rest
            are child tag names.

    Returns:
        A DataFrame whose columns are ``df_cols``; missing attributes or
        child elements yield ``None``.
    """
    xroot = et.parse(xml_file).getroot()

    rows = []
    for node in xroot:
        # The first column comes from the node's own attributes.
        values = [node.attrib.get(df_cols[0])]
        for tag in df_cols[1:]:
            # Look the child up once and reuse the result.
            child = node.find(tag)
            values.append(child.text if child is not None else None)
        rows.append(dict(zip(df_cols, values)))

    return pd.DataFrame(rows, columns=df_cols)
可以考虑先构建一个字典列表,再把它传给 `DataFrame` 构造函数。假设 XML 的结构如下:
<ExportFileInfo names="AlarmRecords">
<Record name="AlarmRecord">
<column name="Log Serial Number">1000000</column>
<column name="Object Identity">8888888</column>
</Record>
<Record name="AlarmRecord">
<column name="Log Serial Number">2000000</column>
<column name="Object Identity">9999999</column>
</Record>
</ExportFileInfo>
Pandas < 1.3
import pandas as pd
import xml.etree.ElementTree as et
xml_file = "RecordColumnDataFrame.xml"
def parse_XML(xml_file) -> pd.DataFrame:
    """Build a DataFrame from ``<Record>`` elements holding ``<column>`` children.

    Each ``<Record>`` found anywhere below the document root becomes one row.
    Within a record, every ``<column>`` child contributes one cell: its
    ``name`` attribute is the column label and its text is the value.

    Args:
        xml_file: Path or file object accepted by ``ElementTree.parse``.

    Returns:
        A DataFrame with one row per ``<Record>``.

    Raises:
        KeyError: If a ``<column>`` element has no ``name`` attribute.
    """
    xtree = et.parse(xml_file)

    data = []
    for record in xtree.findall(".//Record"):
        # One dict per record: {column label: cell text}.
        row = {
            column.attrib["name"]: column.text
            for column in record.findall("column")
        }
        data.append(row)

    return pd.DataFrame(data)
df = parse_XML(xml_file)
print(df)
# Log Serial Number Object Identity
# 0 1000000 8888888
# 1 2000000 9999999
Pandas 1.3+
另外,在即将发布的 Pandas 1.3(预计约 2021 年 5 月 31 日)中,将可以使用新的 IO 函数 `read_xml`。不过,由于这里的子元素重复使用同一个标签名 `column`,需要传入 `names` 参数来指定列名,并忽略 `@name` 属性。默认解析器是 `lxml`,但也支持 `etree`(也就是您当前使用的解析器):
# Column labels to assign to the repeated <column> children of each <Record>.
dfcols = ["Log Serial Number", "Object Identity"]
# `xml_file` is the path defined in the earlier snippet. Each element matched
# by the XPath becomes one row; because every child shares the tag `column`,
# the labels are supplied through `names` and the `name` attributes are not
# used (elems_only=True).
df = pd.read_xml(xml_file,
xpath = ".//Record",
elems_only = True,
names = dfcols,
parser = "etree")
print(df)
# Expected output:
# Log Serial Number Object Identity
# 0 1000000 8888888
# 1 2000000 9999999
这是 xml 文件:
<Record name="AlarmRecord">
<column name="Log Serial Number">XXxxXX</column>
<column name="Object Identity">XXxxXXX</column>
</Record>
我拿到的 XML 文件就是这个样子,但我不太明白:我原以为 XML 中每个元素的标签名应该就是列名本身,而不是 `<column name="...">` 这种写法。
我使用这个函数读取 xml 文件并解析它
import pandas as pd
import xml.etree.ElementTree as et
def parse_XML(xml_file, df_cols) -> pd.DataFrame:
    """Parse an XML file into a DataFrame with one row per child of the root.

    For every direct child element of the document root, the first name in
    ``df_cols`` is read from that element's attributes, and each remaining
    name is looked up as a child element whose *tag* equals the name; the
    text of that child becomes the cell value.

    Note that child elements are matched by tag only. Children that all share
    one tag and carry the column label in an attribute (for example
    ``<column name="...">``) are therefore not found and produce ``None``.

    Args:
        xml_file: Path or file object accepted by ``ElementTree.parse``.
        df_cols: Column names; ``df_cols[0]`` is an attribute name, the rest
            are child tag names.

    Returns:
        A DataFrame whose columns are ``df_cols``; missing attributes or
        child elements yield ``None``.
    """
    xroot = et.parse(xml_file).getroot()

    rows = []
    for node in xroot:
        # The first column comes from the node's own attributes.
        values = [node.attrib.get(df_cols[0])]
        for tag in df_cols[1:]:
            # Look the child up once and reuse the result.
            child = node.find(tag)
            values.append(child.text if child is not None else None)
        rows.append(dict(zip(df_cols, values)))

    return pd.DataFrame(rows, columns=df_cols)
可以考虑先构建一个字典列表,再把它传给 `DataFrame` 构造函数。假设 XML 的结构如下:
<ExportFileInfo names="AlarmRecords">
<Record name="AlarmRecord">
<column name="Log Serial Number">1000000</column>
<column name="Object Identity">8888888</column>
</Record>
<Record name="AlarmRecord">
<column name="Log Serial Number">2000000</column>
<column name="Object Identity">9999999</column>
</Record>
</ExportFileInfo>
Pandas < 1.3
import pandas as pd
import xml.etree.ElementTree as et
xml_file = "RecordColumnDataFrame.xml"
def parse_XML(xml_file) -> pd.DataFrame:
    """Build a DataFrame from ``<Record>`` elements holding ``<column>`` children.

    Each ``<Record>`` found anywhere below the document root becomes one row.
    Within a record, every ``<column>`` child contributes one cell: its
    ``name`` attribute is the column label and its text is the value.

    Args:
        xml_file: Path or file object accepted by ``ElementTree.parse``.

    Returns:
        A DataFrame with one row per ``<Record>``.

    Raises:
        KeyError: If a ``<column>`` element has no ``name`` attribute.
    """
    xtree = et.parse(xml_file)

    data = []
    for record in xtree.findall(".//Record"):
        # One dict per record: {column label: cell text}.
        row = {
            column.attrib["name"]: column.text
            for column in record.findall("column")
        }
        data.append(row)

    return pd.DataFrame(data)
df = parse_XML(xml_file)
print(df)
# Log Serial Number Object Identity
# 0 1000000 8888888
# 1 2000000 9999999
Pandas 1.3+
另外,在即将发布的 Pandas 1.3(预计约 2021 年 5 月 31 日)中,将可以使用新的 IO 函数 `read_xml`。不过,由于这里的子元素重复使用同一个标签名 `column`,需要传入 `names` 参数来指定列名,并忽略 `@name` 属性。默认解析器是 `lxml`,但也支持 `etree`(也就是您当前使用的解析器):
# Column labels to assign to the repeated <column> children of each <Record>.
dfcols = ["Log Serial Number", "Object Identity"]
# `xml_file` is the path defined in the earlier snippet. Each element matched
# by the XPath becomes one row; because every child shares the tag `column`,
# the labels are supplied through `names` and the `name` attributes are not
# used (elems_only=True).
df = pd.read_xml(xml_file,
xpath = ".//Record",
elems_only = True,
names = dfcols,
parser = "etree")
print(df)
# Expected output:
# Log Serial Number Object Identity
# 0 1000000 8888888
# 1 2000000 9999999