Python XML 解析 - 获取 XML 文件名
Python XML Parsing - Get XML File Name
我有一个可以正常运行的 Python 2.7 脚本,它会解析同一目录下的所有 XML 文件,并将结果保存到一个 CSV 文件中。
问题
我如何修改以下代码以显示(在 CSV 输出文件中)解析记录来自哪个 XML 文件?
示例 XML 文件
Fruit1.xml
<CASES>
<CASE
Fruit="Apple"
Color="Red">
</CASE>
<CASE
Fruit="Pear"
Color="Yellow">
</CASE>
</CASES>
Fruit2.xml
<CASES>
<CASE
Fruit="Banana"
Color="Yellow">
</CASE>
<CASE
Fruit="Orange"
Color="Orange">
</CASE>
</CASES>
当前代码
from itertools import izip
import xml.etree.ElementTree as et
import os
import csv
def run(files):
    """Parse every ``.xml`` file in a directory and merge them into one tree.

    Args:
        files: Path of the directory to scan. Despite the name, this is a
            directory path; it is handed straight to ``os.listdir``.

    Returns:
        The root element of the first XML file found, extended with the
        children of every other XML file's root element, or ``None`` when
        the directory contains no ``.xml`` files.
    """
    first = None
    # os.listdir returns names in arbitrary order; sort so the merged tree
    # (and anything written from it) is reproducible between runs.
    for filename in sorted(os.listdir(files)):
        if not filename.endswith('.xml'):
            continue
        # os.listdir yields bare names, so join them with the directory.
        # Parsing the bare name only works when the current working
        # directory happens to be `files`.
        e = et.parse(os.path.join(files, filename)).getroot()
        if first is None:
            first = e
        else:
            first.extend(e)
    return first
# Merge all XML files that sit next to this script into a single tree.
e = run(os.path.dirname(os.path.realpath(__file__)))

# Pull one value per <CASE> element for each CSV column.
cases = e.findall('CASE')
fruit = [case.get('Fruit') for case in cases]
color = [case.get('Color') for case in cases]

# Binary mode ('wb') is what the Python 2 csv module expects for output.
with open('parsed.csv', 'wb') as myfile:
    writer = csv.writer(myfile)
    fieldnames = ["fruit","color"]
    writer.writerow(fieldnames)
    # Pair the two columns back up row by row.
    writer.writerows(izip(fruit, color))
当前输出
+--------+--------+
| fruit | color |
+--------+--------+
| Apple | Red |
| Pear | Yellow |
| Banana | Yellow |
| Orange | Orange |
+--------+--------+
期望的输出
+--------+--------+------------+
| fruit | color | filename |
+--------+--------+------------+
| Apple | Red | fruit1.xml |
| Pear | Yellow | fruit1.xml |
| Banana | Yellow | fruit2.xml |
| Orange | Orange | fruit2.xml |
+--------+--------+------------+
我曾尝试使用元组将 XML 文件名与已解析的 XML 元素配对,但未能成功。
成功了:
from itertools import izip
import xml.etree.ElementTree as et
import os
import csv
# Column buffers: index i of each list describes the same <CASE> record.
fruit = []
color = []
filenames = []

# Scan the directory that contains this script.
directory = os.path.dirname(os.path.realpath(__file__))

# os.listdir returns names in arbitrary order; sort for reproducible rows.
for filename in sorted(os.listdir(directory)):
    if not filename.endswith('.xml'):
        continue
    # os.listdir yields bare names, so join them with the directory.
    # Parsing the bare name only works when the script is launched from
    # its own directory.
    e = et.parse(os.path.join(directory, filename)).getroot()
    # A single pass over the cases keeps the three columns aligned by
    # construction instead of relying on three identical findall() calls.
    for x in e.findall('CASE'):
        fruit.append(x.get('Fruit'))
        color.append(x.get('Color'))
        filenames.append(filename)

# Binary mode ('wb') is what the Python 2 csv module expects for output.
with open('parsed.csv', 'wb') as myfile:
    fieldnames = ["fruit","color","filenames"]
    writer = csv.writer(myfile)
    writer.writerow(fieldnames)
    writer.writerows(izip(fruit, color, filenames))
我有一个可以正常运行的 Python 2.7 脚本,它会解析同一目录下的所有 XML 文件,并将结果保存到一个 CSV 文件中。
问题
我如何修改以下代码以显示(在 CSV 输出文件中)解析记录来自哪个 XML 文件?
示例 XML 文件
Fruit1.xml
<CASES>
<CASE
Fruit="Apple"
Color="Red">
</CASE>
<CASE
Fruit="Pear"
Color="Yellow">
</CASE>
</CASES>
Fruit2.xml
<CASES>
<CASE
Fruit="Banana"
Color="Yellow">
</CASE>
<CASE
Fruit="Orange"
Color="Orange">
</CASE>
</CASES>
当前代码
from itertools import izip
import xml.etree.ElementTree as et
import os
import csv
def run(files):
    """Parse every ``.xml`` file in a directory and merge them into one tree.

    Args:
        files: Path of the directory to scan. Despite the name, this is a
            directory path; it is handed straight to ``os.listdir``.

    Returns:
        The root element of the first XML file found, extended with the
        children of every other XML file's root element, or ``None`` when
        the directory contains no ``.xml`` files.
    """
    first = None
    # os.listdir returns names in arbitrary order; sort so the merged tree
    # (and anything written from it) is reproducible between runs.
    for filename in sorted(os.listdir(files)):
        if not filename.endswith('.xml'):
            continue
        # os.listdir yields bare names, so join them with the directory.
        # Parsing the bare name only works when the current working
        # directory happens to be `files`.
        e = et.parse(os.path.join(files, filename)).getroot()
        if first is None:
            first = e
        else:
            first.extend(e)
    return first
# Merge all XML files that sit next to this script into a single tree.
e = run(os.path.dirname(os.path.realpath(__file__)))

# Pull one value per <CASE> element for each CSV column.
cases = e.findall('CASE')
fruit = [case.get('Fruit') for case in cases]
color = [case.get('Color') for case in cases]

# Binary mode ('wb') is what the Python 2 csv module expects for output.
with open('parsed.csv', 'wb') as myfile:
    writer = csv.writer(myfile)
    fieldnames = ["fruit","color"]
    writer.writerow(fieldnames)
    # Pair the two columns back up row by row.
    writer.writerows(izip(fruit, color))
当前输出
+--------+--------+
| fruit | color |
+--------+--------+
| Apple | Red |
| Pear | Yellow |
| Banana | Yellow |
| Orange | Orange |
+--------+--------+
期望的输出
+--------+--------+------------+
| fruit | color | filename |
+--------+--------+------------+
| Apple | Red | fruit1.xml |
| Pear | Yellow | fruit1.xml |
| Banana | Yellow | fruit2.xml |
| Orange | Orange | fruit2.xml |
+--------+--------+------------+
我曾尝试使用元组将 XML 文件名与已解析的 XML 元素配对,但未能成功。
成功了:
from itertools import izip
import xml.etree.ElementTree as et
import os
import csv
# Column buffers: index i of each list describes the same <CASE> record.
fruit = []
color = []
filenames = []

# Scan the directory that contains this script.
directory = os.path.dirname(os.path.realpath(__file__))

# os.listdir returns names in arbitrary order; sort for reproducible rows.
for filename in sorted(os.listdir(directory)):
    if not filename.endswith('.xml'):
        continue
    # os.listdir yields bare names, so join them with the directory.
    # Parsing the bare name only works when the script is launched from
    # its own directory.
    e = et.parse(os.path.join(directory, filename)).getroot()
    # A single pass over the cases keeps the three columns aligned by
    # construction instead of relying on three identical findall() calls.
    for x in e.findall('CASE'):
        fruit.append(x.get('Fruit'))
        color.append(x.get('Color'))
        filenames.append(filename)

# Binary mode ('wb') is what the Python 2 csv module expects for output.
with open('parsed.csv', 'wb') as myfile:
    fieldnames = ["fruit","color","filenames"]
    writer = csv.writer(myfile)
    writer.writerow(fieldnames)
    writer.writerows(izip(fruit, color, filenames))