展平和组合有序字典的各个部分
Flattening and combining parts of an ordered dictionary
我读取了一个 XML 文件,并用 xmltodict 将其转换成了有序字典(OrderedDict)。它看起来像这样:
XML 文件
<part_report>
<metadata>
<data1>apple</data1>
<data2>car</data2>
<data3>date</data3>
</metadata>
<parts>
<part>
<data4>people</data4>
<data5>cats</data5>
<data6>244234</data6>
</part>
<part>
<data4>children</data4>
<data5>dogs</data5>
<data6>342342</data6>
</part>
</parts>
</part_report>
OrderedDict
fullFile: OrderedDict([('part_report', OrderedDict([('metadata', OrderedDict([('data1', 'apple'), ('data2', 'car'), ('data3', 'date')])), ('parts', OrderedDict([('part', [OrderedDict([('data4', 'people'), ('data5', 'cats'), ('data6', '244234')]), OrderedDict([('data4', 'children'), ('data5', 'dogs'), ('data6', '342342')])])]))]))])
目标与问题
我需要做的是把数据展平:将 metadata 中的数据分别与 parts 下每个 part 的数据组合在一起。困难在于 metadata 和 parts 处于同一层级;如果 parts 是 metadata 的子级,我就知道该怎么做了。
我一直在为此苦苦挣扎。我尝试把 OrderedDict 中的数据拆分开(即 metadata + part1、metadata + part2 等),然后再将它们组合起来,但没有成功:代码在倒数第三行尝试组合时失败了。这也许不是最好的方法,欢迎提出建议。
最终目标是把这些数据写入 DynamoDB 数据库。谢谢!!
这是我的代码:
import xmltodict
import json
import boto3
import os
import datetime
from xml.dom import minidom
# Script from the question: read dummy.xml, parse it with xmltodict, dump the
# result as JSON for inspection, then try to merge the "metadata" mapping with
# the entries under "parts".
# NOTE(review): block indentation is absent in this listing, so the nesting of
# the statements under `with`, `if` and `for` cannot be read from the layout.
# load file
with open('dummy.xml', 'r') as f:
#dummyxml ='<root>' + f.read() + '</root>'
dummyxml =f.read()
print(dummyxml)
# parse the XML text that was just read into nested ordered dictionaries
my_xml = dummyxml
fullFile = xmltodict.parse(my_xml)
print('fullFile: ',fullFile)
# Serialize the parsed structure to pretty-printed JSON
json_object = json.dumps(fullFile, indent = 4)
print('json_object: ',json_object)
# Goal: flatten the dictionary by combining the metadata part with each of the individual parts
allItems = [];
# The parse / dump / print steps below repeat the ones above on the same my_xml.
fullFile = xmltodict.parse(my_xml)
print('fullFile: ',fullFile)
# Serialize the parsed structure to pretty-printed JSON
json_object = json.dumps(fullFile, indent = 4)
print('json_object: ',json_object)
if("part_report" in fullFile):
catalog = fullFile["part_report"]
for key, value in catalog.items():
print('catalog')
print(key, value) # the metadata OrderedDict and the parts OrderedDict
print()
metadata_part=catalog["metadata"]
print('metadata_part')
print(metadata_part)
print()
parts_part = catalog["parts"]
print('parts_part')
print(parts_part)
print()
for key,value in parts_part.items():
cor=(key,value)
# NOTE(review): dict.update() works in place and returns None, so `combo` would
# be None even on success; `cor` is a single (key, value) tuple, not a mapping
# or a sequence of pairs — presumably this is the failing line the question
# describes. The print after it emits the literal text 'combo', not the variable.
combo=metadata_part.update(cor)
print('combo')
print()
import xml.etree.ElementTree as ET
def process(path="test.xml"):
    """Flatten an XML part report into one tuple per child of ``<parts>``.

    The text of every ``dataN`` element found under a ``<metadata>`` element
    is collected once and prepended to the child texts of each element that
    sits directly under the root's ``<parts>`` element.

    Args:
        path: File to parse. Defaults to ``"test.xml"``, the file the
            original hard-coded.

    Returns:
        A list of tuples; each tuple is the metadata values followed by the
        text of one part's children, in document order.
    """
    # Imported locally: the function uses ``re`` but the surrounding listing
    # never imports it, which made the original raise NameError.
    import re

    data_tag = re.compile(r'data\d')
    doc = ET.parse(path)

    # Values shared by every output row.
    metadata = []
    for node in doc.iter('metadata'):
        # node.iter() also yields the <metadata> node itself; its tag does not
        # start with "data<digit>", so the pattern filters it out.
        for elem in node.iter():
            if data_tag.match(elem.tag):
                metadata.append(elem.text)

    # "./parts/*" is what ElementTree expands the original "./parts/" to:
    # every direct child of the root's <parts> element.
    parts_list = [
        [field.text for field in part]
        for part in doc.findall('./parts/*')
    ]

    return [tuple(metadata + fields) for fields in parts_list]
output = process()
print(output)
将输出以下内容:
[('apple', 'car', 'date', 'people', 'cats', '244234'), ('apple', 'car', 'date', 'children', 'dogs', '342342')]
我读取了一个 XML 文件,并用 xmltodict 将其转换成了有序字典(OrderedDict)。它看起来像这样:
XML 文件
<part_report>
<metadata>
<data1>apple</data1>
<data2>car</data2>
<data3>date</data3>
</metadata>
<parts>
<part>
<data4>people</data4>
<data5>cats</data5>
<data6>244234</data6>
</part>
<part>
<data4>children</data4>
<data5>dogs</data5>
<data6>342342</data6>
</part>
</parts>
</part_report>
OrderedDict
fullFile: OrderedDict([('part_report', OrderedDict([('metadata', OrderedDict([('data1', 'apple'), ('data2', 'car'), ('data3', 'date')])), ('parts', OrderedDict([('part', [OrderedDict([('data4', 'people'), ('data5', 'cats'), ('data6', '244234')]), OrderedDict([('data4', 'children'), ('data5', 'dogs'), ('data6', '342342')])])]))]))])
目标与问题:我需要做的是把数据展平:将 metadata 中的数据分别与 parts 下每个 part 的数据组合在一起。困难在于 metadata 和 parts 处于同一层级;如果 parts 是 metadata 的子级,我就知道该怎么做了。
我一直在为此苦苦挣扎。我尝试把 OrderedDict 中的数据拆分开(即 metadata + part1、metadata + part2 等),然后再将它们组合起来,但没有成功:代码在倒数第三行尝试组合时失败了。这也许不是最好的方法,欢迎提出建议。最终目标是把这些数据写入 DynamoDB 数据库。谢谢!!
这是我的代码:
import xmltodict
import json
import boto3
import os
import datetime
from xml.dom import minidom
# Script from the question: read dummy.xml, parse it with xmltodict, dump the
# result as JSON for inspection, then try to merge the "metadata" mapping with
# the entries under "parts".
# NOTE(review): block indentation is absent in this listing, so the nesting of
# the statements under `with`, `if` and `for` cannot be read from the layout.
# load file
with open('dummy.xml', 'r') as f:
#dummyxml ='<root>' + f.read() + '</root>'
dummyxml =f.read()
print(dummyxml)
# parse the XML text that was just read into nested ordered dictionaries
my_xml = dummyxml
fullFile = xmltodict.parse(my_xml)
print('fullFile: ',fullFile)
# Serialize the parsed structure to pretty-printed JSON
json_object = json.dumps(fullFile, indent = 4)
print('json_object: ',json_object)
# Goal: flatten the dictionary by combining the metadata part with each of the individual parts
allItems = [];
# The parse / dump / print steps below repeat the ones above on the same my_xml.
fullFile = xmltodict.parse(my_xml)
print('fullFile: ',fullFile)
# Serialize the parsed structure to pretty-printed JSON
json_object = json.dumps(fullFile, indent = 4)
print('json_object: ',json_object)
if("part_report" in fullFile):
catalog = fullFile["part_report"]
for key, value in catalog.items():
print('catalog')
print(key, value) # the metadata OrderedDict and the parts OrderedDict
print()
metadata_part=catalog["metadata"]
print('metadata_part')
print(metadata_part)
print()
parts_part = catalog["parts"]
print('parts_part')
print(parts_part)
print()
for key,value in parts_part.items():
cor=(key,value)
# NOTE(review): dict.update() works in place and returns None, so `combo` would
# be None even on success; `cor` is a single (key, value) tuple, not a mapping
# or a sequence of pairs — presumably this is the failing line the question
# describes. The print after it emits the literal text 'combo', not the variable.
combo=metadata_part.update(cor)
print('combo')
print()
import xml.etree.ElementTree as ET
def process(path="test.xml"):
    """Flatten an XML part report into one tuple per child of ``<parts>``.

    The text of every ``dataN`` element found under a ``<metadata>`` element
    is collected once and prepended to the child texts of each element that
    sits directly under the root's ``<parts>`` element.

    Args:
        path: File to parse. Defaults to ``"test.xml"``, the file the
            original hard-coded.

    Returns:
        A list of tuples; each tuple is the metadata values followed by the
        text of one part's children, in document order.
    """
    # Imported locally: the function uses ``re`` but the surrounding listing
    # never imports it, which made the original raise NameError.
    import re

    data_tag = re.compile(r'data\d')
    doc = ET.parse(path)

    # Values shared by every output row.
    metadata = []
    for node in doc.iter('metadata'):
        # node.iter() also yields the <metadata> node itself; its tag does not
        # start with "data<digit>", so the pattern filters it out.
        for elem in node.iter():
            if data_tag.match(elem.tag):
                metadata.append(elem.text)

    # "./parts/*" is what ElementTree expands the original "./parts/" to:
    # every direct child of the root's <parts> element.
    parts_list = [
        [field.text for field in part]
        for part in doc.findall('./parts/*')
    ]

    return [tuple(metadata + fields) for fields in parts_list]
output = process()
print(output)
将输出以下内容:
[('apple', 'car', 'date', 'people', 'cats', '244234'), ('apple', 'car', 'date', 'children', 'dogs', '342342')]