展平和组合有序字典的各个部分

Flattening and combining parts of an ordered dictionary

我用 xmltodict 把一个 XML 文件解析成了有序字典(OrderedDict)。原始文件和解析结果如下:

XML 文件

<part_report>
    <metadata>
        <data1>apple</data1>
        <data2>car</data2>
        <data3>date</data3>
    </metadata>
    <parts>
        <part>
        <data4>people</data4>
        <data5>cats</data5>
        <data6>244234</data6>
        </part>
        <part>
        <data4>children</data4>
        <data5>dogs</data5>
        <data6>342342</data6>
        </part>
    </parts>
</part_report>

OrderedDict

fullFile:    OrderedDict([('part_report', OrderedDict([('metadata', OrderedDict([('data1', 'apple'), ('data2', 'car'), ('data3', 'date')])), ('parts', OrderedDict([('part', [OrderedDict([('data4', 'people'), ('data5', 'cats'), ('data6', '244234')]), OrderedDict([('data4', 'children'), ('data5', 'dogs'), ('data6', '342342')])])]))]))])

目标与问题:我需要把数据展平,也就是把 metadata 中的数据分别与 parts.part 下每个 part 的数据组合在一起。困难在于 metadata 和 parts 处于同一层级;如果 parts 是 metadata 的子级,我就知道该怎么做了。

我在这个问题上卡了很久。我的思路是先从 OrderedDict 中把各部分数据拆出来(即 metadata + part1、metadata + part2 等),然后再把它们组合起来,但这行不通:代码在倒数第三行尝试组合时失败了。这也许并不是最好的方法,欢迎提出建议。最终目标是把这些数据写入 DynamoDB 数据库。谢谢!!

这是我的代码:

import xmltodict
import json
import boto3
import os
import datetime
from xml.dom import minidom

# Read the whole XML document from disk as text.
with open('dummy.xml', 'r') as f: 
    #dummyxml ='<root>' +  f.read() + '</root>'
    dummyxml =f.read()
print(dummyxml)
# Keep the XML text under a second name; this is what gets parsed below.
my_xml = dummyxml

# Parse the XML text into nested dictionaries with xmltodict.
fullFile = xmltodict.parse(my_xml)
print('fullFile:   ',fullFile)
# Serialize the parsed structure to JSON so it can be inspected.
json_object = json.dumps(fullFile, indent = 4)   
print('json_object:     ',json_object)  

# Goal: flatten the dictionary by combining the metadata part with each of
# the individual entries of the parts part.
    
# NOTE(review): allItems is never used in the visible script.
allItems = [];
# NOTE(review): the parse / dump / print steps below repeat the ones above.
fullFile = xmltodict.parse(my_xml)
print('fullFile:   ',fullFile)
# Serialize the parsed structure to JSON so it can be inspected.
json_object = json.dumps(fullFile, indent = 4)   
print('json_object:     ',json_object)  

# NOTE(review): catalog is only bound when "part_report" is present; without
# that key the loop below raises NameError.
if("part_report" in fullFile):
    catalog = fullFile["part_report"]
# Dump each top-level section of the report.
for key, value in catalog.items():
    print('catalog')
    print(key, value) # the metadata mapping and the parts mapping
    print()
# Fields shared by every part.
metadata_part=catalog["metadata"]
print('metadata_part')
print(metadata_part)
print()
# Container holding the individual part entries.
parts_part = catalog["parts"]
print('parts_part')
print(parts_part)
print()

# Attempt to merge the parts data into the metadata.
for  key,value in parts_part.items():
    cor=(key,value)
    # NOTE(review): this is the step reported as failing. update() expects a
    # mapping or an iterable of key/value pairs, but cor is one (key, value)
    # tuple; update() also mutates metadata_part in place and returns None,
    # so combo could never hold a merged dictionary. For the sample data the
    # value under 'part' appears to be a list of per-part mappings, so each
    # one would have to be merged with a copy of the metadata separately.
    combo=metadata_part.update(cor)
    print('combo')
    print()
import re
import xml.etree.ElementTree as ET

def process(path="test.xml"):
    """Flatten a part report into one tuple per ``<part>`` element.

    Each tuple starts with the text of every metadata field and continues
    with the text of that part's child elements, in document order.

    Args:
        path: XML file to read. Defaults to ``"test.xml"``.

    Returns:
        A list of tuples, one per child of ``<parts>``. The list is empty
        when the document has no such children.
    """
    doc = ET.parse(path)

    # A metadata field is any tag starting with "data" plus a digit.
    # node.iter() also yields the <metadata> element itself, which this
    # pattern filters out.
    data_tag = re.compile(r'data\d')
    metadata = []
    for node in doc.iter('metadata'):
        metadata.extend(
            elem.text for elem in node.iter() if data_tag.match(elem.tag)
        )

    # './parts/*' selects every direct child of the root's <parts> element,
    # i.e. each <part>.
    results = []
    for part in doc.findall('./parts/*'):
        fields = [field.text for field in part]
        results.append(tuple(metadata + fields))

    return results

# Run the flattening once and print the resulting list of tuples.
output = process()
print(output)

将输出以下内容:

[('apple', 'car', 'date', 'people', 'cats', '244234'), ('apple', 'car', 'date', 'children', 'dogs', '342342')]