如何将 xml 转换为字典,并在转换时做一些修改?
How to convert an xml to a dictionary with some modifications?
我目前有一个 xml 文件,格式如下:
<?xml version="1.0" encoding="UTF-8" ?>
<Garden>
<id>97</id>
<Flowers>
<id>98</id>
<Type>
<id>99</id>
<Level>
<id>100</id>
</Level>
</Type>
</Flowers>
</Garden>
我想使用 xmltodict 将此 xml 转换为字典,这一步非常简单,但我还想做一点修改。
我希望最终得到的 json 是这样的:
{
"Garden": {
"id": "97",
"state": "0",
"Flowers": {
"id": "98",
"state": "0",
"Type": {
"id": "99",
"state": "0",
"Level": {
"id": "100",
"state": "0"
}
}
}
}
}
我希望能够为所有层级添加默认值 "state": "0"。我真的不知道该怎么做,非常感谢任何帮助。
这是我目前拥有的:
# Read the XML document. Its prolog declares UTF-8, so decode it explicitly
# instead of relying on the platform's default text encoding.
with open("gardenlist.xml", 'r', encoding="utf-8") as file:
    xmlString = file.read()
print(xmlString)
# Parse the XML into a nested mapping and serialise it as indented JSON.
jsonString = json.dumps(xmltodict.parse(xmlString), indent=4)
这只会打印出 json,但其中没有 "state": "0" 这个值。
你可以在拿到字典后递归地完成这件事:检查哪些值本身也是字典,并向其中添加目标键值。参见:
import collections
def addAttr(target):
    """Add ``"state": "0"`` to ``target`` and to every mapping nested inside it.

    The mapping is modified in place. Nested values are followed when they
    are dicts (``OrderedDict`` is a ``dict`` subclass, so both are covered)
    or lists containing dicts.

    Args:
        target: Mutable mapping to tag; an existing ``"state"`` entry is
            overwritten.
    """
    target["state"] = "0"
    # Snapshot the values so the walk never depends on the live dict view.
    for value in list(target.values()):
        # Treat a single value and a list of values uniformly.
        children = value if isinstance(value, list) else [value]
        for child in children:
            if isinstance(child, dict):
                addAttr(child)
# Parse the XML text into a nested mapping.
d1 = xmltodict.parse(xmlString)
# Tag the mapping stored under the "Garden" key; addAttr handles the nesting.
addAttr(d1["Garden"])
我认为正确的做法是先准备好所需的 XML 结构,然后再将其转换为 dict 和 json 字符串:
较复杂的做法:
import xml.etree.ElementTree as ET
import xmltodict
import json
# Load the XML file and take its root element.
tree = ET.parse('gardenlist.xml')
root = tree.getroot()
state_el = ET.Element('state') # prepare `state` node
state_el.text = '0'
# Put the state node at child index 1 of the root element.
root.insert(1, state_el)
def add_state(root, el_to_insert):
    """Insert a copy of ``el_to_insert`` into every descendant that has children.

    Walks the tree below ``root`` depth-first. Each descendant element with
    at least one child receives its own copy of ``el_to_insert`` at child
    index 1. ``root`` itself is not given a copy here, and elements without
    children are left untouched.

    Args:
        root: Element whose descendants are processed.
        el_to_insert: Template element; it is copied, never attached directly.
    """
    import copy  # local import keeps this snippet self-contained

    for el in root:
        if len(el):  # an Element's length is its number of direct children
            # One fresh copy per parent: a single Element object attached to
            # several parents is shared, so changing it in one place would
            # change it everywhere.
            el.insert(1, copy.deepcopy(el_to_insert))
            add_state(el, el_to_insert)
# Add the state node to the elements below the root.
add_state(root, state_el)
# Serialise the tree to text, parse it with xmltodict, and dump it as indented JSON.
json_str = json.dumps(xmltodict.parse(ET.tostring(root, encoding="unicode")), indent=4)
print(json_str)
实际输出:
{
"Garden": {
"id": "97",
"state": "0",
"Flowers": {
"id": "98",
"state": "0",
"Type": {
"id": "99",
"state": "0",
"Level": {
"id": "100",
"state": "0"
}
}
}
}
}
我觉得这是一个自己编写解析器的好机会:
from parsimonious.grammar import Grammar
from parsimonious.nodes import NodeVisitor, RegexNode
# Sample XML document that is handed to the parser further down.
xml = """
<?xml version="1.0" encoding="UTF-8" ?>
<Garden>
<id>97</id>
<Flowers>
<id>98</id>
<Type>
<id>99</id>
<Level>
<id>100</id>
</Level>
</Type>
</Flowers>
</Garden>
"""
class XMLVisitor(NodeVisitor):
    """Parse a small XML subset and turn it into nested dicts.

    The grammar accepts an ``<?xml ...>`` declaration followed by one or more
    elements. Tags are bare word names without attributes. An element either
    wraps plain text (a leaf) or a list of further elements. The name in a
    closing tag is parsed but not compared with the opening tag.

    Every dict that is built, including the outermost one, also receives a
    ``"state"`` key with the integer value ``0``.
    """

    # The grammar text is a runtime string and is kept exactly as written.
    grammar = Grammar(
        r"""
program = root expr+
expr = opentag list closetag
item = (opentag notpar closetag) / expr
list = item+
root = ws? lpar "?xml" notpar rpar
opentag = ws? lpar word rpar ws?
closetag = lpar slash word rpar ws?
lpar = "<"
rpar = ">"
notpar = ~"[^<>]+"
slash = "/"
word = ~"\w+"
ws = ~"\s+"
"""
    )

    def generic_visit(self, node, visited_children):
        """Return the visited children, or the node itself when there are none."""
        if visited_children:
            return visited_children
        return node

    def visit_opentag(self, node, visited_children):
        """Return the tag name of an opening tag."""
        # Children are: optional whitespace, "<", name, ">", optional whitespace.
        name_node = visited_children[2]
        return name_node.text

    def visit_closetag(self, node, visited_children):
        """Return the tag name of a closing tag."""
        # Children are: "<", "/", name, ">", optional whitespace.
        name_node = visited_children[2]
        return name_node.text

    def visit_notpar(self, node, visited_children):
        """Return the matched text (anything without angle brackets)."""
        return node.text

    def visit_item(self, node, visited_children):
        """Return a ``(tag, text)`` pair for a leaf, or the nested element as is."""
        branch = visited_children[0]
        if len(branch) != 3:
            # Second alternative: an already-built nested element.
            return branch
        # First alternative: opening tag, text, closing tag (the last is dropped).
        return (branch[0], branch[1])

    def visit_expr(self, node, visited_children):
        """Return a ``(tag, items)`` pair for an element with child items."""
        return (visited_children[0], visited_children[1])

    def visit_program(self, node, visited_children):
        """Build the result dict from the elements following the declaration."""
        _declaration, elements = visited_children
        return self.__makeDict__(elements)

    def __makeDict__(self, struct, level = 0):
        """Convert a list of ``(key, value)`` pairs into a dict, recursively.

        List values are converted into nested dicts. ``"state"`` is assigned
        inside the loop, so it lands right after the first key and an empty
        ``struct`` yields an empty dict. ``level`` is accepted but not used.
        """
        result = {}
        for key, value in struct:
            if isinstance(value, list):
                value = self.__makeDict__(value)
            result[key] = value
            result["state"] = 0
        return result
# Parse the sample document with the visitor and print what it returns.
visitor = XMLVisitor()
output = visitor.parse(xml)
print(output)
这个易于理解的片段产生了如下输出(注意:与期望的结果相比,最外层多了一个 'state': 0,并且值是整数 0 而不是字符串 "0"):
{'Garden': {'id': '97', 'state': 0, 'Flowers': {'id': '98', 'state': 0, 'Type': {'id': '99', 'state': 0, 'Level': {'id': '100', 'state': 0}}}}, 'state': 0}
我目前有一个 xml 文件,格式如下:
<?xml version="1.0" encoding="UTF-8" ?>
<Garden>
<id>97</id>
<Flowers>
<id>98</id>
<Type>
<id>99</id>
<Level>
<id>100</id>
</Level>
</Type>
</Flowers>
</Garden>
我想使用 xmltodict 将此 xml 转换为字典,这一步非常简单,但我还想做一点修改。
我希望最终得到的 json 是这样的:
{
"Garden": {
"id": "97",
"state": "0",
"Flowers": {
"id": "98",
"state": "0",
"Type": {
"id": "99",
"state": "0",
"Level": {
"id": "100",
"state": "0"
}
}
}
}
}
我希望能够为所有层级添加默认值 "state": "0"。我真的不知道该怎么做,非常感谢任何帮助。
这是我目前拥有的:
# Read the XML document. Its prolog declares UTF-8, so decode it explicitly
# instead of relying on the platform's default text encoding.
with open("gardenlist.xml", 'r', encoding="utf-8") as file:
    xmlString = file.read()
print(xmlString)
# Parse the XML into a nested mapping and serialise it as indented JSON.
jsonString = json.dumps(xmltodict.parse(xmlString), indent=4)
这只会打印出 json,但其中没有 "state": "0" 这个值。
你可以在拿到字典后递归地完成这件事:检查哪些值本身也是字典,并向其中添加目标键值。参见:
import collections
def addAttr(target):
    """Add ``"state": "0"`` to ``target`` and to every mapping nested inside it.

    The mapping is modified in place. Nested values are followed when they
    are dicts (``OrderedDict`` is a ``dict`` subclass, so both are covered)
    or lists containing dicts.

    Args:
        target: Mutable mapping to tag; an existing ``"state"`` entry is
            overwritten.
    """
    target["state"] = "0"
    # Snapshot the values so the walk never depends on the live dict view.
    for value in list(target.values()):
        # Treat a single value and a list of values uniformly.
        children = value if isinstance(value, list) else [value]
        for child in children:
            if isinstance(child, dict):
                addAttr(child)
# Parse the XML text into a nested mapping.
d1 = xmltodict.parse(xmlString)
# Tag the mapping stored under the "Garden" key; addAttr handles the nesting.
addAttr(d1["Garden"])
我认为正确的做法是先准备好所需的 XML 结构,然后再将其转换为 dict 和 json 字符串:
较复杂的做法:
import xml.etree.ElementTree as ET
import xmltodict
import json
# Load the XML file and take its root element.
tree = ET.parse('gardenlist.xml')
root = tree.getroot()
state_el = ET.Element('state') # prepare `state` node
state_el.text = '0'
# Put the state node at child index 1 of the root element.
root.insert(1, state_el)
def add_state(root, el_to_insert):
    """Insert a copy of ``el_to_insert`` into every descendant that has children.

    Walks the tree below ``root`` depth-first. Each descendant element with
    at least one child receives its own copy of ``el_to_insert`` at child
    index 1. ``root`` itself is not given a copy here, and elements without
    children are left untouched.

    Args:
        root: Element whose descendants are processed.
        el_to_insert: Template element; it is copied, never attached directly.
    """
    import copy  # local import keeps this snippet self-contained

    for el in root:
        if len(el):  # an Element's length is its number of direct children
            # One fresh copy per parent: a single Element object attached to
            # several parents is shared, so changing it in one place would
            # change it everywhere.
            el.insert(1, copy.deepcopy(el_to_insert))
            add_state(el, el_to_insert)
# Add the state node to the elements below the root.
add_state(root, state_el)
# Serialise the tree to text, parse it with xmltodict, and dump it as indented JSON.
json_str = json.dumps(xmltodict.parse(ET.tostring(root, encoding="unicode")), indent=4)
print(json_str)
实际输出:
{
"Garden": {
"id": "97",
"state": "0",
"Flowers": {
"id": "98",
"state": "0",
"Type": {
"id": "99",
"state": "0",
"Level": {
"id": "100",
"state": "0"
}
}
}
}
}
我觉得这是一个自己编写解析器的好机会:
from parsimonious.grammar import Grammar
from parsimonious.nodes import NodeVisitor, RegexNode
# Sample XML document that is handed to the parser further down.
xml = """
<?xml version="1.0" encoding="UTF-8" ?>
<Garden>
<id>97</id>
<Flowers>
<id>98</id>
<Type>
<id>99</id>
<Level>
<id>100</id>
</Level>
</Type>
</Flowers>
</Garden>
"""
class XMLVisitor(NodeVisitor):
    """Parse a small XML subset and turn it into nested dicts.

    The grammar accepts an ``<?xml ...>`` declaration followed by one or more
    elements. Tags are bare word names without attributes. An element either
    wraps plain text (a leaf) or a list of further elements. The name in a
    closing tag is parsed but not compared with the opening tag.

    Every dict that is built, including the outermost one, also receives a
    ``"state"`` key with the integer value ``0``.
    """

    # The grammar text is a runtime string and is kept exactly as written.
    grammar = Grammar(
        r"""
program = root expr+
expr = opentag list closetag
item = (opentag notpar closetag) / expr
list = item+
root = ws? lpar "?xml" notpar rpar
opentag = ws? lpar word rpar ws?
closetag = lpar slash word rpar ws?
lpar = "<"
rpar = ">"
notpar = ~"[^<>]+"
slash = "/"
word = ~"\w+"
ws = ~"\s+"
"""
    )

    def generic_visit(self, node, visited_children):
        """Return the visited children, or the node itself when there are none."""
        if visited_children:
            return visited_children
        return node

    def visit_opentag(self, node, visited_children):
        """Return the tag name of an opening tag."""
        # Children are: optional whitespace, "<", name, ">", optional whitespace.
        name_node = visited_children[2]
        return name_node.text

    def visit_closetag(self, node, visited_children):
        """Return the tag name of a closing tag."""
        # Children are: "<", "/", name, ">", optional whitespace.
        name_node = visited_children[2]
        return name_node.text

    def visit_notpar(self, node, visited_children):
        """Return the matched text (anything without angle brackets)."""
        return node.text

    def visit_item(self, node, visited_children):
        """Return a ``(tag, text)`` pair for a leaf, or the nested element as is."""
        branch = visited_children[0]
        if len(branch) != 3:
            # Second alternative: an already-built nested element.
            return branch
        # First alternative: opening tag, text, closing tag (the last is dropped).
        return (branch[0], branch[1])

    def visit_expr(self, node, visited_children):
        """Return a ``(tag, items)`` pair for an element with child items."""
        return (visited_children[0], visited_children[1])

    def visit_program(self, node, visited_children):
        """Build the result dict from the elements following the declaration."""
        _declaration, elements = visited_children
        return self.__makeDict__(elements)

    def __makeDict__(self, struct, level = 0):
        """Convert a list of ``(key, value)`` pairs into a dict, recursively.

        List values are converted into nested dicts. ``"state"`` is assigned
        inside the loop, so it lands right after the first key and an empty
        ``struct`` yields an empty dict. ``level`` is accepted but not used.
        """
        result = {}
        for key, value in struct:
            if isinstance(value, list):
                value = self.__makeDict__(value)
            result[key] = value
            result["state"] = 0
        return result
# Parse the sample document with the visitor and print what it returns.
visitor = XMLVisitor()
output = visitor.parse(xml)
print(output)
这个易于理解的片段产生了如下输出(注意:与期望的结果相比,最外层多了一个 'state': 0,并且值是整数 0 而不是字符串 "0"):
{'Garden': {'id': '97', 'state': 0, 'Flowers': {'id': '98', 'state': 0, 'Type': {'id': '99', 'state': 0, 'Level': {'id': '100', 'state': 0}}}}, 'state': 0}