Python 中的 Ruamel Yaml 转储:需要确保根据数据级别引用特定键
Ruamel Yaml dump in Python: Need to ensure specific keys are quoted based on level of the data
面临的问题:需要一个在使用 ruamel.yaml 转储时,根据数据所处的位置(层级)来决定是否加引号的解决方案,而不是根据键名匹配来决定加引号或不加引号。
请参阅之前发布的类似问题(原文此处的链接已丢失),它提供了可靠的背景信息。
当前数据集:
# Innermost mapping; it is embedded below as the only element of the 'car' list.
data2 = {'bar': 'var', 'lar': 'nar'}
# Root mapping: a single key whose value is a list holding one mapping.
data = {
    'foo': [
        {
            'zar': 'mar',
            'car': [data2],
        },
    ],
}
当前 Hacky 解决方案:
import ruamel.yaml
import yaml
import sys
# Sample data: the contents change all the time, the structure never does.
# Innermost mapping; it is embedded below as the only element of the 'car' list.
data2 = {'bar': 'var', 'lar': 'nar'}
# Root mapping: a single key whose value is a list holding one mapping.
data = {
    'foo': [
        {
            'zar': 'mar',
            'car': [data2],
        },
    ],
}
# Key representer that leaves a hard-coded set of key names unquoted.
# Not a durable solution: the key names are liable to change weekly, while the
# data structure stays the same.
def non_quoted_key(self, data):
    """Represent a mapping key, leaving selected key names unquoted.

    Args:
        self: The representer instance; only ``self.represent_data`` is used.
        data: The mapping key to represent.

    Returns:
        Whatever ``self.represent_data`` returns for the (possibly wrapped) key.
    """
    if data in ('foo', 'bar', 'lar'):
        # Wrap the key as a plain scalar so it is not emitted with the
        # default (double-quoted) style.
        data = ruamel.yaml.scalarstring.PlainScalarString(data)
    return self.represent_data(data)
# Execute the dump.
# NOTE: this rebinds the name `yaml`, shadowing the module brought in by the
# earlier `import yaml`.
yaml = ruamel.yaml.YAML()
yaml.default_flow_style = False
yaml.default_style = '"'  # request double quotes as the default scalar style
yaml.indent = 4
yaml.explicit_start = True  # presumably what produces the leading '---' in the output
# Install the key representer defined above so the hard-coded keys stay unquoted.
yaml.Representer.represent_key = non_quoted_key
yaml.dump(data, sys.stdout)
当前输出(目前可以正常工作,但一旦键名发生变化,就会再次失效):
---
foo:
- "zar": "mar"
"car":
- bar: "var"
lar: "nar"
当前解决方案的问题:键在不断变化,并且不断有新的键加入,但数据结构始终保持不变。数据结构最高层级的键始终不能加引号,下一层字典中的键必须加引号,如果这些键的值又包含字典,则这些子键必须始终加引号。
加分题:
如何确保转储输出中显示序列([)和映射({)的符号?
举一个直观的例子,使用上面的数据,你如何确保输出总是显示如下所示的序列和映射?
---
foo:
[
{
"car":
[
{
bar: "var",
lar: "nar"
}
],
"zar": "mar"
},
]
正如其他一些问题中所示,通常最好先从期望的输出入手,看看 ruamel.yaml 能否往返(round-trip)重现它。由于 ruamel.yaml 主要是为保留块样式 YAML 中的注释而设计的(空行被视为上一行行尾(EOL)注释的延续,无论该注释是否为空),我添加了一些注释,看看会发生什么。
import sys
import ruamel.yaml
# The requested output, with a few comments (c1..c3) added to see what a
# load/dump round trip does with them.
# NOTE(review): the nesting indentation inside this literal appears to have
# been lost — confirm that it still loads as intended.
yaml_str = """---
foo:
[
{
"car":
[ # c1
{
bar: "var", # c2
lar: "nar", # c3
}
],
"zar": "mar"
},
]
"""
yaml = ruamel.yaml.YAML()
yaml.indent(mapping=4, sequence=4, offset=2)
yaml.preserve_quotes = True  # presumably keeps the quoting of loaded scalars — verify
yaml.explicit_start = True
data = yaml.load(yaml_str)
print(data)  # show the loaded Python structure
yaml.dump(data, sys.stdout)  # show what gets written back
给出:
ordereddict([('foo', [ordereddict([('car', [ordereddict([('bar', 'var'), ('lar', 'nar')])]), ('zar', 'mar')])])])
---
foo: [{"car": [{bar: "var", lar: "nar"}], "zar": "mar"}]
由此可以看出,得到引号应该不成问题,但让流样式中的各项分别占据单独的行会是个问题,因为在流样式输出中,插入空注释无济于事。
当您从 data 和 data2 出发时,可以递归遍历数据结构,并根据递归深度转换每个键和/或值。您没有明确说明不加引号的规则是什么,但我在这里假设它针对的是原始数据结构的根级键。
import sys
import ruamel.yaml
DQ = ruamel.yaml.scalarstring.DoubleQuotedScalarString
Pl = ruamel.yaml.scalarstring.PlainScalarString
def set_non_root_strings_double_quoted(d, lvl=0):
    """Mark the strings of a nested dict/list structure for quoting, in place.

    String keys of a dict visited at ``lvl == 0`` are wrapped as ``Pl``
    (plain); string keys at any deeper level are wrapped as ``DQ``
    (double quoted). Keys that are already ``DQ`` or ``Pl`` are left alone.
    Every string value is wrapped as ``DQ``.

    Args:
        d: The dict, list, string or other value to process. Dicts and lists
            are modified in place.
        lvl: Current nesting depth; it increases by one for every dict or
            list that is entered.

    Returns:
        ``d`` itself for dicts and lists, a ``DQ`` copy for strings, and the
        unchanged value for anything else.
    """
    if isinstance(d, dict):
        for k in list(d.keys()):  # snapshot of the keys: d changes while looping
            value = d.pop(k)
            # Only str keys are wrapped. Wrapping e.g. an int key would turn
            # it into a different (string) key and lose the original one.
            if isinstance(k, str) and not isinstance(k, (DQ, Pl)):
                k = DQ(k) if lvl > 0 else Pl(k)
            d[k] = set_non_root_strings_double_quoted(value, lvl=lvl + 1)
    elif isinstance(d, list):
        for idx, elem in enumerate(d):
            d[idx] = set_non_root_strings_double_quoted(elem, lvl=lvl + 1)
    elif isinstance(d, str):
        d = DQ(d)
    return d
data2 = {'bar': 'var', 'lar': 'nar'}
data = {'foo': [{
'zar': 'mar',
'car': [data2]
}]
}
# data2 is converted on its own first (lvl=0), so its keys become Pl; the
# later pass over data skips keys that are already DQ or Pl.
set_non_root_strings_double_quoted(data2)
set_non_root_strings_double_quoted(data)
yaml = ruamel.yaml.YAML()
yaml.explicit_start = True
yaml.indent(mapping=4, sequence=4, offset=2)
yaml.dump(data, sys.stdout)
给出:
---
foo:
- "zar": "mar"
"car":
- bar: "var"
lar: "nar"
由于 set_non_root_strings_double_quoted 是就地修改的,您可以在把 data2 赋值进 data 之后,再分别对 data2 和 data 运行它;但和往常一样,顺序很重要:每个 str 只会被设置为 DQ 或 Pl 一次。
您不能指望通过把 dict/list 转换为 CommentedMap/CommentedSeq 并附加注释来实现多行效果,但可以借助这种转换让 data 根节点下的内容以流样式输出(data2 的根同样没有被设置为流样式,但在转储时会被强制使用流样式,因为在 YAML 中流样式内部不能包含块样式)。
import sys
import ruamel.yaml
DQ = ruamel.yaml.scalarstring.DoubleQuotedScalarString
Pl = ruamel.yaml.scalarstring.PlainScalarString
Map = ruamel.yaml.CommentedMap
Seq = ruamel.yaml.CommentedSeq
def set_non_root_strings_double_quoted(d, lvl=0):
    """Return a copy of a nested dict/list structure with quoting applied.

    Dicts become ``Map`` and lists become ``Seq``. String keys of a dict
    visited at ``lvl == 0`` are wrapped as ``Pl`` (plain); string keys at any
    deeper level are wrapped as ``DQ`` (double quoted). Keys that are already
    ``DQ`` or ``Pl`` are kept as they are. Every string value is wrapped as
    ``DQ``. A dict or list visited at ``lvl == 1`` is switched to flow style.

    Args:
        d: The dict, list, string or other value to convert; it is not
            modified.
        lvl: Current nesting depth; it increases by one for every dict or
            list that is entered.

    Returns:
        The converted ``Map``/``Seq``/``DQ`` object, or ``d`` unchanged when
        it is none of dict, list or str.
    """
    if isinstance(d, dict):
        res = Map()
        if lvl == 1:
            res.fa.set_flow_style()
        for k in d:
            value = d[k]  # look up with the original key, before wrapping it
            # Only str keys are wrapped. Wrapping e.g. an int key would turn
            # it into a string that no longer matches the key in d.
            if isinstance(k, str) and not isinstance(k, (DQ, Pl)):
                k = DQ(k) if lvl > 0 else Pl(k)
            res[k] = set_non_root_strings_double_quoted(value, lvl=lvl + 1)
            res.yaml_set_comment_before_after_key(k, after='\n')
    elif isinstance(d, list):
        res = Seq()
        if lvl == 1:
            res.fa.set_flow_style()
        for elem in d:
            res.append(set_non_root_strings_double_quoted(elem, lvl=lvl + 1))
    elif isinstance(d, str):
        res = DQ(d)
    else:
        res = d
    return res
data2 = {'bar': 'var', 'lar': 'nar'}
# Convert data2 on its own first (lvl=0) and embed the converted copy below.
data2 = set_non_root_strings_double_quoted(data2)
data = {'foo': [{
'car': [data2], # <<< in the bonus question car & zar have a different order, so change here
'zar': 'mar'
}]
}
# The conversion returns a new structure, so the result has to be rebound.
data = set_non_root_strings_double_quoted(data)
yaml = ruamel.yaml.YAML()
yaml.explicit_start = True
yaml.indent(mapping=4, sequence=4, offset=2)
yaml.dump(data, sys.stdout) # just to show how it looks before postprocessing
给出:
---
foo: [{"car": [{bar: "var", lar: "nar"}], "zar": "mar"}]
并且您可以在转储过程中对输出进行后处理:
def multiline_flowstyle(s):
    """Spread the flow-style collections in dumped YAML text over several lines.

    The text is scanned one character at a time. Every opening bracket or
    brace goes onto a new, deeper-indented line, every closing one onto a new,
    shallower-indented line, and the space after a separating comma is
    replaced by a line break. Text inside double quotes is copied untouched,
    so brackets, braces and commas inside a quoted scalar are not treated as
    structure.

    Args:
        s: The dumped YAML text.

    Returns:
        The reformatted text.
    """
    indent = ' ' * 4
    size = len(s)
    idx = -1

    def peek():
        # Character following the current one, or None at the end of the input.
        nxt = idx + 1
        return s[nxt] if nxt < size else None

    out = []  # collected pieces, joined once at the end
    lvl = 0
    in_quotes = False  # you could have a bracket or curly brace within a quoted value
    escaped = False  # previous character inside the quotes was a backslash
    comma_found = False
    while idx < size - 1:
        idx += 1
        ch = s[idx]
        if comma_found:
            comma_found = False
            out.append(',\n' + (lvl * indent))
            if ch != ' ':  # a space after the comma is swallowed
                # not sure this ever happens; the character is copied as is
                out.append(ch)
            continue
        if in_quotes:
            if escaped:
                escaped = False
            elif ch == '\\':
                # the next character (possibly '"') does not end the scalar
                escaped = True
            elif ch == '"':
                in_quotes = False
            out.append(ch)
            continue
        if ch == '"':
            in_quotes = True
            out.append(ch)
        elif ch in '{[':
            if peek() == ',':
                idx += 1
                ch += ','
            lvl += 1
            out.append('\n' + (lvl * indent) + ch)
            nxt = peek()
            if nxt is None or nxt not in '{[':
                # content follows: put it on its own, deeper line
                lvl += 1
                out.append('\n' + (lvl * indent))
        elif ch in '}]':
            if peek() == ',':
                idx += 1
                ch += ','
            lvl -= 1
            out.append('\n' + (lvl * indent) + ch)
            nxt = peek()
            if nxt is None or nxt not in '}]':
                lvl -= 1
                out.append('\n' + (lvl * indent))
        elif ch == ',':
            comma_found = True
        elif ch == '\n':
            out.append(ch + (lvl * indent))
        else:
            out.append(ch)
    if comma_found:
        # a comma that was the very last character must not be lost
        out.append(',')
    return ''.join(out)
# Dump again, handing multiline_flowstyle to dump() as the transform callable.
yaml.dump(data, sys.stdout, transform=multiline_flowstyle)
接近您要求的输出:
---
foo:
[
{
"car":
[
{
bar: "var",
lar: "nar"
}
],
"zar": "mar"
}
]
面临的问题:需要一个在使用 ruamel.yaml 转储时,根据数据所处的位置(层级)来决定是否加引号的解决方案,而不是根据键名匹配来决定加引号或不加引号。
请参阅之前发布的类似问题(原文此处的链接已丢失)。
当前数据集:
# Innermost mapping; it is embedded below as the only element of the 'car' list.
data2 = {'bar': 'var', 'lar': 'nar'}
# Root mapping: a single key whose value is a list holding one mapping.
data = {
    'foo': [
        {
            'zar': 'mar',
            'car': [data2],
        },
    ],
}
当前 Hacky 解决方案:
import ruamel.yaml
import yaml
import sys
# Sample data: the contents change all the time, the structure never does.
# Innermost mapping; it is embedded below as the only element of the 'car' list.
data2 = {'bar': 'var', 'lar': 'nar'}
# Root mapping: a single key whose value is a list holding one mapping.
data = {
    'foo': [
        {
            'zar': 'mar',
            'car': [data2],
        },
    ],
}
# Key representer that leaves a hard-coded set of key names unquoted.
# Not a durable solution: the key names are liable to change weekly, while the
# data structure stays the same.
def non_quoted_key(self, data):
    """Represent a mapping key, leaving selected key names unquoted.

    Args:
        self: The representer instance; only ``self.represent_data`` is used.
        data: The mapping key to represent.

    Returns:
        Whatever ``self.represent_data`` returns for the (possibly wrapped) key.
    """
    if data in ('foo', 'bar', 'lar'):
        # Wrap the key as a plain scalar so it is not emitted with the
        # default (double-quoted) style.
        data = ruamel.yaml.scalarstring.PlainScalarString(data)
    return self.represent_data(data)
# Execute the dump.
# NOTE: this rebinds the name `yaml`, shadowing the module brought in by the
# earlier `import yaml`.
yaml = ruamel.yaml.YAML()
yaml.default_flow_style = False
yaml.default_style = '"'  # request double quotes as the default scalar style
yaml.indent = 4
yaml.explicit_start = True  # presumably what produces the leading '---' in the output
# Install the key representer defined above so the hard-coded keys stay unquoted.
yaml.Representer.represent_key = non_quoted_key
yaml.dump(data, sys.stdout)
当前输出(目前可以正常工作,但一旦键名发生变化,就会再次失效):
---
foo:
- "zar": "mar"
"car":
- bar: "var"
lar: "nar"
当前解决方案的问题:键在不断变化,并且不断有新的键加入,但数据结构始终保持不变。数据结构最高层级的键始终不能加引号,下一层字典中的键必须加引号,如果这些键的值又包含字典,则这些子键必须始终加引号。
加分题:
如何确保转储输出中显示序列([)和映射({)的符号?
举一个直观的例子,使用上面的数据,你如何确保输出总是显示如下所示的序列和映射?
---
foo:
[
{
"car":
[
{
bar: "var",
lar: "nar"
}
],
"zar": "mar"
},
]
正如其他一些问题中所示,通常最好先从期望的输出入手,看看 ruamel.yaml 能否往返(round-trip)重现它。由于 ruamel.yaml 主要是为保留块样式 YAML 中的注释而设计的(空行被视为上一行行尾(EOL)注释的延续,无论该注释是否为空),我添加了一些注释,看看会发生什么。
import sys
import ruamel.yaml
# The requested output, with a few comments (c1..c3) added to see what a
# load/dump round trip does with them.
# NOTE(review): the nesting indentation inside this literal appears to have
# been lost — confirm that it still loads as intended.
yaml_str = """---
foo:
[
{
"car":
[ # c1
{
bar: "var", # c2
lar: "nar", # c3
}
],
"zar": "mar"
},
]
"""
yaml = ruamel.yaml.YAML()
yaml.indent(mapping=4, sequence=4, offset=2)
yaml.preserve_quotes = True  # presumably keeps the quoting of loaded scalars — verify
yaml.explicit_start = True
data = yaml.load(yaml_str)
print(data)  # show the loaded Python structure
yaml.dump(data, sys.stdout)  # show what gets written back
给出:
ordereddict([('foo', [ordereddict([('car', [ordereddict([('bar', 'var'), ('lar', 'nar')])]), ('zar', 'mar')])])])
---
foo: [{"car": [{bar: "var", lar: "nar"}], "zar": "mar"}]
由此可以看出,得到引号应该不成问题,但让流样式中的各项分别占据单独的行会是个问题,因为在流样式输出中,插入空注释无济于事。
当您从 data 和 data2 出发时,可以递归遍历数据结构,并根据递归深度转换每个键和/或值。您没有明确说明不加引号的规则是什么,但我在这里假设它针对的是原始数据结构的根级键。
import sys
import ruamel.yaml
DQ = ruamel.yaml.scalarstring.DoubleQuotedScalarString
Pl = ruamel.yaml.scalarstring.PlainScalarString
def set_non_root_strings_double_quoted(d, lvl=0):
    """Mark the strings of a nested dict/list structure for quoting, in place.

    String keys of a dict visited at ``lvl == 0`` are wrapped as ``Pl``
    (plain); string keys at any deeper level are wrapped as ``DQ``
    (double quoted). Keys that are already ``DQ`` or ``Pl`` are left alone.
    Every string value is wrapped as ``DQ``.

    Args:
        d: The dict, list, string or other value to process. Dicts and lists
            are modified in place.
        lvl: Current nesting depth; it increases by one for every dict or
            list that is entered.

    Returns:
        ``d`` itself for dicts and lists, a ``DQ`` copy for strings, and the
        unchanged value for anything else.
    """
    if isinstance(d, dict):
        for k in list(d.keys()):  # snapshot of the keys: d changes while looping
            value = d.pop(k)
            # Only str keys are wrapped. Wrapping e.g. an int key would turn
            # it into a different (string) key and lose the original one.
            if isinstance(k, str) and not isinstance(k, (DQ, Pl)):
                k = DQ(k) if lvl > 0 else Pl(k)
            d[k] = set_non_root_strings_double_quoted(value, lvl=lvl + 1)
    elif isinstance(d, list):
        for idx, elem in enumerate(d):
            d[idx] = set_non_root_strings_double_quoted(elem, lvl=lvl + 1)
    elif isinstance(d, str):
        d = DQ(d)
    return d
data2 = {'bar': 'var', 'lar': 'nar'}
data = {'foo': [{
'zar': 'mar',
'car': [data2]
}]
}
# data2 is converted on its own first (lvl=0), so its keys become Pl; the
# later pass over data skips keys that are already DQ or Pl.
set_non_root_strings_double_quoted(data2)
set_non_root_strings_double_quoted(data)
yaml = ruamel.yaml.YAML()
yaml.explicit_start = True
yaml.indent(mapping=4, sequence=4, offset=2)
yaml.dump(data, sys.stdout)
给出:
---
foo:
- "zar": "mar"
"car":
- bar: "var"
lar: "nar"
由于 set_non_root_strings_double_quoted 是就地修改的,您可以在把 data2 赋值进 data 之后,再分别对 data2 和 data 运行它;但和往常一样,顺序很重要:每个 str 只会被设置为 DQ 或 Pl 一次。
您不能指望通过把 dict/list 转换为 CommentedMap/CommentedSeq 并附加注释来实现多行效果,但可以借助这种转换让 data 根节点下的内容以流样式输出(data2 的根同样没有被设置为流样式,但在转储时会被强制使用流样式,因为在 YAML 中流样式内部不能包含块样式)。
import sys
import ruamel.yaml
DQ = ruamel.yaml.scalarstring.DoubleQuotedScalarString
Pl = ruamel.yaml.scalarstring.PlainScalarString
Map = ruamel.yaml.CommentedMap
Seq = ruamel.yaml.CommentedSeq
def set_non_root_strings_double_quoted(d, lvl=0):
    """Return a copy of a nested dict/list structure with quoting applied.

    Dicts become ``Map`` and lists become ``Seq``. String keys of a dict
    visited at ``lvl == 0`` are wrapped as ``Pl`` (plain); string keys at any
    deeper level are wrapped as ``DQ`` (double quoted). Keys that are already
    ``DQ`` or ``Pl`` are kept as they are. Every string value is wrapped as
    ``DQ``. A dict or list visited at ``lvl == 1`` is switched to flow style.

    Args:
        d: The dict, list, string or other value to convert; it is not
            modified.
        lvl: Current nesting depth; it increases by one for every dict or
            list that is entered.

    Returns:
        The converted ``Map``/``Seq``/``DQ`` object, or ``d`` unchanged when
        it is none of dict, list or str.
    """
    if isinstance(d, dict):
        res = Map()
        if lvl == 1:
            res.fa.set_flow_style()
        for k in d:
            value = d[k]  # look up with the original key, before wrapping it
            # Only str keys are wrapped. Wrapping e.g. an int key would turn
            # it into a string that no longer matches the key in d.
            if isinstance(k, str) and not isinstance(k, (DQ, Pl)):
                k = DQ(k) if lvl > 0 else Pl(k)
            res[k] = set_non_root_strings_double_quoted(value, lvl=lvl + 1)
            res.yaml_set_comment_before_after_key(k, after='\n')
    elif isinstance(d, list):
        res = Seq()
        if lvl == 1:
            res.fa.set_flow_style()
        for elem in d:
            res.append(set_non_root_strings_double_quoted(elem, lvl=lvl + 1))
    elif isinstance(d, str):
        res = DQ(d)
    else:
        res = d
    return res
data2 = {'bar': 'var', 'lar': 'nar'}
# Convert data2 on its own first (lvl=0) and embed the converted copy below.
data2 = set_non_root_strings_double_quoted(data2)
data = {'foo': [{
'car': [data2], # <<< in the bonus question car & zar have a different order, so change here
'zar': 'mar'
}]
}
# The conversion returns a new structure, so the result has to be rebound.
data = set_non_root_strings_double_quoted(data)
yaml = ruamel.yaml.YAML()
yaml.explicit_start = True
yaml.indent(mapping=4, sequence=4, offset=2)
yaml.dump(data, sys.stdout) # just to show how it looks before postprocessing
给出:
---
foo: [{"car": [{bar: "var", lar: "nar"}], "zar": "mar"}]
并且您可以在转储过程中对输出进行后处理:
def multiline_flowstyle(s):
    """Spread the flow-style collections in dumped YAML text over several lines.

    The text is scanned one character at a time. Every opening bracket or
    brace goes onto a new, deeper-indented line, every closing one onto a new,
    shallower-indented line, and the space after a separating comma is
    replaced by a line break. Text inside double quotes is copied untouched,
    so brackets, braces and commas inside a quoted scalar are not treated as
    structure.

    Args:
        s: The dumped YAML text.

    Returns:
        The reformatted text.
    """
    indent = ' ' * 4
    size = len(s)
    idx = -1

    def peek():
        # Character following the current one, or None at the end of the input.
        nxt = idx + 1
        return s[nxt] if nxt < size else None

    out = []  # collected pieces, joined once at the end
    lvl = 0
    in_quotes = False  # you could have a bracket or curly brace within a quoted value
    escaped = False  # previous character inside the quotes was a backslash
    comma_found = False
    while idx < size - 1:
        idx += 1
        ch = s[idx]
        if comma_found:
            comma_found = False
            out.append(',\n' + (lvl * indent))
            if ch != ' ':  # a space after the comma is swallowed
                # not sure this ever happens; the character is copied as is
                out.append(ch)
            continue
        if in_quotes:
            if escaped:
                escaped = False
            elif ch == '\\':
                # the next character (possibly '"') does not end the scalar
                escaped = True
            elif ch == '"':
                in_quotes = False
            out.append(ch)
            continue
        if ch == '"':
            in_quotes = True
            out.append(ch)
        elif ch in '{[':
            if peek() == ',':
                idx += 1
                ch += ','
            lvl += 1
            out.append('\n' + (lvl * indent) + ch)
            nxt = peek()
            if nxt is None or nxt not in '{[':
                # content follows: put it on its own, deeper line
                lvl += 1
                out.append('\n' + (lvl * indent))
        elif ch in '}]':
            if peek() == ',':
                idx += 1
                ch += ','
            lvl -= 1
            out.append('\n' + (lvl * indent) + ch)
            nxt = peek()
            if nxt is None or nxt not in '}]':
                lvl -= 1
                out.append('\n' + (lvl * indent))
        elif ch == ',':
            comma_found = True
        elif ch == '\n':
            out.append(ch + (lvl * indent))
        else:
            out.append(ch)
    if comma_found:
        # a comma that was the very last character must not be lost
        out.append(',')
    return ''.join(out)
# Dump again, handing multiline_flowstyle to dump() as the transform callable.
yaml.dump(data, sys.stdout, transform=multiline_flowstyle)
接近您要求的输出:
---
foo:
[
{
"car":
[
{
bar: "var",
lar: "nar"
}
],
"zar": "mar"
}
]