Python 中的 Ruamel Yaml 转储:需要确保根据数据所在的层级为特定键加引号

Ruamel Yaml dump in Python: Need to ensure specific keys are quoted based on level of the data

面临的问题:需要一个解决方案,在使用 ruamel.yaml 转储时根据数据所在的位置(层级)来决定是否加引号,而不是根据键名进行匹配来决定加不加引号。

请参阅之前发布的一个类似问题,它为本问题提供了可靠的背景信息。

当前数据集:

# Innermost mapping; it is embedded by reference inside data below.
data2 = dict(bar='var', lar='nar')

# Root mapping -> list -> mapping -> list -> data2.
data = {
    'foo': [
        {'zar': 'mar', 'car': [data2]},
    ],
}

当前 Hacky 解决方案:

import ruamel.yaml
import yaml
import sys

# Sample payload: the contents change all the time, the nesting never does.
data2 = dict(bar='var', lar='nar')

data = {
    'foo': [
        {'zar': 'mar', 'car': [data2]},
    ],
}

# This is the part of the solution that leaves a key unquoted when it matches a hard-coded name.
# Not a valid solution: the keys are liable to change weekly, while the data structure stays the same.

def non_quoted_key(self, data):
    """Represent a mapping key, exempting a fixed set of key names.

    A key equal to 'foo', 'bar' or 'lar' is wrapped in ruamel.yaml's
    PlainScalarString before being passed to ``self.represent_data``;
    any other key is passed through unchanged.  Returns whatever
    ``self.represent_data`` returns.
    """
    plain_names = ('foo', 'bar', 'lar')
    key = data
    if any(key == name for name in plain_names):
        key = ruamel.yaml.scalarstring.PlainScalarString(key)
    return self.represent_data(key)

# Configure a ruamel.yaml YAML instance and dump `data` to stdout.
# NOTE: this rebinds the name `yaml`, shadowing the module brought in by
# the `import yaml` statement at the top of the script.
yaml = ruamel.yaml.YAML()
yaml.default_flow_style = False
yaml.default_style = '"'  # double-quote scalars by default (see the output below)
yaml.indent = 4
yaml.explicit_start = True  # the dumped document starts with '---'
# Install the key hook on the Representer class so that the hard-coded
# keys are wrapped as plain scalars instead of getting the default style.
yaml.Representer.represent_key = non_quoted_key
yaml.dump(data, sys.stdout)

当前输出(这是有效的,但一旦键名发生变化,这个方案就会再次失效):

---
foo:
-   "zar": "mar"
    "car":
    -   bar: "var"
        lar: "nar"

当前解决方案的问题:键名不断变化,并且不断有新的键加入,但数据结构始终保持不变。数据结构最高层级的键始终不能加引号,下一层字典中的键必须加引号,如果这些键的值中又包含字典,则这些子键必须始终加引号。

加分题:如何确保转储结果中显示序列 ([) 和映射 ({) 的指示符?举一个直观的例子:使用上面的数据,如何确保输出总是像下面这样显示序列和映射?

---
foo:
    [
        {
            "car":
                [
                    {
                        bar: "var",
                        lar: "nar"
                    }
                ],
            "zar": "mar"               
        },
    ]

正如其他一些问题中提到的,通常最好先从期望的输出出发,看看 ruamel.yaml 能否对它进行往返(加载后再转储)重现。由于 ruamel.yaml 主要是为了在块样式 YAML 中保留注释而设计的(空行被视为上一行行尾注释的延续,无论该注释是否为空),我添加了一些注释,看看会发生什么。

import sys
import ruamel.yaml

# The layout requested in the question, with comments (c1-c3) and a blank
# line added, to see what survives a load/dump round trip.
yaml_str = """---
foo:
    [
        {
            "car":
                [ # c1
                    {
                        bar: "var", # c2

                        lar: "nar", # c3
                    }
                ],
            "zar": "mar"               
        },
    ]
"""

# Round-trip the document: load it, print the parsed structure, dump it back.
yaml = ruamel.yaml.YAML()
yaml.indent(mapping=4, sequence=4, offset=2)
yaml.preserve_quotes = True  # the quotes from yaml_str reappear in the dump below
yaml.explicit_start = True  # the dumped document starts with '---'
data = yaml.load(yaml_str)
print(data)
yaml.dump(data, sys.stdout)

给出:

ordereddict([('foo', [ordereddict([('car', [ordereddict([('bar', 'var'), ('lar', 'nar')])]), ('zar', 'mar')])])])
---
foo: [{"car": [{bar: "var", lar: "nar"}], "zar": "mar"}]

从这里可以看出,保留引号应该不是问题,但让流样式的各个条目各占一行会是个问题:由于流样式输出中不会保留注释,插入空注释也无济于事。

当您从 data 和 data2 出发时,可以递归遍历数据结构,并根据递归深度转换每个键和/或值。您没有明确说明不加引号的规则是什么,这里我假设不加引号的是原始数据结构的根级键。

import sys
import ruamel.yaml

# Short aliases for ruamel.yaml's scalar string wrapper classes.
DQ = ruamel.yaml.scalarstring.DoubleQuotedScalarString  # dumped with double quotes
Pl = ruamel.yaml.scalarstring.PlainScalarString  # dumped without quotes

def set_non_root_strings_double_quoted(d, lvl=0):
    """Mark strings in a nested dict/list structure for quoting, in place.

    Walks *d* recursively.  String keys of the mapping at ``lvl == 0``
    become ``Pl`` (plain) scalars, string keys at any deeper level become
    ``DQ`` (double-quoted) scalars, and every string value becomes ``DQ``.
    Keys that already are ``DQ`` or ``Pl`` instances are kept as they are,
    so a sub-structure converted earlier keeps its earlier marking.

    Non-string keys (ints, tuples, ...) are left untouched: wrapping them
    in ``DQ``/``Pl`` would turn them into strings that no longer match the
    original key when the value is popped.

    Args:
        d: the node to process (dict, list, str or anything else).
        lvl: nesting depth of *d*; both dicts and lists add one level.

    Returns:
        *d* itself for dicts and lists (mutated in place), a ``DQ`` copy
        for strings, and the unchanged object otherwise.
    """
    if isinstance(d, dict):
        for k in list(d):  # snapshot of the keys: d is modified in the loop
            new_k = k
            if isinstance(k, str) and not isinstance(k, (DQ, Pl)):
                new_k = DQ(k) if lvl > 0 else Pl(k)
            # pop + re-insert replaces the key object; doing this for every
            # key in turn leaves the original key order intact
            d[new_k] = set_non_root_strings_double_quoted(d.pop(k), lvl=lvl + 1)
    elif isinstance(d, list):
        for idx, elem in enumerate(d):
            d[idx] = set_non_root_strings_double_quoted(elem, lvl=lvl + 1)
    elif isinstance(d, str):
        d = DQ(d)
    return d

data2 = {'bar': 'var', 'lar': 'nar'}

data = {'foo': [{
    'zar': 'mar',
    'car': [data2]
}]
}

# Order matters: data2 is processed first, as a root (lvl=0), so its keys
# become plain scalars; the walk over data that follows finds those keys
# already converted and keeps them as they are.
set_non_root_strings_double_quoted(data2)
set_non_root_strings_double_quoted(data)

yaml = ruamel.yaml.YAML()
yaml.explicit_start = True  # the dumped document starts with '---'
yaml.indent(mapping=4, sequence=4, offset=2)
yaml.dump(data, sys.stdout)

给出:

---
foo:
  - "zar": "mar"
    "car":
      - bar: "var"
        lar: "nar"

由于 set_non_root_strings_double_quoted 是就地修改的,您也可以在把 data2 放入 data 之后,再分别对 data2 和 data 运行它;但调用顺序很重要,因为普通的 str 只会被转换为 DQ 或 Pl 一次。

您不能依赖把 dict/list 转换为 CommentedMap/CommentedSeq 再附加注释的做法,但可以利用这种转换把 data 根节点下的输出设为流样式(data2 的根同样没有被设置为流样式,但在转储时会被强制使用流样式,因为在 YAML 中流样式内部不能嵌套块样式)。

import sys
import ruamel.yaml

# Short aliases for the ruamel.yaml classes used by the converter below.
DQ = ruamel.yaml.scalarstring.DoubleQuotedScalarString  # dumped with double quotes
Pl = ruamel.yaml.scalarstring.PlainScalarString  # dumped without quotes
Map = ruamel.yaml.CommentedMap  # mapping type that carries the flow-style flag (.fa)
Seq = ruamel.yaml.CommentedSeq  # sequence type that carries the flow-style flag (.fa)

def set_non_root_strings_double_quoted(d, lvl=0):
    """Return a copy of a nested dict/list structure marked up for dumping.

    Dicts are rebuilt as ``Map`` and lists as ``Seq``; the container found
    at ``lvl == 1`` is flagged as flow style.  String keys of the mapping
    at ``lvl == 0`` become ``Pl`` (plain) scalars, string keys at deeper
    levels become ``DQ`` (double-quoted) scalars, and every string value
    becomes ``DQ``.  Keys that already are ``DQ`` or ``Pl`` instances are
    kept, so a sub-structure converted earlier keeps its earlier marking.

    Non-string keys (ints, tuples, ...) are left untouched: wrapping them
    in ``DQ``/``Pl`` would turn them into strings that no longer match the
    original key when the value is looked up.

    Args:
        d: the node to process (dict, list, str or anything else).
        lvl: nesting depth of *d*; both dicts and lists add one level.

    Returns:
        A new ``Map``/``Seq``/``DQ`` for dict/list/str input; any other
        object is returned unchanged.  The input is not modified.
    """
    if isinstance(d, dict):
        res = Map()
        if lvl == 1:
            res.fa.set_flow_style()
        for k, value in d.items():
            new_k = k
            if isinstance(k, str) and not isinstance(k, (DQ, Pl)):
                new_k = DQ(k) if lvl > 0 else Pl(k)
            res[new_k] = set_non_root_strings_double_quoted(value, lvl=lvl + 1)
            # Tries to attach a newline "after" comment to the key; per the
            # accompanying text this does not survive flow-style output.
            res.yaml_set_comment_before_after_key(new_k, after='\n')
    elif isinstance(d, list):
        res = Seq()
        if lvl == 1:
            res.fa.set_flow_style()
        for elem in d:
            res.append(set_non_root_strings_double_quoted(elem, lvl=lvl + 1))
    elif isinstance(d, str):
        res = DQ(d)
    else:
        res = d
    return res

data2 = {'bar': 'var', 'lar': 'nar'}
# Convert data2 on its own first, so that its keys are treated as
# root-level keys (lvl=0) and become plain scalars.
data2 = set_non_root_strings_double_quoted(data2)

data = {'foo': [{
    'car': [data2],  # <<< in the bonus question car & zar have a different order, so change here
    'zar': 'mar'
}]
}

# The converter builds a new structure, so the result has to be rebound.
data = set_non_root_strings_double_quoted(data)

yaml = ruamel.yaml.YAML()
yaml.explicit_start = True  # the dumped document starts with '---'
yaml.indent(mapping=4, sequence=4, offset=2)
yaml.dump(data, sys.stdout) # just to show how it looks before postprocessing

给出:

---
foo: [{"car": [{bar: "var", lar: "nar"}], "zar": "mar"}]

并且您可以在转储过程中对输出进行后处理:

def multiline_flowstyle(s):
    """Spread single-line flow-style YAML text over multiple indented lines.

    Scans *s* character by character.  Every ``{``/``[`` and ``}``/``]``
    outside a double-quoted scalar is moved onto its own line, and every
    ``,`` outside quotes is followed by a line break; nesting is shown
    with four spaces per level.  The single space after a comma is
    consumed by the inserted line break.

    Only ``"`` toggles the quoted state: backslash-escaped double quotes
    and single-quoted scalars are not recognised.

    Args:
        s: the text to re-flow (e.g. the output of a YAML dump).

    Returns:
        The re-flowed text.  An empty string yields an empty string.
    """
    indent = ' ' * 4
    last = len(s) - 1

    def peek(pos):
        # Character following index *pos*, or None when *pos* is the last
        # index, so a bracket that ends the input does not read past it.
        return s[pos + 1] if pos < last else None

    out = []  # output fragments, joined once at the end
    lvl = 0
    in_quotes = False  # you could have a bracket or curly brace within a quoted value
    comma_found = False  # a ',' was seen; its line break is emitted on the next char
    idx = -1
    while idx < last:
        idx += 1
        ch = s[idx]
        if comma_found:
            comma_found = False
            out.append(',\n' + lvl * indent)
            if ch != ' ':  # a space is gobbled; anything else is kept as is
                out.append(ch)
            continue
        if in_quotes:
            if ch == '"':
                in_quotes = False
            out.append(ch)
        elif ch == '"':
            in_quotes = True
            out.append(ch)
        elif ch in '{[':
            if peek(idx) == ',':
                idx += 1
                ch += ','
            lvl += 1
            out.append('\n' + lvl * indent + ch)
            nxt = peek(idx)
            # content (not another opener) follows: start it one level deeper
            if nxt is None or nxt not in '{[':
                lvl += 1
                out.append('\n' + lvl * indent)
        elif ch in '}]':
            if peek(idx) == ',':
                idx += 1
                ch += ','
            lvl -= 1
            out.append('\n' + lvl * indent + ch)
            nxt = peek(idx)
            # no further closer follows: drop back one more level
            if nxt is None or nxt not in '}]':
                lvl -= 1
                out.append('\n' + lvl * indent)
        elif ch == ',':
            comma_found = True
        elif ch == '\n':
            out.append(ch + lvl * indent)
        else:
            out.append(ch)
    if comma_found:
        # a comma that ends the input has nothing after it to trigger the
        # line break, but must not be lost
        out.append(',')
    return ''.join(out)

# Dump again, this time passing the emitted text through multiline_flowstyle.
yaml.dump(data, sys.stdout, transform=multiline_flowstyle)

接近您要求的输出:

---
foo: 
    [
        {
            "car": 
                [
                    {
                        bar: "var",
                        lar: "nar"
                    }
                ],
             "zar": "mar"
        }
    ]