如何防止在 YAML 中重复定义键?

How to prevent re-definition of keys in YAML?

有什么方法可以让 yaml.load 在给定键在同一字典中出现多次时引发异常?

例如,解析以下 YAML 会引发异常,因为 some_key 出现了两次:

{
  some_key: 0,
  another_key: 1,
  some_key: 1
}

实际上,上述行为对应于关于键重定义的最简单策略。例如,更详细的策略可以指定只有更改分配给键的值的重新定义才会导致异常,或者可以允许将键重新定义的严重级别设置为 "warning" 而不是 "error"。等等。这个问题的理想答案应该能够支持这样的变体。

如果您希望加载器抛出错误,那么您应该定义自己的加载器(Loader),并为其提供一个会检查键是否已存在于映射中的构造器(Constructor)¹:

import collections

import ruamel.yaml as yaml
from ruamel.yaml.compat import PY2, PY3
from ruamel.yaml.composer import Composer
from ruamel.yaml.constructor import Constructor, ConstructorError
from ruamel.yaml.nodes import MappingNode
from ruamel.yaml.parser_ import Parser
from ruamel.yaml.reader import Reader
from ruamel.yaml.resolver import Resolver
from ruamel.yaml.scanner import Scanner


class MyConstructor(Constructor):
    """Constructor whose mappings reject a key that is defined twice.

    Merge keys (``<<``) get no special treatment in this override.
    """

    def construct_mapping(self, node, deep=False):
        """Build a dict from a mapping node, failing on duplicate keys.

        Args:
            node: the node to convert; must be a ``MappingNode``.
            deep: forwarded to ``construct_object`` when building values
                (keys are always constructed with ``deep=True``).

        Returns:
            A dict mapping every constructed key to its constructed value.

        Raises:
            ConstructorError: ``node`` is not a mapping node, or one of its
                keys is not hashable.
            KeyError: the same key occurs more than once in the mapping.
        """
        # ``collections.Hashable`` was removed in Python 3.10. The ABC lives
        # in ``collections.abc`` on Python 3 and only in ``collections`` on
        # Python 2, so resolve it here to keep both interpreters working.
        try:
            from collections.abc import Hashable
        except ImportError:
            from collections import Hashable

        if not isinstance(node, MappingNode):
            raise ConstructorError(
                None, None,
                "expected a mapping node, but found %s" % node.id,
                node.start_mark)
        mapping = {}
        for key_node, value_node in node.value:
            # keys can be lists, so they are always constructed deeply
            key = self.construct_object(key_node, deep=True)
            # lists are not hashable, but tuples are
            if isinstance(key, list) and not isinstance(key, Hashable):
                key = tuple(key)
            if PY2:
                try:
                    hash(key)
                except TypeError as exc:
                    raise ConstructorError(
                        "while constructing a mapping", node.start_mark,
                        "found unacceptable key (%s)" %
                        exc, key_node.start_mark)
            elif not isinstance(key, Hashable):
                raise ConstructorError(
                    "while constructing a mapping", node.start_mark,
                    "found unhashable key", key_node.start_mark)

            value = self.construct_object(value_node, deep=deep)
            # This membership test is the only deviation from the stock
            # mapping construction: a repeated key is an error instead of
            # silently overwriting the earlier value.
            if key in mapping:
                raise KeyError("Got duplicate key: {!r}".format(key))
            mapping[key] = value
        return mapping


class MyLoader(Reader, Scanner, Parser, Composer, MyConstructor, Resolver):
    """Loader combining Reader, Scanner, Parser, Composer and Resolver
    with MyConstructor as the constructing stage."""

    def __init__(self, stream):
        """Initialise every base class; only the reader receives ``stream``."""
        Reader.__init__(self, stream)
        # The remaining bases are initialised without arguments, in the
        # order in which they appear in the class statement.
        for stage in (Scanner, Parser, Composer, MyConstructor, Resolver):
            stage.__init__(self)



# Demo input: `some_key` appears twice in the same top-level mapping.
# NOTE(review): the trailing commas look carried over from the flow-style
# example; in block style they presumably end up inside the scalar values
# (e.g. the string "0," rather than the integer 0) -- confirm this is intended.
yaml_str = """\
some_key: 0,
another_key: 1,
some_key: 1
"""

# Loading through MyLoader is expected to raise KeyError on the second
# `some_key`, so the print below is not reached for this input.
# NOTE(review): MyLoader builds on Constructor, presumably not the safe
# constructor -- confirm the input is trusted before reusing this elsewhere.
data = yaml.load(yaml_str, Loader=MyLoader)
print(data)

然后抛出 KeyError

请注意,您在示例中使用的花括号是不必要的。

我不确定这是否适用于 merge keys


¹ 这是使用 ruamel.yaml 完成的,我是它的作者。ruamel.yaml 是 PyYAML 的增强版,后者的 loader 代码应该类似。

如果您使用的是 pyyaml,这是来自 Anthon 的答案的等效代码:

import collections
import yaml
import sys

from yaml.reader import Reader
from yaml.scanner import Scanner
from yaml.parser import Parser
from yaml.composer import Composer
from yaml.constructor import Constructor, ConstructorError
from yaml.resolver import Resolver
from yaml.nodes import MappingNode


class NoDuplicateConstructor(Constructor):
    """Constructor whose mappings reject a key that is defined twice.

    Merge keys (``<<``) get no special treatment in this override.
    """

    def construct_mapping(self, node, deep=False):
        """Build a dict from a mapping node, failing on duplicate keys.

        Args:
            node: the node to convert; must be a ``MappingNode``.
            deep: forwarded to ``construct_object`` when building values
                (keys are always constructed with ``deep=True``).

        Returns:
            A dict mapping every constructed key to its constructed value.

        Raises:
            ConstructorError: ``node`` is not a mapping node, or one of its
                keys is not hashable.
            KeyError: the same key occurs more than once in the mapping.
        """
        # ``collections.Hashable`` was removed in Python 3.10. The ABC lives
        # in ``collections.abc`` on Python 3 and only in ``collections`` on
        # Python 2, so resolve it here to keep both interpreters working.
        try:
            from collections.abc import Hashable
        except ImportError:
            from collections import Hashable

        if not isinstance(node, MappingNode):
            raise ConstructorError(
                None, None,
                "expected a mapping node, but found %s" % node.id,
                node.start_mark)
        mapping = {}
        for key_node, value_node in node.value:
            # keys can be lists, so they are always constructed deeply
            key = self.construct_object(key_node, deep=True)
            # lists are not hashable, but tuples are
            if isinstance(key, list) and not isinstance(key, Hashable):
                key = tuple(key)

            if sys.version_info.major == 2:
                try:
                    hash(key)
                except TypeError as exc:
                    raise ConstructorError(
                        "while constructing a mapping", node.start_mark,
                        "found unacceptable key (%s)" %
                        exc, key_node.start_mark)
            elif not isinstance(key, Hashable):
                raise ConstructorError(
                    "while constructing a mapping", node.start_mark,
                    "found unhashable key", key_node.start_mark)

            value = self.construct_object(value_node, deep=deep)

            # The actual duplicate check: a repeated key is an error instead
            # of silently overwriting the earlier value.
            if key in mapping:
                raise KeyError("Got duplicate key: {!r}".format(key))

            mapping[key] = value
        return mapping


class NoDuplicateLoader(Reader, Scanner, Parser, Composer, NoDuplicateConstructor, Resolver):
    """Loader combining Reader, Scanner, Parser, Composer and Resolver
    with NoDuplicateConstructor as the constructing stage."""

    def __init__(self, stream):
        """Initialise every base class; only the reader receives ``stream``."""
        Reader.__init__(self, stream)
        # The remaining bases are initialised without arguments, in the
        # order in which they appear in the class statement.
        for stage in (Scanner, Parser, Composer, NoDuplicateConstructor, Resolver):
            stage.__init__(self)



# Demo input: no key is repeated within either mapping, so the duplicate
# check in NoDuplicateConstructor is not expected to fire here.
# NOTE(review): the trailing comma after `0` looks carried over from the
# flow-style example; in block style it presumably ends up inside the scalar
# value -- confirm this is intended.
yaml_str = """\
some_key: 0,
another_key:
  x: 1
"""

# Load through the duplicate-rejecting loader and show the resulting data.
# NOTE(review): NoDuplicateLoader builds on Constructor, presumably not the
# safe constructor -- confirm the input is trusted before reusing this.
data = yaml.load(yaml_str, Loader=NoDuplicateLoader)
print(data)