如何防止在 YAML 中重复定义键?
How to prevent re-definition of keys in YAML?
有什么方法可以让 yaml.load 在同一字典中某个键出现多次时引发异常?
例如,解析以下 YAML 时应当引发异常,因为 some_key 出现了两次:
{
some_key: 0,
another_key: 1,
some_key: 1
}
实际上,上述行为对应于关于键重定义的最简单策略。例如,更详细的策略可以指定只有更改分配给键的值的重新定义才会导致异常,或者可以允许将键重新定义的严重级别设置为 "warning" 而不是 "error"。等等。这个问题的理想答案应该能够支持这样的变体。
如果您希望加载程序抛出错误,那么您应该定义自己的加载程序,并使用构造函数检查键是否已在映射中 ¹:
import collections
import ruamel.yaml as yaml
from ruamel.yaml.reader import Reader
from ruamel.yaml.scanner import Scanner
from ruamel.yaml.parser_ import Parser
from ruamel.yaml.composer import Composer
from ruamel.yaml.constructor import Constructor
from ruamel.yaml.resolver import Resolver
from ruamel.yaml.nodes import MappingNode
from ruamel.yaml.compat import PY2, PY3
class MyConstructor(Constructor):
    """Constructor whose mappings refuse to define the same key twice."""

    def construct_mapping(self, node, deep=False):
        """Build a dict from a mapping node, rejecting repeated keys.

        Args:
            node: The node to convert; must be a ``MappingNode``.
            deep: Forwarded to ``construct_object`` when building values.
                Keys are always constructed with ``deep=True``.

        Returns:
            A plain ``dict`` holding the constructed key/value pairs.

        Raises:
            ConstructorError: If ``node`` is not a ``MappingNode`` or a key
                cannot be hashed.
            KeyError: If a key occurs more than once in the mapping.
        """
        # Local import: the file-level imports bring in ``Constructor`` only,
        # so ``ConstructorError`` would otherwise be an undefined name here.
        from ruamel.yaml.constructor import ConstructorError

        if not isinstance(node, MappingNode):
            raise ConstructorError(
                None, None,
                "expected a mapping node, but found %s" % node.id,
                node.start_mark)

        # NOTE(review): merge keys (``<<``) get no special treatment in this
        # method -- confirm whether documents using them must be supported.
        mapping = {}
        for key_node, value_node in node.value:
            # Keys may themselves be sequences, so always build them deeply.
            key = self.construct_object(key_node, deep=True)
            # Lists are not hashable, but tuples are.
            if isinstance(key, list):
                key = tuple(key)
            # Probe with hash() instead of ``collections.Hashable``: that
            # alias was removed in Python 3.10, and an isinstance check
            # would also accept a tuple that contains unhashable items.
            try:
                hash(key)
            except TypeError as exc:
                raise ConstructorError(
                    "while constructing a mapping", node.start_mark,
                    "found unacceptable key (%s)" % exc,
                    key_node.start_mark)
            value = self.construct_object(value_node, deep=deep)
            # The duplicate check is the only intended difference from the
            # stock mapping constructor.
            if key in mapping:
                raise KeyError("Got duplicate key: {!r}".format(key))
            mapping[key] = value
        return mapping
class MyLoader(Reader, Scanner, Parser, Composer, MyConstructor, Resolver):
    """Loader built from the usual stages, with ``MyConstructor`` building objects."""

    def __init__(self, stream):
        """Initialise every base class in turn; only ``Reader`` receives ``stream``."""
        Reader.__init__(self, stream)
        # The remaining stages take no arguments; order matches the bases.
        for stage in (Scanner, Parser, Composer, MyConstructor, Resolver):
            stage.__init__(self)
# Sample document in which ``some_key`` is defined twice.
yaml_str = """\
some_key: 0,
another_key: 1,
some_key: 1
"""
# Loading through MyLoader is expected to raise KeyError for the repeated
# key, so the print below is presumably never reached.
data = yaml.load(yaml_str, Loader=MyLoader)
print(data)
运行后会抛出 KeyError。
请注意,您在示例中使用的花括号是不必要的。
我不确定这是否适用于 merge keys。
¹ 这是使用 ruamel.yaml 完成的,我是其作者。ruamel.yaml 是 PyYAML 的增强版,后者的 loader 代码应该类似。
如果您使用的是 pyyaml,这是来自 Anthon 的答案的等效代码:
import collections
import yaml
import sys
from yaml.reader import Reader
from yaml.scanner import Scanner
from yaml.parser import Parser
from yaml.composer import Composer
from yaml.constructor import Constructor, ConstructorError
from yaml.resolver import Resolver
from yaml.nodes import MappingNode
class NoDuplicateConstructor(Constructor):
    """Constructor whose mappings refuse to define the same key twice."""

    def construct_mapping(self, node, deep=False):
        """Build a dict from a mapping node, rejecting repeated keys.

        Args:
            node: The node to convert; must be a ``MappingNode``.
            deep: Forwarded to ``construct_object`` when building values.
                Keys are always constructed with ``deep=True``.

        Returns:
            A plain ``dict`` holding the constructed key/value pairs.

        Raises:
            ConstructorError: If ``node`` is not a ``MappingNode`` or a key
                cannot be hashed.
            KeyError: If a key occurs more than once in the mapping.
        """
        if not isinstance(node, MappingNode):
            raise ConstructorError(
                None, None,
                "expected a mapping node, but found %s" % node.id,
                node.start_mark)

        # NOTE(review): merge keys (``<<``) get no special treatment in this
        # method -- confirm whether documents using them must be supported.
        mapping = {}
        for key_node, value_node in node.value:
            # Keys may themselves be sequences, so always build them deeply.
            key = self.construct_object(key_node, deep=True)
            # Lists are not hashable, but tuples are.
            if isinstance(key, list):
                key = tuple(key)
            # Probe with hash() instead of ``collections.Hashable``: that
            # alias was removed in Python 3.10, and an isinstance check
            # would also accept a tuple that contains unhashable items.
            # This single path also works on both Python 2 and 3.
            try:
                hash(key)
            except TypeError as exc:
                raise ConstructorError(
                    "while constructing a mapping", node.start_mark,
                    "found unacceptable key (%s)" % exc,
                    key_node.start_mark)
            value = self.construct_object(value_node, deep=deep)
            # Actually do the check.
            if key in mapping:
                raise KeyError("Got duplicate key: {!r}".format(key))
            mapping[key] = value
        return mapping
class NoDuplicateLoader(Reader, Scanner, Parser, Composer, NoDuplicateConstructor, Resolver):
    """Loader built from the usual stages, with ``NoDuplicateConstructor`` building objects."""

    def __init__(self, stream):
        """Initialise every base class in turn; only ``Reader`` receives ``stream``."""
        Reader.__init__(self, stream)
        # The remaining stages take no arguments; order matches the bases.
        for stage in (Scanner, Parser, Composer, NoDuplicateConstructor, Resolver):
            stage.__init__(self)
# Sample document; as written, no key is repeated in it.
# NOTE(review): ``x: 1`` was presumably indented under ``another_key`` in the
# original answer -- the leading whitespace looks lost; confirm before reuse.
yaml_str = """\
some_key: 0,
another_key:
x: 1
"""
# Parse the sample with the duplicate-rejecting loader and show the result.
data = yaml.load(yaml_str, Loader=NoDuplicateLoader)
print(data)
有什么方法可以让 yaml.load 在同一字典中某个键出现多次时引发异常?
例如,解析以下 YAML 时应当引发异常,因为 some_key 出现了两次:
{
some_key: 0,
another_key: 1,
some_key: 1
}
实际上,上述行为对应于关于键重定义的最简单策略。例如,更详细的策略可以指定只有更改分配给键的值的重新定义才会导致异常,或者可以允许将键重新定义的严重级别设置为 "warning" 而不是 "error"。等等。这个问题的理想答案应该能够支持这样的变体。
如果您希望加载程序抛出错误,那么您应该定义自己的加载程序,并使用构造函数检查键是否已在映射中 ¹:
import collections
import ruamel.yaml as yaml
from ruamel.yaml.reader import Reader
from ruamel.yaml.scanner import Scanner
from ruamel.yaml.parser_ import Parser
from ruamel.yaml.composer import Composer
from ruamel.yaml.constructor import Constructor
from ruamel.yaml.resolver import Resolver
from ruamel.yaml.nodes import MappingNode
from ruamel.yaml.compat import PY2, PY3
class MyConstructor(Constructor):
    """Constructor whose mappings refuse to define the same key twice."""

    def construct_mapping(self, node, deep=False):
        """Build a dict from a mapping node, rejecting repeated keys.

        Args:
            node: The node to convert; must be a ``MappingNode``.
            deep: Forwarded to ``construct_object`` when building values.
                Keys are always constructed with ``deep=True``.

        Returns:
            A plain ``dict`` holding the constructed key/value pairs.

        Raises:
            ConstructorError: If ``node`` is not a ``MappingNode`` or a key
                cannot be hashed.
            KeyError: If a key occurs more than once in the mapping.
        """
        # Local import: the file-level imports bring in ``Constructor`` only,
        # so ``ConstructorError`` would otherwise be an undefined name here.
        from ruamel.yaml.constructor import ConstructorError

        if not isinstance(node, MappingNode):
            raise ConstructorError(
                None, None,
                "expected a mapping node, but found %s" % node.id,
                node.start_mark)

        # NOTE(review): merge keys (``<<``) get no special treatment in this
        # method -- confirm whether documents using them must be supported.
        mapping = {}
        for key_node, value_node in node.value:
            # Keys may themselves be sequences, so always build them deeply.
            key = self.construct_object(key_node, deep=True)
            # Lists are not hashable, but tuples are.
            if isinstance(key, list):
                key = tuple(key)
            # Probe with hash() instead of ``collections.Hashable``: that
            # alias was removed in Python 3.10, and an isinstance check
            # would also accept a tuple that contains unhashable items.
            try:
                hash(key)
            except TypeError as exc:
                raise ConstructorError(
                    "while constructing a mapping", node.start_mark,
                    "found unacceptable key (%s)" % exc,
                    key_node.start_mark)
            value = self.construct_object(value_node, deep=deep)
            # The duplicate check is the only intended difference from the
            # stock mapping constructor.
            if key in mapping:
                raise KeyError("Got duplicate key: {!r}".format(key))
            mapping[key] = value
        return mapping
class MyLoader(Reader, Scanner, Parser, Composer, MyConstructor, Resolver):
    """Loader built from the usual stages, with ``MyConstructor`` building objects."""

    def __init__(self, stream):
        """Initialise every base class in turn; only ``Reader`` receives ``stream``."""
        Reader.__init__(self, stream)
        # The remaining stages take no arguments; order matches the bases.
        for stage in (Scanner, Parser, Composer, MyConstructor, Resolver):
            stage.__init__(self)
# Sample document in which ``some_key`` is defined twice.
yaml_str = """\
some_key: 0,
another_key: 1,
some_key: 1
"""
# Loading through MyLoader is expected to raise KeyError for the repeated
# key, so the print below is presumably never reached.
data = yaml.load(yaml_str, Loader=MyLoader)
print(data)
运行后会抛出 KeyError。
请注意,您在示例中使用的花括号是不必要的。
我不确定这是否适用于 merge keys。
¹ 这是使用 ruamel.yaml 完成的,我是其作者。ruamel.yaml 是 PyYAML 的增强版,后者的 loader 代码应该类似。
如果您使用的是 pyyaml,这是来自 Anthon 的答案的等效代码:
import collections
import yaml
import sys
from yaml.reader import Reader
from yaml.scanner import Scanner
from yaml.parser import Parser
from yaml.composer import Composer
from yaml.constructor import Constructor, ConstructorError
from yaml.resolver import Resolver
from yaml.nodes import MappingNode
class NoDuplicateConstructor(Constructor):
    """Constructor whose mappings refuse to define the same key twice."""

    def construct_mapping(self, node, deep=False):
        """Build a dict from a mapping node, rejecting repeated keys.

        Args:
            node: The node to convert; must be a ``MappingNode``.
            deep: Forwarded to ``construct_object`` when building values.
                Keys are always constructed with ``deep=True``.

        Returns:
            A plain ``dict`` holding the constructed key/value pairs.

        Raises:
            ConstructorError: If ``node`` is not a ``MappingNode`` or a key
                cannot be hashed.
            KeyError: If a key occurs more than once in the mapping.
        """
        if not isinstance(node, MappingNode):
            raise ConstructorError(
                None, None,
                "expected a mapping node, but found %s" % node.id,
                node.start_mark)

        # NOTE(review): merge keys (``<<``) get no special treatment in this
        # method -- confirm whether documents using them must be supported.
        mapping = {}
        for key_node, value_node in node.value:
            # Keys may themselves be sequences, so always build them deeply.
            key = self.construct_object(key_node, deep=True)
            # Lists are not hashable, but tuples are.
            if isinstance(key, list):
                key = tuple(key)
            # Probe with hash() instead of ``collections.Hashable``: that
            # alias was removed in Python 3.10, and an isinstance check
            # would also accept a tuple that contains unhashable items.
            # This single path also works on both Python 2 and 3.
            try:
                hash(key)
            except TypeError as exc:
                raise ConstructorError(
                    "while constructing a mapping", node.start_mark,
                    "found unacceptable key (%s)" % exc,
                    key_node.start_mark)
            value = self.construct_object(value_node, deep=deep)
            # Actually do the check.
            if key in mapping:
                raise KeyError("Got duplicate key: {!r}".format(key))
            mapping[key] = value
        return mapping
class NoDuplicateLoader(Reader, Scanner, Parser, Composer, NoDuplicateConstructor, Resolver):
    """Loader built from the usual stages, with ``NoDuplicateConstructor`` building objects."""

    def __init__(self, stream):
        """Initialise every base class in turn; only ``Reader`` receives ``stream``."""
        Reader.__init__(self, stream)
        # The remaining stages take no arguments; order matches the bases.
        for stage in (Scanner, Parser, Composer, NoDuplicateConstructor, Resolver):
            stage.__init__(self)
# Sample document; as written, no key is repeated in it.
# NOTE(review): ``x: 1`` was presumably indented under ``another_key`` in the
# original answer -- the leading whitespace looks lost; confirm before reuse.
yaml_str = """\
some_key: 0,
another_key:
x: 1
"""
# Parse the sample with the duplicate-rejecting loader and show the result.
data = yaml.load(yaml_str, Loader=NoDuplicateLoader)
print(data)