使用 ruamel.yaml 安全转储和加载 defaultdict
Safe dumping and loading of defaultdict with ruamel.yaml
我正在尝试在 Python(我用的是 3.6+)中使用 ruamel.yaml,对一个带有 collections.defaultdict
属性的类进行序列化和反序列化。
下面是我希望能够正常运行的一个最小示例:
from collections import defaultdict
import ruamel.yaml
from pathlib import Path
class Foo:
    """Minimal example class whose only attribute is a ``defaultdict``."""

    def __init__(self):
        # No default_factory is passed, so missing keys still raise KeyError;
        # the attribute is nevertheless of type collections.defaultdict.
        self.x = defaultdict()
# Build a "safe" YAML instance and register the example class with it.
YAML = ruamel.yaml.YAML(typ="safe")
YAML.register_class(Foo)
# defaultdict is registered as well; the traceback in this post shows the dump
# then failing while reading ``data.__dict__`` of the defaultdict.
YAML.register_class(defaultdict)
fp = Path("./test.yaml")
# Dump a Foo instance to the file (this is the call that raises), then load it.
YAML.dump(Foo(), fp)
YAML.load(fp)
但这失败了:
AttributeError: 'collections.defaultdict' object has no attribute '__dict__'
有没有什么办法,可以不必为每个类似 Foo("Foo-like")的类编写自定义代码?我原本希望可以为 defaultdict
对象添加一个不同的表示器(representer),但到目前为止我的尝试都没有成功。
完整追溯:
Traceback (most recent call last):
File "./tests/test_yaml.py", line 18, in <module>
YAML.dump(Foo(), fp)
File "C:\miniconda-windows\envs\ratio\lib\site-packages\ruamel\yaml\main.py", line 439, in dump
return self.dump_all([data], stream, _kw, transform=transform)
File "C:\miniconda-windows\envs\ratio\lib\site-packages\ruamel\yaml\main.py", line 453, in dump_all
self._context_manager.dump(data)
File "C:\miniconda-windows\envs\ratio\lib\site-packages\ruamel\yaml\main.py", line 801, in dump
self._yaml.representer.represent(data)
File "C:\miniconda-windows\envs\ratio\lib\site-packages\ruamel\yaml\representer.py", line 81, in represent
node = self.represent_data(data)
File "C:\miniconda-windows\envs\ratio\lib\site-packages\ruamel\yaml\representer.py", line 108, in represent_data
node = self.yaml_representers[data_types[0]](self, data)
File "C:\miniconda-windows\envs\ratio\lib\site-packages\ruamel\yaml\main.py", line 638, in t_y
tag, data, cls, flow_style=representer.default_flow_style
File "C:\miniconda-windows\envs\ratio\lib\site-packages\ruamel\yaml\representer.py", line 384, in represent_yaml_object
return self.represent_mapping(tag, state, flow_style=flow_style)
File "C:\miniconda-windows\envs\ratio\lib\site-packages\ruamel\yaml\representer.py", line 218, in represent_mapping
node_value = self.represent_data(item_value)
File "C:\miniconda-windows\envs\ratio\lib\site-packages\ruamel\yaml\representer.py", line 108, in represent_data
node = self.yaml_representers[data_types[0]](self, data)
File "C:\miniconda-windows\envs\ratio\lib\site-packages\ruamel\yaml\main.py", line 638, in t_y
tag, data, cls, flow_style=representer.default_flow_style
File "C:\miniconda-windows\envs\ratio\lib\site-packages\ruamel\yaml\representer.py", line 383, in represent_yaml_object
state = data.__dict__.copy()
AttributeError: 'collections.defaultdict' object has no attribute '__dict__'
这是因为 defaultdict 是内置类 dict 的子类,它没有 __dict__ 属性,
而 YAML 表示器(representer)正是通过 __dict__ 来获取对象的属性名称和值的。
在这种情况下,defaultdict 应该被当作 dict 来处理,
但问题在于 ruamel.yaml.representer.BaseRepresenter 类的 represent_data
方法只检查对象自身的类型(即 __mro__ 的第一项)来判断是否存在对应的表示器:
data_types = type(data).__mro__
# ...skipped
if data_types[0] in self.yaml_representers:
node = self.yaml_representers[data_types[0]](self, data)
它应该做的是检查 __mro__ 中是否有任何一个类型注册了表示器,如果找到就使用它:
if any(data_type in self.yaml_representers for data_type in data_types):
node = self.yaml_representers[next(data_type for data_type in data_types if data_type in self.yaml_representers)](self, data)
所以我们可以自己修改这个方法:
def represent_data(self, data):
    # type: (Any) -> Any
    """Return the YAML node for *data*, honouring representers of base classes.

    Replacement for ``BaseRepresenter.represent_data``. The exact-type
    representer lookup walks the whole ``__mro__`` of ``type(data)`` instead of
    looking only at its first entry, so a subclass (e.g. ``defaultdict``) is
    represented by the representer registered for an ancestor (e.g. ``dict``).

    Lookup order:
      1. first type in the MRO that has an entry in ``self.yaml_representers``;
      2. first type in the MRO that has an entry in
         ``self.yaml_multi_representers``;
      3. the ``None`` entry of ``yaml_multi_representers``, then of
         ``yaml_representers``;
      4. a plain scalar node holding the text form of *data*.

    Note that step 1 means an ancestor's exact representer takes precedence
    over any multi-representer registered for the object's own type.

    Side effects: sets ``self.alias_key`` and, for aliasable objects that have
    not been represented yet, appends *data* to ``self.object_keeper``.
    """
    # Local import keeps this snippet self-contained; the interpreter version
    # is checked directly so the code does not depend on the library exporting
    # a PY2 flag.
    import sys

    if self.ignore_aliases(data):
        self.alias_key = None
    else:
        self.alias_key = id(data)
    if self.alias_key is not None:
        if self.alias_key in self.represented_objects:
            # Already represented: reuse the existing node.
            return self.represented_objects[self.alias_key]
        self.object_keeper.append(data)

    data_types = type(data).__mro__
    if sys.version_info[0] == 2:
        # Python 2 old-style instances need their classobj bases prepended.
        if isinstance(data, representer.types.InstanceType):
            data_types = (
                representer.get_classobj_bases(data.__class__) + list(data_types)
            )

    # Single pass over the MRO; entries are types, so None is a safe sentinel.
    exact_type = next(
        (t for t in data_types if t in self.yaml_representers), None
    )
    if exact_type is not None:
        return self.yaml_representers[exact_type](self, data)

    for data_type in data_types:
        if data_type in self.yaml_multi_representers:
            return self.yaml_multi_representers[data_type](self, data)

    if None in self.yaml_multi_representers:
        return self.yaml_multi_representers[None](self, data)
    if None in self.yaml_representers:
        return self.yaml_representers[None](self, data)

    # Fall back to ``str`` if the module does not provide ``text_type``.
    text_type = getattr(representer, "text_type", str)
    return representer.ScalarNode(None, text_type(data))
# Monkeypatch the library class with the replacement function defined above.
# Requires ``from ruamel.yaml import representer`` to have been executed first.
representer.BaseRepresenter.represent_data = represent_data
这样一来,无需注册 defaultdict,您的代码也能正常运行:
class Foo:
    """Minimal example class whose only attribute is a ``defaultdict``."""

    def __init__(self):
        # No default_factory is passed, so missing keys still raise KeyError;
        # the attribute is nevertheless of type collections.defaultdict.
        self.x = defaultdict()
# Same setup as in the question, but only Foo is registered.
YAML = ruamel.yaml.YAML(typ="safe")
YAML.register_class(Foo)
# defaultdict is deliberately left unregistered here.
# YAML.register_class(defaultdict)
# NOTE(review): the question used "./test.yaml"; this path differs - confirm
# the directory exists before running.
fp = Path("/temp/test.yaml")
YAML.dump(Foo(), fp)
YAML.load(fp)
编辑:一个更优雅的解决方案是直接把 SafeRepresenter.represent_dict
方法注册为 defaultdict 的表示器(representer):
from ruamel.yaml import representer
# Register the existing dict representer for defaultdict on SafeRepresenter.
# NOTE(review): presumably the default_factory is not preserved this way - verify.
representer.SafeRepresenter.add_representer(defaultdict, representer.SafeRepresenter.represent_dict)
现在有一个名为 ruamel.yaml.pytypes 的包,支持转储 defaultdict 实例。
请注意,如果您为 default_factory 提供了一个函数作为参数,就需要指定 typ='unsafe',
否则您的工厂函数将无法被表示。
在您的 virtualenv 中安装 ruamel.yaml.pytypes 和 ruamel.yaml 之后,您可以这样做:
# NOTE: typ 'unsafe' can construct arbitrary Python objects when loading;
# only use it with YAML you trust.
yaml = ruamel.yaml.YAML(typ=['unsafe', 'pytypes'])
yaml.default_flow_style = False
buf = ruamel.yaml.compat.StringIO()


def factory():
    """Return the current local date and time (used as default_factory)."""
    import datetime

    return datetime.datetime.now()


data = defaultdict(factory)
# Reading a missing key calls factory() and stores the result under key 4.
x = data[4]
data[2] = 42
yaml.dump(data, buf)
print(buf.getvalue(), end='')
# Round-trip check: both the contents and the factory must survive.
d = yaml.load(buf.getvalue())
assert data == d
assert data.default_factory == d.default_factory
上述代码将打印如下内容(您的日期时间会有所不同):
!defaultdict
- !!python/name:__main__.factory
- 2: 42
  4: 2019-08-19 13:06:05.129019
(断言不会抛出异常)
如需了解实现类似效果的"手动"(manual)方法,请查看编辑历史。
我正在尝试在 Python(我用的是 3.6+)中使用 ruamel.yaml,对一个带有 collections.defaultdict
属性的类进行序列化和反序列化。
下面是我希望能够正常运行的一个最小示例:
from collections import defaultdict
import ruamel.yaml
from pathlib import Path
class Foo:
    """Minimal example class whose only attribute is a ``defaultdict``."""

    def __init__(self):
        # No default_factory is passed, so missing keys still raise KeyError;
        # the attribute is nevertheless of type collections.defaultdict.
        self.x = defaultdict()
# Build a "safe" YAML instance and register the example class with it.
YAML = ruamel.yaml.YAML(typ="safe")
YAML.register_class(Foo)
# defaultdict is registered as well; the traceback in this post shows the dump
# then failing while reading ``data.__dict__`` of the defaultdict.
YAML.register_class(defaultdict)
fp = Path("./test.yaml")
# Dump a Foo instance to the file (this is the call that raises), then load it.
YAML.dump(Foo(), fp)
YAML.load(fp)
但这失败了:
AttributeError: 'collections.defaultdict' object has no attribute '__dict__'
有没有什么办法,可以不必为每个类似 Foo("Foo-like")的类编写自定义代码?我原本希望可以为 defaultdict
对象添加一个不同的表示器(representer),但到目前为止我的尝试都没有成功。
完整追溯:
Traceback (most recent call last):
File "./tests/test_yaml.py", line 18, in <module>
YAML.dump(Foo(), fp)
File "C:\miniconda-windows\envs\ratio\lib\site-packages\ruamel\yaml\main.py", line 439, in dump
return self.dump_all([data], stream, _kw, transform=transform)
File "C:\miniconda-windows\envs\ratio\lib\site-packages\ruamel\yaml\main.py", line 453, in dump_all
self._context_manager.dump(data)
File "C:\miniconda-windows\envs\ratio\lib\site-packages\ruamel\yaml\main.py", line 801, in dump
self._yaml.representer.represent(data)
File "C:\miniconda-windows\envs\ratio\lib\site-packages\ruamel\yaml\representer.py", line 81, in represent
node = self.represent_data(data)
File "C:\miniconda-windows\envs\ratio\lib\site-packages\ruamel\yaml\representer.py", line 108, in represent_data
node = self.yaml_representers[data_types[0]](self, data)
File "C:\miniconda-windows\envs\ratio\lib\site-packages\ruamel\yaml\main.py", line 638, in t_y
tag, data, cls, flow_style=representer.default_flow_style
File "C:\miniconda-windows\envs\ratio\lib\site-packages\ruamel\yaml\representer.py", line 384, in represent_yaml_object
return self.represent_mapping(tag, state, flow_style=flow_style)
File "C:\miniconda-windows\envs\ratio\lib\site-packages\ruamel\yaml\representer.py", line 218, in represent_mapping
node_value = self.represent_data(item_value)
File "C:\miniconda-windows\envs\ratio\lib\site-packages\ruamel\yaml\representer.py", line 108, in represent_data
node = self.yaml_representers[data_types[0]](self, data)
File "C:\miniconda-windows\envs\ratio\lib\site-packages\ruamel\yaml\main.py", line 638, in t_y
tag, data, cls, flow_style=representer.default_flow_style
File "C:\miniconda-windows\envs\ratio\lib\site-packages\ruamel\yaml\representer.py", line 383, in represent_yaml_object
state = data.__dict__.copy()
AttributeError: 'collections.defaultdict' object has no attribute '__dict__'
这是因为 defaultdict 是内置类 dict 的子类,它没有 __dict__ 属性,
而 YAML 表示器(representer)正是通过 __dict__ 来获取对象的属性名称和值的。
在这种情况下,defaultdict 应该被当作 dict 来处理,
但问题在于 ruamel.yaml.representer.BaseRepresenter 类的 represent_data
方法只检查对象自身的类型(即 __mro__ 的第一项)来判断是否存在对应的表示器:
data_types = type(data).__mro__
# ...skipped
if data_types[0] in self.yaml_representers:
node = self.yaml_representers[data_types[0]](self, data)
它应该做的是检查 __mro__ 中是否有任何一个类型注册了表示器,如果找到就使用它:
if any(data_type in self.yaml_representers for data_type in data_types):
node = self.yaml_representers[next(data_type for data_type in data_types if data_type in self.yaml_representers)](self, data)
所以我们可以自己修改这个方法:
def represent_data(self, data):
    # type: (Any) -> Any
    """Return the YAML node for *data*, honouring representers of base classes.

    Replacement for ``BaseRepresenter.represent_data``. The exact-type
    representer lookup walks the whole ``__mro__`` of ``type(data)`` instead of
    looking only at its first entry, so a subclass (e.g. ``defaultdict``) is
    represented by the representer registered for an ancestor (e.g. ``dict``).

    Lookup order:
      1. first type in the MRO that has an entry in ``self.yaml_representers``;
      2. first type in the MRO that has an entry in
         ``self.yaml_multi_representers``;
      3. the ``None`` entry of ``yaml_multi_representers``, then of
         ``yaml_representers``;
      4. a plain scalar node holding the text form of *data*.

    Note that step 1 means an ancestor's exact representer takes precedence
    over any multi-representer registered for the object's own type.

    Side effects: sets ``self.alias_key`` and, for aliasable objects that have
    not been represented yet, appends *data* to ``self.object_keeper``.
    """
    # Local import keeps this snippet self-contained; the interpreter version
    # is checked directly so the code does not depend on the library exporting
    # a PY2 flag.
    import sys

    if self.ignore_aliases(data):
        self.alias_key = None
    else:
        self.alias_key = id(data)
    if self.alias_key is not None:
        if self.alias_key in self.represented_objects:
            # Already represented: reuse the existing node.
            return self.represented_objects[self.alias_key]
        self.object_keeper.append(data)

    data_types = type(data).__mro__
    if sys.version_info[0] == 2:
        # Python 2 old-style instances need their classobj bases prepended.
        if isinstance(data, representer.types.InstanceType):
            data_types = (
                representer.get_classobj_bases(data.__class__) + list(data_types)
            )

    # Single pass over the MRO; entries are types, so None is a safe sentinel.
    exact_type = next(
        (t for t in data_types if t in self.yaml_representers), None
    )
    if exact_type is not None:
        return self.yaml_representers[exact_type](self, data)

    for data_type in data_types:
        if data_type in self.yaml_multi_representers:
            return self.yaml_multi_representers[data_type](self, data)

    if None in self.yaml_multi_representers:
        return self.yaml_multi_representers[None](self, data)
    if None in self.yaml_representers:
        return self.yaml_representers[None](self, data)

    # Fall back to ``str`` if the module does not provide ``text_type``.
    text_type = getattr(representer, "text_type", str)
    return representer.ScalarNode(None, text_type(data))
# Monkeypatch the library class with the replacement function defined above.
# Requires ``from ruamel.yaml import representer`` to have been executed first.
representer.BaseRepresenter.represent_data = represent_data
这样一来,无需注册 defaultdict,您的代码也能正常运行:
class Foo:
    """Minimal example class whose only attribute is a ``defaultdict``."""

    def __init__(self):
        # No default_factory is passed, so missing keys still raise KeyError;
        # the attribute is nevertheless of type collections.defaultdict.
        self.x = defaultdict()
# Same setup as in the question, but only Foo is registered.
YAML = ruamel.yaml.YAML(typ="safe")
YAML.register_class(Foo)
# defaultdict is deliberately left unregistered here.
# YAML.register_class(defaultdict)
# NOTE(review): the question used "./test.yaml"; this path differs - confirm
# the directory exists before running.
fp = Path("/temp/test.yaml")
YAML.dump(Foo(), fp)
YAML.load(fp)
编辑:一个更优雅的解决方案是直接把 SafeRepresenter.represent_dict
方法注册为 defaultdict 的表示器(representer):
from ruamel.yaml import representer
# Register the existing dict representer for defaultdict on SafeRepresenter.
# NOTE(review): presumably the default_factory is not preserved this way - verify.
representer.SafeRepresenter.add_representer(defaultdict, representer.SafeRepresenter.represent_dict)
现在有一个名为 ruamel.yaml.pytypes 的包,支持转储 defaultdict 实例。
请注意,如果您为 default_factory 提供了一个函数作为参数,就需要指定 typ='unsafe',
否则您的工厂函数将无法被表示。
在您的 virtualenv 中安装 ruamel.yaml.pytypes 和 ruamel.yaml 之后,您可以这样做:
# NOTE: typ 'unsafe' can construct arbitrary Python objects when loading;
# only use it with YAML you trust.
yaml = ruamel.yaml.YAML(typ=['unsafe', 'pytypes'])
yaml.default_flow_style = False
buf = ruamel.yaml.compat.StringIO()


def factory():
    """Return the current local date and time (used as default_factory)."""
    import datetime

    return datetime.datetime.now()


data = defaultdict(factory)
# Reading a missing key calls factory() and stores the result under key 4.
x = data[4]
data[2] = 42
yaml.dump(data, buf)
print(buf.getvalue(), end='')
# Round-trip check: both the contents and the factory must survive.
d = yaml.load(buf.getvalue())
assert data == d
assert data.default_factory == d.default_factory
上述代码将打印如下内容(您的日期时间会有所不同):
!defaultdict
- !!python/name:__main__.factory
- 2: 42
  4: 2019-08-19 13:06:05.129019
(断言不会抛出异常)
如需了解实现类似效果的"手动"(manual)方法,请查看编辑历史。