YAML - 序列化值为类型的属性

YAML - Serializing attributes which are types

我在用 YAML 序列化以类型引用作为成员的类(class)时遇到了问题。我使用的是 ruamel.yaml 的安全加载器(safe loader)。

以下所有内容都是我在 REPL 提示符下运行的(这样可以一次得到多个错误)。

初始化:

import sys
from ruamel.yaml import YAML, yaml_object

Y = YAML(typ="safe",pure=True)

# ==============

@yaml_object(Y)
class A(object):
    """Holder for a type reference; this is the object to be serialized.

    Registered on ``Y`` through the ``yaml_object`` decorator under the tag
    ``!Aclass``.
    """

    yaml_tag = "!Aclass"

    def __init__(self, type):
        # ``type`` shadows the builtin inside this method only; the parameter
        # name is kept so keyword callers keep working.
        self.type = type

    def f(self):
        """Call the stored type with no arguments and return the result."""
        factory = self.type
        return factory()

class T1(object):
    """Empty class that is only used as a type reference."""

@yaml_object(Y)
class T2(object):
    """Second empty class used as a type reference; decorated with ``yaml_object(Y)``."""

class T3(object):
    """Third empty class used as a type reference."""


# NOTE(review): ``T3.__class__`` is the metaclass (``type``), not ``T3``
# itself, so this call hands ``type`` to ``register_class``. That is
# presumably where the ``!type {}`` in the dump output further down comes
# from -- confirm before relying on it.
Y.register_class(T3.__class__)

导致失败的代码:

# Each of these calls ends in a RepresenterError; only the last traceback
# line of each is reproduced in the output that follows.
Y.dump(A(T1), sys.stdout)
Y.dump(A(T2), sys.stdout)
Y.dump(A(T3), sys.stdout)
Y.dump(A(int), sys.stdout)

输出如下(仅显示每个回溯的最后一行):

ruamel.yaml.representer.RepresenterError: cannot represent an object: <attribute '__dict__' of 'T1' objects>
ruamel.yaml.representer.RepresenterError: cannot represent an object: <attribute '__dict__' of 'T2' objects>
ruamel.yaml.representer.RepresenterError: cannot represent an object: <attribute '__dict__' of 'T3' objects>
ruamel.yaml.representer.RepresenterError: cannot represent an object: <slot wrapper '__abs__' of 'int' objects>

如果有任何解决方案能让我(安全地)唯一地保存类型(我既需要生成该类型的对象,也需要检查传入的对象是否属于某种类型),我将不胜感激。用一个函数或类来生成我所需的类型也会遇到同样的问题,即无法序列化。


P.S. 我可能还发现了一个 bug:出于某种原因,解析器的行为会因为之前是否已经(尝试)序列化过同一个参数而不同。

# The same call issued four times in a row on the same YAML instance; the
# output that follows alternates between a traceback and a successful dump.
Y.dump(A(str), sys.stdout)
Y.dump(A(str), sys.stdout)
Y.dump(A(str), sys.stdout)
Y.dump(A(str), sys.stdout)

输出:

>>> Y.dump(A(str), sys.stdout)
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "C:\Program Files\Anaconda3\lib\site-packages\ruamel\yaml\main.py", line 352, in dump
    return self.dump_all([data], stream, _kw, transform=transform)
  File "C:\Program Files\Anaconda3\lib\site-packages\ruamel\yaml\main.py", line 383, in dump_all
    self.representer.represent(data)
  File "C:\Program Files\Anaconda3\lib\site-packages\ruamel\yaml\representer.py", line 73, in represent
    node = self.represent_data(data)
  File "C:\Program Files\Anaconda3\lib\site-packages\ruamel\yaml\representer.py", line 101, in represent_data
    node = self.yaml_representers[data_types[0]](self, data)
  File "C:\Program Files\Anaconda3\lib\site-packages\ruamel\yaml\main.py", line 552, in t_y
    tag, data, cls, flow_style=representer.default_flow_style)
  File "C:\Program Files\Anaconda3\lib\site-packages\ruamel\yaml\representer.py", line 371, in represent_yaml_object
    return self.represent_mapping(tag, state, flow_style=flow_style)
  File "C:\Program Files\Anaconda3\lib\site-packages\ruamel\yaml\representer.py", line 206, in represent_mapping
    node_value = self.represent_data(item_value)
  File "C:\Program Files\Anaconda3\lib\site-packages\ruamel\yaml\representer.py", line 101, in represent_data
    node = self.yaml_representers[data_types[0]](self, data)
  File "C:\Program Files\Anaconda3\lib\site-packages\ruamel\yaml\main.py", line 492, in t_y
    tag, data, cls, flow_style=representer.default_flow_style)
  File "C:\Program Files\Anaconda3\lib\site-packages\ruamel\yaml\representer.py", line 371, in represent_yaml_object
    return self.represent_mapping(tag, state, flow_style=flow_style)
  File "C:\Program Files\Anaconda3\lib\site-packages\ruamel\yaml\representer.py", line 206, in represent_mapping
    node_value = self.represent_data(item_value)
  File "C:\Program Files\Anaconda3\lib\site-packages\ruamel\yaml\representer.py", line 111, in represent_data
    node = self.yaml_representers[None](self, data)
  File "C:\Program Files\Anaconda3\lib\site-packages\ruamel\yaml\representer.py", line 375, in represent_undefined
    raise RepresenterError("cannot represent an object: %s" % data)
ruamel.yaml.representer.RepresenterError: cannot represent an object: <slot wrapper '__add__' of 'str' objects>
>>> Y.dump(A(str), sys.stdout)
!Aclass
type: !type {}
>>> Y.dump(A(str), sys.stdout)
Traceback (most recent call last):
# same traceback here
ruamel.yaml.representer.RepresenterError: cannot represent an object: <slot wrapper '__add__' of 'str' objects>
>>> Y.dump(A(str), sys.stdout)
!Aclass
type: !type {}
>>> 

YAML 期望转储的是对象,并最终通过写出标量字符串来实现。T1 是类本身,而不是实例对象(T2 和 T3 也一样),这就是问题所在。您可以尝试把每个类引用都包装成一个对象,并在这些对象上使用标签,但我个人认为(IMO)这只会让事情变得更复杂。

归根结底,要做的就是把标量表示(即类的字符串表示)写入文件,因此您不妨修改 A(),让它直接转储该字符串表示,并在加载时再把它读回来:

import sys
from ruamel.yaml import YAML, yaml_object
from ruamel.yaml.compat import StringIO
from ruamel.yaml.scalarstring import DoubleQuotedScalarString


Y = YAML(typ="safe", pure=True)

# ==============

@yaml_object(Y)
class A(object):
    """Wrapper around a type that round-trips through YAML as a tagged scalar.

    The instance is written as ``!Aclass <type name>`` and rebuilt on load by
    looking that name up again.
    """

    yaml_tag = "!Aclass"

    def __init__(self, type):
        # The type object itself is stored; its name is only extracted at
        # dump time in ``to_yaml``.
        self.type = type

    @classmethod
    def to_yaml(cls, representer, node):
        """Represent an ``A`` as a scalar holding the wrapped type's name.

        ``node`` is the ``A`` instance being dumped. Only ``__name__`` is
        written (no module path), so loading relies on the name being
        resolvable by ``from_yaml``.
        """
        tag = cls.yaml_tag
        type_name = u'{}'.format(node.type.__name__)
        return representer.represent_scalar(tag, type_name)

    @classmethod
    def from_yaml(cls, constructor, node):
        """Rebuild an ``A`` from the scalar written by ``to_yaml``.

        A bare name is looked up in this module's globals; a dotted name is
        split at its last dot and resolved as an attribute of the module
        found in ``sys.modules``. A missing name raises ``KeyError`` or
        ``AttributeError`` from the lookups.
        """
        # NOTE(review): the name comes straight from the YAML document, so
        # any module-level name reachable this way can be wrapped.
        dotted = node.value
        if '.' not in dotted:
            return cls(globals()[dotted])
        # The type lives in some other module.
        module_name, attr_name = dotted.rsplit('.', 1)
        return cls(getattr(sys.modules[module_name], attr_name))


class T1(object):
    """Empty class that is only used as a type reference."""


@yaml_object(Y)
class T2(object):
    """Second empty class used as a type reference; decorated with ``yaml_object(Y)``."""


class T3(object):
    """Third empty class used as a type reference."""


# Registered explicitly on Y instead of through the decorator.
Y.register_class(T3)


# Round-trip an A wrapping each type: dump it to an in-memory stream, show
# the YAML text, then load that text back and show the recovered type.
candidate_types = (T1, T2, T3, DoubleQuotedScalarString)
for candidate in candidate_types:
    print('----------------------')
    original = A(candidate)
    print('s', original.type)

    stream = StringIO()
    Y.dump(original, stream)
    print(stream.getvalue())

    restored = Y.load(stream.getvalue())
    print('d', restored.type)

给出:

----------------------
s <class '__main__.T1'>
!Aclass T1
...

d <class '__main__.T1'>
----------------------
s <class '__main__.T2'>
!Aclass T2
...

d <class '__main__.T2'>
----------------------
s <class '__main__.T3'>
!Aclass T3
...

d <class '__main__.T3'>
----------------------
s <class 'ruamel.yaml.scalarstring.DoubleQuotedScalarString'>
!Aclass DoubleQuotedScalarString
...

d <class 'ruamel.yaml.scalarstring.DoubleQuotedScalarString'>

如果 A() 上还有其他属性需要转储/加载,您应该创建一个字典(其中 .type 已转换为字符串),然后转储/加载这个字典。

我认为您并没有发现真正的 bug,您遇到的只是出错后继续操作所带来的副作用:此时 Y 对象(及其组件)处于未定义状态。捕获错误之后,您不应该再重用同一个 YAML() 实例。这一点在文档中应该写得更清楚。因此,如果您想在 for 循环中执行 try/except,则应把 Y = YAML(typ='safe', pure=True) 移到 try 部分里面。

作为对上面回答的补充,我开始修改 A.from_yaml 以使其更安全,不过我还没有处理完 _check_registered() 的所有情况。其思路是:只加载那些 Y 被允许加载其实例的类型,并阻止所有其他类型。请把它当作半成品(WIP):

import sys
from ruamel.yaml import YAML, yaml_object
from ruamel.yaml.compat import StringIO
from ruamel.yaml.scalarstring import DoubleQuotedScalarString


Y = YAML(typ="safe", pure=True)

# ==============

@yaml_object(Y)
class A(object):
    """Wrapper around a type that round-trips through YAML as a tagged scalar.

    Work in progress: on load, the resolved type is passed through
    ``_check_registered`` before it is wrapped.
    """

    yaml_tag = "!Aclass"

    def __init__(self, type):
        # The type object itself is stored; its name is only extracted at
        # dump time in ``to_yaml``.
        self.type = type

    @classmethod
    def to_yaml(cls, representer, node):
        """Represent an ``A`` as a scalar holding the wrapped type's name.

        ``node`` is the ``A`` instance being dumped; only ``__name__`` is
        written (no module path).
        """
        tag = cls.yaml_tag
        type_name = u'{}'.format(node.type.__name__)
        return representer.represent_scalar(tag, type_name)

    @classmethod
    def from_yaml(cls, constructor, node):
        """Resolve the scalar to a type, vet it, and wrap it in an ``A``.

        A bare name is looked up in this module's globals; a dotted name is
        split at its last dot and resolved through ``sys.modules``. The
        result must pass ``_check_registered`` or an ``Exception`` is raised.
        """
        dotted = node.value
        if '.' not in dotted:
            resolved = globals()[dotted]
        else:
            # The type lives in some other module.
            module_name, attr_name = dotted.rsplit('.', 1)
            resolved = getattr(sys.modules[module_name], attr_name)
        cls._check_registered(resolved, constructor, node)
        return cls(resolved)

    @classmethod
    def _check_registered(cls, t, constructor, node):
        """Raise unless ``t`` carries a ``yaml_tag`` known to ``constructor``.

        Only a very basic check. It compares the tag string alone, so any
        class whose ``yaml_tag`` equals a registered tag passes, whether or
        not that class itself was registered. ``node`` is currently unused.

        Raises:
            Exception: if ``t`` has no ``yaml_tag`` attribute, or its tag is
                not a key of ``constructor.yaml_constructors``.
        """
        if not hasattr(t, "yaml_tag"):
            raise Exception("Error: No attribute 'yaml_tag'!")
        if t.yaml_tag not in constructor.yaml_constructors:
            raise Exception("Error: Tag not registered!")

class T1(object):
    """Class used as a type reference; it declares a tag but is never registered on ``Y`` here."""

    yaml_tag = u"!T1"


@yaml_object(Y)
class T2(object):
    """Class used as a type reference; decorated with ``yaml_object(Y)`` under tag ``!T2``."""

    yaml_tag = u"!T2"

    def __init__(self):
        # Visible marker so the demo output shows when an instance is created.
        print("Initializing...")

class T2_bad(object):
    """Stand-in for a malicious class that impersonates ``T2``.

    It is not registered, yet it declares the same tag string as ``T2``.
    """

    yaml_tag = u"!T2"

    def __init__(self):
        # Visible marker so the demo output shows when this constructor runs.
        print("Evil code here!")


class T3(object):
    """Class used as a type reference, carrying the tag ``!T3``."""

    yaml_tag = u"!T3"


# Registered explicitly on Y instead of through the decorator.
Y.register_class(T3)



# Round-trip an A wrapping each type and then instantiate the recovered type.
# Any failure along the way is printed and the loop moves on to the next type.
for candidate in (T1, T2, T2_bad, T3, DoubleQuotedScalarString):
    try:
        print('----------------------')
        stream = StringIO()
        original = A(candidate)
        print('s', original.type)
        Y.dump(original, stream)
        print(stream.getvalue())
        restored = Y.load(stream.getvalue())
        print('d', restored.type)
        # Deliberately a bare expression: it runs the type's constructor, and
        # at an interactive prompt the resulting object's repr is echoed.
        restored.type()
    except Exception as exc:
        print(exc)

输出如下:

----------------------
s <class '__main__.T1'>
!Aclass T1
...

Error: Tag not registered!
----------------------
s <class '__main__.T2'>
!Aclass T2
...

d <class '__main__.T2'>
Initializing...
<__main__.T2 object at 0x0000015B8EC82F60>
----------------------
s <class '__main__.T2_bad'>
!Aclass T2_bad
...

d <class '__main__.T2_bad'>
Evil code here!
<__main__.T2_bad object at 0x0000015B8EC82EF0>
----------------------
s <class '__main__.T3'>
!Aclass T3
...

d <class '__main__.T3'>
<__main__.T3 object at 0x0000015B8EC82E10>
----------------------
s <class 'ruamel.yaml.scalarstring.DoubleQuotedScalarString'>
!Aclass DoubleQuotedScalarString
...

Error: No attribute 'yaml_tag'!

可以看出,它仍然不安全("Evil code" 被执行了),而且也不允许没有定义 yaml_tag 的类型。欢迎随意修改以解决这些问题。