如何使用数据类制作 "keyword-only" 字段?
How to make "keyword-only" fields with dataclasses?
自 Python 3.0 起,可以把函数参数声明为仅限关键字(keyword-only)参数:
class S3Obj:
    """Plain class whose ``storage_class`` argument is keyword-only.

    ``bucket`` and ``key`` may be passed positionally; ``storage_class``
    sits after the bare ``*`` and therefore must be passed by keyword.
    """

    def __init__(self, bucket, key, *, storage_class='Standard'):
        # Store the constructor arguments unchanged on the instance.
        self.bucket, self.key = bucket, key
        self.storage_class = storage_class
如何使用dataclasses获得那种签名?像这样,但最好没有 SyntaxError
:
@dataclass
class S3Obj:
bucket: str
key: str
*
storage_class: str = 'Standard'
理想情况下是声明式的,但使用 __post_init__
钩子 and/or 替换 class 装饰器也很好 - 只要代码可重用。
编辑: 可能类似于此语法,使用省略号文字
@mydataclass
class S3Obj:
bucket: str
key: str
...
storage_class: str = 'Standard'
更新:在 Python 3.10 中,有一个新的 dataclasses.KW_ONLY
哨兵,其工作方式如下:
@dataclasses.dataclass
class Example:
    a: int
    b: int
    # Sentinel pseudo-field (Python 3.10+): the fields declared after this
    # line are keyword-only in the generated __init__.
    _: dataclasses.KW_ONLY
    c: int
    d: int
KW_ONLY
伪字段之后的所有字段都是仅限关键字(keyword-only)字段。
dataclasses.dataclass
装饰器还有一个 kw_only
参数,它会把所有字段都设为仅限关键字字段:
# kw_only=True (Python 3.10+) makes every field of this class keyword-only.
@dataclasses.dataclass(kw_only=True)
class Example:
    a: int
    b: int
也可以将 kw_only=True
传递给 dataclasses.field
以将单个字段标记为仅限关键字。
如果非仅限关键字字段出现在仅限关键字字段之后(可能由继承造成,或由于单独把某些字段标记为仅限关键字),仅限关键字字段会被重新排到其他字段之后,但这只是出于 __init__
的目的。其他数据类功能将保持声明的顺序。这种重新排序令人困惑,应该避免。
前Python3.10答案:
这样做时,您不会从 dataclasses
那里得到太多帮助。没有办法说一个字段应该由仅关键字参数初始化,并且 __post_init__
挂钩不知道原始构造函数参数是否由关键字传递。此外,没有很好的方法来反省 InitVar
s,更不用说将 InitVar
s 标记为仅关键字了。
至少,您必须替换生成的 __init__
。可能最简单的方法就是手动定义 __init__
。如果您不想这样做,可能最可靠的方法是创建字段对象并在 metadata
中将它们标记为 kwonly,然后在您自己的装饰器中检查元数据。这比听起来更复杂:
import dataclasses
import functools
import inspect
# Helper to make calling field() less verbose
def kwonly(default=dataclasses.MISSING, **kwargs):
    """Build a ``dataclasses.field`` tagged as keyword-only.

    The tag is the metadata entry ``'kwonly': True``, which the
    ``mydataclass`` decorator looks for.

    Args:
        default: Default value for the field; ``dataclasses.MISSING`` means
            the field has no plain default.
        **kwargs: Any other ``dataclasses.field`` arguments. A ``metadata``
            mapping, if given, is copied and extended, never modified.

    Returns:
        The ``dataclasses.Field`` object created by ``dataclasses.field``.
    """
    # Work on a copy so a mapping shared by the caller (possibly between
    # several fields, or a read-only mapping) is never mutated in place.
    metadata = dict(kwargs.pop('metadata', None) or {})
    metadata['kwonly'] = True
    return dataclasses.field(default=default, metadata=metadata, **kwargs)
def mydataclass(_cls=None, *, init=True, **kwargs):
    """Variant of ``dataclasses.dataclass`` that enforces ``kwonly`` fields.

    Fields whose ``metadata`` carries a truthy ``'kwonly'`` entry must be
    passed by keyword to the generated ``__init__``. Usable both bare
    (``@mydataclass``) and with options (``@mydataclass(frozen=True)``).

    Args:
        _cls: The class to decorate, or None when only options are given.
        init: Forwarded to ``dataclasses.dataclass``. When false, or when the
            class defines its own ``__init__``, nothing is wrapped.
        **kwargs: Remaining options, forwarded to ``dataclasses.dataclass``.

    Returns:
        The processed class, or a partial waiting for the class when
        ``_cls`` is None.

    Raises:
        TypeError: If a non-init field is marked kwonly, if a non-kwonly init
            field follows a kwonly one, or if an InitVar follows the kwonly
            fields.
    """
    if _cls is None:
        # Called with options only. ``init`` is a named parameter, so it is
        # not part of ``kwargs`` and has to be forwarded explicitly.
        return functools.partial(mydataclass, init=init, **kwargs)

    no_generated_init = (not init or '__init__' in _cls.__dict__)
    # Forward ``init`` so that init=False really suppresses __init__ generation.
    _cls = dataclasses.dataclass(_cls, init=init, **kwargs)
    if no_generated_init:
        # No generated __init__. The user will have to provide __init__,
        # and they probably already have. We assume their __init__ does
        # what they want.
        return _cls

    fields = dataclasses.fields(_cls)
    if any(field.metadata.get('kwonly') and not field.init for field in fields):
        raise TypeError('Non-init field marked kwonly')

    # From this point on, ignore non-init fields - but we don't know
    # about InitVars yet.
    init_fields = [field for field in fields if field.init]
    for i, field in enumerate(init_fields):
        if field.metadata.get('kwonly'):
            first_kwonly = field.name
            num_kwonly = len(init_fields) - i
            break
    else:
        # No kwonly fields. Why were we called? Assume there was a reason.
        return _cls

    if not all(field.metadata.get('kwonly') for field in init_fields[-num_kwonly:]):
        raise TypeError('non-kwonly init fields following kwonly fields')

    # kwonly fields with neither a default nor a default_factory are required.
    required_kwonly = [field.name for field in init_fields[-num_kwonly:]
                       if field.default is field.default_factory is dataclasses.MISSING]

    original_init = _cls.__init__

    # Time to handle InitVars. This is going to get ugly.
    # InitVars don't show up in fields(). They show up in __annotations__,
    # but the current dataclasses implementation doesn't understand string
    # annotations, and we want an implementation that's robust against
    # changes in string annotation handling.
    # We could inspect __post_init__, except there doesn't have to be a
    # __post_init__. (It'd be weird to use InitVars with no __post_init__,
    # but it's allowed.)
    # As far as I can tell, that leaves inspecting __init__ parameters as
    # the only option.
    init_params = tuple(inspect.signature(original_init).parameters)
    if init_params[-num_kwonly] != first_kwonly:
        # InitVars following kwonly fields. We could adopt a convention like
        # "InitVars after kwonly are kwonly" - in fact, we could have adopted
        # "all fields after kwonly are kwonly" too - but it seems too likely
        # to cause confusion with inheritance.
        raise TypeError('InitVars after kwonly fields.')
    # -1 to exclude self from this count.
    max_positional = len(init_params) - num_kwonly - 1

    @functools.wraps(original_init)
    def __init__(self, *args, **kwargs):
        if len(args) > max_positional:
            raise TypeError('Too many positional arguments')
        check_required_kwargs(kwargs, required_kwonly)
        return original_init(self, *args, **kwargs)
    _cls.__init__ = __init__

    return _cls
def check_required_kwargs(kwargs, required):
    """Raise ``TypeError`` if any name in ``required`` is absent from ``kwargs``.

    Not strictly necessary, but without this check the error for a missing
    required keyword-only argument would describe it as positional.

    Args:
        kwargs: Mapping of the keyword arguments actually supplied.
        required: Iterable of keyword names that must be present.

    Raises:
        TypeError: Listing every required name missing from ``kwargs``.
    """
    absent = [name for name in required if name not in kwargs]
    if absent:
        # The wording deliberately does not mirror the interpreter's own
        # message exactly.
        raise TypeError(f"__init__ missing required keyword-only argument(s): {absent}")
用法示例:
# Usage example: bucket and key are ordinary fields; storage_class is built
# with kwonly(), which tags the field with the 'kwonly' metadata entry that
# mydataclass checks for.
@mydataclass
class S3Obj:
    bucket: str
    key: str
    storage_class: str = kwonly('Standard')
这已经过一些测试,但没有我想要的那么彻底。
您无法获得您使用 ...
建议的语法,因为 ...
不会执行元类或装饰器可以看到的任何内容。您可以获得与实际触发名称查找或分配的东西非常接近的东西,例如 kwonly_start = True
,因此元类可以看到它发生。然而,编写一个健壮的实现是很复杂的,因为有很多事情需要专门处理。继承、typing.ClassVar
、dataclasses.InitVar
、注解中的前向引用等处理不当都会出问题。继承可能会导致最多的问题。
不处理所有繁琐位的概念验证可能如下所示:
# Does not handle inheritance, InitVar, ClassVar, or anything else
# I'm forgetting.
class POCMetaDict(dict):
    """Class-body namespace that notes where ``kwonly_start`` is assigned.

    When the key ``'kwonly_start'`` is set, the number of annotations
    recorded so far is stored under ``'__non_kwonly'``.
    """

    def __setitem__(self, key, item):
        # __setitem__ instead of __getitem__ because __getitem__ is
        # easier to trigger by accident.
        if key == 'kwonly_start':
            # '__annotations__' may not exist yet when the marker precedes
            # every annotated field; count that as zero instead of raising
            # KeyError.
            # NOTE(review): relies on annotations being stored eagerly in the
            # class namespace - confirm on interpreters with lazily
            # evaluated annotations.
            self['__non_kwonly'] = len(self.get('__annotations__', ()))
        super().__setitem__(key, item)
class POCMeta(type):
    """Proof-of-concept metaclass: fields after ``kwonly_start`` are keyword-only.

    The class body runs in a ``POCMetaDict``; the resulting class is turned
    into a dataclass, and its ``__init__`` is wrapped to cap the number of
    positional arguments at the count recorded under ``'__non_kwonly'``.
    """

    @classmethod
    def __prepare__(cls, name, bases, **kwargs):
        return POCMetaDict()

    def __new__(cls, name, bases, classdict, **kwargs):
        # Default to None so classes without the marker (subclasses
        # included) are accepted instead of failing with KeyError.
        classdict.pop('kwonly_start', None)
        non_kwonly = classdict.pop('__non_kwonly', None)

        newcls = super().__new__(cls, name, bases, classdict, **kwargs)
        # Use the qualified name: only the dataclasses module is imported.
        newcls = dataclasses.dataclass(newcls)

        if non_kwonly is None:
            # No marker was seen: leave the generated __init__ untouched.
            return newcls

        original_init = newcls.__init__

        @functools.wraps(original_init)
        def __init__(self, *args, **kwargs):
            if len(args) > non_kwonly:
                raise TypeError('Too many positional arguments')
            return original_init(self, *args, **kwargs)

        newcls.__init__ = __init__
        return newcls
用法如下:
# Usage example for the POCMeta proof of concept: the kwonly_start assignment
# is the marker; the field annotated after it is meant to be keyword-only.
class S3Obj(metaclass=POCMeta):
    bucket: str
    key: str
    kwonly_start = True
    storage_class: str = 'Standard'
这是未经测试的。
我想知道为什么这不是数据类的一部分 API,这对我来说很重要。
如果所有参数都是关键字参数可能会更简单一些,以下就足够了吗?
from dataclasses import dataclass
from functools import wraps
def kwargs_only(cls):
    """Return a keyword-arguments-only constructor function for ``cls``.

    The result is a plain function (not the class) that forwards its keyword
    arguments to ``cls``; ``functools.wraps`` copies the class's metadata
    onto it.
    """
    def call(**kwargs):
        # Accepts no positional parameters, so positional use raises TypeError.
        return cls(**kwargs)

    return wraps(cls)(call)
# Usage example: kwargs_only is applied on top of @dataclass, so the name
# Coordinates ends up bound to the keyword-arguments-only wrapper function.
@kwargs_only
@dataclass
class Coordinates:
    latitude: float = 0
    longitude: float = 0
这并不完美,因为使用位置参数时的错误是指 call
:
--------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-24-fb588c816ecf> in <module>
----> 1 c = Coordinates(1, longitude=2)
2 help(c)
TypeError: call() takes 0 positional arguments but 1 was given
同样,数据类的构造函数文档已过时且未反映新约束。
如果只有一些关键字字段,可能是这个?
def kwargs(*keywords):
    """Class-decorator factory that makes the named parameters keyword-only.

    The decorated class is replaced by a wrapper function that rejects calls
    passing any of ``keywords`` positionally. Names without a default in the
    constructor signature must be supplied; names with a default may be
    omitted.

    Args:
        *keywords: Constructor parameter names to enforce as keyword-only.

    Returns:
        A decorator taking the class and returning the checking wrapper.

    Raises:
        TypeError: From the wrapper, when a listed name is passed
            positionally or a required listed name is missing.
    """
    # Imported locally so this snippet does not depend on other imports.
    import inspect

    def decorator(cls):
        try:
            parameters = inspect.signature(cls).parameters
        except (TypeError, ValueError):
            parameters = None

        if parameters is None:
            # Signature unavailable: demand every listed name by keyword.
            max_positional = None
            required = keywords
        else:
            positional_kinds = (inspect.Parameter.POSITIONAL_ONLY,
                                inspect.Parameter.POSITIONAL_OR_KEYWORD)
            positional = [name for name, param in parameters.items()
                          if param.kind in positional_kinds]
            # Positional arguments may only fill the slots before the first
            # listed name.
            slots = [positional.index(kw) for kw in keywords if kw in positional]
            max_positional = min(slots, default=None)
            # Only names lacking a default (or unknown to the signature)
            # have to be present on every call.
            required = tuple(
                kw for kw in keywords
                if kw not in parameters
                or parameters[kw].default is inspect.Parameter.empty
            )

        @wraps(cls)
        def call(*args, **kwargs):
            too_many = max_positional is not None and len(args) > max_positional
            if too_many or any(kw not in kwargs for kw in required):
                raise TypeError(f"{cls.__name__}.__init__() requires {keywords} as keyword arguments")
            return cls(*args, **kwargs)
        return call
    return decorator
# Usage example: 'longitude' is the name handed to kwargs(), so it cannot be
# passed positionally when constructing Coordinates.
@kwargs('longitude')
@dataclass(frozen=True)
class Coordinates:
    latitude: float
    longitude: float = 0
自 Python 3.0 起,可以把函数参数声明为仅限关键字(keyword-only)参数:
class S3Obj:
    """Plain class whose ``storage_class`` argument is keyword-only.

    ``bucket`` and ``key`` may be passed positionally; ``storage_class``
    sits after the bare ``*`` and therefore must be passed by keyword.
    """

    def __init__(self, bucket, key, *, storage_class='Standard'):
        # Store the constructor arguments unchanged on the instance.
        self.bucket, self.key = bucket, key
        self.storage_class = storage_class
如何使用dataclasses获得那种签名?像这样,但最好没有 SyntaxError
:
@dataclass
class S3Obj:
bucket: str
key: str
*
storage_class: str = 'Standard'
理想情况下是声明式的,但使用 __post_init__
钩子 and/or 替换 class 装饰器也很好 - 只要代码可重用。
编辑: 可能类似于此语法,使用省略号文字
@mydataclass
class S3Obj:
bucket: str
key: str
...
storage_class: str = 'Standard'
更新:在 Python 3.10 中,有一个新的 dataclasses.KW_ONLY
哨兵,其工作方式如下:
@dataclasses.dataclass
class Example:
    a: int
    b: int
    # Sentinel pseudo-field (Python 3.10+): the fields declared after this
    # line are keyword-only in the generated __init__.
    _: dataclasses.KW_ONLY
    c: int
    d: int
KW_ONLY
伪字段之后的所有字段都是仅限关键字(keyword-only)字段。
dataclasses.dataclass
装饰器还有一个 kw_only
参数,它会把所有字段都设为仅限关键字字段:
# kw_only=True (Python 3.10+) makes every field of this class keyword-only.
@dataclasses.dataclass(kw_only=True)
class Example:
    a: int
    b: int
也可以将 kw_only=True
传递给 dataclasses.field
以将单个字段标记为仅限关键字。
如果非仅限关键字字段出现在仅限关键字字段之后(可能由继承造成,或由于单独把某些字段标记为仅限关键字),仅限关键字字段会被重新排到其他字段之后,但这只是出于 __init__
的目的。其他数据类功能将保持声明的顺序。这种重新排序令人困惑,应该避免。
前Python3.10答案:
这样做时,您不会从 dataclasses
那里得到太多帮助。没有办法说一个字段应该由仅关键字参数初始化,并且 __post_init__
挂钩不知道原始构造函数参数是否由关键字传递。此外,没有很好的方法来反省 InitVar
s,更不用说将 InitVar
s 标记为仅关键字了。
至少,您必须替换生成的 __init__
。可能最简单的方法就是手动定义 __init__
。如果您不想这样做,可能最可靠的方法是创建字段对象并在 metadata
中将它们标记为 kwonly,然后在您自己的装饰器中检查元数据。这比听起来更复杂:
import dataclasses
import functools
import inspect
# Helper to make calling field() less verbose
def kwonly(default=dataclasses.MISSING, **kwargs):
    """Build a ``dataclasses.field`` tagged as keyword-only.

    The tag is the metadata entry ``'kwonly': True``, which the
    ``mydataclass`` decorator looks for.

    Args:
        default: Default value for the field; ``dataclasses.MISSING`` means
            the field has no plain default.
        **kwargs: Any other ``dataclasses.field`` arguments. A ``metadata``
            mapping, if given, is copied and extended, never modified.

    Returns:
        The ``dataclasses.Field`` object created by ``dataclasses.field``.
    """
    # Work on a copy so a mapping shared by the caller (possibly between
    # several fields, or a read-only mapping) is never mutated in place.
    metadata = dict(kwargs.pop('metadata', None) or {})
    metadata['kwonly'] = True
    return dataclasses.field(default=default, metadata=metadata, **kwargs)
def mydataclass(_cls=None, *, init=True, **kwargs):
    """Variant of ``dataclasses.dataclass`` that enforces ``kwonly`` fields.

    Fields whose ``metadata`` carries a truthy ``'kwonly'`` entry must be
    passed by keyword to the generated ``__init__``. Usable both bare
    (``@mydataclass``) and with options (``@mydataclass(frozen=True)``).

    Args:
        _cls: The class to decorate, or None when only options are given.
        init: Forwarded to ``dataclasses.dataclass``. When false, or when the
            class defines its own ``__init__``, nothing is wrapped.
        **kwargs: Remaining options, forwarded to ``dataclasses.dataclass``.

    Returns:
        The processed class, or a partial waiting for the class when
        ``_cls`` is None.

    Raises:
        TypeError: If a non-init field is marked kwonly, if a non-kwonly init
            field follows a kwonly one, or if an InitVar follows the kwonly
            fields.
    """
    if _cls is None:
        # Called with options only. ``init`` is a named parameter, so it is
        # not part of ``kwargs`` and has to be forwarded explicitly.
        return functools.partial(mydataclass, init=init, **kwargs)

    no_generated_init = (not init or '__init__' in _cls.__dict__)
    # Forward ``init`` so that init=False really suppresses __init__ generation.
    _cls = dataclasses.dataclass(_cls, init=init, **kwargs)
    if no_generated_init:
        # No generated __init__. The user will have to provide __init__,
        # and they probably already have. We assume their __init__ does
        # what they want.
        return _cls

    fields = dataclasses.fields(_cls)
    if any(field.metadata.get('kwonly') and not field.init for field in fields):
        raise TypeError('Non-init field marked kwonly')

    # From this point on, ignore non-init fields - but we don't know
    # about InitVars yet.
    init_fields = [field for field in fields if field.init]
    for i, field in enumerate(init_fields):
        if field.metadata.get('kwonly'):
            first_kwonly = field.name
            num_kwonly = len(init_fields) - i
            break
    else:
        # No kwonly fields. Why were we called? Assume there was a reason.
        return _cls

    if not all(field.metadata.get('kwonly') for field in init_fields[-num_kwonly:]):
        raise TypeError('non-kwonly init fields following kwonly fields')

    # kwonly fields with neither a default nor a default_factory are required.
    required_kwonly = [field.name for field in init_fields[-num_kwonly:]
                       if field.default is field.default_factory is dataclasses.MISSING]

    original_init = _cls.__init__

    # Time to handle InitVars. This is going to get ugly.
    # InitVars don't show up in fields(). They show up in __annotations__,
    # but the current dataclasses implementation doesn't understand string
    # annotations, and we want an implementation that's robust against
    # changes in string annotation handling.
    # We could inspect __post_init__, except there doesn't have to be a
    # __post_init__. (It'd be weird to use InitVars with no __post_init__,
    # but it's allowed.)
    # As far as I can tell, that leaves inspecting __init__ parameters as
    # the only option.
    init_params = tuple(inspect.signature(original_init).parameters)
    if init_params[-num_kwonly] != first_kwonly:
        # InitVars following kwonly fields. We could adopt a convention like
        # "InitVars after kwonly are kwonly" - in fact, we could have adopted
        # "all fields after kwonly are kwonly" too - but it seems too likely
        # to cause confusion with inheritance.
        raise TypeError('InitVars after kwonly fields.')
    # -1 to exclude self from this count.
    max_positional = len(init_params) - num_kwonly - 1

    @functools.wraps(original_init)
    def __init__(self, *args, **kwargs):
        if len(args) > max_positional:
            raise TypeError('Too many positional arguments')
        check_required_kwargs(kwargs, required_kwonly)
        return original_init(self, *args, **kwargs)
    _cls.__init__ = __init__

    return _cls
def check_required_kwargs(kwargs, required):
    """Raise ``TypeError`` if any name in ``required`` is absent from ``kwargs``.

    Not strictly necessary, but without this check the error for a missing
    required keyword-only argument would describe it as positional.

    Args:
        kwargs: Mapping of the keyword arguments actually supplied.
        required: Iterable of keyword names that must be present.

    Raises:
        TypeError: Listing every required name missing from ``kwargs``.
    """
    absent = [name for name in required if name not in kwargs]
    if absent:
        # The wording deliberately does not mirror the interpreter's own
        # message exactly.
        raise TypeError(f"__init__ missing required keyword-only argument(s): {absent}")
用法示例:
# Usage example: bucket and key are ordinary fields; storage_class is built
# with kwonly(), which tags the field with the 'kwonly' metadata entry that
# mydataclass checks for.
@mydataclass
class S3Obj:
    bucket: str
    key: str
    storage_class: str = kwonly('Standard')
这已经过一些测试,但没有我想要的那么彻底。
您无法获得您使用 ...
建议的语法,因为 ...
不会执行元类或装饰器可以看到的任何内容。您可以获得与实际触发名称查找或分配的东西非常接近的东西,例如 kwonly_start = True
,因此元类可以看到它发生。然而,编写一个健壮的实现是很复杂的,因为有很多事情需要专门处理。继承、typing.ClassVar
、dataclasses.InitVar
、注解中的前向引用等处理不当都会出问题。继承可能会导致最多的问题。
不处理所有繁琐位的概念验证可能如下所示:
# Does not handle inheritance, InitVar, ClassVar, or anything else
# I'm forgetting.
class POCMetaDict(dict):
    """Class-body namespace that notes where ``kwonly_start`` is assigned.

    When the key ``'kwonly_start'`` is set, the number of annotations
    recorded so far is stored under ``'__non_kwonly'``.
    """

    def __setitem__(self, key, item):
        # __setitem__ instead of __getitem__ because __getitem__ is
        # easier to trigger by accident.
        if key == 'kwonly_start':
            # '__annotations__' may not exist yet when the marker precedes
            # every annotated field; count that as zero instead of raising
            # KeyError.
            # NOTE(review): relies on annotations being stored eagerly in the
            # class namespace - confirm on interpreters with lazily
            # evaluated annotations.
            self['__non_kwonly'] = len(self.get('__annotations__', ()))
        super().__setitem__(key, item)
class POCMeta(type):
    """Proof-of-concept metaclass: fields after ``kwonly_start`` are keyword-only.

    The class body runs in a ``POCMetaDict``; the resulting class is turned
    into a dataclass, and its ``__init__`` is wrapped to cap the number of
    positional arguments at the count recorded under ``'__non_kwonly'``.
    """

    @classmethod
    def __prepare__(cls, name, bases, **kwargs):
        return POCMetaDict()

    def __new__(cls, name, bases, classdict, **kwargs):
        # Default to None so classes without the marker (subclasses
        # included) are accepted instead of failing with KeyError.
        classdict.pop('kwonly_start', None)
        non_kwonly = classdict.pop('__non_kwonly', None)

        newcls = super().__new__(cls, name, bases, classdict, **kwargs)
        # Use the qualified name: only the dataclasses module is imported.
        newcls = dataclasses.dataclass(newcls)

        if non_kwonly is None:
            # No marker was seen: leave the generated __init__ untouched.
            return newcls

        original_init = newcls.__init__

        @functools.wraps(original_init)
        def __init__(self, *args, **kwargs):
            if len(args) > non_kwonly:
                raise TypeError('Too many positional arguments')
            return original_init(self, *args, **kwargs)

        newcls.__init__ = __init__
        return newcls
用法如下:
# Usage example for the POCMeta proof of concept: the kwonly_start assignment
# is the marker; the field annotated after it is meant to be keyword-only.
class S3Obj(metaclass=POCMeta):
    bucket: str
    key: str
    kwonly_start = True
    storage_class: str = 'Standard'
这是未经测试的。
我想知道为什么这不是数据类的一部分 API,这对我来说很重要。
如果所有参数都是关键字参数可能会更简单一些,以下就足够了吗?
from dataclasses import dataclass
from functools import wraps
def kwargs_only(cls):
    """Return a keyword-arguments-only constructor function for ``cls``.

    The result is a plain function (not the class) that forwards its keyword
    arguments to ``cls``; ``functools.wraps`` copies the class's metadata
    onto it.
    """
    def call(**kwargs):
        # Accepts no positional parameters, so positional use raises TypeError.
        return cls(**kwargs)

    return wraps(cls)(call)
# Usage example: kwargs_only is applied on top of @dataclass, so the name
# Coordinates ends up bound to the keyword-arguments-only wrapper function.
@kwargs_only
@dataclass
class Coordinates:
    latitude: float = 0
    longitude: float = 0
这并不完美,因为使用位置参数时的错误是指 call
:
--------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-24-fb588c816ecf> in <module>
----> 1 c = Coordinates(1, longitude=2)
2 help(c)
TypeError: call() takes 0 positional arguments but 1 was given
同样,数据类的构造函数文档已过时且未反映新约束。
如果只有一些关键字字段,可能是这个?
def kwargs(*keywords):
    """Class-decorator factory that makes the named parameters keyword-only.

    The decorated class is replaced by a wrapper function that rejects calls
    passing any of ``keywords`` positionally. Names without a default in the
    constructor signature must be supplied; names with a default may be
    omitted.

    Args:
        *keywords: Constructor parameter names to enforce as keyword-only.

    Returns:
        A decorator taking the class and returning the checking wrapper.

    Raises:
        TypeError: From the wrapper, when a listed name is passed
            positionally or a required listed name is missing.
    """
    # Imported locally so this snippet does not depend on other imports.
    import inspect

    def decorator(cls):
        try:
            parameters = inspect.signature(cls).parameters
        except (TypeError, ValueError):
            parameters = None

        if parameters is None:
            # Signature unavailable: demand every listed name by keyword.
            max_positional = None
            required = keywords
        else:
            positional_kinds = (inspect.Parameter.POSITIONAL_ONLY,
                                inspect.Parameter.POSITIONAL_OR_KEYWORD)
            positional = [name for name, param in parameters.items()
                          if param.kind in positional_kinds]
            # Positional arguments may only fill the slots before the first
            # listed name.
            slots = [positional.index(kw) for kw in keywords if kw in positional]
            max_positional = min(slots, default=None)
            # Only names lacking a default (or unknown to the signature)
            # have to be present on every call.
            required = tuple(
                kw for kw in keywords
                if kw not in parameters
                or parameters[kw].default is inspect.Parameter.empty
            )

        @wraps(cls)
        def call(*args, **kwargs):
            too_many = max_positional is not None and len(args) > max_positional
            if too_many or any(kw not in kwargs for kw in required):
                raise TypeError(f"{cls.__name__}.__init__() requires {keywords} as keyword arguments")
            return cls(*args, **kwargs)
        return call
    return decorator
# Usage example: 'longitude' is the name handed to kwargs(), so it cannot be
# passed positionally when constructing Coordinates.
@kwargs('longitude')
@dataclass(frozen=True)
class Coordinates:
    latitude: float
    longitude: float = 0