在 Python 中使用 Pydantic 将 JSON 文件中的字符串解析为 range 时的类型注解
Type annotation for parsing a string in a JSON file to a range using Pydantic in Python
我定义了一个 Pydantic 模型类来解析 JSON 文件。其中 range 属性是从形如 "11-34"
的字符串解析出来的(更准确地说,是从匹配下面这个正则表达式的字符串解析出来的):
# Accepts "<first>" or "<first>-<last>", where both parts use only the digits 1-6.
RANGE_STRING_REGEX = r"^(?P<first>[1-6]+)(-(?P<last>[1-6]+))?$"


class RandomTableEvent(BaseModel):
    # Documented with comments rather than a class docstring so that nothing
    # extra can end up in the generated schema.
    name: str
    # Declared as a pattern-constrained string; the validator below replaces
    # the string with a builtin ``range`` object.
    range: Annotated[str, Field(regex=RANGE_STRING_REGEX)]

    @validator("range", allow_reuse=True)
    def convert_range_string_to_range(cls, r) -> "range":
        """Turn a range string such as ``"11-34"`` into a builtin ``range``.

        The upper bound is inclusive; a string without a ``-last`` part
        yields a one-element range.
        """
        parsed = re.fullmatch(RANGE_STRING_REGEX, r)
        groups = parsed.groupdict()
        lower = int(groups["first"])
        upper_text = groups["last"]
        if upper_text:
            upper = int(upper_text)
        else:
            upper = lower
        return range(lower, upper + 1)
生成的 JSON 模式(schema)是有效的,验证也能通过。
但严格来说,类中 range 属性的类型注解并不正确:注解写的是字符串,而验证器函数会把该属性从字符串转换为 range
对象。
怎样才能正确地标注这个属性,同时仍然保持模式生成正常工作?
是否有其他方式来处理这种隐式类型转换(例如,Pydantic 会自动把字符串转换为 int——自定义类型是否也有类似的机制)?
range 不是 pydantic 支持的类型,直接把它用作字段类型会在生成 JSON 模式时报错;
不过 pydantic 支持自定义数据类型(Custom Data Types),其文档中写道:
You can also define your own custom data types. There are several ways to achieve it.
Classes with `__get_validators__`
You use a custom class with a classmethod `__get_validators__`. It will be called to get validators to parse and validate the input data.
但是这个自定义数据类型不能继承自 range,因为 range 是 final 类(不允许被子类化)。
因此,可以创建一个在内部持有 range 对象、并对外暴露 range 各个方法的自定义数据类型:
它用起来和 range 一样,但并不是 range(isinstance(..., range) 的结果为 False)。
同一份 pydantic 文档还说明了如何使用 __modify_schema__ 方法来定制自定义数据类型的 JSON 模式。
完整示例:
import re
from typing import Any, Callable, Dict, Iterator, SupportsIndex, Union
from pydantic import BaseModel
class Range:
    """Read-only, ``range``-like value parsed from strings such as ``"11-34"``.

    Builtin ``range`` cannot be subclassed, so this class wraps one and
    forwards the ``range`` API to it. The ``__get_validators__`` and
    ``__modify_schema__`` hooks let it be used as a pydantic field type.

    Accepted strings are ``"<first>"`` or ``"<first>-<last>"`` where both
    parts consist only of the digits 1-6. ``last`` is inclusive. If ``last``
    is smaller than ``first`` the resulting range is empty.
    """

    _RANGE_STRING_REGEX = r"^(?P<first>[1-6]+)(-(?P<last>[1-6]+))?$"

    def __init__(self, r: range) -> None:
        """Wrap an existing builtin ``range``."""
        self._range = r

    @classmethod
    def __get_validators__(cls) -> Iterator[Callable[[Any], Any]]:
        """Yield the validators used to parse input for this type."""
        yield cls.validate

    @classmethod
    def validate(cls, v: Any) -> "Range":
        """Parse ``v`` into a ``Range``.

        Args:
            v: A range string, or an already-built ``Range``.

        Returns:
            The parsed ``Range`` (``v`` itself if it already is one).

        Raises:
            ValueError: If ``v`` is neither a ``Range`` nor a string, or if
                the string does not match the expected pattern.
        """
        # Already-validated values pass through unchanged, so re-validating
        # model data that holds a Range (e.g. Model(**model.dict())) works.
        if isinstance(v, cls):
            return v
        if not isinstance(v, str):
            raise ValueError("expected string")
        match = re.fullmatch(cls._RANGE_STRING_REGEX, v)
        if not match:
            raise ValueError("invalid string")
        match_groups = match.groupdict()
        first = int(match_groups["first"])
        # A lone number denotes a one-element range.
        last = int(match_groups["last"]) if match_groups["last"] else first
        # ``last`` is inclusive in the string form, hence the + 1.
        return cls(range(first, last + 1))

    @classmethod
    def __modify_schema__(cls, field_schema: Dict[str, Any]) -> None:
        """Describe the field as a pattern-constrained string in the schema."""
        field_schema["pattern"] = cls._RANGE_STRING_REGEX
        field_schema["type"] = "string"

    # --- range API, forwarded to the wrapped range ---

    @property
    def start(self) -> int:
        return self._range.start

    @property
    def stop(self) -> int:
        return self._range.stop

    @property
    def step(self) -> int:
        return self._range.step

    def count(self, value: int) -> int:
        return self._range.count(value)

    def index(self, value: int) -> int:
        return self._range.index(value)

    def __len__(self) -> int:
        return len(self._range)

    def __contains__(self, o: object) -> bool:
        return o in self._range

    def __iter__(self) -> Iterator[int]:
        return iter(self._range)

    def __getitem__(self, key: Union[SupportsIndex, slice]) -> Union[int, range]:
        """Return one element for an index, or a builtin ``range`` for a slice."""
        return self._range[key]

    def __reversed__(self) -> Iterator[int]:
        return reversed(self._range)

    def __eq__(self, other: object) -> bool:
        """Compare by the wrapped range; builtin ``range`` objects are accepted too."""
        if isinstance(other, Range):
            return self._range == other._range
        if isinstance(other, range):
            return self._range == other
        return NotImplemented

    def __hash__(self) -> int:
        # Defined together with __eq__ so equal values hash alike.
        return hash(self._range)

    def __repr__(self) -> str:
        # Deliberately mirrors the wrapped range's repr.
        return repr(self._range)
# Pydantic model with a plain string ``name`` and a ``range`` field.
# Documented with comments rather than a class docstring so that nothing
# extra can end up in the generated schema.
class RandomTableEvent(BaseModel):
    name: str
    # Uses the custom Range type, whose validator parses the input string.
    range: Range
# Build one event from a range string, then show the parsed value, the
# generated JSON schema, and a few range-style operations on the field.
event = RandomTableEvent(name="foo", range="11-34")
parsed_range = event.range

print("event:", event)
print("event.range:", parsed_range)
print("schema:", event.schema_json(indent=2))
print("is instance of range:", isinstance(parsed_range, range))
print("event.range.start:", parsed_range.start)
print("event.range.stop:", parsed_range.stop)
print("event.range[0:5]", parsed_range[0:5])
print("last 3 elements:", list(parsed_range[-3:]))
输出:
event: name='foo' range=range(11, 35)
event.range: range(11, 35)
schema: {
"title": "RandomTableEvent",
"type": "object",
"properties": {
"name": {
"title": "Name",
"type": "string"
},
"range": {
"title": "Range",
"pattern": "^(?P<first>[1-6]+)(-(?P<last>[1-6]+))?$",
"type": "string"
}
},
"required": [
"name",
"range"
]
}
is instance of range: False
event.range.start: 11
event.range.stop: 35
event.range[0:5] range(11, 16)
last 3 elements: [32, 33, 34]
我定义了一个 Pydantic 模型类来解析 JSON 文件。其中 range 属性是从形如 "11-34"
的字符串解析出来的(更准确地说,是从匹配下面这个正则表达式的字符串解析出来的):
# Accepts "<first>" or "<first>-<last>", where both parts use only the digits 1-6.
RANGE_STRING_REGEX = r"^(?P<first>[1-6]+)(-(?P<last>[1-6]+))?$"


class RandomTableEvent(BaseModel):
    # Documented with comments rather than a class docstring so that nothing
    # extra can end up in the generated schema.
    name: str
    # Declared as a pattern-constrained string; the validator below replaces
    # the string with a builtin ``range`` object.
    range: Annotated[str, Field(regex=RANGE_STRING_REGEX)]

    @validator("range", allow_reuse=True)
    def convert_range_string_to_range(cls, r) -> "range":
        """Turn a range string such as ``"11-34"`` into a builtin ``range``.

        The upper bound is inclusive; a string without a ``-last`` part
        yields a one-element range.
        """
        parsed = re.fullmatch(RANGE_STRING_REGEX, r)
        groups = parsed.groupdict()
        lower = int(groups["first"])
        upper_text = groups["last"]
        if upper_text:
            upper = int(upper_text)
        else:
            upper = lower
        return range(lower, upper + 1)
生成的 JSON 模式(schema)是有效的,验证也能通过。
但严格来说,类中 range 属性的类型注解并不正确:注解写的是字符串,而验证器函数会把该属性从字符串转换为 range
对象。
怎样才能正确地标注这个属性,同时仍然保持模式生成正常工作? 是否有其他方式来处理这种隐式类型转换(例如,Pydantic 会自动把字符串转换为 int——自定义类型是否也有类似的机制)?
range 不是 pydantic 支持的类型,直接把它用作字段类型会在生成 JSON 模式时报错;
不过 pydantic 支持自定义数据类型(Custom Data Types),其文档中写道:
You can also define your own custom data types. There are several ways to achieve it.
Classes with `__get_validators__`
You use a custom class with a classmethod `__get_validators__`. It will be called to get validators to parse and validate the input data.
但是这个自定义数据类型不能继承自 range,因为 range 是 final 类(不允许被子类化)。
因此,可以创建一个在内部持有 range 对象、并对外暴露 range 各个方法的自定义数据类型:
它用起来和 range 一样,但并不是 range(isinstance(..., range) 的结果为 False)。
同一份 pydantic 文档还说明了如何使用 __modify_schema__ 方法来定制自定义数据类型的 JSON 模式。
完整示例:
import re
from typing import Any, Callable, Dict, Iterator, SupportsIndex, Union
from pydantic import BaseModel
class Range:
    """Read-only, ``range``-like value parsed from strings such as ``"11-34"``.

    Builtin ``range`` cannot be subclassed, so this class wraps one and
    forwards the ``range`` API to it. The ``__get_validators__`` and
    ``__modify_schema__`` hooks let it be used as a pydantic field type.

    Accepted strings are ``"<first>"`` or ``"<first>-<last>"`` where both
    parts consist only of the digits 1-6. ``last`` is inclusive. If ``last``
    is smaller than ``first`` the resulting range is empty.
    """

    _RANGE_STRING_REGEX = r"^(?P<first>[1-6]+)(-(?P<last>[1-6]+))?$"

    def __init__(self, r: range) -> None:
        """Wrap an existing builtin ``range``."""
        self._range = r

    @classmethod
    def __get_validators__(cls) -> Iterator[Callable[[Any], Any]]:
        """Yield the validators used to parse input for this type."""
        yield cls.validate

    @classmethod
    def validate(cls, v: Any) -> "Range":
        """Parse ``v`` into a ``Range``.

        Args:
            v: A range string, or an already-built ``Range``.

        Returns:
            The parsed ``Range`` (``v`` itself if it already is one).

        Raises:
            ValueError: If ``v`` is neither a ``Range`` nor a string, or if
                the string does not match the expected pattern.
        """
        # Already-validated values pass through unchanged, so re-validating
        # model data that holds a Range (e.g. Model(**model.dict())) works.
        if isinstance(v, cls):
            return v
        if not isinstance(v, str):
            raise ValueError("expected string")
        match = re.fullmatch(cls._RANGE_STRING_REGEX, v)
        if not match:
            raise ValueError("invalid string")
        match_groups = match.groupdict()
        first = int(match_groups["first"])
        # A lone number denotes a one-element range.
        last = int(match_groups["last"]) if match_groups["last"] else first
        # ``last`` is inclusive in the string form, hence the + 1.
        return cls(range(first, last + 1))

    @classmethod
    def __modify_schema__(cls, field_schema: Dict[str, Any]) -> None:
        """Describe the field as a pattern-constrained string in the schema."""
        field_schema["pattern"] = cls._RANGE_STRING_REGEX
        field_schema["type"] = "string"

    # --- range API, forwarded to the wrapped range ---

    @property
    def start(self) -> int:
        return self._range.start

    @property
    def stop(self) -> int:
        return self._range.stop

    @property
    def step(self) -> int:
        return self._range.step

    def count(self, value: int) -> int:
        return self._range.count(value)

    def index(self, value: int) -> int:
        return self._range.index(value)

    def __len__(self) -> int:
        return len(self._range)

    def __contains__(self, o: object) -> bool:
        return o in self._range

    def __iter__(self) -> Iterator[int]:
        return iter(self._range)

    def __getitem__(self, key: Union[SupportsIndex, slice]) -> Union[int, range]:
        """Return one element for an index, or a builtin ``range`` for a slice."""
        return self._range[key]

    def __reversed__(self) -> Iterator[int]:
        return reversed(self._range)

    def __eq__(self, other: object) -> bool:
        """Compare by the wrapped range; builtin ``range`` objects are accepted too."""
        if isinstance(other, Range):
            return self._range == other._range
        if isinstance(other, range):
            return self._range == other
        return NotImplemented

    def __hash__(self) -> int:
        # Defined together with __eq__ so equal values hash alike.
        return hash(self._range)

    def __repr__(self) -> str:
        # Deliberately mirrors the wrapped range's repr.
        return repr(self._range)
# Pydantic model with a plain string ``name`` and a ``range`` field.
# Documented with comments rather than a class docstring so that nothing
# extra can end up in the generated schema.
class RandomTableEvent(BaseModel):
    name: str
    # Uses the custom Range type, whose validator parses the input string.
    range: Range
# Build one event from a range string, then show the parsed value, the
# generated JSON schema, and a few range-style operations on the field.
event = RandomTableEvent(name="foo", range="11-34")
parsed_range = event.range

print("event:", event)
print("event.range:", parsed_range)
print("schema:", event.schema_json(indent=2))
print("is instance of range:", isinstance(parsed_range, range))
print("event.range.start:", parsed_range.start)
print("event.range.stop:", parsed_range.stop)
print("event.range[0:5]", parsed_range[0:5])
print("last 3 elements:", list(parsed_range[-3:]))
输出:
event: name='foo' range=range(11, 35)
event.range: range(11, 35)
schema: {
"title": "RandomTableEvent",
"type": "object",
"properties": {
"name": {
"title": "Name",
"type": "string"
},
"range": {
"title": "Range",
"pattern": "^(?P<first>[1-6]+)(-(?P<last>[1-6]+))?$",
"type": "string"
}
},
"required": [
"name",
"range"
]
}
is instance of range: False
event.range.start: 11
event.range.stop: 35
event.range[0:5] range(11, 16)
last 3 elements: [32, 33, 34]