Pydantic 检查列表字段是否唯一
Pydantic Checking if list field is unique
目前,我正在尝试为 pandas 数据框创建一个 pydantic 模型。我想通过以下方式
检查某一列的值是否唯一:
import pandas as pd
from typing import List
from pydantic import BaseModel
class CustomerRecord(BaseModel):
    """Schema for a single customer row of the DataFrame."""

    id: int
    name: str
    address: str
class CustomerRecordDF(BaseModel):
    """Custom-root model holding the DataFrame as a list of row records."""

    __root__: List[CustomerRecord]
# Sample data: the first and third rows share the address '123 Fake St'.
df = pd.DataFrame(
    {
        'id': [1, 2, 3],
        'name': ['Bob', 'Joe', 'Justin'],
        'address': ['123 Fake St', '125 Fake St', '123 Fake St'],
    }
)
# orient='records' yields one dict per row, matching List[CustomerRecord].
df_dict = df.to_dict(orient='records')
CustomerRecordDF.parse_obj(df_dict)
我现在希望运行验证时失败,因为地址不是唯一的。
下面的代码返回了我需要的结果:
from pydantic import root_validator
class CustomerRecordDF(BaseModel):
    """Custom-root model for DataFrame rows that rejects repeated addresses."""

    __root__: List[CustomerRecord]

    @root_validator(pre=True)
    def unique_values(cls, values):
        """Raise ``ValueError`` when two records carry the same ``address``.

        Declared with ``pre=True``, so it sees the raw input items stored
        under ``'__root__'`` (plain dicts in this example) and indexes them
        by key.
        """
        root_values = values.get('__root__')
        value_set = set()
        for value in root_values:
            address = value['address']
            if address in value_set:
                raise ValueError('Duplicate Address')
            value_set.add(address)
        return values
# Expected to raise ValidationError: '123 Fake St' appears twice in df_dict.
CustomerRecordDF.parse_obj(df_dict)
>>> ValidationError: 1 validation error for CustomerRecordDF
__root__
Duplicate Address (type=value_error)
但我希望能够把这个验证器复用到我创建的其他数据框上,并对多个列(而不仅仅是地址)执行这种唯一性检查。
最好能像下面这样:
from pydantic import root_validator
class CustomerRecordDF(BaseModel):
    """Desired usage: one reusable uniqueness validator per column."""

    __root__: List[CustomerRecord]

    # Each call is meant to produce a root validator bound to one column.
    _validate_unique_name = root_unique_validator('name')
    _validate_unique_address = root_unique_validator('address')
您可以使用内部函数,并配合 allow_reuse 参数:
def root_unique_validator(field):
    """Sketch of a factory returning a reusable pre root validator.

    ``field`` is captured by the inner function through its closure.
    """

    def validator(cls, values):
        # Use the field arg to validate a specific field
        ...

    return root_validator(pre=True, allow_reuse=True)(validator)
完整示例:
import pandas as pd
from typing import List
from pydantic import BaseModel, root_validator
class CustomerRecord(BaseModel):
    """Schema for a single customer row of the DataFrame."""

    id: int
    name: str
    address: str
def root_unique_validator(field):
    """Create a reusable pre root validator enforcing uniqueness of ``field``.

    Args:
        field: Key looked up in every record of the ``__root__`` list.

    Returns:
        The inner function wrapped by
        ``root_validator(pre=True, allow_reuse=True)``, ready to be assigned
        as a class attribute of a model.

    Raises:
        ValueError: Raised by the returned validator when two records hold
            the same value under ``field``.
    """

    def validator(cls, values):
        seen = set()
        for record in values.get("__root__"):
            key = record[field]
            if key in seen:
                raise ValueError(f"Duplicate {field}")
            seen.add(key)
        return values

    # Every call registers an inner function with the same qualified name;
    # allow_reuse=True stops pydantic from rejecting it as a duplicate.
    return root_validator(pre=True, allow_reuse=True)(validator)
class CustomerRecordDF(BaseModel):
    """Custom-root model whose rows must be unique by name and by address."""

    __root__: List[CustomerRecord]

    # One validator per column, both built by the same factory.
    _validate_unique_name = root_unique_validator("name")
    _validate_unique_address = root_unique_validator("address")
# Names are unique here; "123 Fake St" is repeated in the address column.
df = pd.DataFrame(
    {
        "id": [1, 2, 3],
        "name": ["Bob", "Joe", "Justin"],
        "address": ["123 Fake St", "125 Fake St", "123 Fake St"],
    }
)
# One dict per row, the input shape expected by List[CustomerRecord].
df_dict = df.to_dict(orient="records")
CustomerRecordDF.parse_obj(df_dict)
# Output:
# pydantic.error_wrappers.ValidationError: 1 validation error for CustomerRecordDF
# __root__
# Duplicate address (type=value_error)
如果数据中包含重复的名字(name):
# Here goes the most part of the full example above
# Addresses are unique here; "Bob" is repeated in the name column.
df = pd.DataFrame(
    {
        "id": [1, 2, 3],
        "name": ["Bob", "Joe", "Bob"],
        "address": ["123 Fake St", "125 Fake St", "127 Fake St"],
    }
)
# One dict per row, the input shape expected by List[CustomerRecord].
df_dict = df.to_dict(orient="records")
CustomerRecordDF.parse_obj(df_dict)
# Output:
# pydantic.error_wrappers.ValidationError: 1 validation error for CustomerRecordDF
# __root__
# Duplicate name (type=value_error)
您还可以让该函数接收多个字段(field),
并只用一个根验证器来验证所有这些字段。这样可能就不再需要 allow_reuse
参数了。
目前,我正在尝试为 pandas 数据框创建一个 pydantic 模型。我想通过以下方式
检查某一列的值是否唯一:
import pandas as pd
from typing import List
from pydantic import BaseModel
class CustomerRecord(BaseModel):
    """Schema for a single customer row of the DataFrame."""

    id: int
    name: str
    address: str
class CustomerRecordDF(BaseModel):
    """Custom-root model holding the DataFrame as a list of row records."""

    __root__: List[CustomerRecord]
# Sample data: the first and third rows share the address '123 Fake St'.
df = pd.DataFrame(
    {
        'id': [1, 2, 3],
        'name': ['Bob', 'Joe', 'Justin'],
        'address': ['123 Fake St', '125 Fake St', '123 Fake St'],
    }
)
# orient='records' yields one dict per row, matching List[CustomerRecord].
df_dict = df.to_dict(orient='records')
CustomerRecordDF.parse_obj(df_dict)
我现在希望运行验证时失败,因为地址不是唯一的。
下面的代码返回了我需要的结果:
from pydantic import root_validator
class CustomerRecordDF(BaseModel):
    """Custom-root model for DataFrame rows that rejects repeated addresses."""

    __root__: List[CustomerRecord]

    @root_validator(pre=True)
    def unique_values(cls, values):
        """Raise ``ValueError`` when two records carry the same ``address``.

        Declared with ``pre=True``, so it sees the raw input items stored
        under ``'__root__'`` (plain dicts in this example) and indexes them
        by key.
        """
        root_values = values.get('__root__')
        value_set = set()
        for value in root_values:
            address = value['address']
            if address in value_set:
                raise ValueError('Duplicate Address')
            value_set.add(address)
        return values
# Expected to raise ValidationError: '123 Fake St' appears twice in df_dict.
CustomerRecordDF.parse_obj(df_dict)
>>> ValidationError: 1 validation error for CustomerRecordDF
__root__
Duplicate Address (type=value_error)
但我希望能够把这个验证器复用到我创建的其他数据框上,并对多个列(而不仅仅是地址)执行这种唯一性检查。
最好能像下面这样:
from pydantic import root_validator
class CustomerRecordDF(BaseModel):
    """Desired usage: one reusable uniqueness validator per column."""

    __root__: List[CustomerRecord]

    # Each call is meant to produce a root validator bound to one column.
    _validate_unique_name = root_unique_validator('name')
    _validate_unique_address = root_unique_validator('address')
您可以使用内部函数,并配合 allow_reuse 参数:
def root_unique_validator(field):
    """Sketch of a factory returning a reusable pre root validator.

    ``field`` is captured by the inner function through its closure.
    """

    def validator(cls, values):
        # Use the field arg to validate a specific field
        ...

    return root_validator(pre=True, allow_reuse=True)(validator)
完整示例:
import pandas as pd
from typing import List
from pydantic import BaseModel, root_validator
class CustomerRecord(BaseModel):
    """Schema for a single customer row of the DataFrame."""

    id: int
    name: str
    address: str
def root_unique_validator(field):
    """Create a reusable pre root validator enforcing uniqueness of ``field``.

    Args:
        field: Key looked up in every record of the ``__root__`` list.

    Returns:
        The inner function wrapped by
        ``root_validator(pre=True, allow_reuse=True)``, ready to be assigned
        as a class attribute of a model.

    Raises:
        ValueError: Raised by the returned validator when two records hold
            the same value under ``field``.
    """

    def validator(cls, values):
        seen = set()
        for record in values.get("__root__"):
            key = record[field]
            if key in seen:
                raise ValueError(f"Duplicate {field}")
            seen.add(key)
        return values

    # Every call registers an inner function with the same qualified name;
    # allow_reuse=True stops pydantic from rejecting it as a duplicate.
    return root_validator(pre=True, allow_reuse=True)(validator)
class CustomerRecordDF(BaseModel):
    """Custom-root model whose rows must be unique by name and by address."""

    __root__: List[CustomerRecord]

    # One validator per column, both built by the same factory.
    _validate_unique_name = root_unique_validator("name")
    _validate_unique_address = root_unique_validator("address")
# Names are unique here; "123 Fake St" is repeated in the address column.
df = pd.DataFrame(
    {
        "id": [1, 2, 3],
        "name": ["Bob", "Joe", "Justin"],
        "address": ["123 Fake St", "125 Fake St", "123 Fake St"],
    }
)
# One dict per row, the input shape expected by List[CustomerRecord].
df_dict = df.to_dict(orient="records")
CustomerRecordDF.parse_obj(df_dict)
# Output:
# pydantic.error_wrappers.ValidationError: 1 validation error for CustomerRecordDF
# __root__
# Duplicate address (type=value_error)
如果数据中包含重复的名字(name):
# Here goes the most part of the full example above
# Addresses are unique here; "Bob" is repeated in the name column.
df = pd.DataFrame(
    {
        "id": [1, 2, 3],
        "name": ["Bob", "Joe", "Bob"],
        "address": ["123 Fake St", "125 Fake St", "127 Fake St"],
    }
)
# One dict per row, the input shape expected by List[CustomerRecord].
df_dict = df.to_dict(orient="records")
CustomerRecordDF.parse_obj(df_dict)
# Output:
# pydantic.error_wrappers.ValidationError: 1 validation error for CustomerRecordDF
# __root__
# Duplicate name (type=value_error)
您还可以让该函数接收多个字段(field),
并只用一个根验证器来验证所有这些字段。这样可能就不再需要 allow_reuse
参数了。