Skip to content

Latest commit

 

History

History

Folders and files

NameName
Last commit message
Last commit date

parent directory

..
 
 

README.md

pydantic 教學

今天來介紹 pydantic, pydantic 完全支援 IDE,

你可能會說已經有內建的 PEP 484 Type Hints 介紹dataclasses_tutorial.py 了, 為什麼還需要這個 ❓

原因是 pydantic 在處理 json 以及資料驗證上面比較強大 😀

我建議大家還是可以花個時間稍微看一下 😆

安裝

pip install pydantic==2.9.1

官網文件 寫的非常豐富,

這邊就寫一些官方的範例給大家看

教學

Models

Basic model usage

from pydantic import BaseModel


class User(BaseModel):
    id: int
    name: str = 'Jane Doe'

# 正確的初始化
user = User(id=3)

# 錯誤的初始化 (因為定義的是 int)
user = User(id='abc')

# 可以使用 model_dump 來完成 serialized
user.model_dump() # {'id': 3, 'name': 'Jane Doe'}

建立 Nested models

from typing import List, Optional

from pydantic import BaseModel


class Foo(BaseModel):
    count: int
    size: Optional[float] = None


class Bar(BaseModel):
    apple: str = 'x'
    banana: str = 'y'


class Spam(BaseModel):
    foo: Foo
    bars: List[Bar]

# 以下兩種方法都可以
m = Spam(foo={'count': 4}, bars=[{'apple': 'x1'}, {'apple': 'x2'}])
m1 = Spam(foo=Foo(count=4), bars=[Bar(apple='x1'), Bar(apple='x2')])
print(m)
print(m1)

Error handling

from pydantic import BaseModel, ValidationError


class User(BaseModel):
    id: int
    name: str = 'Jane Doe'

data = dict(
    id=['1', 2, 'bad'],
)

try:
    User(**data)
except ValidationError as e:
    print(e)

Generic models

from typing import Generic, List, Optional, TypeVar

from pydantic import BaseModel, ValidationError

DataT = TypeVar('DataT')


class DataModel(BaseModel):
    numbers: List[int]
    people: List[str]


class Response(BaseModel, Generic[DataT]):
    data: Optional[DataT] = None


print(Response[int](data=1))
#> data=1
print(Response[str](data='value'))
#> data='value'
print(Response[str](data='value').model_dump())
#> {'data': 'value'}

data = DataModel(numbers=[1, 2, 3], people=[])
print(Response[DataModel](data=data).model_dump())
#> {'data': {'numbers': [1, 2, 3], 'people': []}}
try:
    Response[int](data='value') # 型態錯誤
except ValidationError as e:
    print(e)

parametrized generic

from typing import Generic, TypeVar

from pydantic import BaseModel, ValidationError

T = TypeVar('T')


class ResponseModel(BaseModel, Generic[T]):
    content: T


class Product(BaseModel):
    name: str
    price: float


class Order(BaseModel):
    id: int
    product: ResponseModel[Product]


product = Product(name='Apple', price=0.5)
response = ResponseModel[Product](content=product)
order = Order(id=1, product=response)
print(repr(order))

try:
    response = ResponseModel[int](content=product)
except ValidationError as e:
    print(e)

Validation of unparametrized type variables

from typing import Generic

from typing_extensions import TypeVar

from pydantic import BaseModel, ValidationError

T = TypeVar('T')
U = TypeVar('U', bound=int)
V = TypeVar('V', default=str)


class Model(BaseModel, Generic[T, U, V]):
    t: T
    u: U
    v: V


print(Model(t='t', u=1, v='v'))
#> t='t' u=1 v='v'

try:
    Model(t='t', u='u', v=1)
except ValidationError as exc:
    print(exc)

Faux immutability

如果 frozen 設定為 False, 是可以動態修改變數的屬性的.

from pydantic import BaseModel, ConfigDict, ValidationError


class FooBarModel(BaseModel):
    model_config = ConfigDict(frozen=True)

    a: str
    b: dict


foobar = FooBarModel(a='hello', b={'apple': 'pear'})

try:
    foobar.a = 'different'
except ValidationError as e:
    print(e)
    """
    1 validation error for FooBarModel
    a
      Instance is frozen [type=frozen_instance, input_value='different', input_type=str]
    """

Required fields

以下三種方法都代表必填

from pydantic import BaseModel, Field


class Model(BaseModel):
    a: int
    b: int = ... # emphasize that a field is required
    c: int = Field(..., alias='C') # emphasize that a field is required

model = Model(a=1, b=2, C=3) # 使用 C 是因為設定了 alias
# model = Model(a=1, b=2, c=3) # ValidationError
print(model)

Fields with non-hashable default values

from typing import Dict, List

from pydantic import BaseModel


class Model(BaseModel):
    item_counts: List[Dict[str, int]] = [{}]


m1 = Model()
m1.item_counts[0]['a'] = 1
print(m1.item_counts)
#> [{'a': 1}]

m2 = Model()
print(m2.item_counts)
#> [{}]

Fields with dynamic default values

使用 default_factory 建立 default

from datetime import datetime, timezone
from uuid import UUID, uuid4

from pydantic import BaseModel, Field


def datetime_now() -> datetime:
    return datetime.now(timezone.utc)


class Model(BaseModel):
    uid: UUID = Field(default_factory=uuid4)
    updated: datetime = Field(default_factory=datetime_now)


print(Model())
print(Model())

Structural pattern matching

支援 match_case, 可參考 match_case_tutorial.py

from pydantic import BaseModel


class Pet(BaseModel):
    name: str
    species: str


a = Pet(name='Bones', species='dog') # Bones is a dog
# a = Pet(name='Bones', species='dog1') # No dog matched
print(a)

match a:
    # match `species` to 'dog', declare and initialize `dog_name`
    case Pet(species='dog', name=dog_name):
        print(f'{dog_name} is a dog')

    # default case
    case _:
        print('No dog matched')

Extra fields

正常是可以加入額外的 fields

from pydantic import BaseModel


class Model(BaseModel):
    x: int


m = Model(x=1, y='a')

如果你不想要可以加入額外的 fields, 可以這樣設定

from pydantic import BaseModel, ConfigDict, ValidationError


class Model(BaseModel):
    x: int

    model_config = ConfigDict(extra='forbid')


try:
    Model(x=1, y='a')
except ValidationError as exc:
    print(exc)

Fields

Default values

from pydantic import BaseModel, Field


class User(BaseModel):
    name: str = Field(default='John Doe')


user = User()
print(user)

或是使用前面介紹的 default_factory 都可以

from uuid import uuid4

from pydantic import BaseModel, Field


class User(BaseModel):
    id: str = Field(default_factory=lambda: uuid4().hex)

Using Annotated

from uuid import uuid4

from typing_extensions import Annotated

from pydantic import BaseModel, Field


class User(BaseModel):
    id: Annotated[str, Field(default_factory=lambda: uuid4().hex)]

Field aliases

from pydantic import BaseModel, Field


class User(BaseModel):
    name: str = Field(..., alias='username')


user = User(username='johndoe')
# user = User(name='johndoe') # ValidationError
print(user) # name='johndoe'
print(user.model_dump(by_alias=True)) # {'username': 'johndoe'}

Numeric Constraints/

from pydantic import BaseModel, Field


class Foo(BaseModel):
    positive: int = Field(gt=0) # greater than
    non_negative: int = Field(ge=0) # greater than or equal to
    negative: int = Field(lt=0) # less than
    non_positive: int = Field(le=0) # less than or equal to
    even: int = Field(multiple_of=2) # a multiple of the given number
    love_for_pydantic: float = Field(allow_inf_nan=True) # allow 'inf', '-inf', 'nan' values


foo = Foo(
    positive=1,
    non_negative=0,
    negative=-1,
    non_positive=0,
    even=2,
    love_for_pydantic=float('inf'),
)
print(foo)

String Constraints

from pydantic import BaseModel, Field


class Foo(BaseModel):
    short: str = Field(min_length=3)
    long: str = Field(max_length=10)
    regex: str = Field(pattern=r'^\d*$')


foo = Foo(short='foo', long='foobarbaz', regex='123')
print(foo)

Decimal Constraints

from decimal import Decimal

from pydantic import BaseModel, Field


class Foo(BaseModel):
    precise: Decimal = Field(max_digits=5, decimal_places=2)


foo = Foo(precise=Decimal('123.45'))
print(foo)

Strict Mode

from pydantic import BaseModel, Field


class User(BaseModel):
    name: str = Field(strict=True)
    age: int = Field(strict=False)
    c: int = Field(strict=False, default=1)


user = User(name='John', age='42')
user = User(name='John', age='42', c='3')

The computed_field decorator

from pydantic import BaseModel, computed_field


class Box(BaseModel):
    width: float
    height: float
    depth: float

    @computed_field
    @property # 可以不加, 但建議還是加進去
    def volume(self) -> float:
        return self.width * self.height * self.depth


b = Box(width=1, height=2, depth=3)
print(b.model_dump())
#> {'width': 1.0, 'height': 2.0, 'depth': 3.0, 'volume': 6.0}

JSON Schema

JSON Schema

import json
from enum import Enum

from typing import Annotated

from pydantic import BaseModel, Field
from pydantic.config import ConfigDict


class FooBar(BaseModel):
    count: int
    size: float | None = None


class Gender(str, Enum):
    male = 'male'
    female = 'female'
    other = 'other'
    not_given = 'not_given'


class MainModel(BaseModel):
    """
    This is the description of the main model
    """

    model_config = ConfigDict(title='Main')

    foo_bar: FooBar
    gender: Annotated[Gender | None, Field(alias='Gender')] = None
    snap: int = Field(
        42,
        title='The Snap',
        description='this is the value of snap',
        gt=30,
        lt=50,
    )


main_model_schema = MainModel.model_json_schema()  # (1)!
print(json.dumps(main_model_schema, indent=2))  # (2)!

Validators

Annotated Validators

from typing import Any, List

from typing_extensions import Annotated

from pydantic import BaseModel, ValidationError
from pydantic.functional_validators import AfterValidator


def check_squares(v: int) -> int:
    assert v**0.5 % 1 == 0, f'{v} is not a square number'
    return v


def double(v: Any) -> Any:
    return v * 2


MyNumber = Annotated[int, AfterValidator(double), AfterValidator(check_squares)]


class DemoModel(BaseModel):
    number: List[MyNumber]


print(DemoModel(number=[2, 8])) # number=[4, 16]
try:
    DemoModel(number=[2, 4])
except ValidationError as e:
    print(e)

Field validators

from pydantic import (
    BaseModel,
    ValidationError,
    ValidationInfo,
    field_validator,
)


class UserModel(BaseModel):
    name: str
    id: int

    @field_validator('name')
    @classmethod
    def name_must_contain_space(cls, v: str) -> str:
        if ' ' not in v:
            raise ValueError('must contain a space')
        return v.title()

    # you can select multiple fields, or use '*' to select all fields
    @field_validator('id', 'name')
    @classmethod
    def check_alphanumeric(cls, v: str, info: ValidationInfo) -> str:
        if isinstance(v, str):
            # info.field_name is the name of the field being validated
            is_alphanumeric = v.replace(' ', '').isalnum()
            assert is_alphanumeric, f'{info.field_name} must be alphanumeric'
        return v


print(UserModel(name='John Doe', id=1)) # name='John Doe' id=1

try:
    UserModel(name='samuel', id=1)
except ValidationError as e:
    print(e)

try:
    UserModel(name='John Doe', id='abc')
except ValidationError as e:
    print(e)

try:
    UserModel(name='John Doe!', id=1)
except ValidationError as e:
    print(e)

Model validators

from typing import Any

from typing_extensions import Self

from pydantic import BaseModel, ValidationError, model_validator


class UserModel(BaseModel):
    username: str
    password1: str
    password2: str

    @model_validator(mode='before')
    @classmethod
    def check_card_number_omitted(cls, data: Any) -> Any:
        if isinstance(data, dict):
            assert (
                'card_number' not in data
            ), 'card_number should not be included'
        return data

    @model_validator(mode='after')
    def check_passwords_match(self) -> Self:
        pw1 = self.password1
        pw2 = self.password2
        if pw1 is not None and pw2 is not None and pw1 != pw2:
            raise ValueError('passwords do not match')
        return self


print(UserModel(username='scolvin', password1='zxcvbn', password2='zxcvbn'))
#> username='scolvin' password1='zxcvbn' password2='zxcvbn'
try:
    UserModel(username='scolvin', password1='zxcvbn', password2='zxcvbn2')
except ValidationError as e:
    print(e)

try:
    UserModel(
        username='scolvin',
        password1='zxcvbn',
        password2='zxcvbn',
        card_number='1234',
    )
except ValidationError as e:
    print(e)

Type Adapter

Type Adapter

from typing import List

from typing_extensions import TypedDict

from pydantic import TypeAdapter, ValidationError


class User(TypedDict):
    name: str
    id: int


user_list_adapter = TypeAdapter(List[User])
user_list = user_list_adapter.validate_python([{'name': 'Fred', 'id': '3'}])
print(repr(user_list))
#> [{'name': 'Fred', 'id': 3}]

try:
    user_list_adapter.validate_python(
        [{'name': 'Fred', 'id': 'wrong', 'other': 'no'}]
    )
except ValidationError as e:
    print(e)

print(repr(user_list_adapter.dump_json(user_list)))
#> b'[{"name":"Fred","id":3}]'

在 Performance 中, nested 的情境底下, 使用 Type Adapter 會比 model 好,

可參考 Use TypedDict over nested models

Performance

可參考 Performance tips