如何为返回类型取决于其参数的函数添加类型注释?

How do I annotate a function whose return type depends on its argument?

在 Python 中,我经常编写函数来过滤集合以查找特定子类型的实例。例如,我可能会在 DOM 中查找特定类型的节点或在日志中查找特定类型的事件:

def find_pre(soup: TagSoup) -> List[tags.pre]:
    """Find all <pre> nodes in `soup`."""
    …

def filter_errors(log: List[LogEvent]) -> List[LogError]:
    """Keep only the error events from `log`."""
    …

为这些函数编写类型很容易。但是,如果是这些函数的通用版本——通过参数来指定要返回的类型——又该如何注释?

def find_tags(tag_soup: TagSoup, T: type) -> List[T]:
    """Find all nodes of type `T` in `tag_soup`.

    NOTE: the `List[T]` return annotation is deliberately invalid; `T` is a
    runtime parameter here, not a type variable.
    """
    …

def filter_errors(log: List[LogEvent], T: type) -> List[T]:
    """Keep only events of type `T` from `log`.

    NOTE: the `List[T]` return annotation is deliberately invalid; `T` is a
    runtime parameter here, not a type variable.
    """
    …

(上面的签名是错误的:我不能在 return 类型中引用 T。)

这是一个相当常见的设计:docutils 的 node.traverse(T: type)、BeautifulSoup 的 soup.find_all(),等等。当然它可以变得任意复杂,但 Python 类型注释能处理像上面这样的简单情况吗?

这里有一个 MWE 使其非常具体:

from dataclasses import dataclass
from typing import *

@dataclass
class Packet: pass  # common base class of Done, Exn and Message

@dataclass
class Done(Packet): pass  # field-less packet; stream_response returns when it sees one

@dataclass
class Exn(Packet):
    exn: str  # presumably the exception text (the demo passes "Oops") -- confirm
    loc: Tuple[int, int]  # pair of ints (the demo passes (1, 42)); meaning not shown here

@dataclass
class Message(Packet):
    ref: int  # integer reference; what it refers to is not shown here
    msg: str  # text that print_messages prints

Stream = Callable[[], Union[Packet, None]]  # zero-argument callable returning a Packet or None

def stream_response(stream: Stream, types) -> Iterator[??]:  # `??` is the annotation being asked about
    while response := stream():  # loop ends as soon as stream() returns a falsy value
        if isinstance(response, Done): return  # a Done packet ends the generator and is not yielded
        if isinstance(response, types): yield response  # only instances of `types` are passed on

def print_messages(stream: Stream) -> None:  # prints `msg` of every Message found in `stream`
    for m in stream_response(stream, Message):
        print(m.msg) # Error: Cannot access member "msg" for "Packet"

# Demo input: a Message, an Exn, then Done.
msgs = iter((Message(0, "hello"), Exn("Oops", (1, 42)), Done()))
# `next(msgs, None)` honours the `Stream` contract (None once exhausted) instead
# of letting StopIteration escape into the consuming generator.
print_messages(lambda: next(msgs, None))

Pyright 说:

  29:17 - error: Cannot access member "msg" for type "Packet"
  Member "msg" is unknown (reportGeneralTypeIssues)

在上面的例子中,有没有办法注释 stream_response 以便 Python 类型检查器将接受 print_messages 的定义?

好的,我们开始吧。它通过了 MyPy --strict,但它并不漂亮。

这是怎么回事

对于给定的 class A,我们知道 A 的实例的类型是 A(显然)。但是 A 本身的类型是什么?从技术上讲,A 的类型是 type,因为所有不使用 metaclass 的 Python class 都是 type 的实例。但是,用 type 注释参数并不能告诉类型检查器太多信息。在 Python 类型检查中,用于在类型层次结构中“更上一层”的语法是 Type[A]。因此,如果我们有一个函数 myfunc,它返回作为参数传入的 class 的一个实例,我们可以相当简单地注释如下:

from typing import TypeVar, Type

T = TypeVar("T")


def myfunc(some_class: Type[T]) -> T:
    """Instantiate `some_class` with no arguments and return the new instance.

    `Type[T]` ties the class that is passed in to the instance type that
    comes out.
    """
    instance = some_class()
    return instance

但是,您的情况要复杂得多。您可以传入一个 class 作为参数,也可以传入两个 class、三个 class……等等。我们可以使用 typing.overload 来解决这个问题,它允许我们为给定函数注册多个签名。这些签名在运行时被完全忽略;它们纯粹是为了类型检查器;因此,这些函数的主体可以留空。通常,您只在用 @overload 装饰的函数体中放置文档字符串或字面量省略号 ...。

我不认为有一种方法可以概括这些重载函数,这就是为什么可以传递到 types 参数的最大元素数很重要的原因。您必须繁琐地枚举函数的每个可能签名。如果你走这条路,你可能想考虑将 @overload 签名移动到一个单独的 .pyi 存根文件。

from dataclasses import dataclass
from typing import (
    Callable,
    Tuple,
    Union,
    Iterator,
    overload,
    TypeVar,
    Type, 
    Sequence
)

@dataclass
class Packet:
    """Field-less base class shared by every packet type."""

# One bounded type variable per position in the `types` tuple; the overloads
# of `stream_response` accept between one and ten packet classes.
P1 = TypeVar("P1", bound=Packet)
P2 = TypeVar("P2", bound=Packet)
P3 = TypeVar("P3", bound=Packet)
P4 = TypeVar("P4", bound=Packet)
P5 = TypeVar("P5", bound=Packet)
P6 = TypeVar("P6", bound=Packet)
P7 = TypeVar("P7", bound=Packet)
P8 = TypeVar("P8", bound=Packet)
P9 = TypeVar("P9", bound=Packet)
P10 = TypeVar("P10", bound=Packet)

@dataclass
class Done(Packet):
    """Field-less packet; `stream_response` stops when it receives one."""

@dataclass
class Exn(Packet):
    """Packet carrying a string `exn` and a pair of ints `loc`."""

    # presumably the exception text (the demo passes "Oops") -- confirm
    exn: str
    # pair of ints (the demo passes (1, 42)); their meaning is not shown here
    loc: Tuple[int, int]

@dataclass
class Message(Packet):
    """Packet carrying an integer `ref` and the text `msg`."""

    # integer reference; what it refers to is not shown here
    ref: int
    # text that `print_messages` prints
    msg: str

# A stream is a zero-argument callable; each call returns a Packet or None.
Stream = Callable[[], Union[Packet, None]]

@overload
def stream_response(stream: Stream, types: Type[P1]) -> Iterator[P1]:
    """Overload: `types` is a single packet class."""
    ...


@overload
def stream_response(
    stream: Stream,
    types: Tuple[Type[P1], Type[P2]],
) -> Iterator[Union[P1, P2]]:
    """Overload: `types` is a tuple of exactly two packet classes."""
    ...


@overload
def stream_response(
    stream: Stream,
    types: Tuple[Type[P1], Type[P2], Type[P3]],
) -> Iterator[Union[P1, P2, P3]]:
    """Overload: `types` is a tuple of exactly three packet classes."""
    ...


@overload
def stream_response(
    stream: Stream,
    types: Tuple[Type[P1], Type[P2], Type[P3], Type[P4]],
) -> Iterator[Union[P1, P2, P3, P4]]:
    """Overload: `types` is a tuple of exactly four packet classes."""
    ...


@overload
def stream_response(
    stream: Stream,
    types: Tuple[Type[P1], Type[P2], Type[P3], Type[P4], Type[P5]],
) -> Iterator[Union[P1, P2, P3, P4, P5]]:
    """Overload: `types` is a tuple of exactly five packet classes."""
    ...


@overload
def stream_response(
    stream: Stream,
    types: Tuple[Type[P1], Type[P2], Type[P3], Type[P4], Type[P5], Type[P6]],
) -> Iterator[Union[P1, P2, P3, P4, P5, P6]]:
    """Overload: `types` is a tuple of exactly six packet classes."""
    ...


@overload
def stream_response(
    stream: Stream,
    types: Tuple[
        Type[P1], Type[P2], Type[P3], Type[P4], Type[P5], Type[P6], Type[P7]
    ],
) -> Iterator[Union[P1, P2, P3, P4, P5, P6, P7]]:
    """Overload: `types` is a tuple of exactly seven packet classes."""
    ...


@overload
def stream_response(
    stream: Stream,
    types: Tuple[
        Type[P1], Type[P2], Type[P3], Type[P4],
        Type[P5], Type[P6], Type[P7], Type[P8],
    ],
) -> Iterator[Union[P1, P2, P3, P4, P5, P6, P7, P8]]:
    """Overload: `types` is a tuple of exactly eight packet classes."""
    ...


@overload
def stream_response(
    stream: Stream,
    types: Tuple[
        Type[P1], Type[P2], Type[P3], Type[P4], Type[P5],
        Type[P6], Type[P7], Type[P8], Type[P9],
    ],
) -> Iterator[Union[P1, P2, P3, P4, P5, P6, P7, P8, P9]]:
    """Overload: `types` is a tuple of exactly nine packet classes."""
    ...


@overload
def stream_response(
    stream: Stream,
    types: Tuple[
        Type[P1], Type[P2], Type[P3], Type[P4], Type[P5],
        Type[P6], Type[P7], Type[P8], Type[P9], Type[P10],
    ],
) -> Iterator[Union[P1, P2, P3, P4, P5, P6, P7, P8, P9, P10]]:
    """Overload: `types` is a tuple of exactly ten packet classes."""
    ...

# The implementation is annotated more loosely than the overloads: with the
# precise overload types, MyPy struggles to accept `types` as a valid second
# argument to `isinstance`.
def stream_response(
    stream: Stream,
    types: Union[type, Tuple[type, ...]]
) -> Iterator[Packet]:
    """Yield the packets read from `stream` that are instances of `types`.

    Args:
        stream: Called repeatedly with no arguments; each call returns a
            packet, or None to signal the end of the stream.
        types: A class, or a tuple of classes, used as the `isinstance` filter.

    Yields:
        Each packet that is an instance of `types`, in the order received.
        Generation stops at the first `Done` packet (which is never yielded)
        or as soon as `stream()` returns None.
    """
    # Compare against None explicitly: a plain truthiness test would also end
    # the stream on any packet that happens to be falsy (e.g. one that defines
    # `__len__` or `__bool__`).
    while (response := stream()) is not None:
        if isinstance(response, Done):
            return
        if isinstance(response, types):
            yield response

def print_messages(stream: Stream) -> None:
    """Print the `msg` of every `Message` that `stream_response` yields from `stream`."""
    for message in stream_response(stream, Message):
        print(message.msg)

# Demo input: a Message, an Exn, then Done.
msgs = iter((Message(0, "hello"), Exn("Oops", (1, 42)), Done()))
# `next(msgs, None)` honours the `Stream` contract (None once exhausted) instead
# of letting StopIteration escape into the consuming generator.
print_messages(lambda: next(msgs, None))

减少冗长的策略

如果你想让它更简洁,实现它的一种方法是为某些类型构造引入别名。这里的危险是类型提示的意图和含义变得相当难以阅读,但它确实使重载 7-10 看起来不那么可怕:

from dataclasses import dataclass
from typing import (
    Callable,
    Tuple,
    Union,
    Iterator,
    overload,
    TypeVar,
    Type, 
    Sequence
)

@dataclass
class Packet:
    """Field-less base class shared by every packet type."""

# One bounded type variable per position in the `types` tuple; the overloads
# of `stream_response` accept between one and ten packet classes.
P1 = TypeVar("P1", bound=Packet)
P2 = TypeVar("P2", bound=Packet)
P3 = TypeVar("P3", bound=Packet)
P4 = TypeVar("P4", bound=Packet)
P5 = TypeVar("P5", bound=Packet)
P6 = TypeVar("P6", bound=Packet)
P7 = TypeVar("P7", bound=Packet)
P8 = TypeVar("P8", bound=Packet)
P9 = TypeVar("P9", bound=Packet)
P10 = TypeVar("P10", bound=Packet)

# `S[X]` is shorthand for `Type[X]`; `_P` exists only to make the alias generic.
_P = TypeVar("_P", bound=Packet)
S = Type[_P]

# Generic aliases for the 7- to 10-element tuples of packet classes.
T7 = Tuple[S[P1], S[P2], S[P3], S[P4], S[P5], S[P6], S[P7]]
T8 = Tuple[S[P1], S[P2], S[P3], S[P4], S[P5], S[P6], S[P7], S[P8]]
T9 = Tuple[
    S[P1], S[P2], S[P3], S[P4], S[P5], S[P6], S[P7], S[P8], S[P9]
]
T10 = Tuple[
    S[P1], S[P2], S[P3], S[P4], S[P5], S[P6], S[P7], S[P8], S[P9], S[P10]
]

@dataclass
class Done(Packet):
    """Field-less packet; `stream_response` stops when it receives one."""

@dataclass
class Exn(Packet):
    """Packet carrying a string `exn` and a pair of ints `loc`."""

    # presumably the exception text (the demo passes "Oops") -- confirm
    exn: str
    # pair of ints (the demo passes (1, 42)); their meaning is not shown here
    loc: Tuple[int, int]

@dataclass
class Message(Packet):
    """Packet carrying an integer `ref` and the text `msg`."""

    # integer reference; what it refers to is not shown here
    ref: int
    # text that `print_messages` prints
    msg: str

# A stream is a zero-argument callable; each call returns a Packet or None.
Stream = Callable[[], Union[Packet, None]]

@overload
def stream_response(stream: Stream, types: Type[P1]) -> Iterator[P1]:
    """Overload: `types` is a single packet class."""
    ...


@overload
def stream_response(
    stream: Stream,
    types: Tuple[Type[P1], Type[P2]],
) -> Iterator[Union[P1, P2]]:
    """Overload: `types` is a tuple of exactly two packet classes."""
    ...


@overload
def stream_response(
    stream: Stream,
    types: Tuple[Type[P1], Type[P2], Type[P3]],
) -> Iterator[Union[P1, P2, P3]]:
    """Overload: `types` is a tuple of exactly three packet classes."""
    ...


@overload
def stream_response(
    stream: Stream,
    types: Tuple[Type[P1], Type[P2], Type[P3], Type[P4]],
) -> Iterator[Union[P1, P2, P3, P4]]:
    """Overload: `types` is a tuple of exactly four packet classes."""
    ...


@overload
def stream_response(
    stream: Stream,
    types: Tuple[Type[P1], Type[P2], Type[P3], Type[P4], Type[P5]],
) -> Iterator[Union[P1, P2, P3, P4, P5]]:
    """Overload: `types` is a tuple of exactly five packet classes."""
    ...


@overload
def stream_response(
    stream: Stream,
    types: Tuple[Type[P1], Type[P2], Type[P3], Type[P4], Type[P5], Type[P6]],
) -> Iterator[Union[P1, P2, P3, P4, P5, P6]]:
    """Overload: `types` is a tuple of exactly six packet classes."""
    ...


@overload
def stream_response(
    stream: Stream,
    types: T7[P1, P2, P3, P4, P5, P6, P7],
) -> Iterator[Union[P1, P2, P3, P4, P5, P6, P7]]:
    """Overload: `types` is a tuple of exactly seven packet classes."""
    ...


@overload
def stream_response(
    stream: Stream,
    types: T8[P1, P2, P3, P4, P5, P6, P7, P8],
) -> Iterator[Union[P1, P2, P3, P4, P5, P6, P7, P8]]:
    """Overload: `types` is a tuple of exactly eight packet classes."""
    ...


@overload
def stream_response(
    stream: Stream,
    types: T9[P1, P2, P3, P4, P5, P6, P7, P8, P9],
) -> Iterator[Union[P1, P2, P3, P4, P5, P6, P7, P8, P9]]:
    """Overload: `types` is a tuple of exactly nine packet classes."""
    ...


@overload
def stream_response(
    stream: Stream,
    types: T10[P1, P2, P3, P4, P5, P6, P7, P8, P9, P10],
) -> Iterator[Union[P1, P2, P3, P4, P5, P6, P7, P8, P9, P10]]:
    """Overload: `types` is a tuple of exactly ten packet classes."""
    ...

# The implementation is annotated more loosely than the overloads: with the
# precise overload types, MyPy struggles to accept `types` as a valid second
# argument to `isinstance`.
def stream_response(
    stream: Stream,
    types: Union[type, Tuple[type, ...]]
) -> Iterator[Packet]:
    """Yield the packets read from `stream` that are instances of `types`.

    Args:
        stream: Called repeatedly with no arguments; each call returns a
            packet, or None to signal the end of the stream.
        types: A class, or a tuple of classes, used as the `isinstance` filter.

    Yields:
        Each packet that is an instance of `types`, in the order received.
        Generation stops at the first `Done` packet (which is never yielded)
        or as soon as `stream()` returns None.
    """
    # Compare against None explicitly: a plain truthiness test would also end
    # the stream on any packet that happens to be falsy (e.g. one that defines
    # `__len__` or `__bool__`).
    while (response := stream()) is not None:
        if isinstance(response, Done):
            return
        if isinstance(response, types):
            yield response

def print_messages(stream: Stream) -> None:
    """Print the `msg` of every `Message` that `stream_response` yields from `stream`."""
    for message in stream_response(stream, Message):
        print(message.msg)

# Demo input: a Message, an Exn, then Done.
msgs = iter((Message(0, "hello"), Exn("Oops", (1, 42)), Done()))
# `next(msgs, None)` honours the `Stream` contract (None once exhausted) instead
# of letting StopIteration escape into the consuming generator.
print_messages(lambda: next(msgs, None))