Python 中通过修改代码对象实现的低开销跟踪功能

Question

sys.settrace 至少可以说是低效的。它为 Python.

中的每个函数调用增加了很多开销

相反，我正在寻找一种方法来跟踪 Python 中仅几百个函数的“调用”事件，而不会产生额外的开销。我正在考虑修改每个函数的代码对象，以便它们都调用一个跟踪函数。

import sys


def trace():
    frame = sys._getframe(1)
    ...


def func():
    trace()  # TODO: Add programmatically
    ...

有人知道怎么做吗？到目前为止，我已经找到了这些资源...

https://hardenedapple.github.io/stories/computers/python_function_override/
Modifying Code of Function at Runtime

我不能是唯一对低开销本地化跟踪感兴趣的人吗？感谢您为我提供的任何帮助！

Answer 1

好的，所以按照 Modifying Code of Function at Runtime 中概述的方法，我试了一下...

# boo.py
import inspect
import sys
import types


def ___trace():
    """
    NOTE: This function needs a unique name, so, it doesn't interfere with globals.
    """
    frame = sys._getframe(1)
    print("Tracing:", frame.f_code.co_name)
    print("Locals:", frame.f_locals)


def _unwrap(fn):
    while hasattr(fn, "__wrapped__"):
        fn = fn.__wrapped__
    return fn


_orig_code = "___trace_orig_code"
_closure = "___closure"


def set_trace(fn):
    """Call `trace` at the beginning of `fn`."""
    fn = _unwrap(fn)
    if getattr(fn, _orig_code, None) is None:
        setattr(fn, _orig_code, fn.__code__)
    else:
        raise ValueError("Function is already being traced.")
    lines = inspect.getsourcelines(fn)[0]
    lines = lines[next(i for i, l in enumerate(lines) if "@" != l[0]) :]
    init_indent = len(lines[0]) - len(lines[0].lstrip())
    lines = ["    " + l[init_indent:] for l in lines]
    whitespace = lines[1][: len(lines[1]) - len(lines[1].lstrip())]
    # NOTE: So that the line numbers remain the name, the trace function is added to the first
    # line.
    lines[1] = f"{whitespace}{___trace.__name__}(); {lines[1].strip()}\n"
    free_vars = " ".join([f"    {var} = None;" for var in fn.__code__.co_freevars])
    code = "".join(lines)
    code = f"""
def {_closure}():
{free_vars}

{code}

    return {fn.__name__}.__code__
"""
    module = fn.__globals__.copy()
    try:
        exec(code, module)
    except SyntaxError:
        raise SyntaxError("Unable to add `___trace` to function definition.")
    new = module[_closure]()
    fn.__code__ = types.CodeType(
        fn.__code__.co_argcount,
        fn.__code__.co_posonlyargcount,
        fn.__code__.co_kwonlyargcount,
        fn.__code__.co_nlocals,
        fn.__code__.co_stacksize,
        fn.__code__.co_flags,
        new.co_code,
        fn.__code__.co_consts,
        tuple([___trace.__name__] + list(fn.__code__.co_names)),
        fn.__code__.co_varnames,
        fn.__code__.co_filename,
        fn.__code__.co_name,
        fn.__code__.co_firstlineno,
        new.co_lnotab,
        fn.__code__.co_freevars,
        fn.__code__.co_cellvars,
    )

    if ___trace.__name__ in fn.__globals__:
        if fn.__globals__[___trace.__name__] is not ___trace:
            raise RuntimeError()
    else:
        fn.__globals__[___trace.__name__] = ___trace


def unset_trace(fn):
    """Remove `trace` from the beginning of `fn`."""
    if hasattr(fn, _orig_code):
        fn.__code__ = getattr(fn, _orig_code)

我在类、闭包、装饰器、行号、回溯、全局...

上进行了测试

import functools
import sys
import traceback
import typing

from boo import set_trace, unset_trace


def testing(*args, **kwargs):
    print("Inside `testing`...")


def testing_closure(*args, **kwargs):
    variable = "hello!"

    def func(*args, **kwargs):
        print(f"Inside `testing_closure`... {variable}")

    func()
    set_trace(func)
    func()


def test_lineno(*args, **kwargs):
    print("Inside `test_lineno`...")
    print("Line no", sys._getframe(0).f_lineno)


def test_args(a: typing.List, b: typing.List):
    print("Inside `test_args`...")


def test_traceback(*args, **kwargs):
    print("".join(traceback.format_stack()))


def test_cellvars():
    print("Inside `test_cellvars`...")

    a = 10

    def func():
        return a

    return func()


def test_funky_first_line():
    def func():
        return


@functools.lru_cache()
@functools.lru_cache()
def testing_decorator(*args, **kwargs):
    print("Inside `testing_decorator`...")


class Test:
    def __init__(self, *args, **kwargs):
        print("Inside `__init__`...")


if __name__ == "__main__":
    set_trace(testing)
    testing()
    unset_trace(testing)
    testing()

    Test()
    set_trace(Test.__init__)
    Test()
    set_trace(testing_decorator)
    testing_decorator()
    testing_closure()
    set_trace(test_lineno)
    test_lineno()
    set_trace(test_traceback)
    test_traceback()
    set_trace(test_cellvars)
    test_cellvars()
    set_trace(test_args)
    test_args(["a"], ["b"])
    try:
        set_trace(test_funky_first_line)
    except SyntaxError:
        print("Unable to modify handle.")

如果我遗漏了什么，请告诉我！我没有太多代码编辑经验。

这是我的完整工作库实现（在生产代码库中测试过！）：https://github.com/PetrochukM/HParams/blob/master/config/trace.py

Python 中通过修改代码对象实现的低开销跟踪功能

Low-overhead tracing function in Python by modify the code object

python

trace

metaprogramming

overhead

bytecode-manipulation