用注释注释 Python print() 输出

Question

给定一个带有 print() 语句的 Python 脚本，我希望能够运行该脚本，并在每条语句之后插入一条注释，显示该语句的输出。为了演示，以名为 example.py 的脚本为例：

a, b = 1, 2

print('a + b:', a + b)

c, d = 3, 4

print('c + d:', c + d)

所需的输出将是：

a, b = 1, 2

print('a + b:', a + b)
# a + b: 3

c, d = 3, 4

print('c + d:', c + d)
# c + d: 7

这是我的尝试，适用于像上面这样的简单示例：

import sys
from io import StringIO

def intercept_stdout(func):
    """Decorate *func* so that calling it returns the text it printed.

    The wrapper swaps ``sys.stdout`` for an in-memory ``StringIO`` buffer
    while *func* runs and returns the captured text.  The return value of
    *func* itself is discarded.
    """
    def wrapper(*args, **kwargs):
        """Call the wrapped function and return its captured stdout."""
        # save original stdout so it can be put back afterwards
        original_stdout = sys.stdout

        # set up StringIO object to temporarily capture stdout
        capture_stdout = StringIO()
        sys.stdout = capture_stdout
        try:
            # execute wrapped function
            func(*args, **kwargs)
        finally:
            # restore stdout even when func raises; otherwise every later
            # print() in the process would keep going into the buffer
            sys.stdout = original_stdout

        # return captured value
        return capture_stdout.getvalue()

    return wrapper


@intercept_stdout
def exec_target(name):
    """Execute the script at path *name*.

    Because of the ``intercept_stdout`` decorator, calling this returns the
    text the script printed rather than ``None``.

    NOTE: this runs the file's contents with ``exec``; only use it on
    scripts you trust.
    """
    with open(name, 'r') as f:
        source = f.read()

    # Run in a fresh globals dict instead of this function's local scope.
    # With a bare exec() inside a function, names the script defines land in
    # a throwaway locals mapping, so functions defined by the script cannot
    # see the script's own top-level variables or sibling functions.
    # compile() with the real filename gives tracebacks the right file name.
    exec(compile(source, name, 'exec'), {'__name__': '__main__'})


def read_target(name):
    """Read the script at path *name* and return its source as a list of lines.

    The last line is given a trailing newline if it lacks one, so a comment
    appended after it starts on its own line.  An empty file yields an
    empty list.
    """
    with open(name) as f:
        source = f.readlines()

    # to properly format last comment, ensure source ends in a newline;
    # the `source` guard keeps an empty file from raising IndexError
    if source and not source[-1].endswith('\n'):
        source[-1] += '\n'

    return source


def annotate_source(target):
    """Return the source of *target* with a comment under each print() line.

    A line counts as a print line only when it starts with ``print(`` in
    column 0.  The n-th such line is paired with the n-th line of the
    script's captured output.
    """
    source_lines = read_target(target)

    # positions of the lines that begin with 'print('
    print_rows = {row for row, text in enumerate(source_lines)
                  if text.startswith('print(') and len(text) > 6}

    # run the script once and split what it printed into lines
    captured = exec_target(target).split('\n')

    # copy the source, adding one output line after every print line
    pieces = []
    next_output = 0
    for row, text in enumerate(source_lines):
        pieces.append(text)
        if row in print_rows:
            pieces.append('# ' + captured[next_output] + '\n')
            next_output += 1

    # hand back the annotated source as one string
    return ''.join(pieces)


if __name__ == '__main__':
    # script to annotate; the result goes to annotated_example.py
    target_script = 'example.py'
    with open('annotated_example.py', 'w') as out_file:
        annotated = annotate_source(target_script)
        out_file.write(annotated)

但是，对于 print() 语句跨越多行、或 print() 语句不在行首的脚本，它就会失败。最理想的情况下，它甚至还能处理函数内部的 print() 语句。举个例子：

print('''print to multiple lines, first line
second line
third line''')

print('print from partial line, first part') if True else 0

1 if False else print('print from partial line, second part')

print('print from compound statement, first part'); pass

pass; print('print from compound statement, second part')

def foo():
    print('bar')

foo()

理想情况下，输出如下所示：

print('''print to multiple lines, first line
second line
third line''')
# print to multiple lines, first line
# second line
# third line

print('print from partial line, first part') if True else 0
# print from partial line, first part

1 if False else print('print from partial line, second part')
# print from partial line, second part

print('print from compound statement, first part'); pass
# print from compound statement, first part

pass; print('print from compound statement, second part')
# print from compound statement, second part

def foo():
    print('bar')

foo()
# bar

但是上面的脚本像这样破坏它：

print('''print to multiple lines, first line
# print to multiple lines, first line
second line
third line''')

print('print from partial line, first part') if True else 0
# second line

1 if False else print('print from partial line, second part')

print('print from compound statement, first part'); pass
# third line

pass; print('print from compound statement, second part')

def foo():
    print('bar')

foo()

什么方法可以使这个过程更加稳健？

Answer 1

多亏 @Lennart 的反馈，我已经基本让它跑起来了……它逐行迭代：只要当前块在传给 exec() 时引发 SyntaxError，就继续把后续行并入，组成越来越长的块。以防对其他人有用，代码如下：

import sys
from io import StringIO

def intercept_stdout(func):
    """Decorate *func* so that calling it returns the text it printed.

    While *func* runs, ``sys.stdout`` is replaced by an in-memory
    ``StringIO`` buffer; the wrapper returns the buffer's contents and
    discards the return value of *func*.
    """
    def wrapper(*args, **kwargs):
        """Call the wrapped function and return its captured stdout."""
        # save original stdout so it can be put back afterwards
        original_stdout = sys.stdout

        # set up StringIO object to temporarily capture stdout
        capture_stdout = StringIO()
        sys.stdout = capture_stdout
        try:
            # execute wrapped function
            func(*args, **kwargs)
        finally:
            # reset stdout even if func raised, so a failing block does not
            # leave the whole process printing into the buffer
            sys.stdout = original_stdout

        # return captured value
        return capture_stdout.getvalue()

    return wrapper

@intercept_stdout
def exec_line(source, block_globals):
    """Execute a block of source code with ``block_globals`` as its globals.

    ``source`` is handed to ``exec`` unchanged.  Because of the
    ``intercept_stdout`` decorator, calling this returns the text the block
    printed.
    """
    exec(source, block_globals)

def read_target(name):
    """Read the script at path *name* and return its source as a list of lines.

    The final line gets a trailing newline if it is missing one, so that a
    comment appended after it starts on a new line.  An empty file yields
    an empty list.
    """
    with open(name) as f:
        source = f.readlines()

    # to properly format last comment, ensure source ends in a newline;
    # checking `source` first avoids an IndexError on an empty file
    if source and not source[-1].endswith('\n'):
        source[-1] += '\n'

    return source

def get_blocks(target, block_globals):
    """Split source lines into blocks, one per top-level statement.

    Parameters:
        target: the script's source as a list of lines (as returned by
            ``readlines``), each element being one physical line.
        block_globals: accepted for backward compatibility and not used;
            the source is parsed, not executed, to find the boundaries.

    Returns:
        A list of ``(start, end)`` index pairs into *target* (``end``
        exclusive), in order and covering every line.  Each top-level
        statement, including a whole ``def`` or ``if`` with its body, is
        one pair.  Statements sharing a physical line (``a; b``) are
        merged into one pair.  Every blank or comment-only line between
        statements is its own one-line pair.

    Raises:
        SyntaxError: if the source as a whole is not valid Python.

    Requires Python 3.8+ for ``end_lineno`` on AST nodes.
    """
    # local import so this function does not depend on the file's import block
    import ast

    tree = ast.parse(''.join(target))

    # (start, end) for each top-level statement, 0-based, end exclusive
    spans = []
    for node in tree.body:
        # decorators sit on lines above the def/class line itself
        decorator_lines = [d.lineno for d in getattr(node, 'decorator_list', [])]
        start = min([node.lineno] + decorator_lines) - 1
        end = node.end_lineno
        if spans and start < spans[-1][1]:
            # begins on a line the previous statement already owns ("a; b")
            spans[-1] = (spans[-1][0], max(spans[-1][1], end))
        else:
            spans.append((start, end))

    # fill the gaps (blank lines, comments) with one-line blocks so that
    # joining all blocks reproduces the source exactly
    blocks = []
    cursor = 0
    for start, end in spans:
        blocks.extend((i, i + 1) for i in range(cursor, start))
        blocks.append((start, end))
        cursor = end
    blocks.extend((i, i + 1) for i in range(cursor, len(target)))

    return blocks

def annotate_source(target, block_globals=None):
    """Return the source of *target* with printed output added as comments.

    Parameters:
        target: path of the script to annotate.
        block_globals: globals dict the script's blocks are executed in.
            Defaults to a fresh empty dict per call.

    Returns:
        The annotated source as a single string: each block of the script
        followed by one ``# ...`` comment line per line it printed.
    """
    # a `{}` default would be created once and shared by every call, leaking
    # one script's names into the next; build a new namespace per call
    if block_globals is None:
        block_globals = {}

    target_source = read_target(target)

    # get each block's start and end indices
    outputs = get_blocks(target_source, block_globals)

    annotated_source = []
    for start, end in outputs:
        block = ''.join(target_source[start:end])
        annotated_source.append(block)

        # run the block and turn what it printed into comment lines
        printed_lines = exec_line(block, block_globals).split('\n')
        # output ending in a newline splits into a trailing '' entry
        if printed_lines and printed_lines[-1] == '':
            printed_lines.pop()
        annotated_source.extend('# ' + line + '\n' for line in printed_lines)

    # return new annotated source as a string
    return ''.join(annotated_source)

def main():
    """Annotate the hard-coded input script and write the result to disk."""
    # script to format goes here
    source_path = 'example.py'

    # name of formatted script goes here
    output_path = 'annotated_example.py'

    # build the annotated text first, then write it out in one go
    annotated = annotate_source(source_path)
    with open(output_path, 'w') as out_file:
        out_file.write(annotated)

if __name__ == '__main__':
    main()

它适用于上面两个示例中的每一个。但是，当尝试执行以下操作时：

def foo():
    print('bar')
    print('baz')

foo()

而不是给我想要的输出：

def foo():
    print('bar')
    print('baz')

foo()
# bar
# baz

它失败了，回溯很长：

Traceback (most recent call last):
  File "ex.py", line 55, in eval_blocks
    exec(''.join(full_source[start_index:end_index]), block_globals)
  File "<string>", line 1
    print('baz')
    ^
IndentationError: unexpected indent

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "ex.py", line 55, in eval_blocks
    exec(''.join(full_source[start_index:end_index]), block_globals)
  File "<string>", line 1
    print('baz')
    ^
IndentationError: unexpected indent

During handling of the above exception, another exception occurred:

...

Traceback (most recent call last):
  File "ex.py", line 55, in eval_blocks
    exec(''.join(full_source[start_index:end_index]), block_globals)
  File "<string>", line 1
    print('baz')
    ^
IndentationError: unexpected indent

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "ex.py", line 102, in <module>
    main()
  File "ex.py", line 97, in main
    new_code = annotate_source(target_script)
  File "ex.py", line 74, in annotate_source
    outputs = get_blocks(target_source, block_globals)
  File "ex.py", line 65, in get_blocks
    outputs.append((eval_blocks(i, i+1, target, block_globals), i, lines))
  File "ex.py", line 16, in wrapper
    func(*args, **kwargs)
  File "ex.py", line 59, in eval_blocks
    full_source, block_globals)
  File "ex.py", line 16, in wrapper
    func(*args, **kwargs)   

...

  File "ex.py", line 16, in wrapper
    func(*args, **kwargs)
  File "ex.py", line 55, in eval_blocks
    exec(''.join(full_source[start_index:end_index]), block_globals)
RecursionError: maximum recursion depth exceeded while calling a Python object

看起来发生这种情况是因为 def foo(): print('bar') 是有效代码，因此 print('baz') 未包含在函数中，导致它失败并显示 IndentationError。关于如何避免此问题的任何想法？我怀疑它可能需要按照上面的建议深入研究 ast，但希望能有进一步的输入或用法示例。

Answer 2

您可以使用现有的 Python 解析器从代码中提取顶级语句，这样会容易得多，例如标准库中的 ast 模块。但是，ast 会丢失一些信息，例如注释。

考虑到源代码转换（您正在做的）而构建的库可能更适合这里。 redbaron 是一个很好的例子。

要将全局变量传递到下一个 exec()，您必须使用第二个参数 (documentation):

# one namespace shared by every exec() call below
environment = {}
# NOTE(review): `statements` is not defined in this snippet; presumably the
# top-level statements extracted from the script
for statement in statements:
    # the second argument to exec() is the globals dict the code runs in
    exec(statement, environment)

Answer 3

您是否考虑过使用 inspect 模块？如果你愿意说你总是想要最上面的调用旁边的注释，并且你正在注释的文件足够简单，你可以获得合理的结果。以下是我的尝试，它覆盖了内置的 print 函数并查看堆栈跟踪以确定调用 print 的位置：

import inspect
import sys
from io import StringIO

# Recorded output, shaped {filename: {line_number: [printed_text, ...]}}.
file_changes = {}

def anno_print(old_print, *args, **kwargs):
    """Stand-in for ``print`` that records output instead of showing it.

    The text ``old_print(*args, **kwargs)`` writes to stdout is captured
    and appended to ``file_changes`` under the filename and line number of
    the second-outermost frame on the call stack.  Nothing reaches the real
    stdout.

    Parameters:
        old_print: the original ``print`` function to delegate to.
        *args, **kwargs: forwarded unchanged to ``old_print``.
    """
    # second-outermost frame; presumably the target script's top-level
    # statement when the script is exec'd from the driver's module level
    caller = inspect.getouterframes(inspect.currentframe())[-2]
    recorded = (file_changes
                .setdefault(caller.filename, {})
                .setdefault(caller.lineno, []))

    orig_stdout = sys.stdout
    capture_stdout = StringIO()
    sys.stdout = capture_stdout
    try:
        old_print(*args, **kwargs)
    finally:
        # restore stdout even if old_print raises (e.g. a bad keyword
        # argument), so later output is not silently swallowed
        sys.stdout = orig_stdout

    recorded.append(capture_stdout.getvalue())

def make_annotated_file(old_source, new_source):
    """Write a copy of *old_source* with recorded output added as comments.

    Parameters:
        old_source: path of the original script; also the key looked up in
            ``file_changes``.
        new_source: path of the annotated file to write.

    Each source line is copied (with a newline added if it has none).  For
    every output recorded against that line number, the output follows as
    ``#``-prefixed lines.  A script with no recorded output is copied
    without annotations.
    """
    # a script that never printed has no entry; treat that as "no comments"
    changes = file_changes.get(old_source, {})

    # `with` closes both files, including when a write fails part-way
    with open(old_source) as old_source_F:
        content = old_source_F.readlines()

    with open(new_source, 'w') as new_source_F:
        for line_num, line in enumerate(content, start=1):
            new_source_F.write(line)
            if not line.endswith('\n'):
                new_source_F.write('\n')
            for output in changes.get(line_num, ()):
                # drop only a real trailing newline; output produced with
                # print(..., end='') must keep its last character
                if output.endswith('\n'):
                    output = output[:-1]
                # prefix every line of a multi-line output with '#'
                new_source_F.write('#' + output.replace('\n', '\n#') + '\n')



if __name__=='__main__':
    # `builtins` is the documented way to reach the built-in namespace;
    # `__builtins__` is an implementation detail that may be a module or a dict
    import builtins

    target_source = "foo.py"
    old_print = builtins.print
    # route every print() call through anno_print while the target runs
    builtins.print = lambda *args, **kwargs: anno_print(old_print, *args, **kwargs)
    try:
        with open(target_source) as f:
            # compile with the real filename so recorded frames are keyed
            # by the same name make_annotated_file looks up
            code = compile(f.read(), target_source, 'exec')
        exec(code)
    finally:
        # always put the real print back, even if the target script raised
        builtins.print = old_print
    make_annotated_file(target_source, "foo_annotated.py")

如果我对下面的文件 "foo.py" 运行它：

def foo():
    print("a")
    print("b")

def cool():
    foo()
    print("c")

def doesnt_print():
    a = 2 + 3

print(1+2)
foo()
doesnt_print()
cool()

输出为"foo_annotated.py":

def foo():
    print("a")
    print("b")

def cool():
    foo()
    print("c")

def doesnt_print():
    a = 2 + 3

print(1+2)
#3
foo()
#a
#b
doesnt_print()
cool()
#a
#b
#c

Answer 4

看起来仅靠 except SyntaxError 不足以判断函数是否完整，因为它会在第一个不再产生语法错误的行处结束该块。你需要确保整个函数都包含在同一个块中。为此：

检查当前块是否是函数：检查第一行是否以 def 开头。
检查 full_source 中的下一行是否以大于或等于函数第二行（定义缩进的行）的空格数开头。这意味着 eval_blocks 将检查代码的下一行是否具有更大或相等的间距，因此在函数内部。

get_blocks 的代码可能如下所示：

# helper for measuring indentation (could live at module level)
def get_front_whitespace(string):
    """Return the indentation width of *string* in columns.

    Only leading spaces and tabs are counted.  A tab advances to the next
    multiple of 8 columns, the rule Python itself uses for indentation,
    so ``"  \\t"`` measures 8, not 10.

    Returns 0 for an empty string or a line with no leading whitespace.
    """
    # isolate the run of leading spaces/tabs
    rest = string.lstrip(' \t')
    prefix = string[:len(string) - len(rest)]
    # expandtabs(8) applies the "next multiple of 8" rule to each tab
    return len(prefix.expandtabs(8))

...

def get_blocks(target, block_globals):
    """Split source lines into executable blocks by trial execution.

    Starting at each line, the block is grown one line at a time until
    ``exec`` accepts it.  A block whose first line starts with ``def`` is
    then grown further while the next non-blank line is indented at least
    as far as the function's first body line, so the whole body stays in
    one block.

    Parameters:
        target: the script's source as a list of lines.
        block_globals: globals dict every block is executed in.

    Returns:
        A list of ``(start, end)`` index pairs into *target*, ``end``
        exclusive.

    Raises:
        SyntaxError: if a block still does not compile once it has been
            grown to the end of the source.

    Limitation: only ``def`` blocks get the indentation look-ahead; an
    unindented comment inside a function body still ends the block early.
    """
    outputs = []
    lines = 1
    # variable to check if current block is a function
    block_is_func = False

    @intercept_stdout
    def eval_blocks(start_index, end_index, full_source, block_globals):
        "work through a group of lines of source code and exec each block"
        nonlocal lines
        nonlocal block_is_func
        total = len(full_source)
        # check if block is a function
        block_is_func = full_source[start_index].startswith('def')
        try:
            exec(''.join(full_source[start_index:end_index]), block_globals)
        except SyntaxError:
            # nothing left to append: the error is real, so re-raise it
            # instead of recursing until RecursionError
            if end_index >= total:
                raise
            lines += 1
            eval_blocks(start_index, start_index + lines,
                        full_source, block_globals)
        else:
            # if the block is a function, check whether its body continues
            if block_is_func and end_index < total:
                # indentation of the function's first body line
                func_indent = get_front_whitespace(full_source[start_index + 1])
                # first non-blank line after the block; the block is
                # full_source[start_index:end_index], so the next line is
                # at end_index (not end_index + 1)
                look = end_index
                while look < total and not full_source[look].strip():
                    look += 1
                # func_indent == 0 means a one-line def with no indented
                # body, which must not swallow the following line
                if (func_indent > 0 and look < total and
                        get_front_whitespace(full_source[look]) >= func_indent):
                    # take in the skipped blank lines and the body line
                    lines = look - start_index + 1
                    eval_blocks(start_index, start_index + lines,
                                full_source, block_globals)

    for i, _ in enumerate(target):
        # reset the function variable for next block
        block_is_func = False
        # lines already consumed by the previous block are skipped
        if lines > 1:
            lines -= 1
            continue
        # eval_blocks runs first, so `lines` holds the final block length
        outputs.append((eval_blocks(i, i+1, target, block_globals), i, lines))

    return [(start, start + count) for _, start, count in outputs]

如果函数的最后一行恰好是文件的末尾，这可能会引发 IndexError，因为 next_index_spaces = get_front_whitespace( full_source[end_index + 1] ) 这一行使用了前向索引。

这也可以用于选择语句和循环，它们可能有同样的问题：除了 def 之外，再检查以 if、for 和 while 开头的行即可。这会使注释出现在缩进区域之后，但由于缩进区域内的打印输出取决于调用它们时所用的变量，我认为无论如何都需要把输出放在缩进区域之外。

Answer 5

尝试https://github.com/eevleevs/hashequal/

我做这个是为了尝试替换 Mathcad。不作用于 print 语句，而是作用于 #= 注释，例如：

a = 1 + 1 #=

变成

a = 1 + 1 #= 2

用注释注释 Python print() 输出

Annotating Python print() output with comments

python

comments

code-formatting

stdout

python-3.x