如何根据Python中的起始行和结束行来获取class的范围?

How to get the scope of a class in terms of starting line and end line in Python?

我想检查 class c 是否使用某个模块 m。我能得到两件事: - 模块在该文件中的使用(f), - 文件 f.

中 classes 的起始行号

但是,为了知道某个特定的 class 是否使用该模块,我需要知道 class c 的起始行和结束行。我不知道如何获取 class c.

的结束行

我尝试浏览 ast 的文档,但找不到任何找到 class 范围的方法。

我当前的代码是:

source_code = "car"
source_code_data = pyclbr.readmodule(source_code)

这在 source_code_data 变量中给出以下内容: 1

如图所示,有 lineno 但没有 end lineno 表示 class 的结束行。

我希望得到 class 的结束行,以便了解其范围并最终了解模块的用法。目前,该模块具有以下结构:

Module: vehicles used in file: /Users/aviralsrivastava/dev/generate_uml/inheritance_and_dependencies/car.py at: [('vehicles.Vehicle', 'Vehicle', 5), ('vehicles.Vehicle', 'Vehicle', 20)]

因此,根据这些信息,我将能够知道模块是否在以下范围内使用:used in a class.

我的整个代码库是:

import ast
import os
import pyclbr
import subprocess
import sys
from operator import itemgetter

from dependency_collector import ModuleUseCollector
from plot_uml_in_excel import WriteInExcel


class GenerateUML:
    def __init__(self):
        self.class_dict = {}  # it will have a class:children mapping.

    def show_class(self, name, class_data):
        print(class_data)
        self.class_dict[name] = []
        self.show_super_classes(name, class_data)

    def show_methods(self, class_name, class_data):
        methods = []
        for name, lineno in sorted(class_data.methods.items(),
                                   key=itemgetter(1)):
            # print('  Method: {0} [{1}]'.format(name, lineno))
            methods.append(name)
        return methods

    def show_super_classes(self, name, class_data):
        super_class_names = []
        for super_class in class_data.super:
            if super_class == 'object':
                continue
            if isinstance(super_class, str):
                super_class_names.append(super_class)
            else:
                super_class_names.append(super_class.name)
        for super_class_name in super_class_names:
            if self.class_dict.get(super_class_name, None):
                self.class_dict[super_class_name].append(name)
            else:
                self.class_dict[super_class_name] = [name]
        # adding all parents for a class in one place for later usage: children
        return super_class_names

    def get_children(self, name):
        if self.class_dict.get(name, None):
            return self.class_dict[name]
        return []


source_code = "car"
source_code_data = pyclbr.readmodule(source_code)
print(source_code_data)
generate_uml = GenerateUML()
for name, class_data in sorted(source_code_data.items(), key=lambda x: x[1].lineno):
    print(
        "Class: {}, Methods: {}, Parent(s): {}, File: {}".format(
            name,
            generate_uml.show_methods(
                name, class_data
            ),
            generate_uml.show_super_classes(name, class_data),
            class_data.file
        )
    )
print('-----------------------------------------')
# create a list with all the data
# the frame of the list is: [{}, {}, {},....] where each dict is: {"name": <>, "methods": [], "children": []}
agg_data = []
files = {}
for name, class_data in sorted(source_code_data.items(), key=lambda x: x[1].lineno):
    methods = generate_uml.show_methods(name, class_data)
    children = generate_uml.get_children(name)
    # print(
    #     "Class: {}, Methods: {}, Child(ren): {}, File: {}".format(
    #         name,
    #         methods,
    #         children,
    #         class_data.file
    #     )
    # )
    agg_data.append(
        {
            "Class": name,
            "Methods": methods,
            "Children": children,
            "File": class_data.file
        }
    )
    files[class_data.file] = name
print('-----------------------------------------')
# print(agg_data)


for data_index in range(len(agg_data)):
    agg_data[data_index]['Dependents'] = None
    module = agg_data[data_index]["File"].split('/')[-1].split('.py')[0]
    used_in = []
    for file_ in files.keys():
        if file_ == agg_data[data_index]["File"]:
            continue
        collector = ModuleUseCollector(module)
        source = open(file_).read()
        collector.visit(ast.parse(source))
        print('Module: {} used in file: {} at: {}'.format(
            module, file_, collector.used_at))
        if len(collector.used_at):
            used_in.append(files[file_])
    agg_data[data_index]['Dependents'] = used_in
'''
# checking the dependencies
dependencies = []
for data_index in range(len(agg_data)):
    collector = ModuleUseCollector(
        agg_data[data_index]['File'].split('/')[-1].split('.py')[0]
    )
    collector.visit(ast.parse(source))
    dependencies.append(collector.used_at)

agg_data[source_code_index]['Dependencies'] = dependencies
# next thing, for each class, find the dependency in each class.

'''
print('------------------------------------------------------------------')
print('FINAL')
for data in agg_data:
    print(data)
    print('-----------')
    print('\n')

# The whole data is now collected and we need to form the dataframe of it:
'''
write_in_excel = WriteInExcel(file_name='dependency_1.xlsx')
df = write_in_excel.create_pandas_dataframe(agg_data)
write_in_excel.write_df_to_excel(df, 'class_to_child_and_dependents')
'''
'''
print(generate_uml.class_dict)
write_in_excel = WriteInExcel(
    classes=generate_uml.class_dict, file_name='{}.xlsx'.format(source_code))
write_in_excel.form_uml_sheet_for_classes()
'''

编辑:侵入性方法

我确实设法找到了一种使用 pyclbr 执行此操作的方法,但它涉及更改部分源代码。本质上,您将堆栈(通常是一个列表)设为自定义 class。当一个项目从堆栈中移除时(当它的范围结束时),结束行号被添加。我试着让它尽可能地没有侵入性。

首先在pyclbr模块中定义一个堆栈class:

class Stack(list):

    def __init__(self):
        import inspect
        super().__init__()

    def __delitem__(self, key):
        frames = inspect.stack()
        setattr(self[key][0], 'endline', frames[1].frame.f_locals['start'][0] - 1)
        super().__delitem__(key)

然后你在_create_tree函数中改变栈,原来在第193行:

stack = Stack()

现在您也可以使用

访问结束行
class_data.endline



我找不到使用您正在使用的库的解决方案。因此,我编写了一个简单的解析器,用于查找模块中包含的每个 class 的开始行和结束行。您必须传入文本包装器本身或由 readlines() 在您的文件中创建的列表。还值得注意的是,如果 class 一直持续到文件末尾,则结尾为 -1.

import re

def collect_classes(source):
    classes = {}
    current_class = None
    for lineno, line in enumerate(source, start=1):
        if current_class and not line.startswith(' '):
            if line != '\n':
                classes[current_class]['end'] = lineno - 1
                current_class = None

        if line.startswith('class'):
                current_class = re.search(r'class (.+?)(?:\(|:)', line).group(1)
                classes[current_class] = {'start': lineno}

    if current_class:
        classes[current_class]['end'] = -1

    return classes

import datetime
file = open(datetime.__file__, 'r') # opening the source of datetime for a test
scopes = collect_classes(file)
print(scopes)

这输出:

{'timedelta': {'start': 454, 'end': 768}, 'date': {'start': 774, 'end': 1084}, 'tzinfo': {'start': 1092, 'end': 1159}, 'time': {'start': 1162, 'end': 1502}, 'datetime': {'start': 1509, 'end': 2119}, 'timezone': {'start': 2136, 'end': 2251}}

谢谢!端线确实是那里缺少的功能。

对此的讨论导致了另一种(仍然是侵入性的)方法来实现这一点,但这几乎是一种不需要子类化 list 和覆盖 del 运算符的单行代码:

https://github.com/python/cpython/pull/16466#issuecomment-539664180

基本上,只需添加

stack[-1].endline = start - 1

pyclbr.py

中的所有 del stack[-1] 之前