使用 Python 计算目录中的代码行数
Count lines of code in directory using Python
我有一个项目,我想计算它的代码行数。是否可以通过Python?
统计项目所在文件目录下的所有代码行数
from os import listdir
from os.path import isfile, join
def countLinesInPath(path,directory):
    """Count the lines of a single file.

    Args:
        path: File name (or relative path) that is joined onto ``directory``.
        directory: Directory containing the file.

    Returns:
        The number of lines in the file, read as UTF-8 text.
    """
    # The context manager closes the handle as soon as counting is done; the
    # previous bare ``open(...)`` left that to the garbage collector.
    with open(join(directory, path), encoding="utf8") as handle:
        return sum(1 for _ in handle)
def countLines(paths,directory):
    """Return the combined line count of every file named in ``paths``.

    Args:
        paths: Iterable of file names located inside ``directory``.
        directory: Directory the file names are relative to.

    Returns:
        Sum of the per-file counts reported by ``countLinesInPath``.
    """
    return sum(countLinesInPath(name, directory) for name in paths)
def getPaths(directory):
    """List the names of the regular files directly inside ``directory``.

    Subdirectories are not descended into and are left out of the result.

    Args:
        directory: Directory to inspect.

    Returns:
        A list of file names (not full paths), in ``listdir`` order.
    """
    file_names = []
    for entry in listdir(directory):
        if isfile(join(directory, entry)):
            file_names.append(entry)
    return file_names
def countIn(directory):
    """Count the lines of all files located directly in ``directory``.

    Args:
        directory: Directory whose files are counted (non-recursive).

    Returns:
        Total number of lines across those files.
    """
    file_names = getPaths(directory)
    return countLines(file_names, directory)
要计算目录中文件的所有代码行数,调用“countIn”函数,将目录作为参数传递。
这是我编写的一个函数,用于统计 Python 包中的所有代码行数并打印统计信息。它会统计所有 .py 文件中的所有行。
import os
def countlines(start, lines=0, header=True, begin_start=None):
    """Recursively count the lines of every ``.py`` file below ``start``.

    One table row is printed per file (lines added, running total, path
    relative to the top-level directory). Files of a directory are reported
    before its subdirectories are visited.

    Args:
        start: Directory to scan.
        lines: Running total carried in from earlier calls.
        header: Print the table header first; recursive calls pass False.
        begin_start: Top-level directory that printed paths are relative to.
            Defaults to ``start``.

    Returns:
        The running total after adding every ``.py`` file below ``start``.
    """
    if header:
        print('{:>10} |{:>10} | {:<20}'.format('ADDED', 'TOTAL', 'FILE'))
        print('{:->11}|{:->11}|{:->20}'.format('', '', ''))

    # Keep the original root for the whole recursion. Handing the *current*
    # directory down made files two or more levels deep lose the leading
    # components of their printed path.
    root = start if begin_start is None else begin_start

    subdirs = []
    for name in os.listdir(start):
        thing = os.path.join(start, name)
        if os.path.isdir(thing):
            subdirs.append(thing)
        elif os.path.isfile(thing) and thing.endswith('.py'):
            # Decode explicitly so the count does not depend on the locale;
            # undecodable bytes are replaced rather than aborting the scan.
            with open(thing, 'r', encoding='utf-8', errors='replace') as f:
                newlines = sum(1 for _ in f)
            lines += newlines
            # relpath instead of str.replace: replace() removed every
            # occurrence of the root text, not just the leading one.
            reldir_of_thing = '.' + os.sep + os.path.relpath(thing, root)
            print('{:>10} |{:>10} | {:<20}'.format(
                newlines, lines, reldir_of_thing))

    for subdir in subdirs:
        lines = countlines(subdir, lines, header=False, begin_start=root)
    return lines
要使用它,只需传递您想要开始的目录。例如,计算某个包中的代码行数 foo
:
countlines(r'...\foo')
这会输出如下内容:
ADDED | TOTAL | FILE
-----------|-----------|--------------------
5 | 5 | .\__init__.py
539 | 578 | .\bar.py
558 | 1136 | .\baz\qux.py
这是在 Daniel 的回答基础上改写的(不过重构得比较多,可能已经看不出来了)。Daniel 的版本不会递归进入子目录,而递归统计正是我想要的行为。
from os import listdir
from os.path import isfile, isdir, join
def item_line_count(path):
    """Return the number of lines found at ``path``.

    Directories are counted recursively through ``dir_line_count``. Regular
    files are read in binary mode, so no text decoding takes place. A path
    that is neither a directory nor a regular file counts as 0.

    Args:
        path: File or directory path.

    Returns:
        The line count as an int.
    """
    if isdir(path):
        return dir_line_count(path)
    if isfile(path):
        # The context manager closes the handle; the previous
        # ``open(...).readlines()`` never closed it explicitly.
        with open(path, 'rb') as handle:
            return sum(1 for _ in handle)
    return 0
def dir_line_count(dir):
    """Return the total line count of everything inside directory ``dir``.

    Each entry is handed to ``item_line_count``, which recurses into
    subdirectories.

    Args:
        dir: Directory path.

    Returns:
        Sum of the line counts of all entries.
    """
    total = 0
    for item in listdir(dir):
        total += item_line_count(join(dir, item))
    return total
pygount
将显示文件夹中的所有文件,每个文件都有代码行数(不包括文档)
https://pypi.org/project/pygount/
pip install pygount
列出当前目录的结果运行:
pygount ~/path_to_directory
这有点家庭作业的味道 :-) 尽管如此,这仍是一个值得做的练习,而且 Bryce93 的输出格式很好。我认为很多人不太会为此专门使用 Python,因为用几个 shell 命令就可以快速完成,例如:
cat $(find . -name "*.py") | grep -E -v '^\s*$|^\s*#' | wc -l
请注意,这些解决方案都没有处理多行 (''') 注释。
作为 pygount
答案的补充,他们刚刚添加了选项 --format=summary
以获取目录中不同文件类型的总行数。
pygount --format=summary ./your-directory
可以输出类似下面的内容:
Language Code % Comment %
------------- ---- ------ ------- ------
XML 1668 48.56 10 0.99
Python 746 21.72 150 14.90
TeX 725 21.11 57 5.66
HTML 191 5.56 0 0.00
markdown 58 1.69 0 0.00
JSON 37 1.08 0 0.00
INI 10 0.29 0 0.00
Text 0 0.00 790 78.45
__duplicate__ 0 0.00 0 0.00
------------- ---- ------ ------- ------
Sum total 3435 1007
基于 Bryce93 的回答,使用 code_only
选项从行数中排除注释、文档字符串和空行:
import os
def countlines(rootdir, total_lines=0, header=True, begin_start=None,
               code_only=True):
    """Recursively count lines of the ``.py`` files below ``rootdir``.

    One table row is printed per file (lines added, running total, path
    relative to the top-level directory). Files of a directory are reported
    before its subdirectories are visited.

    Args:
        rootdir: Directory to scan.
        total_lines: Running total carried in from earlier calls.
        header: Print the table header first; recursive calls pass False.
        begin_start: Top-level directory that printed paths are relative to.
            Defaults to ``rootdir``.
        code_only: When True, blank lines, ``#`` comment lines and
            triple-quoted blocks are left out of the count.

    Returns:
        The running total after adding every ``.py`` file below ``rootdir``.
    """
    def _get_new_lines(source):
        """Return how many entries of ``source`` look like code lines.

        This is a line-based heuristic, not a parser: any line containing a
        triple quote is treated as (part of) a docstring.
        """
        code = 0
        i = 0
        n = len(source)
        while i < n:
            # strip() removes tabs and the trailing newline as well, so blank
            # lines and tab-indented comments are recognised.
            stripped = source[i].strip()
            i += 1
            if not stripped or stripped.startswith('#'):
                continue
            found = [q for q in ('"""', "'''") if q in stripped]
            if not found:
                code += 1
                continue
            quote = min(found, key=stripped.find)
            if stripped.count(quote) % 2:
                # The block stays open: skip up to and including the line
                # that closes it. The bound check keeps an unterminated
                # block from running past the end of the file.
                while i < n and quote not in source[i]:
                    i += 1
                i += 1
        return code

    if header:
        print('{:>10} |{:>10} | {:<20}'.format('ADDED', 'TOTAL', 'FILE'))
        print('{:->11}|{:->11}|{:->20}'.format('', '', ''))

    # Keep the original root for the whole recursion so that deeply nested
    # files are still printed relative to the top-level directory.
    root = rootdir if begin_start is None else begin_start

    subdirs = []
    for name in os.listdir(rootdir):
        file = os.path.join(rootdir, name)
        if os.path.isdir(file):
            subdirs.append(file)
        elif os.path.isfile(file) and file.endswith('.py'):
            # Decode explicitly so the count does not depend on the locale;
            # undecodable bytes are replaced rather than aborting the scan.
            with open(file, 'r', encoding='utf-8', errors='replace') as f:
                source = f.readlines()
            new_lines = _get_new_lines(source) if code_only else len(source)
            total_lines += new_lines
            reldir_of_file = '.' + os.sep + os.path.relpath(file, root)
            print('{:>10} |{:>10} | {:<20}'.format(
                new_lines, total_lines, reldir_of_file))

    for subdir in subdirs:
        total_lines = countlines(subdir, total_lines, header=False,
                                 begin_start=root, code_only=code_only)
    return total_lines
如果您想计算项目中有多少行,请在项目文件夹中创建一个脚本并将以下内容粘贴到其中:
import os
# --- Configuration for the line-counting script ---

# Root directory that is scanned (handed to _walk further down).
directory = "[project_directory]"
directory_depth = 100 # How deep you would like to go
extensions_to_consider = [".py", ".css"] # Change to ["all"] to include all extensions
# Entries whose path ends with one of these strings are skipped by _walk.
exclude_filenames = ["venv", ".idea", "__pycache__", "cache"]
# True: only the number of unreadable files is reported.
# False: the unreadable files are also printed at the end.
skip_file_error_list = True
# Resolved path of this script; compared against each candidate so the
# script does not count itself.
this_file_dir = os.path.realpath(__file__)
print("Path to ignore:", this_file_dir)
print("=====================================")
def _walk(path, depth):
    """Yield the paths of files and directories below ``path``.

    Entries whose path ends with one of the ``exclude_filenames`` strings are
    reported on stdout and skipped (excluded directories are not entered).

    Args:
        path: Directory to list.
        depth: How many directory levels may still be listed, counting
            ``path`` itself.

    Yields:
        The path of every entry that was not excluded.
    """
    remaining = depth - 1
    with os.scandir(path) as entries:
        for entry in entries:
            if any(entry.path.endswith(suffix) for suffix in exclude_filenames):
                print("Skipping entry", entry.path)
                continue
            yield entry.path
            if entry.is_dir() and remaining > 0:
                yield from _walk(entry.path, remaining)
# --- Main part of the script: collect candidate paths, then count lines ---
print("Caching entries")
files = list(_walk(directory, directory_depth))
print("=====================================")
print("Counting Lines")
file_err_list = []  # paths that could not be opened or decoded
line_count = 0
len_files = len(files)
for i, file_dir in enumerate(files):
    # this_file_dir was produced by os.path.realpath, so canonicalise the
    # candidate the same way; a raw string comparison only matched when
    # `directory` happened to be given as an absolute, resolved path.
    if os.path.realpath(file_dir) == this_file_dir:
        print("=[Rejected file directory", file_dir, "]=")
        continue
    if not os.path.isfile(file_dir):
        continue
    wanted = any(file_dir.endswith(ending) or ending == "all"
                 for ending in extensions_to_consider)
    if not wanted:
        continue
    try:
        # `with` closes the handle even when reading fails part-way.
        with open(file_dir, "r") as file:
            # Lines consisting of a lone newline are not counted.
            local_count = sum(1 for line in file if line != "\n")
    except (OSError, UnicodeError):
        # Best effort: remember unreadable/undecodable files and move on.
        file_err_list.append(file_dir)
        continue
    print("({:.1f}%)".format(100*i/len_files), file_dir, "|", local_count)
    line_count += local_count
print("=====================================")
print("File Count Errors:", len(file_err_list))
if not skip_file_error_list:
    for file in file_err_list:
        # Print each failing path once (previously the whole list was
        # printed on every iteration).
        print(file)
print("=====================================")
print("Total lines |", line_count)
可能有更快、更有效的方法来做到这一点,但这是一个不错的开始。
可变信息
directory
是你要统计的项目目录
directory_depth
是项目基础结构中的深度
即深度为 3 意味着它只会扫描以下深度:
- project_dir
- sub_dir
- sub2_dir
extensions_to_consider
是需要统计代码行数的文件扩展名列表。如果你只想统计 .py 文件,请设置 extensions_to_consider = [".py"]
exclude_filenames
是一个文件名(和目录)数组,您不希望脚本为其计算代码。
skip_file_error_list
是一个布尔变量。如果您希望在计数结束后看到所有出错文件的打印输出,请设置为 False;否则设置为 True(只显示出错文件的数量)。
如何运行
使用 Python 解释器运行脚本。
在终端运行
python path_to_file.py
或
python3 path_to_file.py
这是另一个,使用 pathlib
。列出单个(相对)文件路径以及行数、文件总数和总行数。
import pathlib
class LoC(object):
    """Recursive line counter built on ``pathlib``.

    After ``count`` returns, ``files`` holds the number of matching files and
    ``lines`` their combined line count.
    """

    # File suffixes that are counted.
    suffixes = ['.py']

    def count(self, path: pathlib.Path, init=True):
        """Count the lines of all matching files at or below ``path``.

        Prints one ``relative/path: count`` line per file and, for the
        top-level call, a final summary line.

        Args:
            path: File or directory to process.
            init: True for the top-level call (resets the counters and
                records ``path`` as root); recursive calls pass False.
        """
        if init:
            self.root, self.files, self.lines = path, 0, 0

        if path.is_dir():
            # Directory: visit every child.
            for child in path.iterdir():
                self.count(path=child, init=False)
        elif path.is_file() and path.suffix in self.suffixes:
            # Matching file: count it and update the totals.
            with path.open(mode='r') as handle:
                n_lines = len(handle.readlines())
            relative = path.relative_to(self.root)
            print(f'{relative}: {n_lines}')
            self.files += 1
            self.lines += n_lines

        if init:
            print(f'\n{self.lines} lines in {self.files} files')
请注意,为了清楚起见,我跳过了 __init__
方法。
用法示例:
loc = LoC()
loc.count(path=pathlib.Path('/path/to/your/project/directory'))
我有一个项目,我想计算它的代码行数。是否可以通过Python?
统计项目所在文件目录下的所有代码行数
from os import listdir
from os.path import isfile, join
def countLinesInPath(path,directory):
    """Count the lines of a single file.

    Args:
        path: File name (or relative path) that is joined onto ``directory``.
        directory: Directory containing the file.

    Returns:
        The number of lines in the file, read as UTF-8 text.
    """
    # The context manager closes the handle as soon as counting is done; the
    # previous bare ``open(...)`` left that to the garbage collector.
    with open(join(directory, path), encoding="utf8") as handle:
        return sum(1 for _ in handle)
def countLines(paths,directory):
    """Return the combined line count of every file named in ``paths``.

    Args:
        paths: Iterable of file names located inside ``directory``.
        directory: Directory the file names are relative to.

    Returns:
        Sum of the per-file counts reported by ``countLinesInPath``.
    """
    return sum(countLinesInPath(name, directory) for name in paths)
def getPaths(directory):
    """List the names of the regular files directly inside ``directory``.

    Subdirectories are not descended into and are left out of the result.

    Args:
        directory: Directory to inspect.

    Returns:
        A list of file names (not full paths), in ``listdir`` order.
    """
    file_names = []
    for entry in listdir(directory):
        if isfile(join(directory, entry)):
            file_names.append(entry)
    return file_names
def countIn(directory):
    """Count the lines of all files located directly in ``directory``.

    Args:
        directory: Directory whose files are counted (non-recursive).

    Returns:
        Total number of lines across those files.
    """
    file_names = getPaths(directory)
    return countLines(file_names, directory)
要计算目录中文件的所有代码行数,调用“countIn”函数,将目录作为参数传递。
这是我编写的一个函数,用于统计 Python 包中的所有代码行数并打印统计信息。它会统计所有 .py 文件中的所有行。
import os
def countlines(start, lines=0, header=True, begin_start=None):
    """Recursively count the lines of every ``.py`` file below ``start``.

    One table row is printed per file (lines added, running total, path
    relative to the top-level directory). Files of a directory are reported
    before its subdirectories are visited.

    Args:
        start: Directory to scan.
        lines: Running total carried in from earlier calls.
        header: Print the table header first; recursive calls pass False.
        begin_start: Top-level directory that printed paths are relative to.
            Defaults to ``start``.

    Returns:
        The running total after adding every ``.py`` file below ``start``.
    """
    if header:
        print('{:>10} |{:>10} | {:<20}'.format('ADDED', 'TOTAL', 'FILE'))
        print('{:->11}|{:->11}|{:->20}'.format('', '', ''))

    # Keep the original root for the whole recursion. Handing the *current*
    # directory down made files two or more levels deep lose the leading
    # components of their printed path.
    root = start if begin_start is None else begin_start

    subdirs = []
    for name in os.listdir(start):
        thing = os.path.join(start, name)
        if os.path.isdir(thing):
            subdirs.append(thing)
        elif os.path.isfile(thing) and thing.endswith('.py'):
            # Decode explicitly so the count does not depend on the locale;
            # undecodable bytes are replaced rather than aborting the scan.
            with open(thing, 'r', encoding='utf-8', errors='replace') as f:
                newlines = sum(1 for _ in f)
            lines += newlines
            # relpath instead of str.replace: replace() removed every
            # occurrence of the root text, not just the leading one.
            reldir_of_thing = '.' + os.sep + os.path.relpath(thing, root)
            print('{:>10} |{:>10} | {:<20}'.format(
                newlines, lines, reldir_of_thing))

    for subdir in subdirs:
        lines = countlines(subdir, lines, header=False, begin_start=root)
    return lines
要使用它,只需传递您想要开始的目录。例如,计算某个包中的代码行数 foo
:
countlines(r'...\foo')
这会输出如下内容:
ADDED | TOTAL | FILE
-----------|-----------|--------------------
5 | 5 | .\__init__.py
539 | 578 | .\bar.py
558 | 1136 | .\baz\qux.py
这是在 Daniel 的回答基础上改写的(不过重构得比较多,可能已经看不出来了)。Daniel 的版本不会递归进入子目录,而递归统计正是我想要的行为。
from os import listdir
from os.path import isfile, isdir, join
def item_line_count(path):
    """Return the number of lines found at ``path``.

    Directories are counted recursively through ``dir_line_count``. Regular
    files are read in binary mode, so no text decoding takes place. A path
    that is neither a directory nor a regular file counts as 0.

    Args:
        path: File or directory path.

    Returns:
        The line count as an int.
    """
    if isdir(path):
        return dir_line_count(path)
    if isfile(path):
        # The context manager closes the handle; the previous
        # ``open(...).readlines()`` never closed it explicitly.
        with open(path, 'rb') as handle:
            return sum(1 for _ in handle)
    return 0
def dir_line_count(dir):
    """Return the total line count of everything inside directory ``dir``.

    Each entry is handed to ``item_line_count``, which recurses into
    subdirectories.

    Args:
        dir: Directory path.

    Returns:
        Sum of the line counts of all entries.
    """
    total = 0
    for item in listdir(dir):
        total += item_line_count(join(dir, item))
    return total
pygount
将显示文件夹中的所有文件,每个文件都有代码行数(不包括文档)
https://pypi.org/project/pygount/
pip install pygount
列出当前目录的结果运行:
pygount ~/path_to_directory
这有点家庭作业的味道 :-) 尽管如此,这仍是一个值得做的练习,而且 Bryce93 的输出格式很好。我认为很多人不太会为此专门使用 Python,因为用几个 shell 命令就可以快速完成,例如:
cat $(find . -name "*.py") | grep -E -v '^\s*$|^\s*#' | wc -l
请注意,这些解决方案都没有处理多行 (''') 注释。
作为 pygount
答案的补充,他们刚刚添加了选项 --format=summary
以获取目录中不同文件类型的总行数。
pygount --format=summary ./your-directory
可以输出类似下面的内容:
Language Code % Comment %
------------- ---- ------ ------- ------
XML 1668 48.56 10 0.99
Python 746 21.72 150 14.90
TeX 725 21.11 57 5.66
HTML 191 5.56 0 0.00
markdown 58 1.69 0 0.00
JSON 37 1.08 0 0.00
INI 10 0.29 0 0.00
Text 0 0.00 790 78.45
__duplicate__ 0 0.00 0 0.00
------------- ---- ------ ------- ------
Sum total 3435 1007
基于 Bryce93 的回答,使用 code_only
选项从行数中排除注释、文档字符串和空行:
import os
def countlines(rootdir, total_lines=0, header=True, begin_start=None,
               code_only=True):
    """Recursively count lines of the ``.py`` files below ``rootdir``.

    One table row is printed per file (lines added, running total, path
    relative to the top-level directory). Files of a directory are reported
    before its subdirectories are visited.

    Args:
        rootdir: Directory to scan.
        total_lines: Running total carried in from earlier calls.
        header: Print the table header first; recursive calls pass False.
        begin_start: Top-level directory that printed paths are relative to.
            Defaults to ``rootdir``.
        code_only: When True, blank lines, ``#`` comment lines and
            triple-quoted blocks are left out of the count.

    Returns:
        The running total after adding every ``.py`` file below ``rootdir``.
    """
    def _get_new_lines(source):
        """Return how many entries of ``source`` look like code lines.

        This is a line-based heuristic, not a parser: any line containing a
        triple quote is treated as (part of) a docstring.
        """
        code = 0
        i = 0
        n = len(source)
        while i < n:
            # strip() removes tabs and the trailing newline as well, so blank
            # lines and tab-indented comments are recognised.
            stripped = source[i].strip()
            i += 1
            if not stripped or stripped.startswith('#'):
                continue
            found = [q for q in ('"""', "'''") if q in stripped]
            if not found:
                code += 1
                continue
            quote = min(found, key=stripped.find)
            if stripped.count(quote) % 2:
                # The block stays open: skip up to and including the line
                # that closes it. The bound check keeps an unterminated
                # block from running past the end of the file.
                while i < n and quote not in source[i]:
                    i += 1
                i += 1
        return code

    if header:
        print('{:>10} |{:>10} | {:<20}'.format('ADDED', 'TOTAL', 'FILE'))
        print('{:->11}|{:->11}|{:->20}'.format('', '', ''))

    # Keep the original root for the whole recursion so that deeply nested
    # files are still printed relative to the top-level directory.
    root = rootdir if begin_start is None else begin_start

    subdirs = []
    for name in os.listdir(rootdir):
        file = os.path.join(rootdir, name)
        if os.path.isdir(file):
            subdirs.append(file)
        elif os.path.isfile(file) and file.endswith('.py'):
            # Decode explicitly so the count does not depend on the locale;
            # undecodable bytes are replaced rather than aborting the scan.
            with open(file, 'r', encoding='utf-8', errors='replace') as f:
                source = f.readlines()
            new_lines = _get_new_lines(source) if code_only else len(source)
            total_lines += new_lines
            reldir_of_file = '.' + os.sep + os.path.relpath(file, root)
            print('{:>10} |{:>10} | {:<20}'.format(
                new_lines, total_lines, reldir_of_file))

    for subdir in subdirs:
        total_lines = countlines(subdir, total_lines, header=False,
                                 begin_start=root, code_only=code_only)
    return total_lines
如果您想计算项目中有多少行,请在项目文件夹中创建一个脚本并将以下内容粘贴到其中:
import os
# --- Configuration for the line-counting script ---

# Root directory that is scanned (handed to _walk further down).
directory = "[project_directory]"
directory_depth = 100 # How deep you would like to go
extensions_to_consider = [".py", ".css"] # Change to ["all"] to include all extensions
# Entries whose path ends with one of these strings are skipped by _walk.
exclude_filenames = ["venv", ".idea", "__pycache__", "cache"]
# True: only the number of unreadable files is reported.
# False: the unreadable files are also printed at the end.
skip_file_error_list = True
# Resolved path of this script; compared against each candidate so the
# script does not count itself.
this_file_dir = os.path.realpath(__file__)
print("Path to ignore:", this_file_dir)
print("=====================================")
def _walk(path, depth):
    """Yield the paths of files and directories below ``path``.

    Entries whose path ends with one of the ``exclude_filenames`` strings are
    reported on stdout and skipped (excluded directories are not entered).

    Args:
        path: Directory to list.
        depth: How many directory levels may still be listed, counting
            ``path`` itself.

    Yields:
        The path of every entry that was not excluded.
    """
    remaining = depth - 1
    with os.scandir(path) as entries:
        for entry in entries:
            if any(entry.path.endswith(suffix) for suffix in exclude_filenames):
                print("Skipping entry", entry.path)
                continue
            yield entry.path
            if entry.is_dir() and remaining > 0:
                yield from _walk(entry.path, remaining)
# --- Main part of the script: collect candidate paths, then count lines ---
print("Caching entries")
files = list(_walk(directory, directory_depth))
print("=====================================")
print("Counting Lines")
file_err_list = []  # paths that could not be opened or decoded
line_count = 0
len_files = len(files)
for i, file_dir in enumerate(files):
    # this_file_dir was produced by os.path.realpath, so canonicalise the
    # candidate the same way; a raw string comparison only matched when
    # `directory` happened to be given as an absolute, resolved path.
    if os.path.realpath(file_dir) == this_file_dir:
        print("=[Rejected file directory", file_dir, "]=")
        continue
    if not os.path.isfile(file_dir):
        continue
    wanted = any(file_dir.endswith(ending) or ending == "all"
                 for ending in extensions_to_consider)
    if not wanted:
        continue
    try:
        # `with` closes the handle even when reading fails part-way.
        with open(file_dir, "r") as file:
            # Lines consisting of a lone newline are not counted.
            local_count = sum(1 for line in file if line != "\n")
    except (OSError, UnicodeError):
        # Best effort: remember unreadable/undecodable files and move on.
        file_err_list.append(file_dir)
        continue
    print("({:.1f}%)".format(100*i/len_files), file_dir, "|", local_count)
    line_count += local_count
print("=====================================")
print("File Count Errors:", len(file_err_list))
if not skip_file_error_list:
    for file in file_err_list:
        # Print each failing path once (previously the whole list was
        # printed on every iteration).
        print(file)
print("=====================================")
print("Total lines |", line_count)
可能有更快、更有效的方法来做到这一点,但这是一个不错的开始。
可变信息
directory
是你要统计的项目目录
directory_depth
是项目基础结构中的深度
即深度为 3 意味着它只会扫描以下深度:
- project_dir
- sub_dir
- sub2_dir
- sub_dir
extensions_to_consider
是需要统计代码行数的文件扩展名列表。如果你只想统计 .py 文件,请设置 extensions_to_consider = [".py"]
exclude_filenames
是一个文件名(和目录)数组,您不希望脚本为其计算代码。
skip_file_error_list
是一个布尔变量。如果您希望在计数结束后看到所有出错文件的打印输出,请设置为 False;否则设置为 True(只显示出错文件的数量)。
如何运行
使用 Python 解释器运行脚本。在终端运行:
python path_to_file.py
或
python3 path_to_file.py
这是另一个,使用 pathlib
。列出单个(相对)文件路径以及行数、文件总数和总行数。
import pathlib
class LoC(object):
    """Recursive line counter built on ``pathlib``.

    After ``count`` returns, ``files`` holds the number of matching files and
    ``lines`` their combined line count.
    """

    # File suffixes that are counted.
    suffixes = ['.py']

    def count(self, path: pathlib.Path, init=True):
        """Count the lines of all matching files at or below ``path``.

        Prints one ``relative/path: count`` line per file and, for the
        top-level call, a final summary line.

        Args:
            path: File or directory to process.
            init: True for the top-level call (resets the counters and
                records ``path`` as root); recursive calls pass False.
        """
        if init:
            self.root, self.files, self.lines = path, 0, 0

        if path.is_dir():
            # Directory: visit every child.
            for child in path.iterdir():
                self.count(path=child, init=False)
        elif path.is_file() and path.suffix in self.suffixes:
            # Matching file: count it and update the totals.
            with path.open(mode='r') as handle:
                n_lines = len(handle.readlines())
            relative = path.relative_to(self.root)
            print(f'{relative}: {n_lines}')
            self.files += 1
            self.lines += n_lines

        if init:
            print(f'\n{self.lines} lines in {self.files} files')
请注意,为了清楚起见,我跳过了 __init__
方法。
用法示例:
loc = LoC()
loc.count(path=pathlib.Path('/path/to/your/project/directory'))