Awk/sed 将文件从驼峰式转换为下划线的脚本
Awk/sed script to convert a file from camelCase to underscores
我想将项目中的多个文件从 camelCase
转换为 underscore_case
。
我想要一个只需要给定文件名就能工作的单行命令(one-liner)。
经过几次不成功的尝试,我得到了这个(为了便于阅读,我把它写成了多行,但可以删除换行符把它变成单行命令):
# Convert camelCase to snake_case in FILE, in place.
# Needs gawk >= 4.1.0 for "-i inplace"; the 3-argument match() is a gawk extension.
awk -i inplace '{
    # Each pass rewrites one boundary between a lowercase letter or digit and
    # an uppercase letter; loop until the line has no such boundary left.
    while ( match($0, /(.*)([a-z0-9])([A-Z])(.*)/, cap))
        $0 = cap[1] cap[2] "_" tolower(cap[3]) cap[4];
    print
}' FILE
为了完整起见,我们可以修改它来做相反的事情(下划线为 CamelCase):
# Reverse direction: convert snake_case to camelCase in FILE, in place.
# Needs gawk >= 4.1.0 for "-i inplace"; the 3-argument match() is a gawk extension.
awk -i inplace '{
    # Each pass removes one "_" that sits between a lowercase letter or digit
    # and a lowercase letter, upper-casing the letter that follows it.
    while ( match($0, /(.*)([a-z0-9])_([a-z])(.*)/, cap))
        $0 = cap[1] cap[2] toupper(cap[3]) cap[4];
    print
}' FILE
如果您想知道,-i inplace
是一个仅在 awk >=4.1.0 中可用的标志,它会就地修改文件(与 sed -i
一样)。如果您的 awk 版本较旧,则必须执行以下操作:
awk '{...}' FILE > FILE.tmp && mv FILE.tmp FILE
希望对大家有所帮助!
你也可以使用 sed。
$ echo 'fooBar' | sed -r 's/([a-z0-9])([A-Z])/\1_\L\2/g'
foo_bar
$ echo 'fooBar' | sed 's/\([a-z0-9]\)\([A-Z]\)/\1_\L\2/g'
foo_bar
这可能是您想要的:
$ cat tst.awk
# Convert camelCase words on every input line to snake_case.
# An uppercase letter becomes "_" plus its lowercase form only when the
# character just before it in the not-yet-processed text is a lowercase letter.
{
    head = ""       # part of the line that has already been processed
    tail = $0       # part of the line that still has to be scanned
    while ( match(tail,/[[:upper:]]/) ) {
        tgt = substr(tail,RSTART,1)
        if ( substr(tail,RSTART-1,1) ~ /[[:lower:]]/ ) {
            tgt = "_" tolower(tgt)
        }
        # Move everything up to and including the (possibly rewritten) letter
        # into head, then continue scanning after it
        head = head substr(tail,1,RSTART-1) tgt
        tail = substr(tail,RSTART+1)
    }
    print head tail
}
$ cat file
nowIs theWinterOfOur disContent
From ThePlay About RichardIII
$ awk -f tst.awk file
now_is the_winter_of_our dis_content
From The_play About Richard_iII
但如果没有您的示例输入和预期输出,这只是一个猜测。
建议的 sed
答案有一些问题:
$ echo 'FooBarFB' | sed -r 's/([a-z0-9])([A-Z])/\1_\L\2/g'
Foo_bar_fB
我建议如下
$ echo 'FooBarFB' | sed -r 's/([A-Z])/_\L\1/g' | sed 's/^_//'
foo_bar_f_b
这是一个 Python 脚本,可以将带有 CamelCase 函数的文件转换为 snake_case,然后也修复调用者。可选地,它创建一个包含更改的提交。
用法:
style.py -c -s tools/patman/terminal.py
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0+
#
# Copyright 2021 Google LLC
# Written by Simon Glass <sjg@chromium.org>
#
"""Changes the functions and class methods in a file to use snake case, updating
other tools which use them"""
from argparse import ArgumentParser
import glob
import os
import re
import subprocess
import camel_case
# Function names that are excluded from renaming
EXCLUDE_NAMES = {'setUp', 'tearDown', 'setUpClass', 'tearDownClass'}

# Matches a 'def name(' header and captures the function name
RE_FUNC = re.compile(r' *def (\w+)\(')

# Glob pattern for the files that might call the file being converted
FILES_GLOB = 'tools/**/*.py'
def collect_funcs(fname):
    """Read a source file and list the functions defined in it

    Args:
        fname (str): Name of the file to read

    Returns:
        tuple:
            str: Full text of the file
            list of str: Every function name matched by RE_FUNC, in the
                order found (a name may appear more than once)
    """
    with open(fname, encoding='utf-8') as src:
        contents = src.read()
    names = RE_FUNC.findall(contents)
    return contents, names
def get_module_name(fname):
    """Work out the module names that correspond to a filename

    The file extension and the first path component (e.g. 'tools') are
    dropped; the remaining components make up the dotted module name.

    Args:
        fname (str): Filename to convert, e.g. 'tools/patman/command.py'

    Returns:
        tuple:
            str: Full module name, e.g. 'patman.command'
            str: Leaf module name, e.g. 'command'
            str: Program name, e.g. 'patman'
    """
    stem, _ext = os.path.splitext(fname)
    _top, *components = stem.split('/')
    dotted = '.'.join(components)
    return dotted, components[-1], components[0]
def process_caller(data, conv, module_name, leaf):
    """Process a file that might call another module

    This converts all the camel-case references in the provided file contents
    to the corresponding snake-case references. Three kinds of reference are
    handled:

    - calls made through the module, e.g. 'command.DoIt('
    - symbols imported individually, e.g. 'from patman.command import DoIt',
      together with the bare calls 'DoIt(' which rely on that import
    - mock targets, e.g. "patch.object(command, 'DoIt', ..."

    Args:
        data (str): Contents of file to convert
        conv (dict): Identifiers to convert
            key: Current name in camel case, e.g. 'DoIt'
            value: New name in snake case, e.g. 'do_it'
        module_name (str): Name of module as referenced by the file, e.g.
            'patman.command'
        leaf (str): Leaf module name, e.g. 'command'

    Returns:
        str: New file contents, or None if it was not modified
    """
    orig_data = data
    leaf_re = re.escape(leaf)

    # Update any simple function calls into the module. Everything is escaped
    # so that '.' only matches a literal dot, and \b stops a longer identifier
    # which merely ends with the leaf name from matching.
    for name, new_name in conv.items():
        data = re.sub(fr'\b{leaf_re}\.{re.escape(name)}\(',
                      lambda _m: f'{leaf}.{new_name}(', data)

    # Deal with files that import symbols individually. Each import line is
    # rewritten on its own, so several imports from the same module do not
    # overwrite one another.
    imported = {}

    def fix_import(match):
        """Rewrite one 'from <module> import ...' line, noting renamed names"""
        names = [n.strip() for n in match.group(1).split(',')]
        for name in names:
            if conv.get(name):
                imported[name] = conv[name]
        new_names = [conv.get(n) or n for n in names]
        return f"from {module_name} import {', '.join(new_names)}\n"

    data = re.sub(fr'from {re.escape(module_name)} import (.*)\n',
                  fix_import, data)

    # Symbols imported by name are called without a module prefix
    for name, new_name in imported.items():
        data = re.sub(fr'\b{re.escape(name)}\(',
                      lambda _m: f'{new_name}(', data)

    # Deal with mocks like:
    #   unittest.mock.patch.object(module, 'Function', ...
    for name, new_name in conv.items():
        data = re.sub(fr"\b{leaf_re}, '{re.escape(name)}'",
                      lambda _m: f"{leaf}, '{new_name}'", data)

    # Only report a result when something really changed
    return data if data != orig_data else None
def process_file(srcfile, do_write, commit):
    """Process a file to rename its camel-case functions

    This renames the class methods and functions in a file so that they use
    snake case. Then it updates other modules that call those functions.

    Args:
        srcfile (str): Filename to process
        do_write (bool): True to write back to files, False to do a dry run
        commit (bool): True to create a commit with the changes
    """
    data, funcs = collect_funcs(srcfile)
    module_name, leaf, prog = get_module_name(srcfile)

    # Map each camel-case name to its snake-case replacement
    conv = {name: camel_case.to_snake(name)
            for name in funcs if name not in EXCLUDE_NAMES}

    # Convert name to new_name in the file
    for name, new_name in conv.items():
        # Don't match if it is preceded by a '.', since that indicates that
        # it is calling this same function name but in a different module.
        # The \b stops a match in the middle of a longer identifier.
        data = re.sub(fr'(?<!\.)\b{name}\(', f'{new_name}(', data)

        # But do allow self.xxx
        data = re.sub(fr'\bself\.{name}\(', f'self.{new_name}(', data)

    if do_write:
        with open(srcfile, 'w', encoding='utf-8') as out:
            out.write(data)

    # Now find all files which use these functions and update them
    for fname in glob.glob(FILES_GLOB, recursive=True):
        with open(fname, encoding='utf-8') as inf:
            caller_data = inf.read()
        # Pass the file contents (not the filename) to be converted
        newdata = process_caller(caller_data, conv, module_name, leaf)
        if do_write and newdata:
            with open(fname, 'w', encoding='utf-8') as out:
                out.write(newdata)

    if commit:
        # The blank line separates the commit subject from the body
        msg = (f'{prog}: Convert camel case in {os.path.basename(srcfile)}\n'
               '\n'
               'Convert this file to snake case and update all files which '
               'use it.\n')
        subprocess.call(['git', 'add', '-u'])
        subprocess.call(['git', 'commit', '-s', '-m', msg])
def main():
    """Parse the command-line arguments and convert the requested file"""
    parser = ArgumentParser(
        epilog='Convert camel case function names to snake in a file and callers')
    parser.add_argument(
        '-c', '--commit', action='store_true',
        help='Add a commit with the changes')
    parser.add_argument(
        '-n', '--dry_run', action='store_true',
        help='Dry run, do not write back to files')
    parser.add_argument(
        '-s', '--srcfile', type=str, required=True,
        help='Filename to convert')
    opts = parser.parse_args()

    # A dry run means nothing is written back
    write_back = not opts.dry_run
    process_file(opts.srcfile, write_back, opts.commit)


if __name__ == '__main__':
    main()
我想将项目中的多个文件从 camelCase
转换为 underscore_case
。
我想要一个只需要给定文件名就能工作的单行命令(one-liner)。
经过几次不成功的尝试,我得到了这个(为了便于阅读,我把它写成了多行,但可以删除换行符把它变成单行命令):
# Convert camelCase to snake_case in FILE, in place.
# Needs gawk >= 4.1.0 for "-i inplace"; the 3-argument match() is a gawk extension.
awk -i inplace '{
    # Each pass rewrites one boundary between a lowercase letter or digit and
    # an uppercase letter; loop until the line has no such boundary left.
    while ( match($0, /(.*)([a-z0-9])([A-Z])(.*)/, cap))
        $0 = cap[1] cap[2] "_" tolower(cap[3]) cap[4];
    print
}' FILE
为了完整起见,我们可以修改它来做相反的事情(下划线为 CamelCase):
# Reverse direction: convert snake_case to camelCase in FILE, in place.
# Needs gawk >= 4.1.0 for "-i inplace"; the 3-argument match() is a gawk extension.
awk -i inplace '{
    # Each pass removes one "_" that sits between a lowercase letter or digit
    # and a lowercase letter, upper-casing the letter that follows it.
    while ( match($0, /(.*)([a-z0-9])_([a-z])(.*)/, cap))
        $0 = cap[1] cap[2] toupper(cap[3]) cap[4];
    print
}' FILE
如果您想知道,-i inplace
是一个仅在 awk >=4.1.0 中可用的标志,它会就地修改文件(与 sed -i
一样)。如果您的 awk 版本较旧,则必须执行以下操作:
awk '{...}' FILE > FILE.tmp && mv FILE.tmp FILE
希望对大家有所帮助!
你也可以使用 sed。
$ echo 'fooBar' | sed -r 's/([a-z0-9])([A-Z])/\1_\L\2/g'
foo_bar
$ echo 'fooBar' | sed 's/\([a-z0-9]\)\([A-Z]\)/\1_\L\2/g'
foo_bar
这可能是您想要的:
$ cat tst.awk
# Convert camelCase words on every input line to snake_case.
# An uppercase letter becomes "_" plus its lowercase form only when the
# character just before it in the not-yet-processed text is a lowercase letter.
{
    head = ""       # part of the line that has already been processed
    tail = $0       # part of the line that still has to be scanned
    while ( match(tail,/[[:upper:]]/) ) {
        tgt = substr(tail,RSTART,1)
        if ( substr(tail,RSTART-1,1) ~ /[[:lower:]]/ ) {
            tgt = "_" tolower(tgt)
        }
        # Move everything up to and including the (possibly rewritten) letter
        # into head, then continue scanning after it
        head = head substr(tail,1,RSTART-1) tgt
        tail = substr(tail,RSTART+1)
    }
    print head tail
}
$ cat file
nowIs theWinterOfOur disContent
From ThePlay About RichardIII
$ awk -f tst.awk file
now_is the_winter_of_our dis_content
From The_play About Richard_iII
但如果没有您的示例输入和预期输出,这只是一个猜测。
建议的 sed
答案有一些问题:
$ echo 'FooBarFB' | sed -r 's/([a-z0-9])([A-Z])/\1_\L\2/g'
Foo_bar_fB
我建议如下
$ echo 'FooBarFB' | sed -r 's/([A-Z])/_\L\1/g' | sed 's/^_//'
foo_bar_f_b
这是一个 Python 脚本,可以将带有 CamelCase 函数的文件转换为 snake_case,然后也修复调用者。可选地,它创建一个包含更改的提交。
用法: style.py -c -s tools/patman/terminal.py
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0+
#
# Copyright 2021 Google LLC
# Written by Simon Glass <sjg@chromium.org>
#
"""Changes the functions and class methods in a file to use snake case, updating
other tools which use them"""
from argparse import ArgumentParser
import glob
import os
import re
import subprocess
import camel_case
# Function names that are excluded from renaming
EXCLUDE_NAMES = {'setUp', 'tearDown', 'setUpClass', 'tearDownClass'}

# Matches a 'def name(' header and captures the function name
RE_FUNC = re.compile(r' *def (\w+)\(')

# Glob pattern for the files that might call the file being converted
FILES_GLOB = 'tools/**/*.py'
def collect_funcs(fname):
    """Read a source file and list the functions defined in it

    Args:
        fname (str): Name of the file to read

    Returns:
        tuple:
            str: Full text of the file
            list of str: Every function name matched by RE_FUNC, in the
                order found (a name may appear more than once)
    """
    with open(fname, encoding='utf-8') as src:
        contents = src.read()
    names = RE_FUNC.findall(contents)
    return contents, names
def get_module_name(fname):
    """Work out the module names that correspond to a filename

    The file extension and the first path component (e.g. 'tools') are
    dropped; the remaining components make up the dotted module name.

    Args:
        fname (str): Filename to convert, e.g. 'tools/patman/command.py'

    Returns:
        tuple:
            str: Full module name, e.g. 'patman.command'
            str: Leaf module name, e.g. 'command'
            str: Program name, e.g. 'patman'
    """
    stem, _ext = os.path.splitext(fname)
    _top, *components = stem.split('/')
    dotted = '.'.join(components)
    return dotted, components[-1], components[0]
def process_caller(data, conv, module_name, leaf):
    """Process a file that might call another module

    This converts all the camel-case references in the provided file contents
    to the corresponding snake-case references. Three kinds of reference are
    handled:

    - calls made through the module, e.g. 'command.DoIt('
    - symbols imported individually, e.g. 'from patman.command import DoIt',
      together with the bare calls 'DoIt(' which rely on that import
    - mock targets, e.g. "patch.object(command, 'DoIt', ..."

    Args:
        data (str): Contents of file to convert
        conv (dict): Identifiers to convert
            key: Current name in camel case, e.g. 'DoIt'
            value: New name in snake case, e.g. 'do_it'
        module_name (str): Name of module as referenced by the file, e.g.
            'patman.command'
        leaf (str): Leaf module name, e.g. 'command'

    Returns:
        str: New file contents, or None if it was not modified
    """
    orig_data = data
    leaf_re = re.escape(leaf)

    # Update any simple function calls into the module. Everything is escaped
    # so that '.' only matches a literal dot, and \b stops a longer identifier
    # which merely ends with the leaf name from matching.
    for name, new_name in conv.items():
        data = re.sub(fr'\b{leaf_re}\.{re.escape(name)}\(',
                      lambda _m: f'{leaf}.{new_name}(', data)

    # Deal with files that import symbols individually. Each import line is
    # rewritten on its own, so several imports from the same module do not
    # overwrite one another.
    imported = {}

    def fix_import(match):
        """Rewrite one 'from <module> import ...' line, noting renamed names"""
        names = [n.strip() for n in match.group(1).split(',')]
        for name in names:
            if conv.get(name):
                imported[name] = conv[name]
        new_names = [conv.get(n) or n for n in names]
        return f"from {module_name} import {', '.join(new_names)}\n"

    data = re.sub(fr'from {re.escape(module_name)} import (.*)\n',
                  fix_import, data)

    # Symbols imported by name are called without a module prefix
    for name, new_name in imported.items():
        data = re.sub(fr'\b{re.escape(name)}\(',
                      lambda _m: f'{new_name}(', data)

    # Deal with mocks like:
    #   unittest.mock.patch.object(module, 'Function', ...
    for name, new_name in conv.items():
        data = re.sub(fr"\b{leaf_re}, '{re.escape(name)}'",
                      lambda _m: f"{leaf}, '{new_name}'", data)

    # Only report a result when something really changed
    return data if data != orig_data else None
def process_file(srcfile, do_write, commit):
    """Process a file to rename its camel-case functions

    This renames the class methods and functions in a file so that they use
    snake case. Then it updates other modules that call those functions.

    Args:
        srcfile (str): Filename to process
        do_write (bool): True to write back to files, False to do a dry run
        commit (bool): True to create a commit with the changes
    """
    data, funcs = collect_funcs(srcfile)
    module_name, leaf, prog = get_module_name(srcfile)

    # Map each camel-case name to its snake-case replacement
    conv = {name: camel_case.to_snake(name)
            for name in funcs if name not in EXCLUDE_NAMES}

    # Convert name to new_name in the file
    for name, new_name in conv.items():
        # Don't match if it is preceded by a '.', since that indicates that
        # it is calling this same function name but in a different module.
        # The \b stops a match in the middle of a longer identifier.
        data = re.sub(fr'(?<!\.)\b{name}\(', f'{new_name}(', data)

        # But do allow self.xxx
        data = re.sub(fr'\bself\.{name}\(', f'self.{new_name}(', data)

    if do_write:
        with open(srcfile, 'w', encoding='utf-8') as out:
            out.write(data)

    # Now find all files which use these functions and update them
    for fname in glob.glob(FILES_GLOB, recursive=True):
        with open(fname, encoding='utf-8') as inf:
            caller_data = inf.read()
        # Pass the file contents (not the filename) to be converted
        newdata = process_caller(caller_data, conv, module_name, leaf)
        if do_write and newdata:
            with open(fname, 'w', encoding='utf-8') as out:
                out.write(newdata)

    if commit:
        # The blank line separates the commit subject from the body
        msg = (f'{prog}: Convert camel case in {os.path.basename(srcfile)}\n'
               '\n'
               'Convert this file to snake case and update all files which '
               'use it.\n')
        subprocess.call(['git', 'add', '-u'])
        subprocess.call(['git', 'commit', '-s', '-m', msg])
def main():
    """Parse the command-line arguments and convert the requested file"""
    parser = ArgumentParser(
        epilog='Convert camel case function names to snake in a file and callers')
    parser.add_argument(
        '-c', '--commit', action='store_true',
        help='Add a commit with the changes')
    parser.add_argument(
        '-n', '--dry_run', action='store_true',
        help='Dry run, do not write back to files')
    parser.add_argument(
        '-s', '--srcfile', type=str, required=True,
        help='Filename to convert')
    opts = parser.parse_args()

    # A dry run means nothing is written back
    write_back = not opts.dry_run
    process_file(opts.srcfile, write_back, opts.commit)


if __name__ == '__main__':
    main()