从名称字符串中删除特定数字和字符
Removing specific numbers and characters from a name string
我有一个 Python 函数,可以从一个文件夹中读取所有的 .md
文件并将它们全部转换为 HTML 文件,同时还会把它们合并成一个大的 Markdown 文件。
import glob
import os
import markdown
def main():
    """Render every Markdown file under the input tree to HTML.

    Each ``*.md`` file found one directory level below ``input`` is rendered
    with the ``extra`` Markdown extension.  The rendered HTML is appended to
    ``bigfile.md`` and is also written to its own file, placed between the
    contents of the first and second ``*.html`` files found in the input
    tree.  The per-file output path is the source path with ``input``
    replaced by ``output`` and ``.md`` replaced by ``.html``.

    Raises:
        IndexError: if at least one ``*.md`` file exists but fewer than two
            ``*.html`` files were found.
    """
    # Raw strings keep every backslash literal.  In a normal string literal
    # "\b" (as in "\bigfile.md") is a backspace character, and "\p", "\i"
    # and "\*" are invalid escape sequences.
    input_dirs = r"\servername\prod_help_file\input\*"
    combined_path = r"\servername\prod_help_file\bigfile.md"

    file_list_md = glob.glob(os.path.join(input_dirs, "*.md"))
    # NOTE(review): glob returns matches in arbitrary order; the code below
    # presumably expects [0] to be the page header and [1] the footer --
    # confirm, or sort the list.
    file_list_html = glob.glob(os.path.join(input_dirs, "*.html"))

    config = {
        'extra': {
            'footnotes': {
                'UNIQUE_IDS': True
            }
        }
    }

    with open(combined_path, 'w') as output:
        for md_path in file_list_md:
            with open(md_path, 'r') as body:
                html = markdown.markdown(
                    body.read(),
                    extensions=['extra'],
                    extension_configs=config,
                )
            output.write(html)

            # str.replace substitutes every occurrence, so a file or folder
            # name that itself contains "input" or ".md" is rewritten too.
            html_path = md_path.replace('input', 'output').replace('.md', '.html')
            with open(html_path, 'w') as output2:
                with open(file_list_html[0], 'r') as head:
                    output2.write(head.read())
                output2.write(html)
                with open(file_list_html[1], 'r') as foot:
                    output2.write(foot.read())


if __name__ == "__main__":
    main()
但我必须使用完整路径;为了让文件保持顺序,文件名以 5 位数字和一个下划线开头,如下所示:
"C:\servername\prod_help_file\input\10809_file.md"
我希望输出文件是这样的:
"C:\servername\prod_help_file\output\file.md"
没有数字或下划线。有什么方法可以只删除 5 个数字和下划线吗?
你可以使用 re 模块
import re
# Delete every occurrence of five consecutive digits followed by an
# underscore, and keep the result so it can be used afterwards.
new_name = re.sub(r'\d\d\d\d\d_', '', '19345_file.md')
# new_name == 'file.md'
import re
txt = "The rain 85452_in Spain"
# Substitute each run of five digits plus an underscore with an empty string.
five_digits_underscore = re.compile(r"\d{5}_")
x = five_digits_underscore.sub("", txt)
如果文件名中总是包含下划线,那么你可以这样做:
import os
path = '/my/path_1222/10809_file.md'
# Work on the file name only, so underscores in the directory part
# (here "path_1222") are left untouched.
directory, filename = os.path.split(path)
# Drop everything up to and including the first underscore.  A name with
# no underscore is kept as it is instead of raising IndexError.
_prefix, underscore, remainder = filename.partition('_')
if underscore:
    filename = remainder
f = os.path.join(directory, filename)
我有一个 Python 函数,可以从一个文件夹中读取所有的 .md
文件并将它们全部转换为 HTML 文件,同时还会把它们合并成一个大的 Markdown 文件。
import glob
import os
import markdown
def main():
    """Render every Markdown file under the input tree to HTML.

    Each ``*.md`` file found one directory level below ``input`` is rendered
    with the ``extra`` Markdown extension.  The rendered HTML is appended to
    ``bigfile.md`` and is also written to its own file, placed between the
    contents of the first and second ``*.html`` files found in the input
    tree.  The per-file output path is the source path with ``input``
    replaced by ``output`` and ``.md`` replaced by ``.html``.

    Raises:
        IndexError: if at least one ``*.md`` file exists but fewer than two
            ``*.html`` files were found.
    """
    # Raw strings keep every backslash literal.  In a normal string literal
    # "\b" (as in "\bigfile.md") is a backspace character, and "\p", "\i"
    # and "\*" are invalid escape sequences.
    input_dirs = r"\servername\prod_help_file\input\*"
    combined_path = r"\servername\prod_help_file\bigfile.md"

    file_list_md = glob.glob(os.path.join(input_dirs, "*.md"))
    # NOTE(review): glob returns matches in arbitrary order; the code below
    # presumably expects [0] to be the page header and [1] the footer --
    # confirm, or sort the list.
    file_list_html = glob.glob(os.path.join(input_dirs, "*.html"))

    config = {
        'extra': {
            'footnotes': {
                'UNIQUE_IDS': True
            }
        }
    }

    with open(combined_path, 'w') as output:
        for md_path in file_list_md:
            with open(md_path, 'r') as body:
                html = markdown.markdown(
                    body.read(),
                    extensions=['extra'],
                    extension_configs=config,
                )
            output.write(html)

            # str.replace substitutes every occurrence, so a file or folder
            # name that itself contains "input" or ".md" is rewritten too.
            html_path = md_path.replace('input', 'output').replace('.md', '.html')
            with open(html_path, 'w') as output2:
                with open(file_list_html[0], 'r') as head:
                    output2.write(head.read())
                output2.write(html)
                with open(file_list_html[1], 'r') as foot:
                    output2.write(foot.read())


if __name__ == "__main__":
    main()
但我必须使用完整路径;为了让文件保持顺序,文件名以 5 位数字和一个下划线开头,如下所示:
"C:\servername\prod_help_file\input\10809_file.md"
我希望输出文件是这样的:
"C:\servername\prod_help_file\output\file.md"
没有数字或下划线。有什么方法可以只删除 5 个数字和下划线吗?
你可以使用 re 模块
import re
# Delete every occurrence of five consecutive digits followed by an
# underscore, and keep the result so it can be used afterwards.
new_name = re.sub(r'\d\d\d\d\d_', '', '19345_file.md')
# new_name == 'file.md'
import re
txt = "The rain 85452_in Spain"
# Substitute each run of five digits plus an underscore with an empty string.
five_digits_underscore = re.compile(r"\d{5}_")
x = five_digits_underscore.sub("", txt)
如果文件名中总是包含下划线,那么你可以这样做:
import os
path = '/my/path_1222/10809_file.md'
# Work on the file name only, so underscores in the directory part
# (here "path_1222") are left untouched.
directory, filename = os.path.split(path)
# Drop everything up to and including the first underscore.  A name with
# no underscore is kept as it is instead of raising IndexError.
_prefix, underscore, remainder = filename.partition('_')
if underscore:
    filename = remainder
f = os.path.join(directory, filename)