使用 os.walk 方法获取包含 2 种类型文件的目录路径
using os.walk method to get directory paths containing 2 types of files
我想在 Python 2.7 中使用 os.walk() 方法列出所有包含 docx 文件的文件夹。我用下面的代码做到了这一点,但我想知道是否可以进一步限制这个列表,使其只显示恰好包含两种特定文件类型(例如 "docx" 和 "pdf")的文件夹?
import os

# Only ``os`` is needed: walk() is a function of the os module, not a
# module of its own, so there is nothing separate to import.

# Write the full path of every .docx file found under the Desktop tree,
# one path per line.  The ``with`` block closes (and flushes) the output
# file even if the walk fails part-way through.
with open("output.txt", "w") as a:
    for path, subdirs, files in os.walk(r'C:\Users\Stephen\Desktop'):
        for filename in files:
            if filename.endswith('.docx'):
                # Text mode already translates "\n" into the platform line
                # ending; writing os.linesep here would produce "\r\r\n"
                # on Windows.
                a.write(os.path.join(path, filename) + "\n")
跳过没有同时包含这两种扩展名的目录;每个目录的文件列表都是有限的,因此用 any() 来测试特定扩展名的开销很小:
# Visit only the directories that hold at least one PDF *and* at least one
# Word document; every other directory is skipped.
for path, subdirs, files in os.walk(r'C:\Users\Stephen\Desktop'):
    if not (any(f.endswith('.pdf') for f in files) and
            any(f.endswith('.docx') for f in files)):
        # at least one of the two file types is missing here, skip
        continue
    # directory contains *both* PDF and Word documents
当要测试的扩展名列表变长时,您可以改为先构建一个包含该目录中所有文件扩展名的集合(set):
# Visit only the directories that contain at least one file of *every*
# required type.
for path, subdirs, files in os.walk(r'C:\Users\Stephen\Desktop'):
    # Distinct extensions present in this directory (files without an
    # extension contribute the empty string).
    extensions = {os.path.splitext(f)[-1] for f in files}
    # ``>=`` is the superset test: true only when every required extension
    # appears in ``extensions``.
    if not extensions >= {'.pdf', '.docx', '.odt', '.wpf'}:
        # directory doesn't contain *all* required file types
        continue
>=
测试右边的集合是否是左边集合的子集(即 extensions 是右侧集合的超集);因此 extensions 应该至少包含右侧列出的所有扩展名:
>>> {'.foo', '.docx', '.pdf', '.odt'} >= {'.pdf', '.docx', '.odt', '.wpf'} # missing .wpf
False
>>> {'.foo', '.wpf', '.docx', '.pdf', '.odt'} >= {'.pdf', '.docx', '.odt', '.wpf'} # complete
True
这个?
import os

# List every directory under the Desktop tree whose files are exclusively
# .docx and .pdf, with at least one of each.  One directory path is written
# per line; the ``with`` block guarantees the output file is closed.
with open("output.txt", "w") as a:
    for path, subdirs, files in os.walk(r'C:\Users\Stephen\Desktop'):
        docx = False   # saw at least one .docx file
        pdf = False    # saw at least one .pdf file
        rest = True    # stays True while no other file type has been seen
        for filename in files:
            if filename.endswith('.docx'):
                docx = True
            elif filename.endswith('.pdf'):
                pdf = True
            else:
                # Any other file type disqualifies the directory, so there
                # is no point in looking at the remaining files.
                rest = False
                break
        if docx and pdf and rest:
            # Report the directory itself, not whichever file the loop
            # happened to look at last.  "\n" is used instead of os.linesep
            # because text mode already translates line endings.
            a.write(path + "\n")
我想在 Python 2.7 中使用 os.walk() 方法列出所有包含 docx 文件的文件夹。我用下面的代码做到了这一点,但我想知道是否可以进一步限制这个列表,使其只显示恰好包含两种特定文件类型(例如 "docx" 和 "pdf")的文件夹?
import os

# Only ``os`` is needed: walk() is a function of the os module, not a
# module of its own, so there is nothing separate to import.

# Write the full path of every .docx file found under the Desktop tree,
# one path per line.  The ``with`` block closes (and flushes) the output
# file even if the walk fails part-way through.
with open("output.txt", "w") as a:
    for path, subdirs, files in os.walk(r'C:\Users\Stephen\Desktop'):
        for filename in files:
            if filename.endswith('.docx'):
                # Text mode already translates "\n" into the platform line
                # ending; writing os.linesep here would produce "\r\r\n"
                # on Windows.
                a.write(os.path.join(path, filename) + "\n")
跳过没有同时包含这两种扩展名的目录;每个目录的文件列表都是有限的,因此用 any() 来测试特定扩展名的开销很小:
# Visit only the directories that hold at least one PDF *and* at least one
# Word document; every other directory is skipped.
for path, subdirs, files in os.walk(r'C:\Users\Stephen\Desktop'):
    if not (any(f.endswith('.pdf') for f in files) and
            any(f.endswith('.docx') for f in files)):
        # at least one of the two file types is missing here, skip
        continue
    # directory contains *both* PDF and Word documents
当要测试的扩展名列表变长时,您可以改为先构建一个包含该目录中所有文件扩展名的集合(set):
# Visit only the directories that contain at least one file of *every*
# required type.
for path, subdirs, files in os.walk(r'C:\Users\Stephen\Desktop'):
    # Distinct extensions present in this directory (files without an
    # extension contribute the empty string).
    extensions = {os.path.splitext(f)[-1] for f in files}
    # ``>=`` is the superset test: true only when every required extension
    # appears in ``extensions``.
    if not extensions >= {'.pdf', '.docx', '.odt', '.wpf'}:
        # directory doesn't contain *all* required file types
        continue
>=
测试右边的集合是否是左边集合的子集(即 extensions 是右侧集合的超集);因此 extensions 应该至少包含右侧列出的所有扩展名:
>>> {'.foo', '.docx', '.pdf', '.odt'} >= {'.pdf', '.docx', '.odt', '.wpf'} # missing .wpf
False
>>> {'.foo', '.wpf', '.docx', '.pdf', '.odt'} >= {'.pdf', '.docx', '.odt', '.wpf'} # complete
True
这个?
import os

# List every directory under the Desktop tree whose files are exclusively
# .docx and .pdf, with at least one of each.  One directory path is written
# per line; the ``with`` block guarantees the output file is closed.
with open("output.txt", "w") as a:
    for path, subdirs, files in os.walk(r'C:\Users\Stephen\Desktop'):
        docx = False   # saw at least one .docx file
        pdf = False    # saw at least one .pdf file
        rest = True    # stays True while no other file type has been seen
        for filename in files:
            if filename.endswith('.docx'):
                docx = True
            elif filename.endswith('.pdf'):
                pdf = True
            else:
                # Any other file type disqualifies the directory, so there
                # is no point in looking at the remaining files.
                rest = False
                break
        if docx and pdf and rest:
            # Report the directory itself, not whichever file the loop
            # happened to look at last.  "\n" is used instead of os.linesep
            # because text mode already translates line endings.
            a.write(path + "\n")