从多个文件夹读取多个txt文件
Reading multiple txt files from multiple folders
我有 20 个文件夹,每个文件夹包含 50 个 txt 文件,我需要读取所有文件,以便比较各个文件夹的字数。我知道如何读取单个文件夹中的多个文件,但这样做效率很低;有没有更高效的方法,而不必像下面这样逐个文件夹地读取?
import re
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
import os
import glob
# 1. folder1
# Print the path and the character count of every .txt file directly inside
# folder1.
folder_path = '/home/runner/Final-Project/folder1'
for filename in glob.glob(os.path.join(folder_path, '*.txt')):
    # The context manager closes each file as soon as it has been read.
    with open(filename, 'r') as f:
        text = f.read()
    print(filename)
    # len() of the text is a character count, not a word count.
    print(len(text))
# 2. folder2
# Print the path and the character count of every .txt file directly inside
# folder2.
folder_path = '/home/runner/Final-Project/folder2'
for filename in glob.glob(os.path.join(folder_path, '*.txt')):
    # The context manager closes each file as soon as it has been read.
    with open(filename, 'r') as f:
        text = f.read()
    print(filename)
    # len() of the text is a character count, not a word count.
    print(len(text))
您可以像之前一样使用 glob 来做类似的事情,只是要把目录名称也用通配符来匹配。
# Visit every .txt file in every folder directly below folder_path with a
# single glob pattern, instead of repeating the loop once per folder.
folder_path = '/home/runner/Final-Project'
# The first '*' matches a directory of any name; '*.txt' matches the text
# files inside it.
for filename in glob.glob(os.path.join(folder_path, '*', '*.txt')):
    # process your files
    # (shown here: print each path and its character count)
    with open(filename, 'r') as f:
        text = f.read()
    print(filename)
    print(len(text))
os.path.join() 中的第一个 '*' 表示任意名称的目录。因此,像这样调用 glob.glob() 会遍历 folder_path 中的各个目录,并找出其中的所有文本文件。
下面的函数不使用 glob,而是返回指定目录及其所有子目录中的文件列表;之后可以遍历该列表,逐个打开并读取文件。
def list_of_files(dirName):
    """Return the paths of all files under ``dirName``, searched recursively.

    Args:
        dirName: Path of the directory to scan.

    Returns:
        A list of paths, each built by joining ``dirName`` with the entry
        names found below it. Sub-directories are descended into and are not
        listed themselves.

    Raises:
        OSError: Propagated from ``os.listdir`` when a directory cannot be
            listed.
    """
    all_files = []
    for entry in os.listdir(dirName):
        # Create full path
        full_path = os.path.join(dirName, entry)
        if os.path.isdir(full_path):
            # Extend in place rather than rebuilding the list with ``+`` on
            # every sub-directory.
            all_files.extend(list_of_files(full_path))
        else:
            all_files.append(full_path)
    return all_files
# Example call: print the list returned by list_of_files.
print(list_of_files(<Dir Path>))  # <Dir Path> is a placeholder; replace it with your directory path
我有 20 个文件夹,每个文件夹包含 50 个 txt 文件,我需要读取所有文件,以便比较各个文件夹的字数。我知道如何读取单个文件夹中的多个文件,但这样做效率很低;有没有更高效的方法,而不必像下面这样逐个文件夹地读取?
import re
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
import os
import glob
# 1. folder1
# Print the path and the character count of every .txt file directly inside
# folder1.
folder_path = '/home/runner/Final-Project/folder1'
for filename in glob.glob(os.path.join(folder_path, '*.txt')):
    # The context manager closes each file as soon as it has been read.
    with open(filename, 'r') as f:
        text = f.read()
    print(filename)
    # len() of the text is a character count, not a word count.
    print(len(text))
# 2. folder2
# Print the path and the character count of every .txt file directly inside
# folder2.
folder_path = '/home/runner/Final-Project/folder2'
for filename in glob.glob(os.path.join(folder_path, '*.txt')):
    # The context manager closes each file as soon as it has been read.
    with open(filename, 'r') as f:
        text = f.read()
    print(filename)
    # len() of the text is a character count, not a word count.
    print(len(text))
您可以像之前一样使用 glob 来做类似的事情,只是要把目录名称也用通配符来匹配。
# Visit every .txt file in every folder directly below folder_path with a
# single glob pattern, instead of repeating the loop once per folder.
folder_path = '/home/runner/Final-Project'
# The first '*' matches a directory of any name; '*.txt' matches the text
# files inside it.
for filename in glob.glob(os.path.join(folder_path, '*', '*.txt')):
    # process your files
    # (shown here: print each path and its character count)
    with open(filename, 'r') as f:
        text = f.read()
    print(filename)
    print(len(text))
os.path.join() 中的第一个 '*' 表示任意名称的目录。因此,像这样调用 glob.glob() 会遍历 folder_path 中的各个目录,并找出其中的所有文本文件。
下面的函数不使用 glob,而是返回指定目录及其所有子目录中的文件列表;之后可以遍历该列表,逐个打开并读取文件。
def list_of_files(dirName):
    """Return the paths of all files under ``dirName``, searched recursively.

    Args:
        dirName: Path of the directory to scan.

    Returns:
        A list of paths, each built by joining ``dirName`` with the entry
        names found below it. Sub-directories are descended into and are not
        listed themselves.

    Raises:
        OSError: Propagated from ``os.listdir`` when a directory cannot be
            listed.
    """
    all_files = []
    for entry in os.listdir(dirName):
        # Create full path
        full_path = os.path.join(dirName, entry)
        if os.path.isdir(full_path):
            # Extend in place rather than rebuilding the list with ``+`` on
            # every sub-directory.
            all_files.extend(list_of_files(full_path))
        else:
            all_files.append(full_path)
    return all_files
# Example call: print the list returned by list_of_files.
print(list_of_files(<Dir Path>))  # <Dir Path> is a placeholder; replace it with your directory path