将多个文本文件转换为 csv 以创建带标签的数据集
Converting multiple text files to a csv to create a labelled dataset
我在多个文件夹中有文本文件(文件夹名称是 categories/labels 的名称)。我想生成一个 csv 文件(数据集),其中还有一列作为该文本类别的标签(文件夹名称)。
import csv
import os
folder = os.path.dirname("/home/jaideep/Desktop/folder/ML DS/Csv/Datasets/")
folder_list = os.listdir(folder)
with open("/home/jaideep/Desktop/folder/ML DS/Csv/data.csv", "w") as outfile:
writer = csv.writer(outfile)
writer.writerow(['Label', 'Email','Message'])
for f in folder_list:
file_list = os.listdir(folder+"/"+f+"/")
print(file_list)
for file in file_list:
with open(file, "r") as infile:
contents = infile.read()
outfile.write(f+',')
outfile.write(contents)
但是我得到了
File "/home/jaideep/Desktop/folder/ML DS/Csv/Main.py", line 15, in <module>
with open(file, "r") as infile:
FileNotFoundError: [Errno 2] No such file or directory: 'file2.txt'
我知道之前有人问过类似的问题,但我无法提交问题的解决方案。如有任何帮助,我们将不胜感激。
os.listdir
只列出了目录的文件名,需要重新构建路径
您可能需要查看 glob
以了解此事。
此版本应该可以解决您的问题。
import csv
import os
folder = os.path.dirname("/home/jaideep/Desktop/folder/ML DS/Csv/Datasets/")
folder_list = os.listdir(folder)
with open("/home/jaideep/Desktop/folder/ML DS/Csv/data.csv", "w") as outfile:
writer = csv.writer(outfile)
writer.writerow(['Label', 'Email','Message'])
for f in folder_list:
file_list = os.listdir(os.path.join(folder, f))
print(file_list)
for file in file_list:
with open(os.path.join(folder, f, file), "r") as infile:
contents = infile.read()
outfile.write(f+',')
outfile.write(contents)
我在多个文件夹中有文本文件(文件夹名称是 categories/labels 的名称)。我想生成一个 csv 文件(数据集),其中还有一列作为该文本类别的标签(文件夹名称)。
import csv
import os
folder = os.path.dirname("/home/jaideep/Desktop/folder/ML DS/Csv/Datasets/")
folder_list = os.listdir(folder)
with open("/home/jaideep/Desktop/folder/ML DS/Csv/data.csv", "w") as outfile:
writer = csv.writer(outfile)
writer.writerow(['Label', 'Email','Message'])
for f in folder_list:
file_list = os.listdir(folder+"/"+f+"/")
print(file_list)
for file in file_list:
with open(file, "r") as infile:
contents = infile.read()
outfile.write(f+',')
outfile.write(contents)
但是我得到了
File "/home/jaideep/Desktop/folder/ML DS/Csv/Main.py", line 15, in <module>
with open(file, "r") as infile:
FileNotFoundError: [Errno 2] No such file or directory: 'file2.txt'
我知道之前有人问过类似的问题,但我无法提交问题的解决方案。如有任何帮助,我们将不胜感激。
os.listdir
只列出了目录的文件名,需要重新构建路径
您可能需要查看 glob
以了解此事。
此版本应该可以解决您的问题。
import csv
import os
folder = os.path.dirname("/home/jaideep/Desktop/folder/ML DS/Csv/Datasets/")
folder_list = os.listdir(folder)
with open("/home/jaideep/Desktop/folder/ML DS/Csv/data.csv", "w") as outfile:
writer = csv.writer(outfile)
writer.writerow(['Label', 'Email','Message'])
for f in folder_list:
file_list = os.listdir(os.path.join(folder, f))
print(file_list)
for file in file_list:
with open(os.path.join(folder, f, file), "r") as infile:
contents = infile.read()
outfile.write(f+',')
outfile.write(contents)