合并数据框中的所有 txt 文件 pandas
Merge all txt files in dataframe pandas
我的文件夹中有一系列 txt 文件,我想把这些文件读入一个数据框。
但目前我只能先保存为 csv 文件才能做到这一点。如何不经过 csv 文件,直接生成数据框?
以下是我的代码:
import os
import csv
import pandas as pd
# Folder holding the txt files to load. A raw string keeps the Windows
# backslashes literal instead of relying on invalid escape sequences.
main_folder = r'F:\PROJETOS\LOTE45\ARQUIVOS\RISK\RISK_CUSTOM_FUND_N1'
def get_filename(path):
    """Return the base names of the files located directly inside *path*.

    Only entries for which ``is_file()`` is true are kept, so
    sub-directories are skipped and the listing is not recursive. Names come
    back in the order ``os.scandir`` yields them, which is arbitrary.
    """
    # Use scandir as a context manager so the directory handle is released
    # promptly instead of waiting for garbage collection.
    with os.scandir(path) as entries:
        # DirEntry.name is already the base name of DirEntry.path.
        return [entry.name for entry in entries if entry.is_file()]
files = get_filename(main_folder)

# Dump every file as one CSV row: [file name, full file contents].
with open('some.csv', 'w', encoding='utf8', newline='') as csv_file:
    # One writer serves the whole file; it need not be rebuilt per row.
    writer = csv.writer(csv_file)
    for _file in files:
        # os.path.join replaces the hand-built '\' separator, which is an
        # unterminated string literal in Python.
        with open(os.path.join(main_folder, _file), 'r') as f:
            writer.writerow([_file, f.read()])

# The CSV has no header row, so name the columns explicitly; otherwise
# pandas would consume the first file's row as the column labels.
df = pd.read_csv('some.csv', header=None, names=['file_name', 'text'])
你可以尝试这样做:
# Read each file into its own frame and concatenate once: DataFrame.append
# was removed in pandas 2.0, and appending inside a loop re-copies the
# accumulated rows on every iteration.
# Assumes `files` holds bare file names (as get_filename returns), so each
# one is resolved against main_folder before reading.
frames = [pd.read_csv(os.path.join(main_folder, _file)) for _file in files]
# pd.concat raises ValueError on an empty list, so fall back to an empty frame.
df = pd.concat(frames, ignore_index=True, sort=False) if frames else pd.DataFrame()
您可以尝试使用字典并将其转换为数据框。
# your code to obtain the files
# Collect one entry per file in parallel lists, then build the frame once.
data = {"filename": [], "text": []}
for file in files:
    # Assumes `files` holds bare file names (as get_filename returns), so
    # resolve each one against main_folder rather than the current directory.
    with open(os.path.join(main_folder, file), "r") as file_object:
        content = file_object.read()
    data["filename"].append(file)
    data["text"].append(content)
dataframe = pd.DataFrame(data)
我的文件夹中有一系列 txt 文件,我想把这些文件读入一个数据框。
但目前我只能先保存为 csv 文件才能做到这一点。如何不经过 csv 文件,直接生成数据框?
以下是我的代码:
import os
import csv
import pandas as pd
# Folder holding the txt files to load. A raw string keeps the Windows
# backslashes literal instead of relying on invalid escape sequences.
main_folder = r'F:\PROJETOS\LOTE45\ARQUIVOS\RISK\RISK_CUSTOM_FUND_N1'
def get_filename(path):
    """Return the base names of the files located directly inside *path*.

    Only entries for which ``is_file()`` is true are kept, so
    sub-directories are skipped and the listing is not recursive. Names come
    back in the order ``os.scandir`` yields them, which is arbitrary.
    """
    # Use scandir as a context manager so the directory handle is released
    # promptly instead of waiting for garbage collection.
    with os.scandir(path) as entries:
        # DirEntry.name is already the base name of DirEntry.path.
        return [entry.name for entry in entries if entry.is_file()]
files = get_filename(main_folder)

# Dump every file as one CSV row: [file name, full file contents].
with open('some.csv', 'w', encoding='utf8', newline='') as csv_file:
    # One writer serves the whole file; it need not be rebuilt per row.
    writer = csv.writer(csv_file)
    for _file in files:
        # os.path.join replaces the hand-built '\' separator, which is an
        # unterminated string literal in Python.
        with open(os.path.join(main_folder, _file), 'r') as f:
            writer.writerow([_file, f.read()])

# The CSV has no header row, so name the columns explicitly; otherwise
# pandas would consume the first file's row as the column labels.
df = pd.read_csv('some.csv', header=None, names=['file_name', 'text'])
你可以尝试这样做:
# Read each file into its own frame and concatenate once: DataFrame.append
# was removed in pandas 2.0, and appending inside a loop re-copies the
# accumulated rows on every iteration.
# Assumes `files` holds bare file names (as get_filename returns), so each
# one is resolved against main_folder before reading.
frames = [pd.read_csv(os.path.join(main_folder, _file)) for _file in files]
# pd.concat raises ValueError on an empty list, so fall back to an empty frame.
df = pd.concat(frames, ignore_index=True, sort=False) if frames else pd.DataFrame()
您可以尝试使用字典并将其转换为数据框。
# your code to obtain the files
# Collect one entry per file in parallel lists, then build the frame once.
data = {"filename": [], "text": []}
for file in files:
    # Assumes `files` holds bare file names (as get_filename returns), so
    # resolve each one against main_folder rather than the current directory.
    with open(os.path.join(main_folder, file), "r") as file_object:
        content = file_object.read()
    data["filename"].append(file)
    data["text"].append(content)
dataframe = pd.DataFrame(data)