Pandas 将文件名存储在列表中并将所有文件读入数据帧
Pandas store filenames in list and read all the files into a dataframe
您好,我正在尝试使用 glob 和 re 查找符合指定命名格式的文件。找到匹配项后,我会把匹配的文件名写入文本文件。现在我想把所有匹配的文件名收集到一个列表中,以便把这些文件全部读入同一个数据帧。
import re
import sys
import os
import glob
import pandas as pd
import logging
# Question snippet (reproduces the reported problem): scan *.csv in the current
# directory, write matched / unmatched names to two text files, then try to
# read the matched files into one DataFrame.
with open('files_matched.txt', 'w') as f_matched, open('files_not_matched.txt','w') as f_notmatched:
    try:
        for file in glob.glob('*.csv'):
            r = re.search(r'abc_sales(20[0-9][0-9])-([1-9]|1[0-2]|0[0-9])-([1-9]|1[0-9]|2[0-9]|3[0-1]|0[0-9])-[0-9]{2}_[a-z0-9]{3,5}.csv', file)
            if r:
                # BUG (the subject of the question): this rebinds `filename` to a
                # fresh one-element list on every match, so only the last
                # matched file survives the loop.
                filename=[file] # unable to store list of files
                match=f'File matched:{file}'
                f_matched.write(match+'\n')
            else:
                not_match=f'File not matched:{file}'
                f_notmatched.write(not_match + '\n')
    except Exception as e:
        # Any error raised inside the loop is silently discarded here.
        pass
# `filename` holds only the last matched name at this point (and is undefined
# if nothing matched), so `df` contains at most one frame.
df=[pd.read_csv(f,encoding='ISO-8859-1',error_bad_lines=False,engine='python') for f in filename]
# Tag each frame with the name of the file it was read from (column 'f').
for df_new, f in zip(df,filename):
    df_new['f'] = f
combined_df = pd.concat(df, ignore_index=False)
combined_df.head()
通过 `list.append` 把匹配的文件名逐个加入列表,然后通过 `DataFrame.assign` 为每个数据帧添加记录来源文件名的新列:
# Answer snippet: collect every matching CSV name in a list, then read them all
# into a single DataFrame with a column 'f' recording the source file.

# Compiled once, outside the loop. The dot before "csv" is escaped and the
# pattern is applied with fullmatch(), so only names of exactly this shape are
# accepted (the unescaped, unanchored form also accepted names with extra
# leading/trailing text or any character in place of the dot).
FILENAME_RE = re.compile(
    r'abc_sales(20[0-9][0-9])-([1-9]|1[0-2]|0[0-9])-([1-9]|1[0-9]|2[0-9]|3[0-1]|0[0-9])-[0-9]{2}_[a-z0-9]{3,5}\.csv'
)

filenames = []
with open('files_matched.txt', 'w') as f_matched, open('files_not_matched.txt', 'w') as f_notmatched:
    try:
        for file in glob.glob('*.csv'):
            if FILENAME_RE.fullmatch(file):
                filenames.append(file)  # add filename to list
                f_matched.write(f'File matched:{file}\n')
            else:
                f_notmatched.write(f'File not matched:{file}\n')
    except OSError:
        # Keep going with whatever was collected so far, but record the
        # failure instead of discarding it silently.
        logging.exception('Failed while writing the match reports')

# add new column by assign
# on_bad_lines='skip' replaces error_bad_lines=False, which was deprecated in
# pandas 1.3 and removed in pandas 2.0.
df = [
    pd.read_csv(
        f,
        encoding='ISO-8859-1',
        on_bad_lines='skip',
        engine='python',
    ).assign(f=f)
    for f in filenames
]
# pd.concat raises ValueError on an empty list, so fall back to an empty frame
# when no file matched.
combined_df = pd.concat(df, ignore_index=False) if df else pd.DataFrame()
您好,我正在尝试使用 glob 和 re 查找符合指定命名格式的文件。找到匹配项后,我会把匹配的文件名写入文本文件。现在我想把所有匹配的文件名收集到一个列表中,以便把这些文件全部读入同一个数据帧。
import re
import sys
import os
import glob
import pandas as pd
import logging
# Question snippet (reproduces the reported problem): scan *.csv in the current
# directory, write matched / unmatched names to two text files, then try to
# read the matched files into one DataFrame.
with open('files_matched.txt', 'w') as f_matched, open('files_not_matched.txt','w') as f_notmatched:
    try:
        for file in glob.glob('*.csv'):
            r = re.search(r'abc_sales(20[0-9][0-9])-([1-9]|1[0-2]|0[0-9])-([1-9]|1[0-9]|2[0-9]|3[0-1]|0[0-9])-[0-9]{2}_[a-z0-9]{3,5}.csv', file)
            if r:
                # BUG (the subject of the question): this rebinds `filename` to a
                # fresh one-element list on every match, so only the last
                # matched file survives the loop.
                filename=[file] # unable to store list of files
                match=f'File matched:{file}'
                f_matched.write(match+'\n')
            else:
                not_match=f'File not matched:{file}'
                f_notmatched.write(not_match + '\n')
    except Exception as e:
        # Any error raised inside the loop is silently discarded here.
        pass
# `filename` holds only the last matched name at this point (and is undefined
# if nothing matched), so `df` contains at most one frame.
df=[pd.read_csv(f,encoding='ISO-8859-1',error_bad_lines=False,engine='python') for f in filename]
# Tag each frame with the name of the file it was read from (column 'f').
for df_new, f in zip(df,filename):
    df_new['f'] = f
combined_df = pd.concat(df, ignore_index=False)
combined_df.head()
通过 `list.append` 把匹配的文件名逐个加入列表,然后通过 `DataFrame.assign` 为每个数据帧添加记录来源文件名的新列:
# Answer snippet: collect every matching CSV name in a list, then read them all
# into a single DataFrame with a column 'f' recording the source file.

# Compiled once, outside the loop. The dot before "csv" is escaped and the
# pattern is applied with fullmatch(), so only names of exactly this shape are
# accepted (the unescaped, unanchored form also accepted names with extra
# leading/trailing text or any character in place of the dot).
FILENAME_RE = re.compile(
    r'abc_sales(20[0-9][0-9])-([1-9]|1[0-2]|0[0-9])-([1-9]|1[0-9]|2[0-9]|3[0-1]|0[0-9])-[0-9]{2}_[a-z0-9]{3,5}\.csv'
)

filenames = []
with open('files_matched.txt', 'w') as f_matched, open('files_not_matched.txt', 'w') as f_notmatched:
    try:
        for file in glob.glob('*.csv'):
            if FILENAME_RE.fullmatch(file):
                filenames.append(file)  # add filename to list
                f_matched.write(f'File matched:{file}\n')
            else:
                f_notmatched.write(f'File not matched:{file}\n')
    except OSError:
        # Keep going with whatever was collected so far, but record the
        # failure instead of discarding it silently.
        logging.exception('Failed while writing the match reports')

# add new column by assign
# on_bad_lines='skip' replaces error_bad_lines=False, which was deprecated in
# pandas 1.3 and removed in pandas 2.0.
df = [
    pd.read_csv(
        f,
        encoding='ISO-8859-1',
        on_bad_lines='skip',
        engine='python',
    ).assign(f=f)
    for f in filenames
]
# pd.concat raises ValueError on an empty list, so fall back to an empty frame
# when no file matched.
combined_df = pd.concat(df, ignore_index=False) if df else pd.DataFrame()