如果文件不存在,如何更改 python 脚本继续循环
How to change python script that it continue with looping if file not exist
使用我的脚本,我遍历多个子目录并创建一个包含子目录中 3 个文件的数据框。我想将每个子目录的输出写入每个子目录,但我的代码给出了一个错误: "df1 not defined" 在
行
dfmerge1 = pd.merge(df1, df2, on=['genome', 'contig'], how='outer')
这可能是因为并非所有文件都存在于子目录中,脚本停止了。如果一个子目录不包含所有三个文件,我希望脚本继续下一个子目录。我怎样才能做到这一点?
我的密码是
import os
import pandas as pd
print('Start merging contig files')
for root, dirs, files in os.walk(os.getcwd()):
filepath = os.path.join(root, 'genes.faa.genespercontig.csv')
if os.path.isfile(filepath):
with open(filepath, 'r') as f1:
df1 = pd.read_csv(f1, header=None, delim_whitespace=True, names = ["contig", "genes"])
df1['genome'] = os.path.basename(os.path.dirname(filepath))
filepath = os.path.join(root, 'hmmer.analyze.txt.results.txt')
if os.path.isfile(filepath):
with open(filepath, 'r') as f2:
df2 = pd.read_csv(f2, header=None, delim_whitespace=True, names = ["contig", "SCM"])
df2['genome'] = os.path.basename(os.path.dirname(filepath))
filepath = os.path.join(root, 'genes.fna.output_blastplasmiddb.out.count_plasmiddbhit.out')
if os.path.isfile(filepath):
with open(filepath, 'r') as f3:
df3 = pd.read_csv(f3, header=None, delim_whitespace=True, names = ["contig", "plasmid_genes"])
df3['genome'] = os.path.basename(os.path.dirname(filepath))
#merge dataframes
dfmerge1 = pd.merge(df1, df2, on=['genome', 'contig'], how='outer')
df_end = pd.merge(dfmerge1, df3, on=['genome', 'contig'], how='outer')
#set NaN in columns to 0
nan_cols = df_end.columns[df_end.isnull().any(axis=0)]
for col in nan_cols:
df_end[col] = df_end[col].fillna(0).astype(int)
df_end.to_csv(os.path.join(root, 'outputgenesdf.csv'))
您正确地检查了 filepath
是否存在,但您没有处理文件不存在的情况。因此,如果文件不存在,那么 df1
将是前一个循环迭代的剩余值,或者如果这是第一次通过循环则未定义。
if os.path.isfile(filepath):
with open(filepath, 'r') as f1:
df1 = pd.read_csv(f1, header=None, delim_whitespace=True, names = ["contig", "genes"])
df1['genome'] = os.path.basename(os.path.dirname(filepath))
else:
continue
使用我的脚本,我遍历多个子目录并创建一个包含子目录中 3 个文件的数据框。我想将每个子目录的输出写入每个子目录,但我的代码给出了一个错误: "df1 not defined" 在
行dfmerge1 = pd.merge(df1, df2, on=['genome', 'contig'], how='outer')
这可能是因为并非所有文件都存在于子目录中,脚本停止了。如果一个子目录不包含所有三个文件,我希望脚本继续下一个子目录。我怎样才能做到这一点?
我的密码是
import os
import pandas as pd
print('Start merging contig files')
for root, dirs, files in os.walk(os.getcwd()):
filepath = os.path.join(root, 'genes.faa.genespercontig.csv')
if os.path.isfile(filepath):
with open(filepath, 'r') as f1:
df1 = pd.read_csv(f1, header=None, delim_whitespace=True, names = ["contig", "genes"])
df1['genome'] = os.path.basename(os.path.dirname(filepath))
filepath = os.path.join(root, 'hmmer.analyze.txt.results.txt')
if os.path.isfile(filepath):
with open(filepath, 'r') as f2:
df2 = pd.read_csv(f2, header=None, delim_whitespace=True, names = ["contig", "SCM"])
df2['genome'] = os.path.basename(os.path.dirname(filepath))
filepath = os.path.join(root, 'genes.fna.output_blastplasmiddb.out.count_plasmiddbhit.out')
if os.path.isfile(filepath):
with open(filepath, 'r') as f3:
df3 = pd.read_csv(f3, header=None, delim_whitespace=True, names = ["contig", "plasmid_genes"])
df3['genome'] = os.path.basename(os.path.dirname(filepath))
#merge dataframes
dfmerge1 = pd.merge(df1, df2, on=['genome', 'contig'], how='outer')
df_end = pd.merge(dfmerge1, df3, on=['genome', 'contig'], how='outer')
#set NaN in columns to 0
nan_cols = df_end.columns[df_end.isnull().any(axis=0)]
for col in nan_cols:
df_end[col] = df_end[col].fillna(0).astype(int)
df_end.to_csv(os.path.join(root, 'outputgenesdf.csv'))
您正确地检查了 filepath
是否存在,但您没有处理文件不存在的情况。因此,如果文件不存在,那么 df1
将是前一个循环迭代的剩余值,或者如果这是第一次通过循环则未定义。
if os.path.isfile(filepath):
with open(filepath, 'r') as f1:
df1 = pd.read_csv(f1, header=None, delim_whitespace=True, names = ["contig", "genes"])
df1['genome'] = os.path.basename(os.path.dirname(filepath))
else:
continue