如何对多个 fastq 文件运行同一段代码?
How to run a code for multiple fastq files?
我想对一个文件夹中的多个 fastq 文件运行以下代码。文件夹中有若干不同的 fastq 文件;首先,我需要读取一个文件并执行所需的操作,然后将结果存储在一个单独的 .fastq 文件中。接着读取第二个文件,执行相同的操作,并将结果保存在新的第二个 .fastq 文件中。对文件夹中的所有文件重复相同的步骤。
我该怎么办?有人可以建议我解决这个问题的方法吗?
from Bio.SeqIO.QualityIO import FastqGeneralIterator
# Copy the reads of one FASTQ file that contain no "N" bases into a second
# file, then print how many reads were seen and how many were kept.
maxN = 0  # a read is kept only when its "N" count equals this value
countall = 0  # records read from the input
countincl = 0  # records written to the output

# Both handles are managed by `with`, so they are really closed when the block
# ends (the bare `fin.close` / `fout.close` attribute references never called
# close()).  Plain "r" is used because the "U" mode flag is rejected by
# Python 3.11+, and text mode already translates newlines on Python 3.
with open("prova_FiltraN_CE.fastq", "r") as handle:
    with open("prova_FiltraN_CE_filt.fastq", "w") as fout:
        for (title, sequence, quality) in FastqGeneralIterator(handle):
            countN = sequence.count("N")
            countall += 1
            if countN == maxN:
                # Re-emit the record as a four-line FASTQ entry.
                fout.write("@%s\n%s\n+\n%s\n" % (title, sequence, quality))
                countincl += 1

# A single pre-formatted string prints the same "total kept" line whether
# `print` is the Python 2 statement or the Python 3 function.
print("%d %d" % (countall, countincl))
我认为以下内容可以满足您的要求。我所做的是将您的代码变成一个函数(并将其修改为我认为更正确的形式),然后为指定文件夹中找到的每个 .fastq 文件调用该函数。输出文件名是根据找到的输入文件名生成的。
from Bio.SeqIO.QualityIO import FastqGeneralIterator
import glob
import os
def process(in_filepath, out_filepath, max_n=0):
    """Filter one FASTQ file by the number of "N" bases in each read.

    Every record read from ``in_filepath`` is counted.  Records whose
    sequence contains at most ``max_n`` "N" characters are written to
    ``out_filepath`` as four-line FASTQ entries.  When the file is finished,
    one summary line is printed: the input file name, the number of records
    read and the number of records kept.

    Args:
        in_filepath: Path of the FASTQ file to read.
        out_filepath: Path of the FASTQ file to create (overwritten if it
            already exists).
        max_n: Largest "N" count a read may have and still be kept.  The
            default of 0 keeps only reads without any "N".
    """
    countall = 0  # records read
    countincl = 0  # records written

    # Plain "r": the "U" mode flag is rejected by Python 3.11+, and text mode
    # already translates newlines on Python 3.
    with open(in_filepath, "r") as fin:
        with open(out_filepath, "w") as fout:
            for (title, sequence, quality) in FastqGeneralIterator(fin):
                countall += 1
                if sequence.count("N") <= max_n:
                    fout.write("@%s\n%s\n+\n%s\n" % (title, sequence, quality))
                    countincl += 1

    # A single pre-formatted string prints the same line whether `print` is
    # the Python 2 statement or the Python 3 function.
    print("%s %d %d" % (os.path.split(in_filepath)[1], countall, countincl))
# Run process() on every .fastq file found directly inside `folder`, writing
# each result next to its input as "<name>_filt.fastq".
folder = "/path/to/folder" # folder to process
for in_filepath in glob.glob(os.path.join(folder, "*.fastq")):
    root, ext = os.path.splitext(in_filepath)
    if root.endswith("_filt"):
        # Already an output file of an earlier run; do not filter it again.
        continue
    out_filepath = root + "_filt" + ext
    process(in_filepath, out_filepath)
我想对一个文件夹中的多个 fastq 文件运行以下代码。文件夹中有若干不同的 fastq 文件;首先,我需要读取一个文件并执行所需的操作,然后将结果存储在一个单独的 .fastq 文件中。接着读取第二个文件,执行相同的操作,并将结果保存在新的第二个 .fastq 文件中。对文件夹中的所有文件重复相同的步骤。
我该怎么办?有人可以建议我解决这个问题的方法吗?
from Bio.SeqIO.QualityIO import FastqGeneralIterator
# Copy the reads of one FASTQ file that contain no "N" bases into a second
# file, then print how many reads were seen and how many were kept.
maxN = 0  # a read is kept only when its "N" count equals this value
countall = 0  # records read from the input
countincl = 0  # records written to the output

# Both handles are managed by `with`, so they are really closed when the block
# ends (the bare `fin.close` / `fout.close` attribute references never called
# close()).  Plain "r" is used because the "U" mode flag is rejected by
# Python 3.11+, and text mode already translates newlines on Python 3.
with open("prova_FiltraN_CE.fastq", "r") as handle:
    with open("prova_FiltraN_CE_filt.fastq", "w") as fout:
        for (title, sequence, quality) in FastqGeneralIterator(handle):
            countN = sequence.count("N")
            countall += 1
            if countN == maxN:
                # Re-emit the record as a four-line FASTQ entry.
                fout.write("@%s\n%s\n+\n%s\n" % (title, sequence, quality))
                countincl += 1

# A single pre-formatted string prints the same "total kept" line whether
# `print` is the Python 2 statement or the Python 3 function.
print("%d %d" % (countall, countincl))
我认为以下内容可以满足您的要求。我所做的是将您的代码变成一个函数(并将其修改为我认为更正确的形式),然后为指定文件夹中找到的每个 .fastq 文件调用该函数。输出文件名是根据找到的输入文件名生成的。
from Bio.SeqIO.QualityIO import FastqGeneralIterator
import glob
import os
def process(in_filepath, out_filepath, max_n=0):
    """Filter one FASTQ file by the number of "N" bases in each read.

    Every record read from ``in_filepath`` is counted.  Records whose
    sequence contains at most ``max_n`` "N" characters are written to
    ``out_filepath`` as four-line FASTQ entries.  When the file is finished,
    one summary line is printed: the input file name, the number of records
    read and the number of records kept.

    Args:
        in_filepath: Path of the FASTQ file to read.
        out_filepath: Path of the FASTQ file to create (overwritten if it
            already exists).
        max_n: Largest "N" count a read may have and still be kept.  The
            default of 0 keeps only reads without any "N".
    """
    countall = 0  # records read
    countincl = 0  # records written

    # Plain "r": the "U" mode flag is rejected by Python 3.11+, and text mode
    # already translates newlines on Python 3.
    with open(in_filepath, "r") as fin:
        with open(out_filepath, "w") as fout:
            for (title, sequence, quality) in FastqGeneralIterator(fin):
                countall += 1
                if sequence.count("N") <= max_n:
                    fout.write("@%s\n%s\n+\n%s\n" % (title, sequence, quality))
                    countincl += 1

    # A single pre-formatted string prints the same line whether `print` is
    # the Python 2 statement or the Python 3 function.
    print("%s %d %d" % (os.path.split(in_filepath)[1], countall, countincl))
# Run process() on every .fastq file found directly inside `folder`, writing
# each result next to its input as "<name>_filt.fastq".
folder = "/path/to/folder" # folder to process
for in_filepath in glob.glob(os.path.join(folder, "*.fastq")):
    root, ext = os.path.splitext(in_filepath)
    if root.endswith("_filt"):
        # Already an output file of an earlier run; do not filter it again.
        continue
    out_filepath = root + "_filt" + ext
    process(in_filepath, out_filepath)