Python: 使用重复出现的符号将文本文件拆分为多个文件
Python: Separating a txt file into multiple files using a recurring symbol
我有一个 .txt 氨基酸文件,由“>node”分隔,如下所示:
Filename.txt :
>NODE_1
MSETLVLTRPDDWHVHLRDGAALQSVVPYTARQFARAIAMPNLKPPITTAEQAQAYRERI
KFFLGTDSAPHASVMKENSVCGAGCFTALSALELYAEAFEAAGALDKLEAFASFHGADFY
GLPRNTTQVTLRKTEWTLPESVPFGEAAQLKPLRGGEALRWKLD*
>NODE_2
MSTWHKVQGRPKAQARRPGRKSKDDFVTRVEHDAKNDALLQLVRAEWAMLRSDIATFRGD
MVERFGKVEGEITGIKGQIDGLKGEMQGVKGEVEGLRGSLTTTQWVVGTAMALLAVVTQV
PSIISAYRFPPAGSSAFPAPGSLPTVPGSPASAASAP*
我想把这个文件拆分成两个文件(更一般地说,文件中有多少个节点就拆分成多少个文件);
Filename1.txt :
>NODE
MSETLVLTRPDDWHVHLRDGAALQSVVPYTARQFARAIAMPNLKPPITTAEQAQAYRERI
KFFLGTDSAPHASVMKENSVCGAGCFTALSALELYAEAFEAAGALDKLEAFASFHGADFY
GLPRNTTQVTLRKTEWTLPESVPFGEAAQLKPLRGGEALRWKLD*
Filename2.txt :
>NODE
MSTWHKVQGRPKAQARRPGRKSKDDFVTRVEHDAKNDALLQLVRAEWAMLRSDIATFRGD
MVERFGKVEGEITGIKGQIDGLKGEMQGVKGEVEGLRGSLTTTQWVVGTAMALLAVVTQV
PSIISAYRFPPAGSSAFPAPGSLPTVPGSPASAASAP*
文件名后有数字
这段代码有效,但是它删除了“>NODE”行并且没有为最后一个节点(后面没有“>”的节点)创建文件。
# Question code with its block structure restored. The logic is intentionally
# left as posted, because the surrounding text discusses its two symptoms.
with open('FilePathway') as fo:
    op = ''      # sequence lines collected for the current record
    start = 0    # becomes 1 once the first '>' header has been seen
    cntr = 1     # number used for the next output file
    for x in fo.read().split("\n"):
        if x.startswith('>'):
            if start == 1:
                # A new header closes the previous record, so flush it.
                with open (str(cntr) + '.fasta','w') as opf:
                    opf.write(op)
                    opf.close()
                op = ''
                cntr += 1
            else:
                start = 1
            # NOTE: the header line x is never added to op, which is why
            # the '>NODE' line is missing from every output file.
        else:
            if op == '':
                op = x
            else:
                op = op + '\n' + x
    # NOTE: nothing flushes op after the loop ends, so the record that
    # follows the last header is never written to disk.
fo.close()
我似乎找不到错误。如果您能指出给我,将不胜感激。
感谢您的帮助!
大家好!感谢您的所有评论。在您的帮助下,我设法让它完美地工作。对于任何有类似问题的人,这是我的最终代码:
import os
import glob
folder_path = 'FilePathway'


def writefasta(content, prefix, fileno):
    """Write one buffered FASTA record to ``<prefix><fileno>.fasta``.

    Args:
        content: Lines of the record, header line first.
        prefix: Output path prefix, e.g. ``'dir/sample_'``.
        fileno: Number appended to the prefix.

    Returns:
        True when a file was written. A buffer with at most one line
        (a header without any sequence, or nothing at all) is skipped.
    """
    if len(content) <= 1:
        return False
    with open(f'{prefix}{fileno}.fasta', 'w') as fout:
        fout.write(''.join(content))
    return True


def split_fasta(filename):
    """Split *filename* into one file per '>' record.

    Output files are named ``<base>_1.fasta``, ``<base>_2.fasta``, ... next
    to the input file, and each one keeps its own header line.

    Returns:
        The number of files written.
    """
    # splitext only touches the extension, unlike str.replace, which would
    # also rewrite a '.fasta' occurring elsewhere in the path.
    y = os.path.splitext(filename)[0] + '_'
    content = []
    fileno = 1
    with open(filename) as fin:
        for line in fin:
            if line.startswith('>'):
                # A new header closes the previous record.
                if writefasta(content, y, fileno):
                    fileno += 1
                content = [line]
            elif content:
                # Lines before the first header belong to no record.
                content.append(line)
    # The last record has no following header, so flush it explicitly.
    if writefasta(content, y, fileno):
        fileno += 1
    return fileno - 1


# glob.glob returns a complete list up front, so the files created while
# splitting are not picked up again during this run.
for filename in glob.glob(os.path.join(folder_path, '*.fasta')):
    split_fasta(filename)
# Write every line of the input to its own numbered .fasta file.
with open('FileName') as fo:
    for cntr, line in enumerate(fo, start=1):
        # The with-statement closes opf, so no explicit close() is needed.
        with open(f'{cntr}.fasta', 'w') as opf:
            opf.write(line)
这样会更好。它只会在奇数次迭代中写入。这样,“>NODE”将被跳过,文件将只为真正的内容创建。
# Write only the odd-indexed lines (0-based) to numbered .txt files.
# Assumes header and sequence lines strictly alternate, one line each, so
# that every odd index holds a sequence - TODO confirm against the input.
with open('filename.txt') as fo:
    cntr = 1
    for i, content in enumerate(fo.read().split("\n")):
        if i % 2 == 1:
            with open(str(cntr) + '.txt', 'w') as opf:
                opf.write(content)
            # Advance only after a write so the numbering stays consecutive.
            cntr += 1
顺便说一句,因为您使用的是上下文管理器(with 语句),所以不需要再手动关闭文件。
Context managers allow you to allocate and release resources precisely
when you want to. It opens the file, writes some data to it and then
closes it.
请检查:https://book.pythontips.com/en/latest/context_managers.html
你可以这样做:
def writefasta(d):
    """Flush the buffered record in *d* to ``Filename<fileno>.fasta``.

    Args:
        d: State dict with ``'content'`` (list of lines, starting with the
            '>NODE' header) and ``'fileno'`` (number of the next file).

    Nothing is written while the buffer holds only the header. After a
    write the buffer is reset to a fresh header and ``'fileno'`` is
    incremented, both in place.
    """
    if len(d['content']) > 1:
        with open(f'Filename{d["fileno"]}.fasta', 'w') as fout:
            fout.write(''.join(d['content']))
        # Reset the state only after the file has been closed.
        d['content'] = ['>NODE\n']
        d['fileno'] += 1
# Split test.fasta into one file per record using writefasta().
with open('test.fasta') as fin:
    # Every output starts with a generic '>NODE' header; the numbered
    # header lines from the input are not copied.
    D = {'content': ['>NODE\n'], 'fileno': 1}
    for line in fin:
        if line.startswith('>NODE'):
            # A header closes the previous record.
            writefasta(D)
        else:
            D['content'].append(line)
    # Flush the last record, which has no header after it.
    writefasta(D)
我有一个 .txt 氨基酸文件,由“>node”分隔,如下所示:
Filename.txt :
>NODE_1 MSETLVLTRPDDWHVHLRDGAALQSVVPYTARQFARAIAMPNLKPPITTAEQAQAYRERI KFFLGTDSAPHASVMKENSVCGAGCFTALSALELYAEAFEAAGALDKLEAFASFHGADFY GLPRNTTQVTLRKTEWTLPESVPFGEAAQLKPLRGGEALRWKLD*
>NODE_2 MSTWHKVQGRPKAQARRPGRKSKDDFVTRVEHDAKNDALLQLVRAEWAMLRSDIATFRGD MVERFGKVEGEITGIKGQIDGLKGEMQGVKGEVEGLRGSLTTTQWVVGTAMALLAVVTQV PSIISAYRFPPAGSSAFPAPGSLPTVPGSPASAASAP*
我想把这个文件拆分成两个文件(更一般地说,文件中有多少个节点就拆分成多少个文件);
Filename1.txt :
>NODE MSETLVLTRPDDWHVHLRDGAALQSVVPYTARQFARAIAMPNLKPPITTAEQAQAYRERI KFFLGTDSAPHASVMKENSVCGAGCFTALSALELYAEAFEAAGALDKLEAFASFHGADFY GLPRNTTQVTLRKTEWTLPESVPFGEAAQLKPLRGGEALRWKLD*
Filename2.txt :
>NODE MSTWHKVQGRPKAQARRPGRKSKDDFVTRVEHDAKNDALLQLVRAEWAMLRSDIATFRGD MVERFGKVEGEITGIKGQIDGLKGEMQGVKGEVEGLRGSLTTTQWVVGTAMALLAVVTQV PSIISAYRFPPAGSSAFPAPGSLPTVPGSPASAASAP*
文件名后有数字
这段代码有效,但是它删除了“>NODE”行并且没有为最后一个节点(后面没有“>”的节点)创建文件。
# Question code with its block structure restored. The logic is intentionally
# left as posted, because the surrounding text discusses its two symptoms.
with open('FilePathway') as fo:
    op = ''      # sequence lines collected for the current record
    start = 0    # becomes 1 once the first '>' header has been seen
    cntr = 1     # number used for the next output file
    for x in fo.read().split("\n"):
        if x.startswith('>'):
            if start == 1:
                # A new header closes the previous record, so flush it.
                with open (str(cntr) + '.fasta','w') as opf:
                    opf.write(op)
                    opf.close()
                op = ''
                cntr += 1
            else:
                start = 1
            # NOTE: the header line x is never added to op, which is why
            # the '>NODE' line is missing from every output file.
        else:
            if op == '':
                op = x
            else:
                op = op + '\n' + x
    # NOTE: nothing flushes op after the loop ends, so the record that
    # follows the last header is never written to disk.
fo.close()
我似乎找不到错误。如果您能指出给我,将不胜感激。
感谢您的帮助!
大家好!感谢您的所有评论。在您的帮助下,我设法让它完美地工作。对于任何有类似问题的人,这是我的最终代码:
import os
import glob
folder_path = 'FilePathway'


def writefasta(content, prefix, fileno):
    """Write one buffered FASTA record to ``<prefix><fileno>.fasta``.

    Args:
        content: Lines of the record, header line first.
        prefix: Output path prefix, e.g. ``'dir/sample_'``.
        fileno: Number appended to the prefix.

    Returns:
        True when a file was written. A buffer with at most one line
        (a header without any sequence, or nothing at all) is skipped.
    """
    if len(content) <= 1:
        return False
    with open(f'{prefix}{fileno}.fasta', 'w') as fout:
        fout.write(''.join(content))
    return True


def split_fasta(filename):
    """Split *filename* into one file per '>' record.

    Output files are named ``<base>_1.fasta``, ``<base>_2.fasta``, ... next
    to the input file, and each one keeps its own header line.

    Returns:
        The number of files written.
    """
    # splitext only touches the extension, unlike str.replace, which would
    # also rewrite a '.fasta' occurring elsewhere in the path.
    y = os.path.splitext(filename)[0] + '_'
    content = []
    fileno = 1
    with open(filename) as fin:
        for line in fin:
            if line.startswith('>'):
                # A new header closes the previous record.
                if writefasta(content, y, fileno):
                    fileno += 1
                content = [line]
            elif content:
                # Lines before the first header belong to no record.
                content.append(line)
    # The last record has no following header, so flush it explicitly.
    if writefasta(content, y, fileno):
        fileno += 1
    return fileno - 1


# glob.glob returns a complete list up front, so the files created while
# splitting are not picked up again during this run.
for filename in glob.glob(os.path.join(folder_path, '*.fasta')):
    split_fasta(filename)
# Write every line of the input to its own numbered .fasta file.
with open('FileName') as fo:
    for cntr, line in enumerate(fo, start=1):
        # The with-statement closes opf, so no explicit close() is needed.
        with open(f'{cntr}.fasta', 'w') as opf:
            opf.write(line)
这样会更好。它只会在奇数次迭代中写入。这样,“>NODE”将被跳过,文件将只为真正的内容创建。
# Write only the odd-indexed lines (0-based) to numbered .txt files.
# Assumes header and sequence lines strictly alternate, one line each, so
# that every odd index holds a sequence - TODO confirm against the input.
with open('filename.txt') as fo:
    cntr = 1
    for i, content in enumerate(fo.read().split("\n")):
        if i % 2 == 1:
            with open(str(cntr) + '.txt', 'w') as opf:
                opf.write(content)
            # Advance only after a write so the numbering stays consecutive.
            cntr += 1
顺便说一句,因为您使用的是上下文管理器(with 语句),所以不需要再手动关闭文件。
Context managers allow you to allocate and release resources precisely when you want to. It opens the file, writes some data to it and then closes it.
请检查:https://book.pythontips.com/en/latest/context_managers.html
你可以这样做:
def writefasta(d):
    """Flush the buffered record in *d* to ``Filename<fileno>.fasta``.

    Args:
        d: State dict with ``'content'`` (list of lines, starting with the
            '>NODE' header) and ``'fileno'`` (number of the next file).

    Nothing is written while the buffer holds only the header. After a
    write the buffer is reset to a fresh header and ``'fileno'`` is
    incremented, both in place.
    """
    if len(d['content']) > 1:
        with open(f'Filename{d["fileno"]}.fasta', 'w') as fout:
            fout.write(''.join(d['content']))
        # Reset the state only after the file has been closed.
        d['content'] = ['>NODE\n']
        d['fileno'] += 1
# Split test.fasta into one file per record using writefasta().
with open('test.fasta') as fin:
    # Every output starts with a generic '>NODE' header; the numbered
    # header lines from the input are not copied.
    D = {'content': ['>NODE\n'], 'fileno': 1}
    for line in fin:
        if line.startswith('>NODE'):
            # A header closes the previous record.
            writefasta(D)
        else:
            D['content'].append(line)
    # Flush the last record, which has no header after it.
    writefasta(D)