根据两个文件中的匹配部分将数据从一个文件加载到另一个文件
Load data from one file to the other based on the matching part in two files
输入1
>Aa,Ab,Ac,ET0001
>Ba,Bb,Bc,ET0002
>Ca,Cb,Cc,ET0003
输入2
>Infor_a, aa:dfkvl, bb:csd, cc:ET0001
sldcksdlksjelkjfslkdjflskdjflskd
sldkcmowdimnwo
>Infor_b, aa:coeq, bb:ock, cc:ET0056
lskdjocisnmodk
>Infor_c, aa:vwjm, bb:cxj, cc:ET0751
spodcisdokfmnwoke
woeinmfwoeinflsdkvm
sldknmflwkenmlwk
>Infor_d, aa:wokx, bb:rkx, cc:ET0003
sodicjsodijsoi
预期输出
>Aa,Ab,Ac,ET0001
sldcksdlksjelkjfslkdjflskdjflskd
sldkcmowdimnwo
>Ba,Bb,Bc,ET0002
NaN
>Ca,Cb,Cc,ET0003
sodicjsodijsoi
代码
# Asker's attempt: collect the ET ids found on the '>' header lines of input2.
# `input1`, `input2` and `output` are path variables defined elsewhere.
with open(input1, 'r') as fr1, open(input2, 'r') as fr2, open(output, 'w') as fw:
    temp = []  # ET ids seen so far, in file order
    # NOTE: a file object is always truthy, so this loop only ends via the
    # `break` at the bottom, when readline() returns '' at end of file.
    while (fr2):
        line2 = fr2.readline()
        if line2.startswith('>'):
            # Header fields are separated by whitespace, e.g. "cc:ET0001".
            templist = line2.strip().split()
            for element in templist:
                if element.startswith('cc:ET'):
                    replaced_element = element.replace('cc:','')
                    temp.append(replaced_element)
        if not line2:
            break
我把数据(即 'input2' 中以 'ET' 开头的编号)追加到了列表 temp 中。
1. 我想获取 'input2' 中每个 '>' 行下面、与该行 'ET 编号' 对应的信息。
2. 如果该 'ET 编号' 与 'input1' 中的编号匹配,我想把 '>' 下面的这些信息写到 'input1' 对应行的下面。
你对我的代码(使用 os.listdir)的下一步有什么建议吗?
你可以用一个字典来保存f2中的匹配项,然后一个简单的循环:
import re
# `input1`, `input2` and `output` are path variables defined elsewhere.
with open(input1, 'r') as f1, open(input2, 'r') as f2, open(output, 'w') as fw:
    # Map each ET id that ends a '>' header line to the text below that header
    # (everything up to the next '>').
    values = dict(re.findall(r'(ET\d+)\n([^>]+)', f2.read()))
    for line in f1:
        header = line.rstrip('\n')
        fw.write(header + '\n')
        # Look up the bare id: the raw line still carries its newline, which
        # would never match a key and would turn every record into NaN.
        block = values.get(header.rsplit(',', 1)[-1], 'NaN\n')
        # The last block of input2 may lack a trailing newline.
        fw.write(block if block.endswith('\n') else block + '\n')
输出:
>Aa,Ab,Ac,ET0001
sldcksdlksjelkjfslkdjflskdjflskd
sldkcmowdimnwo
>Ba,Bb,Bc,ET0002
NaN
>Ca,Cb,Cc,ET0003
sodicjsodijsoi
注意:和你上一个问题里一样,不要再用下面这种奇怪的写法:
# Discouraged pattern: `f` is always truthy, so only the `break` ends the loop.
while (f):
    line = f.readline()
    ...
    if not line:
        break
直接这样写就行:
# Iterating the file object yields one line at a time and stops at end of file.
for line in f:
    ...
这并不是对 @mozway 给出的优秀答案在功能上的改进,而是采用了一种循序渐进的写法,对新手来说可能更容易理解:
import os
from collections import defaultdict
TILDE = '~'
DIR = 'logan'
HOME = os.path.expanduser(TILDE)
NAN = 'NaN\n'
CC = 'cc:'

# Step 1: map each input1 id (the last comma-separated field) to the fields
# of its line.
I_1 = {}
with open(os.path.join(HOME, DIR, 'input1.txt')) as infile:
    for line in infile:
        if not line.strip():
            # A blank line would otherwise emit an empty header plus NaN.
            continue
        t = line.strip().split(',')
        I_1[t[-1]] = t

# Step 2: map each input2 id (the value after 'cc:' on a '>' header line) to
# the lines that follow that header.
I_2 = defaultdict(list)
k = TILDE  # placeholder key for lines that belong to no identified header
with open(os.path.join(HOME, DIR, 'input2.txt')) as infile:
    for line in infile:
        if line.startswith('>'):
            i = line.find(CC)
            # A header without 'cc:' falls back to the placeholder so its
            # lines are not appended to the previous record.
            k = line[i + len(CC):].strip().split(',')[0] if i >= 0 else TILDE
        else:
            I_2[k].append(line)

# Step 3: write every input1 line followed by its matching block, or NaN.
with open(os.path.join(HOME, DIR, 'output1.txt'), 'w') as outfile:
    for k, v in I_1.items():
        print(','.join(v), file=outfile)
        # .get() avoids creating empty defaultdict entries; joining the NAN
        # string character by character yields NAN unchanged.
        body = ''.join(I_2.get(k, NAN))
        # Guard against a final input2 line that had no trailing newline.
        if not body.endswith('\n'):
            body += '\n'
        print(body, file=outfile, end='')
输入1
>Aa,Ab,Ac,ET0001
>Ba,Bb,Bc,ET0002
>Ca,Cb,Cc,ET0003
输入2
>Infor_a, aa:dfkvl, bb:csd, cc:ET0001
sldcksdlksjelkjfslkdjflskdjflskd
sldkcmowdimnwo
>Infor_b, aa:coeq, bb:ock, cc:ET0056
lskdjocisnmodk
>Infor_c, aa:vwjm, bb:cxj, cc:ET0751
spodcisdokfmnwoke
woeinmfwoeinflsdkvm
sldknmflwkenmlwk
>Infor_d, aa:wokx, bb:rkx, cc:ET0003
sodicjsodijsoi
预期输出
>Aa,Ab,Ac,ET0001
sldcksdlksjelkjfslkdjflskdjflskd
sldkcmowdimnwo
>Ba,Bb,Bc,ET0002
NaN
>Ca,Cb,Cc,ET0003
sodicjsodijsoi
代码
# Asker's attempt: collect the ET ids found on the '>' header lines of input2.
# `input1`, `input2` and `output` are path variables defined elsewhere.
with open(input1, 'r') as fr1, open(input2, 'r') as fr2, open(output, 'w') as fw:
    temp = []  # ET ids seen so far, in file order
    # NOTE: a file object is always truthy, so this loop only ends via the
    # `break` at the bottom, when readline() returns '' at end of file.
    while (fr2):
        line2 = fr2.readline()
        if line2.startswith('>'):
            # Header fields are separated by whitespace, e.g. "cc:ET0001".
            templist = line2.strip().split()
            for element in templist:
                if element.startswith('cc:ET'):
                    replaced_element = element.replace('cc:','')
                    temp.append(replaced_element)
        if not line2:
            break
我把数据(即 'input2' 中以 'ET' 开头的编号)追加到了列表 temp 中。
1. 我想获取 'input2' 中每个 '>' 行下面、与该行 'ET 编号' 对应的信息。
2. 如果该 'ET 编号' 与 'input1' 中的编号匹配,我想把 '>' 下面的这些信息写到 'input1' 对应行的下面。
你对我的代码(使用 os.listdir)的下一步有什么建议吗?
你可以用一个字典来保存f2中的匹配项,然后一个简单的循环:
import re
# `input1`, `input2` and `output` are path variables defined elsewhere.
with open(input1, 'r') as f1, open(input2, 'r') as f2, open(output, 'w') as fw:
    # Map each ET id that ends a '>' header line to the text below that header
    # (everything up to the next '>').
    values = dict(re.findall(r'(ET\d+)\n([^>]+)', f2.read()))
    for line in f1:
        header = line.rstrip('\n')
        fw.write(header + '\n')
        # Look up the bare id: the raw line still carries its newline, which
        # would never match a key and would turn every record into NaN.
        block = values.get(header.rsplit(',', 1)[-1], 'NaN\n')
        # The last block of input2 may lack a trailing newline.
        fw.write(block if block.endswith('\n') else block + '\n')
输出:
>Aa,Ab,Ac,ET0001
sldcksdlksjelkjfslkdjflskdjflskd
sldkcmowdimnwo
>Ba,Bb,Bc,ET0002
NaN
>Ca,Cb,Cc,ET0003
sodicjsodijsoi
注意:和你上一个问题里一样,不要再用下面这种奇怪的写法:
# Discouraged pattern: `f` is always truthy, so only the `break` ends the loop.
while (f):
    line = f.readline()
    ...
    if not line:
        break
直接这样写就行:
# Iterating the file object yields one line at a time and stops at end of file.
for line in f:
    ...
这并不是对 @mozway 给出的优秀答案在功能上的改进,而是采用了一种循序渐进的写法,对新手来说可能更容易理解:
import os
from collections import defaultdict
TILDE = '~'
DIR = 'logan'
HOME = os.path.expanduser(TILDE)
NAN = 'NaN\n'
CC = 'cc:'

# Step 1: map each input1 id (the last comma-separated field) to the fields
# of its line.
I_1 = {}
with open(os.path.join(HOME, DIR, 'input1.txt')) as infile:
    for line in infile:
        if not line.strip():
            # A blank line would otherwise emit an empty header plus NaN.
            continue
        t = line.strip().split(',')
        I_1[t[-1]] = t

# Step 2: map each input2 id (the value after 'cc:' on a '>' header line) to
# the lines that follow that header.
I_2 = defaultdict(list)
k = TILDE  # placeholder key for lines that belong to no identified header
with open(os.path.join(HOME, DIR, 'input2.txt')) as infile:
    for line in infile:
        if line.startswith('>'):
            i = line.find(CC)
            # A header without 'cc:' falls back to the placeholder so its
            # lines are not appended to the previous record.
            k = line[i + len(CC):].strip().split(',')[0] if i >= 0 else TILDE
        else:
            I_2[k].append(line)

# Step 3: write every input1 line followed by its matching block, or NaN.
with open(os.path.join(HOME, DIR, 'output1.txt'), 'w') as outfile:
    for k, v in I_1.items():
        print(','.join(v), file=outfile)
        # .get() avoids creating empty defaultdict entries; joining the NAN
        # string character by character yields NAN unchanged.
        body = ''.join(I_2.get(k, NAN))
        # Guard against a final input2 line that had no trailing newline.
        if not body.endswith('\n'):
            body += '\n'
        print(body, file=outfile, end='')