使用 itertools 或 zip 解析 Python 中的当前值和下一个值
Using itertools or zip to parse current value and next value in Python
考虑文件 eclip_bam_paths.txt:
/groups/cgsd/alexandre/eclip/bam_inputs_akshay/10249_sample1Aligned.sortedByCoord.out.bam
/groups/cgsd/alexandre/eclip/bam_inputs_akshay/10249_sample2Aligned.sortedByCoord.out.bam
/groups/cgsd/alexandre/eclip/bam_inputs_akshay/10249_sample3Aligned.sortedByCoord.out.bam
/groups/cgsd/alexandre/eclip/bam_inputs_akshay/10249_sample4Aligned.sortedByCoord.out.bam
/groups/cgsd/alexandre/eclip/bam_inputs_akshay/10249_sample5Aligned.sortedByCoord.out.bam
/groups/cgsd/alexandre/eclip/bam_inputs_akshay/10249_sample6Aligned.sortedByCoord.out.bam
/groups/cgsd/alexandre/eclip/bam_inputs_akshay/10249_sample7Aligned.sortedByCoord.out.bam
/groups/cgsd/alexandre/eclip/bam_inputs_akshay/10249_sample8Aligned.sortedByCoord.out.bam
和文件 eclip_bais_paths.txt:
/groups/cgsd/alexandre/eclip/bam_inputs_akshay/10249_sample1Aligned.sortedByCoord.out.bai
/groups/cgsd/alexandre/eclip/bam_inputs_akshay/10249_sample2Aligned.sortedByCoord.out.bai
/groups/cgsd/alexandre/eclip/bam_inputs_akshay/10249_sample3Aligned.sortedByCoord.out.bai
/groups/cgsd/alexandre/eclip/bam_inputs_akshay/10249_sample4Aligned.sortedByCoord.out.bai
/groups/cgsd/alexandre/eclip/bam_inputs_akshay/10249_sample5Aligned.sortedByCoord.out.bai
/groups/cgsd/alexandre/eclip/bam_inputs_akshay/10249_sample6Aligned.sortedByCoord.out.bai
/groups/cgsd/alexandre/eclip/bam_inputs_akshay/10249_sample7Aligned.sortedByCoord.out.bai
/groups/cgsd/alexandre/eclip/bam_inputs_akshay/10249_sample8Aligned.sortedByCoord.out.bai
此代码循环遍历两个文件,但不检索循环中的下一个值:
# Baseline from the question: build one dict per row (a BAM path and the BAI
# path on the same row of the other file) and dump the list as JSON.
# Only the replicate-1 keys are filled in; keys[2] and keys[3] stay unused.
import json

keys = ['bam_rep_1','bai_rep_1','bam_rep_2','bai_rep_2']
l = []
with open('src/eclip_bam_paths.txt') as bams, open('src/eclip_bais_paths.txt') as bais:
    # zip() walks both files in lockstep and stops at the shorter one.
    for bamline, bailine in zip(bams, bais):
        # strip() removes surrounding whitespace, including the trailing newline.
        d = {keys[0]: bamline.strip(), keys[1]: bailine.strip()}
        l.append(d)

final_code = json.dumps(l)
with open('./output/eclip_array.json', 'w') as out:
    out.write(final_code)
我想要一个迭代来获取每个文件中的下一个值,例如第一次迭代应该如下所示:
# Pseudo-code from the question (not runnable as written):
# `.secondvalueinthefile` is a placeholder for "the next line of the same
# file"; str objects have no such attribute.
keys = ['bam_rep_1','bai_rep_1','bam_rep_2','bai_rep_2']
l = []
with open('src/eclip_bam_paths.txt') as bams, open('src/eclip_bais_paths.txt') as bais:
for bamline,bailine in zip(bams,bais):
# Desired shape: current row under the rep_1 keys, following row under the rep_2 keys.
d = { keys[0]: bamline.strip(), keys[1]: bailine.strip() , keys[2]: bamline.secondvalueinthefile , keys[3]: bailine.secondvalueinthefile }
l.append(d)
import json
final_code = json.dumps(l)
是否可以使用 itertools 或 Python 内置函数来实现?
第一次迭代的预期输出:
[{"bam_rep_1": "/groups/cgsd/alexandre/eclip/bam_inputs_akshay/10249_sample1Aligned.sortedByCoord.out.bam", "bai_rep_1": "/groups/cgsd/alexandre/eclip/bam_inputs_akshay/10249_sample1Aligned.sortedByCoord.out.bai","bam_rep_2":"/groups/cgsd/alexandre/eclip/bam_inputs_akshay/10249_sample2Aligned.sortedByCoord.out.bam","bai_rep_2":"/groups/cgsd/alexandre/eclip/bam_inputs_akshay/10249_sample2Aligned.sortedByCoord.out.bai"},....]
P.S.:我想知道如何在我的案例中使用下面这个 itertools 函数:
import itertools
def pairwise(iterable):
    """Return overlapping consecutive pairs: s -> (s0, s1), (s1, s2), (s2, s3), ...

    The result is a lazy zip object. Inputs with fewer than two items
    produce no pairs.
    """
    # tee() gives two independent iterators over the same input.
    a, b = itertools.tee(iterable)
    # Advance the second iterator by one item; the None default keeps an
    # empty input from raising StopIteration.
    next(b, None)
    return zip(a, b)
您可以使用 readlines() 获取文件中的所有行,然后以 2 为步长按索引遍历结果:
# Non-overlapping pairs: rows (0, 1), (2, 3), ... each become one dict.
# `keys` (the four key names) and `l` (the result list) come from the
# question's code.
with open('src/eclip_bam_paths.txt') as bams, open('src/eclip_bais_paths.txt') as bais:
    # Each element is a (bam_line, bai_line) tuple for one row.
    all_lines = list(zip(bams.readlines(), bais.readlines()))

# Stop at len - 1 so that i + 1 is always a valid index; with an odd number
# of rows the unpaired last row is skipped.
for i in range(0, len(all_lines) - 1, 2):
    d = {keys[0]: all_lines[i][0].strip(), keys[1]: all_lines[i][1].strip(),
         keys[2]: all_lines[i + 1][0].strip(), keys[3]: all_lines[i + 1][1].strip()}
    l.append(d)
使用 itertools.islice 的另一种方法:
def chunks(size, lists):
    """Yield successive lists of up to ``size`` items taken from ``lists``.

    ``lists`` may be any iterable. The final chunk is shorter than ``size``
    when the number of items is not a multiple of ``size``; an empty input
    yields nothing.
    """
    # islice() only consumes a real iterator. Called on a list it would start
    # from the front on every pass and loop forever, so normalise the input.
    iterator = iter(lists)
    while True:
        val = list(itertools.islice(iterator, size))
        if not val:
            break
        yield val
# Group the (bam_line, bai_line) rows two at a time with chunks().
# `keys` and `l` come from the question's code.
with open('src/eclip_bam_paths.txt') as bams, open('src/eclip_bais_paths.txt') as bais:
    for group in chunks(2, zip(bams.readlines(), bais.readlines())):
        if len(group) < 2:
            # Odd number of rows: the last group has no partner. Skip it
            # instead of raising IndexError on group[1].
            continue
        d = {keys[0]: group[0][0].strip(), keys[1]: group[0][1].strip(),
             keys[2]: group[1][0].strip(), keys[3]: group[1][1].strip()}
        l.append(d)
考虑文件 eclip_bam_paths.txt:
/groups/cgsd/alexandre/eclip/bam_inputs_akshay/10249_sample1Aligned.sortedByCoord.out.bam
/groups/cgsd/alexandre/eclip/bam_inputs_akshay/10249_sample2Aligned.sortedByCoord.out.bam
/groups/cgsd/alexandre/eclip/bam_inputs_akshay/10249_sample3Aligned.sortedByCoord.out.bam
/groups/cgsd/alexandre/eclip/bam_inputs_akshay/10249_sample4Aligned.sortedByCoord.out.bam
/groups/cgsd/alexandre/eclip/bam_inputs_akshay/10249_sample5Aligned.sortedByCoord.out.bam
/groups/cgsd/alexandre/eclip/bam_inputs_akshay/10249_sample6Aligned.sortedByCoord.out.bam
/groups/cgsd/alexandre/eclip/bam_inputs_akshay/10249_sample7Aligned.sortedByCoord.out.bam
/groups/cgsd/alexandre/eclip/bam_inputs_akshay/10249_sample8Aligned.sortedByCoord.out.bam
和文件 eclip_bais_paths.txt:
/groups/cgsd/alexandre/eclip/bam_inputs_akshay/10249_sample1Aligned.sortedByCoord.out.bai
/groups/cgsd/alexandre/eclip/bam_inputs_akshay/10249_sample2Aligned.sortedByCoord.out.bai
/groups/cgsd/alexandre/eclip/bam_inputs_akshay/10249_sample3Aligned.sortedByCoord.out.bai
/groups/cgsd/alexandre/eclip/bam_inputs_akshay/10249_sample4Aligned.sortedByCoord.out.bai
/groups/cgsd/alexandre/eclip/bam_inputs_akshay/10249_sample5Aligned.sortedByCoord.out.bai
/groups/cgsd/alexandre/eclip/bam_inputs_akshay/10249_sample6Aligned.sortedByCoord.out.bai
/groups/cgsd/alexandre/eclip/bam_inputs_akshay/10249_sample7Aligned.sortedByCoord.out.bai
/groups/cgsd/alexandre/eclip/bam_inputs_akshay/10249_sample8Aligned.sortedByCoord.out.bai
此代码循环遍历两个文件,但不检索循环中的下一个值:
# Baseline from the question: build one dict per row (a BAM path and the BAI
# path on the same row of the other file) and dump the list as JSON.
# Only the replicate-1 keys are filled in; keys[2] and keys[3] stay unused.
import json

keys = ['bam_rep_1','bai_rep_1','bam_rep_2','bai_rep_2']
l = []
with open('src/eclip_bam_paths.txt') as bams, open('src/eclip_bais_paths.txt') as bais:
    # zip() walks both files in lockstep and stops at the shorter one.
    for bamline, bailine in zip(bams, bais):
        # strip() removes surrounding whitespace, including the trailing newline.
        d = {keys[0]: bamline.strip(), keys[1]: bailine.strip()}
        l.append(d)

final_code = json.dumps(l)
with open('./output/eclip_array.json', 'w') as out:
    out.write(final_code)
我想要一个迭代来获取每个文件中的下一个值,例如第一次迭代应该如下所示:
# Pseudo-code from the question (not runnable as written):
# `.secondvalueinthefile` is a placeholder for "the next line of the same
# file"; str objects have no such attribute.
keys = ['bam_rep_1','bai_rep_1','bam_rep_2','bai_rep_2']
l = []
with open('src/eclip_bam_paths.txt') as bams, open('src/eclip_bais_paths.txt') as bais:
for bamline,bailine in zip(bams,bais):
# Desired shape: current row under the rep_1 keys, following row under the rep_2 keys.
d = { keys[0]: bamline.strip(), keys[1]: bailine.strip() , keys[2]: bamline.secondvalueinthefile , keys[3]: bailine.secondvalueinthefile }
l.append(d)
import json
final_code = json.dumps(l)
是否可以使用 itertools 或 Python 内置函数来实现?
第一次迭代的预期输出:
[{"bam_rep_1": "/groups/cgsd/alexandre/eclip/bam_inputs_akshay/10249_sample1Aligned.sortedByCoord.out.bam", "bai_rep_1": "/groups/cgsd/alexandre/eclip/bam_inputs_akshay/10249_sample1Aligned.sortedByCoord.out.bai","bam_rep_2":"/groups/cgsd/alexandre/eclip/bam_inputs_akshay/10249_sample2Aligned.sortedByCoord.out.bam","bai_rep_2":"/groups/cgsd/alexandre/eclip/bam_inputs_akshay/10249_sample2Aligned.sortedByCoord.out.bai"},....]
P.S.:我想知道如何在我的案例中使用下面这个 itertools 函数:
import itertools
def pairwise(iterable):
    """Return overlapping consecutive pairs: s -> (s0, s1), (s1, s2), (s2, s3), ...

    The result is a lazy zip object. Inputs with fewer than two items
    produce no pairs.
    """
    # tee() gives two independent iterators over the same input.
    a, b = itertools.tee(iterable)
    # Advance the second iterator by one item; the None default keeps an
    # empty input from raising StopIteration.
    next(b, None)
    return zip(a, b)
您可以使用 readlines() 获取文件中的所有行,然后以 2 为步长按索引遍历结果:
# Non-overlapping pairs: rows (0, 1), (2, 3), ... each become one dict.
# `keys` (the four key names) and `l` (the result list) come from the
# question's code.
with open('src/eclip_bam_paths.txt') as bams, open('src/eclip_bais_paths.txt') as bais:
    # Each element is a (bam_line, bai_line) tuple for one row.
    all_lines = list(zip(bams.readlines(), bais.readlines()))

# Stop at len - 1 so that i + 1 is always a valid index; with an odd number
# of rows the unpaired last row is skipped.
for i in range(0, len(all_lines) - 1, 2):
    d = {keys[0]: all_lines[i][0].strip(), keys[1]: all_lines[i][1].strip(),
         keys[2]: all_lines[i + 1][0].strip(), keys[3]: all_lines[i + 1][1].strip()}
    l.append(d)
使用 itertools.islice 的另一种方法:
def chunks(size, lists):
    """Yield successive lists of up to ``size`` items taken from ``lists``.

    ``lists`` may be any iterable. The final chunk is shorter than ``size``
    when the number of items is not a multiple of ``size``; an empty input
    yields nothing.
    """
    # islice() only consumes a real iterator. Called on a list it would start
    # from the front on every pass and loop forever, so normalise the input.
    iterator = iter(lists)
    while True:
        val = list(itertools.islice(iterator, size))
        if not val:
            break
        yield val
# Group the (bam_line, bai_line) rows two at a time with chunks().
# `keys` and `l` come from the question's code.
with open('src/eclip_bam_paths.txt') as bams, open('src/eclip_bais_paths.txt') as bais:
    for group in chunks(2, zip(bams.readlines(), bais.readlines())):
        if len(group) < 2:
            # Odd number of rows: the last group has no partner. Skip it
            # instead of raising IndexError on group[1].
            continue
        d = {keys[0]: group[0][0].strip(), keys[1]: group[0][1].strip(),
             keys[2]: group[1][0].strip(), keys[3]: group[1][1].strip()}
        l.append(d)