比较多个列表,输出是每对列表之间每个不匹配的数量和标识
Compare multiple lists, with the output being the number and identity of each mismatch between every pair of lists
我正在尝试比较多个列表:
list1 = ['A', 'B', 'C', ...]
list2 = ['A', 'B', 'X', ...]
list3 = ['A', 'X', 'C', ...]
输出应该为每对列表提供每个不匹配的身份和性质。例如:
Comp_list1_list2 = C,X and pos3
Comp_list1_list3 = B,X and pos2
Comp_list2_list3 = B,X; X,C and pos2; pos3
我意识到这可能是一个难题,必须将其分解成更小的部分。非常感谢任何让我走上正轨的提示!
我自己解决了这个问题 - 不是最优雅的解决方案,但无论如何它都有效...
from xlrd import *
def _find_mutations(first, second):
    """Return the mismatches between two residue sequences.

    Residues are compared position by position. Every mismatch is encoded as
    ``<residue of first><1-based position><residue of second>``, for example
    ``"C3X"``. Residues are compared directly, so lowercase letters and
    non-letter symbols (gaps, stop markers) are reported like any other
    mismatch.

    If ``first`` is longer than ``second`` the surplus residues cannot be
    compared; ``"oops"`` is printed once for each of them. Surplus residues
    of ``second`` are ignored.
    """
    mutations = []
    for position, residue in enumerate(first, start=1):
        try:
            other = second[position - 1]
        except IndexError:
            # ``second`` has no residue at this position.
            print("oops")
            continue
        if residue != other:
            mutations.append(residue + str(position) + other)
    return mutations


def _compare_all_pairs(names, sequences, elisa):
    """Build one result row for every ordered pair of input rows.

    Each row is ``[name_a, name_b, elisa_a, elisa_b, mutation, ...]``. Every
    input row is compared with every row, including itself and in both
    orders, so ``n`` input rows yield ``n * n`` result rows.

    Sequences are looked up by row, not by clone name, so rows that share a
    clone name are still compared with their own sequence.
    """
    records = list(zip(names, sequences, elisa))
    rows = []
    for other_name, other_sequence, other_elisa in records:
        for name, sequence, value in records:
            row = [name, other_name, value, other_elisa]
            row.extend(_find_mutations(sequence, other_sequence))
            rows.append(row)
    return rows


def _write_buckets(rows):
    """Write the result rows to one CSV file per mutation count.

    Rows with 0 to 10 mutations go to ``no_mut.csv``, ``one.csv`` ...
    ``ten.csv``. A file is only created when at least one row belongs in it.
    NOTE: rows with more than ten mutations are not written anywhere.
    """
    import csv

    fixed_columns = 4  # two clone names and two ELISA values precede the mutations
    file_names = (
        "no_mut.csv",
        "one.csv",
        "two.csv",
        "three.csv",
        "four.csv",
        "five.csv",
        "six.csv",
        "seven.csv",
        "eight.csv",
        "nine.csv",
        "ten.csv",
    )
    buckets = [[] for _ in file_names]
    for row in rows:
        mutation_count = len(row) - fixed_columns
        if mutation_count < len(buckets):
            buckets[mutation_count].append(row)

    # Each file is written once, after all rows have been sorted into buckets.
    for file_name, bucket in zip(file_names, buckets):
        if bucket:
            with open(file_name, "w", newline="") as f:
                csv.writer(f).writerows(bucket)


def main():
    """Cross-compare every pair of sequences listed in ``mtask.xlsx``.

    The first sheet of the workbook is read with column A holding the clone
    name, column B the sequence and column C the ELISA value. The comparison
    results are written to CSV files in the current directory, grouped by
    the number of mismatches per pair.
    """
    # NOTE(review): xlrd 2.0 and later only read legacy .xls files; confirm
    # that the installed xlrd version can open this .xlsx workbook.
    book = open_workbook("mtask.xlsx")
    sheet = book.sheet_by_index(0)

    names = sheet.col_values(0, 0)
    sequences = [list(cell) for cell in sheet.col_values(1, 0)]
    elisa = sheet.col_values(2, 0)

    _write_buckets(_compare_all_pairs(names, sequences, elisa))


if __name__ == "__main__":
    main()
我正在尝试比较多个列表:
list1 = ['A', 'B', 'C', ...]
list2 = ['A', 'B', 'X', ...]
list3 = ['A', 'X', 'C', ...]
输出应该为每对列表提供每个不匹配的身份和性质。例如:
Comp_list1_list2 = C,X and pos3
Comp_list1_list3 = B,X and pos2
Comp_list2_list3 = B,X; X,C and pos2; pos3
我意识到这可能是一个难题,必须将其分解成更小的部分。非常感谢任何让我走上正轨的提示!
我自己解决了这个问题 - 不是最优雅的解决方案,但无论如何它都有效...
from xlrd import *
def _find_mutations(first, second):
    """Return the mismatches between two residue sequences.

    Residues are compared position by position. Every mismatch is encoded as
    ``<residue of first><1-based position><residue of second>``, for example
    ``"C3X"``. Residues are compared directly, so lowercase letters and
    non-letter symbols (gaps, stop markers) are reported like any other
    mismatch.

    If ``first`` is longer than ``second`` the surplus residues cannot be
    compared; ``"oops"`` is printed once for each of them. Surplus residues
    of ``second`` are ignored.
    """
    mutations = []
    for position, residue in enumerate(first, start=1):
        try:
            other = second[position - 1]
        except IndexError:
            # ``second`` has no residue at this position.
            print("oops")
            continue
        if residue != other:
            mutations.append(residue + str(position) + other)
    return mutations


def _compare_all_pairs(names, sequences, elisa):
    """Build one result row for every ordered pair of input rows.

    Each row is ``[name_a, name_b, elisa_a, elisa_b, mutation, ...]``. Every
    input row is compared with every row, including itself and in both
    orders, so ``n`` input rows yield ``n * n`` result rows.

    Sequences are looked up by row, not by clone name, so rows that share a
    clone name are still compared with their own sequence.
    """
    records = list(zip(names, sequences, elisa))
    rows = []
    for other_name, other_sequence, other_elisa in records:
        for name, sequence, value in records:
            row = [name, other_name, value, other_elisa]
            row.extend(_find_mutations(sequence, other_sequence))
            rows.append(row)
    return rows


def _write_buckets(rows):
    """Write the result rows to one CSV file per mutation count.

    Rows with 0 to 10 mutations go to ``no_mut.csv``, ``one.csv`` ...
    ``ten.csv``. A file is only created when at least one row belongs in it.
    NOTE: rows with more than ten mutations are not written anywhere.
    """
    import csv

    fixed_columns = 4  # two clone names and two ELISA values precede the mutations
    file_names = (
        "no_mut.csv",
        "one.csv",
        "two.csv",
        "three.csv",
        "four.csv",
        "five.csv",
        "six.csv",
        "seven.csv",
        "eight.csv",
        "nine.csv",
        "ten.csv",
    )
    buckets = [[] for _ in file_names]
    for row in rows:
        mutation_count = len(row) - fixed_columns
        if mutation_count < len(buckets):
            buckets[mutation_count].append(row)

    # Each file is written once, after all rows have been sorted into buckets.
    for file_name, bucket in zip(file_names, buckets):
        if bucket:
            with open(file_name, "w", newline="") as f:
                csv.writer(f).writerows(bucket)


def main():
    """Cross-compare every pair of sequences listed in ``mtask.xlsx``.

    The first sheet of the workbook is read with column A holding the clone
    name, column B the sequence and column C the ELISA value. The comparison
    results are written to CSV files in the current directory, grouped by
    the number of mismatches per pair.
    """
    # NOTE(review): xlrd 2.0 and later only read legacy .xls files; confirm
    # that the installed xlrd version can open this .xlsx workbook.
    book = open_workbook("mtask.xlsx")
    sheet = book.sheet_by_index(0)

    names = sheet.col_values(0, 0)
    sequences = [list(cell) for cell in sheet.col_values(1, 0)]
    elisa = sheet.col_values(2, 0)

    _write_buckets(_compare_all_pairs(names, sequences, elisa))


if __name__ == "__main__":
    main()