CS50 pset 6 DNA 适用于 small.csv 但不适用于 large.csv
CS50 pset 6 DNA works with small.csv but not large.csv
这是我第 6 周 DNA 问题集的代码。当我使用 small.csv 进行测试时,它工作正常,但是当使用 large.csv 进行测试时,它似乎错误地计算了重复序列。谁能帮我找出代码中的错误?我对此很陌生。
import csv
import sys
if len(sys.argv) != 3:
sys.exit("Usage: python dna.py STRcounts DNASequence")
check = True
STRlist = []
Humanlist = []
# copy person list
with open(sys.argv[1],"r") as STR:
readSTR = csv.reader(STR)
for row in readSTR:
if check:
STRlist.append(row)
check = False
else:
Humanlist.append(row)
Slist = STRlist[0]
Slist.remove("name")
# print(Humanlist)
# print(Slist)
seq=[]
# copy sequence
with open(sys.argv[2],"r") as text:
readtext = csv.reader(text)
for i in readtext:
seq = i
text = seq[0]
# print(text)
# create dictionary for STR
STRdict = {}
for STR in Slist:
STRdict[STR] = 0
for STR in Slist:
for letter in range(len(text)):
if STR == text[letter:letter+len(STR)]:
STRdict[STR] += 1
check = False
for human in range(len(Humanlist)):
for STR in range(len(Slist)):
if str(STRdict[Slist[STR]]) == str(Humanlist[human][STR+1]):
check = True
else:
check = False
break
if check:
print(Humanlist[human][0])
break
if not check:
print("no match")
我注释掉了不必要的部分并添加了代码以获得 max
长度的 STR 重复。您的其余代码未更改,我得到了预期的结果。
我没有检查所有可能改进的代码,但它确实得到了正确的结果。
您的代码不正确的原因是它计算了字符串中所有出现的 STR,而不是计算连续重复的次数(然后找到最大重复次数)。
import csv
import sys
if len(sys.argv) != 3:
sys.exit("Usage: python dna.py STRcounts DNASequence")
check = True
STRlist = []
Humanlist = []
# copy person list
with open(sys.argv[1],"r") as STR:
readSTR = csv.reader(STR)
for row in readSTR:
if check:
STRlist.append(row)
check = False
else:
Humanlist.append(row)
Slist = STRlist[0]
Slist.remove("name")
# print(Humanlist)
# print(Slist)
seq=[]
# copy sequence
with open(sys.argv[2],"r") as text:
readtext = csv.reader(text)
for i in readtext:
seq = i
text = seq[0]
# print(text)
# create dictionary for STR
STRdict = {}
"""
for STR in Slist:
STRdict[STR] = 0"""
for STR in Slist:
idx = 0
max_= 0
while idx < len(text):
num_repeats = 0
while STR == text[idx:idx+len(STR)]:
num_repeats += 1
idx += len(STR)
if num_repeats > max_:
max_ = num_repeats
idx += 1
STRdict[STR] = max_
#print(STR, max_)
"""for letter in range(len(text)):
if STR == text[letter:letter+len(STR)]:
STRdict[STR] += 1"""
check = False
for human in range(len(Humanlist)):
for STR in range(len(Slist)):
if str(STRdict[Slist[STR]]) == str(Humanlist[human][STR+1]):
check = True
else:
check = False
break
if check:
print(Humanlist[human][0])
break
if not check:
print("no match")
这个问题来自Harvard problem
这是我第 6 周 DNA 问题集的代码。当我使用 small.csv 进行测试时,它工作正常,但是当使用 large.csv 进行测试时,它似乎错误地计算了重复序列。谁能帮我找出代码中的错误?我对此很陌生。
import csv
import sys
if len(sys.argv) != 3:
sys.exit("Usage: python dna.py STRcounts DNASequence")
check = True
STRlist = []
Humanlist = []
# copy person list
with open(sys.argv[1],"r") as STR:
readSTR = csv.reader(STR)
for row in readSTR:
if check:
STRlist.append(row)
check = False
else:
Humanlist.append(row)
Slist = STRlist[0]
Slist.remove("name")
# print(Humanlist)
# print(Slist)
seq=[]
# copy sequence
with open(sys.argv[2],"r") as text:
readtext = csv.reader(text)
for i in readtext:
seq = i
text = seq[0]
# print(text)
# create dictionary for STR
STRdict = {}
for STR in Slist:
STRdict[STR] = 0
for STR in Slist:
for letter in range(len(text)):
if STR == text[letter:letter+len(STR)]:
STRdict[STR] += 1
check = False
for human in range(len(Humanlist)):
for STR in range(len(Slist)):
if str(STRdict[Slist[STR]]) == str(Humanlist[human][STR+1]):
check = True
else:
check = False
break
if check:
print(Humanlist[human][0])
break
if not check:
print("no match")
我注释掉了不必要的部分并添加了代码以获得 max
长度的 STR 重复。您的其余代码未更改,我得到了预期的结果。
我没有检查所有可能改进的代码,但它确实得到了正确的结果。
您的代码不正确的原因是它计算了字符串中所有出现的 STR,而不是计算连续重复的次数(然后找到最大重复次数)。
import csv
import sys
if len(sys.argv) != 3:
sys.exit("Usage: python dna.py STRcounts DNASequence")
check = True
STRlist = []
Humanlist = []
# copy person list
with open(sys.argv[1],"r") as STR:
readSTR = csv.reader(STR)
for row in readSTR:
if check:
STRlist.append(row)
check = False
else:
Humanlist.append(row)
Slist = STRlist[0]
Slist.remove("name")
# print(Humanlist)
# print(Slist)
seq=[]
# copy sequence
with open(sys.argv[2],"r") as text:
readtext = csv.reader(text)
for i in readtext:
seq = i
text = seq[0]
# print(text)
# create dictionary for STR
STRdict = {}
"""
for STR in Slist:
STRdict[STR] = 0"""
for STR in Slist:
idx = 0
max_= 0
while idx < len(text):
num_repeats = 0
while STR == text[idx:idx+len(STR)]:
num_repeats += 1
idx += len(STR)
if num_repeats > max_:
max_ = num_repeats
idx += 1
STRdict[STR] = max_
#print(STR, max_)
"""for letter in range(len(text)):
if STR == text[letter:letter+len(STR)]:
STRdict[STR] += 1"""
check = False
for human in range(len(Humanlist)):
for STR in range(len(Slist)):
if str(STRdict[Slist[STR]]) == str(Humanlist[human][STR+1]):
check = True
else:
check = False
break
if check:
print(Humanlist[human][0])
break
if not check:
print("no match")
这个问题来自Harvard problem