将 DNA 序列碱基计数的元组转换为表格
Tuple to table from counting DNA sequences
我想统计 DNA 序列中的碱基数,返回序列中每种碱基的计数,并打印一个两列的表格:第一列是碱基,第二列是对应的碱基计数。我已经写出了能返回碱基计数的函数,但不确定如何打印表格。我想只用基本的 Python 函数来完成这个分析,尽管我知道使用一些 Python 模块会更容易。
代码:
def base_counter(DNA):
    """Count the A, T, G and C bases in a DNA sequence.

    Args:
        DNA: Iterable of single-character bases (typically a ``str``).

    Returns:
        A 4-tuple ``(A, T, G, C)`` holding the number of occurrences of
        each base. Matching is case-sensitive; any other character is
        ignored.
    """
    A = 0
    T = 0
    G = 0
    C = 0
    for base in DNA:
        if base == "A":
            A += 1
        elif base == "T":
            T += 1
        elif base == "G":
            G += 1
        elif base == "C":
            C += 1
        # Any other character is deliberately skipped.
    # The return sits at function level so the whole sequence is scanned
    # before the counts are handed back.
    return A, T, G, C
参数输入:
dna="AAGCTACGTGGGTGACTTT"
函数调用:
counts=base_counter(dna)
print(counts)
输出:
(4, 6, 6, 3)
期望的输出:
print(counts)
A 4
T 6
G 6
C 3
和
counts
(4, 6, 6, 3)
1) 你的代码有一个错误:return 语句向右多缩进了一级。
2) 改用 dict:
def base_counter(DNA):
    """Count the A, T, G and C bases in a DNA sequence.

    Args:
        DNA: Iterable of single-character bases (typically a ``str``).

    Returns:
        A dict with the keys ``"A"``, ``"T"``, ``"G"``, ``"C"`` (in that
        order) mapped to their occurrence counts. Characters other than
        these four are ignored.
    """
    dna_dict = {
        "A": 0,
        "T": 0,
        "G": 0,
        "C": 0,
    }
    for base in DNA:
        # Only the four known bases are tallied; anything else is skipped.
        if base in dna_dict:
            dna_dict[base] += 1
    return dna_dict


dna = "AAGCTACGTGGGTGACTTT"
counts = base_counter(dna)
# One "base count" row per entry, in the dict's insertion order (A, T, G, C).
for base, count in counts.items():
    print(base, count)
但如果您必须保持函数不变:
def base_counter(DNA):
    """Count the A, T, G and C bases in a DNA sequence.

    Args:
        DNA: Iterable of single-character bases (typically a ``str``).

    Returns:
        A 4-tuple ``(A, T, G, C)`` of occurrence counts. Characters other
        than these four are ignored.
    """
    A = 0
    T = 0
    G = 0
    C = 0
    for base in DNA:
        if base == "A":
            A += 1
        elif base == "T":
            T += 1
        elif base == "G":
            G += 1
        elif base == "C":
            C += 1
    return A, T, G, C


dna = "AAGCTACGTGGGTGACTTT"
counts = base_counter(dna)
# Pair each label with its count; the label order "ATGC" must match the
# order of the tuple returned by base_counter.
for base, count in zip("ATGC", counts):
    print(base, count)
从函数内部:
out_str="A "+str(A)+"\n"+\
"T "+str(T)+"\n"+\
"G "+str(G)+"\n"+\
"C "+str(C)
return out_str
现在您可以调用并打印它,它将以您想要的格式打印:
result=base_counter(DNA)
print(result)
OP 请求完整代码:
def base_counter(DNA):
    """Count the bases in a DNA sequence and format them as a table.

    Args:
        DNA: Iterable of single-character bases (typically a ``str``).

    Returns:
        A string with four newline-separated rows, ``"A <n>"``, ``"T <n>"``,
        ``"G <n>"`` and ``"C <n>"``, with no trailing newline. Characters
        other than A, T, G and C are ignored.
    """
    A = 0
    T = 0
    G = 0
    C = 0
    for base in DNA:
        if base == "A":
            A += 1
        elif base == "T":
            T += 1
        elif base == "G":
            G += 1
        elif base == "C":
            C += 1
    # Parentheses keep the multi-line expression together without relying
    # on backslash continuations.
    out_str = (
        "A " + str(A) + "\n"
        + "T " + str(T) + "\n"
        + "G " + str(G) + "\n"
        + "C " + str(C)
    )
    return out_str


base = base_counter("AAGCTACGTGGGTGACTTT")
print(base)
输出:
A 4
T 6
G 6
C 3
您可以使用 collections.Counter 来统计碱基,并使用 pandas 将数据按列排布。下面是一个示例:
from collections import Counter
import pandas as pd
# Count the bases
dna = "AAGCTACGTGGGTGACTTT"
count = Counter(dna)

# Collect the counts in alphabetical base order (A, C, G, T).
tup = tuple(value for _, value in sorted(count.items()))
print(tup)  # Outputs (4, 3, 6, 6)

# Set it in a pandas dataframe; rows follow the order in which each base
# first appears in the sequence, since the Counter is not sorted here.
df = pd.DataFrame(list(count.items()), columns=['Base', 'Count'])
print(df.to_string(index=False))
# Output
# Base Count
# A 4
# G 6
# C 3
# T 6
您可以创建另一个函数来打印结果:
def print_bases(bases):
    """Print a two-column base/count table to standard output.

    Args:
        bases: Indexable sequence whose first four items are the counts
            for A, T, G and C, in that order.

    Raises:
        IndexError: If ``bases`` has fewer than four items.
    """
    # Index access (rather than zip) keeps the IndexError on short input.
    rows = [label + " " + str(bases[i]) for i, label in enumerate("ATGC")]
    print("\n".join(rows))
print_bases(counts)
我想统计 DNA 序列中的碱基数,返回序列中每种碱基的计数,并打印一个两列的表格:第一列是碱基,第二列是对应的碱基计数。我已经写出了能返回碱基计数的函数,但不确定如何打印表格。我想只用基本的 Python 函数来完成这个分析,尽管我知道使用一些 Python 模块会更容易。
代码:
def base_counter(DNA):
    """Count the A, T, G and C bases in a DNA sequence.

    Args:
        DNA: Iterable of single-character bases (typically a ``str``).

    Returns:
        A 4-tuple ``(A, T, G, C)`` holding the number of occurrences of
        each base. Matching is case-sensitive; any other character is
        ignored.
    """
    A = 0
    T = 0
    G = 0
    C = 0
    for base in DNA:
        if base == "A":
            A += 1
        elif base == "T":
            T += 1
        elif base == "G":
            G += 1
        elif base == "C":
            C += 1
        # Any other character is deliberately skipped.
    # The return sits at function level so the whole sequence is scanned
    # before the counts are handed back.
    return A, T, G, C
参数输入:
dna="AAGCTACGTGGGTGACTTT"
函数调用:
counts=base_counter(dna)
print(counts)
输出:
(4, 6, 6, 3)
期望的输出:
print(counts)
A 4
T 6
G 6
C 3
和
counts
(4, 6, 6, 3)
1) 你的代码有一个错误:return 语句向右多缩进了一级。
2) 改用 dict:
def base_counter(DNA):
    """Count the A, T, G and C bases in a DNA sequence.

    Args:
        DNA: Iterable of single-character bases (typically a ``str``).

    Returns:
        A dict with the keys ``"A"``, ``"T"``, ``"G"``, ``"C"`` (in that
        order) mapped to their occurrence counts. Characters other than
        these four are ignored.
    """
    dna_dict = {
        "A": 0,
        "T": 0,
        "G": 0,
        "C": 0,
    }
    for base in DNA:
        # Only the four known bases are tallied; anything else is skipped.
        if base in dna_dict:
            dna_dict[base] += 1
    return dna_dict


dna = "AAGCTACGTGGGTGACTTT"
counts = base_counter(dna)
# One "base count" row per entry, in the dict's insertion order (A, T, G, C).
for base, count in counts.items():
    print(base, count)
但如果您必须保持函数不变:
def base_counter(DNA):
    """Count the A, T, G and C bases in a DNA sequence.

    Args:
        DNA: Iterable of single-character bases (typically a ``str``).

    Returns:
        A 4-tuple ``(A, T, G, C)`` of occurrence counts. Characters other
        than these four are ignored.
    """
    A = 0
    T = 0
    G = 0
    C = 0
    for base in DNA:
        if base == "A":
            A += 1
        elif base == "T":
            T += 1
        elif base == "G":
            G += 1
        elif base == "C":
            C += 1
    return A, T, G, C


dna = "AAGCTACGTGGGTGACTTT"
counts = base_counter(dna)
# Pair each label with its count; the label order "ATGC" must match the
# order of the tuple returned by base_counter.
for base, count in zip("ATGC", counts):
    print(base, count)
从函数内部:
out_str="A "+str(A)+"\n"+\
"T "+str(T)+"\n"+\
"G "+str(G)+"\n"+\
"C "+str(C)
return out_str
现在您可以调用并打印它,它将以您想要的格式打印:
result=base_counter(DNA)
print(result)
OP 请求完整代码:
def base_counter(DNA):
    """Count the bases in a DNA sequence and format them as a table.

    Args:
        DNA: Iterable of single-character bases (typically a ``str``).

    Returns:
        A string with four newline-separated rows, ``"A <n>"``, ``"T <n>"``,
        ``"G <n>"`` and ``"C <n>"``, with no trailing newline. Characters
        other than A, T, G and C are ignored.
    """
    A = 0
    T = 0
    G = 0
    C = 0
    for base in DNA:
        if base == "A":
            A += 1
        elif base == "T":
            T += 1
        elif base == "G":
            G += 1
        elif base == "C":
            C += 1
    # Parentheses keep the multi-line expression together without relying
    # on backslash continuations.
    out_str = (
        "A " + str(A) + "\n"
        + "T " + str(T) + "\n"
        + "G " + str(G) + "\n"
        + "C " + str(C)
    )
    return out_str


base = base_counter("AAGCTACGTGGGTGACTTT")
print(base)
输出:
A 4
T 6
G 6
C 3
您可以使用 collections.Counter 来统计碱基,并使用 pandas 将数据按列排布。下面是一个示例:
from collections import Counter
import pandas as pd
# Count the bases
dna = "AAGCTACGTGGGTGACTTT"
count = Counter(dna)

# Collect the counts in alphabetical base order (A, C, G, T).
tup = tuple(value for _, value in sorted(count.items()))
print(tup)  # Outputs (4, 3, 6, 6)

# Set it in a pandas dataframe; rows follow the order in which each base
# first appears in the sequence, since the Counter is not sorted here.
df = pd.DataFrame(list(count.items()), columns=['Base', 'Count'])
print(df.to_string(index=False))
# Output
# Base Count
# A 4
# G 6
# C 3
# T 6
您可以创建另一个函数来打印结果:
def print_bases(bases):
    """Print a two-column base/count table to standard output.

    Args:
        bases: Indexable sequence whose first four items are the counts
            for A, T, G and C, in that order.

    Raises:
        IndexError: If ``bases`` has fewer than four items.
    """
    # Index access (rather than zip) keeps the IndexError on short input.
    rows = [label + " " + str(bases[i]) for i, label in enumerate("ATGC")]
    print("\n".join(rows))
print_bases(counts)