如何在 biopython 中使用 EPOST 而不是使用 ESEARCH?
How to use EPOST and than use ESEARCH in biopython?
我有一些基因 ID:
id_list = ["19304878", "18606172", "16403221", "16377612", "14871861", "14630660"]
如何在 biopython 中使用 EPOST 和 ESEARCH 获取这些基因的核苷酸序列?
您可能想要使用 efetch
:
from Bio import SeqIO, Entrez
id_list = ["19304878", "18606172", "16403221",
"16377612", "14871861", "14630660"]
# Always tell NCBI who you are!
Entrez.email = "your.email@address.com"
# Fetch results from NCBI and write to output
with open("output.fa", "w") as out_handle:
for gene_id in id_list:
fa = Entrez.efetch(db="nucleotide", id=gene_id, rettype="fasta")
SeqIO.write(SeqIO.parse(fa, "fasta"), out_handle, "fasta")
这是一个 python 脚本:
get_fasta.py
$ python3 get_fasta.py -h
usage: get_fasta.py [-h] -e EMAIL -g GENES [-o OUTPUT]
Retrieve FASTA from NCBI using gene IDs
General options:
-h, --help Show this help and exit
Inputs:
-e EMAIL, --email EMAIL
Your email address
-g GENES, --genes GENES
A file containing a list of gene IDs, with one ID on
each line
Outputs:
-o OUTPUT, --output OUTPUT
The file to write FASTA to. If not provided use STDOUT
#! /usr/bin/env python3
from argparse import ArgumentParser
from pathlib import Path
import sys
from Bio import SeqIO, Entrez
def main():
# Get our command-line arguments
parser, args = get_args()
# Always tell NCBI who you are!
Entrez.email = args.email
# Check the input file is a file
if not Path(args.genes).is_file():
parser.error("--genes parameter must be a file")
# Read in the gene IDs
with open(args.genes, "r") as file_handle:
id_list = [line.strip() for line in file_handle]
# Specify output as a file of STDOUT
if args.output:
out_handle = open(args.output, "w")
else:
out_handle = sys.stdout
# Fetch results from NCBI and write to output
for gene_id in id_list:
fa = Entrez.efetch(db="nucleotide", id=gene_id, rettype="fasta")
SeqIO.write(SeqIO.parse(fa, "fasta"), out_handle, "fasta")
# Close handle if it's a file
if out_handle is not sys.stdout:
out_handle.close()
def get_args():
parser = ArgumentParser(
description="Retrieve FASTA from NCBI using gene IDs",
add_help=False,
)
general = parser.add_argument_group(
title='General options'
)
general.add_argument(
"-h",
"--help",
action="help",
help="Show this help and exit",
)
in_args = parser.add_argument_group(title='Inputs')
in_args.add_argument(
"-e",
"--email",
help="Your email address",
required=True,
)
in_args.add_argument(
"-g",
"--genes",
help="A file containing a list of gene IDs, with one ID on each line",
required=True,
)
out_args = parser.add_argument_group(title='Outputs')
out_args.add_argument(
"-o",
"--output",
help="The file to write FASTA to. If not provided use STDOUT",
)
return parser, parser.parse_args()
if __name__ == "__main__":
main()
我有一些基因 ID:
id_list = ["19304878", "18606172", "16403221", "16377612", "14871861", "14630660"]
如何在 biopython 中使用 EPOST 和 ESEARCH 获取这些基因的核苷酸序列?
您可能想要使用 efetch
:
from Bio import SeqIO, Entrez
id_list = ["19304878", "18606172", "16403221",
"16377612", "14871861", "14630660"]
# Always tell NCBI who you are!
Entrez.email = "your.email@address.com"
# Fetch results from NCBI and write to output
with open("output.fa", "w") as out_handle:
for gene_id in id_list:
fa = Entrez.efetch(db="nucleotide", id=gene_id, rettype="fasta")
SeqIO.write(SeqIO.parse(fa, "fasta"), out_handle, "fasta")
这是一个 python 脚本:
get_fasta.py
$ python3 get_fasta.py -h
usage: get_fasta.py [-h] -e EMAIL -g GENES [-o OUTPUT]
Retrieve FASTA from NCBI using gene IDs
General options:
-h, --help Show this help and exit
Inputs:
-e EMAIL, --email EMAIL
Your email address
-g GENES, --genes GENES
A file containing a list of gene IDs, with one ID on
each line
Outputs:
-o OUTPUT, --output OUTPUT
The file to write FASTA to. If not provided use STDOUT
#! /usr/bin/env python3
from argparse import ArgumentParser
from pathlib import Path
import sys
from Bio import SeqIO, Entrez
def main():
# Get our command-line arguments
parser, args = get_args()
# Always tell NCBI who you are!
Entrez.email = args.email
# Check the input file is a file
if not Path(args.genes).is_file():
parser.error("--genes parameter must be a file")
# Read in the gene IDs
with open(args.genes, "r") as file_handle:
id_list = [line.strip() for line in file_handle]
# Specify output as a file of STDOUT
if args.output:
out_handle = open(args.output, "w")
else:
out_handle = sys.stdout
# Fetch results from NCBI and write to output
for gene_id in id_list:
fa = Entrez.efetch(db="nucleotide", id=gene_id, rettype="fasta")
SeqIO.write(SeqIO.parse(fa, "fasta"), out_handle, "fasta")
# Close handle if it's a file
if out_handle is not sys.stdout:
out_handle.close()
def get_args():
parser = ArgumentParser(
description="Retrieve FASTA from NCBI using gene IDs",
add_help=False,
)
general = parser.add_argument_group(
title='General options'
)
general.add_argument(
"-h",
"--help",
action="help",
help="Show this help and exit",
)
in_args = parser.add_argument_group(title='Inputs')
in_args.add_argument(
"-e",
"--email",
help="Your email address",
required=True,
)
in_args.add_argument(
"-g",
"--genes",
help="A file containing a list of gene IDs, with one ID on each line",
required=True,
)
out_args = parser.add_argument_group(title='Outputs')
out_args.add_argument(
"-o",
"--output",
help="The file to write FASTA to. If not provided use STDOUT",
)
return parser, parser.parse_args()
if __name__ == "__main__":
main()