为 __main__ 创建循环
Creating loop for __main__
我是 Python 的新手,我希望得到您的建议。
我有一个脚本,一次 运行 一个输入值,我希望它能够 运行 一整列这样的值,而无需我一次输入一个值时间。我有一种预感,下面列出的主要方法需要 "for loop"。值是 "gene_name",所以实际上,我想输入 "gene_names" 的列表,脚本可以 运行 很好地通过。
希望我的问题措辞正确,谢谢!有问题的块似乎是
def get_probes_from_genes(gene_names)
import json
import urllib2
import os
import pandas as pd
api_url = "http://api.brain-map.org/api/v2/data/query.json"
def get_probes_from_genes(gene_names):
if not isinstance(gene_names,list):
gene_names = [gene_names]
#in case there are white spaces in gene names
gene_names = ["'%s'"%gene_name for gene_name in gene_names]**
api_query = "?criteria=model::Probe"
api_query= ",rma::criteria,[probe_type$eq'DNA']"
api_query= ",products[abbreviation$eq'HumanMA']"
api_query= ",gene[acronym$eq%s]"%(','.join(gene_names))
api_query= ",rma::options[only$eq'probes.id','name']"
data = json.load(urllib2.urlopen(api_url api_query))
d = {probe['id']: probe['name'] for probe in data['msg']}
if not d:
raise Exception("Could not find any probes for %s gene. Check " \
"http://help.brain- map.org/download/attachments/2818165/HBA_ISH_GeneList.pdf? version=1&modificationDate=1348783035873 " \
"for list of available genes."%gene_name)
return d
def get_expression_values_from_probe_ids(probe_ids):
if not isinstance(probe_ids,list):
probe_ids = [probe_ids]
#in case there are white spaces in gene names
probe_ids = ["'%s'"%probe_id for probe_id in probe_ids]
api_query = "? criteria=service::human_microarray_expression[probes$in%s]"% (','.join(probe_ids))
data = json.load(urllib2.urlopen(api_url api_query))
expression_values = [[float(expression_value) for expression_value in data["msg"]["probes"][i]["expression_level"]] for i in range(len(probe_ids))]
well_ids = [sample["sample"]["well"] for sample in data["msg"] ["samples"]]
donor_names = [sample["donor"]["name"] for sample in data["msg"] ["samples"]]
well_coordinates = [sample["sample"]["mri"] for sample in data["msg"] ["samples"]]
return expression_values, well_ids, well_coordinates, donor_names
def get_mni_coordinates_from_wells(well_ids):
package_directory = os.path.dirname(os.path.abspath(__file__))
frame = pd.read_csv(os.path.join(package_directory, "data", "corrected_mni_coordinates.csv"), header=0, index_col=0)
return list(frame.ix[well_ids].itertuples(index=False))
if __name__ == '__main__':
probes_dict = get_probes_from_genes("SLC6A2")
expression_values, well_ids, well_coordinates, donor_names = get_expression_values_from_probe_ids(probes_dict.keys())
print get_mni_coordinates_from_wells(well_ids)
您可能想要创建一个包含 Gene 名称的文件,然后读取该文件的内容并在循环中调用您的函数。下面是一个例子
if __name__ == '__main__':
with open(r"/tmp/genes.txt","r") as f:
for line in f.readlines():
probes_dict = get_probes_from_genes(line.strip())
expression_values, well_ids, well_coordinates, donor_names = get_expression_values_from_probe_ids(probes_dict.keys())
print get_mni_coordinates_from_wells(well_ids)
哇哦,要事第一。 Python 不是 Java,所以帮自己一个忙,使用漂亮的“"xxx\nyyy"””字符串,用三引号引出多行。
api_query = """?criteria=model::Probe"
,rma::criteria,[probe_type$eq'DNA']
...
"""
或类似的东西。您将得到键入的空格,因此您可能需要进行调整。
如果像建议的那样,您选择通过文件循环调用您的函数,您将需要 try/except 您的数据未找到异常,或者您将需要处理丢失的数据而不抛出异常。我会选择自己返回一个空结果,让调用者担心如何处理它。
如果您确实选择引发异常,请创建您自己的异常,而不是使用通用异常。这样您的代码就可以首先捕获您预期的异常。
class MyNoDataFoundException(Exception):
pass
#replace your current raise code with...
if not d:
raise MyNoDataFoundException(your message here)
关于捕获异常的说明,使用已接受的答案作为起点:
if __name__ == '__main__':
with open(r"/tmp/genes.txt","r") as f:
for line in f.readlines():
#keep track of your input data
search_data = line.strip()
try:
probes_dict = get_probes_from_genes(search_data)
except MyNoDataFoundException, e:
#and do whatever you feel you need to do here...
print "bummer about search_data:%s:\nexception:%s" % (search_data, e)
expression_values, well_ids, well_coordinates, donor_names = get_expression_values_from_probe_ids(probes_dict.keys())
print get_mni_coordinates_from_wells(well_ids)
我是 Python 的新手,我希望得到您的建议。
我有一个脚本,一次 运行 一个输入值,我希望它能够 运行 一整列这样的值,而无需我一次输入一个值时间。我有一种预感,下面列出的主要方法需要 "for loop"。值是 "gene_name",所以实际上,我想输入 "gene_names" 的列表,脚本可以 运行 很好地通过。
希望我的问题措辞正确,谢谢!有问题的块似乎是
def get_probes_from_genes(gene_names)
import json
import urllib2
import os
import pandas as pd
api_url = "http://api.brain-map.org/api/v2/data/query.json"
def get_probes_from_genes(gene_names):
if not isinstance(gene_names,list):
gene_names = [gene_names]
#in case there are white spaces in gene names
gene_names = ["'%s'"%gene_name for gene_name in gene_names]**
api_query = "?criteria=model::Probe"
api_query= ",rma::criteria,[probe_type$eq'DNA']"
api_query= ",products[abbreviation$eq'HumanMA']"
api_query= ",gene[acronym$eq%s]"%(','.join(gene_names))
api_query= ",rma::options[only$eq'probes.id','name']"
data = json.load(urllib2.urlopen(api_url api_query))
d = {probe['id']: probe['name'] for probe in data['msg']}
if not d:
raise Exception("Could not find any probes for %s gene. Check " \
"http://help.brain- map.org/download/attachments/2818165/HBA_ISH_GeneList.pdf? version=1&modificationDate=1348783035873 " \
"for list of available genes."%gene_name)
return d
def get_expression_values_from_probe_ids(probe_ids):
if not isinstance(probe_ids,list):
probe_ids = [probe_ids]
#in case there are white spaces in gene names
probe_ids = ["'%s'"%probe_id for probe_id in probe_ids]
api_query = "? criteria=service::human_microarray_expression[probes$in%s]"% (','.join(probe_ids))
data = json.load(urllib2.urlopen(api_url api_query))
expression_values = [[float(expression_value) for expression_value in data["msg"]["probes"][i]["expression_level"]] for i in range(len(probe_ids))]
well_ids = [sample["sample"]["well"] for sample in data["msg"] ["samples"]]
donor_names = [sample["donor"]["name"] for sample in data["msg"] ["samples"]]
well_coordinates = [sample["sample"]["mri"] for sample in data["msg"] ["samples"]]
return expression_values, well_ids, well_coordinates, donor_names
def get_mni_coordinates_from_wells(well_ids):
package_directory = os.path.dirname(os.path.abspath(__file__))
frame = pd.read_csv(os.path.join(package_directory, "data", "corrected_mni_coordinates.csv"), header=0, index_col=0)
return list(frame.ix[well_ids].itertuples(index=False))
if __name__ == '__main__':
probes_dict = get_probes_from_genes("SLC6A2")
expression_values, well_ids, well_coordinates, donor_names = get_expression_values_from_probe_ids(probes_dict.keys())
print get_mni_coordinates_from_wells(well_ids)
您可能想要创建一个包含 Gene 名称的文件,然后读取该文件的内容并在循环中调用您的函数。下面是一个例子
if __name__ == '__main__':
with open(r"/tmp/genes.txt","r") as f:
for line in f.readlines():
probes_dict = get_probes_from_genes(line.strip())
expression_values, well_ids, well_coordinates, donor_names = get_expression_values_from_probe_ids(probes_dict.keys())
print get_mni_coordinates_from_wells(well_ids)
哇哦,要事第一。 Python 不是 Java,所以帮自己一个忙,使用漂亮的“"xxx\nyyy"””字符串,用三引号引出多行。
api_query = """?criteria=model::Probe"
,rma::criteria,[probe_type$eq'DNA']
...
"""
或类似的东西。您将得到键入的空格,因此您可能需要进行调整。
如果像建议的那样,您选择通过文件循环调用您的函数,您将需要 try/except 您的数据未找到异常,或者您将需要处理丢失的数据而不抛出异常。我会选择自己返回一个空结果,让调用者担心如何处理它。
如果您确实选择引发异常,请创建您自己的异常,而不是使用通用异常。这样您的代码就可以首先捕获您预期的异常。
class MyNoDataFoundException(Exception):
pass
#replace your current raise code with...
if not d:
raise MyNoDataFoundException(your message here)
关于捕获异常的说明,使用已接受的答案作为起点:
if __name__ == '__main__':
with open(r"/tmp/genes.txt","r") as f:
for line in f.readlines():
#keep track of your input data
search_data = line.strip()
try:
probes_dict = get_probes_from_genes(search_data)
except MyNoDataFoundException, e:
#and do whatever you feel you need to do here...
print "bummer about search_data:%s:\nexception:%s" % (search_data, e)
expression_values, well_ids, well_coordinates, donor_names = get_expression_values_from_probe_ids(probes_dict.keys())
print get_mni_coordinates_from_wells(well_ids)