Batch/Bulk DNS lookup in Python?
I have a script that fetches DNS (CNAME, MX, NS) data as follows:
from dns import resolver
import dns.exception
...
def resolve_dns(url):
    response_dict = {}
    print("\nResolving DNS for %s" % url)
    # Each record type is optional for a given host, so a failed
    # lookup simply leaves that key out of the result.
    try:
        response_dict['CNAME'] = [rdata for rdata in resolver.query(url, 'CNAME')]
    except dns.exception.DNSException:
        pass
    try:
        response_dict['MX'] = [rdata for rdata in resolver.query(url, 'MX')]
    except dns.exception.DNSException:
        pass
    try:
        response_dict['NS'] = [rdata for rdata in resolver.query(url, 'NS')]
    except dns.exception.DNSException:
        pass
    return response_dict
This function is called sequentially, one URL at a time. If possible, I'd like to speed this up by fetching the data for multiple URLs concurrently.
Is there a way to do what the above script does for a batch of URLs (perhaps returning a list of dict objects, each dict holding the data for a particular URL)?
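(For illustration of the shape being asked for: mapping the existing single-URL resolve_dns over a thread pool already yields a list of dicts, one per URL. A minimal sketch, assuming resolve_dns is defined as above; the name resolve_dns_batch and the max_workers value are placeholder choices:)

from concurrent.futures import ThreadPoolExecutor

def resolve_dns_batch(urls, max_workers=20):
    # Map the single-URL resolve_dns over a thread pool; the result is a
    # list of dicts in the same order as the input URLs.
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        return list(executor.map(resolve_dns, urls))

# results = resolve_dns_batch(['example.com', 'whosebug.com'])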
You can put the work into a thread pool. Your resolve_dns performs three requests in sequence, so I created a slightly more generic worker that performs just one query, and used itertools.product to generate all the host/record-type combinations. In the thread pool I set chunksize to 1 to keep the pool from batching up jobs, since batching would increase the total run time if some queries take very long.
import itertools
import collections
import multiprocessing.pool

import dns.exception
from dns import resolver

def worker(arg):
    """Query DNS for (hostname, qname) and return (qname, [rdata, ...])."""
    url, qname = arg
    try:
        rdatalist = [rdata for rdata in resolver.query(url, qname)]
        return qname, rdatalist
    except dns.exception.DNSException:
        # A failed lookup (NXDOMAIN, no answer, timeout, ...) yields no records.
        return qname, []

def resolve_dns(url_list):
    """Given a list of hosts, return a dict that maps each qname to the
    rdata records returned across all hosts.
    """
    response_dict = collections.defaultdict(list)
    # Create a pool for the queries, but cap the number of threads.
    pool = multiprocessing.pool.ThreadPool(processes=min(len(url_list) * 3, 60))
    # Run the worker for every combination of host and qname.
    for qname, rdatalist in pool.imap(
            worker,
            itertools.product(url_list, ('CNAME', 'MX', 'NS')),
            chunksize=1):
        response_dict[qname].extend(rdatalist)
    pool.close()
    return response_dict

url_list = ['example.com', 'whosebug.com']
result = resolve_dns(url_list)
for qname, rdatalist in result.items():
    print(qname)
    for rdata in rdatalist:
        print('  ', rdata)
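Note that response_dict here merges records from every host under a single qname key, so the per-URL grouping the question asked about is lost. If one dict per URL is needed, a small variation can key the results by host instead; a minimal sketch reusing the worker above (resolve_dns_per_host is a name of my choosing):

def resolve_dns_per_host(url_list):
    """Variant that keeps results grouped per host:
    returns {url: {qname: [rdata, ...]}}.
    """
    per_host = collections.defaultdict(dict)
    pool = multiprocessing.pool.ThreadPool(processes=min(len(url_list) * 3, 60))
    jobs = list(itertools.product(url_list, ('CNAME', 'MX', 'NS')))
    # imap preserves input order, so zipping the jobs back with their
    # results recovers which url each (qname, rdatalist) pair belongs to.
    for (url, _), (qname, rdatalist) in zip(jobs, pool.imap(worker, jobs, chunksize=1)):
        per_host[url][qname] = rdatalist
    pool.close()
    return dict(per_host)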