如何让 Python 脚本在 "N" 个 CPU 核心上运行?
How to implement python script to run on "N" number of CPU CORES?
我制作了一个脚本来优化结构的特定部分(这是科学术语,你可以忽略它)。脚本的主要目的是优化,大量时间都花费在 optimize() 和 refine() 这两个函数上。在我的本地系统中,它只使用了 4 个 CPU 中的一个,但我想让这个脚本使用全部 4 个 CPU(特别是对于 optimize() 和 refine() 这两个函数)。
我对 multiprocessing/多核编程不太了解。我尝试使用了 multiprocessing 模块,但它仍然无法使用所有 CPU。所以,如果有人知道如何让 Python 脚本在多个 CPU 上运行,请给我一些建议,这将非常有帮助。
我的脚本:
import sys
import os
from modeller import *
from modeller.optimizers import molecular_dynamics,conjugate_gradients
from modeller.automodel import autosched
def optimize(atmsel, sched):
    """Optimize ``atmsel`` in three consecutive phases.

    Every step of ``sched`` is run on the selection first, then ``refine`` is
    applied, and finally a freshly created ``conjugate_gradients`` optimizer
    is run. Each optimizer call is limited to 200 iterations with a minimum
    atom shift of 0.001.

    Args:
        atmsel: The atom selection handed to every optimizer.
        sched: An iterable of schedule steps, each exposing ``optimize``.

    NOTE(review): the pasted source lost its indentation; the ``for`` body is
    assumed to be the single ``step.optimize`` call -- confirm.
    """
    limits = dict(max_iterations=200, min_atom_shift=0.001)
    for schedule_step in sched:
        schedule_step.optimize(atmsel, **limits)
    refine(atmsel)
    conjugate_gradients().optimize(atmsel, **limits)
def refine(atmsel):
    """Run a two-stage molecular-dynamics temperature sweep on ``atmsel``.

    One ``molecular_dynamics`` optimizer is created and then called once per
    temperature: the first stage walks 150 -> 1000 with ``equilibrate=20``,
    the second walks 1000 -> 300 with ``equilibrate=600``. Both stages use
    200 iterations per call.

    Args:
        atmsel: The atom selection handed to every ``md.optimize`` call.

    NOTE(review): the pasted source lost its indentation; the velocity flag
    is assumed to be cleared right after the very first call (inside the
    inner loop), so only that call initializes velocities -- confirm.
    """
    dynamics = molecular_dynamics(cap_atom_shift=0.39, md_time_step=4.0,
                                  md_return='FINAL')
    # Each stage is (max_iterations, equilibrate, temperatures).
    stages = (
        (200, 20, (150.0, 250.0, 400.0, 700.0, 1000.0)),
        (200, 600, (1000.0, 800.0, 600.0, 500.0, 400.0, 300.0)),
    )
    first_call = True
    for iterations, equilibration, temperatures in stages:
        for temperature in temperatures:
            dynamics.optimize(atmsel, init_velocities=first_call,
                              temperature=temperature,
                              max_iterations=iterations,
                              equilibrate=equilibration)
            first_call = False
def make_restraints(mdl1, aln):
    """Rebuild the restraints of ``mdl1`` from scratch.

    The existing restraints are cleared, then restraints are generated over a
    selection of the whole model: first the 'stereo' and 'phi-psi_binormal'
    types, then one '<name>_dihedral' type for each of omega and chi1..chi4,
    the latter with explicit spline settings.

    Args:
        mdl1: The model whose ``restraints`` object is cleared and refilled.
        aln: The alignment passed through to every ``restraints.make`` call.
    """
    restraints = mdl1.restraints
    restraints.clear()
    whole_model = selection(mdl1)

    for kind in ('stereo', 'phi-psi_binormal'):
        restraints.make(whole_model, restraint_type=kind, aln=aln,
                        spline_on_site=True)

    for angle in ('omega', 'chi1', 'chi2', 'chi3', 'chi4'):
        restraints.make(whole_model, restraint_type=angle + '_dihedral',
                        spline_range=4.0, spline_dx=0.3,
                        spline_min_points=5, aln=aln, spline_on_site=True)
# --- Driver script: mutate residue 275 of chain A to ALA and optimize it. ---
# Every statement below runs at import time and depends on the state left by
# the previous one, so the order must be preserved.
log.verbose()
# Environment with a fixed random seed so runs are repeatable.
env = environ(rand_seed=int(-4243))
# NOTE(review): presumably makes the reader keep HETATM records -- confirm.
env.io.hetatm = True
# Energy-data settings used by the optimizers below.
env.edat.dynamic_sphere=False
env.edat.dynamic_lennard=True
env.edat.contact_shell = 4.0
env.edat.update_dynamic = 0.39
# Load the topology and parameter libraries from the Modeller LIB directory.
env.libs.topology.read(file='$(LIB)/top_heav.lib')
env.libs.parameters.read(file='$(LIB)/par.lib')
# Read the starting structure and register it in a new alignment.
mdl1 = model(env, file = "3O26")
ali = alignment(env)
ali.append_model(mdl1, atom_files= "3O26.pdb", align_codes= "3O26")
# Select residue 275 of chain A and mutate it to alanine.
s = selection(mdl1.chains["A"].residues["275"])
s.mutate(residue_type="ALA")
# Append the mutated model as a second alignment entry, then rebuild the
# topology from that last entry and carry the coordinates over.
ali.append_model(mdl1, align_codes="3O26")
mdl1.clear_topology()
mdl1.generate_topology(ali[-1])
mdl1.transfer_xyz(ali)
mdl1.build(initialize_xyz=False, build_method='INTERNAL_COORDINATES')
# A second copy of the input file is read only to copy residue numbering.
mdl2 = model(env, file="3O26.pdb")
mdl1.res_num_from(mdl2,ali)
# Round-trip the model through a temporary file (removed at the end).
mdl1.write(file="3O26"+"ALA"+"275"+"A"+'.tmp')
mdl1.read(file="3O26"+"ALA"+"275"+"A"+'.tmp')
make_restraints(mdl1, ali)
mdl1.env.edat.nonbonded_sel_atoms=1
sched = autosched.loop.make_for_model(mdl1)
# Re-bind `s` to the whole residues reached by a sphere of 5 around the CA
# atom of residue 275 in chain A (units presumably Angstrom -- confirm).
s = selection(mdl1.atoms['CA:'+"275"+':'+"A"].select_sphere(5)).by_residue()
# Restrict the active restraints to that selection.
mdl1.restraints.unpick_all()
mdl1.restraints.pick(s)
s.energy()
# Perturb the selected coordinates before optimizing.
s.randomize_xyz(deviation=4.0)
# Two optimization passes, with nonbonded_sel_atoms set to 2 and then 1.
mdl1.env.edat.nonbonded_sel_atoms=2
optimize(s,sched)
mdl1.env.edat.nonbonded_sel_atoms=1
optimize(s,sched)
s.energy()
# Score chain A with DOPE, write the result, and drop the temporary file.
atmsel = selection(mdl1.chains["A"])
score = atmsel.assess_dope()
mdl1.write(file="hi.pdb")
os.remove("3O26"+"ALA"+"275"+"A"+'.tmp')
from multiprocessing import Process
if __name__ == '__main__':
    # Run optimize() once more in a single child process and wait for it.
    # NOTE(review): this happens after the script above has already optimized
    # the model and written its output, and one Process is one extra worker,
    # so this cannot spread the earlier work over several cores. Presumably
    # the child's changes to the model are not visible to the parent either
    # -- confirm.
    worker = Process(target=optimize, args=(atmsel, sched))
    worker.start()
    worker.join()
如果要运行演示,请将此链接 (http://files.rcsb.org/view/3o26.pdb) 的内容保存为文件 3O26.pdb,并放在与脚本相同的目录中。
提前致谢
根据 @Dinesh 的建议,我修改了代码并引入了 pp 模块。它确实能够使用所有内核,但我遇到了一个自己无法弄清楚的错误。
修改后的脚本:
import sys
import os
import pp
from modeller import *
from modeller.optimizers import molecular_dynamics, conjugate_gradients
from modeller.automodel import autosched
def optimize(atmsel, sched):
    """Optimize ``atmsel`` in three consecutive phases.

    Every step of ``sched`` is run on the selection first, then ``refine`` is
    applied, and finally a freshly created ``conjugate_gradients`` optimizer
    is run. Each optimizer call is limited to 200 iterations with a minimum
    atom shift of 0.001.

    Args:
        atmsel: The atom selection handed to every optimizer.
        sched: An iterable of schedule steps, each exposing ``optimize``.

    NOTE(review): the pasted source lost its indentation; the ``for`` body is
    assumed to be the single ``step.optimize`` call -- confirm.
    """
    limits = dict(max_iterations=200, min_atom_shift=0.001)
    for schedule_step in sched:
        schedule_step.optimize(atmsel, **limits)
    refine(atmsel)
    conjugate_gradients().optimize(atmsel, **limits)
def refine(atmsel):
    """Run a two-stage molecular-dynamics temperature sweep on ``atmsel``.

    One ``molecular_dynamics`` optimizer is created and then called once per
    temperature: the first stage walks 150 -> 1000 with ``equilibrate=20``,
    the second walks 1000 -> 300 with ``equilibrate=600``. Both stages use
    200 iterations per call.

    Args:
        atmsel: The atom selection handed to every ``md.optimize`` call.

    NOTE(review): the pasted source lost its indentation; the velocity flag
    is assumed to be cleared right after the very first call (inside the
    inner loop), so only that call initializes velocities -- confirm.
    """
    dynamics = molecular_dynamics(cap_atom_shift=0.39, md_time_step=4.0,
                                  md_return='FINAL')
    # Each stage is (max_iterations, equilibrate, temperatures).
    stages = (
        (200, 20, (150.0, 250.0, 400.0, 700.0, 1000.0)),
        (200, 600, (1000.0, 800.0, 600.0, 500.0, 400.0, 300.0)),
    )
    first_call = True
    for iterations, equilibration, temperatures in stages:
        for temperature in temperatures:
            dynamics.optimize(atmsel, init_velocities=first_call,
                              temperature=temperature,
                              max_iterations=iterations,
                              equilibrate=equilibration)
            first_call = False
def make_restraints(mdl1, aln):
    """Rebuild the restraints of ``mdl1`` from scratch.

    The existing restraints are cleared, then restraints are generated over a
    selection of the whole model: first the 'stereo' and 'phi-psi_binormal'
    types, then one '<name>_dihedral' type for each of omega and chi1..chi4,
    the latter with explicit spline settings.

    Args:
        mdl1: The model whose ``restraints`` object is cleared and refilled.
        aln: The alignment passed through to every ``restraints.make`` call.
    """
    restraints = mdl1.restraints
    restraints.clear()
    whole_model = selection(mdl1)

    for kind in ('stereo', 'phi-psi_binormal'):
        restraints.make(whole_model, restraint_type=kind, aln=aln,
                        spline_on_site=True)

    for angle in ('omega', 'chi1', 'chi2', 'chi3', 'chi4'):
        restraints.make(whole_model, restraint_type=angle + '_dihedral',
                        spline_range=4.0, spline_dx=0.3,
                        spline_min_points=5, aln=aln, spline_on_site=True)
################################### PPMODULE ############################
def main(s, sched):
    """Run ``optimize(s, sched)`` as one Parallel Python job and wait for it.

    A ``pp.Server`` is started with ``int(sys.argv[1])`` workers when a
    command-line argument is present, otherwise with the server's own
    default. The job is submitted with ``refine`` as a dependent function and
    the ``modeller.optimizers`` import as its module requirement, and its
    result is awaited immediately and discarded.

    The server is now always destroyed before returning. Previously each call
    created a new server that was never shut down, so its workers leaked --
    including when ``submit`` raised.

    Args:
        s: The atom selection passed to ``optimize``.
        sched: The optimization schedule passed to ``optimize``.

    Raises:
        ValueError: If ``sys.argv[1]`` is present but is not an integer.
    """
    # Joined into one string so the output is the same on Python 2 and 3.
    print(" ".join([str(s), "*************************************************************************"]))
    ppservers = ()
    if len(sys.argv) > 1:
        ncpus = int(sys.argv[1])
        job_server = pp.Server(ncpus, ppservers=ppservers)
    else:
        job_server = pp.Server(ppservers=ppservers)
    try:
        print(" ".join(["Starting pp with", str(job_server.get_ncpus()), "workers"]))
        job = job_server.submit(optimize,(s,sched,),(refine,),("from modeller.optimizers import molecular_dynamics, conjugate_gradients",))
        # Calling the job object blocks until it has finished. Only this one
        # job is ever in flight, so the other workers stay idle.
        job()
    finally:
        # Shut the workers down even when submit() or the job itself fails.
        job_server.destroy()
#################################### PPMODULE ############################
if __name__=="__main__":
    # NOTE(review): the pasted source lost its indentation; everything up to
    # the final os.remove() is assumed to belong to this guard -- confirm.
    log.verbose()

    # Environment with a fixed random seed and the energy-data settings used
    # by the optimizers.
    env = environ(rand_seed=int(-4345))
    env.io.hetatm = True
    env.edat.dynamic_sphere = False
    env.edat.dynamic_lennard = True
    env.edat.contact_shell = 4.0
    env.edat.update_dynamic = 0.39
    env.libs.topology.read(file='$(LIB)/top_heav.lib')
    env.libs.parameters.read(file='$(LIB)/par.lib')

    # Read the structure, register it in an alignment, and mutate residue 275
    # of chain A to alanine.
    mdl1 = model(env, file="3O26")
    ali = alignment(env)
    ali.append_model(mdl1, atom_files="3O26.pdb", align_codes="3O26")
    s = selection(mdl1.chains["A"].residues["275"])
    s.mutate(residue_type="ALA")

    # Rebuild the topology from the mutated alignment entry.
    ali.append_model(mdl1, align_codes="3O26")
    mdl1.clear_topology()
    mdl1.generate_topology(ali[-1])
    mdl1.transfer_xyz(ali)
    mdl1.build(initialize_xyz=False, build_method='INTERNAL_COORDINATES')

    # Copy residue numbering from a second read of the input file.
    mdl2 = model(env, file="3O26.pdb")
    mdl1.res_num_from(mdl2, ali)

    # Round-trip the model through a temporary file (removed at the end).
    tmp_path = "3O26" + "ALA" + "275" + "A" + '.tmp'
    mdl1.write(file=tmp_path)
    mdl1.read(file=tmp_path)

    make_restraints(mdl1, ali)
    mdl1.env.edat.nonbonded_sel_atoms = 1
    sched = autosched.loop.make_for_model(mdl1)

    # Work on the whole residues reached by a sphere of 15 around the CA atom
    # of residue 275 in chain A, and activate only their restraints.
    s = selection(mdl1.atoms['CA:' + "275" + ':' + "A"].select_sphere(15)).by_residue()
    mdl1.restraints.unpick_all()
    mdl1.restraints.pick(s)
    s.energy()
    s.randomize_xyz(deviation=4.0)

    # Two pp-driven optimization passes, with nonbonded_sel_atoms 2 then 1.
    mdl1.env.edat.nonbonded_sel_atoms = 2
    main(s, sched)
    mdl1.env.edat.nonbonded_sel_atoms = 1
    main(s, sched)
    s.energy()

    # Score chain A with DOPE, write the result, and clean up.
    atmsel = selection(mdl1.chains["A"])
    score = atmsel.assess_dope()
    mdl1.write(file="current.pdb")
    os.remove(tmp_path)
错误:
randomi_498_> Atoms,selected atoms,random_seed,amplitude: 2302 558 1 4.0000
randomi_496_> Amplitude is > 0; randomization is done.
<Selection of 558 atoms> *************************************************************************
Starting pp with 4 workers
Traceback (most recent call last):
File "mutate_model.py", line 88, in <module>
main(s, sched)
File "m_m.py", line 52, in main
job_server.submit(optimize,(s,sched,),(refine,),("from modeller.optimizers import molecular_dynamics, conjugate_gradients",))()
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/pp.py", line 460, in submit
sfunc = self.__dumpsfunc((func, ) + depfuncs, modules)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/pp.py", line 638, in __dumpsfunc
sources = [self.__get_source(func) for func in funcs]
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/pp.py", line 705, in __get_source
sourcelines = inspect.getsourcelines(func)[0]
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/inspect.py", line 690, in getsourcelines
lines, lnum = findsource(object)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/inspect.py", line 526, in findsource
file = getfile(object)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/inspect.py", line 408, in getfile
raise TypeError('{!r} is a built-in class'.format(object))
TypeError: <module '__builtin__' (built-in)> is a built-in class
请检查pp模块。 Parallelpython.com
最后,我参考了 http://chriskiehl.com/article/parallelism-in-one-line/ 和 https://pymotw.com/2/multiprocessing/basics.html 这两篇文章,
改用 multiprocessing.Pool 的方式完成了自己的工作。
这是我的伪代码:
from multiprocessing import Pool
def get_mm_script(scripts):
    """Return ``scripts`` unchanged (placeholder from the author's pseudo-code).

    According to the author's comment, the real version creates all the
    mm.py scripts from a string template; none of that logic is shown here.
    """
    # I just created all my mm.py scripts as string template
    return scripts
def run(filename):
    """Placeholder worker from the author's pseudo-code; always returns None.

    According to the author's comment, the real version runs the given
    script with a system command; that call is not shown here.
    """
    # here i use system command to run my all scripts.
    return None
if __name__ == '__main__':
    # NOTE(review): `f` is not defined anywhere in the visible pseudo-code;
    # it presumably stands for the author's script templates -- confirm.
    script_list = get_mm_script(f)
    # Four worker processes; each one handles one script at a time via run().
    workers = Pool(4)
    workers.map(run, script_list)
    # No more tasks will be submitted; wait for the workers to exit.
    workers.close()
    workers.join()
我制作了一个脚本来优化结构的特定部分(这是科学术语,你可以忽略它)。脚本的主要目的是优化,大量时间都花费在 optimize() 和 refine() 这两个函数上。在我的本地系统中,它只使用了 4 个 CPU 中的一个,但我想让这个脚本使用全部 4 个 CPU(特别是对于 optimize() 和 refine() 这两个函数)。
我对 multiprocessing/多核编程不太了解。我尝试使用了 multiprocessing 模块,但它仍然无法使用所有 CPU。所以,如果有人知道如何让 Python 脚本在多个 CPU 上运行,请给我一些建议,这将非常有帮助。
我的脚本:
import sys
import os
from modeller import *
from modeller.optimizers import molecular_dynamics,conjugate_gradients
from modeller.automodel import autosched
def optimize(atmsel, sched):
    """Optimize ``atmsel`` in three consecutive phases.

    Every step of ``sched`` is run on the selection first, then ``refine`` is
    applied, and finally a freshly created ``conjugate_gradients`` optimizer
    is run. Each optimizer call is limited to 200 iterations with a minimum
    atom shift of 0.001.

    Args:
        atmsel: The atom selection handed to every optimizer.
        sched: An iterable of schedule steps, each exposing ``optimize``.

    NOTE(review): the pasted source lost its indentation; the ``for`` body is
    assumed to be the single ``step.optimize`` call -- confirm.
    """
    limits = dict(max_iterations=200, min_atom_shift=0.001)
    for schedule_step in sched:
        schedule_step.optimize(atmsel, **limits)
    refine(atmsel)
    conjugate_gradients().optimize(atmsel, **limits)
def refine(atmsel):
    """Run a two-stage molecular-dynamics temperature sweep on ``atmsel``.

    One ``molecular_dynamics`` optimizer is created and then called once per
    temperature: the first stage walks 150 -> 1000 with ``equilibrate=20``,
    the second walks 1000 -> 300 with ``equilibrate=600``. Both stages use
    200 iterations per call.

    Args:
        atmsel: The atom selection handed to every ``md.optimize`` call.

    NOTE(review): the pasted source lost its indentation; the velocity flag
    is assumed to be cleared right after the very first call (inside the
    inner loop), so only that call initializes velocities -- confirm.
    """
    dynamics = molecular_dynamics(cap_atom_shift=0.39, md_time_step=4.0,
                                  md_return='FINAL')
    # Each stage is (max_iterations, equilibrate, temperatures).
    stages = (
        (200, 20, (150.0, 250.0, 400.0, 700.0, 1000.0)),
        (200, 600, (1000.0, 800.0, 600.0, 500.0, 400.0, 300.0)),
    )
    first_call = True
    for iterations, equilibration, temperatures in stages:
        for temperature in temperatures:
            dynamics.optimize(atmsel, init_velocities=first_call,
                              temperature=temperature,
                              max_iterations=iterations,
                              equilibrate=equilibration)
            first_call = False
def make_restraints(mdl1, aln):
    """Rebuild the restraints of ``mdl1`` from scratch.

    The existing restraints are cleared, then restraints are generated over a
    selection of the whole model: first the 'stereo' and 'phi-psi_binormal'
    types, then one '<name>_dihedral' type for each of omega and chi1..chi4,
    the latter with explicit spline settings.

    Args:
        mdl1: The model whose ``restraints`` object is cleared and refilled.
        aln: The alignment passed through to every ``restraints.make`` call.
    """
    restraints = mdl1.restraints
    restraints.clear()
    whole_model = selection(mdl1)

    for kind in ('stereo', 'phi-psi_binormal'):
        restraints.make(whole_model, restraint_type=kind, aln=aln,
                        spline_on_site=True)

    for angle in ('omega', 'chi1', 'chi2', 'chi3', 'chi4'):
        restraints.make(whole_model, restraint_type=angle + '_dihedral',
                        spline_range=4.0, spline_dx=0.3,
                        spline_min_points=5, aln=aln, spline_on_site=True)
# --- Driver script: mutate residue 275 of chain A to ALA and optimize it. ---
# Every statement below runs at import time and depends on the state left by
# the previous one, so the order must be preserved.
log.verbose()
# Environment with a fixed random seed so runs are repeatable.
env = environ(rand_seed=int(-4243))
# NOTE(review): presumably makes the reader keep HETATM records -- confirm.
env.io.hetatm = True
# Energy-data settings used by the optimizers below.
env.edat.dynamic_sphere=False
env.edat.dynamic_lennard=True
env.edat.contact_shell = 4.0
env.edat.update_dynamic = 0.39
# Load the topology and parameter libraries from the Modeller LIB directory.
env.libs.topology.read(file='$(LIB)/top_heav.lib')
env.libs.parameters.read(file='$(LIB)/par.lib')
# Read the starting structure and register it in a new alignment.
mdl1 = model(env, file = "3O26")
ali = alignment(env)
ali.append_model(mdl1, atom_files= "3O26.pdb", align_codes= "3O26")
# Select residue 275 of chain A and mutate it to alanine.
s = selection(mdl1.chains["A"].residues["275"])
s.mutate(residue_type="ALA")
# Append the mutated model as a second alignment entry, then rebuild the
# topology from that last entry and carry the coordinates over.
ali.append_model(mdl1, align_codes="3O26")
mdl1.clear_topology()
mdl1.generate_topology(ali[-1])
mdl1.transfer_xyz(ali)
mdl1.build(initialize_xyz=False, build_method='INTERNAL_COORDINATES')
# A second copy of the input file is read only to copy residue numbering.
mdl2 = model(env, file="3O26.pdb")
mdl1.res_num_from(mdl2,ali)
# Round-trip the model through a temporary file (removed at the end).
mdl1.write(file="3O26"+"ALA"+"275"+"A"+'.tmp')
mdl1.read(file="3O26"+"ALA"+"275"+"A"+'.tmp')
make_restraints(mdl1, ali)
mdl1.env.edat.nonbonded_sel_atoms=1
sched = autosched.loop.make_for_model(mdl1)
# Re-bind `s` to the whole residues reached by a sphere of 5 around the CA
# atom of residue 275 in chain A (units presumably Angstrom -- confirm).
s = selection(mdl1.atoms['CA:'+"275"+':'+"A"].select_sphere(5)).by_residue()
# Restrict the active restraints to that selection.
mdl1.restraints.unpick_all()
mdl1.restraints.pick(s)
s.energy()
# Perturb the selected coordinates before optimizing.
s.randomize_xyz(deviation=4.0)
# Two optimization passes, with nonbonded_sel_atoms set to 2 and then 1.
mdl1.env.edat.nonbonded_sel_atoms=2
optimize(s,sched)
mdl1.env.edat.nonbonded_sel_atoms=1
optimize(s,sched)
s.energy()
# Score chain A with DOPE, write the result, and drop the temporary file.
atmsel = selection(mdl1.chains["A"])
score = atmsel.assess_dope()
mdl1.write(file="hi.pdb")
os.remove("3O26"+"ALA"+"275"+"A"+'.tmp')
from multiprocessing import Process
if __name__ == '__main__':
    # Run optimize() once more in a single child process and wait for it.
    # NOTE(review): this happens after the script above has already optimized
    # the model and written its output, and one Process is one extra worker,
    # so this cannot spread the earlier work over several cores. Presumably
    # the child's changes to the model are not visible to the parent either
    # -- confirm.
    worker = Process(target=optimize, args=(atmsel, sched))
    worker.start()
    worker.join()
如果要运行演示,请将此链接 (http://files.rcsb.org/view/3o26.pdb) 的内容保存为文件 3O26.pdb,并放在与脚本相同的目录中。
提前致谢
根据 @Dinesh 的建议,我修改了代码并引入了 pp 模块。它确实能够使用所有内核,但我遇到了一个自己无法弄清楚的错误。
修改后的脚本:
import sys
import os
import pp
from modeller import *
from modeller.optimizers import molecular_dynamics, conjugate_gradients
from modeller.automodel import autosched
def optimize(atmsel, sched):
    """Optimize ``atmsel`` in three consecutive phases.

    Every step of ``sched`` is run on the selection first, then ``refine`` is
    applied, and finally a freshly created ``conjugate_gradients`` optimizer
    is run. Each optimizer call is limited to 200 iterations with a minimum
    atom shift of 0.001.

    Args:
        atmsel: The atom selection handed to every optimizer.
        sched: An iterable of schedule steps, each exposing ``optimize``.

    NOTE(review): the pasted source lost its indentation; the ``for`` body is
    assumed to be the single ``step.optimize`` call -- confirm.
    """
    limits = dict(max_iterations=200, min_atom_shift=0.001)
    for schedule_step in sched:
        schedule_step.optimize(atmsel, **limits)
    refine(atmsel)
    conjugate_gradients().optimize(atmsel, **limits)
def refine(atmsel):
    """Run a two-stage molecular-dynamics temperature sweep on ``atmsel``.

    One ``molecular_dynamics`` optimizer is created and then called once per
    temperature: the first stage walks 150 -> 1000 with ``equilibrate=20``,
    the second walks 1000 -> 300 with ``equilibrate=600``. Both stages use
    200 iterations per call.

    Args:
        atmsel: The atom selection handed to every ``md.optimize`` call.

    NOTE(review): the pasted source lost its indentation; the velocity flag
    is assumed to be cleared right after the very first call (inside the
    inner loop), so only that call initializes velocities -- confirm.
    """
    dynamics = molecular_dynamics(cap_atom_shift=0.39, md_time_step=4.0,
                                  md_return='FINAL')
    # Each stage is (max_iterations, equilibrate, temperatures).
    stages = (
        (200, 20, (150.0, 250.0, 400.0, 700.0, 1000.0)),
        (200, 600, (1000.0, 800.0, 600.0, 500.0, 400.0, 300.0)),
    )
    first_call = True
    for iterations, equilibration, temperatures in stages:
        for temperature in temperatures:
            dynamics.optimize(atmsel, init_velocities=first_call,
                              temperature=temperature,
                              max_iterations=iterations,
                              equilibrate=equilibration)
            first_call = False
def make_restraints(mdl1, aln):
    """Rebuild the restraints of ``mdl1`` from scratch.

    The existing restraints are cleared, then restraints are generated over a
    selection of the whole model: first the 'stereo' and 'phi-psi_binormal'
    types, then one '<name>_dihedral' type for each of omega and chi1..chi4,
    the latter with explicit spline settings.

    Args:
        mdl1: The model whose ``restraints`` object is cleared and refilled.
        aln: The alignment passed through to every ``restraints.make`` call.
    """
    restraints = mdl1.restraints
    restraints.clear()
    whole_model = selection(mdl1)

    for kind in ('stereo', 'phi-psi_binormal'):
        restraints.make(whole_model, restraint_type=kind, aln=aln,
                        spline_on_site=True)

    for angle in ('omega', 'chi1', 'chi2', 'chi3', 'chi4'):
        restraints.make(whole_model, restraint_type=angle + '_dihedral',
                        spline_range=4.0, spline_dx=0.3,
                        spline_min_points=5, aln=aln, spline_on_site=True)
################################### PPMODULE ############################
def main(s, sched):
    """Run ``optimize(s, sched)`` as one Parallel Python job and wait for it.

    A ``pp.Server`` is started with ``int(sys.argv[1])`` workers when a
    command-line argument is present, otherwise with the server's own
    default. The job is submitted with ``refine`` as a dependent function and
    the ``modeller.optimizers`` import as its module requirement, and its
    result is awaited immediately and discarded.

    The server is now always destroyed before returning. Previously each call
    created a new server that was never shut down, so its workers leaked --
    including when ``submit`` raised.

    Args:
        s: The atom selection passed to ``optimize``.
        sched: The optimization schedule passed to ``optimize``.

    Raises:
        ValueError: If ``sys.argv[1]`` is present but is not an integer.
    """
    # Joined into one string so the output is the same on Python 2 and 3.
    print(" ".join([str(s), "*************************************************************************"]))
    ppservers = ()
    if len(sys.argv) > 1:
        ncpus = int(sys.argv[1])
        job_server = pp.Server(ncpus, ppservers=ppservers)
    else:
        job_server = pp.Server(ppservers=ppservers)
    try:
        print(" ".join(["Starting pp with", str(job_server.get_ncpus()), "workers"]))
        job = job_server.submit(optimize,(s,sched,),(refine,),("from modeller.optimizers import molecular_dynamics, conjugate_gradients",))
        # Calling the job object blocks until it has finished. Only this one
        # job is ever in flight, so the other workers stay idle.
        job()
    finally:
        # Shut the workers down even when submit() or the job itself fails.
        job_server.destroy()
#################################### PPMODULE ############################
if __name__=="__main__":
    # NOTE(review): the pasted source lost its indentation; everything up to
    # the final os.remove() is assumed to belong to this guard -- confirm.
    log.verbose()

    # Environment with a fixed random seed and the energy-data settings used
    # by the optimizers.
    env = environ(rand_seed=int(-4345))
    env.io.hetatm = True
    env.edat.dynamic_sphere = False
    env.edat.dynamic_lennard = True
    env.edat.contact_shell = 4.0
    env.edat.update_dynamic = 0.39
    env.libs.topology.read(file='$(LIB)/top_heav.lib')
    env.libs.parameters.read(file='$(LIB)/par.lib')

    # Read the structure, register it in an alignment, and mutate residue 275
    # of chain A to alanine.
    mdl1 = model(env, file="3O26")
    ali = alignment(env)
    ali.append_model(mdl1, atom_files="3O26.pdb", align_codes="3O26")
    s = selection(mdl1.chains["A"].residues["275"])
    s.mutate(residue_type="ALA")

    # Rebuild the topology from the mutated alignment entry.
    ali.append_model(mdl1, align_codes="3O26")
    mdl1.clear_topology()
    mdl1.generate_topology(ali[-1])
    mdl1.transfer_xyz(ali)
    mdl1.build(initialize_xyz=False, build_method='INTERNAL_COORDINATES')

    # Copy residue numbering from a second read of the input file.
    mdl2 = model(env, file="3O26.pdb")
    mdl1.res_num_from(mdl2, ali)

    # Round-trip the model through a temporary file (removed at the end).
    tmp_path = "3O26" + "ALA" + "275" + "A" + '.tmp'
    mdl1.write(file=tmp_path)
    mdl1.read(file=tmp_path)

    make_restraints(mdl1, ali)
    mdl1.env.edat.nonbonded_sel_atoms = 1
    sched = autosched.loop.make_for_model(mdl1)

    # Work on the whole residues reached by a sphere of 15 around the CA atom
    # of residue 275 in chain A, and activate only their restraints.
    s = selection(mdl1.atoms['CA:' + "275" + ':' + "A"].select_sphere(15)).by_residue()
    mdl1.restraints.unpick_all()
    mdl1.restraints.pick(s)
    s.energy()
    s.randomize_xyz(deviation=4.0)

    # Two pp-driven optimization passes, with nonbonded_sel_atoms 2 then 1.
    mdl1.env.edat.nonbonded_sel_atoms = 2
    main(s, sched)
    mdl1.env.edat.nonbonded_sel_atoms = 1
    main(s, sched)
    s.energy()

    # Score chain A with DOPE, write the result, and clean up.
    atmsel = selection(mdl1.chains["A"])
    score = atmsel.assess_dope()
    mdl1.write(file="current.pdb")
    os.remove(tmp_path)
错误:
randomi_498_> Atoms,selected atoms,random_seed,amplitude: 2302 558 1 4.0000
randomi_496_> Amplitude is > 0; randomization is done.
<Selection of 558 atoms> *************************************************************************
Starting pp with 4 workers
Traceback (most recent call last):
File "mutate_model.py", line 88, in <module>
main(s, sched)
File "m_m.py", line 52, in main
job_server.submit(optimize,(s,sched,),(refine,),("from modeller.optimizers import molecular_dynamics, conjugate_gradients",))()
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/pp.py", line 460, in submit
sfunc = self.__dumpsfunc((func, ) + depfuncs, modules)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/pp.py", line 638, in __dumpsfunc
sources = [self.__get_source(func) for func in funcs]
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/pp.py", line 705, in __get_source
sourcelines = inspect.getsourcelines(func)[0]
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/inspect.py", line 690, in getsourcelines
lines, lnum = findsource(object)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/inspect.py", line 526, in findsource
file = getfile(object)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/inspect.py", line 408, in getfile
raise TypeError('{!r} is a built-in class'.format(object))
TypeError: <module '__builtin__' (built-in)> is a built-in class
请检查pp模块。 Parallelpython.com
最后,我参考了 http://chriskiehl.com/article/parallelism-in-one-line/ 和 https://pymotw.com/2/multiprocessing/basics.html 这两篇文章,
改用 multiprocessing.Pool 的方式完成了自己的工作。这是我的伪代码:
from multiprocessing import Pool
def get_mm_script(scripts):
    """Return ``scripts`` unchanged (placeholder from the author's pseudo-code).

    According to the author's comment, the real version creates all the
    mm.py scripts from a string template; none of that logic is shown here.
    """
    # I just created all my mm.py scripts as string template
    return scripts
def run(filename):
    """Placeholder worker from the author's pseudo-code; always returns None.

    According to the author's comment, the real version runs the given
    script with a system command; that call is not shown here.
    """
    # here i use system command to run my all scripts.
    return None
if __name__ == '__main__':
    # NOTE(review): `f` is not defined anywhere in the visible pseudo-code;
    # it presumably stands for the author's script templates -- confirm.
    script_list = get_mm_script(f)
    # Four worker processes; each one handles one script at a time via run().
    workers = Pool(4)
    workers.map(run, script_list)
    # No more tasks will be submitted; wait for the workers to exit.
    workers.close()
    workers.join()