Python 带有 importlib 模块的多进程
Python multiprocess with importlib modules
你好,
今天我将代码从 threading
移动到 multiprocess
。一切似乎都很好,直到出现以下错误:
错误
Traceback (most recent call last):
File "run.py", line 93, in <module>
main()
File "run.py", line 82, in main
emenu.executemenu(components, _path)
File "/home/s1810979/paellego/lib/execute/execute_menu.py", line 29, in executemenu
e.executeall(installed, _path)
File "/home/s1810979/paellego/lib/execute/execute.py", line 153, in executeall
pool.starmap(phase2, args)
File "/usr/lib64/python3.4/multiprocessing/pool.py", line 268, in starmap
return self._map_async(func, iterable, starmapstar, chunksize).get()
File "/usr/lib64/python3.4/multiprocessing/pool.py", line 608, in get
raise self._value
File "/usr/lib64/python3.4/multiprocessing/pool.py", line 385, in _handle_tasks
put(task)
File "/usr/lib64/python3.4/multiprocessing/connection.py", line 206, in send
self._send_bytes(ForkingPickler.dumps(obj))
File "/usr/lib64/python3.4/multiprocessing/reduction.py", line 50, in dumps
cls(buf, protocol).dump(obj)
_pickle.PicklingError: Can't pickle <class 'module'>: attribute lookup module on builtins failed
代码
execute.py
def executeall(components, _path):
    """Run ``phase2`` in a process pool for every component/apk combination.

    One argument tuple ``(component, apkpath, resultpath, q)`` is built for
    each component paired with each ``(apkpath, resultpath)`` taken in
    lockstep from ``execonfig``.

    Args:
        components: iterable of component objects handed to ``phase2``.
        _path: not used by this function.

    Note:
        ``pool.starmap`` pickles every element of ``args`` to send it to a
        worker process, so each component must be picklable.
    """
    args = []
    manager = multiprocessing.Manager()
    q = manager.Queue()
    resultloc = '/some/result.log'
    for component in components:
        for apkpath, resultpath in zip(execonfig.apkpaths, execonfig.resultpaths):
            args.append((component, apkpath, resultpath, q,))  # Args for subprocesses
    cores = askcores()
    with multiprocessing.Pool(processes=cores) as pool:
        # The logger task drains q while the phase2 workers fill it.
        # NOTE(review): leaving the with-block terminates the pool; confirm
        # lgr.log has finished writing before that happens.
        watcher = pool.apply_async(lgr.log, (resultloc+'/results.txt', q,))
        pool.starmap(phase2, args)
component.py
class Component(object):
    """Bundle the modules, locations and install configuration of one component."""

    def __init__(self, installmodule, runmodule, installerloc, installationloc, dependencyloc):
        """Store the given values and load ``<installerloc>/conf.conf``.

        Args:
            installmodule: module object (imported by the caller with importlib).
            runmodule: module object (imported by the caller with importlib).
            installerloc: directory path (str) that contains ``conf.conf``.
            installationloc: stored as-is.
            dependencyloc: stored as-is.
        """
        # Module objects cannot be pickled, so an instance holding them cannot
        # be sent to a multiprocessing worker.
        self.installmodule = installmodule
        self.runmodule = runmodule
        self.installerloc = installerloc
        self.installationloc = installationloc
        self.dependencyloc = dependencyloc
        self.config = icnf.Installconfiguration(installerloc+'/conf.conf')
    #lots of functions...
installconfig.py
class State(Enum):
    """States used while reading an install configuration."""

    BEGIN = 0  # Look for units
    UNIT = 1  # Look for unit keypairs
    KEYPAIR = 3
class Phase(Enum):
    """Keys of the dictionary produced when reading an install configuration."""

    NONE = 0
    DEPS = 1
    PKGS = 2
class Installconfiguration(object):
    """Hold the dependency and package entries read from a configuration file."""

    def __init__(self, config):
        """Read ``config`` with ``self.reader`` and keep the DEPS and PKGS entries.

        Args:
            config: value passed straight to ``self.reader``.

        Raises:
            KeyError: if the reader result lacks ``Phase.DEPS`` or ``Phase.PKGS``.
        """
        dictionary = self.reader(config)  # Fill a dictionary
        # dictionary (key: Phase, value: (dictionary key: str, job))
        self.deps = dictionary[Phase.DEPS]
        self.pkgs = dictionary[Phase.PKGS]
job.py
class Job(object):
    """A directory/url pair; falsy inputs are normalised to the empty string."""

    def __init__(self, directory=None, url=None):
        """Store ``directory`` and ``url``, replacing any falsy value with ``''``."""
        self.directory = directory or ''
        self.url = url or ''
如您所见,我将组件作为参数传递给 function phase2(component, str, str, multiprocess.manager.Queue())
。
component
的构造函数的第二个和第三个参数是用importlib
导入的模块。
我试过的
我是 python 的新手,但不是编程新手。这是我尝试过的:
- 因为错误本身并没有指出问题到底出在哪里,所以我尝试逐个删除 args 中的参数,以找出哪个无法被 pickle(序列化):删除
component
,一切正常,所以这似乎是麻烦的起因。但是,我需要将此对象传递给我的进程。
- 我在网上搜索了几个小时,但除了多进程的基础教程和对 pickle 工作原理的解释之外,什么也没找到。我确实找到了一个帖子(原文链接 this),其中说这样做应该可行,只是在 Windows 之类的系统上不行。然而,它在 Unix(我使用的系统)上同样不起作用。
我的想法
据我了解,没有任何迹象表明我不能发送包含两个 importlib 模块的 class。我不知道 component
class 的确切问题是什么,但是 importlib 模块作为成员是唯一非常规的事情。这就是为什么我认为问题出现在这里。
问题
你知道为什么 class 包含模块不适合 'pickling' 吗?如何更好地了解 Can't pickle <class 'module'>
错误发生的原因和位置?
更多代码
可以在 https://github.com/Sebastiaan-Alvarez-Rodriguez/paellego
上找到完整的源代码
向我提问
如果你希望我编辑这个问题(补充说明、提供更多代码片段等),请留言告诉我。
最后一个请求
我希望解决方案仅使用 python 标准库,最好是 python 3.3。另外,我的代码的一个要求是它在 Unix 系统上运行。
提前致谢
编辑
根据要求,这是一个大大简化了问题的最小示例:
main.py(你可以执行 python main.py foo)
#!/usr/bin/env python
import sys
import importlib
import multiprocessing
class clazz(object):
    """Wrapper that forwards calls to the ``foo``/``bar`` functions of a module.

    The module object itself is stored on the instance. Module objects cannot
    be pickled, so instances of this version cannot be sent to a
    multiprocessing worker.
    """

    def __init__(self, moduly):
        """Keep a reference to the already imported module ``moduly``."""
        self.moduly = moduly

    def foopass(self, stringy):
        """Call ``moduly.foo(stringy)``."""
        self.moduly.foo(stringy)

    def barpass(self, stringy, numbery):
        """Call ``moduly.bar(stringy)``, then print ``numbery``."""
        self.moduly.bar(stringy)
        print('Second argument: '+str(numbery))
def worker(clazzy, numbery):
    """Pool task: call ``clazzy.barpass('wow', numbery)``."""
    clazzy.barpass('wow', numbery)
def main():
    """Import the module named by ``sys.argv[1]`` and pass a wrapper to a pool.

    NOTE: ``pool.starmap`` has to pickle ``args``. Because ``clazzy`` holds a
    module object, the call fails with ``TypeError: can't pickle module
    objects``; demonstrating that failure is the purpose of this example.
    """
    clazzy = clazz(importlib.import_module(sys.argv[1]))
    clazzy.foopass('init')
    args = [(clazzy, 2,)]
    with multiprocessing.Pool(processes=2) as pool:
        pool.starmap(worker, args)


if __name__ == "__main__":
    main()
foo.py(上述调用建议需要在同一目录中):
#!/usr/bin/env python
globaly = 0  # module-level state written by foo() and read by bar()


def foo(stringy):
    """Print ``'foo ' + stringy`` and set the module global ``globaly`` to 5."""
    global globaly
    print('foo '+stringy)
    globaly = 5


def bar(stringy):
    """Print ``'bar ' + stringy``, then the current value of ``globaly``."""
    print('bar '+stringy)
    print(str(globaly))
运行时会报错:TypeError: can't pickle module objects
现在我们知道,对模块对象进行 pickle(序列化)是(很遗憾)不可能的。
为了消除错误,不要让 clazz
把模块作为属性(不管这有多方便),而是让它保存“modpath
”,也就是 importlib 导入用户指定模块所需的字符串。
看起来像这样(foo.py 与上面完全相同):
#!/usr/bin/env python
import sys
import importlib
import multiprocessing
class clazz(object):
    """Wrapper that forwards calls to the ``foo``/``bar`` functions of a module.

    Only the module's import path (a string) is stored, so instances can be
    pickled; the module is looked up with ``importlib`` on every call.
    """

    def __init__(self, modpathy):
        """Remember ``modpathy``, the name to pass to ``importlib.import_module``."""
        self.modpathy = modpathy

    def foopass(self, stringy):
        """Import the module and call its ``foo(stringy)``."""
        moduly = importlib.import_module(self.modpathy)
        moduly.foo(stringy)

    def barpass(self, stringy, numbery):
        """Import the module, call its ``bar(stringy)``, then print ``numbery``."""
        moduly = importlib.import_module(self.modpathy)
        moduly.bar(stringy)
        print('Second argument: '+str(numbery))
def worker(clazzy, number):
    """Pool task: call ``clazzy.barpass('wow', number)``."""
    clazzy.barpass('wow', number)
def main():
    """Wrap the module name from ``sys.argv[1]`` and run ``worker`` in a pool.

    ``clazzy`` stores only the module name, so ``pool.starmap`` can pickle
    the argument tuples it sends to the worker processes.
    """
    clazzy = clazz(sys.argv[1])
    clazzy.foopass('init')
    args = [(clazzy, 2,)]
    with multiprocessing.Pool(processes=2) as pool:
        pool.starmap(worker, args)


if __name__ == "__main__":
    main()
如果你需要你的全局变量,比如 globaly
,保证保持状态,那么你需要传递一个可变对象(例如列表,字典)来保存这些数据,感谢@DavisHerring:
Module attributes are called “global variables” in Python, but they are no more persistent or accessible than any other data. Why not just use dictionaries?
示例代码如下所示:
#!/usr/bin/env python
import sys
import importlib
import multiprocessing
class clazz(object):
    """Wrapper that forwards calls to a module and carries its state in a dict.

    The module is identified by its import path (a string) and the state lives
    in ``self.dictionary``; the dictionary is passed to the module's functions
    on every call instead of relying on module-level globals.
    """

    def __init__(self, modpathy):
        """Remember the module name ``modpathy`` and start with empty state."""
        self.modpathy = modpathy
        self.dictionary = {}

    def foopass(self, stringy):
        """Import the module and call its ``foo(stringy, self.dictionary)``."""
        moduly = importlib.import_module(self.modpathy)
        moduly.foo(stringy, self.dictionary)

    def barpass(self, stringy, numbery):
        """Import the module, call ``bar(stringy, self.dictionary)``, print ``numbery``."""
        moduly = importlib.import_module(self.modpathy)
        moduly.bar(stringy, self.dictionary)
        print('Second argument: '+str(numbery))
def worker(clazzy, number):
    """Pool task: call ``clazzy.barpass('wow', number)``."""
    clazzy.barpass('wow', number)
def main():
    """Wrap the module name from ``sys.argv[1]`` and run ``worker`` in a pool.

    ``foopass`` is called first in this process so that ``clazzy.dictionary``
    is filled before ``clazzy`` is pickled and sent to the worker.
    """
    clazzy = clazz(sys.argv[1])
    clazzy.foopass('init')
    args = [(clazzy, 2,)]
    with multiprocessing.Pool(processes=2) as pool:
        pool.starmap(worker, args)


if __name__ == "__main__":
    main()
foo.py(不再有全局变量):
#!/usr/bin/env python
def foo(stringy, dictionary):
    """Print ``'foo ' + stringy`` and store 5 under ``dictionary['globaly']``."""
    print('foo '+stringy)
    globaly = 5
    dictionary['globaly'] = globaly
def bar(stringy, dictionary):
    """Print ``'bar ' + stringy``, then the value stored under ``'globaly'``.

    Raises:
        KeyError: if ``dictionary`` has no ``'globaly'`` entry.
    """
    print('bar '+stringy)
    globaly = dictionary['globaly']
    print(str(globaly))
通过这种方式,您可以解决问题而不会出现烦人的 can't pickle ...
错误,同时保持状态
你好,
今天我将代码从 threading
移动到 multiprocess
。一切似乎都很好,直到出现以下错误:
错误
Traceback (most recent call last):
File "run.py", line 93, in <module>
main()
File "run.py", line 82, in main
emenu.executemenu(components, _path)
File "/home/s1810979/paellego/lib/execute/execute_menu.py", line 29, in executemenu
e.executeall(installed, _path)
File "/home/s1810979/paellego/lib/execute/execute.py", line 153, in executeall
pool.starmap(phase2, args)
File "/usr/lib64/python3.4/multiprocessing/pool.py", line 268, in starmap
return self._map_async(func, iterable, starmapstar, chunksize).get()
File "/usr/lib64/python3.4/multiprocessing/pool.py", line 608, in get
raise self._value
File "/usr/lib64/python3.4/multiprocessing/pool.py", line 385, in _handle_tasks
put(task)
File "/usr/lib64/python3.4/multiprocessing/connection.py", line 206, in send
self._send_bytes(ForkingPickler.dumps(obj))
File "/usr/lib64/python3.4/multiprocessing/reduction.py", line 50, in dumps
cls(buf, protocol).dump(obj)
_pickle.PicklingError: Can't pickle <class 'module'>: attribute lookup module on builtins failed
代码
execute.py
def executeall(components, _path):
    """Run ``phase2`` in a process pool for every component/apk combination.

    One argument tuple ``(component, apkpath, resultpath, q)`` is built for
    each component paired with each ``(apkpath, resultpath)`` taken in
    lockstep from ``execonfig``.

    Args:
        components: iterable of component objects handed to ``phase2``.
        _path: not used by this function.

    Note:
        ``pool.starmap`` pickles every element of ``args`` to send it to a
        worker process, so each component must be picklable.
    """
    args = []
    manager = multiprocessing.Manager()
    q = manager.Queue()
    resultloc = '/some/result.log'
    for component in components:
        for apkpath, resultpath in zip(execonfig.apkpaths, execonfig.resultpaths):
            args.append((component, apkpath, resultpath, q,))  # Args for subprocesses
    cores = askcores()
    with multiprocessing.Pool(processes=cores) as pool:
        # The logger task drains q while the phase2 workers fill it.
        # NOTE(review): leaving the with-block terminates the pool; confirm
        # lgr.log has finished writing before that happens.
        watcher = pool.apply_async(lgr.log, (resultloc+'/results.txt', q,))
        pool.starmap(phase2, args)
component.py
class Component(object):
    """Bundle the modules, locations and install configuration of one component."""

    def __init__(self, installmodule, runmodule, installerloc, installationloc, dependencyloc):
        """Store the given values and load ``<installerloc>/conf.conf``.

        Args:
            installmodule: module object (imported by the caller with importlib).
            runmodule: module object (imported by the caller with importlib).
            installerloc: directory path (str) that contains ``conf.conf``.
            installationloc: stored as-is.
            dependencyloc: stored as-is.
        """
        # Module objects cannot be pickled, so an instance holding them cannot
        # be sent to a multiprocessing worker.
        self.installmodule = installmodule
        self.runmodule = runmodule
        self.installerloc = installerloc
        self.installationloc = installationloc
        self.dependencyloc = dependencyloc
        self.config = icnf.Installconfiguration(installerloc+'/conf.conf')
    #lots of functions...
installconfig.py
class State(Enum):
    """States used while reading an install configuration."""

    BEGIN = 0  # Look for units
    UNIT = 1  # Look for unit keypairs
    KEYPAIR = 3
class Phase(Enum):
    """Keys of the dictionary produced when reading an install configuration."""

    NONE = 0
    DEPS = 1
    PKGS = 2
class Installconfiguration(object):
    """Hold the dependency and package entries read from a configuration file."""

    def __init__(self, config):
        """Read ``config`` with ``self.reader`` and keep the DEPS and PKGS entries.

        Args:
            config: value passed straight to ``self.reader``.

        Raises:
            KeyError: if the reader result lacks ``Phase.DEPS`` or ``Phase.PKGS``.
        """
        dictionary = self.reader(config)  # Fill a dictionary
        # dictionary (key: Phase, value: (dictionary key: str, job))
        self.deps = dictionary[Phase.DEPS]
        self.pkgs = dictionary[Phase.PKGS]
job.py
class Job(object):
    """A directory/url pair; falsy inputs are normalised to the empty string."""

    def __init__(self, directory=None, url=None):
        """Store ``directory`` and ``url``, replacing any falsy value with ``''``."""
        self.directory = directory or ''
        self.url = url or ''
如您所见,我将组件作为参数传递给 function phase2(component, str, str, multiprocess.manager.Queue())
。
component
的构造函数的第二个和第三个参数是用importlib
导入的模块。
我试过的
我是 python 的新手,但不是编程新手。这是我尝试过的:
- 因为错误本身并没有指出问题到底出在哪里,所以我尝试逐个删除 args 中的参数,以找出哪个无法被 pickle(序列化):删除
component
,一切正常,所以这似乎是麻烦的起因。但是,我需要将此对象传递给我的进程。
- 我在网上搜索了几个小时,但除了多进程的基础教程和对 pickle 工作原理的解释之外,什么也没找到。我确实找到了一个帖子(原文链接 this),其中说这样做应该可行,只是在 Windows 之类的系统上不行。然而,它在 Unix(我使用的系统)上同样不起作用。
我的想法
据我了解,没有任何迹象表明我不能发送包含两个 importlib 模块的 class。我不知道 component
class 的确切问题是什么,但是 importlib 模块作为成员是唯一非常规的事情。这就是为什么我认为问题出现在这里。
问题
你知道为什么 class 包含模块不适合 'pickling' 吗?如何更好地了解 Can't pickle <class 'module'>
错误发生的原因和位置?
更多代码
可以在 https://github.com/Sebastiaan-Alvarez-Rodriguez/paellego
上找到完整的源代码
向我提问
如果你希望我编辑这个问题(补充说明、提供更多代码片段等),请留言告诉我。
最后一个请求
我希望解决方案仅使用 python 标准库,最好是 python 3.3。另外,我的代码的一个要求是它在 Unix 系统上运行。
提前致谢
编辑
根据要求,这是一个大大简化了问题的最小示例:
main.py(你可以执行 python main.py foo)
#!/usr/bin/env python
import sys
import importlib
import multiprocessing
class clazz(object):
    """Wrapper that forwards calls to the ``foo``/``bar`` functions of a module.

    The module object itself is stored on the instance. Module objects cannot
    be pickled, so instances of this version cannot be sent to a
    multiprocessing worker.
    """

    def __init__(self, moduly):
        """Keep a reference to the already imported module ``moduly``."""
        self.moduly = moduly

    def foopass(self, stringy):
        """Call ``moduly.foo(stringy)``."""
        self.moduly.foo(stringy)

    def barpass(self, stringy, numbery):
        """Call ``moduly.bar(stringy)``, then print ``numbery``."""
        self.moduly.bar(stringy)
        print('Second argument: '+str(numbery))
def worker(clazzy, numbery):
    """Pool task: call ``clazzy.barpass('wow', numbery)``."""
    clazzy.barpass('wow', numbery)
def main():
    """Import the module named by ``sys.argv[1]`` and pass a wrapper to a pool.

    NOTE: ``pool.starmap`` has to pickle ``args``. Because ``clazzy`` holds a
    module object, the call fails with ``TypeError: can't pickle module
    objects``; demonstrating that failure is the purpose of this example.
    """
    clazzy = clazz(importlib.import_module(sys.argv[1]))
    clazzy.foopass('init')
    args = [(clazzy, 2,)]
    with multiprocessing.Pool(processes=2) as pool:
        pool.starmap(worker, args)


if __name__ == "__main__":
    main()
foo.py(上述调用建议需要在同一目录中):
#!/usr/bin/env python
globaly = 0  # module-level state written by foo() and read by bar()


def foo(stringy):
    """Print ``'foo ' + stringy`` and set the module global ``globaly`` to 5."""
    global globaly
    print('foo '+stringy)
    globaly = 5


def bar(stringy):
    """Print ``'bar ' + stringy``, then the current value of ``globaly``."""
    print('bar '+stringy)
    print(str(globaly))
运行时会报错:TypeError: can't pickle module objects
现在我们知道,对模块对象进行 pickle(序列化)是(很遗憾)不可能的。
为了消除错误,不要让 clazz
把模块作为属性(不管这有多方便),而是让它保存“modpath
”,也就是 importlib 导入用户指定模块所需的字符串。
看起来像这样(foo.py 与上面完全相同):
#!/usr/bin/env python
import sys
import importlib
import multiprocessing
class clazz(object):
    """Wrapper that forwards calls to the ``foo``/``bar`` functions of a module.

    Only the module's import path (a string) is stored, so instances can be
    pickled; the module is looked up with ``importlib`` on every call.
    """

    def __init__(self, modpathy):
        """Remember ``modpathy``, the name to pass to ``importlib.import_module``."""
        self.modpathy = modpathy

    def foopass(self, stringy):
        """Import the module and call its ``foo(stringy)``."""
        moduly = importlib.import_module(self.modpathy)
        moduly.foo(stringy)

    def barpass(self, stringy, numbery):
        """Import the module, call its ``bar(stringy)``, then print ``numbery``."""
        moduly = importlib.import_module(self.modpathy)
        moduly.bar(stringy)
        print('Second argument: '+str(numbery))
def worker(clazzy, number):
    """Pool task: call ``clazzy.barpass('wow', number)``."""
    clazzy.barpass('wow', number)
def main():
    """Wrap the module name from ``sys.argv[1]`` and run ``worker`` in a pool.

    ``clazzy`` stores only the module name, so ``pool.starmap`` can pickle
    the argument tuples it sends to the worker processes.
    """
    clazzy = clazz(sys.argv[1])
    clazzy.foopass('init')
    args = [(clazzy, 2,)]
    with multiprocessing.Pool(processes=2) as pool:
        pool.starmap(worker, args)


if __name__ == "__main__":
    main()
如果你需要你的全局变量,比如 globaly
,保证保持状态,那么你需要传递一个可变对象(例如列表,字典)来保存这些数据,感谢@DavisHerring:
Module attributes are called “global variables” in Python, but they are no more persistent or accessible than any other data. Why not just use dictionaries?
示例代码如下所示:
#!/usr/bin/env python
import sys
import importlib
import multiprocessing
class clazz(object):
    """Wrapper that forwards calls to a module and carries its state in a dict.

    The module is identified by its import path (a string) and the state lives
    in ``self.dictionary``; the dictionary is passed to the module's functions
    on every call instead of relying on module-level globals.
    """

    def __init__(self, modpathy):
        """Remember the module name ``modpathy`` and start with empty state."""
        self.modpathy = modpathy
        self.dictionary = {}

    def foopass(self, stringy):
        """Import the module and call its ``foo(stringy, self.dictionary)``."""
        moduly = importlib.import_module(self.modpathy)
        moduly.foo(stringy, self.dictionary)

    def barpass(self, stringy, numbery):
        """Import the module, call ``bar(stringy, self.dictionary)``, print ``numbery``."""
        moduly = importlib.import_module(self.modpathy)
        moduly.bar(stringy, self.dictionary)
        print('Second argument: '+str(numbery))
def worker(clazzy, number):
    """Pool task: call ``clazzy.barpass('wow', number)``."""
    clazzy.barpass('wow', number)
def main():
    """Wrap the module name from ``sys.argv[1]`` and run ``worker`` in a pool.

    ``foopass`` is called first in this process so that ``clazzy.dictionary``
    is filled before ``clazzy`` is pickled and sent to the worker.
    """
    clazzy = clazz(sys.argv[1])
    clazzy.foopass('init')
    args = [(clazzy, 2,)]
    with multiprocessing.Pool(processes=2) as pool:
        pool.starmap(worker, args)


if __name__ == "__main__":
    main()
foo.py(不再有全局变量):
#!/usr/bin/env python
def foo(stringy, dictionary):
    """Print ``'foo ' + stringy`` and store 5 under ``dictionary['globaly']``."""
    print('foo '+stringy)
    globaly = 5
    dictionary['globaly'] = globaly
def bar(stringy, dictionary):
    """Print ``'bar ' + stringy``, then the value stored under ``'globaly'``.

    Raises:
        KeyError: if ``dictionary`` has no ``'globaly'`` entry.
    """
    print('bar '+stringy)
    globaly = dictionary['globaly']
    print(str(globaly))
通过这种方式,您可以解决问题而不会出现烦人的 can't pickle ...
错误,同时保持状态