如何使用进程池(Pool)来更新全局字典

How to use pool processing to update global dictionary

我正在尝试使用 Python 的进程池(multiprocessing.Pool)来更新名为 globalDict 的全局字典。我期望得到 globalDict={'0':0,'1':1,'2':2},但是代码运行之后,这个字典仍然是空的。请帮我解决这个问题,代码如下:

from multiprocessing import Pool
import time

def f(x):
    """Record ``x`` in the module-level ``globalDict`` and return that dict.

    The entry is stored under the key ``str(x)``; if the key is already
    present its existing value is kept (``dict.setdefault`` semantics).
    ``globalDict`` must have been assigned (see ``init_pool``) before the
    first call.
    """
    # The global is only read here, never rebound, so no ``global``
    # declaration is required.
    key = str(x)
    globalDict.setdefault(key, x)
    return globalDict

def init_pool(dictX):
    """Bind the module-level name ``globalDict`` to ``dictX``.

    Passed to ``Pool`` as ``initializer`` so that the worker function ``f``
    finds ``globalDict`` defined when it runs.
    """
    global globalDict
    globalDict = dictX

if __name__ == '__main__':
    # Script entry point: run f over range(3) in a worker pool and report
    # how long the whole thing took.
    started_at = time.time()

    globalDict = {}
    # init_pool(globalDict) is run by the pool as its initializer.
    pool = Pool(initializer=init_pool, initargs=(globalDict,))
    # The values returned by f are discarded here.
    # NOTE(review): the accompanying question reports that globalDict is
    # still empty in this process afterwards -- presumably each worker only
    # updates its own copy of the dictionary.
    pool.map(f, range(3))
    pool.close()
    pool.join()

    elapsed = time.time() - started_at
    print('Done in {:4f}'.format(elapsed))

试试下面的代码,可以得到预期输出:{0: '0', 1: '1', 2: '2'}

from multiprocessing import Pool
import time

def f(x):
    """Add ``x`` to the module-level ``globalDict`` and hand the dict back.

    The key is ``str(x)`` and the value is ``x``; an entry that already
    exists under that key is left unchanged. The returned object is
    ``globalDict`` itself.
    """
    # Reading a global needs no ``global`` statement; the name is never
    # rebound inside this function.
    label = str(x)
    globalDict.setdefault(label, x)
    return globalDict

def init_pool(dictX):
    """Pool initializer: publish ``dictX`` as the module-level ``globalDict``.

    After this call the worker function ``f`` can look up ``globalDict``
    by name.
    """
    global globalDict
    globalDict = dictX

if __name__ == '__main__':
    # Script entry point: run f over range(3) in a worker pool, then build
    # the final dictionary in the parent from the dictionaries f returned.
    start = time.time()
    globalDict = {}
    # Each worker runs init_pool(globalDict) once when it starts.
    pool = Pool(initializer=init_pool, initargs=(globalDict,))
    # One returned dictionary per input; each is the snapshot of the
    # dictionary held by whichever worker handled that input.
    result = pool.map(f, range(3))
    pool.close()
    pool.join()
    stop = time.time()
    print('Done in {:4f}'.format(stop - start))

    # Merge *all* snapshots. Keeping only the last one (as a plain
    # re-assignment inside the loop would) can lose keys when the inputs
    # were handled by different workers, and would leave ``res`` undefined
    # for an empty result list.
    res = {}
    for snapshot in result:
        # Keys and values are swapped (int -> str) to match the output
        # shown for this example.
        res.update({val: key for key, val in snapshot.items()})
    print(res)

一个解决方案是使用托管字典(managed dictionary)。这样就不需要从工作函数 f 返回字典:

from multiprocessing import Pool, Manager
import time

def f(x):
    """Store ``x`` in the module-level ``globalDict`` under ``str(x)``.

    An existing entry for that key is left as it is. Nothing is returned.
    """
    key = str(x)
    globalDict.setdefault(key, x)

def init_pool(dictX):
    """Pool initializer: make ``dictX`` available as the global ``globalDict``.

    In this example ``dictX`` is the object returned by ``manager.dict()``.
    """
    global globalDict
    globalDict = dictX

if __name__ == '__main__':
    # Script entry point: let pool workers fill a manager-backed dictionary
    # and print it from the parent process.
    t0 = time.time()
    with Manager() as manager:
        # Dictionary created through the manager and handed to every
        # worker via the pool initializer.
        globalDict = manager.dict()
        pool = Pool(initializer=init_pool, initargs=(globalDict,))
        pool.map(f, range(3))  # f returns nothing; only its updates matter
        pool.close()
        pool.join()
        elapsed = time.time() - t0
        print('Done in {:4f}'.format(elapsed))
        # Printed inside the ``with`` block, i.e. before the manager exits.
        print(globalDict)

打印:

Done in 0.606996
{'0': 0, '2': 2, '1': 1}

如果你想得到一个不再依赖 Manager() 所返回的 SyncManager 实例的“常规”字典,可以在对 map 的调用完成之后添加如下语句:

# Copy the entries of globalDict into an ordinary dict.
regular_dict = dict(globalDict.items())

或者,如果想做得更巧妙一些,可以创建自己的托管字典类型(我们称之为 Dict):它只支持我们需要的一个方法 setdefault,并把方法调用转发给底层的 dict;当对 map 的调用完成后,我们就可以取回这个底层字典:

from multiprocessing import Pool
from multiprocessing.managers import BaseManager
import time

class DictManager(BaseManager):
    """``BaseManager`` subclass on which the ``Dict`` type is registered.

    It adds no behaviour of its own.
    """

class Dict:
    """Minimal wrapper around a plain ``dict`` exposing only what is needed.

    Only ``setdefault`` is forwarded to the wrapped dictionary;
    ``get_underlying_dict`` hands the wrapped dictionary back.
    """

    def __init__(self):
        # The wrapped dictionary that receives all updates.
        self._dict = dict()

    def setdefault(self, *args):
        """Forward to ``dict.setdefault`` on the wrapped dict; return its result."""
        wrapped = self._dict
        return wrapped.setdefault(*args)

    def get_underlying_dict(self):
        """Return the wrapped dictionary."""
        return self._dict

def f(x):
    """Insert ``x`` into the module-level ``globalDict`` under ``str(x)``.

    Uses ``setdefault``, so a key that is already present keeps its value.
    Returns nothing.
    """
    name = str(x)
    globalDict.setdefault(name, x)

def init_pool(dictX):
    """Pool initializer: bind ``dictX`` to the module-level ``globalDict``.

    In this example ``dictX`` is the object returned by ``manager.Dict()``.
    """
    global globalDict
    globalDict = dictX

if __name__ == '__main__':
    # Script entry point: fill a Dict served by DictManager from pool
    # workers, then fetch its contents as a regular dictionary.
    t0 = time.time()
    # Register the type before the manager is started by the ``with`` block.
    DictManager.register('Dict', Dict)
    with DictManager() as manager:
        globalDict = manager.Dict()
        pool = Pool(initializer=init_pool, initargs=(globalDict,))
        pool.map(f, range(3))  # f returns nothing; only its updates matter
        pool.close()
        pool.join()
        elapsed = time.time() - t0
        print('Done in {:4f}'.format(elapsed))
        # Fetched while the manager is still running.
        regular_dict = globalDict.get_underlying_dict()
    # Printed after the manager's ``with`` block has exited.
    print(regular_dict)

打印:

Done in 0.460001
{'0': 0, '1': 1, '2': 2}