joblib 并行返回重复数组

joblib Parallel returning duplicate arrays

import multiprocessing
from joblib import Parallel, delayed
import numpy as np

# Row indices handed to the workers, one task per entry.
l1 = [0, 1, 2, 3, 4]
# Offsets added to the row index; one output column per entry.
l2 = [1, 2]

# Shared 5x2 output buffer. np.empty leaves the contents uninitialized.
c = np.empty((5, 2))

def myfun(item):
    """Fill row ``item`` of the global array ``c`` and return ``c``.

    For every position ``col`` and value ``offset`` in the global ``l2``,
    ``c[item, col]`` is set to ``item + offset``.

    Note that the return value is the global ``c`` itself, not a copy, so
    every call returns the very same array object.
    """
    for col, offset in enumerate(l2):
        c[item, col] = item + offset
    return c

# One delayed myfun call per entry of l1, run on the threading backend;
# results collects the return value of each call.
results = Parallel(n_jobs=-1, backend="threading")(
    delayed(myfun)(item) for item in l1
)

我预计结果是

[[1., 2.],
 [2., 3.],
 [3., 4.],
 [4., 5.],
 [5., 6.]]

为什么我得到的是五个相同的数组,而不是一个?

问题在于每个线程更新的都是同一个全局数组 `c`,而且 `myfun` 每次返回的都是对这同一个数组的引用。因此 `results` 中的 5 个元素其实是同一个对象,打印结果时只是把同一个数组打印了 5 次。

要让每个任务(`l1` 中的每个元素)得到各自独立的结果,您需要为每个任务创建主数组的一个副本。

试试这个代码:

import multiprocessing
from joblib import Parallel, delayed
import numpy as np

# Row indices handed to the workers, one task per entry.
l1 = [0, 1, 2, 3, 4]
# Offsets added to the row index; one output column per entry.
l2 = [1, 2]

# Main 5x2 array, zero-initialized.
c = np.zeros((5, 2))
# One independent copy of the main array per entry of l1.
lst = [c.copy() for _ in l1]

def myfun(item):
    """Fill row ``item`` in both the per-item array and the main array.

    For every position ``col`` and value ``offset`` in the global ``l2``,
    the value ``item + offset`` is written to ``lst[item][item, col]`` and
    to ``c[item, col]``.

    Returns ``lst[item]``, the array belonging to this item, so each call
    returns a different array object.
    """
    own = lst[item]  # array for this item
    for col, offset in enumerate(l2):
        value = item + offset
        own[item, col] = value  # array for this item
        c[item, col] = value  # main array
    return own

# One delayed myfun call per entry of l1, run on the threading backend;
# results collects the per-item array returned by each call.
results = Parallel(n_jobs=-1, backend="threading")(
    delayed(myfun)(item) for item in l1
)

print(results, '\n')  # item arrays
print(c)  # main array

输出

[array([[1., 2.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.]]), 
 array([[0., 0.],
        [2., 3.],
        [0., 0.],
        [0., 0.],
        [0., 0.]]), 
 array([[0., 0.],
        [0., 0.],
        [3., 4.],
        [0., 0.],
        [0., 0.]]), 
 array([[0., 0.],
        [0., 0.],
        [0., 0.],
        [4., 5.],
        [0., 0.]]), 
 array([[0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [5., 6.]])]

[[1. 2.]
 [2. 3.]
 [3. 4.]
 [4. 5.]
 [5. 6.]]