joblib 并行返回重复数组
joblib Parallel returning duplicate arrays
import multiprocessing
from joblib import Parallel, delayed
import numpy as np
# One task is run per entry of l1; l2 holds the offset added for each column.
l1 = [0, 1, 2, 3, 4]
l2 = [1, 2]
# Single module-level array written to by every call of myfun.
# np.empty does not initialise its contents, so rows are only meaningful
# once myfun has filled them.
c = np.empty([5, 2])


def myfun(item):
    """Fill row ``item`` of the module-level array ``c`` and return ``c``.

    Column ``i`` of that row is set to ``item + l2[i]``.

    Note: the value returned is the module-level array itself, not a copy,
    so every call returns the very same object.
    """
    for i, ele in enumerate(l2):
        c[item, i] = item + ele
    return c
# Schedule one myfun call per element of l1 on the "threading" backend and
# collect the return values in a list.
results = Parallel(n_jobs=-1, backend="threading")(
    delayed(myfun)(item) for item in l1
)
我预计结果是
[[1., 2.],
[2., 3.],
[3., 4.],
[4., 5.],
[5., 6.]]
为什么我得到的是五个相同的数组,而不是一个?
问题在于每个线程更新的都是同一个数组 c,而 myfun 返回的也正是这个数组本身。因此 results 中的 5 个元素都指向同一个对象,打印结果时只是把同一个数组打印了 5 次。
要让每个任务(即 l1 中的每一项)得到各自独立的结果,您需要为每个任务创建主数组的副本。
试试这个代码:
import multiprocessing
from joblib import Parallel, delayed
import numpy as np
# One task is run per entry of l1; l2 holds the offset added for each column.
l1 = [0, 1, 2, 3, 4]
l2 = [1, 2]
c = np.zeros([5, 2])  # main array, zero-initialised
lst = [np.copy(c) for _ in l1]  # array for each item


def myfun(item):
    """Fill row ``item`` in both the per-item array and the main array.

    Column ``i`` of the row is set to ``item + l2[i]`` in ``lst[item]`` and
    in the module-level array ``c``.

    Returns the per-item array ``lst[item]``, which is a separate copy for
    each item, so different calls return different objects.
    """
    cc = lst[item]  # array for this item
    for i, ele in enumerate(l2):
        cc[item, i] = item + ele  # array for this item
        c[item, i] = item + ele  # main array
    return cc
# Schedule one myfun call per element of l1 on the "threading" backend and
# collect the return values in a list.
results = Parallel(n_jobs=-1, backend="threading")(
    delayed(myfun)(item) for item in l1
)
print(results, '\n')  # arrays returned for the individual items
print(c)  # main array
输出
[array([[1., 2.],
[0., 0.],
[0., 0.],
[0., 0.],
[0., 0.]]),
array([[0., 0.],
[2., 3.],
[0., 0.],
[0., 0.],
[0., 0.]]),
array([[0., 0.],
[0., 0.],
[3., 4.],
[0., 0.],
[0., 0.]]),
array([[0., 0.],
[0., 0.],
[0., 0.],
[4., 5.],
[0., 0.]]),
array([[0., 0.],
[0., 0.],
[0., 0.],
[0., 0.],
[5., 6.]])]
[[1. 2.]
[2. 3.]
[3. 4.]
[4. 5.]
[5. 6.]]
import multiprocessing
from joblib import Parallel, delayed
import numpy as np
# One task is run per entry of l1; l2 holds the offset added for each column.
l1 = [0, 1, 2, 3, 4]
l2 = [1, 2]
# Single module-level array written to by every call of myfun.
# np.empty does not initialise its contents, so rows are only meaningful
# once myfun has filled them.
c = np.empty([5, 2])


def myfun(item):
    """Fill row ``item`` of the module-level array ``c`` and return ``c``.

    Column ``i`` of that row is set to ``item + l2[i]``.

    Note: the value returned is the module-level array itself, not a copy,
    so every call returns the very same object.
    """
    for i, ele in enumerate(l2):
        c[item, i] = item + ele
    return c
# Schedule one myfun call per element of l1 on the "threading" backend and
# collect the return values in a list.
results = Parallel(n_jobs=-1, backend="threading")(
    delayed(myfun)(item) for item in l1
)
我预计结果是
[[1., 2.],
[2., 3.],
[3., 4.],
[4., 5.],
[5., 6.]]
为什么我得到的是五个相同的数组,而不是一个?
问题在于每个线程更新的都是同一个数组 c,而 myfun 返回的也正是这个数组本身。因此 results 中的 5 个元素都指向同一个对象,打印结果时只是把同一个数组打印了 5 次。
要让每个任务(即 l1 中的每一项)得到各自独立的结果,您需要为每个任务创建主数组的副本。
试试这个代码:
import multiprocessing
from joblib import Parallel, delayed
import numpy as np
# One task is run per entry of l1; l2 holds the offset added for each column.
l1 = [0, 1, 2, 3, 4]
l2 = [1, 2]
c = np.zeros([5, 2])  # main array, zero-initialised
lst = [np.copy(c) for _ in l1]  # array for each item


def myfun(item):
    """Fill row ``item`` in both the per-item array and the main array.

    Column ``i`` of the row is set to ``item + l2[i]`` in ``lst[item]`` and
    in the module-level array ``c``.

    Returns the per-item array ``lst[item]``, which is a separate copy for
    each item, so different calls return different objects.
    """
    cc = lst[item]  # array for this item
    for i, ele in enumerate(l2):
        cc[item, i] = item + ele  # array for this item
        c[item, i] = item + ele  # main array
    return cc
# Schedule one myfun call per element of l1 on the "threading" backend and
# collect the return values in a list.
results = Parallel(n_jobs=-1, backend="threading")(
    delayed(myfun)(item) for item in l1
)
print(results, '\n')  # arrays returned for the individual items
print(c)  # main array
输出
[array([[1., 2.],
[0., 0.],
[0., 0.],
[0., 0.],
[0., 0.]]),
array([[0., 0.],
[2., 3.],
[0., 0.],
[0., 0.],
[0., 0.]]),
array([[0., 0.],
[0., 0.],
[3., 4.],
[0., 0.],
[0., 0.]]),
array([[0., 0.],
[0., 0.],
[0., 0.],
[4., 5.],
[0., 0.]]),
array([[0., 0.],
[0., 0.],
[0., 0.],
[0., 0.],
[5., 6.]])]
[[1. 2.]
[2. 3.]
[3. 4.]
[4. 5.]
[5. 6.]]