Numba 无法并行化循环
Numba fails to parallelize a loop
我运行了下面的代码,但其中使用 prange 的循环没有被并行化,尽管它显然是可以并行的:
import numpy as np
from numba import njit, prange
@njit(parallel=True)
def f1(money, u, v, cost_u, cost_v):
    """For every budget, pick the affordable (u, v) pair with the largest total value.

    For each ``money[ia]`` the search looks for indices ``iu`` and ``iv`` that
    maximise ``u[iu] + v[iv]`` subject to
    ``cost_u[iu] + cost_v[iv] <= money[ia]``.

    The scan relies on ``cost_u`` and ``cost_v`` being sorted in increasing
    order (this is not checked) and on both being non-empty.

    Parameters
    ----------
    money : 1-D array of budgets, one search per entry.
    u, v : 1-D arrays of values, indexed like ``cost_u`` / ``cost_v``.
    cost_u, cost_v : 1-D arrays of costs.

    Returns
    -------
    (iu_opt, iv_opt) : two ``int16`` arrays of length ``money.size`` holding
        the chosen indices. When not even ``cost_u[0] + cost_v[0]`` fits the
        budget, the pair ``(0, 0)`` is reported for that budget.
    """
    na = money.size
    ncu = cost_u.size
    ncv = cost_v.size
    # NOTE: int16 storage limits the representable indices to 32767.
    iu_opt = np.empty((na,), dtype=np.int16)
    iv_opt = np.empty((na,), dtype=np.int16)
    for ia in prange(na):
        money_i = money[ia]
        # Start from iv = 0 paired with the most expensive u that still fits.
        # The same "> budget" rejection test as in the main loop is used, so
        # a pair whose cost equals the budget exactly is accepted here too.
        iu = ncu - 1
        while iu > 0 and cost_u[iu] + cost_v[0] > money_i:
            iu -= 1
        ivbest = 0
        iubest = iu
        # Score of the initial candidate.
        score_best = u[iu] + v[0]
        # Then try progressively more expensive v.
        for iv in range(1, ncv):
            # Not enough money for this v even together with the cheapest u;
            # costs are increasing, so no later iv can fit either.
            if cost_v[iv] + cost_u[0] > money_i:
                break
            # A more expensive v can only lower the affordable iu, so keep
            # walking iu downwards from where the previous iv left it.
            while cost_v[iv] + cost_u[iu] > money_i:
                iu -= 1
            # Guaranteed by the guard above: iu = 0 is affordable for this iv.
            assert iu >= 0
            score_now = u[iu] + v[iv]
            if score_now > score_best:
                ivbest = iv
                iubest = iu
                score_best = score_now
        iu_opt[ia] = iubest
        iv_opt[ia] = ivbest
    return iu_opt, iv_opt
# Problem sizes: number of budgets and number of u / v alternatives.
na = 50
ncu = 204
ncv = 205

# Budgets and cost grids, evenly spaced on [0, 1).
money = np.arange(na) / na
cost_u = np.arange(ncu) / ncu
cost_v = np.arange(ncv) / ncv

# Values are cumulative sums of uniform draws from [0, 1), so they never
# decrease. u is drawn before v, as in the original ordering of the draws.
u = np.cumsum(np.random.random_sample(ncu))
v = np.cumsum(np.random.random_sample(ncv))

# Run the search once, then print Numba's parallel-transform report.
iu, iv = f1(money, u, v, cost_u, cost_v)
f1.parallel_diagnostics(level=4)
如果有帮助的话,问题的设定如下:取得价值 u[i] 的成本为 cost_u[i],取得价值 v[j] 的成本为 cost_v[j](所有这些序列都是严格递增的)。对于每个预算 money[ia],我们希望找到 i 和 j,使 u[i] + v[j] 最大,同时满足 cost_u[i] + cost_v[j] 不超过 money[ia]。
以防有人遇到类似的问题:我最终把大循环的循环体拆分到另一个 njit 函数中,从而解决了这个问题。代码如下:
import numpy as np
from numba import njit, prange
@njit(parallel=True)
def f1(money, u, v, cost_u, cost_v):
    """Run the per-budget search of ``f1_int`` for every entry of ``money``.

    Each budget ``money[k]`` is handed to ``f1_int`` together with the value
    arrays ``u``, ``v`` and the cost arrays ``cost_u``, ``cost_v``; the pair of
    indices it returns is stored at position ``k`` of the two result arrays.
    How the best pair is chosen (including boundary handling) is defined by
    ``f1_int``.

    Returns
    -------
    (iu_opt, iv_opt) : two ``int16`` arrays of length ``money.size``.
    """
    n_budgets = money.size
    iu_opt = np.empty(n_budgets, dtype=np.int16)
    iv_opt = np.empty(n_budgets, dtype=np.int16)
    # Every iteration writes only to its own slot k of the outputs.
    for k in prange(n_budgets):
        best_iu, best_iv = f1_int(money[k], u, v, cost_u, cost_v)
        iu_opt[k] = best_iu
        iv_opt[k] = best_iv
    return iu_opt, iv_opt
@njit
def f1_int(money_i, u, v, cost_u, cost_v):
    """Find the affordable (u, v) pair with the largest total value for one budget.

    Looks for indices ``iu`` and ``iv`` that maximise ``u[iu] + v[iv]`` subject
    to ``cost_u[iu] + cost_v[iv] <= money_i``.

    The scan relies on ``cost_u`` and ``cost_v`` being sorted in increasing
    order (this is not checked) and on both being non-empty.

    Parameters
    ----------
    money_i : scalar budget.
    u, v : 1-D arrays of values, indexed like ``cost_u`` / ``cost_v``.
    cost_u, cost_v : 1-D arrays of costs.

    Returns
    -------
    (iubest, ivbest) : the chosen indices. When not even
        ``cost_u[0] + cost_v[0]`` fits the budget, ``(0, 0)`` is returned.
    """
    ncu = cost_u.size
    ncv = cost_v.size
    # Start from iv = 0 paired with the most expensive u that still fits.
    # The same "> budget" rejection test as in the main loop is used, so a
    # pair whose cost equals the budget exactly is accepted here too.
    iu = ncu - 1
    while iu > 0 and cost_u[iu] + cost_v[0] > money_i:
        iu -= 1
    ivbest = 0
    iubest = iu
    # Score of the initial candidate.
    score_best = u[iu] + v[0]
    # Then try progressively more expensive v.
    for iv in range(1, ncv):
        # Not enough money for this v even together with the cheapest u;
        # costs are increasing, so no later iv can fit either.
        if cost_v[iv] + cost_u[0] > money_i:
            break
        # A more expensive v can only lower the affordable iu, so keep
        # walking iu downwards from where the previous iv left it.
        while cost_v[iv] + cost_u[iu] > money_i:
            iu -= 1
        # Guaranteed by the guard above: iu = 0 is affordable for this iv.
        assert iu >= 0
        score_now = u[iu] + v[iv]
        if score_now > score_best:
            ivbest = iv
            iubest = iu
            score_best = score_now
    return iubest, ivbest
# Problem sizes: number of budgets and number of u / v alternatives.
na = 50
ncu = 204
ncv = 205

# Budgets and cost grids, evenly spaced on [0, 1).
money = np.arange(na) / na
cost_u = np.arange(ncu) / ncu
cost_v = np.arange(ncv) / ncv

# Values are cumulative sums of uniform draws from [0, 1), so they never
# decrease. u is drawn before v, as in the original ordering of the draws.
u = np.cumsum(np.random.random_sample(ncu))
v = np.cumsum(np.random.random_sample(ncv))

# Run the search once, then print Numba's parallel-transform report.
iu, iv = f1(money, u, v, cost_u, cost_v)
f1.parallel_diagnostics(level=4)
这并没有真正解释问题出现的原因,但这种做法确实有效。
我运行了下面的代码,但其中使用 prange 的循环没有被并行化,尽管它显然是可以并行的:
import numpy as np
from numba import njit, prange
@njit(parallel=True)
def f1(money, u, v, cost_u, cost_v):
    """For every budget, pick the affordable (u, v) pair with the largest total value.

    For each ``money[ia]`` the search looks for indices ``iu`` and ``iv`` that
    maximise ``u[iu] + v[iv]`` subject to
    ``cost_u[iu] + cost_v[iv] <= money[ia]``.

    The scan relies on ``cost_u`` and ``cost_v`` being sorted in increasing
    order (this is not checked) and on both being non-empty.

    Parameters
    ----------
    money : 1-D array of budgets, one search per entry.
    u, v : 1-D arrays of values, indexed like ``cost_u`` / ``cost_v``.
    cost_u, cost_v : 1-D arrays of costs.

    Returns
    -------
    (iu_opt, iv_opt) : two ``int16`` arrays of length ``money.size`` holding
        the chosen indices. When not even ``cost_u[0] + cost_v[0]`` fits the
        budget, the pair ``(0, 0)`` is reported for that budget.
    """
    na = money.size
    ncu = cost_u.size
    ncv = cost_v.size
    # NOTE: int16 storage limits the representable indices to 32767.
    iu_opt = np.empty((na,), dtype=np.int16)
    iv_opt = np.empty((na,), dtype=np.int16)
    for ia in prange(na):
        money_i = money[ia]
        # Start from iv = 0 paired with the most expensive u that still fits.
        # The same "> budget" rejection test as in the main loop is used, so
        # a pair whose cost equals the budget exactly is accepted here too.
        iu = ncu - 1
        while iu > 0 and cost_u[iu] + cost_v[0] > money_i:
            iu -= 1
        ivbest = 0
        iubest = iu
        # Score of the initial candidate.
        score_best = u[iu] + v[0]
        # Then try progressively more expensive v.
        for iv in range(1, ncv):
            # Not enough money for this v even together with the cheapest u;
            # costs are increasing, so no later iv can fit either.
            if cost_v[iv] + cost_u[0] > money_i:
                break
            # A more expensive v can only lower the affordable iu, so keep
            # walking iu downwards from where the previous iv left it.
            while cost_v[iv] + cost_u[iu] > money_i:
                iu -= 1
            # Guaranteed by the guard above: iu = 0 is affordable for this iv.
            assert iu >= 0
            score_now = u[iu] + v[iv]
            if score_now > score_best:
                ivbest = iv
                iubest = iu
                score_best = score_now
        iu_opt[ia] = iubest
        iv_opt[ia] = ivbest
    return iu_opt, iv_opt
# Problem sizes: number of budgets and number of u / v alternatives.
na = 50
ncu = 204
ncv = 205

# Budgets and cost grids, evenly spaced on [0, 1).
money = np.arange(na) / na
cost_u = np.arange(ncu) / ncu
cost_v = np.arange(ncv) / ncv

# Values are cumulative sums of uniform draws from [0, 1), so they never
# decrease. u is drawn before v, as in the original ordering of the draws.
u = np.cumsum(np.random.random_sample(ncu))
v = np.cumsum(np.random.random_sample(ncv))

# Run the search once, then print Numba's parallel-transform report.
iu, iv = f1(money, u, v, cost_u, cost_v)
f1.parallel_diagnostics(level=4)
如果有帮助的话,问题的设定如下:取得价值 u[i] 的成本为 cost_u[i],取得价值 v[j] 的成本为 cost_v[j](所有这些序列都是严格递增的)。对于每个预算 money[ia],我们希望找到 i 和 j,使 u[i] + v[j] 最大,同时满足 cost_u[i] + cost_v[j] 不超过 money[ia]。
以防有人遇到类似的问题:我最终把大循环的循环体拆分到另一个 njit 函数中,从而解决了这个问题。代码如下:
import numpy as np
from numba import njit, prange
@njit(parallel=True)
def f1(money, u, v, cost_u, cost_v):
    """Run the per-budget search of ``f1_int`` for every entry of ``money``.

    Each budget ``money[k]`` is handed to ``f1_int`` together with the value
    arrays ``u``, ``v`` and the cost arrays ``cost_u``, ``cost_v``; the pair of
    indices it returns is stored at position ``k`` of the two result arrays.
    How the best pair is chosen (including boundary handling) is defined by
    ``f1_int``.

    Returns
    -------
    (iu_opt, iv_opt) : two ``int16`` arrays of length ``money.size``.
    """
    n_budgets = money.size
    iu_opt = np.empty(n_budgets, dtype=np.int16)
    iv_opt = np.empty(n_budgets, dtype=np.int16)
    # Every iteration writes only to its own slot k of the outputs.
    for k in prange(n_budgets):
        best_iu, best_iv = f1_int(money[k], u, v, cost_u, cost_v)
        iu_opt[k] = best_iu
        iv_opt[k] = best_iv
    return iu_opt, iv_opt
@njit
def f1_int(money_i, u, v, cost_u, cost_v):
    """Find the affordable (u, v) pair with the largest total value for one budget.

    Looks for indices ``iu`` and ``iv`` that maximise ``u[iu] + v[iv]`` subject
    to ``cost_u[iu] + cost_v[iv] <= money_i``.

    The scan relies on ``cost_u`` and ``cost_v`` being sorted in increasing
    order (this is not checked) and on both being non-empty.

    Parameters
    ----------
    money_i : scalar budget.
    u, v : 1-D arrays of values, indexed like ``cost_u`` / ``cost_v``.
    cost_u, cost_v : 1-D arrays of costs.

    Returns
    -------
    (iubest, ivbest) : the chosen indices. When not even
        ``cost_u[0] + cost_v[0]`` fits the budget, ``(0, 0)`` is returned.
    """
    ncu = cost_u.size
    ncv = cost_v.size
    # Start from iv = 0 paired with the most expensive u that still fits.
    # The same "> budget" rejection test as in the main loop is used, so a
    # pair whose cost equals the budget exactly is accepted here too.
    iu = ncu - 1
    while iu > 0 and cost_u[iu] + cost_v[0] > money_i:
        iu -= 1
    ivbest = 0
    iubest = iu
    # Score of the initial candidate.
    score_best = u[iu] + v[0]
    # Then try progressively more expensive v.
    for iv in range(1, ncv):
        # Not enough money for this v even together with the cheapest u;
        # costs are increasing, so no later iv can fit either.
        if cost_v[iv] + cost_u[0] > money_i:
            break
        # A more expensive v can only lower the affordable iu, so keep
        # walking iu downwards from where the previous iv left it.
        while cost_v[iv] + cost_u[iu] > money_i:
            iu -= 1
        # Guaranteed by the guard above: iu = 0 is affordable for this iv.
        assert iu >= 0
        score_now = u[iu] + v[iv]
        if score_now > score_best:
            ivbest = iv
            iubest = iu
            score_best = score_now
    return iubest, ivbest
# Problem sizes: number of budgets and number of u / v alternatives.
na = 50
ncu = 204
ncv = 205

# Budgets and cost grids, evenly spaced on [0, 1).
money = np.arange(na) / na
cost_u = np.arange(ncu) / ncu
cost_v = np.arange(ncv) / ncv

# Values are cumulative sums of uniform draws from [0, 1), so they never
# decrease. u is drawn before v, as in the original ordering of the draws.
u = np.cumsum(np.random.random_sample(ncu))
v = np.cumsum(np.random.random_sample(ncv))

# Run the search once, then print Numba's parallel-transform report.
iu, iv = f1(money, u, v, cost_u, cost_v)
f1.parallel_diagnostics(level=4)
这并没有真正解释问题出现的原因,但这种做法确实有效。