为什么这段代码不能与 Numba 并行化？

Question

我正在进行物理模拟。我实际上正在尝试为 Ising 模型实现集群 Wolff 算法。这主要涉及在编程方面对 NumPy 数组的操作。我想绘制比热、能量、磁化强度与温度的关系图。对于不同的温度，模拟是完全独立的。因此，我尝试将操作与 numba prange 函数并行化。但它不允许这样做。原因似乎是我从主函数调用的另一个函数。

我的代码在这里：

# -*- coding: utf-8 -*-
"""
Created on Mon Mar 30 18:39:45 2020

@author: ENDEAVOUR
"""

#%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from numba import jit,prange
import timeit
#----------------------------------------------------------------------
##  semi-block codes################
#----------------------------------------------------------------------

start = timeit.default_timer()
error_model=True

@jit(error_model="numpy",nopython=True)
def initialstate(N):   
    ''' generates a random spin configuration for initial condition'''
    state =2*np.random.randint(0,2, size=(N,N))-1
    return state

@jit(error_model="numpy",nopython=True)
def calcEnergy(config):
    '''Energy of a given configuration'''
    N=len(config)
    energy = 0
    for i in range(len(config)):
        for j in range(len(config)):
               S = config[i,j]
               nb = config[(i+1)%N, j] + config[i,(j+1)%N]  + config[(i-1)%N, j] + config[i,(j-1)%N] 
               energy += -nb*S - H*S
    return energy/(4.)

@jit(error_model="numpy",nopython=True)
def calcMag(config):
    '''Magnetization of a given configuration'''
    mag = np.sum(config) + H
    return mag


def wolff(config,p):##wollff cluster implementation 

   ## the followng is the function for a sngle run of cluster making and flipping.


   #print(config)
   '''Monte Carlo move using Wolff-Cluster Sampling'''

   que = []  ### "que" ; stores the list of coordinates of the neighbours aded to the cluster
   (x0,y0) = np.random.randint(len(config)),np.random.randint(len(config)) ## a random spin is chosen at first
   que.append((x0,y0)) ## then added to the "que"


   while (len(que) > 0):## as mentioned in the documents I havesnt u , code must run untill there is nobody left to be added

       #print('Length of que ',len(que))

       q = que[np.random.randint(len(que))] ## a random element from que is chosen

       neighbours = [((q[0]+1)%N,q[1]), ((q[0]-1)%N,q[1]), (q[0],(q[1]+1)%N), (q[0],(q[1]-1)%N) ] ## neighbours to the selected spin are considered
       for c in neighbours:

           if all([config[q]==config[c]  , c not in que,np.random.rand() < p]):
## process of adding spins to the que based on wolff's criteria if they have same spin
              que.append(c)


       config[q] *= -1 ## the spin'q' that was selected from the que is flipped so to avoid being selected in future
       que.remove(q)  ## the spin'q' is removed from the 'que'

   return config



@jit(error_model="numpy",parallel=True)
def run_simulation(N,eqSteps,mcSteps,T,nt):

    E,M,C,X = np.empty(nt), np.empty(nt), np.empty(nt), np.empty(nt)

    n1, n2  = 1.0/(mcSteps*N*N), 1.0/(mcSteps*mcSteps*N*N) 


    for tt in prange(nt):

        E1 = M1 = E2 = M2 = 0
        config = initialstate(N)
        iT=1.0/T[tt]; iT2=iT*iT;
        p = 1 - np.exp(-2*J*iT)

        for i in range(eqSteps):         # equilibrate
            config=wolff(config,p)                # Monte Carlo moves



        for i in range(mcSteps):
            config=wolff(config,p)            
            Ene = calcEnergy(config)     # calculate the energy
            Mag = abs(calcMag(config))       # calculate the magnetisation



            E1 = E1 + Ene
            M1 = M1 + Mag
            M2 = M2 + Mag*Mag 
            E2 = E2 + Ene*Ene


        E[tt] = n1*E1
        M[tt] = n1*M1
        C[tt] = (n1*E2 - n2*E1*E1)*iT2
        X[tt] = (n1*M2 - n2*M1*M1)*iT

        print ("Temp:",T[tt],":", E[tt], M[tt],C[tt],X[tt])


    return E,M,C,X

#def control():
####################################
N  = 64       #N X N spin field
J  = 1
H  = 0
nt = 50
eqSteps = 150
mcSteps = 20

#############################################No change rquired here
T  = np.linspace(1.33, 4.8, nt) 
E,M,C,X=run_simulation(N,eqSteps,mcSteps,T,nt)



f = plt.figure(figsize=(25, 20)); # plot the calculated values    
plt.title("External Field :%5.2f"%(H))
sp =  f.add_subplot(3, 2, 1 );
plt.scatter(T, E, s=50, marker='o', color='Red')
plt.xlabel("Temperature (T)", fontsize=20);
plt.ylabel("Energy ", fontsize=20);         plt.axis('tight');

sp =  f.add_subplot(3, 2, 2 );
plt.scatter(T, M, s=50, marker='o', color='Blue')
plt.xlabel("Temperature (T)", fontsize=20); 
plt.ylabel("Magnetization ", fontsize=20);   plt.axis('tight');

sp =  f.add_subplot(3, 2, 3 );
plt.scatter(T, C, s=50, marker='o', color='Red')
plt.xlabel("Temperature (T)", fontsize=20);  
plt.ylabel("Specific Heat ", fontsize=20);   plt.axis('tight');   

sp =  f.add_subplot(3, 2, 4 );
plt.scatter(T, X, s=50, marker='o', color='RoyalBlue')
plt.xlabel("Temperature (T)", fontsize=20); 
plt.ylabel("Susceptibility", fontsize=20);   plt.axis('tight');

sp = f.add_subplot(3 ,2 ,5);
plt.scatter(E, M,s=50, marker='+', color='Red') 
plt.xlabel("Energy ", fontsize=20);         plt.axis('tight');
plt.ylabel("Magnetization ", fontsize=20);   plt.axis('tight');

plt.show()
stop = timeit.default_timer()

print('Time taken for this simulation:: ', stop - start)

显示的错误消息是

E:/Project/wolff_cluster.2.py:78: NumbaWarning: 
Compilation is falling back to object mode WITH looplifting enabled because Function "run_simulation" failed type inference due to: Untyped global name 'wolff': cannot determine Numba type of <class 'function'>

File "wolff_cluster.2.py", line 94:
def run_simulation(N,eqSteps,mcSteps,T,nt):
    <source elided>
        for i in range(eqSteps):         # equilibrate
            config=wolff(config,p)                # Monte Carlo moves
            ^

  @jit(error_model="numpy",parallel=True,nopython=True)
E:/Project/wolff_cluster.2.py:78: NumbaWarning: 
Compilation is falling back to object mode WITHOUT looplifting enabled because Function "run_simulation" failed type inference due to: cannot determine Numba type of <class 'numba.dispatcher.LiftedLoop'>

File "wolff_cluster.2.py", line 86:
def run_simulation(N,eqSteps,mcSteps,T,nt):
    <source elided>

    for tt in prange(nt):
    ^

  @jit(error_model="numpy",parallel=True,nopython=True)
C:\Users\Sandipan\Anaconda3\lib\site-packages\numba\compiler.py:725: NumbaWarning: Function "run_simulation" was compiled in object mode without forceobj=True, but has lifted loops.

File "wolff_cluster.2.py", line 79:
@jit(error_model="numpy",parallel=True,nopython=True)
def run_simulation(N,eqSteps,mcSteps,T,nt):
^

  self.func_ir.loc))
C:\Users\Sandipan\Anaconda3\lib\site-packages\numba\compiler.py:734: NumbaDeprecationWarning: 
Fall-back from the nopython compilation path to the object mode compilation path has been detected, this is deprecated behaviour.

For more information visit http://numba.pydata.org/numba-doc/latest/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit

File "wolff_cluster.2.py", line 79:
@jit(error_model="numpy",parallel=True,nopython=True)
def run_simulation(N,eqSteps,mcSteps,T,nt):
^

  warnings.warn(errors.NumbaDeprecationWarning(msg, self.func_ir.loc))
E:/Project/wolff_cluster.2.py:78: NumbaWarning: 
Compilation is falling back to object mode WITHOUT looplifting enabled because Function "run_simulation" failed type inference due to: Untyped global name 'wolff': cannot determine Numba type of <class 'function'>

File "wolff_cluster.2.py", line 94:
def run_simulation(N,eqSteps,mcSteps,T,nt):
    <source elided>
        for i in range(eqSteps):         # equilibrate
            config=wolff(config,p)                # Monte Carlo moves
            ^

  @jit(error_model="numpy",parallel=True,nopython=True)
C:\Users\Sandipan\Anaconda3\lib\site-packages\numba\compiler.py:725: NumbaWarning: Function "run_simulation" was compiled in object mode without forceobj=True.

File "wolff_cluster.2.py", line 86:
def run_simulation(N,eqSteps,mcSteps,T,nt):
    <source elided>

    for tt in prange(nt):
    ^

  self.func_ir.loc))
C:\Users\Sandipan\Anaconda3\lib\site-packages\numba\compiler.py:734: NumbaDeprecationWarning: 
Fall-back from the nopython compilation path to the object mode compilation path has been detected, this is deprecated behaviour.

For more information visit http://numba.pydata.org/numba-doc/latest/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit

File "wolff_cluster.2.py", line 86:
def run_simulation(N,eqSteps,mcSteps,T,nt):
    <source elided>

    for tt in prange(nt):
    ^

  warnings.warn(errors.NumbaDeprecationWarning(msg, self.func_ir.loc))
E:/Project/wolff_cluster.2.py:78: NumbaWarning: 
Compilation is falling back to object mode WITHOUT looplifting enabled because Function "run_simulation" failed type inference due to: Untyped global name 'wolff': cannot determine Numba type of <class 'function'>

File "wolff_cluster.2.py", line 94:
def run_simulation(N,eqSteps,mcSteps,T,nt):
    <source elided>
        for i in range(eqSteps):         # equilibrate
            config=wolff(config,p)                # Monte Carlo moves
            ^

  @jit(error_model="numpy",parallel=True,nopython=True)
Temp: 1.33 : -0.9890869140625 0.9942626953125 0.041807132522607905 0.017099021969053343

在显示上述错误后，它像往常一样运行，但与纯 python 版本相比花费了 3 倍。我在没有 numba 的情况下为 Wolff 尝试过，但没有成功。并行化 run_simulation 中的 for 循环非常重要，因为它是代码中最耗时的部分。

我认为问题出在 Wolff 函数上。 Numba 无法对其进行任何优化。但是我没有在那里使用 numba 不支持的任何功能。（仅限简单列表和 numpy）。它标记了 if 条件--'all' 关键字。我试过在 if 中使用 'and' ，但随后程序进入了无限循环。它在队列中多次附加相同的元素（可能忽略了 not in 条件）。我知道这在技术上是不可能的。（'and' 和 'all' 都应该有效）。我做错了什么。但是我不知道它是什么。

你能帮我优化这段代码吗？没有numba它工作正常。但是当 N >100 时，它对于任何有用的工作来说都太慢了。

Answer 1

首先得到一个可运行的代码

并行化通常是最后要考虑的事情。充其量您可以获得比可用核心数量少一点的收益（通常小于一个数量级）。有很多功能不可用，通常最好使其尽可能简单（并在此处完成工作）

在大多数情况下，建议在使用 Numba 时摆脱任何全局变量，这可能会导致难以调试错误或非常耗时的代码重新编译。实际上并没有真正的全局变量，而是它们被硬编码在编译函数中。

例子

import numba as nb
@nb.njit()
def why_globals_have_to_be_avoided(A):
    print(A+B)

A=0
B=5
why_globals_have_to_be_avoided(A)
#5

A=0
B=8
why_globals_have_to_be_avoided(A)
#5

#If this function is buried in your code it will be extremely hard to debug
#If you turn caching on, it may even get harder to find an error.

这个例子非常有趣，因为输入非常小，但执行速度在很大程度上取决于每个计算所依赖的温度 T。工作被简单地吐出 (0.48) 并传递给多个线程。如果除了一个线程之外的所有线程都早得多地完成，您将看不到任何加速。由于在此示例中随机重新排列输入的成本非常低，因此混洗输入是解决此问题的简单方法。

例子

@nb.njit()
def why_globals_have_to_be_avoided(A):
    print(A+B)

A=0
B=5
why_globals_have_to_be_avoided(A)
#5

A=0
B=8
why_globals_have_to_be_avoided(A)
#5

#If this function is burried in your code it will be extremely hard to debug
#If you turn caching on, it may even get harder to find an error.

工作示例

import numpy as np
import numba as nb
#----------------------------------------------------------------------
##  semi-block codes################
#----------------------------------------------------------------------

@nb.njit(error_model="numpy")
def initialstate(N):   
    ''' generates a random spin configuration for initial condition'''
    state =2*np.random.randint(0,2, size=(N,N))-1
    return state

@nb.njit(error_model="numpy")
def calcEnergy(config,H):
    '''Energy of a given configuration'''
    N=len(config)
    energy = 0
    for i in range(len(config)):
        for j in range(len(config)):
            S = config[i,j]
            nb = config[(i+1)%N, j] + config[i,(j+1)%N]  + config[(i-1)%N, j] + config[i,(j-1)%N] 
            energy += -nb*S - H*S
    return energy/(4.)

@nb.njit(error_model="numpy")
def calcMag(config,H):
    '''Magnetization of a given configuration'''
    mag = np.sum(config) + H
    return mag

@nb.njit(error_model="numpy")
def wolff(config,p,N):##wollff cluster implementation 
   ## the followng is the function for a sngle run of cluster making and flipping.
    que = []  ### "que" ; stores the list of coordinates of the neighbours aded to the cluster
    x0,y0 = np.random.randint(len(config)),np.random.randint(len(config)) ## a random spin is chosen at first
    que.append((x0,y0)) ## then added to the "que"


    while (len(que) > 0):## as mentioned in the documents I havesnt u , code must run untill there is nobody left to be added
        q = que[np.random.randint(len(que))] ## a random element from que is chosen

        neighbours = [((q[0]+1)%N,q[1]), ((q[0]-1)%N,q[1]), (q[0],(q[1]+1)%N), (q[0],(q[1]-1)%N) ] ## neighbours to the selected spin are considered
        for c in neighbours:
            if config[q]==config[c]  and c not in que and np.random.rand() < p:## process of adding spins to the que based on wolff's criteria if they have same spin
                que.append(c)


        config[q] *= -1 ## the spin'q' that was selected from the que is flipped so to avoid being selected in future
        que.remove(q)  ## the spin'q' is removed from the 'que'

    return config


@nb.njit(error_model="numpy",parallel=True)
def run_simulation(N,eqSteps,mcSteps,T,J,H):
    nt=T.shape[0]

    E,M,C,X = np.empty(nt), np.empty(nt), np.empty(nt), np.empty(nt)

    n1, n2  = 1.0/(mcSteps*N*N), 1.0/(mcSteps*mcSteps*N*N) 

    #It looks like the calculation time heavily depends on the Temperature
    #shuffle the values to get the work more equaly splitted
    #np.random.shuffle isn't supported by Numba, but you can use a Python callback
    with nb.objmode(ind='int32[::1]'): 
        ind = np.arange(nt)
        np.random.shuffle(ind)

    ind_rev=np.argsort(ind)
    T=T[ind]

    for tt in nb.prange(nt):

        E1 = M1 = E2 = M2 = 0
        config = initialstate(N)
        iT=1.0/T[tt]; iT2=iT*iT;
        p = 1 - np.exp(-2*J*iT)

        for i in range(eqSteps):           # equilibrate
            config=wolff(config,p,N)       # Monte Carlo moves

        for i in range(mcSteps):
            config=wolff(config,p,N)            
            Ene = calcEnergy(config,H)     # calculate the energy
            Mag = abs(calcMag(config,H))   # calculate the magnetisation

            E1 = E1 + Ene
            M1 = M1 + Mag
            M2 = M2 + Mag*Mag 
            E2 = E2 + Ene*Ene


        E[tt] = n1*E1
        M[tt] = n1*M1
        C[tt] = (n1*E2 - n2*E1*E1)*iT2
        X[tt] = (n1*M2 - n2*M1*M1)*iT

        #print ("Temp:",T[tt],":", E[tt], M[tt],C[tt],X[tt])

        #undo the shuffling
    return E[ind_rev],M[ind_rev],C[ind_rev],X[ind_rev]

#def control():
####################################
N  = 64       #N X N spin field
J  = 1
H  = 0
nt = 50
eqSteps = 150
mcSteps = 20

#############################################No change rquired here
T  = np.linspace(1.33, 4.8, nt) 

#You can measure the compilation time separately, it doesn't make sense to mix 
#runtime and compialtion time together.
#If the compilation time gets relevant it also make sense to use `cache=True`
E,M,C,X=run_simulation(N,eqSteps,mcSteps,T,J,H)

%timeit E,M,C,X=run_simulation(N,eqSteps,mcSteps,T,J,H)
1.17 s ± 74.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)

#without parallelization
2.1 s ± 44 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)

#without compilation
~130 s

为什么这段代码不能与 Numba 并行化？

Why this code can't be parallelised with Numba?

optimization

physics

numpy

python-3.x

numba

首先得到一个可运行的代码