如何使用 numba 加速以下代码？

Question

我在做分子动力学模拟。它包括数值积分、许多 for 循环、处理大型 NumPy 数组。我尽可能地尝试使用 NumPy 函数和数组。但是代码还是太慢了。我想过使用 numba jit 作为加速器。但它总是抛出一个错误信息。

这是代码。

# -*- coding: utf-8 -*-
"""
Created on Sat Mar 28 12:10:42 2020

@author: Sandipan
"""

import numpy as np
import matplotlib.pyplot as plt
from numba import jit
import os
import sys

# Setting up the simulation
NSteps =100 # Number of steps
deltat = 0.005 # Time step in reduced time units
temp   = 0.851# #Reduced temperature
DumpFreq = 100 # Save the position to file every DumpFreq steps
epsilon = 1.0 # LJ parameter for the energy between particles
DIM     =3
N       =500
density =0.776
Rcutoff =3


#----------------------Function Definitions---------------------

#------------------Initialise Configuration--------

@jit(nopython=True)
def initialise_config(N,DIM,density):
    velocity = (np.random.randn(N,DIM)-0.5)


    # Set initial momentum to zero
    COM_V = np.sum(velocity)/N     #Center of mass velocity
    velocity = velocity - COM_V    # Fix any center-of-mass drift

    # Calculate initial kinetic energy
    k_energy=0
    for i in range (N):
        k_energy+=np.dot(velocity[i],velocity[i])
    vscale=np.sqrt(DIM*temp/k_energy)
    velocity*=vscale

    #Initialize with zeroes
    coords = np.zeros([N,DIM]);

    # Get the cooresponding box size
    L = (N/density)**(1.0/DIM)

    """ Find the lowest perfect cube greater than or equal to the number of
     particles"""
    nCube = 2

    while (nCube**3 < N):
        nCube = nCube + 1



    # Assign particle positions
    ip=-1
    x=0
    y=0
    z=0

    for i in range(0,nCube):
        for j in range(0,nCube):
            for k in range(0,nCube):
                if(ip<N):
                    x=(i+0.5)*(L/nCube)
                    y=(j+0.5)*(L/nCube)
                    z=(k+0.5)*(L/nCube)
                    coords[ip]=np.array([x,y,z])
                    ip=ip+1
                else:
                    break
    return coords,velocity,L


@jit(nopython=True)
def wrap(pos,L):
    '''Apply perodic boundary conditions.'''

    for i in range (len(pos)):
        for k in range(DIM):
                if (pos[i][k]>0.5):
                    pos[i][k]=pos[i][k]-1
                if (pos[i][k]<-0.5):
                    pos[i][k]=pos[i][k]+1


    return (pos)    






@jit(nopython=True)
def LJ_Forces(pos,acc,epsilon,L,DIM,N):
    # Compute forces on positions using the Lennard-Jones potential
    # Uses double nested loop which is slow O(N^2) time unsuitable for large systems
    Sij = np.zeros(DIM) # Box scaled units
    Rij = np.zeros(DIM) # Real space units

    #Set all variables to zero
    ene_pot = np.zeros(N)
    acc = acc*0
    virial=0.0

    # Loop over all pairs of particles
    for i in range(N-1):
        for j in range(i+1,N): #i+1 to N ensures we do not double count
            Sij = pos[i]-pos[j] # Distance in box scaled units
            for l in range(DIM): # Periodic interactions
                if (np.abs(Sij[l])>0.5):
                    Sij[l] = Sij[l] - np.copysign(1.0,Sij[l]) # If distance is greater than 0.5  (scaled units) then subtract 0.5 to find periodic interaction distance.

            Rij   = L*Sij # Scale the box to the real units in this case reduced LJ units
            Rsqij = np.dot(Rij,Rij) # Calculate the square of the distance

            if(Rsqij < Rcutoff**2):
                # Calculate LJ potential inside cutoff
                # We calculate parts of the LJ potential at a time to improve the efficieny of the computation (most important for compiled code)
                rm2      = 1.0/Rsqij # 1/r^2
                rm6      = rm2**3
                forcefact=(rm2**4)*(rm6-0.5) # 1/r^6
                phi      =4*(rm6**2-rm6)

                ene_pot[i] = ene_pot[i]+0.5*phi # Accumulate energy

                ene_pot[j] = ene_pot[j]+0.5*phi # Accumulate energy

                virial     = virial-forcefact*Rsqij # Virial is needed to calculate the pressure
                acc[i]     = acc[i]+forcefact*Sij # Accumulate forces
                acc[j]     = acc[j]-forcefact*Sij # (Fji=-Fij)
    return 48*acc, np.sum(ene_pot)/N, -virial/DIM # return the acceleration vector, potential energy and virial coefficient


@jit(nopython=True)
def Calculate_Temperature(vel,L,DIM,N):

    ene_kin = 0.0

    for i in range(N):
        real_vel = L*vel[i]
        ene_kin = ene_kin + 0.5*np.dot(real_vel,real_vel)

    ene_kin_aver = 1.0*ene_kin/N
    temperature = 2.0*ene_kin_aver/DIM

    return ene_kin_aver,temperature


# Main MD loop
@jit(nopython=True)
def main():

    # Vectors to store parameter values at each step

    ene_kin_aver = np.ones(NSteps)
    ene_pot_aver = np.ones(NSteps)
    temperature = np.ones(NSteps)
    virial = np.ones(NSteps)
    pressure = np.ones(NSteps)


    pos,vel,L = initialise_config(N,DIM,density)
    acc = (np.random.randn(N,DIM)-0.5)
    volume=L**3

    # Open file which we will save the outputs to
    if os.path.exists('energy2'):
        os.remove('energy2')
    f = open('traj.xyz', 'w')

    for k in range(NSteps):

        # Refold positions according to periodic boundary conditions
        pos=wrap(pos,L)

        # r(t+dt) modify positions according to velocity and acceleration
        pos = pos + deltat*vel + 0.5*(deltat**2.0)*acc # Step 1

        # Calculate temperature
        ene_kin_aver[k],temperature[k] = Calculate_Temperature(vel,L,DIM,N)

        # Rescale velocities and take half step
        chi = np.sqrt(temp/temperature[k])
        vel = chi*vel + 0.5*deltat*acc # v(t+dt/2) Step 2

        # Compute forces a(t+dt),ene_pot,virial
        acc, ene_pot_aver[k], virial[k] = LJ_Forces(pos,acc,epsilon,L,DIM,N) # Step 3

        # Complete the velocity step 
        vel = vel + 0.5*deltat*acc # v(t+dt/2) Step 4

        # Calculate temperature
        ene_kin_aver[k],temperature[k] = Calculate_Temperature(vel,L,DIM,N)

        # Calculate pressure
        pressure[k]= density*temperature[k] + virial[k]/volume

    # Print output to file every DumpFreq number of steps
        if(k%DumpFreq==0): # The % symbol is the modulus so if the Step is a whole multiple of DumpFreq then print the values

            f.write("%s\n" %(N)) # Write the number of particles to file
            # Write all of the quantities at this step to the file
            f.write("Energy %s, Temperature %.5f\n" %(ene_kin_aver[k]+ene_pot_aver[k],temperature[k]))
            for n in range(N): # Write the positions to file
                f.write("X"+" ")
                for l in range(DIM):
                    f.write(str(pos[n][l]*L)+" ")
                f.write("\n")


        if (k%5==0):
           # print("\rStep: {0} KE: {1}   PE: {2} Energy:  {3}".format(k, ene_kin_aver[k], ene_pot_aver[k],ene_kin_aver[k]+ene_pot_aver[k]))
            sys.stdout.write("\rStep: {0} KE: {1}   PE: {2} Energy:  {3}".format(k, ene_kin_aver[k], ene_pot_aver[k],ene_kin_aver[k]+ene_pot_aver[k]))
            sys.stdout.flush()

    return ene_kin_aver, ene_pot_aver, temperature, pressure, pos

#------------------------------------------------------    
ene_kin_aver, ene_pot_aver, temperature, pressure, pos = main()




# Plot all of the quantities
def plot():
    plt.figure(figsize=[7,12])
    plt.rc('xtick', labelsize=15) 
    plt.rc('ytick', labelsize=15)
    plt.subplot(4, 1, 1)
    plt.plot(ene_kin_aver,'k-')
    plt.ylabel(r"$E_{K}", fontsize=20)
    plt.subplot(4, 1, 2)
    plt.plot(ene_pot_aver,'k-')
    plt.ylabel(r"$E_{P}$", fontsize=20)
    plt.subplot(4, 1, 3)
    plt.plot(temperature,'k-')
    plt.ylabel(r"$T$", fontsize=20)
    plt.subplot(4, 1, 4)
    plt.plot(pressure,'k-')
    plt.ylabel(r"$P$", fontsize=20)
    plt.show()

plot()

我得到的错误是：


runfile('E:/Project/LJMD4.py', wdir='E:/Project')
Traceback (most recent call last):

  File "<ipython-input-8-aeebce887079>", line 1, in <module>
    runfile('E:/Project/LJMD4.py', wdir='E:/Project')

  File "C:\Users\Sandipan\Anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 827, in runfile
    execfile(filename, namespace)

  File "C:\Users\Sandipan\Anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 110, in execfile
    exec(compile(f.read(), filename, 'exec'), namespace)

  File "E:/Project/LJMD4.py", line 226, in <module>
    ene_kin_aver, ene_pot_aver, temperature, pressure, pos = main()

  File "C:\Users\Sandipan\Anaconda3\lib\site-packages\numba\dispatcher.py", line 351, in _compile_for_args
    error_rewrite(e, 'typing')

  File "C:\Users\Sandipan\Anaconda3\lib\site-packages\numba\dispatcher.py", line 318, in error_rewrite
    reraise(type(e), e, None)

  File "C:\Users\Sandipan\Anaconda3\lib\site-packages\numba\six.py", line 658, in reraise
    raise value.with_traceback(tb)

TypingError: cannot determine Numba type of <class 'builtin_function_or_method'>

当我在网上搜索时，我发现numba可能不支持我正在使用的某些功能。但我没有使用任何 Pandas 或其他数据框。我只是使用纯 python 循环或 NumPy，据 numba 文档显示，它们得到了很好的支持。我尝试从某些函数中删除 numba 并设置为 nopython=0 但它仍然会发送不同的错误消息。我不知道它有什么问题。如果没有 numba，代码将无法实际使用。关于加速的任何进一步提示都会有很大帮助。提前谢谢你。

Answer 1

几个常见错误

使用了不受支持的函数

文件操作，多字符串操作。这些可以在 objmode block 中。在这个例子中，我把这些东西注释掉了。
数组初始化方式错误

只支持元组，不支持列表（Numpy 两者都接受，但文档中只提到了元组）
检查被零除并抛出异常

这是 Python 的标准行为，但不是 Numpy。如果你想避免这种开销（if/else 在每个部门）打开 Numpy 默认行为（error_model="numpy"）。
全局变量的使用

全局变量被硬编码到编译后的代码中（就像您将它们直接写入代码中一样）。不重新编译就不能改变它们。
Numpy 数组索引错误

pos[i][k] 而不是 pos[i,k]。 Numba 可能会对此进行优化，但这对纯 Python 代码有相当明显的负面影响。

工作版本

# -*- coding: utf-8 -*-
"""
Created on Sat Mar 28 12:10:42 2020

@author: Sandipan
"""

import numpy as np
import matplotlib.pyplot as plt
from numba import jit
import os
import sys

# All globals are compile time constants
# recompilation needed if you change this values
# Better way: hand a tuple of all needed vars to the functions
# params=(NSteps,deltat,temp,DumpFreq,epsilon,DIM,N,density,Rcutoff)

# Setting up the simulation
NSteps =100 # Number of steps
deltat = 0.005 # Time step in reduced time units
temp   = 0.851# #Reduced temperature
DumpFreq = 100 # Save the position to file every DumpFreq steps
epsilon = 1.0 # LJ parameter for the energy between particles
DIM     =3
N       =500
density =0.776
Rcutoff =3

params=(NSteps,deltat,temp,DumpFreq,epsilon,DIM,N,density,Rcutoff)

#----------------------Function Definitions---------------------

#------------------Initialise Configuration--------

#error_model=True
#Do you really want to search for division by zeros (additional cost)?

@jit(nopython=True,error_model="numpy")
def initialise_config(N,DIM,density):
    velocity = (np.random.randn(N,DIM)-0.5)


    # Set initial momentum to zero
    COM_V = np.sum(velocity)/N     #Center of mass velocity
    velocity = velocity - COM_V    # Fix any center-of-mass drift

    # Calculate initial kinetic energy
    k_energy=0
    for i in range (N):
        k_energy+=np.dot(velocity[i],velocity[i])
    vscale=np.sqrt(DIM*temp/k_energy)
    velocity*=vscale

    #wrong array initialization (use tuple)
    #Initialize with zeroes
    coords = np.zeros((N,DIM))

    # Get the cooresponding box size
    L = (N/density)**(1.0/DIM)

    """ Find the lowest perfect cube greater than or equal to the number of
     particles"""
    nCube = 2

    while (nCube**3 < N):
        nCube = nCube + 1



    # Assign particle positions
    ip=-1
    x=0
    y=0
    z=0

    for i in range(0,nCube):
        for j in range(0,nCube):
            for k in range(0,nCube):
                if(ip<N):
                    x=(i+0.5)*(L/nCube)
                    y=(j+0.5)*(L/nCube)
                    z=(k+0.5)*(L/nCube)
                    coords[ip]=np.array([x,y,z])
                    ip=ip+1
                else:
                    break
    return coords,velocity,L


@jit(nopython=True)
def wrap(pos,L):
    '''Apply perodic boundary conditions.'''

    #correct array indexing
    for i in range (len(pos)):
        for k in range(DIM):
                if (pos[i,k]>0.5):
                    pos[i,k]=pos[i,k]-1
                if (pos[i,k]<-0.5):
                    pos[i,k]=pos[i,k]+1


    return (pos)    


@jit(nopython=True,error_model="numpy")
def LJ_Forces(pos,acc,epsilon,L,DIM,N):
    # Compute forces on positions using the Lennard-Jones potential
    # Uses double nested loop which is slow O(N^2) time unsuitable for large systems
    Sij = np.zeros(DIM) # Box scaled units
    Rij = np.zeros(DIM) # Real space units

    #Set all variables to zero
    ene_pot = np.zeros(N)
    acc = acc*0
    virial=0.0

    # Loop over all pairs of particles
    for i in range(N-1):
        for j in range(i+1,N): #i+1 to N ensures we do not double count
            Sij = pos[i]-pos[j] # Distance in box scaled units
            for l in range(DIM): # Periodic interactions
                if (np.abs(Sij[l])>0.5):
                    Sij[l] = Sij[l] - np.copysign(1.0,Sij[l]) # If distance is greater than 0.5  (scaled units) then subtract 0.5 to find periodic interaction distance.

            Rij   = L*Sij # Scale the box to the real units in this case reduced LJ units
            Rsqij = np.dot(Rij,Rij) # Calculate the square of the distance

            if(Rsqij < Rcutoff**2):
                # Calculate LJ potential inside cutoff
                # We calculate parts of the LJ potential at a time to improve the efficieny of the computation (most important for compiled code)
                rm2      = 1.0/Rsqij # 1/r^2
                rm6      = rm2**3
                forcefact=(rm2**4)*(rm6-0.5) # 1/r^6
                phi      =4*(rm6**2-rm6)

                ene_pot[i] = ene_pot[i]+0.5*phi # Accumulate energy

                ene_pot[j] = ene_pot[j]+0.5*phi # Accumulate energy

                virial     = virial-forcefact*Rsqij # Virial is needed to calculate the pressure
                acc[i]     = acc[i]+forcefact*Sij # Accumulate forces
                acc[j]     = acc[j]-forcefact*Sij # (Fji=-Fij)
    #If you want to get get the best performance, sum directly in the loop intead of 
    #summing at the end np.sum(ene_pot)
    return 48*acc, np.sum(ene_pot)/N, -virial/DIM # return the acceleration vector, potential energy and virial coefficient


@jit(nopython=True,error_model="numpy")
def Calculate_Temperature(vel,L,DIM,N):

    ene_kin = 0.0

    for i in range(N):
        real_vel = L*vel[i]
        ene_kin = ene_kin + 0.5*np.dot(real_vel,real_vel)

    ene_kin_aver = 1.0*ene_kin/N
    temperature = 2.0*ene_kin_aver/DIM

    return ene_kin_aver,temperature


# Main MD loop
@jit(nopython=True,error_model="numpy")
def main(params):
    NSteps,deltat,temp,DumpFreq,epsilon,DIM,N,density,Rcutoff=params
    # Vectors to store parameter values at each step

    ene_kin_aver = np.ones(NSteps)
    ene_pot_aver = np.ones(NSteps)
    temperature = np.ones(NSteps)
    virial = np.ones(NSteps)
    pressure = np.ones(NSteps)


    pos,vel,L = initialise_config(N,DIM,density)
    acc = (np.random.randn(N,DIM)-0.5)
    volume=L**3

    # Open file which we will save the outputs to
    # Unsupported operations have to be in an objectmode block
    # or simply write the outputs at the end in a pure Python function
    """
    if os.path.exists('energy2'):
        os.remove('energy2')
    f = open('traj.xyz', 'w')
    """
    for k in range(NSteps):

        # Refold positions according to periodic boundary conditions
        pos=wrap(pos,L)

        # r(t+dt) modify positions according to velocity and acceleration
        pos = pos + deltat*vel + 0.5*(deltat**2.0)*acc # Step 1

        # Calculate temperature
        ene_kin_aver[k],temperature[k] = Calculate_Temperature(vel,L,DIM,N)

        # Rescale velocities and take half step
        chi = np.sqrt(temp/temperature[k])
        vel = chi*vel + 0.5*deltat*acc # v(t+dt/2) Step 2

        # Compute forces a(t+dt),ene_pot,virial
        acc, ene_pot_aver[k], virial[k] = LJ_Forces(pos,acc,epsilon,L,DIM,N) # Step 3

        # Complete the velocity step 
        vel = vel + 0.5*deltat*acc # v(t+dt/2) Step 4

        # Calculate temperature
        ene_kin_aver[k],temperature[k] = Calculate_Temperature(vel,L,DIM,N)

        # Calculate pressure
        pressure[k]= density*temperature[k] + virial[k]/volume

        # Print output to file every DumpFreq number of steps
        """
        if(k%DumpFreq==0): # The % symbol is the modulus so if the Step is a whole multiple of DumpFreq then print the values

            f.write("%s\n" %(N)) # Write the number of particles to file
            # Write all of the quantities at this step to the file
            f.write("Energy %s, Temperature %.5f\n" %(ene_kin_aver[k]+ene_pot_aver[k],temperature[k]))
            for n in range(N): # Write the positions to file
                f.write("X"+" ")
                for l in range(DIM):
                    f.write(str(pos[n][l]*L)+" ")
                f.write("\n")

        #Simple prints without formating are supported
        if (k%5==0):
            #print("\rStep: {0} KE: {1}   PE: {2} Energy:  {3}".format(k, ene_kin_aver[k], ene_pot_aver[k],ene_kin_aver[k]+ene_pot_aver[k]))
            #sys.stdout.write("\rStep: {0} KE: {1}   PE: {2} Energy:  {3}".format(k, ene_kin_aver[k], ene_pot_aver[k],ene_kin_aver[k]+ene_pot_aver[k]))
            #sys.stdout.flush()
        """
    return ene_kin_aver, ene_pot_aver, temperature, pressure, pos

#------------------------------------------------------    
ene_kin_aver, ene_pot_aver, temperature, pressure, pos = main(params)




# Plot all of the quantities
def plot():
    plt.figure(figsize=[7,12])
    plt.rc('xtick', labelsize=15) 
    plt.rc('ytick', labelsize=15)
    plt.subplot(4, 1, 1)
    plt.plot(ene_kin_aver,'k-')
    plt.ylabel(r"$E_{K}", fontsize=20)
    plt.subplot(4, 1, 2)
    plt.plot(ene_pot_aver,'k-')
    plt.ylabel(r"$E_{P}$", fontsize=20)
    plt.subplot(4, 1, 3)
    plt.plot(temperature,'k-')
    plt.ylabel(r"$T$", fontsize=20)
    plt.subplot(4, 1, 4)
    plt.plot(pressure,'k-')
    plt.ylabel(r"$P$", fontsize=20)
    plt.show()

plot()

如何使用 numba 加速以下代码？

How to speed up the following code using numba?

numpy

scientific-computing

python-3.x

numba