Numba 没有加速功能

Question

我有一些代码正在尝试使用 numba 加速。我已经阅读了一些关于该主题的资料，但我还没有 100% 弄明白。

代码如下：

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as st
import seaborn as sns
from numba import jit, vectorize, float64, autojit
sns.set(context='talk', style='ticks', font_scale=1.2, rc={'figure.figsize': (6.5, 5.5), 'xtick.direction': 'in', 'ytick.direction': 'in'})

#%% constraints
x_min = 0                               # death below this
x_max = 20                              # maximum weight
t_max = 100                             # maximum time
foraging_efficiencies = np.linspace(0, 1, 10)               # potential foraging efficiencies
R = 10.0                                    # Resource level

#%% make the body size and time categories
body_sizes = np.arange(x_min, x_max+1)
time_steps = np.arange(t_max)

#%% parameter functions
@jit
def metabolic_fmr(x, u,temp):                           # metabolic cost function
    fmr = 0.125*(2**(0.2*temp))*(1 + 0.5*u) + x*0.1
    return fmr

def intake_dist(u):                         # intake stochastic function (returns a vector)
    g = st.binom.pmf(np.arange(R+1), R, u)
    return g

@jit
def mass_gain(x, u, temp):                      # mass gain function (returns a vector)
    x_prime = x - metabolic_fmr(x, u,temp) + np.arange(R+1)
    x_prime = np.minimum(x_prime, x_max)
    x_prime = np.maximum(x_prime, 0)
    return x_prime

@jit
def prob_attack(P):                         # probability of an attack
    p_a = 0.02*P
    return p_a

@jit
def prob_see(u):                            # probability of not seeing an attack
    p_s = 1-(1-u)**0.3
    return p_s

@jit
def prob_lethal(x):                         # probability of lethality given a successful attack
    p_l = 0.5*np.exp(-0.05*x) 
    return p_l

@jit
def prob_mort(P, u, x):
    p_m = prob_attack(P)*prob_see(u)*prob_lethal(x)
    return np.minimum(p_m, 1)

#%% terminal fitness function
@jit
def terminal_fitness(x):
    t_f = 15.0*x/(x+5.0)
    return t_f

#%% linear interpolation function
@jit
def linear_interpolation(x, F, t):
    floor = x.astype(int)
    delta_c = x-floor
    ceiling = floor + 1
    ceiling[ceiling>x_max] = x_max
    floor[floor<x_min] = x_min
    interpolated_F = (1-delta_c)*F[floor,t] + (delta_c)*F[ceiling,t]
    return interpolated_F

#%% solver
@jit
def solver_jit(P, temp):
    F = np.zeros((len(body_sizes), len(time_steps)))            # Expected fitness
    F[:,-1] = terminal_fitness(body_sizes)              # expected terminal fitness for every body size
    V = np.zeros((len(foraging_efficiencies), len(body_sizes), len(time_steps)))        # Fitness for each foraging effort
    D = np.zeros((len(body_sizes), len(time_steps)))            # Decision
    for t in range(t_max-1)[::-1]:
        for x in range(x_min+1, x_max+1):               # iterate over every body size except dead
            for i in range(len(foraging_efficiencies)):     # iterate over every possible foraging efficiency
                u = foraging_efficiencies[i]
                g_u = intake_dist(u)                # calculate the distribution of intakes
                xp = mass_gain(x, u, temp)          # calculate the mass gain
                p_m = prob_mort(P, u, x)            # probability of mortality
                V[i,x,t] = (1 - p_m)*(linear_interpolation(xp, F, t+1)*g_u).sum()       # Fitness calculation
            vmax = V[:,x,t].max()
            idx = np.argwhere(V[:,x,t]==vmax).min()
            D[x,t] = foraging_efficiencies[idx]
            F[x,t] = vmax
    return D, F

def solver_norm(P, temp):
    F = np.zeros((len(body_sizes), len(time_steps)))            # Expected fitness
    F[:,-1] = terminal_fitness(body_sizes)              # expected terminal fitness for every body size
    V = np.zeros((len(foraging_efficiencies), len(body_sizes), len(time_steps)))        # Fitness for each foraging effort
    D = np.zeros((len(body_sizes), len(time_steps)))            # Decision
    for t in range(t_max-1)[::-1]:
        for x in range(x_min+1, x_max+1):               # iterate over every body size except dead
            for i in range(len(foraging_efficiencies)):     # iterate over every possible foraging efficiency
                u = foraging_efficiencies[i]
                g_u = intake_dist(u)                # calculate the distribution of intakes
                xp = mass_gain(x, u, temp)          # calculate the mass gain
                p_m = prob_mort(P, u, x)            # probability of mortality
                V[i,x,t] = (1 - p_m)*(linear_interpolation(xp, F, t+1)*g_u).sum()       # Fitness calculation
            vmax = V[:,x,t].max()
            idx = np.argwhere(V[:,x,t]==vmax).min()
            D[x,t] = foraging_efficiencies[idx]
            F[x,t] = vmax
    return D, F

单独的 jit 函数往往比非 jit 函数快得多。例如，prob_mort 在通过 jit.运行后快了大约 600%。但是，求解器本身并没有快多少：

In [3]: %timeit -n 10 solver_jit(200, 25)
10 loops, best of 3: 3.94 s per loop

In [4]: %timeit -n 10 solver_norm(200, 25)
10 loops, best of 3: 4.09 s per loop

我知道有些函数不能被 jitted，所以我用自定义 jit 函数替换了 st.binom.pmf 函数，这实际上将时间减慢到每个循环大约 17 秒，慢了 5 倍多。大概是因为 scipy 函数在这一点上进行了高度优化。

所以我怀疑速度缓慢要么在 linear_interpolate 函数中，要么在 jitted 函数之外的求解器代码中的某个地方（因为有一次我取消了所有函数和运行 solver_norm 并获得了相同的时间）。对慢速部分在哪里以及如何加快速度有什么想法吗？

更新

这是我用来加速 jit 的二项式代码

@jit
def factorial(n):
    if n==0:
        return 1
    else:
        return n*factorial(n-1)

@vectorize([float64(float64,float64,float64)])
def binom(k, n, p):
    binom_coef = factorial(n)/(factorial(k)*factorial(n-k))
    pmf = binom_coef*p**k*(1-p)**(n-k)
    return pmf

@jit
def intake_dist(u):                         # intake stochastic function (returns a vector)
    g = binom(np.arange(R+1), R, u)
    return g

更新 2 我在 nopython 模式下尝试运行ning 我的二项式代码，发现我做错了，因为它是递归的。通过将代码更改为：

来修复该问题

@jit(int64(int64), nopython=True)
def factorial(nn):
    res = 1
    for ii in range(2, nn + 1):
        res *= ii
    return res

@vectorize([float64(float64,float64,float64)], nopython=True)
def binom(k, n, p):
    binom_coef = factorial(n)/(factorial(k)*factorial(n-k))
    pmf = binom_coef*p**k*(1-p)**(n-k)
    return pmf

求解器现在运行在

In [34]: %timeit solver_jit(200, 25)
1 loop, best of 3: 921 ms per loop

大约快 3.5 倍。然而，solver_jit() 和 solver_norm() 仍然运行以相同的速度，这意味着在 jit 函数之外有一些代码减慢了它的速度。

Answer 1

如前所述，可能有一些代码正在回退到对象模式。我只是想补充一点，您可以使用 njit 而不是 jit 来禁用对象模式。这将有助于诊断什么代码是罪魁祸首。

Answer 2

我能够对您的代码进行一些更改，使 jit 版本可以在 nopython 模式下完全编译。在我的笔记本电脑上，结果是：

%timeit solver_jit(200, 25)
1 loop, best of 3: 50.9 ms per loop

%timeit solver_norm(200, 25)
1 loop, best of 3: 192 ms per loop

作为参考，我使用的是 Numba 0.27.0。我承认 Numba 的编译错误仍然让人难以确定发生了什么，但由于我已经使用了一段时间，所以我对需要修复的内容建立了直觉。完整的代码在下面，但这里是我所做的更改列表：

在 linear_interpolation 中将 x.astype(int) 更改为 x.astype(np.int64) 以便它可以在 nopython 模式下编译。
在求解器中，使用 np.sum 作为函数而不是数组的方法。
np.argwhere 不受支持。编写自定义循环。

可能还可以进行一些进一步的优化，但这提供了初始加速。

完整代码：

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as st
import seaborn as sns
from numba import jit, vectorize, float64, autojit, njit
sns.set(context='talk', style='ticks', font_scale=1.2, rc={'figure.figsize': (6.5, 5.5), 'xtick.direction': 'in', 'ytick.direction': 'in'})

#%% constraints
x_min = 0                               # death below this
x_max = 20                              # maximum weight
t_max = 100                             # maximum time
foraging_efficiencies = np.linspace(0, 1, 10)               # potential foraging efficiencies
R = 10.0                                    # Resource level

#%% make the body size and time categories
body_sizes = np.arange(x_min, x_max+1)
time_steps = np.arange(t_max)

#%% parameter functions
@njit
def metabolic_fmr(x, u,temp):                           # metabolic cost function
    fmr = 0.125*(2**(0.2*temp))*(1 + 0.5*u) + x*0.1
    return fmr

@njit()
def factorial(nn):
    res = 1
    for ii in range(2, nn + 1):
        res *= ii
    return res

@vectorize([float64(float64,float64,float64)], nopython=True)
def binom(k, n, p):
    binom_coef = factorial(n)/(factorial(k)*factorial(n-k))
    pmf = binom_coef*p**k*(1-p)**(n-k)
    return pmf

@njit
def intake_dist(u):                         # intake stochastic function (returns a vector)
    g = binom(np.arange(R+1), R, u)
    return g

@njit
def mass_gain(x, u, temp):                      # mass gain function (returns a vector)
    x_prime = x - metabolic_fmr(x, u,temp) + np.arange(R+1)
    x_prime = np.minimum(x_prime, x_max)
    x_prime = np.maximum(x_prime, 0)
    return x_prime

@njit
def prob_attack(P):                         # probability of an attack
    p_a = 0.02*P
    return p_a

@njit
def prob_see(u):                            # probability of not seeing an attack
    p_s = 1-(1-u)**0.3
    return p_s

@njit
def prob_lethal(x):                         # probability of lethality given a successful attack
    p_l = 0.5*np.exp(-0.05*x) 
    return p_l

@njit
def prob_mort(P, u, x):
    p_m = prob_attack(P)*prob_see(u)*prob_lethal(x)
    return np.minimum(p_m, 1)

#%% terminal fitness function
@njit
def terminal_fitness(x):
    t_f = 15.0*x/(x+5.0)
    return t_f

#%% linear interpolation function
@njit
def linear_interpolation(x, F, t):
    floor = x.astype(np.int64)
    delta_c = x-floor
    ceiling = floor + 1
    ceiling[ceiling>x_max] = x_max
    floor[floor<x_min] = x_min
    interpolated_F = (1-delta_c)*F[floor,t] + (delta_c)*F[ceiling,t]
    return interpolated_F

#%% solver
@njit
def solver_jit(P, temp):
    F = np.zeros((len(body_sizes), len(time_steps)))            # Expected fitness
    F[:,-1] = terminal_fitness(body_sizes)              # expected terminal fitness for every body size
    V = np.zeros((len(foraging_efficiencies), len(body_sizes), len(time_steps)))        # Fitness for each foraging effort
    D = np.zeros((len(body_sizes), len(time_steps)))            # Decision
    for t in range(t_max-2,-1,-1):
        for x in range(x_min+1, x_max+1):               # iterate over every body size except dead
            for i in range(len(foraging_efficiencies)):     # iterate over every possible foraging efficiency
                u = foraging_efficiencies[i]
                g_u = intake_dist(u)                # calculate the distribution of intakes
                xp = mass_gain(x, u, temp)          # calculate the mass gain
                p_m = prob_mort(P, u, x)            # probability of mortality
                V[i,x,t] = (1 - p_m)*np.sum((linear_interpolation(xp, F, t+1)*g_u))       # Fitness calculation
            vmax = V[:,x,t].max()

            for k in xrange(V.shape[0]):
                if V[k,x,t] == vmax:
                    idx = k
                    break
            #idx = np.argwhere(V[:,x,t]==vmax).min()
            D[x,t] = foraging_efficiencies[idx]
            F[x,t] = vmax
    return D, F

def solver_norm(P, temp):
    F = np.zeros((len(body_sizes), len(time_steps)))            # Expected fitness
    F[:,-1] = terminal_fitness(body_sizes)              # expected terminal fitness for every body size
    V = np.zeros((len(foraging_efficiencies), len(body_sizes), len(time_steps)))        # Fitness for each foraging effort
    D = np.zeros((len(body_sizes), len(time_steps)))            # Decision
    for t in range(t_max-1)[::-1]:
        for x in range(x_min+1, x_max+1):               # iterate over every body size except dead
            for i in range(len(foraging_efficiencies)):     # iterate over every possible foraging efficiency
                u = foraging_efficiencies[i]
                g_u = intake_dist(u)                # calculate the distribution of intakes
                xp = mass_gain(x, u, temp)          # calculate the mass gain
                p_m = prob_mort(P, u, x)            # probability of mortality
                V[i,x,t] = (1 - p_m)*(linear_interpolation(xp, F, t+1)*g_u).sum()       # Fitness calculation
            vmax = V[:,x,t].max()
            idx = np.argwhere(V[:,x,t]==vmax).min()
            D[x,t] = foraging_efficiencies[idx]
            F[x,t] = vmax
    return D, F

Numba 没有加速功能

Numba not speeding up function

python

performance

numpy

numba