Numba 没有加速功能

Numba not speeding up function

我有一些代码正在尝试使用 numba 加速。我已经阅读了一些关于该主题的资料,但我还没有 100% 弄明白。

代码如下:

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as st
import seaborn as sns
from numba import jit, vectorize, float64, autojit
sns.set(context='talk', style='ticks', font_scale=1.2, rc={'figure.figsize': (6.5, 5.5), 'xtick.direction': 'in', 'ytick.direction': 'in'})

#%% constraints
x_min = 0                               # death below this
x_max = 20                              # maximum weight
t_max = 100                             # maximum time
foraging_efficiencies = np.linspace(0, 1, 10)               # potential foraging efficiencies
R = 10.0                                    # Resource level

#%% make the body size and time categories
body_sizes = np.arange(x_min, x_max+1)
time_steps = np.arange(t_max)

#%% parameter functions
@jit
def metabolic_fmr(x, u,temp):                           # metabolic cost function
    fmr = 0.125*(2**(0.2*temp))*(1 + 0.5*u) + x*0.1
    return fmr

def intake_dist(u):                         # intake stochastic function (returns a vector)
    g = st.binom.pmf(np.arange(R+1), R, u)
    return g

@jit
def mass_gain(x, u, temp):                      # mass gain function (returns a vector)
    x_prime = x - metabolic_fmr(x, u,temp) + np.arange(R+1)
    x_prime = np.minimum(x_prime, x_max)
    x_prime = np.maximum(x_prime, 0)
    return x_prime

@jit
def prob_attack(P):                         # probability of an attack
    p_a = 0.02*P
    return p_a

@jit
def prob_see(u):                            # probability of not seeing an attack
    p_s = 1-(1-u)**0.3
    return p_s

@jit
def prob_lethal(x):                         # probability of lethality given a successful attack
    p_l = 0.5*np.exp(-0.05*x) 
    return p_l

@jit
def prob_mort(P, u, x):
    p_m = prob_attack(P)*prob_see(u)*prob_lethal(x)
    return np.minimum(p_m, 1)

#%% terminal fitness function
@jit
def terminal_fitness(x):
    t_f = 15.0*x/(x+5.0)
    return t_f

#%% linear interpolation function
@jit
def linear_interpolation(x, F, t):
    floor = x.astype(int)
    delta_c = x-floor
    ceiling = floor + 1
    ceiling[ceiling>x_max] = x_max
    floor[floor<x_min] = x_min
    interpolated_F = (1-delta_c)*F[floor,t] + (delta_c)*F[ceiling,t]
    return interpolated_F

#%% solver
@jit
def solver_jit(P, temp):
    F = np.zeros((len(body_sizes), len(time_steps)))            # Expected fitness
    F[:,-1] = terminal_fitness(body_sizes)              # expected terminal fitness for every body size
    V = np.zeros((len(foraging_efficiencies), len(body_sizes), len(time_steps)))        # Fitness for each foraging effort
    D = np.zeros((len(body_sizes), len(time_steps)))            # Decision
    for t in range(t_max-1)[::-1]:
        for x in range(x_min+1, x_max+1):               # iterate over every body size except dead
            for i in range(len(foraging_efficiencies)):     # iterate over every possible foraging efficiency
                u = foraging_efficiencies[i]
                g_u = intake_dist(u)                # calculate the distribution of intakes
                xp = mass_gain(x, u, temp)          # calculate the mass gain
                p_m = prob_mort(P, u, x)            # probability of mortality
                V[i,x,t] = (1 - p_m)*(linear_interpolation(xp, F, t+1)*g_u).sum()       # Fitness calculation
            vmax = V[:,x,t].max()
            idx = np.argwhere(V[:,x,t]==vmax).min()
            D[x,t] = foraging_efficiencies[idx]
            F[x,t] = vmax
    return D, F

def solver_norm(P, temp):
    F = np.zeros((len(body_sizes), len(time_steps)))            # Expected fitness
    F[:,-1] = terminal_fitness(body_sizes)              # expected terminal fitness for every body size
    V = np.zeros((len(foraging_efficiencies), len(body_sizes), len(time_steps)))        # Fitness for each foraging effort
    D = np.zeros((len(body_sizes), len(time_steps)))            # Decision
    for t in range(t_max-1)[::-1]:
        for x in range(x_min+1, x_max+1):               # iterate over every body size except dead
            for i in range(len(foraging_efficiencies)):     # iterate over every possible foraging efficiency
                u = foraging_efficiencies[i]
                g_u = intake_dist(u)                # calculate the distribution of intakes
                xp = mass_gain(x, u, temp)          # calculate the mass gain
                p_m = prob_mort(P, u, x)            # probability of mortality
                V[i,x,t] = (1 - p_m)*(linear_interpolation(xp, F, t+1)*g_u).sum()       # Fitness calculation
            vmax = V[:,x,t].max()
            idx = np.argwhere(V[:,x,t]==vmax).min()
            D[x,t] = foraging_efficiencies[idx]
            F[x,t] = vmax
    return D, F

单独的 jit 函数往往比非 jit 函数快得多。例如,prob_mort 在通过 jit.运行 后快了大约 600%。但是,求解器本身并没有快多少:

In [3]: %timeit -n 10 solver_jit(200, 25)
10 loops, best of 3: 3.94 s per loop

In [4]: %timeit -n 10 solver_norm(200, 25)
10 loops, best of 3: 4.09 s per loop

我知道有些函数不能被 jitted,所以我用自定义 jit 函数替换了 st.binom.pmf 函数,这实际上将时间减慢到每个循环大约 17 秒,慢了 5 倍多。大概是因为 scipy 函数在这一点上进行了高度优化。

所以我怀疑速度缓慢要么在 linear_interpolate 函数中,要么在 jitted 函数之外的求解器代码中的某个地方(因为有一次我取消了所有函数和 运行 solver_norm 并获得了相同的时间)。对慢速部分在哪里以及如何加快速度有什么想法吗?

更新

这是我用来加速 jit 的二项式代码

@jit
def factorial(n):
    if n==0:
        return 1
    else:
        return n*factorial(n-1)

@vectorize([float64(float64,float64,float64)])
def binom(k, n, p):
    binom_coef = factorial(n)/(factorial(k)*factorial(n-k))
    pmf = binom_coef*p**k*(1-p)**(n-k)
    return pmf

@jit
def intake_dist(u):                         # intake stochastic function (returns a vector)
    g = binom(np.arange(R+1), R, u)
    return g

更新 2 我在 nopython 模式下尝试 运行ning 我的二项式代码,发现我做错了,因为它是递归的。通过将代码更改为:

来修复该问题
@jit(int64(int64), nopython=True)
def factorial(nn):
    res = 1
    for ii in range(2, nn + 1):
        res *= ii
    return res

@vectorize([float64(float64,float64,float64)], nopython=True)
def binom(k, n, p):
    binom_coef = factorial(n)/(factorial(k)*factorial(n-k))
    pmf = binom_coef*p**k*(1-p)**(n-k)
    return pmf

求解器现在运行在

In [34]: %timeit solver_jit(200, 25)
1 loop, best of 3: 921 ms per loop

大约快 3.5 倍。然而,solver_jit() 和 solver_norm() 仍然 运行 以相同的速度,这意味着在 jit 函数之外有一些代码减慢了它的速度。

如前所述,可能有一些代码正在回退到对象模式。我只是想补充一点,您可以使用 njit 而不是 jit 来禁用对象模式。这将有助于诊断什么代码是罪魁祸首。

我能够对您的代码进行一些更改,使 jit 版本可以在 nopython 模式下完全编译。在我的笔记本电脑上,结果是:

%timeit solver_jit(200, 25)
1 loop, best of 3: 50.9 ms per loop

%timeit solver_norm(200, 25)
1 loop, best of 3: 192 ms per loop

作为参考,我使用的是 Numba 0.27.0。我承认 Numba 的编译错误仍然让人难以确定发生了什么,但由于我已经使用了一段时间,所以我对需要修复的内容建立了直觉。完整的代码在下面,但这里是我所做的更改列表:

  • linear_interpolation 中将 x.astype(int) 更改为 x.astype(np.int64) 以便它可以在 nopython 模式下编译。
  • 在求解器中,使用 np.sum 作为函数而不是数组的方法。
  • np.argwhere 不受支持。编写自定义循环。

可能还可以进行一些进一步的优化,但这提供了初始加速。

完整代码:

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as st
import seaborn as sns
from numba import jit, vectorize, float64, autojit, njit
sns.set(context='talk', style='ticks', font_scale=1.2, rc={'figure.figsize': (6.5, 5.5), 'xtick.direction': 'in', 'ytick.direction': 'in'})

#%% constraints
x_min = 0                               # death below this
x_max = 20                              # maximum weight
t_max = 100                             # maximum time
foraging_efficiencies = np.linspace(0, 1, 10)               # potential foraging efficiencies
R = 10.0                                    # Resource level

#%% make the body size and time categories
body_sizes = np.arange(x_min, x_max+1)
time_steps = np.arange(t_max)

#%% parameter functions
@njit
def metabolic_fmr(x, u,temp):                           # metabolic cost function
    fmr = 0.125*(2**(0.2*temp))*(1 + 0.5*u) + x*0.1
    return fmr

@njit()
def factorial(nn):
    res = 1
    for ii in range(2, nn + 1):
        res *= ii
    return res

@vectorize([float64(float64,float64,float64)], nopython=True)
def binom(k, n, p):
    binom_coef = factorial(n)/(factorial(k)*factorial(n-k))
    pmf = binom_coef*p**k*(1-p)**(n-k)
    return pmf

@njit
def intake_dist(u):                         # intake stochastic function (returns a vector)
    g = binom(np.arange(R+1), R, u)
    return g

@njit
def mass_gain(x, u, temp):                      # mass gain function (returns a vector)
    x_prime = x - metabolic_fmr(x, u,temp) + np.arange(R+1)
    x_prime = np.minimum(x_prime, x_max)
    x_prime = np.maximum(x_prime, 0)
    return x_prime

@njit
def prob_attack(P):                         # probability of an attack
    p_a = 0.02*P
    return p_a

@njit
def prob_see(u):                            # probability of not seeing an attack
    p_s = 1-(1-u)**0.3
    return p_s

@njit
def prob_lethal(x):                         # probability of lethality given a successful attack
    p_l = 0.5*np.exp(-0.05*x) 
    return p_l

@njit
def prob_mort(P, u, x):
    p_m = prob_attack(P)*prob_see(u)*prob_lethal(x)
    return np.minimum(p_m, 1)

#%% terminal fitness function
@njit
def terminal_fitness(x):
    t_f = 15.0*x/(x+5.0)
    return t_f

#%% linear interpolation function
@njit
def linear_interpolation(x, F, t):
    floor = x.astype(np.int64)
    delta_c = x-floor
    ceiling = floor + 1
    ceiling[ceiling>x_max] = x_max
    floor[floor<x_min] = x_min
    interpolated_F = (1-delta_c)*F[floor,t] + (delta_c)*F[ceiling,t]
    return interpolated_F

#%% solver
@njit
def solver_jit(P, temp):
    F = np.zeros((len(body_sizes), len(time_steps)))            # Expected fitness
    F[:,-1] = terminal_fitness(body_sizes)              # expected terminal fitness for every body size
    V = np.zeros((len(foraging_efficiencies), len(body_sizes), len(time_steps)))        # Fitness for each foraging effort
    D = np.zeros((len(body_sizes), len(time_steps)))            # Decision
    for t in range(t_max-2,-1,-1):
        for x in range(x_min+1, x_max+1):               # iterate over every body size except dead
            for i in range(len(foraging_efficiencies)):     # iterate over every possible foraging efficiency
                u = foraging_efficiencies[i]
                g_u = intake_dist(u)                # calculate the distribution of intakes
                xp = mass_gain(x, u, temp)          # calculate the mass gain
                p_m = prob_mort(P, u, x)            # probability of mortality
                V[i,x,t] = (1 - p_m)*np.sum((linear_interpolation(xp, F, t+1)*g_u))       # Fitness calculation
            vmax = V[:,x,t].max()

            for k in xrange(V.shape[0]):
                if V[k,x,t] == vmax:
                    idx = k
                    break
            #idx = np.argwhere(V[:,x,t]==vmax).min()
            D[x,t] = foraging_efficiencies[idx]
            F[x,t] = vmax
    return D, F

def solver_norm(P, temp):
    F = np.zeros((len(body_sizes), len(time_steps)))            # Expected fitness
    F[:,-1] = terminal_fitness(body_sizes)              # expected terminal fitness for every body size
    V = np.zeros((len(foraging_efficiencies), len(body_sizes), len(time_steps)))        # Fitness for each foraging effort
    D = np.zeros((len(body_sizes), len(time_steps)))            # Decision
    for t in range(t_max-1)[::-1]:
        for x in range(x_min+1, x_max+1):               # iterate over every body size except dead
            for i in range(len(foraging_efficiencies)):     # iterate over every possible foraging efficiency
                u = foraging_efficiencies[i]
                g_u = intake_dist(u)                # calculate the distribution of intakes
                xp = mass_gain(x, u, temp)          # calculate the mass gain
                p_m = prob_mort(P, u, x)            # probability of mortality
                V[i,x,t] = (1 - p_m)*(linear_interpolation(xp, F, t+1)*g_u).sum()       # Fitness calculation
            vmax = V[:,x,t].max()
            idx = np.argwhere(V[:,x,t]==vmax).min()
            D[x,t] = foraging_efficiencies[idx]
            F[x,t] = vmax
    return D, F