Minimising root function through scipy.optimize

I have code that estimates the parameter beta in an ODE system, assuming all parameters other than beta are known and that the peak of the simulated 'epidemic' is 10% of the starting population. However, I have realised that solving for the root may not always find a value. Is there a way to use scipy.optimize for an alternative estimate, i.e. take the difference between the simulated peak and the 10% target, square it, and minimise that? Here is the current code:

import numpy as np
from scipy.integrate import odeint
from scipy.optimize import root

def peak_infections(beta, days = 100):

    # Total population, N.
    N = 1000
    # Initial number of infected and recovered individuals, I0 and R0.
    I0, R0 = 10, 0
    # Everyone else, S0, is susceptible to infection initially.
    S0 = N - I0 - R0
    J0 = I0
    # Contact rate, beta, and mean recovery rate, gamma, (in 1/days).
    gamma = 1/7
    # A grid of time points (in days)
    t = np.linspace(0, days, days + 1)

    # The SIR model differential equations.
    def deriv(y, t, N, beta, gamma):
        S, I, R, J = y
        dS = ((-beta * S * I) / N)
        dI = ((beta * S * I) / N) - (gamma * I)
        dR = (gamma * I)
        dJ = ((beta * S * I) / N)
        return dS, dI, dR, dJ

    # Initial conditions are S0, I0, R0
    # Integrate the SIR equations over the time grid, t.
    solve = odeint(deriv, (S0, I0, R0, J0), t, args=(N, beta, gamma))
    S, I, R, J = solve.T

    return np.max(I)/N

root(lambda b: peak_infections(b)-0.1, x0 = 0.5).x

Using scipy.optimize alone, root(lambda b: peak_infections(b)-0.1, x0 = 0.5).x returns an error about misusing the function.
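
I suspect the problem may be that root passes the objective a one-element NumPy array, so peak_infections (and the odeint call inside it) receives an array rather than a float for beta. Unpacking the scalar first, as in this sketch, might be a workaround:

from scipy.optimize import root

# Sketch: unpack the scalar that root passes in as a length-1 array,
# so that odeint inside peak_infections sees a plain float for beta.
sol = root(lambda b: peak_infections(float(b[0])) - 0.1, x0=0.5)
print(sol.x)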

EDIT ----------------------------------------------

I would like to know how this approach could be applied if, instead of having the 10% peak as the key piece of information, I had a dataframe of weekly new case numbers. How could a similar approach take that data into account when estimating beta? Say we have

import pandas as pd
d = {'Week': [1, 2,3,4,5,6,7,8,9,10,11], 'incidence': [206.1705794,2813.420201,11827.9453,30497.58655,10757.66954,7071.878779,3046.752723,1314.222882,765.9763902,201.3800578,109.8982006]}
df = pd.DataFrame(data=d)

This is now our data, rather than knowing that the simulated peak is 10% of the starting population N. How can it be used in the minimisation to find an estimate of beta?

----- EDIT 2 ------

Here is my attempt so far with the weekly data:

import numpy as np
from scipy.integrate import odeint
import matplotlib.pyplot as plt
from scipy.optimize import minimize
import pandas as pd
from scipy.optimize import leastsq

###############################################################################
##########                  WITH WEEKLY DATA
###############################################################################


#t = np.arange(0,84,7)
t = np.linspace(0, 77, 77+1)
d = {'Week': [t[7],t[14],t[21],t[28],t[35],t[42],t[49],t[56],t[63],t[70],t[77]], 'incidence': [206.1705794,2813.420201,11827.9453,30497.58655,10757.66954,7071.878779,3046.752723,1314.222882,765.9763902,201.3800578,109.8982006]}
df = pd.DataFrame(data=d)
#d = {'Week': t, 'incidence': [0,206.1705794,2813.420201,11827.9453,30497.58655,10757.66954,7071.878779,3046.752723,1314.222882,765.9763902,201.3800578,109.8982006]}
#df = pd.DataFrame(data=d)

def peak_infections(beta, df):
 
    # Weeks for which the ODE system will be solved
    #weeks = df.Week.to_numpy()

    # Total population, N.
    N = 100000
    # Initial number of infected and recovered individuals, I0 and R0.
    I0, R0 = 10, 0
    # Everyone else, S0, is susceptible to infection initially.
    S0 = N - I0 - R0
    J0 = I0
    # Contact rate, beta, and mean recovery rate, gamma, (in 1/days).
    #reproductive no. R zero is beta/gamma
    gamma = 1/6 #rate should be in weeks now
    # A grid of time points 
    t7 = np.arange(7,84,7)

    # The SIR model differential equations.
    def deriv(y, t7, N, beta, gamma):
        S, I, R, J = y
        dS = ((-beta * S * I) / N)
        dI = ((beta * S * I) / N) - (gamma * I)
        dR = (gamma * I)
        dJ = ((beta * S * I) / N)
        return dS, dI, dR, dJ

    # Initial conditions are S0, I0, R0
    # Integrate the SIR equations over the time grid, t.
    solve = odeint(deriv, (S0, I0, R0, J0), t7, args=(N, beta, gamma))
    S, I, R, J = solve.T

    return np.max(I)/N  # a single scalar (peak infected fraction), not a weekly trajectory

def residual(x, df):

    # Total population,  N.
    N = 100000
    incidence = df.incidence.to_numpy()/N
    # Note: peak_infections above returns a single scalar, so this subtracts it
    # from all 11 weekly incidence values rather than comparing week by week.
    return np.sum((peak_infections(x, df) - incidence) ** 2)

x0 = 0.5
res = minimize(residual, x0, args=(df), method="Nelder-Mead").x
print(res)

Yes, you can use scipy.optimize.minimize.

One way to do it is as follows:

from scipy.optimize import minimize

def residual(x):
    return (peak_infections(x) - 0.1) ** 2

x0 = 0.5
res = minimize(residual, x0, method="Nelder-Mead", options={'fatol':1e-04})
print(res)
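
As a quick sanity check (just a sketch, reusing the peak_infections defined in your question), you can plug the fitted value back into the model; the peak should come out close to the 0.1 target:

beta_hat = res.x[0]
# The fitted beta should reproduce a peak of roughly 10% of the population.
print(beta_hat, peak_infections(beta_hat))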

This now gives almost the same answer as the root method you posted, but it works as an alternative approach.

EDIT

Following the discussion in the comments on this answer, and in line with the edit to your question, I propose the following solution:

import numpy as np
from scipy.integrate import odeint
from  scipy.optimize import minimize
import pandas as pd

d = {'Week': [1, 2,3,4,5,6,7,8,9,10,11], 'incidence': [206.1705794,2813.420201,11827.9453,30497.58655,10757.66954,7071.878779,3046.752723,1314.222882,765.9763902,201.3800578,109.8982006]}
df = pd.DataFrame(data=d)

def peak_infections(beta, df):

    # Weeks for which the ODE system will be solved
    weeks = df.Week.to_numpy()

    # Total population, N.
    N = 1000
    # Initial number of infected and recovered individuals, I0 and R0.
    I0, R0 = 10, 0
    # Everyone else, S0, is susceptible to infection initially.
    S0 = N - I0 - R0
    J0 = I0
    # Contact rate, beta, and mean recovery rate, gamma (now in 1/weeks).
    gamma = 1/7 * 7  # recovery rate of 1/7 per day converted to per week
    # A grid of time points (in weeks)
    t = np.linspace(0, weeks[-1], weeks[-1] + 1)

    # The SIR model differential equations.
    def deriv(y, t, N, beta, gamma):
        S, I, R, J = y
        dS = ((-beta * S * I) / N)
        dI = ((beta * S * I) / N) - (gamma * I)
        dR = (gamma * I)
        dJ = ((beta * S * I) / N)
        return dS, dI, dR, dJ

    # Initial conditions are S0, I0, R0
    # Integrate the SIR equations over the time grid, t.
    solve = odeint(deriv, (S0, I0, R0, J0), t, args=(N, beta, gamma))
    S, I, R, J = solve.T

    return I/N  # full weekly trajectory of the infected fraction, not just the peak

def residual(x, df):

    # Total population, N.
    N = 1000
    incidence = df.incidence.to_numpy()/N
    # Drop the t = 0 point so the 12-value model trajectory lines up with the 11 weekly observations.
    return np.sum((peak_infections(x, df)[1:] - incidence) ** 2)

x0 = 0.5
res = minimize(residual, x0, args=(df), method="Nelder-Mead", options={'fatol':1e-04})
print(res)

Here I solve the ODE system over the 11 weeks and compare the result directly against the 11 incidence values in the dataframe you provided. The differences are squared element-wise, summed, and that sum is minimised. The results, however, are not very promising.
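
To see where the fit breaks down, you could overlay the model trajectory on the scaled weekly data with a small plotting sketch (assuming matplotlib is available, and reusing res, df and peak_infections from the code above):

import matplotlib.pyplot as plt

beta_hat = res.x[0]
model = peak_infections(beta_hat, df)  # I/N at t = 0, 1, ..., 11 (weeks)
plt.plot(df.Week, df.incidence / 1000, "o", label="observed incidence / N")  # same N = 1000 scaling as in residual
plt.plot(np.arange(len(model)), model, label="model I / N")
plt.xlabel("week")
plt.ylabel("fraction of population")
plt.legend()
plt.show()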