将数据拟合到 Python 中具有多个变量的互补误差函数

Fitting data to a complementary error function with multiple variables in Python

我无法将实验数据拟合到 Python 3.7.4 中的互补误差函数。

import matplotlib.pyplot as plt
import math 
import numpy as np
from scipy import optimize
from scipy import integrate

with open('C:Path\Data\test.txt', 'r') as f:
    lines = f.readlines()
    x = [float(line.split(',')[0]) for line in lines]
    y = [float(line.split(',')[1]) for line in lines]


int_start = 35
int_end = 75

start = float(x[int_start])
end = float(x[int_end]) 
  
x_data = np.linspace(start, end, (int_end-int_start)+1)
y_data = y[int_start: int_end+1]


def integrand(x, a, b, c):
    return a*np.exp(((-1)*(x-b)**2)/(2*(c**2)))

def cerf(x, a, b, c):
    return integrate.quad(integrand, x, np.inf)

params, params_covariance = optimize.curve_fit(cerf, x_data, y_data)

plt.plot(x_data, y_data, 'x', label='Data')
plt.plot(x_data, integrand(x_data, params[0], params[1], params[2]), '-', label="fit")

plt.legend(loc='best')
plt.show()

更准确地说,我想将我的数据拟合到由 integrand 函数和参数 abccerf 函数执行实际集成。积分应该从 x(函数的参数)到 +infinity。之后,我想使用 scipy 中的标准 curve_fit。但是现在我得到如下值错误:

> ValueError                                Traceback (most recent call last)
<ipython-input-33-8130a3eb44bb> in <module>
     29     return integrate.quad(integrand, x, np.inf)
     30 
---> 31 params, params_covariance = optimize.curve_fit(cerf, x_data, y_data)

~\AppData\Roaming\Python\Python37\site-packages\scipy\integrate\quadpack.py in quad(func, a, b, args, full_output, epsabs, epsrel, limit, points, weight, wvar, wopts, maxp1, limlst)
    346 
    347     # check the limits of integration: \int_a^b, expect a < b
--> 348     flip, a, b = b < a, min(a, b), max(a, b)
    349 
    350     if weight is None:

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

如果有人知道如何使用 x 参数作为积分的下边界来拟合函数,我将非常感激。

数据如下所示:

0.20,0.40
0.21,0.40
0.22,0.40
0.23,0.40
0.24,0.40
0.25,0.40
0.26,0.40
0.27,0.40
0.28,0.40
0.29,0.40
0.30,0.40
0.31,0.40
0.32,0.40
0.33,0.40
0.34,0.40
0.35,0.40
0.36,0.40
0.37,0.40
0.38,0.40
0.39,0.40
0.40,0.40
0.41,0.40
0.42,0.39
0.43,0.39
0.44,0.38
0.45,0.38
0.46,0.37
0.47,0.37
0.48,0.35
0.49,0.34
0.50,0.33
0.51,0.31
0.52,0.30
0.53,0.28
0.54,0.26
0.55,0.24
0.56,0.21
0.57,0.19
0.58,0.16
0.59,0.14
0.60,0.12
0.61,0.10
0.62,0.09
0.63,0.07
0.64,0.06 
0.65,0.05
0.66,0.04
0.67,0.03
0.68,0.02
0.69,0.02
0.70,0.01
0.71,0.01
0.72,0.00
0.73,0.00
0.74,0.00
0.75,0.00
0.76,0.00
0.77,0.00
0.78,-0.00
0.79,0.00
0.80,0.00
0.81,-0.00
0.82,-0.00
0.83,-0.00
0.84,0.00
0.85,-0.00
0.86,0.00
0.87,0.00
0.88,0.00
0.89,-0.00
0.90,0.00

曲线的形状看起来更符合逻辑而非高斯。

根据文档,scipy.integrate.quad 不接受数组,并且不能调用带参数的函数。因此,我们必须在由 scipy.curve_fit:

寻址的函数中构造一个辅助函数 f
import matplotlib.pyplot as plt
import numpy as np
from scipy import optimize, integrate

#define a function that integrates or evaluates f depending on the Boolean flag func_integr
def cerf(x, a, b, c, func_integr=True):
    f = lambda x: a*np.exp(((-1)*(x-b)**2)/(2*(c**2)))
    #flag is preset to True, so will return the integrated values
    if func_integr:
        return np.asarray([integrate.quad(f, i, np.inf)[0] for i in x])
    #unless the flag func_integr is set to False, then it will return the function values
    else:
        return f(x)

#read file
arr=np.genfromtxt("test.txt", delimiter=",")
x_data = arr[:, 0]
y_data = arr[:, 1]

#provide reasonable start values...
start_p = [1, 0, -1]
#...for scipy.curve_fit
params, params_covariance = optimize.curve_fit(cerf, x_data, y_data, p0=start_p)
print(params)
#[ 2.26757666  0.56501062 -0.0704476 ]


#plot our results
plt.plot(x_data, y_data, 'x', label='Data')
plt.plot(x_data, cerf(x_data, *params), '-', label="fit")    
plt.legend(loc='best')
plt.show()

示例输出:

这种方法不是最快的 - 每个 x 值都是单独积分的。也许还有其他 scipy.integrate 函数可以处理 numpy 数组;我不知道。
评估 f 而不是其综合值的部分在这里并不是真正必要的。但我最初使用它来验证 cerf 是否按预期工作,所以我将其留在脚本中。