How do I add weighting to a curve not the data points during fitting? What import/func do I use?

I have tried scipy.optimize's curve_fit, but it only seems to apply the weighting to the data points. I want to add a 1/Y^2 weight while fitting the Y curve to my data points (weighted least squares on the residuals). I am not sure how to target yfit instead of ydata, or whether I should be using something else entirely? Any help would be appreciated.

```
import numpy as np
import scipy.optimize as spo
import matplotlib.pyplot as plt

xdata = np.array([0.10, 0.10, 0.10, 0.10, 0.10, 0.10, 1.12, 1.12, 1.12, 1.12, 1.12, 1.12, 2.89, 2.89, 2.89, 2.89,
                     2.89, 2.89, 6.19, 6.19, 6.19, 6.19, 6.19, 23.30, 23.30, 23.30, 23.30, 23.30, 23.30, 108.98, 108.98,
                     108.98, 108.98, 108.98, 255.33, 255.33, 255.33, 255.33, 255.33, 255.33, 1188.62, 1188.62, 1188.62,
                     1188.62, 1188.62], dtype=float)

ydata = np.array([0.264352, 0.412386, 0.231238, 0.483558, 0.613206, 0.728528, -1.15391, -1.46504, -0.942926,
                     -2.12808, -2.90962, -1.51093, -3.09798, -5.08591, -4.75703, -4.91317, -5.1966, -4.04019, -13.8455,
                     -16.9911, -11.0881, -10.6453, -15.1288, -52.4669, -68.2344, -74.7673, -70.2025, -65.8181, -55.7344,
                     -271.286, -329.521, -436.097, -654.034, -396.45, -826.195, -1084.43, -984.344, -1124.8, -1076.27,
                     -1072.03, -3968.22, -3114.46, -3771.61, -2805.4, -4078.05], dtype=float)

def fourPL(x, A, B, C, D):
    return ((A-D)/(1.0+((x/C)**B))) + D

# unweighted fit for comparison
params, params_covariance = spo.curve_fit(fourPL, xdata, ydata)
params_list = params
roundy = [round(num, 4) for num in params_list]
print(roundy)

# attempted weighting via sigma; curve_fit treats sigma as the standard deviation
# of ydata, so the fit minimizes sum(((ydata - yfit)/sigma)**2)
popt2, pcov2 = spo.curve_fit(fourPL, xdata, ydata, sigma=1/ydata**2, absolute_sigma=True)
yfit2 = fourPL(xdata, *popt2)
params_list2 = popt2
roundy2 = [round(num, 4) for num in params_list2]
print(roundy2)

x_min, x_max = np.amin(xdata), np.amax(xdata)
xs = np.linspace(x_min, x_max, 1000)
plt.scatter(xdata, ydata)
plt.plot(xs, fourPL(xs, *params), 'm--', label='No Weight')
plt.plot(xs, fourPL(xs, *popt2), 'b--', label='Weights')
plt.legend(loc='upper center', bbox_to_anchor=(0.5, 1.05, 0.1, 0.1),
              ncol=3, fancybox=True, shadow=True)
plt.xlabel('µg/mL')
plt.ylabel('kHz/s')
#plt.xscale('log')
plt.show()
```

This would be my version using a manual least_squares fit. I compare it to the solution obtained with a plain curve_fit. The difference is actually not very large, and the curve_fit result looks fine to me.

```
import matplotlib.pyplot as plt
import numpy as np
from scipy.optimize import least_squares
from scipy.optimize import curve_fit

np.set_printoptions( linewidth=250, precision=2 )  ## avoids the OP's "roundy" rounding

xdata = np.array(
    [
        0.10, 0.10, 0.10, 0.10, 0.10, 0.10, 1.12, 1.12, 1.12, 1.12, 
        1.12, 1.12, 2.89, 2.89, 2.89, 2.89, 2.89, 2.89, 6.19, 6.19, 
        6.19, 6.19, 6.19, 23.30, 23.30, 23.30, 23.30, 23.30, 23.30,
        108.98, 108.98, 108.98, 108.98, 108.98, 255.33, 255.33, 255.33,
        255.33, 255.33, 255.33, 1188.62, 1188.62, 1188.62, 1188.62,
        1188.62
    ], dtype=float
)

ydata = np.array(
    [
        0.264352, 0.412386, 0.231238, 0.483558, 0.613206, 0.728528,
        -1.15391, -1.46504, -0.942926, -2.12808, -2.90962, -1.51093,
        -3.09798, -5.08591, -4.75703, -4.91317, -5.1966, -4.04019,
        -13.8455, -16.9911, -11.0881, -10.6453, -15.1288, -52.4669,
        -68.2344, -74.7673, -70.2025, -65.8181, -55.7344, -271.286,
        -329.521, -436.097, -654.034, -396.45, -826.195, -1084.43,
        -984.344, -1124.8, -1076.27, -1072.03, -3968.22, -3114.46,
        -3771.61, -2805.4, -4078.05
    ], dtype=float
)

def fourPL( x, a, b, c, d ):
    out = ( a - d ) / ( 1 + ( x / c )**b ) + d
    return out

def residuals( params, xlist, ylist ):
    ## params = ( a, b, c, d )
    yth = np.fromiter( ( fourPL( x, *params ) for x in xlist ), float )
    diff = np.subtract( yth, ylist )
    weights = 1 / np.abs( yth )**1
    ## not sure if this makes sense, but we weight with the function value;
    ## here it is inverse linear as it gets squared in the chi-square,
    ## but other weighting may be required
    return diff * weights

### for initial guess (yl0 is only for eyeballing a starting point, it is not plotted)
xl = np.linspace( .1, 1200, 150 )
yl0 = np.fromiter( ( fourPL( x, 1, 1, 1000, -6000 ) for x in xl ), float )

### standard curve_fit
cfsol, _ = curve_fit( fourPL, xdata, ydata, p0=( 1, 1, 1000, -6000 ) )
print( cfsol )
yl1 = np.fromiter( ( fourPL( x, *cfsol ) for x in xl ), float )

### least square with manual residual function including "unusual" weighting
lssol = least_squares( residuals, x0=( 1, 1, 1000, -6000 ), args=( xdata, ydata ) )
print( lssol.x )
yl2 = np.fromiter( ( fourPL( x, *( lssol.x ) ) for x in xl ), float )

### plotting
fig = plt.figure()
ax = fig.add_subplot( 1, 1, 1 )
ax.scatter( xdata, ydata )
ax.plot( xl, yl1 )
ax.plot( xl, yl2 )

plt.show()
```

Looking at the covariance matrix, not only are the errors large, but the parameters are also highly correlated. This is of course due to the nature of the model, but it should be treated with care, especially when it comes to interpreting the data.
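
Note that least_squares does not return a covariance matrix directly. A minimal sketch of the usual Gauss-Newton estimate from the returned Jacobian, assuming lssol and xdata from the code above:

```
## approximate parameter covariance from the least_squares result:
## cov ~ s2 * (J^T J)^-1, with s2 the reduced chi-square of the weighted residuals
dof = len( xdata ) - len( lssol.x )
s2 = np.sum( lssol.fun**2 ) / dof
cov = s2 * np.linalg.inv( lssol.jac.T @ lssol.jac )   ## use np.linalg.pinv if ill-conditioned
err = np.sqrt( np.diag( cov ) )
corr = cov / np.outer( err, err )  ## correlation matrix; off-diagonals near +/-1 mean strong correlation
print( err )
print( corr )
```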

The problem becomes even more apparent if one only looks at the data for small x; in any case, the data at large x scatter a lot. For small x or large c, a Taylor expansion of the model gives (a - d)(1 - (x/c)^b) + d = a - (a - d)(x/c)^b, which for b near 1 is basically a + e*x. So b, c and d are essentially degenerate: only a, b and the combination (a - d)/c^b can be determined from the small-x data.
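
To make this degeneracy concrete, here is a quick numerical check, reusing fourPL from above with illustrative values close to the initial guess used earlier: two parameter sets that share a, b and the combination (a - d)/c^b are nearly indistinguishable over the small-x range.

```
## two very different ( c, d ) pairs with the same a, b and the same
## combination ( a - d ) / c**b give nearly identical curves at small x
xs_small = np.linspace( 0.1, 25, 50 )
y_one = fourPL( xs_small, 1.0, 1.0, 1000.0, -6000.0 )    ## ( a - d ) / c**b = 6.001
y_two = fourPL( xs_small, 1.0, 1.0, 2000.0, -12001.0 )   ## same combination, c and d roughly doubled
print( np.max( np.abs( y_one - y_two ) ) )               ## small compared to the scatter in ydata
```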