如何在拟合过程中将权重添加到曲线而不是数据点? import/func 我用什么?
How do I add weighting to a curve not the data points during fitting? What import/func do I use?
我试过 scipy.optimize import curve_fit 但它似乎只更改了数据点。我想在从我的数据点残差(带权重的最小平方)拟合 Y 曲线的过程中添加 1/Y^2 权重。我不确定如何定位 yfit 而不是 ydata 或者我是否应该使用其他东西?任何帮助将不胜感激。
xdata = np.array([0.10, 0.10, 0.10, 0.10, 0.10, 0.10, 1.12, 1.12, 1.12, 1.12, 1.12, 1.12, 2.89, 2.89, 2.89, 2.89,
2.89, 2.89, 6.19, 6.19, 6.19, 6.19, 6.19, 23.30, 23.30, 23.30, 23.30, 23.30, 23.30, 108.98, 108.98,
108.98, 108.98, 108.98, 255.33, 255.33, 255.33, 255.33, 255.33, 255.33, 1188.62, 1188.62, 1188.62,
1188.62, 1188.62], dtype=float)
ydata = np.array([0.264352, 0.412386, 0.231238, 0.483558, 0.613206, 0.728528, -1.15391, -1.46504, -0.942926,
-2.12808, -2.90962, -1.51093, -3.09798, -5.08591, -4.75703, -4.91317, -5.1966, -4.04019, -13.8455,
-16.9911, -11.0881, -10.6453, -15.1288, -52.4669, -68.2344, -74.7673, -70.2025, -65.8181, -55.7344,
-271.286, -329.521, -436.097, -654.034, -396.45, -826.195, -1084.43, -984.344, -1124.8, -1076.27,
-1072.03, -3968.22, -3114.46, -3771.61, -2805.4, -4078.05], dtype=float)
def fourPL(x, A, B, C, D):
return ((A-D)/(1.0+((x/C)**B))) + D
params, params_covariance = spo.curve_fit(fourPL, xdata, ydata)
params_list = params
roundy = [round(num, 4) for num in params_list]
print(roundy)
popt2, pcov2 = spo.curve_fit(fourPL, xdata, ydata, sigma=1/ydata**2, absolute_sigma=True)
yfit2 = fourPL(xdata, *popt2)
params_list2 = popt2
roundy2 = [round(num, 4) for num in params_list2]
print(roundy2)
x_min, x_max = np.amin(xdata), np.amax(xdata)
xs = np.linspace(x_min, x_max, 1000)
plt.scatter(xdata, ydata)
plt.plot(xs, fourPL(xs, *params), 'm--', label='No Weight')
plt.plot(xs, fourPL(xs, *popt2), 'b--', label='Weights')
plt.legend(loc='upper center', bbox_to_anchor=(0.5, 1.05, 0.1, 0.1),
ncol=3, fancybox=True, shadow=True)
plt.xlabel('µg/mL)')
plt.ylabel('kHz/s')
#plt.xscale('log')
plt.show()
```
这将是我使用手动 least_squares
配合的版本。我将其与使用简单 curve_fit
获得的解决方案进行比较。其实差别不是很大而且 curve_fit
结果对我来说还不错。
import matplotlib.pyplot as plt
import numpy as np
from scipy.optimize import least_squares
from scipy.optimize import curve_fit
np.set_printoptions( linewidth=250, precision=2 ) ## avoids OPs "roundy"
xdata = np.array(
[
0.10, 0.10, 0.10, 0.10, 0.10, 0.10, 1.12, 1.12, 1.12, 1.12,
1.12, 1.12, 2.89, 2.89, 2.89, 2.89, 2.89, 2.89, 6.19, 6.19,
6.19, 6.19, 6.19, 23.30, 23.30, 23.30, 23.30, 23.30, 23.30,
108.98, 108.98, 108.98, 108.98, 108.98, 255.33, 255.33, 255.33,
255.33, 255.33, 255.33, 1188.62, 1188.62, 1188.62, 1188.62,
1188.62
], dtype=float
)
ydata = np.array(
[
0.264352, 0.412386, 0.231238, 0.483558, 0.613206, 0.728528,
-1.15391, -1.46504, -0.942926, -2.12808, -2.90962, -1.51093,
-3.09798, -5.08591, -4.75703, -4.91317, -5.1966, -4.04019,
-13.8455, -16.9911, -11.0881, -10.6453, -15.1288, -52.4669,
-68.2344, -74.7673, -70.2025, -65.8181, -55.7344, -271.286,
-329.521, -436.097, -654.034, -396.45, -826.195, -1084.43,
-984.344, -1124.8, -1076.27, -1072.03, -3968.22, -3114.46,
-3771.61, -2805.4, -4078.05
], dtype=float
)
def fourPL( x, a, b, c, d ):
out = ( a - d ) / ( 1 + ( x / c )**b ) + d
return out
def residuals( params, xlist, ylist ):
# ~a, b, c, d = params
yth = np.fromiter( (fourPL( x, *params ) for x in xlist ), np.float )
diff = np.subtract( yth, ylist )
weights = 1 / np.abs( yth )**1
## not sure if this makes sense, but we weigth with function value
## here I put it inverse linear as it gets squared in the chi-square
## but other weighting may be required
return diff * weights
### for initial guess
xl = np.linspace( .1, 1200, 150 )
yl0 = np.fromiter( (fourPL( x, 1, 1, 1000, -6000) for x in xl ), np.float )
### standard curve_fit
cfsol, _ = curve_fit( fourPL, xdata, ydata, p0=( 1, 1, 1000, -6000 ) )
print( cfsol )
yl1 = np.fromiter( (fourPL( x, *cfsol ) for x in xl ), np.float )
### least square with manual residual function including "unusual" weighting
lssol = least_squares( residuals, x0=( 1, 1, 1000, -6000 ), args=( xdata, ydata ) )
print( lssol.x )
yl2 = np.fromiter( (fourPL( x, *( lssol.x ) ) for x in xl ), np.float )
### plotting
fig = plt.figure()
ax = fig.add_subplot( 1, 1, 1 )
ax.scatter( xdata, ydata )
ax.plot( xl, yl1 )
ax.plot( xl, yl2 )
plt.show()
查看协方差矩阵,不仅误差较大,而且参数的相关性也很高。这当然是由于模型的性质所致,但应谨慎处理,尤其是在涉及数据解释时。
当我们只考虑小 x
的数据时,问题变得更加明显。无论如何,大 x
的数据分散很多。对于小 x
或大 c
,泰勒展开是 (a - d ) (1 - b x / c ) + d
,即 a - (a - d ) b / c x
,基本上是 a + e x
。所以b
、c
和d
基本上是一样的。
我试过 scipy.optimize import curve_fit 但它似乎只更改了数据点。我想在从我的数据点残差(带权重的最小平方)拟合 Y 曲线的过程中添加 1/Y^2 权重。我不确定如何定位 yfit 而不是 ydata 或者我是否应该使用其他东西?任何帮助将不胜感激。
xdata = np.array([0.10, 0.10, 0.10, 0.10, 0.10, 0.10, 1.12, 1.12, 1.12, 1.12, 1.12, 1.12, 2.89, 2.89, 2.89, 2.89,
2.89, 2.89, 6.19, 6.19, 6.19, 6.19, 6.19, 23.30, 23.30, 23.30, 23.30, 23.30, 23.30, 108.98, 108.98,
108.98, 108.98, 108.98, 255.33, 255.33, 255.33, 255.33, 255.33, 255.33, 1188.62, 1188.62, 1188.62,
1188.62, 1188.62], dtype=float)
ydata = np.array([0.264352, 0.412386, 0.231238, 0.483558, 0.613206, 0.728528, -1.15391, -1.46504, -0.942926,
-2.12808, -2.90962, -1.51093, -3.09798, -5.08591, -4.75703, -4.91317, -5.1966, -4.04019, -13.8455,
-16.9911, -11.0881, -10.6453, -15.1288, -52.4669, -68.2344, -74.7673, -70.2025, -65.8181, -55.7344,
-271.286, -329.521, -436.097, -654.034, -396.45, -826.195, -1084.43, -984.344, -1124.8, -1076.27,
-1072.03, -3968.22, -3114.46, -3771.61, -2805.4, -4078.05], dtype=float)
def fourPL(x, A, B, C, D):
return ((A-D)/(1.0+((x/C)**B))) + D
params, params_covariance = spo.curve_fit(fourPL, xdata, ydata)
params_list = params
roundy = [round(num, 4) for num in params_list]
print(roundy)
popt2, pcov2 = spo.curve_fit(fourPL, xdata, ydata, sigma=1/ydata**2, absolute_sigma=True)
yfit2 = fourPL(xdata, *popt2)
params_list2 = popt2
roundy2 = [round(num, 4) for num in params_list2]
print(roundy2)
x_min, x_max = np.amin(xdata), np.amax(xdata)
xs = np.linspace(x_min, x_max, 1000)
plt.scatter(xdata, ydata)
plt.plot(xs, fourPL(xs, *params), 'm--', label='No Weight')
plt.plot(xs, fourPL(xs, *popt2), 'b--', label='Weights')
plt.legend(loc='upper center', bbox_to_anchor=(0.5, 1.05, 0.1, 0.1),
ncol=3, fancybox=True, shadow=True)
plt.xlabel('µg/mL)')
plt.ylabel('kHz/s')
#plt.xscale('log')
plt.show()
```
这将是我使用手动 least_squares
配合的版本。我将其与使用简单 curve_fit
获得的解决方案进行比较。其实差别不是很大而且 curve_fit
结果对我来说还不错。
import matplotlib.pyplot as plt
import numpy as np
from scipy.optimize import least_squares
from scipy.optimize import curve_fit
np.set_printoptions( linewidth=250, precision=2 ) ## avoids OPs "roundy"
xdata = np.array(
[
0.10, 0.10, 0.10, 0.10, 0.10, 0.10, 1.12, 1.12, 1.12, 1.12,
1.12, 1.12, 2.89, 2.89, 2.89, 2.89, 2.89, 2.89, 6.19, 6.19,
6.19, 6.19, 6.19, 23.30, 23.30, 23.30, 23.30, 23.30, 23.30,
108.98, 108.98, 108.98, 108.98, 108.98, 255.33, 255.33, 255.33,
255.33, 255.33, 255.33, 1188.62, 1188.62, 1188.62, 1188.62,
1188.62
], dtype=float
)
ydata = np.array(
[
0.264352, 0.412386, 0.231238, 0.483558, 0.613206, 0.728528,
-1.15391, -1.46504, -0.942926, -2.12808, -2.90962, -1.51093,
-3.09798, -5.08591, -4.75703, -4.91317, -5.1966, -4.04019,
-13.8455, -16.9911, -11.0881, -10.6453, -15.1288, -52.4669,
-68.2344, -74.7673, -70.2025, -65.8181, -55.7344, -271.286,
-329.521, -436.097, -654.034, -396.45, -826.195, -1084.43,
-984.344, -1124.8, -1076.27, -1072.03, -3968.22, -3114.46,
-3771.61, -2805.4, -4078.05
], dtype=float
)
def fourPL( x, a, b, c, d ):
out = ( a - d ) / ( 1 + ( x / c )**b ) + d
return out
def residuals( params, xlist, ylist ):
# ~a, b, c, d = params
yth = np.fromiter( (fourPL( x, *params ) for x in xlist ), np.float )
diff = np.subtract( yth, ylist )
weights = 1 / np.abs( yth )**1
## not sure if this makes sense, but we weigth with function value
## here I put it inverse linear as it gets squared in the chi-square
## but other weighting may be required
return diff * weights
### for initial guess
xl = np.linspace( .1, 1200, 150 )
yl0 = np.fromiter( (fourPL( x, 1, 1, 1000, -6000) for x in xl ), np.float )
### standard curve_fit
cfsol, _ = curve_fit( fourPL, xdata, ydata, p0=( 1, 1, 1000, -6000 ) )
print( cfsol )
yl1 = np.fromiter( (fourPL( x, *cfsol ) for x in xl ), np.float )
### least square with manual residual function including "unusual" weighting
lssol = least_squares( residuals, x0=( 1, 1, 1000, -6000 ), args=( xdata, ydata ) )
print( lssol.x )
yl2 = np.fromiter( (fourPL( x, *( lssol.x ) ) for x in xl ), np.float )
### plotting
fig = plt.figure()
ax = fig.add_subplot( 1, 1, 1 )
ax.scatter( xdata, ydata )
ax.plot( xl, yl1 )
ax.plot( xl, yl2 )
plt.show()
查看协方差矩阵,不仅误差较大,而且参数的相关性也很高。这当然是由于模型的性质所致,但应谨慎处理,尤其是在涉及数据解释时。
当我们只考虑小 x
的数据时,问题变得更加明显。无论如何,大 x
的数据分散很多。对于小 x
或大 c
,泰勒展开是 (a - d ) (1 - b x / c ) + d
,即 a - (a - d ) b / c x
,基本上是 a + e x
。所以b
、c
和d
基本上是一样的。