如何执行多条曲线的联合拟合(Python)?
How to perform a joint fit of several curves (in Python)?
假设我正在通过简单的线性回归拟合一些数据点。现在我想对几组数据点执行几个 joint 线性回归。更具体地说,我希望一个参数在所有拟合中都相等,这里示意性地描绘了 y 轴交点。
在搜索 Google 一段时间后,我既找不到任何 Python (Scipy) 例程可以做到这一点,也找不到任何一般文献来说明如何实现这一点。
理想情况下,我不仅要在简单线性回归的情况下执行这些联合拟合,而且要对更一般的拟合函数(例如,幂律拟合与联合指数)执行这些联合拟合。
我认为这个绘图代码示例可以满足您的要求,用一个共享参数拟合两个数据集。请注意,如果数据集的长度不等,则可以有效地对具有更多单独点的数据集进行加权。此示例明确将初始参数值设置为 1,0 - curve_fit() 默认值 - 并且不使用 scipy 的遗传算法来帮助找到初始参数估计值。
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
y1 = np.array([ 16.00, 18.42, 20.84, 23.26])
y2 = np.array([-20.00, -25.50, -31.00, -36.50, -42.00])
comboY = np.append(y1, y2)
x1 = np.array([5.0, 6.1, 7.2, 8.3])
x2 = np.array([15.0, 16.1, 17.2, 18.3, 19.4])
comboX = np.append(x1, x2)
if len(y1) != len(x1):
raise(Exception('Unequal x1 and y1 data length'))
if len(y2) != len(x2):
raise(Exception('Unequal x2 and y2 data length'))
def function1(data, a, b, c): # not all parameters are used here, c is shared
return a * data + c
def function2(data, a, b, c): # not all parameters are used here, c is shared
return b * data + c
def combinedFunction(comboData, a, b, c):
# single data reference passed in, extract separate data
extract1 = comboData[:len(x1)] # first data
extract2 = comboData[len(x1):] # second data
result1 = function1(extract1, a, b, c)
result2 = function2(extract2, a, b, c)
return np.append(result1, result2)
# some initial parameter values
initialParameters = np.array([1.0, 1.0, 1.0])
# curve fit the combined data to the combined function
fittedParameters, pcov = curve_fit(combinedFunction, comboX, comboY, initialParameters)
# values for display of fitted function
a, b, c = fittedParameters
y_fit_1 = function1(x1, a, b, c) # first data set, first equation
y_fit_2 = function2(x2, a, b, c) # second data set, second equation
plt.plot(comboX, comboY, 'D') # plot the raw data
plt.plot(x1, y_fit_1) # plot the equation using the fitted parameters
plt.plot(x2, y_fit_2) # plot the equation using the fitted parameters
plt.show()
print('a, b, c:', fittedParameters)
lmfit
module allows you to do this, as mentioned in their FAQ:
from lmfit import minimize, Parameters, fit_report
import numpy as np
# residual function to minimize
def fit_function(params, x=None, dat1=None, dat2=None):
model1 = params['offset'] + x * params['slope1']
model2 = params['offset'] + x * params['slope2']
resid1 = dat1 - model1
resid2 = dat2 - model2
return np.concatenate((resid1, resid2))
# setup fit parameters
params = Parameters()
params.add('slope1', value=1)
params.add('slope2', value=-1)
params.add('offset', value=0.5)
# generate sample data
x = np.arange(0, 10)
slope1, slope2, offset = 1.1, -0.9, 0.2
y1 = slope1 * x + offset
y2 = slope2 * x + offset
# fit
out = minimize(residual, params, kws={"x": x, "dat1": y1, "dat2": y2})
print(fit_report(out))
# [[Fit Statistics]]
# # fitting method = leastsq
# # function evals = 9
# # data points = 20
# # variables = 3
# chi-square = 1.4945e-31
# reduced chi-square = 8.7913e-33
# Akaike info crit = -1473.48128
# Bayesian info crit = -1470.49408
# [[Variables]]
# slope1: 1.10000000 +/- 8.2888e-18 (0.00%) (init = 1)
# slope2: -0.90000000 +/- 8.2888e-18 (0.00%) (init = -1)
# offset: 0.20000000 +/- 3.8968e-17 (0.00%) (init = 0.5)
# [[Correlations]] (unreported correlations are < 0.100)
# C(slope1, offset) = -0.742
# C(slope2, offset) = -0.742
# C(slope1, slope2) = 0.551
假设我正在通过简单的线性回归拟合一些数据点。现在我想对几组数据点执行几个 joint 线性回归。更具体地说,我希望一个参数在所有拟合中都相等,这里示意性地描绘了 y 轴交点。
在搜索 Google 一段时间后,我既找不到任何 Python (Scipy) 例程可以做到这一点,也找不到任何一般文献来说明如何实现这一点。
理想情况下,我不仅要在简单线性回归的情况下执行这些联合拟合,而且要对更一般的拟合函数(例如,幂律拟合与联合指数)执行这些联合拟合。
我认为这个绘图代码示例可以满足您的要求,用一个共享参数拟合两个数据集。请注意,如果数据集的长度不等,则可以有效地对具有更多单独点的数据集进行加权。此示例明确将初始参数值设置为 1,0 - curve_fit() 默认值 - 并且不使用 scipy 的遗传算法来帮助找到初始参数估计值。
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
y1 = np.array([ 16.00, 18.42, 20.84, 23.26])
y2 = np.array([-20.00, -25.50, -31.00, -36.50, -42.00])
comboY = np.append(y1, y2)
x1 = np.array([5.0, 6.1, 7.2, 8.3])
x2 = np.array([15.0, 16.1, 17.2, 18.3, 19.4])
comboX = np.append(x1, x2)
if len(y1) != len(x1):
raise(Exception('Unequal x1 and y1 data length'))
if len(y2) != len(x2):
raise(Exception('Unequal x2 and y2 data length'))
def function1(data, a, b, c): # not all parameters are used here, c is shared
return a * data + c
def function2(data, a, b, c): # not all parameters are used here, c is shared
return b * data + c
def combinedFunction(comboData, a, b, c):
# single data reference passed in, extract separate data
extract1 = comboData[:len(x1)] # first data
extract2 = comboData[len(x1):] # second data
result1 = function1(extract1, a, b, c)
result2 = function2(extract2, a, b, c)
return np.append(result1, result2)
# some initial parameter values
initialParameters = np.array([1.0, 1.0, 1.0])
# curve fit the combined data to the combined function
fittedParameters, pcov = curve_fit(combinedFunction, comboX, comboY, initialParameters)
# values for display of fitted function
a, b, c = fittedParameters
y_fit_1 = function1(x1, a, b, c) # first data set, first equation
y_fit_2 = function2(x2, a, b, c) # second data set, second equation
plt.plot(comboX, comboY, 'D') # plot the raw data
plt.plot(x1, y_fit_1) # plot the equation using the fitted parameters
plt.plot(x2, y_fit_2) # plot the equation using the fitted parameters
plt.show()
print('a, b, c:', fittedParameters)
lmfit
module allows you to do this, as mentioned in their FAQ:
from lmfit import minimize, Parameters, fit_report
import numpy as np
# residual function to minimize
def fit_function(params, x=None, dat1=None, dat2=None):
model1 = params['offset'] + x * params['slope1']
model2 = params['offset'] + x * params['slope2']
resid1 = dat1 - model1
resid2 = dat2 - model2
return np.concatenate((resid1, resid2))
# setup fit parameters
params = Parameters()
params.add('slope1', value=1)
params.add('slope2', value=-1)
params.add('offset', value=0.5)
# generate sample data
x = np.arange(0, 10)
slope1, slope2, offset = 1.1, -0.9, 0.2
y1 = slope1 * x + offset
y2 = slope2 * x + offset
# fit
out = minimize(residual, params, kws={"x": x, "dat1": y1, "dat2": y2})
print(fit_report(out))
# [[Fit Statistics]]
# # fitting method = leastsq
# # function evals = 9
# # data points = 20
# # variables = 3
# chi-square = 1.4945e-31
# reduced chi-square = 8.7913e-33
# Akaike info crit = -1473.48128
# Bayesian info crit = -1470.49408
# [[Variables]]
# slope1: 1.10000000 +/- 8.2888e-18 (0.00%) (init = 1)
# slope2: -0.90000000 +/- 8.2888e-18 (0.00%) (init = -1)
# offset: 0.20000000 +/- 3.8968e-17 (0.00%) (init = 0.5)
# [[Correlations]] (unreported correlations are < 0.100)
# C(slope1, offset) = -0.742
# C(slope2, offset) = -0.742
# C(slope1, slope2) = 0.551