考虑不确定性的高斯拟合
Gaussian fit with consideration of uncertainties
我无法理解以下代码有什么问题:
import numpy as np
import matplotlib.pyplot as plt
from scipy.odr import *
def gauss(p,x):
return p[0]*np.exp(-(x-p[1])**2/(2*p[2]**2)+p[4]) + p[3]
# Create a model for fitting.
gg = Model(gauss)
x = np.arange(0, 350)
# Create a RealData object using our initiated data from above.
data = RealData(x, y_data, sx=0, sy=y_data_err)
# Set up ODR with the model and data.
odr = ODR(data, gg, beta0=[0.1, 1., 1.0, 1.0, 1.0])
# Run the regression.
out = odr.run()
# Use the in-built pprint method to give us results.
out.pprint()
x_fit = np.linspace(x[0], x[-1], 1000)
y_fit = gauss(out.beta, x_fit)
plt.figure()
plt.errorbar(x, xy_data xerr=0, yerr=y_data_err, linestyle='None', marker='x')
plt.plot(x_fit, y_fit)
plt.show()
这是直接从 here 复制过来的,只是更改了模型。我得到的错误是
scipy.odr.odrpack.odr_error: number of observations do not match
但据我所知 beta0
有五个参数,恰好与 gauss
工作所需的一样多。如果有人能指出错误来源或我的误解,那就太好了。
这是一个用你的方程式绘制的图形,在一张图上比较了 ODR 和 curve_fit。该示例使用 scipy 的 differential_evolution 遗传算法模块来确定求解器的初始参数估计,并且该模块实现拉丁超立方体算法以确保彻底搜索需要边界的参数 space在其中搜索。在此示例中,这些边界取自数据的最大值和最小值。由于您的 post 没有包含数据,我在示例中使用了自己的测试数据。在此示例中,两条拟合曲线看起来非常相似,在绘制的极值处略有不同。
import numpy, scipy, matplotlib
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
import scipy.odr
from scipy.optimize import differential_evolution
import warnings
xData = numpy.array([1.1, 2.2, 3.3, 4.4, 5.0, 6.6, 7.7, 0.0])
yData = numpy.array([1.1, 20.2, 30.3, 40.4, 50.0, 60.6, 70.7, 0.1])
def func(x, a, b, c, d, offset): # curve fitting function for curve_fit()
return a*numpy.exp(-(x-b)**2/(2*c**2)+d) + offset
def func_wrapper_for_ODR(parameters, x): # parameter order for ODR
return func(x, *parameters)
# function for genetic algorithm to minimize (sum of squared error)
def sumOfSquaredError(parameterTuple):
warnings.filterwarnings("ignore") # do not print warnings by genetic algorithm
val = func(xData, *parameterTuple)
return numpy.sum((yData - val) ** 2.0)
def generate_Initial_Parameters():
# min and max used for bounds
maxX = max(xData)
minX = min(xData)
maxY = max(yData)
minY = min(yData)
parameterBounds = []
parameterBounds.append([minY, maxY]) # search bounds for a
parameterBounds.append([minX, maxX]) # search bounds for b
parameterBounds.append([minX, maxX]) # search bounds for c
parameterBounds.append([minY, maxY]) # search bounds for d
parameterBounds.append([0.0, maxY]) # search bounds for Offset
# "seed" the numpy random number generator for repeatable results
result = differential_evolution(sumOfSquaredError, parameterBounds, seed=3)
return result.x
geneticParameters = generate_Initial_Parameters()
##########################
# curve_fit section
##########################
fittedParameters_curvefit, pcov = curve_fit(func, xData, yData, geneticParameters)
print('Fitted parameters curve_fit:', fittedParameters_curvefit)
print()
modelPredictions_curvefit = func(xData, *fittedParameters_curvefit)
absError_curvefit = modelPredictions_curvefit - yData
SE_curvefit = numpy.square(absError_curvefit) # squared errors
MSE_curvefit = numpy.mean(SE_curvefit) # mean squared errors
RMSE_curvefit = numpy.sqrt(MSE_curvefit) # Root Mean Squared Error, RMSE
Rsquared_curvefit = 1.0 - (numpy.var(absError_curvefit) / numpy.var(yData))
print()
print('RMSE curve_fit:', RMSE_curvefit)
print('R-squared curve_fit:', Rsquared_curvefit)
print()
##########################
# ODR section
##########################
data = scipy.odr.odrpack.Data(xData,yData)
model = scipy.odr.odrpack.Model(func_wrapper_for_ODR)
odr = scipy.odr.odrpack.ODR(data, model, beta0=geneticParameters)
# Run the regression.
odr_out = odr.run()
print('Fitted parameters ODR:', odr_out.beta)
print()
modelPredictions_odr = func(xData, *odr_out.beta)
absError_odr = modelPredictions_odr - yData
SE_odr = numpy.square(absError_odr) # squared errors
MSE_odr = numpy.mean(SE_odr) # mean squared errors
RMSE_odr = numpy.sqrt(MSE_odr) # Root Mean Squared Error, RMSE
Rsquared_odr = 1.0 - (numpy.var(absError_odr) / numpy.var(yData))
print()
print('RMSE ODR:', RMSE_odr)
print('R-squared ODR:', Rsquared_odr)
print()
##########################################################
# graphics output section
def ModelsAndScatterPlot(graphWidth, graphHeight):
f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
axes = f.add_subplot(111)
# first the raw data as a scatter plot
axes.plot(xData, yData, 'D')
# create data for the fitted equation plots
xModel = numpy.linspace(min(xData), max(xData))
yModel_curvefit = func(xModel, *fittedParameters_curvefit)
yModel_odr = func(xModel, *odr_out.beta)
# now the models as line plots
axes.plot(xModel, yModel_curvefit)
axes.plot(xModel, yModel_odr)
axes.set_xlabel('X Data') # X axis data label
axes.set_ylabel('Y Data') # Y axis data label
plt.show()
plt.close('all') # clean up after using pyplot
graphWidth = 800
graphHeight = 600
ModelsAndScatterPlot(graphWidth, graphHeight)
我无法理解以下代码有什么问题:
import numpy as np
import matplotlib.pyplot as plt
from scipy.odr import *
def gauss(p,x):
return p[0]*np.exp(-(x-p[1])**2/(2*p[2]**2)+p[4]) + p[3]
# Create a model for fitting.
gg = Model(gauss)
x = np.arange(0, 350)
# Create a RealData object using our initiated data from above.
data = RealData(x, y_data, sx=0, sy=y_data_err)
# Set up ODR with the model and data.
odr = ODR(data, gg, beta0=[0.1, 1., 1.0, 1.0, 1.0])
# Run the regression.
out = odr.run()
# Use the in-built pprint method to give us results.
out.pprint()
x_fit = np.linspace(x[0], x[-1], 1000)
y_fit = gauss(out.beta, x_fit)
plt.figure()
plt.errorbar(x, xy_data xerr=0, yerr=y_data_err, linestyle='None', marker='x')
plt.plot(x_fit, y_fit)
plt.show()
这是直接从 here 复制过来的,只是更改了模型。我得到的错误是
scipy.odr.odrpack.odr_error: number of observations do not match
但据我所知 beta0
有五个参数,恰好与 gauss
工作所需的一样多。如果有人能指出错误来源或我的误解,那就太好了。
这是一个用你的方程式绘制的图形,在一张图上比较了 ODR 和 curve_fit。该示例使用 scipy 的 differential_evolution 遗传算法模块来确定求解器的初始参数估计,并且该模块实现拉丁超立方体算法以确保彻底搜索需要边界的参数 space在其中搜索。在此示例中,这些边界取自数据的最大值和最小值。由于您的 post 没有包含数据,我在示例中使用了自己的测试数据。在此示例中,两条拟合曲线看起来非常相似,在绘制的极值处略有不同。
import numpy, scipy, matplotlib
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
import scipy.odr
from scipy.optimize import differential_evolution
import warnings
xData = numpy.array([1.1, 2.2, 3.3, 4.4, 5.0, 6.6, 7.7, 0.0])
yData = numpy.array([1.1, 20.2, 30.3, 40.4, 50.0, 60.6, 70.7, 0.1])
def func(x, a, b, c, d, offset): # curve fitting function for curve_fit()
return a*numpy.exp(-(x-b)**2/(2*c**2)+d) + offset
def func_wrapper_for_ODR(parameters, x): # parameter order for ODR
return func(x, *parameters)
# function for genetic algorithm to minimize (sum of squared error)
def sumOfSquaredError(parameterTuple):
warnings.filterwarnings("ignore") # do not print warnings by genetic algorithm
val = func(xData, *parameterTuple)
return numpy.sum((yData - val) ** 2.0)
def generate_Initial_Parameters():
# min and max used for bounds
maxX = max(xData)
minX = min(xData)
maxY = max(yData)
minY = min(yData)
parameterBounds = []
parameterBounds.append([minY, maxY]) # search bounds for a
parameterBounds.append([minX, maxX]) # search bounds for b
parameterBounds.append([minX, maxX]) # search bounds for c
parameterBounds.append([minY, maxY]) # search bounds for d
parameterBounds.append([0.0, maxY]) # search bounds for Offset
# "seed" the numpy random number generator for repeatable results
result = differential_evolution(sumOfSquaredError, parameterBounds, seed=3)
return result.x
geneticParameters = generate_Initial_Parameters()
##########################
# curve_fit section
##########################
fittedParameters_curvefit, pcov = curve_fit(func, xData, yData, geneticParameters)
print('Fitted parameters curve_fit:', fittedParameters_curvefit)
print()
modelPredictions_curvefit = func(xData, *fittedParameters_curvefit)
absError_curvefit = modelPredictions_curvefit - yData
SE_curvefit = numpy.square(absError_curvefit) # squared errors
MSE_curvefit = numpy.mean(SE_curvefit) # mean squared errors
RMSE_curvefit = numpy.sqrt(MSE_curvefit) # Root Mean Squared Error, RMSE
Rsquared_curvefit = 1.0 - (numpy.var(absError_curvefit) / numpy.var(yData))
print()
print('RMSE curve_fit:', RMSE_curvefit)
print('R-squared curve_fit:', Rsquared_curvefit)
print()
##########################
# ODR section
##########################
data = scipy.odr.odrpack.Data(xData,yData)
model = scipy.odr.odrpack.Model(func_wrapper_for_ODR)
odr = scipy.odr.odrpack.ODR(data, model, beta0=geneticParameters)
# Run the regression.
odr_out = odr.run()
print('Fitted parameters ODR:', odr_out.beta)
print()
modelPredictions_odr = func(xData, *odr_out.beta)
absError_odr = modelPredictions_odr - yData
SE_odr = numpy.square(absError_odr) # squared errors
MSE_odr = numpy.mean(SE_odr) # mean squared errors
RMSE_odr = numpy.sqrt(MSE_odr) # Root Mean Squared Error, RMSE
Rsquared_odr = 1.0 - (numpy.var(absError_odr) / numpy.var(yData))
print()
print('RMSE ODR:', RMSE_odr)
print('R-squared ODR:', Rsquared_odr)
print()
##########################################################
# graphics output section
def ModelsAndScatterPlot(graphWidth, graphHeight):
f = plt.figure(figsize=(graphWidth/100.0, graphHeight/100.0), dpi=100)
axes = f.add_subplot(111)
# first the raw data as a scatter plot
axes.plot(xData, yData, 'D')
# create data for the fitted equation plots
xModel = numpy.linspace(min(xData), max(xData))
yModel_curvefit = func(xModel, *fittedParameters_curvefit)
yModel_odr = func(xModel, *odr_out.beta)
# now the models as line plots
axes.plot(xModel, yModel_curvefit)
axes.plot(xModel, yModel_odr)
axes.set_xlabel('X Data') # X axis data label
axes.set_ylabel('Y Data') # Y axis data label
plt.show()
plt.close('all') # clean up after using pyplot
graphWidth = 800
graphHeight = 600
ModelsAndScatterPlot(graphWidth, graphHeight)