使用 Python 的低峰宽高斯拟合不准确
gaussian fitting inaccurate for lower peak width using Python
我正在尝试使高斯分布适应嘈杂的吸收光谱。但是,它似乎并不适用于所有情况。当我尝试将峰宽减小到例如peak_width=10,下面的代码确实很适合,只是一行。同样,如果我将峰值的位置进一步向右移动 x_peak_loc=160,它也不起作用。我怎样才能更好地适应这些情况?谢谢!下面是代码:
import numpy as np
from scipy.optimize import curve_fit
import matplotlib.pyplot as mpl
import matplotlib.pyplot as plt
import scipy.integrate as integrate
def func(x, a, x0, sigma):
#return a*(1/(np.sqrt(2*np.pi*sigma**2 ))) *np.exp(-(x-x0)**2/(2*sigma**2))
return a*np.exp(-(x-x0)**2/(2*sigma**2))
amplitude=-10
peak_width=30
x_peak_loc=70
# Generating clean data
x = np.linspace(0, 200, 1000)
y = func(x, amplitude,x_peak_loc, peak_width)
# Adding noise to the data
mn = 0
N=0.2
std=np.sqrt(N)
noise2=np.random.normal(mn,std,size=len(x))
yn = y + noise2
fig = mpl.figure(1)
ax = fig.add_subplot(111)
ax.plot(x, y, c='k', label='analytic function')
ax.scatter(x, yn, s=5, label='fake noisy data')
fig.savefig('model_and_noise.png')
popt, pcov = curve_fit(func, x, yn)
print (popt)
ym = func(x, popt[0], popt[1], popt[2])
ax.plot(x, ym, c='r', label='Best fit')
ax.legend()
fig.savefig('model_fit.png')
plt.legend(loc='upper left')
plt.xlabel("v")
plt.ylabel("f(v)")
我认为问题在于使用 curve_fit
或任何基于梯度的算法。在处理嘈杂的数据时,他们往往会陷入局部最小值。有时你很幸运,有时你不是。如果增加噪声幅度,您将获得与改变峰宽相同的(不幸的)效果。
您应该提供一些好的初始猜测或将拟合方法更改为完全不同的方法。您需要使用某种 hyperparameter optimization 方法搜索 space 个函数参数。
也许一些简单的级联(精度不断提高)网格搜索就足够了?
嗯,你说的是频谱,所以我猜有不止一个峰。在这种情况下 scipy.signal.find_peaks()
can be very useful.In case of peaks that can be isolated there is definitively no need for machine-learning overkill as it can be done by simple linear fits as described by Jean Jacquelin
import numpy as np
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt
from scipy.integrate import cumtrapz
def func( x, a, x0, sigma ):
return a * np.exp( -( x - x0 )**2 / ( 2 * sigma**2 ) )
amplitude = -10
peak_width = 30
x_peak_loc = 160
# Generating clean data
xl = np.linspace( 0, 200, 1000 )
y0 = func( xl, amplitude, x_peak_loc, peak_width )
mn = 0
N = 0.2
std = np.sqrt( N )
noise2 = np.random.normal( mn, std, size=len( xl ) )
yl = y0 + noise2
"""
Most simple implementation of the linear fit of mu and sigma
"""
nul = yl - yl[0]
Sk = cumtrapz( yl, x=xl, initial=0 )
Tk = cumtrapz( xl * yl, x=xl, initial=0 )
MX = [
[ np.dot( Sk, Sk ), np.dot( Sk, Tk ) ],
[ np.dot( Sk, Tk ), np.dot( Tk, Tk ) ]
]
Vek = [ np.dot( nul, Sk ), np.dot( nul, Tk ) ]
res = np.dot( np.linalg.inv( MX ), Vek )
sig = np.sqrt( -1 / res[1] )
mu = res[0] * sig**2
print( sig )
print( mu )
"""
Most simple linear fit of amplitude, probably not required but...hey.
"""
fk = func( xl, 1, mu, sig )
amp = np.dot( yl, fk ) / np.dot( fk, fk )
print( amp )
print( "probably quite close, already")
popt, pcov = curve_fit( func, xl, yl, p0=( sig, mu, amp ) )
print( popt )
fig = plt.figure( 1 )
ax = fig.add_subplot( 1, 1, 1 )
ax.plot( xl, y0, c='k', label="analytic function" )
ax.scatter( xl, yl, s=5, label="fake noisy data" )
ym = func( xl, *popt )
ax.plot( xl, ym, c='r', label="Best fit" )
ax.legend()
fig.savefig( "model_fit.png" )
plt.legend( loc="lower left" )
plt.xlabel( "v" )
plt.ylabel( "f(v)" )
plt.show()
我正在尝试使高斯分布适应嘈杂的吸收光谱。但是,它似乎并不适用于所有情况。当我尝试将峰宽减小到例如peak_width=10,下面的代码确实很适合,只是一行。同样,如果我将峰值的位置进一步向右移动 x_peak_loc=160,它也不起作用。我怎样才能更好地适应这些情况?谢谢!下面是代码:
import numpy as np
from scipy.optimize import curve_fit
import matplotlib.pyplot as mpl
import matplotlib.pyplot as plt
import scipy.integrate as integrate
def func(x, a, x0, sigma):
#return a*(1/(np.sqrt(2*np.pi*sigma**2 ))) *np.exp(-(x-x0)**2/(2*sigma**2))
return a*np.exp(-(x-x0)**2/(2*sigma**2))
amplitude=-10
peak_width=30
x_peak_loc=70
# Generating clean data
x = np.linspace(0, 200, 1000)
y = func(x, amplitude,x_peak_loc, peak_width)
# Adding noise to the data
mn = 0
N=0.2
std=np.sqrt(N)
noise2=np.random.normal(mn,std,size=len(x))
yn = y + noise2
fig = mpl.figure(1)
ax = fig.add_subplot(111)
ax.plot(x, y, c='k', label='analytic function')
ax.scatter(x, yn, s=5, label='fake noisy data')
fig.savefig('model_and_noise.png')
popt, pcov = curve_fit(func, x, yn)
print (popt)
ym = func(x, popt[0], popt[1], popt[2])
ax.plot(x, ym, c='r', label='Best fit')
ax.legend()
fig.savefig('model_fit.png')
plt.legend(loc='upper left')
plt.xlabel("v")
plt.ylabel("f(v)")
我认为问题在于使用 curve_fit
或任何基于梯度的算法。在处理嘈杂的数据时,他们往往会陷入局部最小值。有时你很幸运,有时你不是。如果增加噪声幅度,您将获得与改变峰宽相同的(不幸的)效果。
您应该提供一些好的初始猜测或将拟合方法更改为完全不同的方法。您需要使用某种 hyperparameter optimization 方法搜索 space 个函数参数。
也许一些简单的级联(精度不断提高)网格搜索就足够了?
嗯,你说的是频谱,所以我猜有不止一个峰。在这种情况下 scipy.signal.find_peaks()
can be very useful.In case of peaks that can be isolated there is definitively no need for machine-learning overkill as it can be done by simple linear fits as described by Jean Jacquelin
import numpy as np
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt
from scipy.integrate import cumtrapz
def func( x, a, x0, sigma ):
return a * np.exp( -( x - x0 )**2 / ( 2 * sigma**2 ) )
amplitude = -10
peak_width = 30
x_peak_loc = 160
# Generating clean data
xl = np.linspace( 0, 200, 1000 )
y0 = func( xl, amplitude, x_peak_loc, peak_width )
mn = 0
N = 0.2
std = np.sqrt( N )
noise2 = np.random.normal( mn, std, size=len( xl ) )
yl = y0 + noise2
"""
Most simple implementation of the linear fit of mu and sigma
"""
nul = yl - yl[0]
Sk = cumtrapz( yl, x=xl, initial=0 )
Tk = cumtrapz( xl * yl, x=xl, initial=0 )
MX = [
[ np.dot( Sk, Sk ), np.dot( Sk, Tk ) ],
[ np.dot( Sk, Tk ), np.dot( Tk, Tk ) ]
]
Vek = [ np.dot( nul, Sk ), np.dot( nul, Tk ) ]
res = np.dot( np.linalg.inv( MX ), Vek )
sig = np.sqrt( -1 / res[1] )
mu = res[0] * sig**2
print( sig )
print( mu )
"""
Most simple linear fit of amplitude, probably not required but...hey.
"""
fk = func( xl, 1, mu, sig )
amp = np.dot( yl, fk ) / np.dot( fk, fk )
print( amp )
print( "probably quite close, already")
popt, pcov = curve_fit( func, xl, yl, p0=( sig, mu, amp ) )
print( popt )
fig = plt.figure( 1 )
ax = fig.add_subplot( 1, 1, 1 )
ax.plot( xl, y0, c='k', label="analytic function" )
ax.scatter( xl, yl, s=5, label="fake noisy data" )
ym = func( xl, *popt )
ax.plot( xl, ym, c='r', label="Best fit" )
ax.legend()
fig.savefig( "model_fit.png" )
plt.legend( loc="lower left" )
plt.xlabel( "v" )
plt.ylabel( "f(v)" )
plt.show()