使用 LMFIT 将对数正态模型拟合到数据
Fitting a log-normal model to data using LMFIT
我希望为大致服从对数正态分布的数据拟合对数正态曲线。
我的数据来自激光衍射机,它测量喷雾的粒度分布。这段代码的最终目标是为我的数据重新创建this method,它使用专为XRD数据曲线拟合设计的OriginPro软件;一个类似的问题。我想将该方法整合到我自己的研究分析中,该研究正在 Python.
中完成
我将 this post 中的代码改编为(理想情况下)处理对数正态分布。我简化了我的代码以仅处理数据中的第一个对数正态峰值,所以现在它只试图拟合一个对数正态分布。我提供的数据也被简化为只有一个峰适合。 post.
底部给出了示例数据和代码
我以前有一些使用 LMFIT 进行模型拟合的经验,尽管我使用的是用户定义的状态-space 模型进行时间建模和 LMFIT minimize()
函数。我不确定从哪里开始调试这段代码的曲线拟合组件。
谁能帮我弄清楚为什么我无法拟合这些数据?请注意,我得到的结果是微不足道的(y=0 处的直线)。
正在 Windows 7(笔记本电脑)和 10(台式机)
运行 python -V in a CMD window 给出:
Python 3.5.3 :: Anaconda 4.1.1 (64-bit)
这是样本分布的数据:
sizes = np.array([ 1.26500000e-01, 1.47000000e-01, 1.71500000e-01,
2.00000000e-01, 2.33000000e-01, 2.72000000e-01,
3.17000000e-01, 3.69500000e-01, 4.31000000e-01,
5.02500000e-01, 5.86000000e-01, 6.83500000e-01,
7.97000000e-01, 9.29000000e-01, 1.08300000e+00,
1.26250000e+00, 1.47200000e+00, 1.71650000e+00,
2.00100000e+00, 2.33300000e+00, 2.72050000e+00,
3.17200000e+00, 3.69800000e+00, 4.31150000e+00,
5.02700000e+00, 5.86100000e+00, 6.83300000e+00,
7.96650000e+00, 9.28850000e+00, 1.08295000e+01,
1.26265000e+01, 1.47215000e+01, 1.71640000e+01,
2.00115000e+01, 2.33315000e+01, 2.72030000e+01,
3.17165000e+01, 3.69785000e+01, 4.31135000e+01,
5.02665000e+01, 5.86065000e+01, 6.83300000e+01,
7.96670000e+01, 9.28850000e+01, 1.08296000e+02,
1.26264000e+02, 1.47213000e+02, 1.71637500e+02,
2.00114500e+02, 2.33316500e+02])
y_exp = np.array([ 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.01,
0.02, 0.03, 0.04, 0.06, 0.07, 0.08, 0.09, 0.1 , 0.11,
0.13, 0.19, 0.3 , 0.48, 0.74, 1.1 , 1.56, 2.11, 2.72,
3.37, 3.99, 4.55, 4.99, 5.3 , 5.48, 5.53, 5.48, 5.36,
5.19, 4.97, 4.67, 4.28, 3.79, 3.18, 2.48, 1.73, 1. ,
0.35, 0. , 0. , 0. , 0. ])
函数如下:
def generate_model(spec):
composite_model = None
params = None
x = spec['x']
y = spec['y']
x_min = np.min(x)
x_max = np.max(x)
x_range = x_max - x_min
y_max = np.max(y)
for i, basis_func in enumerate(spec['model']):
# prefix = f'm{i}_'
prefix = 'm{0}_'.format(i)
model = getattr(models, basis_func['type'])(prefix=prefix)
if basis_func['type'] in ['LognormalModel','GaussianModel', 'LorentzianModel', 'VoigtModel']: # for now VoigtModel has gamma constrained to sigma
model.set_param_hint('sigma', min=1e-6, max=x_range)
model.set_param_hint('center', min=x_min, max=x_max)
model.set_param_hint('height', min=1e-6, max=1.1*y_max)
model.set_param_hint('amplitude', min=1e-6)
# default guess is horrible!! do not use guess()
default_params = {
prefix+'center': x_min + x_range * random.random(),
prefix+'height': y_max * random.random(),
prefix+'sigma': x_range * random.random()
}
else:
# raise NotImplemented(f'model {basis_func["type"]} not implemented yet')
raise NotImplemented('model {0} not implemented yet'.format(basis_func["type"]))
if 'help' in basis_func: # allow override of settings in parameter
for param, options in basis_func['help'].items():
model.set_param_hint(param, **options)
model_params = model.make_params(**default_params, **basis_func.get('params', {}))
if params is None:
params = model_params
else:
params.update(model_params)
if composite_model is None:
composite_model = model
else:
composite_model = composite_model + model
return composite_model, params
def update_spec_from_peaks(spec, model_indicies, peak_widths=np.arange(1,10), **kwargs):
x = spec['x']
y = spec['y']
x_range = np.max(x) - np.min(x)
peak_indicies = signal.find_peaks_cwt(y, peak_widths)
np.random.shuffle(peak_indicies)
# for peak_indicie, model_indicie in zip(peak_indicies.tolist(), model_indicies):
for peak_indicie, model_indicie in zip(peak_indicies, model_indicies):
model = spec['model'][model_indicie]
if model['type'] in ['LognormalModel','GaussianModel', 'LorentzianModel', 'VoigtModel']:
params = {
'height': y[peak_indicie],
'sigma': x_range / len(x) * np.min(peak_widths),
'center': x[peak_indicie]
}
if 'params' in model:
model.update(params)
else:
model['params'] = params
else:
# raise NotImplemented(f'model {basis_func["type"]} not implemented yet')
raise NotImplemented('model {0} not implemented yet'.format(model["type"]))
return peak_indicies
这里是主线:
spec = {
'x': sizes,
'y': y_exp,
'model': [
{
'type': 'LognormalModel',
'params': {'center': 20, 'height': 3, 'sigma': 1},
# 'help': {'center': {'min': 10, 'max': 30}}
}]}
num_comp = list(range(0,len(spec['model'])))
peaks_found = update_spec_from_peaks(spec, num_comp, peak_widths=np.arange(1,10))
#For checking peak fitting
print(peaks_found)
fig, ax = plt.subplots()
ax.scatter(spec['x'], spec['y'], s=4)
for i in peaks_found:
ax.axvline(x=spec['x'][i], c='black', linestyle='dotted')
model, params = generate_model(spec)
output = model.fit(spec['y'], params, x=spec['x'])
fig, gridspec = output.plot()
感谢您的帮助,祝今天愉快。
以撒
关于 Whosebug 和一般问题解决的标准建议是将问题减少到显示问题的最小脚本。参见,例如,https://whosebug.com/help/mcve。这种方法鼓励剥离问题,通常有助于指出问题在代码中的位置。这是解决问题的经典方法。
原来你的脚本有不少多余的东西。
精简到最基本的东西会得到:
import numpy as np
from lmfit import models
import matplotlib.pyplot as plt
x = np.array([ 1.26500000e-01, 1.47000000e-01, 1.71500000e-01,
2.00000000e-01, 2.33000000e-01, 2.72000000e-01,
3.17000000e-01, 3.69500000e-01, 4.31000000e-01,
5.02500000e-01, 5.86000000e-01, 6.83500000e-01,
7.97000000e-01, 9.29000000e-01, 1.08300000e+00,
1.26250000e+00, 1.47200000e+00, 1.71650000e+00,
2.00100000e+00, 2.33300000e+00, 2.72050000e+00,
3.17200000e+00, 3.69800000e+00, 4.31150000e+00,
5.02700000e+00, 5.86100000e+00, 6.83300000e+00,
7.96650000e+00, 9.28850000e+00, 1.08295000e+01,
1.26265000e+01, 1.47215000e+01, 1.71640000e+01,
2.00115000e+01, 2.33315000e+01, 2.72030000e+01,
3.17165000e+01, 3.69785000e+01, 4.31135000e+01,
5.02665000e+01, 5.86065000e+01, 6.83300000e+01,
7.96670000e+01, 9.28850000e+01, 1.08296000e+02,
1.26264000e+02, 1.47213000e+02, 1.71637500e+02,
2.00114500e+02, 2.33316500e+02])
y = np.array([ 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.01, 0.02,
0.03, 0.04, 0.06, 0.07, 0.08, 0.09, 0.1 , 0.11, 0.13, 0.19,
0.3 , 0.48, 0.74, 1.1 , 1.56, 2.11, 2.72, 3.37, 3.99, 4.55,
4.99, 5.3 , 5.48, 5.53, 5.48, 5.36, 5.19, 4.97, 4.67, 4.28,
3.79, 3.18, 2.48, 1.73, 1. , 0.35, 0. , 0. , 0. , 0. ])
model = models.LognormalModel()
params = model.make_params(center=20, sigma=3, amplitude=5)
result = model.fit(y, params, x=x)
print(result.fit_report())
plt.plot(x, y, label='data')
plt.plot(x, result.best_fit, label='fit')
plt.legend()
plt.show()
虽然不是很完美,但它运行起来还是不错的。
一般来说,我不建议您根据数据范围设置 "parameter hints"。使用这种能力谨慎地设置此类限制,并且仅在它们是模型固有的地方(例如 sigma<0
没有意义)。
我不知道你的代码使用随机数来设置初始值,但在我看来它很可能会设置非常糟糕的初始值选择。
我希望为大致服从对数正态分布的数据拟合对数正态曲线。
我的数据来自激光衍射机,它测量喷雾的粒度分布。这段代码的最终目标是为我的数据重新创建this method,它使用专为XRD数据曲线拟合设计的OriginPro软件;一个类似的问题。我想将该方法整合到我自己的研究分析中,该研究正在 Python.
中完成我将 this post 中的代码改编为(理想情况下)处理对数正态分布。我简化了我的代码以仅处理数据中的第一个对数正态峰值,所以现在它只试图拟合一个对数正态分布。我提供的数据也被简化为只有一个峰适合。 post.
底部给出了示例数据和代码我以前有一些使用 LMFIT 进行模型拟合的经验,尽管我使用的是用户定义的状态-space 模型进行时间建模和 LMFIT minimize()
函数。我不确定从哪里开始调试这段代码的曲线拟合组件。
谁能帮我弄清楚为什么我无法拟合这些数据?请注意,我得到的结果是微不足道的(y=0 处的直线)。
正在 Windows 7(笔记本电脑)和 10(台式机)
运行 python -V in a CMD window 给出:
Python 3.5.3 :: Anaconda 4.1.1 (64-bit)
这是样本分布的数据:
sizes = np.array([ 1.26500000e-01, 1.47000000e-01, 1.71500000e-01,
2.00000000e-01, 2.33000000e-01, 2.72000000e-01,
3.17000000e-01, 3.69500000e-01, 4.31000000e-01,
5.02500000e-01, 5.86000000e-01, 6.83500000e-01,
7.97000000e-01, 9.29000000e-01, 1.08300000e+00,
1.26250000e+00, 1.47200000e+00, 1.71650000e+00,
2.00100000e+00, 2.33300000e+00, 2.72050000e+00,
3.17200000e+00, 3.69800000e+00, 4.31150000e+00,
5.02700000e+00, 5.86100000e+00, 6.83300000e+00,
7.96650000e+00, 9.28850000e+00, 1.08295000e+01,
1.26265000e+01, 1.47215000e+01, 1.71640000e+01,
2.00115000e+01, 2.33315000e+01, 2.72030000e+01,
3.17165000e+01, 3.69785000e+01, 4.31135000e+01,
5.02665000e+01, 5.86065000e+01, 6.83300000e+01,
7.96670000e+01, 9.28850000e+01, 1.08296000e+02,
1.26264000e+02, 1.47213000e+02, 1.71637500e+02,
2.00114500e+02, 2.33316500e+02])
y_exp = np.array([ 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.01,
0.02, 0.03, 0.04, 0.06, 0.07, 0.08, 0.09, 0.1 , 0.11,
0.13, 0.19, 0.3 , 0.48, 0.74, 1.1 , 1.56, 2.11, 2.72,
3.37, 3.99, 4.55, 4.99, 5.3 , 5.48, 5.53, 5.48, 5.36,
5.19, 4.97, 4.67, 4.28, 3.79, 3.18, 2.48, 1.73, 1. ,
0.35, 0. , 0. , 0. , 0. ])
函数如下:
def generate_model(spec):
composite_model = None
params = None
x = spec['x']
y = spec['y']
x_min = np.min(x)
x_max = np.max(x)
x_range = x_max - x_min
y_max = np.max(y)
for i, basis_func in enumerate(spec['model']):
# prefix = f'm{i}_'
prefix = 'm{0}_'.format(i)
model = getattr(models, basis_func['type'])(prefix=prefix)
if basis_func['type'] in ['LognormalModel','GaussianModel', 'LorentzianModel', 'VoigtModel']: # for now VoigtModel has gamma constrained to sigma
model.set_param_hint('sigma', min=1e-6, max=x_range)
model.set_param_hint('center', min=x_min, max=x_max)
model.set_param_hint('height', min=1e-6, max=1.1*y_max)
model.set_param_hint('amplitude', min=1e-6)
# default guess is horrible!! do not use guess()
default_params = {
prefix+'center': x_min + x_range * random.random(),
prefix+'height': y_max * random.random(),
prefix+'sigma': x_range * random.random()
}
else:
# raise NotImplemented(f'model {basis_func["type"]} not implemented yet')
raise NotImplemented('model {0} not implemented yet'.format(basis_func["type"]))
if 'help' in basis_func: # allow override of settings in parameter
for param, options in basis_func['help'].items():
model.set_param_hint(param, **options)
model_params = model.make_params(**default_params, **basis_func.get('params', {}))
if params is None:
params = model_params
else:
params.update(model_params)
if composite_model is None:
composite_model = model
else:
composite_model = composite_model + model
return composite_model, params
def update_spec_from_peaks(spec, model_indicies, peak_widths=np.arange(1,10), **kwargs):
x = spec['x']
y = spec['y']
x_range = np.max(x) - np.min(x)
peak_indicies = signal.find_peaks_cwt(y, peak_widths)
np.random.shuffle(peak_indicies)
# for peak_indicie, model_indicie in zip(peak_indicies.tolist(), model_indicies):
for peak_indicie, model_indicie in zip(peak_indicies, model_indicies):
model = spec['model'][model_indicie]
if model['type'] in ['LognormalModel','GaussianModel', 'LorentzianModel', 'VoigtModel']:
params = {
'height': y[peak_indicie],
'sigma': x_range / len(x) * np.min(peak_widths),
'center': x[peak_indicie]
}
if 'params' in model:
model.update(params)
else:
model['params'] = params
else:
# raise NotImplemented(f'model {basis_func["type"]} not implemented yet')
raise NotImplemented('model {0} not implemented yet'.format(model["type"]))
return peak_indicies
这里是主线:
spec = {
'x': sizes,
'y': y_exp,
'model': [
{
'type': 'LognormalModel',
'params': {'center': 20, 'height': 3, 'sigma': 1},
# 'help': {'center': {'min': 10, 'max': 30}}
}]}
num_comp = list(range(0,len(spec['model'])))
peaks_found = update_spec_from_peaks(spec, num_comp, peak_widths=np.arange(1,10))
#For checking peak fitting
print(peaks_found)
fig, ax = plt.subplots()
ax.scatter(spec['x'], spec['y'], s=4)
for i in peaks_found:
ax.axvline(x=spec['x'][i], c='black', linestyle='dotted')
model, params = generate_model(spec)
output = model.fit(spec['y'], params, x=spec['x'])
fig, gridspec = output.plot()
感谢您的帮助,祝今天愉快。
以撒
关于 Whosebug 和一般问题解决的标准建议是将问题减少到显示问题的最小脚本。参见,例如,https://whosebug.com/help/mcve。这种方法鼓励剥离问题,通常有助于指出问题在代码中的位置。这是解决问题的经典方法。
原来你的脚本有不少多余的东西。 精简到最基本的东西会得到:
import numpy as np
from lmfit import models
import matplotlib.pyplot as plt
x = np.array([ 1.26500000e-01, 1.47000000e-01, 1.71500000e-01,
2.00000000e-01, 2.33000000e-01, 2.72000000e-01,
3.17000000e-01, 3.69500000e-01, 4.31000000e-01,
5.02500000e-01, 5.86000000e-01, 6.83500000e-01,
7.97000000e-01, 9.29000000e-01, 1.08300000e+00,
1.26250000e+00, 1.47200000e+00, 1.71650000e+00,
2.00100000e+00, 2.33300000e+00, 2.72050000e+00,
3.17200000e+00, 3.69800000e+00, 4.31150000e+00,
5.02700000e+00, 5.86100000e+00, 6.83300000e+00,
7.96650000e+00, 9.28850000e+00, 1.08295000e+01,
1.26265000e+01, 1.47215000e+01, 1.71640000e+01,
2.00115000e+01, 2.33315000e+01, 2.72030000e+01,
3.17165000e+01, 3.69785000e+01, 4.31135000e+01,
5.02665000e+01, 5.86065000e+01, 6.83300000e+01,
7.96670000e+01, 9.28850000e+01, 1.08296000e+02,
1.26264000e+02, 1.47213000e+02, 1.71637500e+02,
2.00114500e+02, 2.33316500e+02])
y = np.array([ 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.01, 0.02,
0.03, 0.04, 0.06, 0.07, 0.08, 0.09, 0.1 , 0.11, 0.13, 0.19,
0.3 , 0.48, 0.74, 1.1 , 1.56, 2.11, 2.72, 3.37, 3.99, 4.55,
4.99, 5.3 , 5.48, 5.53, 5.48, 5.36, 5.19, 4.97, 4.67, 4.28,
3.79, 3.18, 2.48, 1.73, 1. , 0.35, 0. , 0. , 0. , 0. ])
model = models.LognormalModel()
params = model.make_params(center=20, sigma=3, amplitude=5)
result = model.fit(y, params, x=x)
print(result.fit_report())
plt.plot(x, y, label='data')
plt.plot(x, result.best_fit, label='fit')
plt.legend()
plt.show()
虽然不是很完美,但它运行起来还是不错的。
一般来说,我不建议您根据数据范围设置 "parameter hints"。使用这种能力谨慎地设置此类限制,并且仅在它们是模型固有的地方(例如 sigma<0
没有意义)。
我不知道你的代码使用随机数来设置初始值,但在我看来它很可能会设置非常糟糕的初始值选择。