如何用最小二乘法计算"relative error in the sum of squares"和"relative error in the approximate solution"？

Question

我已经使用 scipy.optimize.leastsq 实现了 3D 高斯拟合，现在我想调整参数 ftol 和 xtol 以优化性能。但是，为了做出正确的选择，我不了解这两个参数的“单位”。是否可以从结果中计算出这两个参数？这将使我了解如何选择它们。我的数据是 np.uint8 的 numpy 数组。我试图阅读 MINIPACK 的 FORTRAN 源代码，但我的 FORTRAN 知识为零。我还阅读了 Levenberg-Marquardt 算法，但我无法真正获得低于 ftol 的数字。

这是我所做的一个最小示例：

import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import leastsq

class gaussian_model:
    def __init__(self):
        self.prev_iter_model = None
        self.f_vals = []

    def gaussian_1D(self, coeffs, xx):
        A, sigma, mu = coeffs
        # Center rotation around peak center
        x0 = xx - mu
        model = A*np.exp(-(x0**2)/(2*(sigma**2)))
        return model

    def residuals(self, coeffs, I_obs, xx, model_func):
        model = model_func(coeffs, xx)
        residuals = I_obs - model
        if self.prev_iter_model is not None:
            self.f = np.sum(((model-self.prev_iter_model)/model)**2)
            self.f_vals.append(self.f)
        self.prev_iter_model = model
        return residuals


# x data
x_start = 1
x_stop = 10
num = 100
xx, dx = np.linspace(x_start, x_stop, num, retstep=True)

# Simulated data with some noise
A, s_x, mu = 10, 0.5, 3
coeffs = [A, s_x, mu]
model = gaussian_model()
yy = model.gaussian_1D(coeffs, xx)
noise_ampl = 0.5
noise = np.random.normal(0, noise_ampl, size=num)
yy += noise

# LM Least squares
initial_guess = [1, 1, 1]
pred_coeffs, cov_x, info, mesg, ier = leastsq(model.residuals, initial_guess,
                                              args=(yy, xx, model.gaussian_1D),
                                              ftol=1E-6, full_output=True)
yy_fit = model.gaussian_1D(pred_coeffs, xx)

rel_SSD = np.sum(((yy-yy_fit)/yy)**2)
RMS_SSD = np.sqrt(rel_SSD/num)

print(RMS_SSD)
print(model.f)
print(model.f_vals)

fig, ax = plt.subplots(1,2)

# Plot results
ax[0].scatter(xx, yy)
ax[0].plot(xx, yy_fit, c='r')

ax[1].scatter(range(len(model.f_vals)), model.f_vals, c='r')

# ax[1].set_ylim(0, 1E-6)

plt.show()

rel_SSD 大约为 1，绝对不低于 ftol = 1E-6。

编辑：基于下面@user12750353 的回答，我更新了我的最小示例以尝试重新创建 lmdif 如何用 ftol 确定终止。问题是我的 f_vals 太小了，所以它们不是正确的值。我想重新创建这个的原因是我想看看我在我的主要代码上得到什么样的数字来决定 ftol 会提前终止拟合过程。

Answer 1

因为你给出的函数没有梯度，调用的方法是lmdif。它将使用前向差异梯度估计而不是梯度，f(x + delta) - f(x) ~ delta * df(x)/dx（我会写成参数）。

您可以找到以下描述

c       ftol is a nonnegative input variable. termination
c         occurs when both the actual and predicted relative
c         reductions in the sum of squares are at most ftol.
c         therefore, ftol measures the relative error desired
c         in the sum of squares.
c
c       xtol is a nonnegative input variable. termination
c         occurs when the relative error between two consecutive
c         iterates is at most xtol. therefore, xtol measures the
c         relative error desired in the approximate solution.

查看代码，实际减少 acred = 1 - (fnorm1/fnorm)**2 是您为 rel_SSD 计算的，但在最后两次迭代之间，而不是在拟合函数和目标点之间。

例子

这里的问题是我们需要发现内部变量假定的值是什么。这样做的尝试是在每次调用函数时保存系数和残差范数，如下所示。

import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import leastsq

class gaussian_model:
    def __init__(self):
        self.prev_iter_model = None
        self.fnorm = []
        self.x = []

    def gaussian_1D(self, coeffs, xx):
        A, sigma, mu = coeffs
        # Center rotation around peak center
        x0 = xx - mu
        model = A*np.exp(-(x0**2)/(2*(sigma**2)))
        grad = np.array([
            model / A,
            model * x0**2 / (sigma**3),
            model * 2 * x0 / (2*(sigma**2))
        ]).transpose();
        
        return model, grad

    def residuals(self, coeffs, I_obs, xx, model_func):
        model, grad = model_func(coeffs, xx)
        residuals = I_obs - model
        self.x.append(np.copy(coeffs));
        self.fnorm.append(np.sqrt(np.sum(residuals**2)))
        return residuals
    
    def grad(self, coeffs, I_obs, xx, model_func):
        model, grad = model_func(coeffs, xx)
        residuals = I_obs - model
        return -grad
    
    def plot_progress(self):
        x = np.array(self.x)
        dx = np.sqrt(np.sum(np.diff(x, axis=0)**2, axis=1))
        plt.plot(dx / np.sqrt(np.sum(x[1:, :]**2, axis=1)))
        fnorm = np.array(self.fnorm)
        plt.plot(1 - (fnorm[1:]/fnorm[:-1])**2)
        plt.legend(['$||\Delta f||$', '$||\Delta x||$'], loc='upper left');
        
# x data
x_start = 1
x_stop = 10
num = 100
xx, dx = np.linspace(x_start, x_stop, num, retstep=True)

# Simulated data with some noise
A, s_x, mu = 10, 0.5, 3
coeffs = [A, s_x, mu]
model = gaussian_model()
yy, _ = model.gaussian_1D(coeffs, xx)
noise_ampl = 0.5
noise = np.random.normal(0, noise_ampl, size=num)
yy += noise

然后我们可以看到$x$和$f$的相对变化

initial_guess = [1, 1, 1]
pred_coeffs, cov_x, info, mesg, ier = leastsq(model.residuals, initial_guess,
                                              args=(yy, xx, model.gaussian_1D),
                                              xtol=1e-6,
                                              ftol=1e-6, full_output=True)

plt.figure(figsize=(14, 6))
plt.subplot(121)
model.plot_progress()
plt.yscale('log')
plt.grid()
plt.subplot(122)

yy_fit,_ = model.gaussian_1D(pred_coeffs, xx)
# Plot results
plt.scatter(xx, yy)
plt.plot(xx, yy_fit, c='r')
plt.show()

这个问题是函数被评估为计算 f 和计算 f 的梯度。为了产生更清晰的图，可以做的是实现 pass Dfun 以便它每次迭代仅评估一次 func。

# x data
x_start = 1
x_stop = 10
num = 100
xx, dx = np.linspace(x_start, x_stop, num, retstep=True)

# Simulated data with some noise
A, s_x, mu = 10, 0.5, 3
coeffs = [A, s_x, mu]
model = gaussian_model()
yy, _ = model.gaussian_1D(coeffs, xx)
noise_ampl = 0.5
noise = np.random.normal(0, noise_ampl, size=num)
yy += noise

# LM Least squares
initial_guess = [1, 1, 1]
pred_coeffs, cov_x, info, mesg, ier = leastsq(model.residuals, initial_guess,
                                              args=(yy, xx, model.gaussian_1D),
                                              Dfun=model.grad,
                                              xtol=1e-6,
                                              ftol=1e-6, full_output=True)

plt.figure(figsize=(14, 6))
plt.subplot(121)
model.plot_progress()
plt.yscale('log')
plt.grid()
plt.subplot(122)

yy_fit,_ = model.gaussian_1D(pred_coeffs, xx)
# Plot results
plt.scatter(xx, yy)
plt.plot(xx, yy_fit, c='r')
plt.show()

好吧，我为 xtol 获得的值与 lmdif 实现中的值不完全相同。

如何用最小二乘法计算"relative error in the sum of squares"和"relative error in the approximate solution"？

How to calculate "relative error in the sum of squares" and "relative error in the approximate solution" from least squares method?

python

scipy

least-squares

levenberg-marquardt

例子