扩展多项式以拟合数据:创建任意数量的函数参数

Extending a polynomial to fit data: creating an arbitrary amount of functional parameters

我想将函数拟合到给定的数据集,如果不满足条件,则扩展函数并重复该过程 - 类似于增加泰勒级数的最高阶。我的问题是我不知道如何扩展功能。这个函数应该是这样的

def func(x, a0,a1,a2,...)
    return (a0 * H0 + a1 * H1 + a2 * H2 + ...) (x)

H0(x), H1(x), H2(x),... 是已知函数。我希望这种方法使用 20-1000 个函数 H0、H1... 所以我必须找到一种方法来不手动定义每个参数 a0、a1、a2...

我的想法是用最大数量的参数定义函数,然后在循环中对其进行操作(以某种方式减少参数数量,然后在每次迭代时增加参数数量)

# choose N: an arbitrary number of parameters
# create N-many functions H0, H1, ... HN-1 -> put them into an numpy array HArray

def func(x, parameters): # somehow reduce the number of parameters to N
    # convert parameters into numpy array -> parameterArray
    result = parameterArray * HArray (x) # (a * H0 + b * H1 + c * H2 + ...) (x)
    return result

# fit the function to a given dataset

完整代码

import numpy as np
from scipy.optimize import curve_fit

x = np.linspace(0,2*np.pi,num=10000) # xdata
y = np.sin(x) # ydata
error = 0.0001 # fit condition
K = 1000

for N in range(K):
    def func(x, parameters): # somehow reduce the number of parameters to N
        parameterArray = np.array([p for p in parameters])
        HArray = np.array([x**i for i in range(N)]) # a polynomial as example
        return parameterArray * HArray (x)

    popt, pcov = curve_fit(func, x, y)

    stdDev = np.sqrt(np.diag(pcov))
    if stdDev < error: break

为了使 curve_fit 正常工作,该函数需要适当数量的参数。我也想过在拟合时固定最后的K-N个参数,但我也不知道该怎么做。

也就是说,如果我做对了,那就是线性拟合。因此,不需要 curve_fit。最简单的方法是:

import numpy as np

def h0( x ):
    return 1

def h1( x ):
    return x

def h2( x ):
    return x**2

def h3( x ):
    return x**3

def h4( x ):
    return np.sin( x )


def my_linear_fit( xdata, ydata, funclist ):
    ST = np.array( [ f( xdata ) for f in funclist ] )
    S = np.transpose( ST )
    A = np.dot( ST, S )
    AI = np.linalg.inv( A )
    K = np.dot( AI, ST )
    sol = np.dot( K, ydata )
    yth = np.dot( S, sol )
    diff = ydata - yth
    s2 = np.sum( diff**2 ) / ( len( diff ) - len( funclist ) )
    cov = s2 * np.dot( K, np.transpose( K ) )
    return sol, cov
    
"""
testing
"""

xl = np.linspace( -2, 8, 23 )
yl = np.fromiter( 
    ( 2.1 * h1( x ) + 0.21 * h3(x) + 1.56 *  h4(x) for x in xl),
    float
)

yn = yl + np.random.normal( size=len(xl), scale=0.2 )

opt, popt = my_linear_fit( xl, yn, ( h1, h3, h4 ) )

print( opt )
print( popt )

为了避免矩阵求逆等问题,有一些方法可以使用矩阵分解等使其更复杂一些,但主要思想保持不变。如果输入函数需要额外的参数,可能需要使用 lambda 函数 and/or 相应地扩展代码。

编辑 1

使用非常高的阶会导致矩阵求逆或浮点精度问题。我们可以使用 mpmath 来解决这个问题 请注意,numpy 不再使用,现在需要以更手动的方式完成一些数据处理。

import matplotlib.pyplot as plt
from mpmath import mpf
from mpmath import fabs
from mpmath import matrix
from mpmath import mp


def pol( x, n ):
    return x**n


def make_func_list( order ):
    out = list()
    for i in range( order + 1 ):
        out.append( lambda x, i=i: pol( x, i ) )
    return out


def my_linear_fit( xdata, ydata, funclist ):
    ymat = matrix( ydata )
    ST = matrix( [ [ f( x ) for x in xdata ] for f in funclist ] )
    S = ST.T
    A = ST* S 
    AI =  A**(-1)
    K = AI * ST
    sol =  K * ymat

    yth = S *  sol

    diff = ymat - yth
    s2l = [ x**2 for x in diff ]
    s2 = 0
    for x in s2l:
        s2 += x
    s2 /= ( len( ydata ) - len( funclist ) )
    cov = s2 * K *  K.T
    return sol, cov

"""
testing
"""
mp.prec = 50

xlth = [ mpf( -2 + (5 + 2) * i / 154 )for i in range( 155 ) ]
xl = [ mpf( -2 + (5 + 2) * i / 54 )for i in range( 55 ) ]
xmax = max( [ fabs( x ) for x in xl ] )
xls = [ x / xmax for x in xl ]
xlsth = [ x / xmax for x in xlth ]
yl = [
    2.1 * mp.sin( 3.83 * x ) for x in xl
]


fig = plt.figure()
ax = fig.add_subplot( 2, 1, 1 )
bx = fig.add_subplot( 2, 1, 2 )

ax.plot( xl, yl, ls='', marker='o' )
bx.plot( xls, yl, ls='', marker='o' )

for order in[ 2, 4, 12, 18 ]:
    fl  = make_func_list( order )
    opt, popt = my_linear_fit( xls, yl, fl )
    print( opt.T )
    fit = [ [ o * f( x ) for x in xlsth ] for o,f in zip( opt, fl ) ]
    fitsum = fit[0]
    for line in fit[ 1::]:
        fitsum = [ x + y for x, y in zip( fitsum, line )]
    bx.plot( xlsth, fitsum)
plt.show()

编辑 2

实际上,numpy 有一个内置机制。我不确定它是如何详细完成的,因为它最终会从库中调用函数,但我猜它使用的是 SVD。 QR分解还是有助于得到协方差矩阵的。

from scipy.linalg import qr

def my_linear_fit( xdata, ydata, funclist ):
    ST = np.array( [ f( xdata ) for f in funclist ] )
    S = np.transpose( ST )
    q, r = qr( S )
    sol = np.linalg.lstsq( S, ydata )[0]
    yth = np.dot( S, sol )
    diff = ydata - yth
    s2 = np.sum( diff**2 ) / ( len( diff ) - len( funclist ) )
    cov =  np.linalg.inv( np.dot( np.transpose( r ), r ) ) * s2
    return sol, cov

"""
testing
"""

xl = np.linspace( -2, 8, 55 )
xmax = max( np.abs( xl ) )
xls = xl / xmax
yl = np.fromiter(
    ( 2.1 * np.sin( 2.83 * x ) for x in xl),
    float
)


fig = plt.figure()
ax = fig.add_subplot( 2, 1, 1 )
bx = fig.add_subplot( 2, 1, 2 )

ax.plot( xl, yl, ls='', marker='o' )
bx.plot( xls, yl, ls='', marker='o' )

for order in[ 5, 10, 15, 20 ]:
    fl  = make_func_list( order )
    opt, popt = my_linear_fit( xls, yl, fl )
    fit = np.array( [ o * f( xls ) for o,f in zip( opt, fl ) ] )
    fit = np.sum( fit, axis=0 )
    fits = np.array( [ o/(xmax**n) * f( xl ) for n, o, f in zip( range(order + 1), opt, fl ) ] )
    fits = np.sum( fits, axis=0 )
    ax.plot( xl, fits)
    bx.plot( xls, fit)
plt.show()

编辑 3

这将是仅使用 QR 分解的解决方案。在我的例子中,它很容易达到 20 阶(重新缩放)

from scipy.linalg import solve_triangular

def my_linear_fit( xdata, ydata, funclist ):
    n = len( funclist )
    ST = np.array( [ f( xdata ) for f in funclist ] )
    S = np.transpose( ST )
    q, r = qr( S )
    rred = r[ : n ] ### making it square...skip the zeros
    yred = np.dot( np.transpose( q ), ydata )[ : n ]
    sol = solve_triangular( rred, yred )
    yth = np.dot( S, sol )
    diff = ydata - yth
    s2 = np.sum( diff**2 ) / ( len( diff ) - len( funclist ) )
    cov =  np.linalg.inv( np.dot( np.transpose( r ), r ) ) * s2
    return sol, cov