扩展多项式以拟合数据:创建任意数量的函数参数
Extending a polynomial to fit data: creating an arbitrary amount of functional parameters
我想将函数拟合到给定的数据集,如果不满足条件,则扩展函数并重复该过程 - 类似于增加泰勒级数的最高阶。我的问题是我不知道如何扩展功能。这个函数应该是这样的
def func(x, a0,a1,a2,...)
return (a0 * H0 + a1 * H1 + a2 * H2 + ...) (x)
H0(x), H1(x), H2(x),... 是已知函数。我希望这种方法使用 20-1000 个函数 H0、H1... 所以我必须找到一种方法来不手动定义每个参数 a0、a1、a2...
我的想法是用最大数量的参数定义函数,然后在循环中对其进行操作(以某种方式减少参数数量,然后在每次迭代时增加参数数量)
# choose N: an arbitrary number of parameters
# create N-many functions H0, H1, ... HN-1 -> put them into an numpy array HArray
def func(x, parameters): # somehow reduce the number of parameters to N
# convert parameters into numpy array -> parameterArray
result = parameterArray * HArray (x) # (a * H0 + b * H1 + c * H2 + ...) (x)
return result
# fit the function to a given dataset
完整代码
import numpy as np
from scipy.optimize import curve_fit
x = np.linspace(0,2*np.pi,num=10000) # xdata
y = np.sin(x) # ydata
error = 0.0001 # fit condition
K = 1000
for N in range(K):
def func(x, parameters): # somehow reduce the number of parameters to N
parameterArray = np.array([p for p in parameters])
HArray = np.array([x**i for i in range(N)]) # a polynomial as example
return parameterArray * HArray (x)
popt, pcov = curve_fit(func, x, y)
stdDev = np.sqrt(np.diag(pcov))
if stdDev < error: break
为了使 curve_fit 正常工作,该函数需要适当数量的参数。我也想过在拟合时固定最后的K-N个参数,但我也不知道该怎么做。
也就是说,如果我做对了,那就是线性拟合。因此,不需要 curve_fit
。最简单的方法是:
import numpy as np
def h0( x ):
return 1
def h1( x ):
return x
def h2( x ):
return x**2
def h3( x ):
return x**3
def h4( x ):
return np.sin( x )
def my_linear_fit( xdata, ydata, funclist ):
ST = np.array( [ f( xdata ) for f in funclist ] )
S = np.transpose( ST )
A = np.dot( ST, S )
AI = np.linalg.inv( A )
K = np.dot( AI, ST )
sol = np.dot( K, ydata )
yth = np.dot( S, sol )
diff = ydata - yth
s2 = np.sum( diff**2 ) / ( len( diff ) - len( funclist ) )
cov = s2 * np.dot( K, np.transpose( K ) )
return sol, cov
"""
testing
"""
xl = np.linspace( -2, 8, 23 )
yl = np.fromiter(
( 2.1 * h1( x ) + 0.21 * h3(x) + 1.56 * h4(x) for x in xl),
float
)
yn = yl + np.random.normal( size=len(xl), scale=0.2 )
opt, popt = my_linear_fit( xl, yn, ( h1, h3, h4 ) )
print( opt )
print( popt )
为了避免矩阵求逆等问题,有一些方法可以使用矩阵分解等使其更复杂一些,但主要思想保持不变。如果输入函数需要额外的参数,可能需要使用 lambda
函数 and/or 相应地扩展代码。
编辑 1
使用非常高的阶会导致矩阵求逆或浮点精度问题。我们可以使用 mpmath
来解决这个问题
请注意,numpy
不再使用,现在需要以更手动的方式完成一些数据处理。
import matplotlib.pyplot as plt
from mpmath import mpf
from mpmath import fabs
from mpmath import matrix
from mpmath import mp
def pol( x, n ):
return x**n
def make_func_list( order ):
out = list()
for i in range( order + 1 ):
out.append( lambda x, i=i: pol( x, i ) )
return out
def my_linear_fit( xdata, ydata, funclist ):
ymat = matrix( ydata )
ST = matrix( [ [ f( x ) for x in xdata ] for f in funclist ] )
S = ST.T
A = ST* S
AI = A**(-1)
K = AI * ST
sol = K * ymat
yth = S * sol
diff = ymat - yth
s2l = [ x**2 for x in diff ]
s2 = 0
for x in s2l:
s2 += x
s2 /= ( len( ydata ) - len( funclist ) )
cov = s2 * K * K.T
return sol, cov
"""
testing
"""
mp.prec = 50
xlth = [ mpf( -2 + (5 + 2) * i / 154 )for i in range( 155 ) ]
xl = [ mpf( -2 + (5 + 2) * i / 54 )for i in range( 55 ) ]
xmax = max( [ fabs( x ) for x in xl ] )
xls = [ x / xmax for x in xl ]
xlsth = [ x / xmax for x in xlth ]
yl = [
2.1 * mp.sin( 3.83 * x ) for x in xl
]
fig = plt.figure()
ax = fig.add_subplot( 2, 1, 1 )
bx = fig.add_subplot( 2, 1, 2 )
ax.plot( xl, yl, ls='', marker='o' )
bx.plot( xls, yl, ls='', marker='o' )
for order in[ 2, 4, 12, 18 ]:
fl = make_func_list( order )
opt, popt = my_linear_fit( xls, yl, fl )
print( opt.T )
fit = [ [ o * f( x ) for x in xlsth ] for o,f in zip( opt, fl ) ]
fitsum = fit[0]
for line in fit[ 1::]:
fitsum = [ x + y for x, y in zip( fitsum, line )]
bx.plot( xlsth, fitsum)
plt.show()
编辑 2
实际上,numpy 有一个内置机制。我不确定它是如何详细完成的,因为它最终会从库中调用函数,但我猜它使用的是 SVD。 QR分解还是有助于得到协方差矩阵的。
from scipy.linalg import qr
def my_linear_fit( xdata, ydata, funclist ):
ST = np.array( [ f( xdata ) for f in funclist ] )
S = np.transpose( ST )
q, r = qr( S )
sol = np.linalg.lstsq( S, ydata )[0]
yth = np.dot( S, sol )
diff = ydata - yth
s2 = np.sum( diff**2 ) / ( len( diff ) - len( funclist ) )
cov = np.linalg.inv( np.dot( np.transpose( r ), r ) ) * s2
return sol, cov
"""
testing
"""
xl = np.linspace( -2, 8, 55 )
xmax = max( np.abs( xl ) )
xls = xl / xmax
yl = np.fromiter(
( 2.1 * np.sin( 2.83 * x ) for x in xl),
float
)
fig = plt.figure()
ax = fig.add_subplot( 2, 1, 1 )
bx = fig.add_subplot( 2, 1, 2 )
ax.plot( xl, yl, ls='', marker='o' )
bx.plot( xls, yl, ls='', marker='o' )
for order in[ 5, 10, 15, 20 ]:
fl = make_func_list( order )
opt, popt = my_linear_fit( xls, yl, fl )
fit = np.array( [ o * f( xls ) for o,f in zip( opt, fl ) ] )
fit = np.sum( fit, axis=0 )
fits = np.array( [ o/(xmax**n) * f( xl ) for n, o, f in zip( range(order + 1), opt, fl ) ] )
fits = np.sum( fits, axis=0 )
ax.plot( xl, fits)
bx.plot( xls, fit)
plt.show()
编辑 3
这将是仅使用 QR 分解的解决方案。在我的例子中,它很容易达到 20 阶(重新缩放)
from scipy.linalg import solve_triangular
def my_linear_fit( xdata, ydata, funclist ):
n = len( funclist )
ST = np.array( [ f( xdata ) for f in funclist ] )
S = np.transpose( ST )
q, r = qr( S )
rred = r[ : n ] ### making it square...skip the zeros
yred = np.dot( np.transpose( q ), ydata )[ : n ]
sol = solve_triangular( rred, yred )
yth = np.dot( S, sol )
diff = ydata - yth
s2 = np.sum( diff**2 ) / ( len( diff ) - len( funclist ) )
cov = np.linalg.inv( np.dot( np.transpose( r ), r ) ) * s2
return sol, cov
我想将函数拟合到给定的数据集,如果不满足条件,则扩展函数并重复该过程 - 类似于增加泰勒级数的最高阶。我的问题是我不知道如何扩展功能。这个函数应该是这样的
def func(x, a0,a1,a2,...)
return (a0 * H0 + a1 * H1 + a2 * H2 + ...) (x)
H0(x), H1(x), H2(x),... 是已知函数。我希望这种方法使用 20-1000 个函数 H0、H1... 所以我必须找到一种方法来不手动定义每个参数 a0、a1、a2...
我的想法是用最大数量的参数定义函数,然后在循环中对其进行操作(以某种方式减少参数数量,然后在每次迭代时增加参数数量)
# choose N: an arbitrary number of parameters
# create N-many functions H0, H1, ... HN-1 -> put them into an numpy array HArray
def func(x, parameters): # somehow reduce the number of parameters to N
# convert parameters into numpy array -> parameterArray
result = parameterArray * HArray (x) # (a * H0 + b * H1 + c * H2 + ...) (x)
return result
# fit the function to a given dataset
完整代码
import numpy as np
from scipy.optimize import curve_fit
x = np.linspace(0,2*np.pi,num=10000) # xdata
y = np.sin(x) # ydata
error = 0.0001 # fit condition
K = 1000
for N in range(K):
def func(x, parameters): # somehow reduce the number of parameters to N
parameterArray = np.array([p for p in parameters])
HArray = np.array([x**i for i in range(N)]) # a polynomial as example
return parameterArray * HArray (x)
popt, pcov = curve_fit(func, x, y)
stdDev = np.sqrt(np.diag(pcov))
if stdDev < error: break
为了使 curve_fit 正常工作,该函数需要适当数量的参数。我也想过在拟合时固定最后的K-N个参数,但我也不知道该怎么做。
也就是说,如果我做对了,那就是线性拟合。因此,不需要 curve_fit
。最简单的方法是:
import numpy as np
def h0( x ):
return 1
def h1( x ):
return x
def h2( x ):
return x**2
def h3( x ):
return x**3
def h4( x ):
return np.sin( x )
def my_linear_fit( xdata, ydata, funclist ):
ST = np.array( [ f( xdata ) for f in funclist ] )
S = np.transpose( ST )
A = np.dot( ST, S )
AI = np.linalg.inv( A )
K = np.dot( AI, ST )
sol = np.dot( K, ydata )
yth = np.dot( S, sol )
diff = ydata - yth
s2 = np.sum( diff**2 ) / ( len( diff ) - len( funclist ) )
cov = s2 * np.dot( K, np.transpose( K ) )
return sol, cov
"""
testing
"""
xl = np.linspace( -2, 8, 23 )
yl = np.fromiter(
( 2.1 * h1( x ) + 0.21 * h3(x) + 1.56 * h4(x) for x in xl),
float
)
yn = yl + np.random.normal( size=len(xl), scale=0.2 )
opt, popt = my_linear_fit( xl, yn, ( h1, h3, h4 ) )
print( opt )
print( popt )
为了避免矩阵求逆等问题,有一些方法可以使用矩阵分解等使其更复杂一些,但主要思想保持不变。如果输入函数需要额外的参数,可能需要使用 lambda
函数 and/or 相应地扩展代码。
编辑 1
使用非常高的阶会导致矩阵求逆或浮点精度问题。我们可以使用 mpmath
来解决这个问题
请注意,numpy
不再使用,现在需要以更手动的方式完成一些数据处理。
import matplotlib.pyplot as plt
from mpmath import mpf
from mpmath import fabs
from mpmath import matrix
from mpmath import mp
def pol( x, n ):
return x**n
def make_func_list( order ):
out = list()
for i in range( order + 1 ):
out.append( lambda x, i=i: pol( x, i ) )
return out
def my_linear_fit( xdata, ydata, funclist ):
ymat = matrix( ydata )
ST = matrix( [ [ f( x ) for x in xdata ] for f in funclist ] )
S = ST.T
A = ST* S
AI = A**(-1)
K = AI * ST
sol = K * ymat
yth = S * sol
diff = ymat - yth
s2l = [ x**2 for x in diff ]
s2 = 0
for x in s2l:
s2 += x
s2 /= ( len( ydata ) - len( funclist ) )
cov = s2 * K * K.T
return sol, cov
"""
testing
"""
mp.prec = 50
xlth = [ mpf( -2 + (5 + 2) * i / 154 )for i in range( 155 ) ]
xl = [ mpf( -2 + (5 + 2) * i / 54 )for i in range( 55 ) ]
xmax = max( [ fabs( x ) for x in xl ] )
xls = [ x / xmax for x in xl ]
xlsth = [ x / xmax for x in xlth ]
yl = [
2.1 * mp.sin( 3.83 * x ) for x in xl
]
fig = plt.figure()
ax = fig.add_subplot( 2, 1, 1 )
bx = fig.add_subplot( 2, 1, 2 )
ax.plot( xl, yl, ls='', marker='o' )
bx.plot( xls, yl, ls='', marker='o' )
for order in[ 2, 4, 12, 18 ]:
fl = make_func_list( order )
opt, popt = my_linear_fit( xls, yl, fl )
print( opt.T )
fit = [ [ o * f( x ) for x in xlsth ] for o,f in zip( opt, fl ) ]
fitsum = fit[0]
for line in fit[ 1::]:
fitsum = [ x + y for x, y in zip( fitsum, line )]
bx.plot( xlsth, fitsum)
plt.show()
编辑 2
实际上,numpy 有一个内置机制。我不确定它是如何详细完成的,因为它最终会从库中调用函数,但我猜它使用的是 SVD。 QR分解还是有助于得到协方差矩阵的。
from scipy.linalg import qr
def my_linear_fit( xdata, ydata, funclist ):
ST = np.array( [ f( xdata ) for f in funclist ] )
S = np.transpose( ST )
q, r = qr( S )
sol = np.linalg.lstsq( S, ydata )[0]
yth = np.dot( S, sol )
diff = ydata - yth
s2 = np.sum( diff**2 ) / ( len( diff ) - len( funclist ) )
cov = np.linalg.inv( np.dot( np.transpose( r ), r ) ) * s2
return sol, cov
"""
testing
"""
xl = np.linspace( -2, 8, 55 )
xmax = max( np.abs( xl ) )
xls = xl / xmax
yl = np.fromiter(
( 2.1 * np.sin( 2.83 * x ) for x in xl),
float
)
fig = plt.figure()
ax = fig.add_subplot( 2, 1, 1 )
bx = fig.add_subplot( 2, 1, 2 )
ax.plot( xl, yl, ls='', marker='o' )
bx.plot( xls, yl, ls='', marker='o' )
for order in[ 5, 10, 15, 20 ]:
fl = make_func_list( order )
opt, popt = my_linear_fit( xls, yl, fl )
fit = np.array( [ o * f( xls ) for o,f in zip( opt, fl ) ] )
fit = np.sum( fit, axis=0 )
fits = np.array( [ o/(xmax**n) * f( xl ) for n, o, f in zip( range(order + 1), opt, fl ) ] )
fits = np.sum( fits, axis=0 )
ax.plot( xl, fits)
bx.plot( xls, fit)
plt.show()
编辑 3
这将是仅使用 QR 分解的解决方案。在我的例子中,它很容易达到 20 阶(重新缩放)
from scipy.linalg import solve_triangular
def my_linear_fit( xdata, ydata, funclist ):
n = len( funclist )
ST = np.array( [ f( xdata ) for f in funclist ] )
S = np.transpose( ST )
q, r = qr( S )
rred = r[ : n ] ### making it square...skip the zeros
yred = np.dot( np.transpose( q ), ydata )[ : n ]
sol = solve_triangular( rred, yred )
yth = np.dot( S, sol )
diff = ydata - yth
s2 = np.sum( diff**2 ) / ( len( diff ) - len( funclist ) )
cov = np.linalg.inv( np.dot( np.transpose( r ), r ) ) * s2
return sol, cov