Python 拟合：优化循环

Question

我正在尝试将多个高斯拟合到给定数据，当达到第 500 个模型时，这部分程序使用了大约 3 GB 的内存，我需要拟合总共约 2000 个模型。这是我的程序的简化版本，其中包含随机生成的数据，不会产生很好的拟合，但它解释了时间问题：

import sys
sys.setrecursionlimit(5000)
import matplotlib.pyplot as plt
import numpy as np
import random
from random import uniform
x=[random.uniform(2200.,3100.) for p in range(0, 1000)]
y=[random.uniform(1.,1000.) for p in range(0, 1000)]

import sherpa.ui as ui
import numpy as np
ui.load_arrays(1,x,y) # 1 is the first data 
d1=ui.get_data()
d1.staterror=0.002*d1.y # define error on y just for plotting purpose, not required for fit
ui.plot_data()
ui.set_stat("leastsq") # leasr square method for fit
ui.set_model(ui.powlaw1d.pow1) # fit powerlaw.. pow1 is the shortcut name 
# ui.show_all() will show you all the parameters for the model
pow1.ref=2500
ui.fit()
# fitting  templates
x2=[random.uniform(2200.,3100.) for p in range(0, 1000)]
y2=[random.uniform(1.,1000.) for p in range(0, 1000)]

model1="pow1" # initiliaze the model for fitting all the gaussians
sign="+"
sigma=45. 
g_pos=x2 
g_ampl=[] # we will store the fit value here



ui.freeze(model1) # freeze the powerlaw 
for n in range(1,1000): # this excludes the upper limit
        ui.create_model_component("gauss1d","g{}".format(n))
        ui.set_par("g{}.pos".format(n),x2[n],frozen=True)
        ui.set_par("g{}.ampl".format(n),y2[n])
        ui.set_par("g{}.fwhm".format(n),sigma,frozen=True)
        model1=model1+sign+"g{}".format(n)
        if y2[n] == 0.:
           g_ampl.append(0.) # list zero amplitude for this model
        else:
           g=ui.create_model_component("gauss1d","g{}".format(n)) # do this to store g_ampl of this model only
           ui.set_source(model1) # overwriting with actual model
           ui.fit()
           ui.fit()
           ui.fit()
           g_ampl.append(g.ampl.val)
        ui.freeze(model1) # freeze the model and go to the next gaussian

我想不出一种方法来优化这部分以使其高效且耗时更少。如果有任何想法可以帮助我运行更快，我们将不胜感激。

Answer 1

您的代码的问题在于它似乎不必要地存储了您想要拟合的所有数据。更好的解决方案是只存储拟合结果。我真的不太了解夏尔巴协作。这是 scipy.optimize

的解决方案

import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
import glob, os
plt.ion()

def gaussian_func(x, a, x0, sigma,c):
    return a * np.exp(-(x-x0)**2/(2*sigma**2)) + c

# This is creates two files with random data
# Don't use for actual program
xdata = np.linspace(0, 4, 50)
ydata = gaussian_func(xdata, 2.5, 1.3, 0.5,0) + 0.2 * np.random.normal(size=len(xdata))
np.savetxt('example.dat',np.array([xdata,ydata]).T)
ydata = gaussian_func(xdata, 2.5, 2.3, 0.5,0) + 0.2 * np.random.normal(size=len(xdata))
np.savetxt('example2.dat',np.array([xdata,ydata]).T)

# Create your list of files with the data
# This examples just loads all files with .dat extension
filelist = glob.glob("*.dat") 
print(filelist)

results = []

for file in filelist:
    data = np.loadtxt(file)
    xdata = data[:,0]
    ydata = data[:,1]
    # if the error of the points is included in your file
    # exchange the following line to sigma = data[:,2]
    sigma = 0*ydata+0.2
    initial_guess = [2,1,1,0]
    popt, pcov = curve_fit(gaussian_func, xdata, ydata,p0=initial_guess,sigma=sigma)
    results.append({"filename":file,"parameters":popt,"covariance matrix":pcov})

    # This plots the result
    # Should be commented out for the large dataset
    plt.figure(1)
    plt.clf()
    plt.errorbar(xdata,ydata,sigma,fmt='ko')
    xplot = np.linspace(0,4,100)
    plt.plot(xplot,gaussian_func(xplot,*popt),'r',linewidth=2)
    plt.draw()

Python 拟合：优化循环

Python fitting : optimize loop

python

optimization

loops

model-fitting