Python 二维高斯拟合数据中的 NaN 值
Python 2D Gaussian Fit with NaN Values in Data
我是 Python 的新手,但我正在尝试为某些数据生成 2D 高斯拟合。具体来说,恒星通量与坐标 system/grid 中的某些位置相关联。然而,并不是我网格中的所有位置都有相应的通量值。我真的不想将这些值设置为零,以防它使我的拟合出现偏差,但我似乎无法将它们设置为 nan
并仍然让我的高斯拟合起作用。这是我正在使用的代码(从 here 稍作修改):
import numpy
import scipy
from numpy import *
from scipy import optimize
def gaussian(height, center_x, center_y, width_x, width_y):
width_x = float(width_x)
width_y = float(width_y)
return lambda x,y: height*exp(-(((center_x-x)/width_x)**2+((center_y-y)/width_y)**2)/2)
def moments(data):
total = nansum(data)
X, Y = indices(data.shape)
center_x = nansum(X*data)/total
center_y = nansum(Y*data)/total
row = data[int(center_x), :]
col = data[:, int(center_y)]
width_x = nansum(sqrt(abs((arange(col.size)-center_y)**2*col))/nansum(col))
width_y = nansum(sqrt(abs((arange(row.size)-center_x)**2*row))/nansum(row))
height = nanmax(data)
return height, center_x, center_y, width_x, width_y
def fitgaussian(data):
params = moments(data)
errorfunction = lambda p: ravel(gaussian(*p)(*indices(data.shape)) - data)
p, success = optimize.leastsq(errorfunction, params)
return p
parameters = fitgaussian(data)
fit = gaussian(*parameters)
我的通量值在一个名为 data
的二维数组中。如果我在此数组中有 0
而不是 nan
值,则代码有效,否则我的 parameters
总是显示为 [nan nan nan nan nan]
。如果有办法解决这个问题,我将非常感谢您的见解!解释的越详细越好。提前致谢!
只需删除所有没有相应通量值的值。如果此时 y 轴上没有任何内容,则删除值对无关紧要。
如果空值等于 ''
,这应该删除所有没有通量值的值
# assumes data.shape = (1, 3) where data[:,0:1] is the x,y axis
# data[:,2] contains the flux values
data = numpy.delete(data, numpy.where(data[:,3] == ''), axis=0)
如果空值等于 nan
,这将完成工作
data = numpy.delete(data, numpy.where(data[:,3] == numpy.nan), axis=0)
显而易见的事情是从 data
中删除 NaN。但是,这样做还需要将 2D X
、Y
位置数组中的相应位置也删除:
X, Y = np.indices(data.shape)
mask = ~np.isnan(data)
x = X[mask]
y = Y[mask]
data = data[mask]
现在您可以使用 optimize.leastsq
(或更新、更简单的 optimize.curve_fit
)
将数据拟合到模型函数:
p, success = optimize.leastsq(errorfunction, params, args=(x, y, data))
例如,如果我们生成一些带有 NaNs
的随机 data
data = make_data(shape)
所以
import matplotlib.pyplot as plt
plt.imshow(data)
plt.show()
看起来像
白点显示有 NaN 值的地方,然后
import numpy as np
from scipy import optimize
np.set_printoptions(precision=4)
def gaussian(p, x, y):
height, center_x, center_y, width_x, width_y = p
return height*np.exp(-(((center_x-x)/width_x)**2+((center_y-y)/width_y)**2)/2)
def moments(data):
total = np.nansum(data)
X, Y = np.indices(data.shape)
center_x = np.nansum(X*data)/total
center_y = np.nansum(Y*data)/total
row = data[int(center_x), :]
col = data[:, int(center_y)]
width_x = np.nansum(np.sqrt(abs((np.arange(col.size)-center_y)**2*col))
/np.nansum(col))
width_y = np.nansum(np.sqrt(abs((np.arange(row.size)-center_x)**2*row))
/np.nansum(row))
height = np.nanmax(data)
return height, center_x, center_y, width_x, width_y
def errorfunction(p, x, y, data):
return gaussian(p, x, y) - data
def fitgaussian(data):
params = moments(data)
X, Y = np.indices(data.shape)
mask = ~np.isnan(data)
x = X[mask]
y = Y[mask]
data = data[mask]
p, success = optimize.leastsq(errorfunction, params, args=(x, y, data))
return p
def make_data(shape):
h, w = shape
p = 50, h/2.0, w/2.0, h/3.0, w/5.0
print('Actual parameters: {}'.format(np.array(p)))
X, Y = np.indices(shape)
data = gaussian(p, X, Y) + np.random.random(shape)
mask = np.random.random(shape) < 0.3
data[mask] = np.nan
return data
shape = 100, 200
data = make_data(shape)
X, Y = np.indices(shape)
parameters = fitgaussian(data)
print('Fitted parameters: {}'.format(parameters))
fit = gaussian(parameters, X, Y)
产量
Actual parameters: [ 50. 50. 100. 33.3333 40. ]
Fitted parameters: [ 50.2908 49.9992 99.9927 33.7039 40.6149]
我是 Python 的新手,但我正在尝试为某些数据生成 2D 高斯拟合。具体来说,恒星通量与坐标 system/grid 中的某些位置相关联。然而,并不是我网格中的所有位置都有相应的通量值。我真的不想将这些值设置为零,以防它使我的拟合出现偏差,但我似乎无法将它们设置为 nan
并仍然让我的高斯拟合起作用。这是我正在使用的代码(从 here 稍作修改):
import numpy
import scipy
from numpy import *
from scipy import optimize
def gaussian(height, center_x, center_y, width_x, width_y):
width_x = float(width_x)
width_y = float(width_y)
return lambda x,y: height*exp(-(((center_x-x)/width_x)**2+((center_y-y)/width_y)**2)/2)
def moments(data):
total = nansum(data)
X, Y = indices(data.shape)
center_x = nansum(X*data)/total
center_y = nansum(Y*data)/total
row = data[int(center_x), :]
col = data[:, int(center_y)]
width_x = nansum(sqrt(abs((arange(col.size)-center_y)**2*col))/nansum(col))
width_y = nansum(sqrt(abs((arange(row.size)-center_x)**2*row))/nansum(row))
height = nanmax(data)
return height, center_x, center_y, width_x, width_y
def fitgaussian(data):
params = moments(data)
errorfunction = lambda p: ravel(gaussian(*p)(*indices(data.shape)) - data)
p, success = optimize.leastsq(errorfunction, params)
return p
parameters = fitgaussian(data)
fit = gaussian(*parameters)
我的通量值在一个名为 data
的二维数组中。如果我在此数组中有 0
而不是 nan
值,则代码有效,否则我的 parameters
总是显示为 [nan nan nan nan nan]
。如果有办法解决这个问题,我将非常感谢您的见解!解释的越详细越好。提前致谢!
只需删除所有没有相应通量值的值。如果此时 y 轴上没有任何内容,则删除值对无关紧要。
如果空值等于 ''
# assumes data.shape = (1, 3) where data[:,0:1] is the x,y axis
# data[:,2] contains the flux values
data = numpy.delete(data, numpy.where(data[:,3] == ''), axis=0)
如果空值等于 nan
data = numpy.delete(data, numpy.where(data[:,3] == numpy.nan), axis=0)
显而易见的事情是从 data
中删除 NaN。但是,这样做还需要将 2D X
、Y
位置数组中的相应位置也删除:
X, Y = np.indices(data.shape)
mask = ~np.isnan(data)
x = X[mask]
y = Y[mask]
data = data[mask]
现在您可以使用 optimize.leastsq
(或更新、更简单的 optimize.curve_fit
)
将数据拟合到模型函数:
p, success = optimize.leastsq(errorfunction, params, args=(x, y, data))
例如,如果我们生成一些带有 NaNs
的随机data
data = make_data(shape)
所以
import matplotlib.pyplot as plt
plt.imshow(data)
plt.show()
看起来像
白点显示有 NaN 值的地方,然后
import numpy as np
from scipy import optimize
np.set_printoptions(precision=4)
def gaussian(p, x, y):
height, center_x, center_y, width_x, width_y = p
return height*np.exp(-(((center_x-x)/width_x)**2+((center_y-y)/width_y)**2)/2)
def moments(data):
total = np.nansum(data)
X, Y = np.indices(data.shape)
center_x = np.nansum(X*data)/total
center_y = np.nansum(Y*data)/total
row = data[int(center_x), :]
col = data[:, int(center_y)]
width_x = np.nansum(np.sqrt(abs((np.arange(col.size)-center_y)**2*col))
/np.nansum(col))
width_y = np.nansum(np.sqrt(abs((np.arange(row.size)-center_x)**2*row))
/np.nansum(row))
height = np.nanmax(data)
return height, center_x, center_y, width_x, width_y
def errorfunction(p, x, y, data):
return gaussian(p, x, y) - data
def fitgaussian(data):
params = moments(data)
X, Y = np.indices(data.shape)
mask = ~np.isnan(data)
x = X[mask]
y = Y[mask]
data = data[mask]
p, success = optimize.leastsq(errorfunction, params, args=(x, y, data))
return p
def make_data(shape):
h, w = shape
p = 50, h/2.0, w/2.0, h/3.0, w/5.0
print('Actual parameters: {}'.format(np.array(p)))
X, Y = np.indices(shape)
data = gaussian(p, X, Y) + np.random.random(shape)
mask = np.random.random(shape) < 0.3
data[mask] = np.nan
return data
shape = 100, 200
data = make_data(shape)
X, Y = np.indices(shape)
parameters = fitgaussian(data)
print('Fitted parameters: {}'.format(parameters))
fit = gaussian(parameters, X, Y)
产量
Actual parameters: [ 50. 50. 100. 33.3333 40. ]
Fitted parameters: [ 50.2908 49.9992 99.9927 33.7039 40.6149]