Python: 如何将一条线拟合到特定的数据区间?
Python: How do I fit a line to a specific interval of data?
我正在尝试为我的数据集的 9.0 到 10.0 um 范围拟合一条线。这是我的情节:
不幸的是,这是一个散点图,其中 x
值没有从小数字到大数字进行索引,所以我不能只将 optimize.curve_fit
函数应用于特定范围的索引来获得x
值中的所需范围。
以下是我进行曲线拟合的首选程序。我将如何修改它以仅适合 9.0 到 10.0 um x
值范围(在我的例子中,x_dist
变量),该范围内的点随机分布在整个索引中?
def func(x,a,b): # Define your fitting function
return a*x+b
initialguess = [-14.0, 0.05] # initial guess for the parameters of the function func
fit, covariance = optimize.curve_fit( # call to the fitting routine curve_fit. Returns optimal values of the fit parameters, and their estimated variance
func, # function to fit
x_dist, # data for independant variable
xdiff_norm, # data for dependant variable
initialguess, # initial guess of fit parameters
) # uncertainty in dependant variable
print("linear coefficient:",fit[0],"+-",np.sqrt(covariance[0][0])) #print value and one std deviation of first fit parameter
print("offset coefficient:",fit[1],"+-",np.sqrt(covariance[1][1])) #print value and one std deviation of second fit parameter
print(covariance)
您正确地确定问题的出现是因为您的 x 值数据未排序。您可以用不同的方式解决这个问题。一种方法是使用布尔掩码来过滤掉不需要的值。我试图尽可能接近你的例子:
from matplotlib import pyplot as plt
import numpy as np
from scipy import optimize
#fake data generation
np.random.seed(1234)
arr = np.linspace(0, 15, 100).reshape(2, 50)
arr[1, :] = np.random.random(50)
arr[1, 20:45] += 2 * arr[0, 20:45] -5
rng = np.random.default_rng()
rng.shuffle(arr, axis = 1)
x_dist = arr[0, :]
xdiff_norm = arr[1, :]
def func(x, a, b):
return a * x + b
initialguess = [5, 3]
mask = (x_dist>2.5) & (x_dist<6.6)
fit, covariance = optimize.curve_fit(
func,
x_dist[mask],
xdiff_norm[mask],
initialguess)
plt.scatter(x_dist, xdiff_norm, label="data")
x_fit = np.linspace(x_dist[mask].min(), x_dist[mask].max(), 100)
y_fit = func(x_fit, *fit)
plt.plot(x_fit, y_fit, c="red", label="fit")
plt.legend()
plt.show()
示例输出:
此方法不会修改 x_dist
和 xdiff_norm
,这对于进一步的数据评估可能是好事,也可能不是好事。如果您想使用折线图而不是散点图,提前对数组进行排序可能会很有用(尝试使用上述方法绘制折线图以了解原因):
from matplotlib import pyplot as plt
import numpy as np
from scipy import optimize
#fake data generation
np.random.seed(1234)
arr = np.linspace(0, 15, 100).reshape(2, 50)
arr[1, :] = np.random.random(50)
arr[1, 20:45] += 2 * arr[0, 20:45] -5
rng = np.random.default_rng()
rng.shuffle(arr, axis = 1)
x_dist = arr[0, :]
xdiff_norm = arr[1, :]
def func(x, a, b):
return a * x + b
#find indexes of a sorted x_dist array, then sort both arrays based on this index
ind = x_dist.argsort()
x_dist = x_dist[ind]
xdiff_norm = xdiff_norm[ind]
#identify index where linear range starts for normal array indexing
start = np.argmax(x_dist>2.5)
stop = np.argmax(x_dist>6.6)
initialguess = [5, 3]
fit, covariance = optimize.curve_fit(
func,
x_dist[start:stop],
xdiff_norm[start:stop],
initialguess)
plt.plot(x_dist, xdiff_norm, label="data")
x_fit = np.linspace(x_dist[start], x_dist[stop], 100)
y_fit = func(x_fit, *fit)
plt.plot(x_fit, y_fit, c="red", ls="--", label="fit")
plt.legend()
plt.show()
示例输出(不出所料,差别不大):
我正在尝试为我的数据集的 9.0 到 10.0 um 范围拟合一条线。这是我的情节:
不幸的是,这是一个散点图,其中 x
值没有从小数字到大数字进行索引,所以我不能只将 optimize.curve_fit
函数应用于特定范围的索引来获得x
值中的所需范围。
以下是我进行曲线拟合的首选程序。我将如何修改它以仅适合 9.0 到 10.0 um x
值范围(在我的例子中,x_dist
变量),该范围内的点随机分布在整个索引中?
def func(x,a,b): # Define your fitting function
return a*x+b
initialguess = [-14.0, 0.05] # initial guess for the parameters of the function func
fit, covariance = optimize.curve_fit( # call to the fitting routine curve_fit. Returns optimal values of the fit parameters, and their estimated variance
func, # function to fit
x_dist, # data for independant variable
xdiff_norm, # data for dependant variable
initialguess, # initial guess of fit parameters
) # uncertainty in dependant variable
print("linear coefficient:",fit[0],"+-",np.sqrt(covariance[0][0])) #print value and one std deviation of first fit parameter
print("offset coefficient:",fit[1],"+-",np.sqrt(covariance[1][1])) #print value and one std deviation of second fit parameter
print(covariance)
您正确地确定问题的出现是因为您的 x 值数据未排序。您可以用不同的方式解决这个问题。一种方法是使用布尔掩码来过滤掉不需要的值。我试图尽可能接近你的例子:
from matplotlib import pyplot as plt
import numpy as np
from scipy import optimize
#fake data generation
np.random.seed(1234)
arr = np.linspace(0, 15, 100).reshape(2, 50)
arr[1, :] = np.random.random(50)
arr[1, 20:45] += 2 * arr[0, 20:45] -5
rng = np.random.default_rng()
rng.shuffle(arr, axis = 1)
x_dist = arr[0, :]
xdiff_norm = arr[1, :]
def func(x, a, b):
return a * x + b
initialguess = [5, 3]
mask = (x_dist>2.5) & (x_dist<6.6)
fit, covariance = optimize.curve_fit(
func,
x_dist[mask],
xdiff_norm[mask],
initialguess)
plt.scatter(x_dist, xdiff_norm, label="data")
x_fit = np.linspace(x_dist[mask].min(), x_dist[mask].max(), 100)
y_fit = func(x_fit, *fit)
plt.plot(x_fit, y_fit, c="red", label="fit")
plt.legend()
plt.show()
示例输出:
此方法不会修改 x_dist
和 xdiff_norm
,这对于进一步的数据评估可能是好事,也可能不是好事。如果您想使用折线图而不是散点图,提前对数组进行排序可能会很有用(尝试使用上述方法绘制折线图以了解原因):
from matplotlib import pyplot as plt
import numpy as np
from scipy import optimize
#fake data generation
np.random.seed(1234)
arr = np.linspace(0, 15, 100).reshape(2, 50)
arr[1, :] = np.random.random(50)
arr[1, 20:45] += 2 * arr[0, 20:45] -5
rng = np.random.default_rng()
rng.shuffle(arr, axis = 1)
x_dist = arr[0, :]
xdiff_norm = arr[1, :]
def func(x, a, b):
return a * x + b
#find indexes of a sorted x_dist array, then sort both arrays based on this index
ind = x_dist.argsort()
x_dist = x_dist[ind]
xdiff_norm = xdiff_norm[ind]
#identify index where linear range starts for normal array indexing
start = np.argmax(x_dist>2.5)
stop = np.argmax(x_dist>6.6)
initialguess = [5, 3]
fit, covariance = optimize.curve_fit(
func,
x_dist[start:stop],
xdiff_norm[start:stop],
initialguess)
plt.plot(x_dist, xdiff_norm, label="data")
x_fit = np.linspace(x_dist[start], x_dist[stop], 100)
y_fit = func(x_fit, *fit)
plt.plot(x_fit, y_fit, c="red", ls="--", label="fit")
plt.legend()
plt.show()
示例输出(不出所料,差别不大):