如何使用 python/scipy 对曲线族进行曲线拟合
How to do curve fitting of a curve family with python/scipy
我有一个解决以下问题的方法:
- 我想用一个方程拟合一组曲线(参见代码)
- 拟合参数 (C1-C4) 应为常量
- 所以我可以通过改变 "Sigma and T"
来拟合(或描述)家庭的一条曲线
希望能描述一下我的问题,希望大家帮帮忙,不胜感激!
问题已编辑(由于误解 - 2020_04_04)
我现在会尝试更具体一些,因为我附上了一张图片,您可以在其中看到“曲线系列”的示例,它会随着不同的“Sigma”而变化。我想用一对常量——C1、C2、C3 和 C4 来描述这些曲线族,而不改变它们。线索是找到一个最优化的常数,它可以描述这个曲线族,只需改变 Sigma 和 T 作为变量。因此,我必须以最小的误差拟合一组曲线的参数。之后,只需更改“Sigma 和 T”,方程式就应该覆盖整个曲线族。
Example of Curves
此致!
import matplotlib.pyplot as plt
import numpy as np
from scipy.optimize import curve_fit
#Equation --> Eps_Cr = (C1*Sigma**C2*x**(C3+1)*e(-C4/T))/(C3+1)
def func(x, C1, C2, C3,C4):
Sigma = 20
T = 1
return (C1*Sigma**C2*x**(C3+1)*np.exp(-C4*1/T))/(C3+1)
#Example Data 1
xdata = [1, 10, 100, 1000, 10000, 100000]
ydata = [0.000382,0.000407,0.000658,0.001169,0.002205,0.004304]
#Example Data 2
xdata1 = [1, 10, 100, 1000, 10000, 100000]
ydata1 = [0.002164,0.002371,0.004441,0.008571,0.016811,0.033261]
#Example Data 3
xdata2 = [1, 10, 100, 1000, 10000, 100000]
ydata2 = [0.001332,0.001457,0.002707,0.005157,0.010007,0.019597]
plt.plot(xdata, ydata, 'b-', label='data')
plt.plot(xdata1, ydata1, 'g-', label='data')
plt.plot(xdata2, ydata2, 'y-', label='data')
popt, pcov = curve_fit(func, xdata, ydata)
plt.plot(xdata, func(xdata, *popt), 'r--',
label='fit: C1=%5.2e, C2=%5.3f, C3=%5.3f,C4=%5.3f' % tuple(popt))
plt.xlabel('X')
plt.ylabel('Y')
plt.legend()
plt.show()
根据您的查询,我了解到您需要分别对三个不同的数据集拟合一个方程。因此,我通过保持 sigma 和 T 相同来更新您的代码。请看一下,让我进一步了解。
import matplotlib.pyplot as plt
import numpy as np
from scipy.optimize import curve_fit
#Equation --> Eps_Cr = (C1*Sigma**C2*x**(C3+1)*e(-C4/T))/(C3+1)
def func(x, C1, C2, C3,C4):
Sigma = 20
T = 1
return (C1*Sigma**C2*x**(C3+1)*np.exp(-C4*1/T))/(C3+1)
#Example Data 1
xdata = [1, 10, 100, 1000, 10000, 100000]
ydata = [0.000382,0.000407,0.000658,0.001169,0.002205,0.004304]
#Example Data 2
xdata1 = [1, 10, 100, 1000, 10000, 100000]
ydata1 = [0.002164,0.002371,0.004441,0.008571,0.016811,0.033261]
#Example Data 3
xdata2 = [1, 10, 100, 1000, 10000, 100000]
ydata2 = [0.001332,0.001457,0.002707,0.005157,0.010007,0.019597]
plt.plot(xdata, ydata, 'b-', label='data 1')
plt.plot(xdata1, ydata1, 'g-', label='data 2')
plt.plot(xdata2, ydata2, 'y-', label='data 3')
popt, pcov = curve_fit(func, xdata, ydata)
popt1, pcov1 = curve_fit(func, xdata1, ydata1)
popt2, pcov2 = curve_fit(func, xdata2, ydata2)
plt.plot(xdata, func(xdata, *popt), 'r.',
label='fit for Data 1: C1=%5.2e, C2=%5.3f, C3=%5.3f,C4=%5.3f' % tuple(popt))
plt.plot(xdata1, func(xdata1, *popt1), 'r+',
label='fit for Data 2: C1=%5.2e, C2=%5.3f, C3=%5.3f,C4=%5.3f' % tuple(popt1))
plt.plot(xdata2, func(xdata2, *popt2), 'r--',
label='fit for Data 3 : C1=%5.2e, C2=%5.3f, C3=%5.3f,C4=%5.3f' % tuple(popt2))
plt.xlabel('X')
plt.ylabel('Y')
plt.legend(loc='upper left',prop={'size': 8})
plt.show()
根据您在 'answer' 中提供的额外信息,您似乎想要拟合分层模型。至少那是统计学家通常所说的。有些参数在所有数据点之间共享(参数 C1
到 C4
,有些参数在数据集组内共享(T
和 Sigma
)。所有这些参数需要根据数据进行估算。
通常通过为所有数据构建更大的模型来解决这个问题,并在模型中 select 使用哪个分组参数。如果数据点属于数据组 1
我们选择 Sigma1
和 T1
等等...
由于您已经在使用 curve_fit
,我制作了一个可以完成这项工作的代码版本。由于我不是 scipy
方面的专家,因此代码风格还有一些要求,但我认为您至少会理解该方法。
import matplotlib.pyplot as plt
import numpy as np
from scipy.optimize import curve_fit
def func(x_and_grp, C1, C2, C3, C4, Sigma0, Sigma1, Sigma2, T0, T1, T2):
# We estimate one sigma and one T per group of data points
x = x_and_grp[:,0]
grp_id = x_and_grp[:,1]
# here we select the appropriate T and Sigma for each data point based on their group id
T = np.array([[T0, T1, T2][int(gid)] for gid in grp_id])
Sigma = np.array([[Sigma0, Sigma1, Sigma2][int(gid)] for gid in grp_id])
return (C1*Sigma**C2*x**(C3+1)*np.exp(-C4*1/T))/(C3+1)
#Example Data in 3 groups
xdata0 = [1, 10, 100, 1000, 10000, 100000]
ydata0 = [0.000382,0.000407,0.000658,0.001169,0.002205,0.004304]
xdata1 = [1, 10, 100, 1000, 10000, 100000]
ydata1 = [0.002164,0.002371,0.004441,0.008571,0.016811,0.033261]
xdata2 = [1, 10, 100, 1000, 10000, 100000]
ydata2 = [0.001332,0.001457,0.002707,0.005157,0.010007,0.019597]
# merge all the data and add the group id to the x-data vectors
y_all = np.concatenate([ydata0, ydata1, ydata2])
x_and_grp_all = np.zeros(shape=(3 * 6, 2))
x_and_grp_all[:, 0] = np.concatenate([xdata0, xdata1, xdata2])
x_and_grp_all[0:6, 1] = 0
x_and_grp_all[6:12, 1] = 1
x_and_grp_all[12:18, 1] = 2
# fit a model to all the data together
popt, pcov = curve_fit(func, x_and_grp_all, y_all)
xspace = np.logspace(1,5)
plt.plot(xdata0, ydata0, 'b-', label='data')
plt.plot(xdata1, ydata1, 'g-', label='data')
plt.plot(xdata2, ydata2, 'y-', label='data')
for gid,color in zip([0,1,2],['r','k','purple']):
T = popt[4+gid]
Sigma = popt[7+gid]
x_and_grp = np.column_stack([xspace,np.ones_like(xspace)*gid])
plt.plot(xspace,
func(x_and_grp, *popt),
linestyle='dashed', color=color,
label='fit: T=%5.2e, Sigma=%5.3f' % (T,Sigma))
plt.xlabel('X')
plt.ylabel('Y')
plt.title('fit: C1=%5.2e, C2=%5.3f, C3=%5.3f,C4=%5.3f' % tuple(popt[0:4]))
plt.legend()
plt.show()
输出如下所示:
最后,我想补充一点,如果您有很多不同的组,curve_fit
不太适合这项任务。考虑其他一些可能相关的库。 Statmodels 是可能的。一种替代方法是改为使用 scipy.optimize.minimze
,因为它为您提供了更大的灵活性。不过,您需要手动进行置信区间估计...
我还想补充一点,如果你知道每组数据的T
和Sigma
,上面的方法就太复杂了。在这种情况下,我们将 Sigma
和 T
的相关值添加到 x 向量,而不是组 ID。
我有一个解决以下问题的方法:
- 我想用一个方程拟合一组曲线(参见代码)
- 拟合参数 (C1-C4) 应为常量
- 所以我可以通过改变 "Sigma and T" 来拟合(或描述)家庭的一条曲线
希望能描述一下我的问题,希望大家帮帮忙,不胜感激!
问题已编辑(由于误解 - 2020_04_04)
我现在会尝试更具体一些,因为我附上了一张图片,您可以在其中看到“曲线系列”的示例,它会随着不同的“Sigma”而变化。我想用一对常量——C1、C2、C3 和 C4 来描述这些曲线族,而不改变它们。线索是找到一个最优化的常数,它可以描述这个曲线族,只需改变 Sigma 和 T 作为变量。因此,我必须以最小的误差拟合一组曲线的参数。之后,只需更改“Sigma 和 T”,方程式就应该覆盖整个曲线族。
Example of Curves
此致!
import matplotlib.pyplot as plt
import numpy as np
from scipy.optimize import curve_fit
#Equation --> Eps_Cr = (C1*Sigma**C2*x**(C3+1)*e(-C4/T))/(C3+1)
def func(x, C1, C2, C3,C4):
Sigma = 20
T = 1
return (C1*Sigma**C2*x**(C3+1)*np.exp(-C4*1/T))/(C3+1)
#Example Data 1
xdata = [1, 10, 100, 1000, 10000, 100000]
ydata = [0.000382,0.000407,0.000658,0.001169,0.002205,0.004304]
#Example Data 2
xdata1 = [1, 10, 100, 1000, 10000, 100000]
ydata1 = [0.002164,0.002371,0.004441,0.008571,0.016811,0.033261]
#Example Data 3
xdata2 = [1, 10, 100, 1000, 10000, 100000]
ydata2 = [0.001332,0.001457,0.002707,0.005157,0.010007,0.019597]
plt.plot(xdata, ydata, 'b-', label='data')
plt.plot(xdata1, ydata1, 'g-', label='data')
plt.plot(xdata2, ydata2, 'y-', label='data')
popt, pcov = curve_fit(func, xdata, ydata)
plt.plot(xdata, func(xdata, *popt), 'r--',
label='fit: C1=%5.2e, C2=%5.3f, C3=%5.3f,C4=%5.3f' % tuple(popt))
plt.xlabel('X')
plt.ylabel('Y')
plt.legend()
plt.show()
根据您的查询,我了解到您需要分别对三个不同的数据集拟合一个方程。因此,我通过保持 sigma 和 T 相同来更新您的代码。请看一下,让我进一步了解。
import matplotlib.pyplot as plt
import numpy as np
from scipy.optimize import curve_fit
#Equation --> Eps_Cr = (C1*Sigma**C2*x**(C3+1)*e(-C4/T))/(C3+1)
def func(x, C1, C2, C3,C4):
Sigma = 20
T = 1
return (C1*Sigma**C2*x**(C3+1)*np.exp(-C4*1/T))/(C3+1)
#Example Data 1
xdata = [1, 10, 100, 1000, 10000, 100000]
ydata = [0.000382,0.000407,0.000658,0.001169,0.002205,0.004304]
#Example Data 2
xdata1 = [1, 10, 100, 1000, 10000, 100000]
ydata1 = [0.002164,0.002371,0.004441,0.008571,0.016811,0.033261]
#Example Data 3
xdata2 = [1, 10, 100, 1000, 10000, 100000]
ydata2 = [0.001332,0.001457,0.002707,0.005157,0.010007,0.019597]
plt.plot(xdata, ydata, 'b-', label='data 1')
plt.plot(xdata1, ydata1, 'g-', label='data 2')
plt.plot(xdata2, ydata2, 'y-', label='data 3')
popt, pcov = curve_fit(func, xdata, ydata)
popt1, pcov1 = curve_fit(func, xdata1, ydata1)
popt2, pcov2 = curve_fit(func, xdata2, ydata2)
plt.plot(xdata, func(xdata, *popt), 'r.',
label='fit for Data 1: C1=%5.2e, C2=%5.3f, C3=%5.3f,C4=%5.3f' % tuple(popt))
plt.plot(xdata1, func(xdata1, *popt1), 'r+',
label='fit for Data 2: C1=%5.2e, C2=%5.3f, C3=%5.3f,C4=%5.3f' % tuple(popt1))
plt.plot(xdata2, func(xdata2, *popt2), 'r--',
label='fit for Data 3 : C1=%5.2e, C2=%5.3f, C3=%5.3f,C4=%5.3f' % tuple(popt2))
plt.xlabel('X')
plt.ylabel('Y')
plt.legend(loc='upper left',prop={'size': 8})
plt.show()
根据您在 'answer' 中提供的额外信息,您似乎想要拟合分层模型。至少那是统计学家通常所说的。有些参数在所有数据点之间共享(参数 C1
到 C4
,有些参数在数据集组内共享(T
和 Sigma
)。所有这些参数需要根据数据进行估算。
通常通过为所有数据构建更大的模型来解决这个问题,并在模型中 select 使用哪个分组参数。如果数据点属于数据组 1
我们选择 Sigma1
和 T1
等等...
由于您已经在使用 curve_fit
,我制作了一个可以完成这项工作的代码版本。由于我不是 scipy
方面的专家,因此代码风格还有一些要求,但我认为您至少会理解该方法。
import matplotlib.pyplot as plt
import numpy as np
from scipy.optimize import curve_fit
def func(x_and_grp, C1, C2, C3, C4, Sigma0, Sigma1, Sigma2, T0, T1, T2):
# We estimate one sigma and one T per group of data points
x = x_and_grp[:,0]
grp_id = x_and_grp[:,1]
# here we select the appropriate T and Sigma for each data point based on their group id
T = np.array([[T0, T1, T2][int(gid)] for gid in grp_id])
Sigma = np.array([[Sigma0, Sigma1, Sigma2][int(gid)] for gid in grp_id])
return (C1*Sigma**C2*x**(C3+1)*np.exp(-C4*1/T))/(C3+1)
#Example Data in 3 groups
xdata0 = [1, 10, 100, 1000, 10000, 100000]
ydata0 = [0.000382,0.000407,0.000658,0.001169,0.002205,0.004304]
xdata1 = [1, 10, 100, 1000, 10000, 100000]
ydata1 = [0.002164,0.002371,0.004441,0.008571,0.016811,0.033261]
xdata2 = [1, 10, 100, 1000, 10000, 100000]
ydata2 = [0.001332,0.001457,0.002707,0.005157,0.010007,0.019597]
# merge all the data and add the group id to the x-data vectors
y_all = np.concatenate([ydata0, ydata1, ydata2])
x_and_grp_all = np.zeros(shape=(3 * 6, 2))
x_and_grp_all[:, 0] = np.concatenate([xdata0, xdata1, xdata2])
x_and_grp_all[0:6, 1] = 0
x_and_grp_all[6:12, 1] = 1
x_and_grp_all[12:18, 1] = 2
# fit a model to all the data together
popt, pcov = curve_fit(func, x_and_grp_all, y_all)
xspace = np.logspace(1,5)
plt.plot(xdata0, ydata0, 'b-', label='data')
plt.plot(xdata1, ydata1, 'g-', label='data')
plt.plot(xdata2, ydata2, 'y-', label='data')
for gid,color in zip([0,1,2],['r','k','purple']):
T = popt[4+gid]
Sigma = popt[7+gid]
x_and_grp = np.column_stack([xspace,np.ones_like(xspace)*gid])
plt.plot(xspace,
func(x_and_grp, *popt),
linestyle='dashed', color=color,
label='fit: T=%5.2e, Sigma=%5.3f' % (T,Sigma))
plt.xlabel('X')
plt.ylabel('Y')
plt.title('fit: C1=%5.2e, C2=%5.3f, C3=%5.3f,C4=%5.3f' % tuple(popt[0:4]))
plt.legend()
plt.show()
输出如下所示:
最后,我想补充一点,如果您有很多不同的组,curve_fit
不太适合这项任务。考虑其他一些可能相关的库。 Statmodels 是可能的。一种替代方法是改为使用 scipy.optimize.minimze
,因为它为您提供了更大的灵活性。不过,您需要手动进行置信区间估计...
我还想补充一点,如果你知道每组数据的T
和Sigma
,上面的方法就太复杂了。在这种情况下,我们将 Sigma
和 T
的相关值添加到 x 向量,而不是组 ID。