Issue: Scipy 拟合正态数据
Issue: Scipy Fitting Normal Data
我目前导入并绘制了一些正态分布的实验数据。
然后我尝试使用 Scipy 对数据进行正态分布拟合:
from scipy.stats import norm
def normal_fit(data, offset):
    """Fit a normal distribution to ``data - offset`` and evaluate its PDF.

    ``norm.fit`` is a maximum-likelihood estimator: it treats every element
    of its input as one sample drawn from the distribution.  The returned
    parameters are therefore the mean and standard deviation of the *values*
    in ``data - offset``.  If ``data`` instead holds the heights of a
    bell-shaped curve (a profile or histogram counts), these parameters do
    not describe the position or width of that curve; fit a model function
    with ``scipy.optimize.curve_fit`` in that case.

    Args:
        data: Array-like supporting ``data - offset`` and ``len()``
            (e.g. a NumPy array).
        offset: Value subtracted from ``data`` before fitting.

    Returns:
        A tuple ``(normal, params)`` where ``params`` is the ``(loc, scale)``
        pair returned by ``norm.fit`` and ``normal`` is ``norm.pdf`` with
        those parameters evaluated at ``len(data)`` evenly spaced points
        from 0 to ``len(data)``.
    """
    # The snippet references ``numpy`` but only imports ``norm``; import it
    # here so the function is self-contained.
    import numpy

    x = numpy.linspace(0, len(data), len(data))
    params = norm.fit(data - offset)
    normal = norm.pdf(x, params[0], params[1])
    return normal, params
但是它计算出的均值和标准差 (4504, 2961) 是不正确的 - 在下面以红色绘制。
如何正确拟合数据?
编辑:按照 ev-br 的建议,先把数据平移到零基线,但这样得到的参数 (954, 2961) 仍然没有多大意义。
需要先减去零电平(基线)。
我处理过类似的问题:
添加了 b,
即曲线拟合的偏移量,它似乎需要一个大致合理的初始猜测值;
然后比例因子的符号变了??所以我只是打了个补丁(取绝对值),自己也不明白原因。
下面是使用 curve_fit 重新编写的代码,它同时拟合出了偏移量:
from scipy.stats import norm
import numpy as np
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt
# Sample positions: 52 values; every literal below is halved by the trailing /2.
x = np.array([ 0.47712125, 0.5445641 , 0.61193563, 0.67924615, 0.74671202,
0.81404772, 0.88144172, 0.94885291, 1.01623919, 1.08361011,
1.15100191, 1.21837793, 1.28578227, 1.3531658 , 1.42054981,
1.48794397, 1.55532424, 1.62272161, 1.69010744, 1.75749472,
1.82488047, 1.89226717, 1.9596566 , 2.02704774, 2.09443269,
2.16182302, 2.2292107 , 2.29659719, 2.36398595, 2.43137342,
2.49876254, 2.56614983, 2.63353814, 2.700926 , 2.76831392,
2.83570198, 2.90308999, 2.97008999, 3.03708997, 3.10408999,
3.17108999, 3.23808998, 3.30508998, 3.37208999, 3.43908999,
3.50608998, 3.57308998, 3.64008999, 3.70708999, 3.77408999,
3.84108999, 3.90808999])/2
# Curve heights at each x: 52 values with a constant 10000 added to every
# element (presumably the baseline offset the fit is expected to recover).
y = np.array([ 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 5.50000000e+01, 1.33500000e+02,
2.49000000e+02, 4.40000000e+02, 7.27000000e+02,
1.09000000e+03, 1.53000000e+03, 2.21500000e+03,
3.13500000e+03, 4.44000000e+03, 5.57000000e+03,
6.77000000e+03, 8.04500000e+03, 9.15500000e+03,
1.00000000e+04, 1.06000000e+04, 1.06500000e+04,
1.02000000e+04, 9.29000000e+03, 8.01500000e+03,
6.50000000e+03, 5.24000000e+03, 4.11000000e+03,
2.97000000e+03, 1.86000000e+03, 1.02000000e+03,
5.26500000e+02, 2.49000000e+02, 1.11000000e+02,
5.27000000e+01, 6.90825000e+00, 4.54329000e+00,
3.63846500e+00, 3.58135000e+00, 2.37404000e+00,
1.81840000e+00, 1.20159500e+00, 6.02470000e-01,
3.43295000e-01, 1.62295000e-01, 7.99350000e-02,
3.60750000e-02, 1.50000000e-02, 3.61500000e-03,
8.00000000e-05])+10000
# Disabled leftover (never executed); presumably an alternative that draws
# synthetic normal samples instead of using the arrays above.
#numpy.random.normal(loc=0.0, scale=2.0, size=100)
# The bare string below is a reference note, not executed logic: it records
# how norm.pdf is parameterised by loc and scale.
'''
norm.pdf(x) = exp(-x**2/2)/sqrt(2*pi)
norm.pdf(x, loc, scale) == norm.pdf(y) / scale with y = (x - loc) / scale
'''
def datafit(x, N, u, sig, b):
    """Gaussian-shaped model with a constant baseline, for ``curve_fit``.

    Evaluates ``N * exp(-((x - u) / sig)**2 / 2) / sqrt(2*pi) + b``.

    Unlike ``norm.pdf(x, u, sig)``, the expression is not divided by
    ``sig``, so the same curve written with SciPy's PDF is
    ``N * sig * norm.pdf(x, u, sig) + b`` (for ``sig > 0``).  The model
    depends on ``sig`` only through ``sig**2``, so ``sig`` and ``-sig``
    give identical values and a fit cannot determine its sign.

    Args:
        x: Scalar or NumPy array of positions at which to evaluate.
        N: Height scale factor of the bell curve.
        u: Centre of the bell curve.
        sig: Width parameter; must be non-zero.
        b: Constant offset added to every value.

    Returns:
        The model value(s), with the same shape as ``x``.
    """
    return N * np.exp(-((x - u) / sig) ** 2 / 2) / (np.sqrt(2 * np.pi)) + b
# Fit datafit(x, N, u, sig, b) to the (x, y) data.  p0 is the initial guess in
# parameter order [N, u, sig, b].  popt holds the fitted parameters; popc
# receives curve_fit's second return value (the parameter covariance estimate).
popt,popc = curve_fit(datafit,x,y,p0=[np.max(y),2,2,1000])
# scipy norm.pdf with scaling factors to match datafit()
# datafit() depends on sig only through sig**2, so the fitted sig may come
# back negative; abs() turns it into a positive scale for norm.pdf.
scale = abs(popt[2])
# norm.pdf divides by scale while datafit() does not, hence the extra
# multiplication by scale to reproduce the same curve.
Normal_distribution = popt[0]*scale*norm.pdf(x, popt[1], scale) + popt[3]
# Blue: the data.
plt.plot(x,y, 'b-')
# Green: the fitted model.
# NOTE(review): the model is evaluated at x+.1 but plotted against x, so this
# curve is shifted by 0.1 along x relative to the red one -- presumably to
# keep the two curves visually distinct; confirm it is intentional.
plt.plot(x, datafit(x+.1, *popt), 'g')
# Red: the same fit expressed through norm.pdf.
plt.plot(x, Normal_distribution, 'r')
我目前导入并绘制了一些正态分布的实验数据。
然后我尝试使用 Scipy 对数据进行正态分布拟合:
from scipy.stats import norm
def normal_fit(data, offset):
    """Fit a normal distribution to ``data - offset`` and evaluate its PDF.

    ``norm.fit`` is a maximum-likelihood estimator: it treats every element
    of its input as one sample drawn from the distribution.  The returned
    parameters are therefore the mean and standard deviation of the *values*
    in ``data - offset``.  If ``data`` instead holds the heights of a
    bell-shaped curve (a profile or histogram counts), these parameters do
    not describe the position or width of that curve; fit a model function
    with ``scipy.optimize.curve_fit`` in that case.

    Args:
        data: Array-like supporting ``data - offset`` and ``len()``
            (e.g. a NumPy array).
        offset: Value subtracted from ``data`` before fitting.

    Returns:
        A tuple ``(normal, params)`` where ``params`` is the ``(loc, scale)``
        pair returned by ``norm.fit`` and ``normal`` is ``norm.pdf`` with
        those parameters evaluated at ``len(data)`` evenly spaced points
        from 0 to ``len(data)``.
    """
    # The snippet references ``numpy`` but only imports ``norm``; import it
    # here so the function is self-contained.
    import numpy

    x = numpy.linspace(0, len(data), len(data))
    params = norm.fit(data - offset)
    normal = norm.pdf(x, params[0], params[1])
    return normal, params
但是它计算出的均值和标准差 (4504, 2961) 是不正确的 - 在下面以红色绘制。
如何正确拟合数据?
编辑:按照 ev-br 的建议,先把数据平移到零基线,但这样得到的参数 (954, 2961) 仍然没有多大意义。
需要先减去零电平(基线)。
我处理过类似的问题:
添加了 b,
即曲线拟合的偏移量,它似乎需要一个大致合理的初始猜测值;
然后比例因子的符号变了??所以我只是打了个补丁(取绝对值),自己也不明白原因。
下面是使用 curve_fit 重新编写的代码,它同时拟合出了偏移量:
from scipy.stats import norm
import numpy as np
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt
# Sample positions: 52 values; every literal below is halved by the trailing /2.
x = np.array([ 0.47712125, 0.5445641 , 0.61193563, 0.67924615, 0.74671202,
0.81404772, 0.88144172, 0.94885291, 1.01623919, 1.08361011,
1.15100191, 1.21837793, 1.28578227, 1.3531658 , 1.42054981,
1.48794397, 1.55532424, 1.62272161, 1.69010744, 1.75749472,
1.82488047, 1.89226717, 1.9596566 , 2.02704774, 2.09443269,
2.16182302, 2.2292107 , 2.29659719, 2.36398595, 2.43137342,
2.49876254, 2.56614983, 2.63353814, 2.700926 , 2.76831392,
2.83570198, 2.90308999, 2.97008999, 3.03708997, 3.10408999,
3.17108999, 3.23808998, 3.30508998, 3.37208999, 3.43908999,
3.50608998, 3.57308998, 3.64008999, 3.70708999, 3.77408999,
3.84108999, 3.90808999])/2
# Curve heights at each x: 52 values with a constant 10000 added to every
# element (presumably the baseline offset the fit is expected to recover).
y = np.array([ 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
0.00000000e+00, 5.50000000e+01, 1.33500000e+02,
2.49000000e+02, 4.40000000e+02, 7.27000000e+02,
1.09000000e+03, 1.53000000e+03, 2.21500000e+03,
3.13500000e+03, 4.44000000e+03, 5.57000000e+03,
6.77000000e+03, 8.04500000e+03, 9.15500000e+03,
1.00000000e+04, 1.06000000e+04, 1.06500000e+04,
1.02000000e+04, 9.29000000e+03, 8.01500000e+03,
6.50000000e+03, 5.24000000e+03, 4.11000000e+03,
2.97000000e+03, 1.86000000e+03, 1.02000000e+03,
5.26500000e+02, 2.49000000e+02, 1.11000000e+02,
5.27000000e+01, 6.90825000e+00, 4.54329000e+00,
3.63846500e+00, 3.58135000e+00, 2.37404000e+00,
1.81840000e+00, 1.20159500e+00, 6.02470000e-01,
3.43295000e-01, 1.62295000e-01, 7.99350000e-02,
3.60750000e-02, 1.50000000e-02, 3.61500000e-03,
8.00000000e-05])+10000
# Disabled leftover (never executed); presumably an alternative that draws
# synthetic normal samples instead of using the arrays above.
#numpy.random.normal(loc=0.0, scale=2.0, size=100)
# The bare string below is a reference note, not executed logic: it records
# how norm.pdf is parameterised by loc and scale.
'''
norm.pdf(x) = exp(-x**2/2)/sqrt(2*pi)
norm.pdf(x, loc, scale) == norm.pdf(y) / scale with y = (x - loc) / scale
'''
def datafit(x, N, u, sig, b):
    """Gaussian-shaped model with a constant baseline, for ``curve_fit``.

    Evaluates ``N * exp(-((x - u) / sig)**2 / 2) / sqrt(2*pi) + b``.

    Unlike ``norm.pdf(x, u, sig)``, the expression is not divided by
    ``sig``, so the same curve written with SciPy's PDF is
    ``N * sig * norm.pdf(x, u, sig) + b`` (for ``sig > 0``).  The model
    depends on ``sig`` only through ``sig**2``, so ``sig`` and ``-sig``
    give identical values and a fit cannot determine its sign.

    Args:
        x: Scalar or NumPy array of positions at which to evaluate.
        N: Height scale factor of the bell curve.
        u: Centre of the bell curve.
        sig: Width parameter; must be non-zero.
        b: Constant offset added to every value.

    Returns:
        The model value(s), with the same shape as ``x``.
    """
    return N * np.exp(-((x - u) / sig) ** 2 / 2) / (np.sqrt(2 * np.pi)) + b
# Fit datafit(x, N, u, sig, b) to the (x, y) data.  p0 is the initial guess in
# parameter order [N, u, sig, b].  popt holds the fitted parameters; popc
# receives curve_fit's second return value (the parameter covariance estimate).
popt,popc = curve_fit(datafit,x,y,p0=[np.max(y),2,2,1000])
# scipy norm.pdf with scaling factors to match datafit()
# datafit() depends on sig only through sig**2, so the fitted sig may come
# back negative; abs() turns it into a positive scale for norm.pdf.
scale = abs(popt[2])
# norm.pdf divides by scale while datafit() does not, hence the extra
# multiplication by scale to reproduce the same curve.
Normal_distribution = popt[0]*scale*norm.pdf(x, popt[1], scale) + popt[3]
# Blue: the data.
plt.plot(x,y, 'b-')
# Green: the fitted model.
# NOTE(review): the model is evaluated at x+.1 but plotted against x, so this
# curve is shifted by 0.1 along x relative to the red one -- presumably to
# keep the two curves visually distinct; confirm it is intentional.
plt.plot(x, datafit(x+.1, *popt), 'g')
# Red: the same fit expressed through norm.pdf.
plt.plot(x, Normal_distribution, 'r')