正态随机变量平方的概率分布 - Python 中的理论与模拟
Probablity distribution of normal random variable squared - theory vs. simulation in Python
我似乎无法理解为什么正态随机变量平方的概率分布的理论结果和模拟结果如此不同。 (例如,高斯噪声电压信号的功率)
我怀疑我做错了什么,想问一下,是否有人可以帮助解决这个问题。
这是解释我正在尝试做的事情的代码:
import numpy as np
from scipy.integrate import quad, simps
from matplotlib import pyplot as plt
def PDF(x, sigma=1, mu=0): # Gaussian normal distribution PDF
return 1/(np.sqrt(2*np.pi*sigma))*np.exp(-1/(2*sigma**2)*(x-mu)**2)
def PDFu(u, u_rms=1, u_mean=0):
return PDF(u, sigma=u_rms, mu=u_mean)
def PDFP(P):
return 2*PDFu(np.sqrt(P)) # substitute the input variable with the 'scaled' one
def probDensity(x, nbins): # calculate the probability density based on the input samples
distr, bins = np.histogram(x, nbins) # similar to plt.hist(density=True)
binWidth = bins[1]-bins[0]
binCenters = bins[:-1]+binWidth/2
return distr/len(x)/binWidth, binCenters
npoints = 100000
rms = 1
u = np.random.normal(0, rms, npoints) # samples with Gaussian normal distribution
P = u**2 # square of the samples with Gaussian normal distribution - should follow chi-squared distribution?
nbins = 500
u_distr, u_bins = probDensity(u, nbins) # calculate PDF based on the samples
print('U_distr integral = ', simps(u_distr,u_bins)) # integrate the calculated PDF, should be 1
plt.plot(u_bins, u_distr)
us = np.linspace(-10, 10, 500)
PDFu_u = PDFu(us) # calculate the theoretical PDF
print('PDFu_u integral = ', quad(PDFu, -np.Inf, np.Inf)) # integral of the theoretical PDF, should be 1
plt.plot(us, PDFu_u)
nbins = 1000
P_distr, P_bins = probDensity(P, nbins) # calculate PDF based on the samples
print('P_distr integral = ', simps(P_distr, P_bins)) # integrate the calculated PDF, should be 1
plt.plot(P_bins, P_distr)
Ps = np.linspace(0, 8, npoints)
PDFP_P = PDFP(Ps) # calculate the theoretical PDF
plt.plot(Ps, PDFP_P)
print('PDFP_P integral = ', quad(PDFP, 0, np.Inf)) # integral of the theoretical PDF, should be 1
plt.show()
正态随机变量 (u) 的理论概率分布和模拟概率分布似乎非常匹配,我将其用作健全性检查。但是在平方变量的情况下差异很大,我不明白为什么以及如何让它们匹配。顺便说一句,我为理论分布尝试了各种合理的比例因子(例如 0.5、2、sqrt(2)),但它没有用,我不明白为什么我什至需要它。难道不应该根据公式 u=sqrt(P*R) [R=1] 将 'P' 替换为 'u' 并使用 'u' 的正态分布来计算某些 'P' 的 PDF 值?
我更相信模拟分布,我想知道应该如何正确计算理论分布。为什么替换法不行?
提前感谢您的帮助!
你的高斯平方的理论密度是错误的。这是计算。如果 X 是高斯分布,那么对于平方变量 $Y=X^2$ 的 CDF $F$,我们有
$$
F(x) = P(Y
其中 $\Phi$ 是高斯 CDF
所以对于 $Y$ 的 PDF $f(x)$,我们对其进行微分,得到
$$
f(x) = F'(x) = (1/(2\sqrt{x})) \Psi'(\sqrt{x}) + (1/(2\sqrt{x})) \Psi'(-\sqrt{x}) = (1/(2\sqrt{x})) (\psi(\sqrt{x}) + \psi(-\sqrt{x})
$$
其中 $\psi$ 是高斯 PDF
所以至少你漏掉了 $(1/(2\sqrt{x}))$
这是公式的图片,如果有帮助的话
作为参考,这里是根据 piterbarg 的回答修改后的 PDF 代码。再次感谢!
import numpy as np
from scipy.integrate import quad, simps
from matplotlib import pyplot as plt
def PDF(x, sigma=1, mu=0): # Gaussian normal distribution PDF
return 1/(np.sqrt(2*np.pi*sigma))*np.exp(-1/(2*sigma**2)*(x-mu)**2)
def PDFu(u, u_rms=1, u_mean=0):
return PDF(u, sigma=u_rms, mu=u_mean)
def PDFP(P):
return 1/(2*np.sqrt(P))*2*PDFu(np.sqrt(P)) # substitute the input variable with the 'scaled' one
def probDensity(x, nbins): # calculate the probability density based on the input samples
distr, bins = np.histogram(x, nbins) # similar to plt.hist(density=True)
binWidth = bins[1]-bins[0]
binCenters = bins[:-1]+binWidth/2
return distr/len(x)/binWidth, binCenters
npoints = 100000
rms = 1
u = np.random.normal(0, rms, npoints) # samples with Gaussian normal distribution
P = u**2 # square of the samples with Gaussian normal distribution - should follow chi-squared distribution?
nbins = 500
u_distr, u_bins = probDensity(u, nbins) # calculate PDF based on the samples
print('U_distr integral = ', simps(u_distr,u_bins)) # integrate the calculated PDF, should be 1
plt.plot(u_bins, u_distr)
us = np.linspace(-10, 10, 500)
PDFu_u = PDFu(us) # calculate the theoretical PDF
print('PDFu_u integral = ', quad(PDFu, -np.Inf, np.Inf)) # integral of the theoretical PDF, should be 1
plt.plot(us, PDFu_u)
nbins = 1000
P_distr, P_bins = probDensity(P, nbins) # calculate PDF based on the samples
print('P_distr integral = ', simps(P_distr, P_bins)) # integrate the calculated PDF, should be 1
plt.plot(P_bins, P_distr)
Ps = np.linspace(0, 8, npoints)
PDFP_P = PDFP(Ps) # calculate the theoretical PDF
plt.plot(Ps, PDFP_P)
print('PDFP_P integral = ', quad(PDFP, 0, np.Inf)) # integral of the theoretical PDF, should be 1
plt.show()
我似乎无法理解为什么正态随机变量平方的概率分布的理论结果和模拟结果如此不同。 (例如,高斯噪声电压信号的功率) 我怀疑我做错了什么,想问一下,是否有人可以帮助解决这个问题。
这是解释我正在尝试做的事情的代码:
import numpy as np
from scipy.integrate import quad, simps
from matplotlib import pyplot as plt
def PDF(x, sigma=1, mu=0): # Gaussian normal distribution PDF
return 1/(np.sqrt(2*np.pi*sigma))*np.exp(-1/(2*sigma**2)*(x-mu)**2)
def PDFu(u, u_rms=1, u_mean=0):
return PDF(u, sigma=u_rms, mu=u_mean)
def PDFP(P):
return 2*PDFu(np.sqrt(P)) # substitute the input variable with the 'scaled' one
def probDensity(x, nbins): # calculate the probability density based on the input samples
distr, bins = np.histogram(x, nbins) # similar to plt.hist(density=True)
binWidth = bins[1]-bins[0]
binCenters = bins[:-1]+binWidth/2
return distr/len(x)/binWidth, binCenters
npoints = 100000
rms = 1
u = np.random.normal(0, rms, npoints) # samples with Gaussian normal distribution
P = u**2 # square of the samples with Gaussian normal distribution - should follow chi-squared distribution?
nbins = 500
u_distr, u_bins = probDensity(u, nbins) # calculate PDF based on the samples
print('U_distr integral = ', simps(u_distr,u_bins)) # integrate the calculated PDF, should be 1
plt.plot(u_bins, u_distr)
us = np.linspace(-10, 10, 500)
PDFu_u = PDFu(us) # calculate the theoretical PDF
print('PDFu_u integral = ', quad(PDFu, -np.Inf, np.Inf)) # integral of the theoretical PDF, should be 1
plt.plot(us, PDFu_u)
nbins = 1000
P_distr, P_bins = probDensity(P, nbins) # calculate PDF based on the samples
print('P_distr integral = ', simps(P_distr, P_bins)) # integrate the calculated PDF, should be 1
plt.plot(P_bins, P_distr)
Ps = np.linspace(0, 8, npoints)
PDFP_P = PDFP(Ps) # calculate the theoretical PDF
plt.plot(Ps, PDFP_P)
print('PDFP_P integral = ', quad(PDFP, 0, np.Inf)) # integral of the theoretical PDF, should be 1
plt.show()
正态随机变量 (u) 的理论概率分布和模拟概率分布似乎非常匹配,我将其用作健全性检查。但是在平方变量的情况下差异很大,我不明白为什么以及如何让它们匹配。顺便说一句,我为理论分布尝试了各种合理的比例因子(例如 0.5、2、sqrt(2)),但它没有用,我不明白为什么我什至需要它。难道不应该根据公式 u=sqrt(P*R) [R=1] 将 'P' 替换为 'u' 并使用 'u' 的正态分布来计算某些 'P' 的 PDF 值? 我更相信模拟分布,我想知道应该如何正确计算理论分布。为什么替换法不行?
提前感谢您的帮助!
你的高斯平方的理论密度是错误的。这是计算。如果 X 是高斯分布,那么对于平方变量 $Y=X^2$ 的 CDF $F$,我们有
$$
F(x) = P(Y 其中 $\Phi$ 是高斯 CDF 所以对于 $Y$ 的 PDF $f(x)$,我们对其进行微分,得到 $$
f(x) = F'(x) = (1/(2\sqrt{x})) \Psi'(\sqrt{x}) + (1/(2\sqrt{x})) \Psi'(-\sqrt{x}) = (1/(2\sqrt{x})) (\psi(\sqrt{x}) + \psi(-\sqrt{x})
$$ 其中 $\psi$ 是高斯 PDF 所以至少你漏掉了 $(1/(2\sqrt{x}))$ 这是公式的图片,如果有帮助的话
作为参考,这里是根据 piterbarg 的回答修改后的 PDF 代码。再次感谢!
import numpy as np
from scipy.integrate import quad, simps
from matplotlib import pyplot as plt
def PDF(x, sigma=1, mu=0): # Gaussian normal distribution PDF
return 1/(np.sqrt(2*np.pi*sigma))*np.exp(-1/(2*sigma**2)*(x-mu)**2)
def PDFu(u, u_rms=1, u_mean=0):
return PDF(u, sigma=u_rms, mu=u_mean)
def PDFP(P):
return 1/(2*np.sqrt(P))*2*PDFu(np.sqrt(P)) # substitute the input variable with the 'scaled' one
def probDensity(x, nbins): # calculate the probability density based on the input samples
distr, bins = np.histogram(x, nbins) # similar to plt.hist(density=True)
binWidth = bins[1]-bins[0]
binCenters = bins[:-1]+binWidth/2
return distr/len(x)/binWidth, binCenters
npoints = 100000
rms = 1
u = np.random.normal(0, rms, npoints) # samples with Gaussian normal distribution
P = u**2 # square of the samples with Gaussian normal distribution - should follow chi-squared distribution?
nbins = 500
u_distr, u_bins = probDensity(u, nbins) # calculate PDF based on the samples
print('U_distr integral = ', simps(u_distr,u_bins)) # integrate the calculated PDF, should be 1
plt.plot(u_bins, u_distr)
us = np.linspace(-10, 10, 500)
PDFu_u = PDFu(us) # calculate the theoretical PDF
print('PDFu_u integral = ', quad(PDFu, -np.Inf, np.Inf)) # integral of the theoretical PDF, should be 1
plt.plot(us, PDFu_u)
nbins = 1000
P_distr, P_bins = probDensity(P, nbins) # calculate PDF based on the samples
print('P_distr integral = ', simps(P_distr, P_bins)) # integrate the calculated PDF, should be 1
plt.plot(P_bins, P_distr)
Ps = np.linspace(0, 8, npoints)
PDFP_P = PDFP(Ps) # calculate the theoretical PDF
plt.plot(Ps, PDFP_P)
print('PDFP_P integral = ', quad(PDFP, 0, np.Inf)) # integral of the theoretical PDF, should be 1
plt.show()