可以创建一个分布来表征 Python 中两个分布的乘积吗?
Can one create a distribution characterizing the multiplication of two distributions in Python?
我有两个分布,我想知道这些分布相乘的性质。
例如,如果我有属性速度和时间的分布,我想要距离的概率分布的特征。
通过对积分边界的合理估计,我可以从 the product of two random variables:
计算概率密度函数
from scipy import stats
import numpy as np
import matplotlib.pyplot as plt
T, dt = np.linspace(0,20,201, retstep = True)
T = T[1:] # avoid divide by zero below
V = np.linspace(0,20,201)
D = np.linspace(0,120,201)
P_t = stats.gamma(4,1) # probability distribution for time
P_v = stats.norm(8,2) # probability distribution for speed
# complete integration
P_d = [np.trapz(P_t.pdf(T) * P_v.pdf(d / T) / T, dx = dt) for d in D]
plt.plot(T, P_t.pdf(T), label = 'time')
plt.plot(V, P_v.pdf(V), label = 'velocity')
plt.plot(D, P_d, label = 'distance')
plt.legend()
plt.ylabel('Probability density')
我希望能够针对 d
的任意值计算 P_d.sf(d)
、P_d.cdf(d)
等内容。我可以创建一个 new 分布(可能使用 scipy.stats.rv_continuous
)来表征距离吗?
该解决方案花了一些时间来理解 rv_continuous
。从一堆例子中拼凑知识(我应该记录它们——抱歉)我想我得到了一个可行的解决方案。
唯一的问题是需要提前知道域,但我可以解决这个问题。如果有人知道如何解决这个问题,请告诉我。
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import scipy as sp
interp1d = sp.interpolate.interp1d
trapz = sp.integrate.trapz
# Time domain vector - needed in class
dt = 0.01
t_max = 10
T = np.arange(dt, t_max + dt, dt)
# Distance domain vector - needed in class
dd = 0.01
d_max = 30
D = np.arange(0, d_max + dd, dd)
class MultiplicativeModel(stats.rv_continuous):
def __init__(self, Tmodel, Vmodel, *args, **kwargs):
super().__init__(*args, **kwargs)
self.Tmodel = Tmodel # The time-domain probability function
self.Vmodel = Vmodel # The velocity-domain probability function
# Create vectors for interpolation of distributions
self.pdf_vec = np.array([trapz(self.Tmodel.pdf(T) * \
self.Vmodel.pdf(_ / T) / T, dx = dt) \
for _ in D])
self.cdf_vec = np.cumsum(self.pdf_vec) * dd
self.sf_vec = 1 - self.cdf_vec
# define key functions for rv_continuous class
self._pdf = interp1d(D, self.pdf_vec, assume_sorted=True)
self._sf = interp1d(D, self.sf_vec, assume_sorted=True)
self._cdf = interp1d(D, self.cdf_vec, assume_sorted=True)
# Extraolation option below is necessary because sometimes rvs picks
# a number really really close to 1 or 0 and this spits out an error if it
# is outside of the interpolation range.
self._ppf = interp1d(self.cdf_vec, D, assume_sorted=True,
fill_value = 'extrapolate')
# Moments
self._munp = lambda n, *args: np.trapz(self.pdf_vec * D ** n, dx=dd)
根据上面的定义,我们得到如下结果:
dv = 0.01
v_max = 10
V = np.arange(0, v_max + dv, dv)
model = MultiplicativeModel(stats.norm(3, 1),
stats.uniform(loc=2, scale = 2))
# test moments and stats functions
print(f'median: {model.median()}')
# median: 8.700970199181763
print(f'moments: {model.stats(moments = "mvsk")}')
#moments: (array(9.00872026), array(12.2315612), array(0.44131568), array(0.16819043))
plt.figure(figsize=(6,4))
plt.plot(T, model.Tmodel.pdf(T), label = 'Time PDF')
plt.plot(V, model.Vmodel.pdf(V), label = 'Velocity PDF')
plt.plot(D, model.pdf(D), label = 'Distance PDF')
plt.plot(D, model.cdf(D), label = 'Distance CDF')
plt.plot(D, model.sf(D), label = 'Distance SF')
x = model.rvs(size=10**5)
plt.hist(x, bins = 50, density = True, alpha = 0.5, label = 'Sampled distribution')
plt.legend()
plt.xlim([0,30])
我有两个分布,我想知道这些分布相乘的性质。
例如,如果我有属性速度和时间的分布,我想要距离的概率分布的特征。
通过对积分边界的合理估计,我可以从 the product of two random variables:
计算概率密度函数from scipy import stats
import numpy as np
import matplotlib.pyplot as plt
T, dt = np.linspace(0,20,201, retstep = True)
T = T[1:] # avoid divide by zero below
V = np.linspace(0,20,201)
D = np.linspace(0,120,201)
P_t = stats.gamma(4,1) # probability distribution for time
P_v = stats.norm(8,2) # probability distribution for speed
# complete integration
P_d = [np.trapz(P_t.pdf(T) * P_v.pdf(d / T) / T, dx = dt) for d in D]
plt.plot(T, P_t.pdf(T), label = 'time')
plt.plot(V, P_v.pdf(V), label = 'velocity')
plt.plot(D, P_d, label = 'distance')
plt.legend()
plt.ylabel('Probability density')
我希望能够针对 d
的任意值计算 P_d.sf(d)
、P_d.cdf(d)
等内容。我可以创建一个 new 分布(可能使用 scipy.stats.rv_continuous
)来表征距离吗?
该解决方案花了一些时间来理解 rv_continuous
。从一堆例子中拼凑知识(我应该记录它们——抱歉)我想我得到了一个可行的解决方案。
唯一的问题是需要提前知道域,但我可以解决这个问题。如果有人知道如何解决这个问题,请告诉我。
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import scipy as sp
interp1d = sp.interpolate.interp1d
trapz = sp.integrate.trapz
# Time domain vector - needed in class
dt = 0.01
t_max = 10
T = np.arange(dt, t_max + dt, dt)
# Distance domain vector - needed in class
dd = 0.01
d_max = 30
D = np.arange(0, d_max + dd, dd)
class MultiplicativeModel(stats.rv_continuous):
def __init__(self, Tmodel, Vmodel, *args, **kwargs):
super().__init__(*args, **kwargs)
self.Tmodel = Tmodel # The time-domain probability function
self.Vmodel = Vmodel # The velocity-domain probability function
# Create vectors for interpolation of distributions
self.pdf_vec = np.array([trapz(self.Tmodel.pdf(T) * \
self.Vmodel.pdf(_ / T) / T, dx = dt) \
for _ in D])
self.cdf_vec = np.cumsum(self.pdf_vec) * dd
self.sf_vec = 1 - self.cdf_vec
# define key functions for rv_continuous class
self._pdf = interp1d(D, self.pdf_vec, assume_sorted=True)
self._sf = interp1d(D, self.sf_vec, assume_sorted=True)
self._cdf = interp1d(D, self.cdf_vec, assume_sorted=True)
# Extraolation option below is necessary because sometimes rvs picks
# a number really really close to 1 or 0 and this spits out an error if it
# is outside of the interpolation range.
self._ppf = interp1d(self.cdf_vec, D, assume_sorted=True,
fill_value = 'extrapolate')
# Moments
self._munp = lambda n, *args: np.trapz(self.pdf_vec * D ** n, dx=dd)
根据上面的定义,我们得到如下结果:
dv = 0.01
v_max = 10
V = np.arange(0, v_max + dv, dv)
model = MultiplicativeModel(stats.norm(3, 1),
stats.uniform(loc=2, scale = 2))
# test moments and stats functions
print(f'median: {model.median()}')
# median: 8.700970199181763
print(f'moments: {model.stats(moments = "mvsk")}')
#moments: (array(9.00872026), array(12.2315612), array(0.44131568), array(0.16819043))
plt.figure(figsize=(6,4))
plt.plot(T, model.Tmodel.pdf(T), label = 'Time PDF')
plt.plot(V, model.Vmodel.pdf(V), label = 'Velocity PDF')
plt.plot(D, model.pdf(D), label = 'Distance PDF')
plt.plot(D, model.cdf(D), label = 'Distance CDF')
plt.plot(D, model.sf(D), label = 'Distance SF')
x = model.rvs(size=10**5)
plt.hist(x, bins = 50, density = True, alpha = 0.5, label = 'Sampled distribution')
plt.legend()
plt.xlim([0,30])