可以创建一个分布来表征 Python 中两个分布的乘积吗？

Question

我有两个分布，我想知道这些分布相乘的性质。

例如，如果我有属性速度和时间的分布，我想要距离的概率分布的特征。

通过对积分边界的合理估计，我可以从 the product of two random variables:

计算概率密度函数

from scipy import stats
import numpy as np
import matplotlib.pyplot as plt

T, dt = np.linspace(0,20,201, retstep = True)
T = T[1:] # avoid divide by zero below

V = np.linspace(0,20,201)
D = np.linspace(0,120,201)

P_t = stats.gamma(4,1) # probability distribution for time
P_v = stats.norm(8,2)  # probability distribution for speed

# complete integration
P_d = [np.trapz(P_t.pdf(T) * P_v.pdf(d / T) / T, dx = dt) for d in D]

plt.plot(T, P_t.pdf(T), label = 'time')
plt.plot(V, P_v.pdf(V), label = 'velocity')
plt.plot(D, P_d, label = 'distance')
plt.legend()
plt.ylabel('Probability density')

我希望能够针对 d 的任意值计算 P_d.sf(d)、P_d.cdf(d) 等内容。我可以创建一个 new 分布（可能使用 scipy.stats.rv_continuous）来表征距离吗？

Answer 1

该解决方案花了一些时间来理解 rv_continuous。从一堆例子中拼凑知识（我应该记录它们——抱歉）我想我得到了一个可行的解决方案。

唯一的问题是需要提前知道域，但我可以解决这个问题。如果有人知道如何解决这个问题，请告诉我。

import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import scipy as sp

interp1d = sp.interpolate.interp1d
trapz = sp.integrate.trapz

# Time domain vector - needed in class
dt = 0.01
t_max = 10
T = np.arange(dt, t_max + dt, dt)

# Distance domain vector - needed in class
dd = 0.01
d_max = 30
D = np.arange(0, d_max + dd, dd)

class MultiplicativeModel(stats.rv_continuous):
    def __init__(self, Tmodel, Vmodel, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.Tmodel = Tmodel # The time-domain probability function
        self.Vmodel = Vmodel # The velocity-domain probability function

        # Create vectors for interpolation of distributions        
        self.pdf_vec = np.array([trapz(self.Tmodel.pdf(T) * \
                                       self.Vmodel.pdf(_ / T) / T, dx = dt) \
                                 for _ in D])
        self.cdf_vec = np.cumsum(self.pdf_vec) * dd
        self.sf_vec = 1 - self.cdf_vec

        # define key functions for rv_continuous class
        self._pdf = interp1d(D, self.pdf_vec, assume_sorted=True)        
        self._sf = interp1d(D, self.sf_vec, assume_sorted=True)        
        self._cdf = interp1d(D, self.cdf_vec, assume_sorted=True)

        # Extraolation option below is necessary because sometimes rvs picks 
        # a number really really close to 1 or 0 and this spits out an error if it
        # is outside of the interpolation range.        
        self._ppf = interp1d(self.cdf_vec, D, assume_sorted=True, 
                             fill_value = 'extrapolate')
        # Moments
        self._munp = lambda n, *args: np.trapz(self.pdf_vec * D ** n, dx=dd)

根据上面的定义，我们得到如下结果：

dv = 0.01
v_max = 10
V = np.arange(0, v_max + dv, dv)

model = MultiplicativeModel(stats.norm(3, 1),
                            stats.uniform(loc=2, scale = 2))


# test moments and stats functions
print(f'median: {model.median()}')
# median: 8.700970199181763
print(f'moments: {model.stats(moments = "mvsk")}')
#moments: (array(9.00872026), array(12.2315612), array(0.44131568), array(0.16819043))

plt.figure(figsize=(6,4))
plt.plot(T, model.Tmodel.pdf(T), label = 'Time PDF')
plt.plot(V, model.Vmodel.pdf(V), label = 'Velocity PDF')
plt.plot(D, model.pdf(D), label = 'Distance PDF')
plt.plot(D, model.cdf(D), label = 'Distance CDF')
plt.plot(D, model.sf(D), label = 'Distance SF')

x = model.rvs(size=10**5)
plt.hist(x, bins = 50, density = True, alpha = 0.5, label = 'Sampled distribution')
plt.legend()
plt.xlim([0,30])

可以创建一个分布来表征 Python 中两个分布的乘积吗？

Can one create a distribution characterizing the multiplication of two distributions in Python?

python

scipy

scipy.stats