python 的多元学生 t 分布
multivariate student t-distribution with python
为了生成具有多元 t 分布的样本,我使用了这个函数:
def multivariatet(mu,Sigma,N,M):
'''
Output:
Produce M samples of d-dimensional multivariate t distribution
Input:
mu = mean (d dimensional numpy array or scalar)
Sigma = scale matrix (dxd numpy array)
N = degrees of freedom
M = # of samples to produce
'''
d = len(Sigma)
g = np.tile(np.random.gamma(N/2.,2./N,M),(d,1)).T
Z = np.random.multivariate_normal(np.zeros(d),Sigma,M)
return mu + Z/np.sqrt(g)
但我现在要找的是 multivariate student t-distribution 它本身,这样我就可以计算 dimension > 1
.
处的元素密度
这将类似于包 scipy 的 stats.t.pdf(x, df, loc, scale)
,但在多维 space 中。
密度是我自己打码的:
import numpy as np
from math import *
def multivariate_t_distribution(x,mu,Sigma,df,d):
'''
Multivariate t-student density:
output:
the density of the given element
input:
x = parameter (d dimensional numpy array or scalar)
mu = mean (d dimensional numpy array or scalar)
Sigma = scale matrix (dxd numpy array)
df = degrees of freedom
d: dimension
'''
Num = gamma(1. * (d+df)/2)
Denom = ( gamma(1.*df/2) * pow(df*pi,1.*d/2) * pow(np.linalg.det(Sigma),1./2) * pow(1 + (1./df)*np.dot(np.dot((x - mu),np.linalg.inv(Sigma)), (x - mu)),1.* (d+df)/2))
d = 1. * Num / Denom
return d
这将计算 n x d 数据矩阵 X 的多元学生 T 分布的对数 pdf:
from scipy.special import gamma
from numpy.linalg import slogdet
def multivariate_student_t(X, mu, Sigma, df):
#multivariate student T distribution
[n,d] = X.shape
Xm = X-mu
V = df * Sigma
V_inv = np.linalg.inv(V)
(sign, logdet) = slogdet(np.pi * V)
logz = -gamma(df/2.0 + d/2.0) + gamma(df/2.0) + 0.5*logdet
logp = -0.5*(df+d)*np.log(1+ np.sum(np.dot(Xm,V_inv)*Xm,axis=1))
logp = logp - logz
return logp
我概括了@farhawa 的代码以允许 x
中的多个条目(我发现我想一次查询多个点)。
import numpy as np
from math import gamma
def multivariate_t_distribution(x, mu, Sigma, df):
'''
Multivariate t-student density. Returns the density
of the function at points specified by x.
input:
x = parameter (n-d numpy array; will be forced to 2d)
mu = mean (d dimensional numpy array)
Sigma = scale matrix (dxd numpy array)
df = degrees of freedom
Edited from:
'''
x = np.atleast_2d(x) # requires x as 2d
nD = Sigma.shape[0] # dimensionality
numerator = gamma(1.0 * (nD + df) / 2.0)
denominator = (
gamma(1.0 * df / 2.0) *
np.power(df * np.pi, 1.0 * nD / 2.0) *
np.power(np.linalg.det(Sigma), 1.0 / 2.0) *
np.power(
1.0 + (1.0 / df) *
np.diagonal(
np.dot( np.dot(x - mu, np.linalg.inv(Sigma)), (x - mu).T)
),
1.0 * (nD + df) / 2.0
)
)
return 1.0 * numerator / denominator
我尝试了上述答案并得到了不同的结果,但我不确定 why/what 是否有误。以下,我基于高斯混合的 scikit-learn code,我认为有效(对于任意大小的输入 numpy 数组 X,和 c t-distributions 参数在列表中均值和协变量):
import numpy as np
from scipy import linalg
try: # SciPy >= 0.19
from scipy.special import gammaln as sp_gammaln
except ImportError:
from scipy.misc import gammaln as sp_gammaln
def log_multivariate_t_density(X, means, covars, nu = 1):
n_samples, n_dim = X.shape
nmix = len(means)
log_prob = np.empty((n_samples, nmix))
for c, (mu, cv) in enumerate(zip(means, covars)):
try:
cv_chol = linalg.cholesky(cv, lower=True)
except linalg.LinAlgError:
try:
cv_chol = linalg.cholesky(cv + min_covar * np.eye(n_dim),
lower=True)
except linalg.LinAlgError:
raise ValueError("'covars' must be symmetric, "
"positive-definite")
cv_log_det = 2 * np.sum(np.log(np.diagonal(cv_chol)))
cv_sol = linalg.solve_triangular(cv_chol, (X - mu).T, lower=True).T
norm = (sp_gammaln((nu + n_dim) / 2.) - sp_gammaln(nu / 2.)
- 0.5 * n_dim * np.log(nu * np.pi))
inner = - (nu + n_dim) / 2. * np.log1p(np.sum(cv_sol ** 2, axis=1) / nu)
log_prob[:, c] = norm + inner - cv_log_det
return log_prob
为了生成具有多元 t 分布的样本,我使用了这个函数:
def multivariatet(mu,Sigma,N,M):
'''
Output:
Produce M samples of d-dimensional multivariate t distribution
Input:
mu = mean (d dimensional numpy array or scalar)
Sigma = scale matrix (dxd numpy array)
N = degrees of freedom
M = # of samples to produce
'''
d = len(Sigma)
g = np.tile(np.random.gamma(N/2.,2./N,M),(d,1)).T
Z = np.random.multivariate_normal(np.zeros(d),Sigma,M)
return mu + Z/np.sqrt(g)
但我现在要找的是 multivariate student t-distribution 它本身,这样我就可以计算 dimension > 1
.
这将类似于包 scipy 的 stats.t.pdf(x, df, loc, scale)
,但在多维 space 中。
密度是我自己打码的:
import numpy as np
from math import *
def multivariate_t_distribution(x,mu,Sigma,df,d):
'''
Multivariate t-student density:
output:
the density of the given element
input:
x = parameter (d dimensional numpy array or scalar)
mu = mean (d dimensional numpy array or scalar)
Sigma = scale matrix (dxd numpy array)
df = degrees of freedom
d: dimension
'''
Num = gamma(1. * (d+df)/2)
Denom = ( gamma(1.*df/2) * pow(df*pi,1.*d/2) * pow(np.linalg.det(Sigma),1./2) * pow(1 + (1./df)*np.dot(np.dot((x - mu),np.linalg.inv(Sigma)), (x - mu)),1.* (d+df)/2))
d = 1. * Num / Denom
return d
这将计算 n x d 数据矩阵 X 的多元学生 T 分布的对数 pdf:
from scipy.special import gamma
from numpy.linalg import slogdet
def multivariate_student_t(X, mu, Sigma, df):
#multivariate student T distribution
[n,d] = X.shape
Xm = X-mu
V = df * Sigma
V_inv = np.linalg.inv(V)
(sign, logdet) = slogdet(np.pi * V)
logz = -gamma(df/2.0 + d/2.0) + gamma(df/2.0) + 0.5*logdet
logp = -0.5*(df+d)*np.log(1+ np.sum(np.dot(Xm,V_inv)*Xm,axis=1))
logp = logp - logz
return logp
我概括了@farhawa 的代码以允许 x
中的多个条目(我发现我想一次查询多个点)。
import numpy as np
from math import gamma
def multivariate_t_distribution(x, mu, Sigma, df):
'''
Multivariate t-student density. Returns the density
of the function at points specified by x.
input:
x = parameter (n-d numpy array; will be forced to 2d)
mu = mean (d dimensional numpy array)
Sigma = scale matrix (dxd numpy array)
df = degrees of freedom
Edited from:
'''
x = np.atleast_2d(x) # requires x as 2d
nD = Sigma.shape[0] # dimensionality
numerator = gamma(1.0 * (nD + df) / 2.0)
denominator = (
gamma(1.0 * df / 2.0) *
np.power(df * np.pi, 1.0 * nD / 2.0) *
np.power(np.linalg.det(Sigma), 1.0 / 2.0) *
np.power(
1.0 + (1.0 / df) *
np.diagonal(
np.dot( np.dot(x - mu, np.linalg.inv(Sigma)), (x - mu).T)
),
1.0 * (nD + df) / 2.0
)
)
return 1.0 * numerator / denominator
我尝试了上述答案并得到了不同的结果,但我不确定 why/what 是否有误。以下,我基于高斯混合的 scikit-learn code,我认为有效(对于任意大小的输入 numpy 数组 X,和 c t-distributions 参数在列表中均值和协变量):
import numpy as np
from scipy import linalg
try: # SciPy >= 0.19
from scipy.special import gammaln as sp_gammaln
except ImportError:
from scipy.misc import gammaln as sp_gammaln
def log_multivariate_t_density(X, means, covars, nu = 1):
n_samples, n_dim = X.shape
nmix = len(means)
log_prob = np.empty((n_samples, nmix))
for c, (mu, cv) in enumerate(zip(means, covars)):
try:
cv_chol = linalg.cholesky(cv, lower=True)
except linalg.LinAlgError:
try:
cv_chol = linalg.cholesky(cv + min_covar * np.eye(n_dim),
lower=True)
except linalg.LinAlgError:
raise ValueError("'covars' must be symmetric, "
"positive-definite")
cv_log_det = 2 * np.sum(np.log(np.diagonal(cv_chol)))
cv_sol = linalg.solve_triangular(cv_chol, (X - mu).T, lower=True).T
norm = (sp_gammaln((nu + n_dim) / 2.) - sp_gammaln(nu / 2.)
- 0.5 * n_dim * np.log(nu * np.pi))
inner = - (nu + n_dim) / 2. * np.log1p(np.sum(cv_sol ** 2, axis=1) / nu)
log_prob[:, c] = norm + inner - cv_log_det
return log_prob