STAN 中的多元排放隐马尔可夫模型
Multivariate Emission Hidden Markov Model in STAN
我正在尝试实施 HMM,其中观测值是与小波卷积的一阶 HMM 的发射。
即:
与:
和
我目前的代码是,连同概述的一维案例 here:
%%writefile HMM_code.stan
data {
int<lower=1> T; // number of observations (length)
int<lower=1> K; // number of hidden states
int<lower=1> H; // number of elements in wavelet vector
vector[K] dir_alpha;
real y[T]; // observations
matrix[T,H] W; // Wavelet matrix
cov_matrix[H] I; //prior covariance
}
parameters {
// Discrete state model
simplex[K] pi1; // initial state probabilities
simplex[K] A[K]; // transition probabilities
// A[i][j] = p(z_t = j | z_{t-1} = i)
// Continuous observation model
vector[H] Mu[K]; // observation means
real<lower=0> sigma; // observation standard deviations
cov_matrix[H] Sigma[K];
vector[T] eei[H]; //
}
transformed parameters {
vector[K] logalpha[T];
{ // Forward algorithm log p(z_t = j | y_{1:t})
real accumulator[K];
logalpha[1] = log(pi1) + multi_normal_lpdf(eei[1] | Mu[][1], Sigma[][1]);
for (t in 2:T) {
for (j in 1:K) { // j = current (t)
for (i in 1:K) { // i = previous (t-1)
// Murphy (2012) p. 609 eq. 17.48
// belief state + transition prob + local evidence at t
accumulator[i] = logalpha[t-1, i] + log(A[i, j]) + multi_normal_lpdf(eei[t] | Mu[][j], Sigma[][i]);
}
logalpha[t, j] = log_sum_exp(accumulator);
}
}
} // Forward
}
model{
Mu ~ multi_normal(rep_vector(0.0,H),I);
sigma ~ inv_gamma(.1,.1);
for(k in 1:K)
A[k] ~ dirichlet(dir_alpha);
for( j in 1:K)
Sigma[j] ~ inv_wishart(3.0, I); //Relevant part for this post
for (t in 1:T)
y[t] ~ normal(W[t,]*eei[t], pow(sigma,.5));// Likelihood
}
generated quantities {
vector[K] alpha[T];
vector[K] beta[T];
vector[K] gamma[T];
vector[K] loggamma[T];
int<lower=1, upper=K> zstar[T];
{ // Forward algortihm
for (t in 1:T)
alpha[t] = softmax(logalpha[t]);
} // Forward
{ // Backward algorithm log p(eei_{t+1:T} | z_t = j)
real accumulator[K];
vector[K] logbeta[T];
for (j in 1:K)
logbeta[T, j] = 1;
for (tforward in 0:(T-2)) {
int t;
t = T - tforward;
for (j in 1:K) { // j = previous (t-1)
for (i in 1:K) { // i = next (t)
// Murphy (2012) Eq. 17.58
// backwards t + transition prob + local evidence at t
accumulator[i] = logbeta[t, i] + log(A[j, i]) + multi_normal_lpdf(eei[t] | Mu[][i], Sigma[][i]);
}
logbeta[t-1, j] = log_sum_exp(accumulator);
}
}
for (t in 1:T)
beta[t] = softmax(logbeta[t]);
} // Backward
{ // forward-backward algorithm log p(z_t = j | eei_{1:T})
for(t in 1:T) {
loggamma[t] = alpha[t] .* beta[t];
}
for(t in 1:T)
gamma[t] = softmax(loggamma[t]);
} // forward-backward
{ // Viterbi algorithm
real logp_zstar;
int bpointer[T, K]; // backpointer to the most likely previous state on the most probable path
real delta[T, K]; // max prob for the sequence up to t
// that ends with an emission from state k
for (j in 1:K)
delta[1, K] = multi_normal_lpdf(eei[1] | Mu[][j], Sigma[][j]);
for (t in 2:T) {
for (j in 1:K) { // j = current (t)
delta[t, j] = negative_infinity();
for (i in 1:K) { // i = previous (t-1)
real logp;
logp = delta[t-1, i] + log(A[i, j]) + multi_normal_lpdf(eei[t] | Mu[][i], Sigma[][i]);
if (logp > delta[t, j]) {
bpointer[t, j] = i;
delta[t, j] = logp;
}
}
}
}
logp_zstar = max(delta[T]);
for (j in 1:K)
if (delta[T, j] == logp_zstar)
zstar[T] = j;
for (t in 1:(T - 1)) {
zstar[T - t] = bpointer[T - t + 1, zstar[T - t + 1]];
}
}
}
编译使用:
%%time
sm = pystan.StanModel(file='HMM_code.stan')
适合使用:
%%time
fit = sm.sampling(data=HMM_data, iter=1, thin = 1, verbose = True)
我遇到的错误是在计算 multi_normal_pdf 函数的第一行,即 "the LDLT_Factor of covariance parameter is not positive definite. last conditional variance is 2.77556e-16"
大的协方差矩阵很容易在数值上是奇异的,即使在这种情况下它们在构造上是正定的。为了避免这个问题和不必要的计算,通常在 Stan 中做的事情是将参数声明为协方差或相关矩阵的 Cholesky 因子(在这种情况下,您还需要在参数中声明标准偏差的正向量堵塞)。
在这种情况下,您将调用 multi_normal_cholesky_lpdf
函数,该函数以协方差矩阵的 Cholesky 因子为条件,而不是协方差矩阵本身,后者又可以是 diag_pre_multiply
标准偏差向量和相关矩阵的 Cholesky 因子。但是,在这种情况下,您需要将相关矩阵的 Cholesky 因子的先验从逆 Wishart 切换为 lkj_corr_cholesky_lpdf
,以及您想要的任何标准差(例如 exponential_lpdf
)。这些都在 Stan 用户手册中讨论过。
另一个option是对协方差矩阵求积分。
我正在尝试实施 HMM,其中观测值是与小波卷积的一阶 HMM 的发射。
即:
与:
和
我目前的代码是,连同概述的一维案例 here:
%%writefile HMM_code.stan
data {
int<lower=1> T; // number of observations (length)
int<lower=1> K; // number of hidden states
int<lower=1> H; // number of elements in wavelet vector
vector[K] dir_alpha;
real y[T]; // observations
matrix[T,H] W; // Wavelet matrix
cov_matrix[H] I; //prior covariance
}
parameters {
// Discrete state model
simplex[K] pi1; // initial state probabilities
simplex[K] A[K]; // transition probabilities
// A[i][j] = p(z_t = j | z_{t-1} = i)
// Continuous observation model
vector[H] Mu[K]; // observation means
real<lower=0> sigma; // observation standard deviations
cov_matrix[H] Sigma[K];
vector[T] eei[H]; //
}
transformed parameters {
vector[K] logalpha[T];
{ // Forward algorithm log p(z_t = j | y_{1:t})
real accumulator[K];
logalpha[1] = log(pi1) + multi_normal_lpdf(eei[1] | Mu[][1], Sigma[][1]);
for (t in 2:T) {
for (j in 1:K) { // j = current (t)
for (i in 1:K) { // i = previous (t-1)
// Murphy (2012) p. 609 eq. 17.48
// belief state + transition prob + local evidence at t
accumulator[i] = logalpha[t-1, i] + log(A[i, j]) + multi_normal_lpdf(eei[t] | Mu[][j], Sigma[][i]);
}
logalpha[t, j] = log_sum_exp(accumulator);
}
}
} // Forward
}
model{
Mu ~ multi_normal(rep_vector(0.0,H),I);
sigma ~ inv_gamma(.1,.1);
for(k in 1:K)
A[k] ~ dirichlet(dir_alpha);
for( j in 1:K)
Sigma[j] ~ inv_wishart(3.0, I); //Relevant part for this post
for (t in 1:T)
y[t] ~ normal(W[t,]*eei[t], pow(sigma,.5));// Likelihood
}
generated quantities {
vector[K] alpha[T];
vector[K] beta[T];
vector[K] gamma[T];
vector[K] loggamma[T];
int<lower=1, upper=K> zstar[T];
{ // Forward algortihm
for (t in 1:T)
alpha[t] = softmax(logalpha[t]);
} // Forward
{ // Backward algorithm log p(eei_{t+1:T} | z_t = j)
real accumulator[K];
vector[K] logbeta[T];
for (j in 1:K)
logbeta[T, j] = 1;
for (tforward in 0:(T-2)) {
int t;
t = T - tforward;
for (j in 1:K) { // j = previous (t-1)
for (i in 1:K) { // i = next (t)
// Murphy (2012) Eq. 17.58
// backwards t + transition prob + local evidence at t
accumulator[i] = logbeta[t, i] + log(A[j, i]) + multi_normal_lpdf(eei[t] | Mu[][i], Sigma[][i]);
}
logbeta[t-1, j] = log_sum_exp(accumulator);
}
}
for (t in 1:T)
beta[t] = softmax(logbeta[t]);
} // Backward
{ // forward-backward algorithm log p(z_t = j | eei_{1:T})
for(t in 1:T) {
loggamma[t] = alpha[t] .* beta[t];
}
for(t in 1:T)
gamma[t] = softmax(loggamma[t]);
} // forward-backward
{ // Viterbi algorithm
real logp_zstar;
int bpointer[T, K]; // backpointer to the most likely previous state on the most probable path
real delta[T, K]; // max prob for the sequence up to t
// that ends with an emission from state k
for (j in 1:K)
delta[1, K] = multi_normal_lpdf(eei[1] | Mu[][j], Sigma[][j]);
for (t in 2:T) {
for (j in 1:K) { // j = current (t)
delta[t, j] = negative_infinity();
for (i in 1:K) { // i = previous (t-1)
real logp;
logp = delta[t-1, i] + log(A[i, j]) + multi_normal_lpdf(eei[t] | Mu[][i], Sigma[][i]);
if (logp > delta[t, j]) {
bpointer[t, j] = i;
delta[t, j] = logp;
}
}
}
}
logp_zstar = max(delta[T]);
for (j in 1:K)
if (delta[T, j] == logp_zstar)
zstar[T] = j;
for (t in 1:(T - 1)) {
zstar[T - t] = bpointer[T - t + 1, zstar[T - t + 1]];
}
}
}
编译使用:
%%time
sm = pystan.StanModel(file='HMM_code.stan')
适合使用:
%%time
fit = sm.sampling(data=HMM_data, iter=1, thin = 1, verbose = True)
我遇到的错误是在计算 multi_normal_pdf 函数的第一行,即 "the LDLT_Factor of covariance parameter is not positive definite. last conditional variance is 2.77556e-16"
大的协方差矩阵很容易在数值上是奇异的,即使在这种情况下它们在构造上是正定的。为了避免这个问题和不必要的计算,通常在 Stan 中做的事情是将参数声明为协方差或相关矩阵的 Cholesky 因子(在这种情况下,您还需要在参数中声明标准偏差的正向量堵塞)。
在这种情况下,您将调用 multi_normal_cholesky_lpdf
函数,该函数以协方差矩阵的 Cholesky 因子为条件,而不是协方差矩阵本身,后者又可以是 diag_pre_multiply
标准偏差向量和相关矩阵的 Cholesky 因子。但是,在这种情况下,您需要将相关矩阵的 Cholesky 因子的先验从逆 Wishart 切换为 lkj_corr_cholesky_lpdf
,以及您想要的任何标准差(例如 exponential_lpdf
)。这些都在 Stan 用户手册中讨论过。
另一个option是对协方差矩阵求积分。