Stan:如何修改数据和模型语法,使每个媒体渠道拥有各自的最大滞后期
Stan: how to change the data and model syntax so that each media channel has its own maximum lag
max_lag 目前是一个对所有媒体渠道通用的固定整数。我需要为每个媒体渠道分别设置滞后期。
那么,怎样才能让每个媒体渠道使用不同的滞后期?data 块和 parameters 块的语法需要怎样修改?
例如:max_lag_channel_media1 = 10; max_lag_channel_media2 = 4; max_lag_channel_media3 = 6
# Inputs for the Stan program; every key mirrors a variable declared in that
# program's `data` block.
# NOTE(review): N is taken from `df` while y comes from `df_mmm` - confirm
# that both frames have the same number of rows.
model_data2 = dict(
    N=len(df),
    max_lag=max_lag,
    num_media=num_media,
    X_media=X_media,
    mu_mdip=mu_mdip,
    num_ctrl=X_ctrl.shape[1],
    X_ctrl=X_ctrl,
    y=df_mmm['sales'].values,
)
model_code2 = '''
functions {
  // Adstock transformation: the weighted average of the media values in t,
  // using one weight per element (dot product divided by the weight sum).
  real Adstock(vector t, row_vector weights) {
    return dot_product(t, weights) / sum(weights);
  }
}
data {
  // the total number of observations
  int<lower=1> N;
  // the vector of sales
  // NOTE(review): Stan >= 2.33 only accepts the `array[N] real y` spelling;
  // the older form is kept for compatibility with older Stan releases.
  real y[N];
  // the maximum duration of lag effect, in weeks
  int<lower=1> max_lag;
  // the number of media channels
  int<lower=1> num_media;
  // matrix of media variables; it carries max_lag-1 extra rows so that every
  // observation has a full window of max_lag rows
  matrix[N+max_lag-1, num_media] X_media;
  // vector of media variables' mean
  real mu_mdip[num_media];
  // the number of other control variables
  int<lower=1> num_ctrl;
  // a matrix of control variables
  matrix[N, num_ctrl] X_ctrl;
}
parameters {
  // residual variance
  real<lower=0> noise_var;
  // the intercept
  real tau;
  // the coefficients for media variables and base sales
  vector<lower=0>[num_media+num_ctrl] beta;
  // the decay and peak parameter for the adstock transformation of
  // each media
  vector<lower=0,upper=1>[num_media] decay;
  // divide by 2.0: dividing two ints is integer division in Stan, which
  // would truncate the value before ceil() is applied
  vector<lower=0,upper=ceil(max_lag / 2.0)>[num_media] peak;
}
transformed parameters {
  // the cumulative media effect after adstock
  real cum_effect;
  // matrix of media variables after adstock
  matrix[N, num_media] X_media_adstocked;
  // matrix of all predictors
  matrix[N, num_media+num_ctrl] X;
  // adstock lag weights of the media channel currently being processed
  row_vector[max_lag] lag_weights;
  for (media in 1 : num_media) {
    // the weights depend only on the channel, so build them once per channel
    // rather than once per observation; lag 1 is stored in the last position
    for (lag in 1 : max_lag) {
      lag_weights[max_lag-lag+1] = pow(decay[media], (lag - 1 - peak[media]) ^ 2);
    }
    // adstock, scale by the channel mean, log1p transformation
    for (nn in 1:N) {
      cum_effect = Adstock(sub_col(X_media, nn, media, max_lag), lag_weights);
      X_media_adstocked[nn, media] = log1p(cum_effect/mu_mdip[media]);
    }
  }
  // assemble the predictor matrix once, after every adstocked entry is filled
  X = append_col(X_media_adstocked, X_ctrl);
}
model {
  decay ~ beta(3,3);
  peak ~ uniform(0, ceil(max_lag / 2.0));
  tau ~ normal(0, 5);
  // vectorized form of the element-wise normal(0, 1) prior on beta
  beta ~ normal(0, 1);
  noise_var ~ inv_gamma(0.05, 0.05 * 0.01);
  y ~ normal(tau + X * beta, sqrt(noise_var));
}
'''
在 data 块中,将 max_lag 声明为整数数组而不是单个整数;每个元素存储一个媒体渠道的最大滞后期。像这样:
// the number of media channels; it has to be declared before max_lag,
// because max_lag is now sized by it
int<lower=1> num_media;
// the maximum duration of lag effect for each media channel, in weeks
int<lower=1> max_lag[num_media];
// matrix of media variables; an int cannot be added to an int array, so the
// extra rows are sized for the longest lag
matrix[N+max(max_lag)-1, num_media] X_media;
// NOTE(review): the upper bound and the prior of `peak` also use max_lag as
// a single int and need a per-channel form once max_lag is an array.
然后,在构建 X_media_adstocked 时,在循环的每次迭代中使用当前媒体渠道对应的 max_lag。lag_weights 需要在循环内部声明,而不是事先声明,因为它的长度随媒体渠道而变化:
for (medium in 1 : num_media) {
  // sized per channel, so it has to be declared inside the loop
  row_vector[max_lag[medium]] lag_weights;
  // number of leading rows of X_media to skip so that this channel's window
  // still ends on the current observation; assumes X_media has
  // N + max(max_lag) - 1 rows, i.e. it is padded for the longest lag
  int offset;
  offset = max(max_lag) - max_lag[medium];
  // the weights depend only on the channel, so build them once per channel;
  // lag 1 is stored in the last position
  for (lag in 1 : max_lag[medium]) {
    lag_weights[max_lag[medium]-lag+1] = pow(decay[medium], (lag - 1 - peak[medium]) ^ 2);
  }
  for (nn in 1:N) {
    cum_effect = Adstock(sub_col(X_media, nn + offset, medium, max_lag[medium]), lag_weights);
    X_media_adstocked[nn, medium] = log1p(cum_effect/mu_mdip[medium]);
  }
}
// the model block still needs the full predictor matrix
X = append_col(X_media_adstocked, X_ctrl);
max_lag 目前是一个对所有媒体渠道通用的固定整数。我需要为每个媒体渠道分别设置滞后期。那么,怎样才能让每个媒体渠道使用不同的滞后期?data 块和 parameters 块的语法需要怎样修改?例如:max_lag_channel_media1 = 10; max_lag_channel_media2 = 4; max_lag_channel_media3 = 6
# Inputs for the Stan program; every key mirrors a variable declared in that
# program's `data` block.
# NOTE(review): N is taken from `df` while y comes from `df_mmm` - confirm
# that both frames have the same number of rows.
model_data2 = dict(
    N=len(df),
    max_lag=max_lag,
    num_media=num_media,
    X_media=X_media,
    mu_mdip=mu_mdip,
    num_ctrl=X_ctrl.shape[1],
    X_ctrl=X_ctrl,
    y=df_mmm['sales'].values,
)
model_code2 = '''
functions {
  // Adstock transformation: the weighted average of the media values in t,
  // using one weight per element (dot product divided by the weight sum).
  real Adstock(vector t, row_vector weights) {
    return dot_product(t, weights) / sum(weights);
  }
}
data {
  // the total number of observations
  int<lower=1> N;
  // the vector of sales
  // NOTE(review): Stan >= 2.33 only accepts the `array[N] real y` spelling;
  // the older form is kept for compatibility with older Stan releases.
  real y[N];
  // the maximum duration of lag effect, in weeks
  int<lower=1> max_lag;
  // the number of media channels
  int<lower=1> num_media;
  // matrix of media variables; it carries max_lag-1 extra rows so that every
  // observation has a full window of max_lag rows
  matrix[N+max_lag-1, num_media] X_media;
  // vector of media variables' mean
  real mu_mdip[num_media];
  // the number of other control variables
  int<lower=1> num_ctrl;
  // a matrix of control variables
  matrix[N, num_ctrl] X_ctrl;
}
parameters {
  // residual variance
  real<lower=0> noise_var;
  // the intercept
  real tau;
  // the coefficients for media variables and base sales
  vector<lower=0>[num_media+num_ctrl] beta;
  // the decay and peak parameter for the adstock transformation of
  // each media
  vector<lower=0,upper=1>[num_media] decay;
  // divide by 2.0: dividing two ints is integer division in Stan, which
  // would truncate the value before ceil() is applied
  vector<lower=0,upper=ceil(max_lag / 2.0)>[num_media] peak;
}
transformed parameters {
  // the cumulative media effect after adstock
  real cum_effect;
  // matrix of media variables after adstock
  matrix[N, num_media] X_media_adstocked;
  // matrix of all predictors
  matrix[N, num_media+num_ctrl] X;
  // adstock lag weights of the media channel currently being processed
  row_vector[max_lag] lag_weights;
  for (media in 1 : num_media) {
    // the weights depend only on the channel, so build them once per channel
    // rather than once per observation; lag 1 is stored in the last position
    for (lag in 1 : max_lag) {
      lag_weights[max_lag-lag+1] = pow(decay[media], (lag - 1 - peak[media]) ^ 2);
    }
    // adstock, scale by the channel mean, log1p transformation
    for (nn in 1:N) {
      cum_effect = Adstock(sub_col(X_media, nn, media, max_lag), lag_weights);
      X_media_adstocked[nn, media] = log1p(cum_effect/mu_mdip[media]);
    }
  }
  // assemble the predictor matrix once, after every adstocked entry is filled
  X = append_col(X_media_adstocked, X_ctrl);
}
model {
  decay ~ beta(3,3);
  peak ~ uniform(0, ceil(max_lag / 2.0));
  tau ~ normal(0, 5);
  // vectorized form of the element-wise normal(0, 1) prior on beta
  beta ~ normal(0, 1);
  noise_var ~ inv_gamma(0.05, 0.05 * 0.01);
  y ~ normal(tau + X * beta, sqrt(noise_var));
}
'''
在 data 块中,将 max_lag 声明为整数数组而不是单个整数;每个元素存储一个媒体渠道的最大滞后期。像这样:
// the number of media channels; it has to be declared before max_lag,
// because max_lag is now sized by it
int<lower=1> num_media;
// the maximum duration of lag effect for each media channel, in weeks
int<lower=1> max_lag[num_media];
// matrix of media variables; an int cannot be added to an int array, so the
// extra rows are sized for the longest lag
matrix[N+max(max_lag)-1, num_media] X_media;
// NOTE(review): the upper bound and the prior of `peak` also use max_lag as
// a single int and need a per-channel form once max_lag is an array.
然后,在构建 X_media_adstocked 时,在循环的每次迭代中使用当前媒体渠道对应的 max_lag。lag_weights 需要在循环内部声明,而不是事先声明,因为它的长度随媒体渠道而变化:
for (medium in 1 : num_media) {
  // sized per channel, so it has to be declared inside the loop
  row_vector[max_lag[medium]] lag_weights;
  // number of leading rows of X_media to skip so that this channel's window
  // still ends on the current observation; assumes X_media has
  // N + max(max_lag) - 1 rows, i.e. it is padded for the longest lag
  int offset;
  offset = max(max_lag) - max_lag[medium];
  // the weights depend only on the channel, so build them once per channel;
  // lag 1 is stored in the last position
  for (lag in 1 : max_lag[medium]) {
    lag_weights[max_lag[medium]-lag+1] = pow(decay[medium], (lag - 1 - peak[medium]) ^ 2);
  }
  for (nn in 1:N) {
    cum_effect = Adstock(sub_col(X_media, nn + offset, medium, max_lag[medium]), lag_weights);
    X_media_adstocked[nn, medium] = log1p(cum_effect/mu_mdip[medium]);
  }
}
// the model block still needs the full predictor matrix
X = append_col(X_media_adstocked, X_ctrl);