如何有效地为 stan 数据块指定一个大的预测矩阵
How to efficiently specify a large predictor matrix for stan data block
对于为 stan 数据块创建大型预测矩阵的任何帮助,我将不胜感激。
我想在我的模型中使用以下数据中的变量 w_1
到 w_K
作为预测变量 "matrix" real<lower=0> weights[N, W];
。 K=W
是权重变量的个数(即 weights 的列数),N 是观测的个数(即 weights 的行数),因此 K 和 N 都是 int 类型。
下面"我目前的做法"(my current approach)适用于列数较少的情况(例如 K=10),但我实际上有 K>100 列。因此,针对下面的数据,我需要一种高效且可扩展的方法(例如一个函数)来构造这个矩阵:
# Data list for the desired data block: the ten weight vectors are bound
# column-wise into one matrix and the column count W is written by hand.
dat1 <- list(
  N = N,
  ncases = ncases,
  A = A,
  B = B,
  id = id,
  P = imput,
  nn = nn,
  W = 10,
  weights = cbind(
    w_1, w_2, w_3, w_4, w_5,
    w_6, w_7, w_8, w_9, w_10
  )
)
我尝试过 tidybayes 中的 compose_data,但不清楚如何用它生成我想要的数据块(desired data block)。如有任何帮助,我将不胜感激。
# Sample data: 4 ids x 5 imputations, with Pass/Fail counts and ten weight
# columns W_1 ... W_10. Columns that repeat a fixed pattern are built with
# rep(); all values stay doubles, exactly as if typed out in full.
dat <- data.frame(
  id = rep(c(1, 2, 3, 4), each = 5),
  imput = rep(c(1, 2, 3, 4, 5), times = 4),
  A = rep(c(1, 0), each = 10),
  B = rep(c(1, 0, 1, 0), each = 5),
  Pass = rep(c(278, 100, 153, 79), each = 5),
  Fail = c(
    740, 743, 742, 743, 740,
    7581, 7581, 7581, 7581, 7581,
    1231, 1232, 1235, 1235, 1232,
    1731, 1732, 1731, 1731, 1731
  ),
  W_1 = c(4, 3, 4, 3, 3, 1, 2, 1, 2, 1, 12, 12, 11, 12, 12, 3, 5, 3, 3, 3),
  W_2 = c(3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 12, 12, 12, 12, 12, 3, 3, 3, 3, 3),
  W_3 = c(4, 3, 3, 3, 3, 1, 2, 1, 1, 1, 12, 12, 11, 12, 12, 3, 3, 3, 3, 3),
  W_4 = c(3, 3, 4, 3, 3, 1, 1, 1, 2, 1, 12, 12, 13, 12, 12, 3, 2, 3, 3, 3),
  W_5 = c(3, 3, 3, 3, 3, 1, 0, 1, 1, 1, 12, 12, 12, 12, 12, 3, 3, 3, 3, 3),
  W_6 = c(4, 3, 3, 3, 3, 1, 1, 1, 1, 1, 12, 12, 12, 12, 12, 3, 3, 3, 3, 3),
  W_7 = c(3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 12, 12, 12, 12, 12, 3, 3, 3, 3, 3),
  W_8 = c(3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 15, 12, 12, 12, 12, 3, 3, 3, 3, 3),
  W_9 = c(3, 3, 3, 4, 3, 1, 1, 1, 1, 1, 12, 12, 12, 12, 12, 2, 3, 3, 3, 3),
  W_10 = c(3, 3, 4, 3, 3, 1, 1, 1, 1, 1, 12, 10, 12, 12, 12, 3, 3, 3, 3, 3)
)
# Current approach: copy every column the model needs into its own
# top-level vector, one assignment per column.
N <- nrow(dat)

# Outcome counts: cases are the passes, the total is passes plus fails.
ncases <- dat[["Pass"]]
nn <- dat[["Fail"]] + dat[["Pass"]]

# Covariates and identifiers.
A <- dat[["A"]]
B <- dat[["B"]]
id <- dat[["id"]]
imput <- dat[["imput"]]

# One vector per weight column; this is the part that does not scale.
w_1 <- dat[["W_1"]]
w_2 <- dat[["W_2"]]
w_3 <- dat[["W_3"]]
w_4 <- dat[["W_4"]]
w_5 <- dat[["W_5"]]
w_6 <- dat[["W_6"]]
w_7 <- dat[["W_7"]]
w_8 <- dat[["W_8"]]
w_9 <- dat[["W_9"]]
w_10 <- dat[["W_10"]]
# Data list for the current data block: let tidybayes compose it from the
# data frame, naming the row-count element with the "N" prefix.
dat_list <- dat %>%
  compose_data(.n_name = n_prefix("N"))

# Data list for the desired data block: bind the ten weight vectors into a
# single matrix and state the column count W by hand.
dat1 <- list(
  N = N,
  ncases = ncases,
  A = A,
  B = B,
  id = id,
  P = imput,
  nn = nn,
  W = 10,
  weights = cbind(
    w_1, w_2, w_3, w_4, w_5,
    w_6, w_7, w_8, w_9, w_10
  )
)
#当前数据块
// Current data block: each weight variable is declared as its own length-N
// array, so one declaration is needed per weight column (ten here).
data{
int N; // number of observations
// Integer arrays with one entry per observation.
int ncases[N];
int A[N];
int B[N];
int nn[N];
int id[N];
// Weight variables, each a non-negative real array of length N.
real<lower=0> w_1[N]; // variable w_1
real<lower=0> w_2[N]; // variable w_2
real<lower=0> w_3[N]; // variable w_3
real<lower=0> w_4[N]; // variable w_4
real<lower=0> w_5[N]; // variable w_5
real<lower=0> w_6[N]; // variable w_6
real<lower=0> w_7[N]; // variable w_7
real<lower=0> w_8[N]; // variable w_8
real<lower=0> w_9[N]; // variable w_9
real<lower=0> w_10[N]; // variable w_10
}
#想要的数据块
// Desired data block: all weight variables arrive as one N-by-W array, so the
// block does not grow with the number of weight columns.
data{
  int N; // number of observations
  // Number of weight columns. It must be declared before it is used as a
  // size below; without this declaration the block does not compile.
  int<lower=0> W;
  // Integer arrays with one entry per observation.
  int ncases[N];
  int A[N];
  int B[N];
  int nn[N];
  int id[N];
  real<lower=0> weights[N, W]; // N by W block of weights
}
此问题也已发布 here。在此先感谢您的帮助。
如果 dat
中的所有预测列都以 W_
开头,那么我认为这应该可以解决问题:
# Select every column whose name starts with "W_" and turn the selection into
# a numeric matrix. drop = FALSE keeps the selection a data frame even when
# only one column matches, so the column name is preserved in the matrix.
w.matrix <- as.matrix(dat[, grepl("^W_", colnames(dat)), drop = FALSE])

# W is taken from the matrix itself, so it always agrees with the number of
# weight columns no matter how many W_ columns dat contains.
dat1 <- list(
  N = N,
  ncases = ncases,
  A = A,
  B = B,
  id = id,
  P = imput,
  nn = nn,
  W = ncol(w.matrix),
  weights = w.matrix
)
对于为 stan 数据块创建大型预测矩阵的任何帮助,我将不胜感激。
我想在我的模型中使用以下数据中的变量 w_1
到 w_K
作为预测变量 "matrix" real<lower=0> weights[N, W];
。 K=W
是权重变量的个数(即 weights 的列数),N 是观测的个数(即 weights 的行数),因此 K 和 N 都是 int 类型。
下面"我目前的做法"(my current approach)适用于列数较少的情况(例如 K=10),但我实际上有 K>100 列。因此,针对下面的数据,我需要一种高效且可扩展的方法(例如一个函数)来构造这个矩阵:
# Data list for the desired data block: the ten weight vectors are bound
# column-wise into one matrix and the column count W is written by hand.
dat1 <- list(
  N = N,
  ncases = ncases,
  A = A,
  B = B,
  id = id,
  P = imput,
  nn = nn,
  W = 10,
  weights = cbind(
    w_1, w_2, w_3, w_4, w_5,
    w_6, w_7, w_8, w_9, w_10
  )
)
我尝试过 tidybayes 中的 compose_data,但不清楚如何用它生成我想要的数据块(desired data block)。如有任何帮助,我将不胜感激。
# Sample data: 4 ids x 5 imputations, with Pass/Fail counts and ten weight
# columns W_1 ... W_10. Columns that repeat a fixed pattern are built with
# rep(); all values stay doubles, exactly as if typed out in full.
dat <- data.frame(
  id = rep(c(1, 2, 3, 4), each = 5),
  imput = rep(c(1, 2, 3, 4, 5), times = 4),
  A = rep(c(1, 0), each = 10),
  B = rep(c(1, 0, 1, 0), each = 5),
  Pass = rep(c(278, 100, 153, 79), each = 5),
  Fail = c(
    740, 743, 742, 743, 740,
    7581, 7581, 7581, 7581, 7581,
    1231, 1232, 1235, 1235, 1232,
    1731, 1732, 1731, 1731, 1731
  ),
  W_1 = c(4, 3, 4, 3, 3, 1, 2, 1, 2, 1, 12, 12, 11, 12, 12, 3, 5, 3, 3, 3),
  W_2 = c(3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 12, 12, 12, 12, 12, 3, 3, 3, 3, 3),
  W_3 = c(4, 3, 3, 3, 3, 1, 2, 1, 1, 1, 12, 12, 11, 12, 12, 3, 3, 3, 3, 3),
  W_4 = c(3, 3, 4, 3, 3, 1, 1, 1, 2, 1, 12, 12, 13, 12, 12, 3, 2, 3, 3, 3),
  W_5 = c(3, 3, 3, 3, 3, 1, 0, 1, 1, 1, 12, 12, 12, 12, 12, 3, 3, 3, 3, 3),
  W_6 = c(4, 3, 3, 3, 3, 1, 1, 1, 1, 1, 12, 12, 12, 12, 12, 3, 3, 3, 3, 3),
  W_7 = c(3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 12, 12, 12, 12, 12, 3, 3, 3, 3, 3),
  W_8 = c(3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 15, 12, 12, 12, 12, 3, 3, 3, 3, 3),
  W_9 = c(3, 3, 3, 4, 3, 1, 1, 1, 1, 1, 12, 12, 12, 12, 12, 2, 3, 3, 3, 3),
  W_10 = c(3, 3, 4, 3, 3, 1, 1, 1, 1, 1, 12, 10, 12, 12, 12, 3, 3, 3, 3, 3)
)
# Current approach: copy every column the model needs into its own
# top-level vector, one assignment per column.
N <- nrow(dat)

# Outcome counts: cases are the passes, the total is passes plus fails.
ncases <- dat[["Pass"]]
nn <- dat[["Fail"]] + dat[["Pass"]]

# Covariates and identifiers.
A <- dat[["A"]]
B <- dat[["B"]]
id <- dat[["id"]]
imput <- dat[["imput"]]

# One vector per weight column; this is the part that does not scale.
w_1 <- dat[["W_1"]]
w_2 <- dat[["W_2"]]
w_3 <- dat[["W_3"]]
w_4 <- dat[["W_4"]]
w_5 <- dat[["W_5"]]
w_6 <- dat[["W_6"]]
w_7 <- dat[["W_7"]]
w_8 <- dat[["W_8"]]
w_9 <- dat[["W_9"]]
w_10 <- dat[["W_10"]]
# Data list for the current data block: let tidybayes compose it from the
# data frame, naming the row-count element with the "N" prefix.
dat_list <- dat %>%
  compose_data(.n_name = n_prefix("N"))

# Data list for the desired data block: bind the ten weight vectors into a
# single matrix and state the column count W by hand.
dat1 <- list(
  N = N,
  ncases = ncases,
  A = A,
  B = B,
  id = id,
  P = imput,
  nn = nn,
  W = 10,
  weights = cbind(
    w_1, w_2, w_3, w_4, w_5,
    w_6, w_7, w_8, w_9, w_10
  )
)
#当前数据块
// Current data block: each weight variable is declared as its own length-N
// array, so one declaration is needed per weight column (ten here).
data{
int N; // number of observations
// Integer arrays with one entry per observation.
int ncases[N];
int A[N];
int B[N];
int nn[N];
int id[N];
// Weight variables, each a non-negative real array of length N.
real<lower=0> w_1[N]; // variable w_1
real<lower=0> w_2[N]; // variable w_2
real<lower=0> w_3[N]; // variable w_3
real<lower=0> w_4[N]; // variable w_4
real<lower=0> w_5[N]; // variable w_5
real<lower=0> w_6[N]; // variable w_6
real<lower=0> w_7[N]; // variable w_7
real<lower=0> w_8[N]; // variable w_8
real<lower=0> w_9[N]; // variable w_9
real<lower=0> w_10[N]; // variable w_10
}
#想要的数据块
// Desired data block: all weight variables arrive as one N-by-W array, so the
// block does not grow with the number of weight columns.
data{
  int N; // number of observations
  // Number of weight columns. It must be declared before it is used as a
  // size below; without this declaration the block does not compile.
  int<lower=0> W;
  // Integer arrays with one entry per observation.
  int ncases[N];
  int A[N];
  int B[N];
  int nn[N];
  int id[N];
  real<lower=0> weights[N, W]; // N by W block of weights
}
此问题也已发布 here。在此先感谢您的帮助。
如果 dat
中的所有预测列都以 W_
开头,那么我认为这应该可以解决问题:
# Select every column whose name starts with "W_" and turn the selection into
# a numeric matrix. drop = FALSE keeps the selection a data frame even when
# only one column matches, so the column name is preserved in the matrix.
w.matrix <- as.matrix(dat[, grepl("^W_", colnames(dat)), drop = FALSE])

# W is taken from the matrix itself, so it always agrees with the number of
# weight columns no matter how many W_ columns dat contains.
dat1 <- list(
  N = N,
  ncases = ncases,
  A = A,
  B = B,
  id = id,
  P = imput,
  nn = nn,
  W = ncol(w.matrix),
  weights = w.matrix
)