如何将大量 brms 保存为不同的 R 对象或 Rds/Rda 文件?
How to save numerous brms fit as distinct R objects or Rds/Rda file?
我正在尝试使用 brms::brm()
替换模型中一个参数的先验来拟合贝叶斯模型
purrr::map2()
一个接一个(我有 63 个先验参数)。我可以 'theoretically' 通过 list2env()
将每个拟合模型保存为全局环境(即我的工作区)中的不同对象,这要归功于我上一个问题的 。通过 list2env()
,对象 (brmsfit
s) 将在所有 63 次迭代完成后被保存。然而,当我 运行 整个代码时,我总是收到一条错误消息说 Error in scan(con, nlines = 1, sep = ",", quiet = TRUE) : could not allocate memory (0 Mb) in C function 'R_AllocStringBuffer'
并且没有 brmsfit
对象存储在全局环境中,尽管模型拟合似乎完成了 63 次,与我之前的存在一样多。
因此,我想将每个 brmsfit
对象保存为 R 对象,并在其迭代完成后立即保存为 .Rds
或 .Rda
文件 以避免这样的内存问题。但是,我应该怎么做才能意识到这一点?
MWE
请注意,以下命令只是一个“示意图”示例,包含我正在尝试执行的公开可用数据。它会工作 而不会出现我上面提到的问题 ,因为这个例子中 brm()
中的数据和模型要简单得多,而且先验的数量比我少得多。抢断。
library(lme4)
library(tidyverse)
library(magrittr)
library(brms)
library(cmdstanr)
library(rstan)
## Parallelize the chains using all the cores:
options(mc.cores = parallel::detectCores())
prior_test <- data.frame(
sd = c(0.01, 0.01, 0.01),
mean = c(50, -50, 0)
) %>%
mutate(
id = row_number()
)
list2env(
purrr::map2(
prior_test$sd,
prior_test$mean,
function(psd, pm){
gc()
gc()
cbfm1 <- brm(
Reaction ~ 0 + Intercept + Days + (0 + Intercept + Days|Subject),
data = sleepstudy,
family = "normal",
prior =
c(
prior(normal(0, 1), class = b, coef = Intercept),
set_prior(
paste0(
"normal(",
pm,
", ",
psd,
")"
),
class = "b",
coef = "Days"
),
prior(normal(0, 1), class = sd),
prior(lkj(2), class = cor)
),
save_pars = save_pars(all = TRUE),
backend = "cmdstanr"
)
}
) %>%
setNames(paste0('model', prior_test$id)),
.GlobalEnv
)
带回溯的错误消息
Error in scan(con, nlines = 1, sep = ",", quiet = TRUE) :
could not allocate memory (0 Mb) in C function 'R_AllocStringBuffer'
15.
scan(con, nlines = 1, sep = ",", quiet = TRUE)
14.
rstan::read_stan_csv(out$output_files())
13.
.fit_model(model, ...)
12.
.fun(model = .x1, sdata = .x2, algorithm = .x3, backend = .x4,
iter = .x5, warmup = .x6, thin = .x7, chains = .x8, cores = .x9,
threads = .x10, inits = .x11, exclude = .x12, control = .x13,
future = .x14, seed = .x15, silent = .x16)
11.
eval(expr, envir, ...)
10.
eval(expr, envir, ...)
9.
eval2(call, envir = args, enclos = parent.frame())
8.
do_call(fit_model, fit_args)
7.
brm(voice ~ 0 + Intercept + agent + patient + REGION1 + REGION2 +
agent:patient + agent:REGION1 + agent:REGION2 + patient:REGION1 +
patient:REGION2 + agent:patient:REGION1 + agent:patient:REGION2 +
(0 + Intercept + agent + patient + agent:patient | subj) + ...
6.
.f(.x[[i]], .y[[i]], ...)
5.
map2(R1data$prior_sd, R1data$prior_mean, function(psd, pm) {
gc()
gc()
brm(voice ~ 0 + Intercept + agent + patient + REGION1 + REGION2 + ...
4.
eval(lhs, parent, parent)
3.
eval(lhs, parent, parent)
2.
map2(R1data$prior_sd, R1data$prior_mean, function(psd, pm) {
gc()
gc()
brm(voice ~ 0 + Intercept + agent + patient + REGION1 + REGION2 + ...
1.
list2env(map2(R1data$prior_sd, R1data$prior_mean, function(psd,
pm) {
gc()
gc() ...
我想你可以使用类似下面的东西(这是一个更简单的版本,避免了数据的来源、变量名、模型规范等)
dir.create("models")
# This function generates a file path, runs the `brm` function and saves the object
# returned by `brm()` as a .rds file
my_fun = function(prior_sd, prior_mean) {
file_name = paste0("models/model_sd_", prior_sd, "_mean_", prior_mean, ".rds")
brms_object = brm(...) # pass formula, prior, data, options, etc.
saveRDS(brms_object, file_name) # you can control compression too if you want
}
sds = c(0.01, 0.01, 0.01)
means = c(50, -50, 0)
purrr::walk2(sds, means, my_fun)
您必须确保您要使用的数据框存在于全局环境中
我稍微修改了 中的函数,以便 (1) 我们可以保留 brmsfit
全局环境中的对象使用 assign(..., envir = .GlobalEnv)
和 (2) 当我们使用负值作为先验均值时,我们可以避免使用 -
的对象名称,R 将其解释为减法运算符。
data <- data.frame(
prior_sd = prior_sd <- c(0.01, 0.01, 0.01),
prior_mean = prior_mean <- c(50, -50, 0),
id = case_when(
prior_mean == 0 ~ paste0("model_mean_", prior_mean, "_sd_", prior_sd),
prior_mean < 0 ~ paste0("model_mean_m", abs(prior_mean), "_sd_", prior_sd),
prior_mean > 0 ~ paste0("model_mean_p", prior_mean, "_sd_", prior_sd)
)
)
computation = function(prior_mean, prior_sd) {
file_name = case_when(
prior_mean == 0 ~ paste0("path-to-your-directory/model_mean_", prior_mean, "_sd_", prior_sd, ".rds"),
prior_mean < 0 ~ paste0("path-to-your-directory/model_mean_m", abs(prior_mean), "_sd_", prior_sd, ".rds"),
prior_mean > 0 ~ paste0("path-to-your-directory/model_mean_p", prior_mean, "_sd_", prior_sd, ".rds")
)
# pass formula, prior, data, options, etc.
brms_object = brm(
Reaction ~ 0 + Intercept + Days + (0 + Intercept + Days|Subject),
data = sleepstudy,
family = "normal",
prior =
c(
prior(normal(0, 1), class = b, coef = Intercept),
set_prior(
paste0(
"normal(",
prior_mean,
", ",
prior_sd,
")"
),
class = "b",
coef = "Days"
),
prior(normal(0, 1), class = sd),
prior(lkj(2), class = cor)
),
save_pars = save_pars(all = TRUE),
backend = "cmdstanr"
)
# save the computation results with object name
assign(
case_when(
prior_mean == 0 ~ paste0("model_mean_", prior_mean, "_sd_", prior_sd),
prior_mean < 0 ~ paste0("model_mean_m", abs(prior_mean), "_sd_", prior_sd),
prior_mean > 0 ~ paste0("model_mean_p", prior_mean, "_sd_", prior_sd)
),
brms_object,
envir = .GlobalEnv)
# you can control compression too if you want
saveRDS(brms_object, file_name)
}
purrr::walk2(data$prior_mean, data$prior_sd, computation)
我正在尝试使用 brms::brm()
替换模型中一个参数的先验来拟合贝叶斯模型
purrr::map2()
一个接一个(我有 63 个先验参数)。我可以 'theoretically' 通过 list2env()
将每个拟合模型保存为全局环境(即我的工作区)中的不同对象,这要归功于我上一个问题的 list2env()
,对象 (brmsfit
s) 将在所有 63 次迭代完成后被保存。然而,当我 运行 整个代码时,我总是收到一条错误消息说 Error in scan(con, nlines = 1, sep = ",", quiet = TRUE) : could not allocate memory (0 Mb) in C function 'R_AllocStringBuffer'
并且没有 brmsfit
对象存储在全局环境中,尽管模型拟合似乎完成了 63 次,与我之前的存在一样多。
因此,我想将每个 brmsfit
对象保存为 R 对象,并在其迭代完成后立即保存为 .Rds
或 .Rda
文件 以避免这样的内存问题。但是,我应该怎么做才能意识到这一点?
MWE
请注意,以下命令只是一个“示意图”示例,包含我正在尝试执行的公开可用数据。它会工作 而不会出现我上面提到的问题 ,因为这个例子中 brm()
中的数据和模型要简单得多,而且先验的数量比我少得多。抢断。
library(lme4)
library(tidyverse)
library(magrittr)
library(brms)
library(cmdstanr)
library(rstan)
## Parallelize the chains using all the cores:
options(mc.cores = parallel::detectCores())
prior_test <- data.frame(
sd = c(0.01, 0.01, 0.01),
mean = c(50, -50, 0)
) %>%
mutate(
id = row_number()
)
list2env(
purrr::map2(
prior_test$sd,
prior_test$mean,
function(psd, pm){
gc()
gc()
cbfm1 <- brm(
Reaction ~ 0 + Intercept + Days + (0 + Intercept + Days|Subject),
data = sleepstudy,
family = "normal",
prior =
c(
prior(normal(0, 1), class = b, coef = Intercept),
set_prior(
paste0(
"normal(",
pm,
", ",
psd,
")"
),
class = "b",
coef = "Days"
),
prior(normal(0, 1), class = sd),
prior(lkj(2), class = cor)
),
save_pars = save_pars(all = TRUE),
backend = "cmdstanr"
)
}
) %>%
setNames(paste0('model', prior_test$id)),
.GlobalEnv
)
带回溯的错误消息
Error in scan(con, nlines = 1, sep = ",", quiet = TRUE) :
could not allocate memory (0 Mb) in C function 'R_AllocStringBuffer'
15.
scan(con, nlines = 1, sep = ",", quiet = TRUE)
14.
rstan::read_stan_csv(out$output_files())
13.
.fit_model(model, ...)
12.
.fun(model = .x1, sdata = .x2, algorithm = .x3, backend = .x4,
iter = .x5, warmup = .x6, thin = .x7, chains = .x8, cores = .x9,
threads = .x10, inits = .x11, exclude = .x12, control = .x13,
future = .x14, seed = .x15, silent = .x16)
11.
eval(expr, envir, ...)
10.
eval(expr, envir, ...)
9.
eval2(call, envir = args, enclos = parent.frame())
8.
do_call(fit_model, fit_args)
7.
brm(voice ~ 0 + Intercept + agent + patient + REGION1 + REGION2 +
agent:patient + agent:REGION1 + agent:REGION2 + patient:REGION1 +
patient:REGION2 + agent:patient:REGION1 + agent:patient:REGION2 +
(0 + Intercept + agent + patient + agent:patient | subj) + ...
6.
.f(.x[[i]], .y[[i]], ...)
5.
map2(R1data$prior_sd, R1data$prior_mean, function(psd, pm) {
gc()
gc()
brm(voice ~ 0 + Intercept + agent + patient + REGION1 + REGION2 + ...
4.
eval(lhs, parent, parent)
3.
eval(lhs, parent, parent)
2.
map2(R1data$prior_sd, R1data$prior_mean, function(psd, pm) {
gc()
gc()
brm(voice ~ 0 + Intercept + agent + patient + REGION1 + REGION2 + ...
1.
list2env(map2(R1data$prior_sd, R1data$prior_mean, function(psd,
pm) {
gc()
gc() ...
我想你可以使用类似下面的东西(这是一个更简单的版本,避免了数据的来源、变量名、模型规范等)
dir.create("models")
# This function generates a file path, runs the `brm` function and saves the object
# returned by `brm()` as a .rds file
my_fun = function(prior_sd, prior_mean) {
file_name = paste0("models/model_sd_", prior_sd, "_mean_", prior_mean, ".rds")
brms_object = brm(...) # pass formula, prior, data, options, etc.
saveRDS(brms_object, file_name) # you can control compression too if you want
}
sds = c(0.01, 0.01, 0.01)
means = c(50, -50, 0)
purrr::walk2(sds, means, my_fun)
您必须确保您要使用的数据框存在于全局环境中
我稍微修改了 brmsfit
全局环境中的对象使用 assign(..., envir = .GlobalEnv)
和 (2) 当我们使用负值作为先验均值时,我们可以避免使用 -
的对象名称,R 将其解释为减法运算符。
data <- data.frame(
prior_sd = prior_sd <- c(0.01, 0.01, 0.01),
prior_mean = prior_mean <- c(50, -50, 0),
id = case_when(
prior_mean == 0 ~ paste0("model_mean_", prior_mean, "_sd_", prior_sd),
prior_mean < 0 ~ paste0("model_mean_m", abs(prior_mean), "_sd_", prior_sd),
prior_mean > 0 ~ paste0("model_mean_p", prior_mean, "_sd_", prior_sd)
)
)
computation = function(prior_mean, prior_sd) {
file_name = case_when(
prior_mean == 0 ~ paste0("path-to-your-directory/model_mean_", prior_mean, "_sd_", prior_sd, ".rds"),
prior_mean < 0 ~ paste0("path-to-your-directory/model_mean_m", abs(prior_mean), "_sd_", prior_sd, ".rds"),
prior_mean > 0 ~ paste0("path-to-your-directory/model_mean_p", prior_mean, "_sd_", prior_sd, ".rds")
)
# pass formula, prior, data, options, etc.
brms_object = brm(
Reaction ~ 0 + Intercept + Days + (0 + Intercept + Days|Subject),
data = sleepstudy,
family = "normal",
prior =
c(
prior(normal(0, 1), class = b, coef = Intercept),
set_prior(
paste0(
"normal(",
prior_mean,
", ",
prior_sd,
")"
),
class = "b",
coef = "Days"
),
prior(normal(0, 1), class = sd),
prior(lkj(2), class = cor)
),
save_pars = save_pars(all = TRUE),
backend = "cmdstanr"
)
# save the computation results with object name
assign(
case_when(
prior_mean == 0 ~ paste0("model_mean_", prior_mean, "_sd_", prior_sd),
prior_mean < 0 ~ paste0("model_mean_m", abs(prior_mean), "_sd_", prior_sd),
prior_mean > 0 ~ paste0("model_mean_p", prior_mean, "_sd_", prior_sd)
),
brms_object,
envir = .GlobalEnv)
# you can control compression too if you want
saveRDS(brms_object, file_name)
}
purrr::walk2(data$prior_mean, data$prior_sd, computation)