如何将大量 brms 保存为不同的 R 对象或 Rds/Rda 文件?

How to save numerous brms fit as distinct R objects or Rds/Rda file?

我正在尝试使用 brms::brm() 替换模型中一个参数的先验来拟合贝叶斯模型 purrr::map2() 一个接一个(我有 63 个先验参数)。我可以 'theoretically' 通过 list2env() 将每个拟合模型保存为全局环境(即我的工作区)中的不同对象,这要归功于我上一个问题的 。通过 list2env(),对象 (brmsfits) 将在所有 63 次迭代完成后被保存。然而,当我 运行 整个代码时,我总是收到一条错误消息说 Error in scan(con, nlines = 1, sep = ",", quiet = TRUE) : could not allocate memory (0 Mb) in C function 'R_AllocStringBuffer' 并且没有 brmsfit 对象存储在全局环境中,尽管模型拟合似乎完成了 63 次,与我之前的存在一样多。

因此,我想将每个 brmsfit 对象保存为 R 对象,并在其迭代完成后立即保存为 .Rds.Rda 文件 以避免这样的内存问题。但是,我应该怎么做才能意识到这一点?

MWE

请注意,以下命令只是一个“示意图”示例,包含我正在尝试执行的公开可用数据。它会工作 而不会出现我上面提到的问题 ,因为这个例子中 brm() 中的数据和模型要简单得多,而且先验的数量比我少得多。抢断。

library(lme4)
library(tidyverse)
library(magrittr)
library(brms)
library(cmdstanr)
library(rstan)

## Parallelize the chains using all the cores:
options(mc.cores = parallel::detectCores())

prior_test <- data.frame(
  sd = c(0.01, 0.01, 0.01),
  mean = c(50, -50, 0)
) %>% 
  mutate(
    id = row_number()
  )

list2env(
  purrr::map2(
    prior_test$sd,
    prior_test$mean,
    function(psd, pm){
          gc()
          gc()
          cbfm1 <- brm(
            Reaction ~ 0 + Intercept + Days + (0 + Intercept + Days|Subject), 
            data = sleepstudy,
            family = "normal",
            prior =
              c(
                prior(normal(0, 1), class = b, coef = Intercept),
                set_prior(
                  paste0(
                    "normal(",
                    pm,
                    ", ",
                    psd,
                    ")"
                  ),
                  class = "b",
                  coef = "Days"
                ),
                prior(normal(0, 1), class = sd),
                prior(lkj(2),       class = cor)    
              ),
            save_pars = save_pars(all = TRUE),
            backend = "cmdstanr"
          )
        }
  ) %>%
    setNames(paste0('model', prior_test$id)),
  .GlobalEnv
)

带回溯的错误消息

Error in scan(con, nlines = 1, sep = ",", quiet = TRUE) : 
  could not allocate memory (0 Mb) in C function 'R_AllocStringBuffer' 
15.
scan(con, nlines = 1, sep = ",", quiet = TRUE) 
14.
rstan::read_stan_csv(out$output_files()) 
13.
.fit_model(model, ...) 
12.
.fun(model = .x1, sdata = .x2, algorithm = .x3, backend = .x4, 
    iter = .x5, warmup = .x6, thin = .x7, chains = .x8, cores = .x9, 
    threads = .x10, inits = .x11, exclude = .x12, control = .x13, 
    future = .x14, seed = .x15, silent = .x16) 
11.
eval(expr, envir, ...) 
10.
eval(expr, envir, ...) 
9.
eval2(call, envir = args, enclos = parent.frame()) 
8.
do_call(fit_model, fit_args) 
7.
brm(voice ~ 0 + Intercept + agent + patient + REGION1 + REGION2 + 
    agent:patient + agent:REGION1 + agent:REGION2 + patient:REGION1 + 
    patient:REGION2 + agent:patient:REGION1 + agent:patient:REGION2 + 
    (0 + Intercept + agent + patient + agent:patient | subj) +  ... 
6.
.f(.x[[i]], .y[[i]], ...) 
5.
map2(R1data$prior_sd, R1data$prior_mean, function(psd, pm) {
    gc()
    gc()
    brm(voice ~ 0 + Intercept + agent + patient + REGION1 + REGION2 +  ... 
4.
eval(lhs, parent, parent) 
3.
eval(lhs, parent, parent) 
2.
map2(R1data$prior_sd, R1data$prior_mean, function(psd, pm) {
    gc()
    gc()
    brm(voice ~ 0 + Intercept + agent + patient + REGION1 + REGION2 +  ... 
1.
list2env(map2(R1data$prior_sd, R1data$prior_mean, function(psd, 
    pm) {
    gc()
    gc() ... 

我想你可以使用类似下面的东西(这是一个更简单的版本,避免了数据的来源、变量名、模型规范等)


dir.create("models")

# This function generates a file path, runs the `brm` function and saves the object
# returned by `brm()` as a .rds file
my_fun = function(prior_sd, prior_mean) {
  file_name = paste0("models/model_sd_", prior_sd, "_mean_", prior_mean, ".rds")
  brms_object = brm(...) # pass formula, prior, data, options, etc.
  saveRDS(brms_object, file_name) # you can control compression too if you want
}

sds = c(0.01, 0.01, 0.01)
means = c(50, -50, 0)
purrr::walk2(sds, means, my_fun)

您必须确保您要使用的数据框存在于全局环境中

我稍微修改了 中的函数,以便 (1) 我们可以保留 brmsfit 全局环境中的对象使用 assign(..., envir = .GlobalEnv) 和 (2) 当我们使用负值作为先验均值时,我们可以避免使用 - 的对象名称,R 将其解释为减法运算符。

data <- data.frame(
  prior_sd = prior_sd <- c(0.01, 0.01, 0.01),
  prior_mean = prior_mean <- c(50, -50, 0),
  id = case_when(
    prior_mean == 0 ~ paste0("model_mean_", prior_mean, "_sd_", prior_sd),
    prior_mean < 0 ~ paste0("model_mean_m", abs(prior_mean), "_sd_", prior_sd),
    prior_mean > 0 ~ paste0("model_mean_p", prior_mean, "_sd_", prior_sd)
    )
)

computation = function(prior_mean, prior_sd) {
  file_name = case_when(
    prior_mean == 0 ~ paste0("path-to-your-directory/model_mean_", prior_mean, "_sd_", prior_sd, ".rds"),
    prior_mean < 0 ~ paste0("path-to-your-directory/model_mean_m", abs(prior_mean), "_sd_", prior_sd, ".rds"),
    prior_mean > 0 ~ paste0("path-to-your-directory/model_mean_p", prior_mean, "_sd_", prior_sd, ".rds")
  )
  # pass formula, prior, data, options, etc.
  brms_object = brm(
    Reaction ~ 0 + Intercept + Days + (0 + Intercept + Days|Subject), 
    data = sleepstudy,
    family = "normal",
    prior =
      c(
        prior(normal(0, 1), class = b, coef = Intercept),
        set_prior(
          paste0(
            "normal(",
            prior_mean,
            ", ",
            prior_sd,
            ")"
          ),
          class = "b",
          coef = "Days"
        ),
        prior(normal(0, 1), class = sd),
        prior(lkj(2),       class = cor)    
      ),
    save_pars = save_pars(all = TRUE),
    backend = "cmdstanr"
  )
  # save the computation results with object name
  assign(
    case_when(
      prior_mean == 0 ~ paste0("model_mean_",  prior_mean, "_sd_", prior_sd),
      prior_mean <  0 ~ paste0("model_mean_m", abs(prior_mean), "_sd_", prior_sd),
      prior_mean >  0 ~ paste0("model_mean_p", prior_mean, "_sd_", prior_sd)
    ), 
    brms_object,
    envir = .GlobalEnv)
  # you can control compression too if you want
  saveRDS(brms_object, file_name) 
}

purrr::walk2(data$prior_mean, data$prior_sd, computation)