使我的两个不同的 R 函数成为一个函数

Make my Two Different R Functions to be Just One Function

我想使用 R 的 MonteCarlo 包中的 MonteCarlo 函数,该包的一项要求是只能向它提供一个单一的函数(just one single function)。

To run a simulation study, the user has to nest both - the generation of a sample and the calculation of the desired statistics from this sample - in a single function. This function is passed to MonteCarlo(). No additional programming is required (Vignette: The MonteCarlo Package).

与这个重要条件相反,我有两个不同的函数适合我的算法。我已经按照之前一个正确答案所提供的方法使用了 MonteCarlo 函数。

因此我想使用不同的方法,我编写了以下函数(function1 和 function2)以将其传递给 MonteCarlo 函数,如下所示:

这是我想用 R 做的算法:

  1. 通过arima.sim()函数从ARIMA模型模拟10个时间序列数据集
  2. 将序列拆分为块大小分别为 2、3、4、5、6、7、8、9 的重叠子序列
  3. 对于每个块大小,对块进行有放回的重采样以组成新序列,并通过 auto.arima() 函数为每个块大小的新序列获得最佳 ARIMA 模型。
  4. 获取每个块大小下每个子序列的 RMSE

下面的 R 函数可以完成这项工作。

library(MonteCarlo)
library(forecast)
library(Metrics)
############################################
function1 <- function(lb, ov, n) {
  # Build the table of overlapping block boundaries for a series of length n.
  #
  # Arguments:
  #   lb - block length (also the step between consecutive starts on each grid)
  #   ov - offset control: the second grid of starts begins at lb - ov + 1
  #   n  - length of the series being split
  # Returns: a data frame with columns `starts` and `ends`, one row per block.

  # Two grids of start positions, merged, sorted and de-duplicated.
  primary_starts <- seq(from = 1, to = n, by = lb)
  shifted_starts <- seq(from = lb - ov + 1, to = n, by = lb)
  block_starts <- unique(sort(c(primary_starts, shifted_starts)))

  # Every block covers lb positions but is clipped at the end of the series.
  block_ends <- pmin(block_starts + lb - 1, n)

  # Keep rows up to and including the first block whose end equals n.
  first_reaching_n <- match(n, block_ends)
  blocks <- data.frame(starts = block_starts, ends = block_ends)
  head(blocks, first_reaching_n)
}
#############################################
# Parameter grid for the simulation study ----
n <- 10                   # length of each simulated time series
lb <- seq_len(n - 2) + 1  # block sizes to evaluate: 2, 3, ..., n - 1
phi <- 0.6                # autoregressive parameter
reps <- 3                 # number of Monte Carlo replications

# simulation function  
function2 <- function(n, lb, phi) {
  # Simulate one series, cut it into blocks via function1(), resample the
  # blocks, fit an ARIMA model on the training part and return the RMSE of the
  # forecast against the held-out part.
  #
  # Arguments:
  #   n   - length of the simulated series (first argument of arima.sim())
  #   lb  - block size(s), forwarded to function1()
  #   phi - autoregressive coefficient used in arima.sim()
  # Returns: a list with the single element "RMSE".

  #### simulate ####
  # ov is half the block size, rounded up.
  ov <- ceiling(lb/2)
  # Apply function1() element-wise over lb and ov; SIMPLIFY = FALSE keeps the
  # result as a list (one entry per lb value).
  vblocks <- Vectorize(function1, c("lb", "ov"), SIMPLIFY = FALSE)
  d <- vblocks(lb = lb, ov = ov, n = n)
  # Series of length n from an ARIMA(1, 0, 0) model with coefficient phi.
  ts <- arima.sim(n, model = list(ar = phi, order = c(1, 0, 0)), sd = 1)

  #### divide ####
  # For each entry of d, extract the sub-series ts[start:end] of every block.
  # NOTE(review): `starts` and `ends` are not local variables; with() looks
  # them up in `x` at run time. Tools that scan this function for free
  # variables may treat them as globals - possibly related to the export error
  # reported for this script; confirm.
  blk <- lapply(d, function(x) with(x, Map(function(i, j) ts[i:j], starts, ends)))
  #### resample ####
  # blk has one top-level element per entry of d; 10 of those elements are
  # drawn with replacement.
  res <- sample(blk, replace = TRUE, 10)        # resamples the blocks
  res.unlist <- unlist(res, use.names = FALSE)   # unlist the bootstrap series
  #### train, forecast ####
  # Everything except the last 10 values is used for training; the remainder
  # is the test set.
  train <- head(res.unlist, round(length(res.unlist) - 10)) # train set
  test <- tail(res.unlist, length(res.unlist) - length(train)) # test set
  # Point forecasts ($mean) over a horizon equal to the test-set length.
  nfuture <- forecast(train, # forecast
                      model = auto.arima(train), 
                      lambda = 0, biasadj = TRUE, h = length(test))$mean    
  ### metric ####
  RMSE <- rmse(test, nfuture) # return RMSE
  return(
    list("RMSE" = RMSE)
  )
}

# Parameter grid handed to MonteCarlo(); the element names match the
# arguments of function2 (n, lb, phi).
param_list <- list("n" = n, "lb" = lb, "phi" = phi)

set.seed(123, kind = "L'Ecuyer-CMRG")
# Pass function2, the simulation function defined in this script. The
# previously used name `bootstrap4` is not defined anywhere in the script.
MC_result <- MonteCarlo(func = function2,
                        nrep = reps,
                        ncpus = parallel::detectCores() - 1,
                        param_list = param_list,
                        export_also = list(
                          "packages" = c("forecast", "Metrics")
                        ),
                        raw = TRUE)

我在运行上面的代码时遇到了这个错误:

in snowfall::sfExport("func2", "func", "libloc_strings", "function1", : Unknown/unfound variable ends in export. (local=TRUE)

我想将 function1 的逻辑直接并入 function2,使 function2 不再依赖一个单独的函数 function1。

这是我的尝试:

function2 <- function(n, lb, phi) {
  # Attempt at folding the block-boundary logic of function1 into this
  # function. Takes the series length n, block size lb and autoregressive
  # coefficient phi; the final statement returns a list with element "RMSE".

  #### simulate ####
  ov <- ceiling(lb/2)
  # NOTE(review): `function1` is assigned the *value* of head(data.frame(...)),
  # i.e. a data frame of start/end positions, not a function.
  function1 <- head(data.frame(unique(sort(c(seq(1, n, lb), seq(lb-ov+1, n, lb)))), pmin(unique(sort(c(seq(1, n, lb), seq(lb-ov+1, n, lb)))) + lb - 1, n)), match(n, pmin(unique(sort(c(seq(1, n, lb), seq(lb-ov+1, n, lb)))) + lb - 1, n)))
  # NOTE(review): Vectorize() is given the data frame above rather than a
  # function, so `vblocks` is presumably not a usable function here - likely
  # the reason this attempt fails; confirm.
  vblocks <- Vectorize(function1, c("lb", "ov"), SIMPLIFY = FALSE)
  d <- vblocks(lb = lb, ov = ov, n = n)
  # Series of length n from an ARIMA(1, 0, 0) model with coefficient phi.
  ts <- arima.sim(n, model = list(ar = phi, order = c(1, 0, 0)), sd = 1)

  #### divide ####
  # The start and end vectors are recomputed inline here instead of being
  # read from the columns of each element of d.
    blk <- lapply(d, function(x) with(x, Map(function(i, j) ts[i:j], unique(sort(c(seq(1, n, lb), seq(lb-ov+1, n, lb)))), pmin(unique(sort(c(seq(1, n, lb), seq(lb-ov+1, n, lb)))) + lb - 1, n))))

  #### resample ####
  res <- sample(blk, replace = TRUE, 10)        # resamples the blocks
  res.unlist <- unlist(res, use.names = FALSE)   # unlist the bootstrap series
  #### train, forecast ####
  # Everything except the last 10 values is used for training; the remainder
  # is the test set.
  train <- head(res.unlist, round(length(res.unlist) - 10)) # train set
  test <- tail(res.unlist, length(res.unlist) - length(train)) # test set
  # Point forecasts ($mean) over a horizon equal to the test-set length.
  nfuture <- forecast(train, # forecast
                      model = auto.arima(train), 
                      lambda = 0, biasadj = TRUE, h = length(test))$mean    
  ### metric ####
  RMSE <- rmse(test, nfuture) # return RMSE
  return(
    list("RMSE" = RMSE)
  )
}

当我把它传递给这个时:

# Seed the RNG with the L'Ecuyer-CMRG generator, then run function2 over the
# parameter grid, using all but one of the detected cores.
set.seed(123, kind = "L'Ecuyer-CMRG")
MC_result <- MonteCarlo(
  func = function2,
  param_list = param_list,
  nrep = reps,
  ncpus = parallel::detectCores() - 1,
  export_also = list("packages" = c("forecast", "Metrics")),
  raw = TRUE
)

我收到此错误消息:

3 nodes produced errors; first error: could not find function "vblocks"

我在试验中所做的只是将整个 function1 作为单个语句放入 function2

您可以将 function1 的内容放入 function2 的正文中 - 包括变量赋值等

library(MonteCarlo)
library(forecast)
library(ModelMetrics)

mc_f <- function(n, lb, phi) {
  # Single self-contained simulation function for MonteCarlo(): builds the
  # overlapping block boundaries, simulates a series, resamples its blocks,
  # fits an ARIMA model on the training part and returns the forecast RMSE.
  #
  # Arguments:
  #   n   - length of the simulated series
  #   lb  - block size
  #   phi - autoregressive coefficient passed to arima.sim()
  # Returns: a list with the single element "RMSE".

  # Block boundaries ----
  overlap <- ceiling(lb / 2)
  grid_a <- seq(1, n, lb)
  grid_b <- seq(lb - overlap + 1, n, lb)
  block_starts <- unique(sort(c(grid_a, grid_b)))
  block_ends <- pmin(block_starts + lb - 1, n)
  # Keep rows up to and including the first block that ends at n.
  bounds <- head(
    data.frame(starts = block_starts, ends = block_ends),
    match(n, block_ends)
  )

  # Simulated series ----
  series <- arima.sim(n, model = list(ar = phi, order = c(1, 0, 0)), sd = 1)

  # One sub-series per row of `bounds`.
  blocks <- Map(
    function(from, to) series[from:to],
    bounds$starts,
    bounds$ends
  )

  # Resample: draw 10 blocks with replacement and concatenate them ----
  drawn <- sample(blocks, size = 10, replace = TRUE)
  boot_series <- unlist(drawn, use.names = FALSE)

  # Train / test split: the last 10 values are held out ----
  n_total <- length(boot_series)
  train <- head(boot_series, round(n_total - 10))
  test <- tail(boot_series, n_total - length(train))

  # Fit on the training part and forecast over the test horizon ----
  fit <- auto.arima(train)
  fc <- forecast(train,
                 model = fit,
                 lambda = 0, biasadj = TRUE, h = length(test))

  # Metric ----
  list("RMSE" = rmse(test, fc$mean))
}
reps <- 3
# Define n before building the grid: elements of a list() call cannot refer
# to one another, so `lb` needs an `n` that already exists. Without this line
# the snippet only works if a global `n` happens to be defined elsewhere.
n <- 10
param_list <- list(n = n, lb = seq_len(n - 2) + 1, phi = 0.6)

# Evaluate mc_f on every parameter combination in param_list, `reps` times
# each, using all but one of the detected cores.
mc_result <- MonteCarlo(
  func = mc_f,
  param_list = param_list,
  nrep = reps,
  ncpus = parallel::detectCores() - 1
)
#> Grid of  8  parameter constellations to be evaluated. 
#>  
#> Simulation parallelized using 3 cpus. 
#>  
#> Progress: 
#>  
#>   |==================================================================================| 100%