如何在 R 中对 ARIMA 模拟函数使用 Monte Carlo
How to use Monte Carlo for ARIMA Simulation Function in R
这是我想用 R 做的算法:
- 通过
arima.sim()
函数从ARIMA
模型模拟10个时间序列数据集
- 将系列分成可能的子系列
2s
、3s
、4s
、5s
、6s
、7s
、 8s
,和 9s
。
- 对于每个块大小,对各块进行有放回的重新采样以构成新序列,并通过 auto.arima() 函数为每个块大小的子序列获得最佳 ARIMA 模型。
- 获取每个块大小下每个子序列的 RMSE。
下面的 R
函数可以完成这项工作。
## Load packages and prepare multicore process
library(forecast)
library(future.apply)
plan(multisession)
library(parallel)
library(foreach)
library(doParallel)
# detectCores() is documented to return NA when the core count is unknown;
# fall back to a single worker in that case.
n_cores <- detectCores()
if (is.na(n_cores)) {
  n_cores <- 1L
}
# Register this cluster as the foreach backend so the workers started here
# are the ones actually used, and can be shut down afterwards.
cl <- makeCluster(n_cores)
registerDoParallel(cl)
# Call stopCluster(cl) once all simulations have finished.
## simulate ARIMA(1,0, 0)
# n = 10; phi <- 0.6; order <- c(1, 0, 0)

#' Block-bootstrap RMSE for every block size of one simulated AR(1) series
#'
#' Simulates an ARIMA(1, 0, 0) series of length `n`, and for every block size
#' 2, ..., n - 1: splits the series into consecutive blocks, draws 10 blocks
#' with replacement, holds out the last 10 values of the resampled series,
#' fits `auto.arima()` on the remainder and computes the forecast RMSE.
#'
#' @param n Length of the simulated series; must be greater than 2 so that at
#'   least one block size exists.
#' @param phi Autoregressive coefficient passed to `arima.sim()`.
#' @return An unnamed list of length one holding a 1 x (n - 2) matrix of
#'   RMSE values whose column names are the block sizes.
bootstrap1 <- function(n, phi) {
  # seq(n - 2) counts backwards (1, 0) when n == 2, so reject sizes that
  # leave no valid block length instead of silently using wrong ones.
  stopifnot(is.numeric(n), length(n) == 1L, n > 2)

  series <- arima.sim(n, model = list(ar = phi, order = c(1, 0, 0)), sd = 1)
  series_len <- length(series)      # the length of the time series
  lb <- seq_len(n - 2) + 1          # block sizes l with 1 < l < n

  # Sequential foreach (%do%): one RMSE per block size, combined column-wise.
  rmse_by_block <- foreach(l = lb, .combine = "cbind") %do% {
    m <- ceiling(series_len / l)    # number of blocks
    # Label consecutive observations with their block id and split on it.
    blk <- split(series, rep(seq_len(m), each = l, length.out = series_len))

    res <- sample(blk, replace = TRUE, 10)           # resample 10 blocks
    res.unlist <- unlist(res, use.names = FALSE)     # bootstrap series

    # The last 10 values form the test set, everything before is training.
    train <- head(res.unlist, round(length(res.unlist) - 10))
    test <- tail(res.unlist, length(res.unlist) - length(train))

    # NOTE(review): lambda = 0 denotes a log Box-Cox transform in forecast,
    # while the simulated series can be negative -- confirm lambda/biasadj
    # are intended (and used) when `model` is supplied.
    nfuture <- forecast::forecast(
      train,
      model = forecast::auto.arima(train),
      lambda = 0, biasadj = TRUE, h = length(test)
    )$mean

    Metrics::rmse(test, nfuture)
  }

  BOOTSTRAPS <- matrix(rmse_by_block, nrow = 1, ncol = length(lb))
  colnames(BOOTSTRAPS) <- lb
  list(BOOTSTRAPS)
}
调用函数
# Single run: n = 10 and phi = 0.6
bootstrap1(n = 10, phi = 0.6)
我得到以下结果:
## 2 3 4 5 6 7 8 9
## [1,] 0.8920703 0.703974 0.6990448 0.714255 1.308236 0.809914 0.5315476 0.8175382
我想按顺序重复执行上面的 step 1 到 step 4,于是想到了 R 中的 Monte Carlo 技术。因此,我加载了它的包并运行了下面的函数:
# Parameter grid for MonteCarlo(): one value each for n and phi
library(MonteCarlo)
param_list <- list(n = 10, phi = 0.6)
MC_result <- MonteCarlo(
  func = bootstrap1,
  nrep = 3,
  param_list = param_list
)
期待以 matrix
形式获得以下结果:
## [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
## [1,] 0.8920703 0.703974 0.6990448 0.714255 1.308236 0.809914 0.5315476 0.8175382
## [2,] 0.8909836 0.8457537 1.095148 0.8918468 0.8913282 0.7894167 0.8911484 0.8694729
## [3,] 1.586785 1.224003 1.375026 1.292847 1.437359 1.418744 1.550254 1.30784
但我收到以下错误消息:
Error in MonteCarlo(func = bootstrap1, nrep = 3, param_list = param_list) :
func has to return a list with named components. Each component has to be scalar.
我怎样才能找到获得上述所需结果并使结果可重现的方法?
您收到此错误消息,是因为 MonteCarlo 期望 bootstrap1() 每次只接受一个参数组合进行模拟,并且每次复制只返回一个值(RMSE)。
这里情况并非如此:块长度(lb)是在 bootstrap1 内部由模拟时间序列的长度(n)决定的,因此每次调用都会得到 n - 2 个块长度的结果。
一个解决方案是将块长度作为参数传递并适当地重写bootstrap1()
:
library(MonteCarlo)
library(forecast)
library(Metrics)

# Simulation settings
n <- 10                    # number of observations per simulated series
lb <- seq_len(n - 2) + 1   # candidate block sizes: 2, ..., n - 1
phi <- 0.6                 # AR(1) coefficient
reps <- 3                  # number of Monte Carlo replications
# simulation function

#' One Monte Carlo replication: block-bootstrap forecast RMSE
#'
#' Simulates an ARIMA(1, 0, 0) series, cuts it into consecutive blocks of
#' size `lb`, resamples 10 blocks with replacement, and scores an
#' `auto.arima()` forecast of the last 10 resampled values.
#'
#' @param n Length of the simulated series.
#' @param lb Block size used to split the series (one value per call).
#' @param phi Autoregressive coefficient passed to `arima.sim()`.
#' @return A list with a single named component, `RMSE`.
bootstrap1 <- function(n, lb, phi) {
  # Simulate: AR(1) series of length n with innovation sd 1
  sim_series <- arima.sim(n = n, model = list(ar = phi, order = c(1, 0, 0)), sd = 1)

  # Divide: tag consecutive observations with a block id, then split on it
  n_blocks <- ceiling(n / lb)
  block_id <- rep(seq_len(n_blocks), each = lb, length.out = n)
  blocks <- split(sim_series, block_id)

  # Resample: draw 10 blocks with replacement and flatten them to one vector
  boot_series <- unlist(sample(blocks, size = 10, replace = TRUE), use.names = FALSE)

  # Train / test: the last 10 values are held out for evaluation
  n_train <- round(length(boot_series) - 10)
  train <- head(boot_series, n_train)
  test <- tail(boot_series, length(boot_series) - length(train))

  # Forecast the held-out horizon with the model selected on the training part
  fitted_model <- auto.arima(train)
  fc <- forecast(train, model = fitted_model, lambda = 0, biasadj = TRUE, h = length(test))

  # Metric: MonteCarlo() needs a list of named components
  list("RMSE" = rmse(test, fc$mean))
}
# Parameter grid: names match the arguments of bootstrap1()
param_list <- list(n = n, lb = lb, phi = phi)
要运行模拟,请将参数以及 bootstrap1() 传递给 MonteCarlo()。要并行执行模拟,需要通过 ncpus 设置核心数。MonteCarlo 包使用 snowfall,因此它应该可以在 Windows 上运行。
请注意,我还设置了 raw = T(即 TRUE;否则结果将是所有复制的平均值)。事先设置种子将使结果可重现。
# Seed the master session before launching the replications.
# NOTE(review): with ncpus > 1 the replications run in worker processes;
# confirm the master seed also makes those runs reproducible (else use
# ncpus = 1).
set.seed(123)
# detectCores() may return NA, and "cores - 1" is 0 on a single-core machine,
# so always request at least one CPU.
n_cpus <- max(1L, parallel::detectCores() - 1L, na.rm = TRUE)
MC_result <- MonteCarlo(
  func = bootstrap1,
  nrep = reps,
  ncpus = n_cpus,
  param_list = param_list,
  export_also = list(
    "packages" = c("forecast", "Metrics")
  ),
  # keep every replication instead of the average over replications
  raw = TRUE
)
结果是一个数组。我认为最好通过 MakeFrame() 将其转换为 data.frame:
# Convert the MonteCarlo() result into a data frame via MakeFrame()
Frame <- MakeFrame(MC_result)
不过,也很容易得到一个 reps x lb 矩阵:
# Reshape the RMSE column: rows are labelled by replication, columns by block size
matrix(
  Frame[["RMSE"]],
  ncol = length(lb),
  dimnames = list(seq_len(reps), lb)
)
这是我想用 R 做的算法:
- 通过
arima.sim()
函数从ARIMA
模型模拟10个时间序列数据集
- 将系列分成可能的子系列
2s
、3s
、4s
、5s
、6s
、7s
、8s
,和9s
。
- 对于每个块大小,对各块进行有放回的重新采样以构成新序列,并通过 auto.arima() 函数为每个块大小的子序列获得最佳 ARIMA 模型。
- 获取每个块大小下每个子序列的 RMSE。
下面的 R
函数可以完成这项工作。
## Load packages and prepare multicore process
library(forecast)
library(future.apply)
plan(multisession)
library(parallel)
library(foreach)
library(doParallel)
# detectCores() is documented to return NA when the core count is unknown;
# fall back to a single worker in that case.
n_cores <- detectCores()
if (is.na(n_cores)) {
  n_cores <- 1L
}
# Register this cluster as the foreach backend so the workers started here
# are the ones actually used, and can be shut down afterwards.
cl <- makeCluster(n_cores)
registerDoParallel(cl)
# Call stopCluster(cl) once all simulations have finished.
## simulate ARIMA(1,0, 0)
# n = 10; phi <- 0.6; order <- c(1, 0, 0)

#' Block-bootstrap RMSE for every block size of one simulated AR(1) series
#'
#' Simulates an ARIMA(1, 0, 0) series of length `n`, and for every block size
#' 2, ..., n - 1: splits the series into consecutive blocks, draws 10 blocks
#' with replacement, holds out the last 10 values of the resampled series,
#' fits `auto.arima()` on the remainder and computes the forecast RMSE.
#'
#' @param n Length of the simulated series; must be greater than 2 so that at
#'   least one block size exists.
#' @param phi Autoregressive coefficient passed to `arima.sim()`.
#' @return An unnamed list of length one holding a 1 x (n - 2) matrix of
#'   RMSE values whose column names are the block sizes.
bootstrap1 <- function(n, phi) {
  # seq(n - 2) counts backwards (1, 0) when n == 2, so reject sizes that
  # leave no valid block length instead of silently using wrong ones.
  stopifnot(is.numeric(n), length(n) == 1L, n > 2)

  series <- arima.sim(n, model = list(ar = phi, order = c(1, 0, 0)), sd = 1)
  series_len <- length(series)      # the length of the time series
  lb <- seq_len(n - 2) + 1          # block sizes l with 1 < l < n

  # Sequential foreach (%do%): one RMSE per block size, combined column-wise.
  rmse_by_block <- foreach(l = lb, .combine = "cbind") %do% {
    m <- ceiling(series_len / l)    # number of blocks
    # Label consecutive observations with their block id and split on it.
    blk <- split(series, rep(seq_len(m), each = l, length.out = series_len))

    res <- sample(blk, replace = TRUE, 10)           # resample 10 blocks
    res.unlist <- unlist(res, use.names = FALSE)     # bootstrap series

    # The last 10 values form the test set, everything before is training.
    train <- head(res.unlist, round(length(res.unlist) - 10))
    test <- tail(res.unlist, length(res.unlist) - length(train))

    # NOTE(review): lambda = 0 denotes a log Box-Cox transform in forecast,
    # while the simulated series can be negative -- confirm lambda/biasadj
    # are intended (and used) when `model` is supplied.
    nfuture <- forecast::forecast(
      train,
      model = forecast::auto.arima(train),
      lambda = 0, biasadj = TRUE, h = length(test)
    )$mean

    Metrics::rmse(test, nfuture)
  }

  BOOTSTRAPS <- matrix(rmse_by_block, nrow = 1, ncol = length(lb))
  colnames(BOOTSTRAPS) <- lb
  list(BOOTSTRAPS)
}
调用函数
# Single run: n = 10 and phi = 0.6
bootstrap1(n = 10, phi = 0.6)
我得到以下结果:
## 2 3 4 5 6 7 8 9
## [1,] 0.8920703 0.703974 0.6990448 0.714255 1.308236 0.809914 0.5315476 0.8175382
我想按顺序重复执行上面的 step 1 到 step 4,于是想到了 R 中的 Monte Carlo 技术。因此,我加载了它的包并运行了下面的函数:
# Parameter grid for MonteCarlo(): one value each for n and phi
library(MonteCarlo)
param_list <- list(n = 10, phi = 0.6)
MC_result <- MonteCarlo(
  func = bootstrap1,
  nrep = 3,
  param_list = param_list
)
期待以 matrix
形式获得以下结果:
## [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
## [1,] 0.8920703 0.703974 0.6990448 0.714255 1.308236 0.809914 0.5315476 0.8175382
## [2,] 0.8909836 0.8457537 1.095148 0.8918468 0.8913282 0.7894167 0.8911484 0.8694729
## [3,] 1.586785 1.224003 1.375026 1.292847 1.437359 1.418744 1.550254 1.30784
但我收到以下错误消息:
Error in MonteCarlo(func = bootstrap1, nrep = 3, param_list = param_list) : func has to return a list with named components. Each component has to be scalar.
我怎样才能找到获得上述所需结果并使结果可重现的方法?
您收到此错误消息,是因为 MonteCarlo 期望 bootstrap1() 每次只接受一个参数组合进行模拟,并且每次复制只返回一个值(RMSE)。
这里情况并非如此:块长度(lb)是在 bootstrap1 内部由模拟时间序列的长度(n)决定的,因此每次调用都会得到 n - 2 个块长度的结果。
一个解决方案是将块长度作为参数传递并适当地重写bootstrap1()
:
library(MonteCarlo)
library(forecast)
library(Metrics)

# Simulation settings
n <- 10                    # number of observations per simulated series
lb <- seq_len(n - 2) + 1   # candidate block sizes: 2, ..., n - 1
phi <- 0.6                 # AR(1) coefficient
reps <- 3                  # number of Monte Carlo replications
# simulation function

#' One Monte Carlo replication: block-bootstrap forecast RMSE
#'
#' Simulates an ARIMA(1, 0, 0) series, cuts it into consecutive blocks of
#' size `lb`, resamples 10 blocks with replacement, and scores an
#' `auto.arima()` forecast of the last 10 resampled values.
#'
#' @param n Length of the simulated series.
#' @param lb Block size used to split the series (one value per call).
#' @param phi Autoregressive coefficient passed to `arima.sim()`.
#' @return A list with a single named component, `RMSE`.
bootstrap1 <- function(n, lb, phi) {
  # Simulate: AR(1) series of length n with innovation sd 1
  sim_series <- arima.sim(n = n, model = list(ar = phi, order = c(1, 0, 0)), sd = 1)

  # Divide: tag consecutive observations with a block id, then split on it
  n_blocks <- ceiling(n / lb)
  block_id <- rep(seq_len(n_blocks), each = lb, length.out = n)
  blocks <- split(sim_series, block_id)

  # Resample: draw 10 blocks with replacement and flatten them to one vector
  boot_series <- unlist(sample(blocks, size = 10, replace = TRUE), use.names = FALSE)

  # Train / test: the last 10 values are held out for evaluation
  n_train <- round(length(boot_series) - 10)
  train <- head(boot_series, n_train)
  test <- tail(boot_series, length(boot_series) - length(train))

  # Forecast the held-out horizon with the model selected on the training part
  fitted_model <- auto.arima(train)
  fc <- forecast(train, model = fitted_model, lambda = 0, biasadj = TRUE, h = length(test))

  # Metric: MonteCarlo() needs a list of named components
  list("RMSE" = rmse(test, fc$mean))
}
# Parameter grid: names match the arguments of bootstrap1()
param_list <- list(n = n, lb = lb, phi = phi)
要运行模拟,请将参数以及 bootstrap1() 传递给 MonteCarlo()。要并行执行模拟,需要通过 ncpus 设置核心数。MonteCarlo 包使用 snowfall,因此它应该可以在 Windows 上运行。
请注意,我还设置了 raw = T(即 TRUE;否则结果将是所有复制的平均值)。事先设置种子将使结果可重现。
# Seed the master session before launching the replications.
# NOTE(review): with ncpus > 1 the replications run in worker processes;
# confirm the master seed also makes those runs reproducible (else use
# ncpus = 1).
set.seed(123)
# detectCores() may return NA, and "cores - 1" is 0 on a single-core machine,
# so always request at least one CPU.
n_cpus <- max(1L, parallel::detectCores() - 1L, na.rm = TRUE)
MC_result <- MonteCarlo(
  func = bootstrap1,
  nrep = reps,
  ncpus = n_cpus,
  param_list = param_list,
  export_also = list(
    "packages" = c("forecast", "Metrics")
  ),
  # keep every replication instead of the average over replications
  raw = TRUE
)
结果是一个数组。我认为最好通过 MakeFrame() 将其转换为 data.frame:
# Convert the MonteCarlo() result into a data frame via MakeFrame()
Frame <- MakeFrame(MC_result)
不过,也很容易得到一个 reps x lb 矩阵:
# Reshape the RMSE column: rows are labelled by replication, columns by block size
matrix(
  Frame[["RMSE"]],
  ncol = length(lb),
  dimnames = list(seq_len(reps), lb)
)