单位正方形上指标函数的 R 数值积分

Numerical integration in R for indicator functions on the unit square

我正在尝试编写一个简单的例程来获取单位正方形(二维)中某些区域的度量 我知道我可以使用标准数字之一。集成函数,但我想尝试优化我的代码,因为我感兴趣的 class 个函数非常少。

这是我的代码:

#GRID DEFINITION    
listgrid <- vector(mode="list", length=10201)
listgrid2 <- vector(mode="list", length=101)
for(i in 0:100)
{   
    listgrid2[[i+1]] <- c(NaN,i/100)        
}
listgrid = rep(listgrid2,101)
for(j in 0:100)
    {
        for(k in 0:100)
        {
            listgrid[[j*101 + k + 1]][1] <- j/100
        }
    }


#Indicator Integration Routine
indint <- function(FUN){

    valuegrid <- sapply(listgrid, FUN)
    result <- ifelse(sum(valuegrid)== 0,0, (sum(valuegrid)+50.5)/10201)
    return(result)

}

它只是定义了一个网格,然后它尝试所有的点并检查它是零还是 1,然后将网格的元素相加。 50.5 是偏差校正(我知道非常粗糙)

问题是这样既不快也不准确,我猜可能是因为sapply....

我想知道面积的区域并不复杂,它们的边界很平滑。

你会如何解决这个问题?

谢谢。

根据我所做的小测试,您最好使用 cubature 包中的 adaptIntegrate 函数,因为它是用 C 编写的。

否则,如果您可以将函数从返回向量值更改为 xy 的单独输入,则最好使用 outer.

这是一些测试代码,包括您的原始代码:

#GRID DEFINITION    
listgrid <- vector(mode="list", length=10201)
listgrid2 <- vector(mode="list", length=101)
for(i in 0:100)
{   
  listgrid2[[i+1]] <- c(NaN,i/100)        
}
listgrid = rep(listgrid2,101)
for(j in 0:100)
{
  for(k in 0:100)
  {
    listgrid[[j*101 + k + 1]][1] <- j/100
  }
}

#Indicator Integration Routine
indint <- function(FUN){

  valuegrid <- sapply(listgrid, FUN)
  result <- ifelse(sum(valuegrid)== 0,0, (sum(valuegrid)+50.5)/10201)
  return(result)

}

#Matrix Grid
Row <- rep(seq(0, 1, .01), 101)
Col <- rep(seq(0, 1, .01), each = 101)
Grid <- cbind(Row, Col)

#Integrator using apply
QuickInt <- function(FUN) {
  #FUN must be vector-valued and two-dimensional
  return(sum(apply(Grid, 1, FUN)) / dim(Grid)[1])
}

#Integrator using mapply
QuickInt2 <- function(FUN) {
  #FUN must take separate X and Y inpute and two-dimensional
  return(sum(mapply(FUN, x = Row, y = Col)) / length (Row))
}

#Index for each axis for outer
Index <- seq(0, 1, .01)

#Integrator using outer
QuickInt3 <- function(FUN) {
  #FUN must take separate X and Y inpute and two-dimensional
  return(sum(outer(Index, Index, FUN) / (length(Index) * length(Index))))
}

下面是一些测试函数:

#Two test functions, both in vector input (_V) and independant input (_I) forms
test_f1_I <- function(x, y) x^2 + y^2
test_f1_V <- function(X) X[1]^2 + X[2]^2
test_f2_I <- function(x, y) sin(x) * cos(y)
test_f2_V <- function(X) sin(X[1]) * cos(X[2])

测试精度:

#Accuracy Test
library(cubature)
TrueValues <- c(adaptIntegrate(test_f1_V, c(0, 0), c(1, 1))$integral, adaptIntegrate(test_f2_V, c(0, 0), c(1, 1))$integral)
InditV <- c(indint(test_f1_V), indint(test_f2_V))
Q1V <- c(QuickInt(test_f1_V), QuickInt(test_f2_V))
Q2V <- c(QuickInt2(test_f1_I), QuickInt2(test_f2_I))
Q3V <- c(QuickInt3(test_f1_I), QuickInt3(test_f2_I))
Rs <- rbind(TrueValues, InditV, Q1V, Q2V, Q3V)
Rs
> Rs
                [,1]      [,2]
TrueValues 0.6666667 0.3868223
InditV     0.6749505 0.3911174
Q1V        0.6700000 0.3861669
Q2V        0.6700000 0.3861669
Q3V        0.6700000 0.3861669

速度测试(R-3.1.2,Intel i7-2600K 超频至 4.6Ghz,16GB 内存,Windows 7 64 位,使用 OpenBLAS 2.13 编译的 R)。

#Speed Test
library(microbenchmark)
Spd1 <- microbenchmark(adaptIntegrate(test_f1_V, c(0,0), c(1,1)), 
                      indint(test_f1_V),
                      QuickInt(test_f1_V), 
                      QuickInt2(test_f1_I),
                      QuickInt3(test_f1_I), 
                      times = 100L, 
                      unit = "relative",
                      control = list(order = 'block'))
Spd2 <- microbenchmark(adaptIntegrate(test_f2_V, c(0,0), c(1,1)), 
                       indint(test_f2_V),
                       QuickInt(test_f2_V), 
                       QuickInt2(test_f2_I), 
                       QuickInt3(test_f2_I), 
                       times = 100L, 
                       unit = "relative",
                       control = list(order = 'block'))

> Spd1
Unit: relative
                                        expr         min          lq        mean      median          uq        max neval
 adaptIntegrate(test_f1_V, c(0, 0), c(1, 1))    1.000000    1.000000    1.000000    1.000000    1.000000    1.00000   100
                           indint(test_f1_V)  491.861071  543.166516  544.559059  568.961534  500.777301  934.62521   100
                         QuickInt(test_f1_V) 1777.972641 2102.021092 2188.762796 2279.148477 2141.449811 1781.36640   100
                        QuickInt2(test_f1_I)  681.548730  818.746406  887.345323  875.833969  923.155066  972.86832   100
                        QuickInt3(test_f1_I)    4.689201    4.525525    4.795768    4.401223    3.706085   31.28801   100
> Spd2
Unit: relative
                                        expr       min        lq      mean    median        uq        max neval
 adaptIntegrate(test_f2_V, c(0, 0), c(1, 1))   1.00000   1.00000   1.00000   1.00000   1.00000    1.00000   100
                           indint(test_f2_V) 192.49010 216.29215 232.99091 226.67006 239.32413  464.76166   100
                         QuickInt(test_f2_V) 628.38419 770.34591 872.15128 870.58423 937.67521 1018.59902   100
                        QuickInt2(test_f2_I) 267.68011 306.74721 345.69155 332.67551 352.19696  726.86772   100
                        QuickInt3(test_f2_I)  12.32649  12.05382  12.15228  11.89353  11.75468   26.54075   100

这是一种使用简单 Monte Carlo 技术进行估计的矢量化方法。您可以使用 nreplicate.n 来获得您想要的 accuracy/speed 权衡。

MC_estimator <- function(expr, n = 1E5, replicate.n = 50){
  f <- function(){
    # generate points randomly over the unit square
    x <- runif(n, min = -1)
    y <- runif(n, min = -1)

    # select points only in unit circle
    sel <- sqrt(x^2 + y^2) <= 1
    x <- x[sel]
    y <- y[sel]
    selN <- length(x)

    # select points in the desired area, find the proportion and multiply by total area of the sampled region for an estimate
    eval(parse(text = paste("pi*sum(", expr, ")/selN")))
  }
  # replicate the procedure to bootstrap the estimate
  mean(replicate(replicate.n, f()))
}

expr <- "y <= sin(x) & y >= x^2 & y <= 0.5 - x"


> system.time(MC_estimator(expr))
   user  system elapsed 
   1.20    0.01    1.23 
> true <- 0.0370152
> estimated <- MC_estimator(expr)
> cat(round(100*(estimated - true)/true, 2), "% difference")
0.34 % difference