与 R 中的 NA 进行交叉核二维卷积

cross kernel 2D convolution with NAs in R

我有一个包含 NA 的矩阵。我的目标是使用 4 个相邻值(交叉模式)的平均值来填充它们。这是“交叉”模式内核:

kernel
     [,1] [,2] [,3]
[1,] 0.00 0.25 0.00
[2,] 0.25 0.00 0.25
[3,] 0.00 0.25 0.00

这是一个带有 for 循环的代码。它有效,但我不喜欢它,它不可读(即使对我来说,也很难理解正在发生的事情)。此外,我没有进行基准测试,但我认为可能有最快的方法。

mx <- matrix(rep(NA, 5^2), nrow = 5)
mx[3,3] <- 10
index <- which(is.na(mx), arr.ind = T)

for (i in 1:nrow(index)) {
  # if NA is at first row, use that one, otherwise we will subtract one
  bottom <- ifelse(index[i, 1] <= 1, index[i, 1], index[i, 1] - 1)
  # if NA is at the last row, use that one, otherwise we will add 1
  top <- ifelse(index[i, 1] >= nrow(mx), index[i, 1], index[i, 1] + 1)
  
  # if NA is at first column use that one, otherwise subtract one
  left <- ifelse(index[i, 2] <= 1, index[i, 2], index[i, 2] - 1)
  # if NA is at the end, use that one, otherwise add one
  right <- ifelse(index[i, 2] >= ncol(mx), index[i, 2], index[i, 2] + 1)
  # inplace substitution
  mx[index[i,1],index[i,2]] <- mean(c(
    mx[bottom, index[i, 2]],
    mx[top, index[i, 2]],
    mx[index[i, 1], right],
    mx[index[i, 1], left] ),
    na.rm=TRUE)

}

预期结果是

> mx
     [,1] [,2] [,3] [,4] [,5]
[1,]  NaN  NaN  NaN  NaN  NaN
[2,]  NaN  NaN   10   10   10
[3,]  NaN   10   10   10   10
[4,]  NaN   10   10   10   10
[5,]  NaN   10   10   10   10

我尝试使用一些图片包,但找不到我要找的确切内容。 OpenImageR::convolutionNAs 失败,imager::inpaint() 完全按照我的要求进行,但不允许我提供自定义内核。

# all of this reasoning is to convolve this filter
kernel <- matrix(c(0,1,0, 1, 0, 1, 0, 1, 0), nrow=3)
kernel <- kernel/4 # we need 1/4 to get the proper weights
# and use it to calculate the mean of the values
# then use the indices to fill the NAs on the original matrix
# not exactly working as I would like because of NAs
#OpenImageR::convolution(mx, kernel = kernel, mode = "same")
#imager::inpaint() does something similar

由于 ifelse 是向量化的,我们可以将其移出 for 循环以获得一些改进:

v3 <- function(x) {
  x2 <- x
  index <- which(is.na(x), arr.ind = T)
  i <- index[, 1]
  j <- index[, 2]
  bottom <- ifelse(i <= 1, i, i - 1)
  top <- ifelse(i >= nrow(x), i, i + 1)
  left <- ifelse(j <= 1, j, j - 1)
  right <- ifelse(j >= ncol(x), j, j + 1)
  
  for (z in 1:nrow(index)) {
    ii <- i[z]
    jj <- j[z]
    v <- c(x2[bottom[z], jj], x2[top[z], jj], x2[ii, right[z]], x2[ii, left[z]])
    x2[ii, jj] <- mean(v, na.rm = TRUE)
  }
  x2
}

这一切都可以在没有循环的情况下完成,但这会给出不同的结果,因为在您的方法中,新值也用于进一步的计算:

v4 <- function(x) {
  x2 <- x
  index <- which(is.na(x), arr.ind = T)
  i <- index[, 1]
  j <- index[, 2]
  bottom <- ifelse(i <= 1, i, i - 1)
  top <- ifelse(i >= nrow(x), i, i + 1)
  left <- ifelse(j <= 1, j, j - 1)
  right <- ifelse(j >= ncol(x), j, j + 1)
  
  a <- c(x[cbind(bottom, j)],
         x[cbind(top, j)],
         x[cbind(i, right)],
         x[cbind(i, left)])
  a <- matrix(a, ncol = 4)
  r <- rowMeans(a, na.rm = T)
  x2[index] <- r
  x2
}
v4(mx)
# [,1] [,2] [,3] [,4] [,5]
# [1,]  NaN  NaN  NaN  NaN  NaN
# [2,]  NaN  NaN   10  NaN  NaN
# [3,]  NaN   10   10   10  NaN
# [4,]  NaN  NaN   10  NaN  NaN
# [5,]  NaN  NaN  NaN  NaN  NaN

...不匹配。基准:

bench::mark(original(mx2), v3(mx2), v4(mx2), check = F)[, 1:5]
# A tibble: 3 x 5
# expression         min   median `itr/sec` mem_alloc
# <bch:expr>    <bch:tm> <bch:tm>     <dbl> <bch:byt>
# 1 original(mx2)  742.1us  766.1us     1299.      488B
# 2 v3(mx2)        114.2us  119.3us     8322.    4.07KB
# 3 v4(mx2)         46.1us   48.6us    20275.    9.46KB

p.s。 v3 结果与原始结果匹配。