是否可以编写一个函数来从 data.frame 对象创建光栅文件?

Is it possible to write a single function to create raster files from a data.frame object?

我已经在名为“prec”的 R 中加载了 data.frame 对象,其中包含 1009549 行和 8 个变量。我想在每 4 个时间步长(“tstep”变量,从索引 2 到 241)为每个 x-y 坐标对创建 60 个累积“prec”变量值的栅格层,如下面的代码所总结。我执行了一个功能来分 3 个步骤创建每个文件来实现它。但是,是否可以为每个步骤编写一个函数或为整个代码(步骤 1 至 4)编写一个函数?

load required packages

library(data.table)
library(raster)

structure of the "prec" data.frame

> headTail(prec)

             x     y prec index tstep variable level                date
1        -47.8 -21.2    0     1     1     prec  1000 2015-01-01 00:00:00
1.1      -47.6 -21.2    0     1     1     prec  1000 2015-01-01 00:00:00
1.2      -47.4 -21.2    0     1     1     prec  1000 2015-01-01 00:00:00
1.3      -47.2 -21.2    0     1     1     prec  1000 2015-01-01 00:00:00
...        ...   ...  ...   ...   ...     <NA>   ...                <NA>
241.4185 -36.8  -7.2    0   241   241     prec  1000 2015-01-01 00:00:59
241.4186 -36.6  -7.2    0   241   241     prec  1000 2015-01-01 00:00:59
241.4187 -36.4  -7.2    0   241   241     prec  1000 2015-01-01 00:00:59
241.4188 -36.2  -7.2    0   241   241     prec  1000 2015-01-01 00:01:00

step 1: subset by tstep

prec_1 <- prec[prec$tstep %in% c(2, 3, 4, 5),]
prec_2 <- prec[prec$tstep %in% c(6, 7, 8, 9),]
prec_3 <- prec[prec$tstep %in% c(10, 11, 12, 13),]
...
prec_60 <- prec[prec$tstep %in% c( 238 , 239 , 240 , 241),]

step 2: coerce to data.table

prec_1_sum <- setDT(prec_1)[, list(prec_sum_1 = sum(prec*1000)), list(x, y)]
prec_2_sum <- setDT(prec_2)[, list(prec_sum_2 = sum(prec*1000)), list(x, y)]
prec_3_sum <- setDT(prec_3)[, list(prec_sum_3 = sum(prec*1000)), list(x, y)]
...
prec_60_sum <- setDT(prec_60)[, list(prec_sum_60 = sum(prec*1000)), list(x, y)]

step 3: create n raster layers

layer_1 <- rasterFromXYZ(prec_1_sum [,1:3], res = c(0.20, 0.20), crs = sp::CRS("+init=epsg:4326"))
layer_2 <- rasterFromXYZ(prec_2_sum [,1:3], res = c(0.20, 0.20), crs = sp::CRS("+init=epsg:4326"))
layer_3 <- rasterFromXYZ(prec_3_sum [,1:3], res = c(0.20, 0.20), crs = sp::CRS("+init=epsg:4326"))
...
layer_60 <- rasterFromXYZ(prec_60_sum [,1:3], res = c(0.20, 0.20), crs = sp::CRS("+init=epsg:4326"))

step 4: stack raster layers

stack_prec <- stack(layer_1, layer_2, layer_3, layer_4, layer_5, layer_6, layer_7, layer_8, layer_9, layer_10,
                    layer_11, layer_12, layer_13, layer_14, layer_15, layer_16, layer_17, layer_18, layer_19, layer_20,
                    layer_21, layer_22, layer_23, layer_24, layer_25, layer_26, layer_27, layer_28, layer_29, layer_30,
                    layer_31, layer_32, layer_33, layer_34, layer_35, layer_36, layer_37, layer_38, layer_39, layer_40,
                    layer_41, layer_42, layer_43, layer_44, layer_45, layer_46, layer_47, layer_48, layer_49, layer_50,
                    layer_51, layer_52, layer_53, layer_54, layer_55, layer_56, layer_57, layer_58, layer_59, layer_60)

当我们有可用的示例数据时,帮助总是容易得多。将来您可以使用 dput(prec) 并复制并粘贴该输出供人们使用。至少一些示例数据是有用的,特别是当您使用的函数对数据的外观有特定规范时。在这里,我们生成了一些要使用的数据。

library(raster)
#> Loading required package: sp
library(data.table)
#> 
#> Attaching package: 'data.table'
#> The following object is masked from 'package:raster':
#> 
#>     shift

set.seed(1)
dat <- 
  data.frame(
    x = rep(seq(-47.8, -47.2, by = 0.2), 241),
    y = -21.2,
    prec = runif(964),
    tstep = rep(1:241, each = 4),
    date = c(rep(as.Date("2015-01-01"), 4), rep(seq(as.Date("2015-01-01"), by = "day", length.out = 60), each = 16))
  )

对于您的过程,将数据分组比分解数据似乎更直接一些。这样,您只需对一个数据集执行操作,而不必重复执行多次。这样一来,第 1 步和第 2 步就可以减少到只有几行。没有考虑太多优化这一点,我循环了第一步中创建的组来创建栅格图层。

raster_layers <- function(dat){
  
  ## some flexibility if there is a differing number of tsteps
  ## it will by default exclude the first tstep as in your example
  min_tstep <- min(dat$tstep)
  max_tstep <- max(dat$tstep)
  breaks <- seq(min_tstep, max_tstep, by = 4)
  
  ## Step 1
  dat$group <- cut(dat$tstep, breaks)
  dat <- dat[!is.na(dat$group), ]
  ## Step 2
  prec <- setDT(dat)[ , list(prec_sum = sum(prec * 1000)), by = list(group, x, y)]
  ## Step 3
  layer <- list()
  group <- unique(prec$group)
  j <- 1
  for (i in group){
    
    raster_dat <- prec[prec$group %in% i , c("x", "y", "prec_sum")]
    ## looks like your plot uses the names for changing labels??
    colnames(raster_dat)[colnames(raster_dat) == "prec_sum"] <- paste0("prec_sum_", j)
    layer[[j]] <- 
      rasterFromXYZ(raster_dat, 
                    res = c(0.20, 0.20), 
                    crs = sp::CRS("+init=epsg:4326"))
    j <- j + 1
  }
  ## Step 4
  stack_prec <- stack(unlist(layer))
  
  return(stack_prec)
}

例子

stack_prec <- raster_layers(dat = dat)

stack_prec
#> class      : RasterStack 
#> dimensions : 1, 4, 4, 60  (nrow, ncol, ncell, nlayers)
#> resolution : 0.2, 0.2  (x, y)
#> extent     : -47.9, -47.1, -21.3, -21.1  (xmin, xmax, ymin, ymax)
#> crs        : +init=epsg:4326 
#> names      : prec_sum_1, prec_sum_2, prec_sum_3, prec_sum_4, prec_sum_5, prec_sum_6, prec_sum_7, prec_sum_8, prec_sum_9, prec_sum_10, prec_sum_11, prec_sum_12, prec_sum_13, prec_sum_14, prec_sum_15, ... 
#> min values :  2112.4990,  1124.8232,  2007.5945,  1315.0517,  1729.9294,  1582.8684,  1524.0147,  1098.1529,  2008.5390,   1248.1860,   1680.0199,   1855.4024,    815.4047,   1204.8576,   1416.3943, ... 
#> max values :   2336.186,   2565.158,   2877.219,   2318.115,   3017.609,   2540.536,   2569.019,   2690.884,   2327.706,    2288.046,    3104.792,    2639.530,    2358.953,    2599.245,    2618.676, ...