是否可以编写一个函数来从 data.frame 对象创建光栅文件?
Is it possible to write a single function to create raster files from a data.frame object?
我已经在名为“prec”的 R 中加载了 data.frame 对象,其中包含 1009549 行和 8 个变量。我想在每 4 个时间步长(“tstep”变量,从索引 2 到 241)为每个 x-y 坐标对创建 60 个累积“prec”变量值的栅格层,如下面的代码所总结。我执行了一个功能来分 3 个步骤创建每个文件来实现它。但是,是否可以为每个步骤编写一个函数或为整个代码(步骤 1 至 4)编写一个函数?
load required packages
library(data.table)
library(raster)
structure of the "prec" data.frame
> headTail(prec)
x y prec index tstep variable level date
1 -47.8 -21.2 0 1 1 prec 1000 2015-01-01 00:00:00
1.1 -47.6 -21.2 0 1 1 prec 1000 2015-01-01 00:00:00
1.2 -47.4 -21.2 0 1 1 prec 1000 2015-01-01 00:00:00
1.3 -47.2 -21.2 0 1 1 prec 1000 2015-01-01 00:00:00
... ... ... ... ... ... <NA> ... <NA>
241.4185 -36.8 -7.2 0 241 241 prec 1000 2015-01-01 00:00:59
241.4186 -36.6 -7.2 0 241 241 prec 1000 2015-01-01 00:00:59
241.4187 -36.4 -7.2 0 241 241 prec 1000 2015-01-01 00:00:59
241.4188 -36.2 -7.2 0 241 241 prec 1000 2015-01-01 00:01:00
step 1: subset by tstep
prec_1 <- prec[prec$tstep %in% c(2, 3, 4, 5),]
prec_2 <- prec[prec$tstep %in% c(6, 7, 8, 9),]
prec_3 <- prec[prec$tstep %in% c(10, 11, 12, 13),]
...
prec_60 <- prec[prec$tstep %in% c( 238 , 239 , 240 , 241),]
step 2: coerce to data.table
prec_1_sum <- setDT(prec_1)[, list(prec_sum_1 = sum(prec*1000)), list(x, y)]
prec_2_sum <- setDT(prec_2)[, list(prec_sum_2 = sum(prec*1000)), list(x, y)]
prec_3_sum <- setDT(prec_3)[, list(prec_sum_3 = sum(prec*1000)), list(x, y)]
...
prec_60_sum <- setDT(prec_60)[, list(prec_sum_60 = sum(prec*1000)), list(x, y)]
step 3: create n raster layers
layer_1 <- rasterFromXYZ(prec_1_sum [,1:3], res = c(0.20, 0.20), crs = sp::CRS("+init=epsg:4326"))
layer_2 <- rasterFromXYZ(prec_2_sum [,1:3], res = c(0.20, 0.20), crs = sp::CRS("+init=epsg:4326"))
layer_3 <- rasterFromXYZ(prec_3_sum [,1:3], res = c(0.20, 0.20), crs = sp::CRS("+init=epsg:4326"))
...
layer_60 <- rasterFromXYZ(prec_60_sum [,1:3], res = c(0.20, 0.20), crs = sp::CRS("+init=epsg:4326"))
step 4: stack raster layers
stack_prec <- stack(layer_1, layer_2, layer_3, layer_4, layer_5, layer_6, layer_7, layer_8, layer_9, layer_10,
layer_11, layer_12, layer_13, layer_14, layer_15, layer_16, layer_17, layer_18, layer_19, layer_20,
layer_21, layer_22, layer_23, layer_24, layer_25, layer_26, layer_27, layer_28, layer_29, layer_30,
layer_31, layer_32, layer_33, layer_34, layer_35, layer_36, layer_37, layer_38, layer_39, layer_40,
layer_41, layer_42, layer_43, layer_44, layer_45, layer_46, layer_47, layer_48, layer_49, layer_50,
layer_51, layer_52, layer_53, layer_54, layer_55, layer_56, layer_57, layer_58, layer_59, layer_60)
当我们有可用的示例数据时,帮助总是容易得多。将来您可以使用 dput(prec) 并复制并粘贴该输出供人们使用。至少一些示例数据是有用的,特别是当您使用的函数对数据的外观有特定规范时。在这里,我们生成了一些要使用的数据。
library(raster)
#> Loading required package: sp
library(data.table)
#>
#> Attaching package: 'data.table'
#> The following object is masked from 'package:raster':
#>
#> shift
set.seed(1)
dat <-
data.frame(
x = rep(seq(-47.8, -47.2, by = 0.2), 241),
y = -21.2,
prec = runif(964),
tstep = rep(1:241, each = 4),
date = c(rep(as.Date("2015-01-01"), 4), rep(seq(as.Date("2015-01-01"), by = "day", length.out = 60), each = 16))
)
对于您的过程,将数据分组比分解数据似乎更直接一些。这样,您只需对一个数据集执行操作,而不必重复执行多次。这样一来,第 1 步和第 2 步就可以减少到只有几行。没有考虑太多优化这一点,我循环了第一步中创建的组来创建栅格图层。
raster_layers <- function(dat){
## some flexibility if there is a differing number of tsteps
## it will by default exclude the first tstep as in your example
min_tstep <- min(dat$tstep)
max_tstep <- max(dat$tstep)
breaks <- seq(min_tstep, max_tstep, by = 4)
## Step 1
dat$group <- cut(dat$tstep, breaks)
dat <- dat[!is.na(dat$group), ]
## Step 2
prec <- setDT(dat)[ , list(prec_sum = sum(prec * 1000)), by = list(group, x, y)]
## Step 3
layer <- list()
group <- unique(prec$group)
j <- 1
for (i in group){
raster_dat <- prec[prec$group %in% i , c("x", "y", "prec_sum")]
## looks like your plot uses the names for changing labels??
colnames(raster_dat)[colnames(raster_dat) == "prec_sum"] <- paste0("prec_sum_", j)
layer[[j]] <-
rasterFromXYZ(raster_dat,
res = c(0.20, 0.20),
crs = sp::CRS("+init=epsg:4326"))
j <- j + 1
}
## Step 4
stack_prec <- stack(unlist(layer))
return(stack_prec)
}
例子
stack_prec <- raster_layers(dat = dat)
stack_prec
#> class : RasterStack
#> dimensions : 1, 4, 4, 60 (nrow, ncol, ncell, nlayers)
#> resolution : 0.2, 0.2 (x, y)
#> extent : -47.9, -47.1, -21.3, -21.1 (xmin, xmax, ymin, ymax)
#> crs : +init=epsg:4326
#> names : prec_sum_1, prec_sum_2, prec_sum_3, prec_sum_4, prec_sum_5, prec_sum_6, prec_sum_7, prec_sum_8, prec_sum_9, prec_sum_10, prec_sum_11, prec_sum_12, prec_sum_13, prec_sum_14, prec_sum_15, ...
#> min values : 2112.4990, 1124.8232, 2007.5945, 1315.0517, 1729.9294, 1582.8684, 1524.0147, 1098.1529, 2008.5390, 1248.1860, 1680.0199, 1855.4024, 815.4047, 1204.8576, 1416.3943, ...
#> max values : 2336.186, 2565.158, 2877.219, 2318.115, 3017.609, 2540.536, 2569.019, 2690.884, 2327.706, 2288.046, 3104.792, 2639.530, 2358.953, 2599.245, 2618.676, ...
我已经在名为“prec”的 R 中加载了 data.frame 对象,其中包含 1009549 行和 8 个变量。我想在每 4 个时间步长(“tstep”变量,从索引 2 到 241)为每个 x-y 坐标对创建 60 个累积“prec”变量值的栅格层,如下面的代码所总结。我执行了一个功能来分 3 个步骤创建每个文件来实现它。但是,是否可以为每个步骤编写一个函数或为整个代码(步骤 1 至 4)编写一个函数?
load required packages
library(data.table)
library(raster)
structure of the "prec" data.frame
> headTail(prec)
x y prec index tstep variable level date
1 -47.8 -21.2 0 1 1 prec 1000 2015-01-01 00:00:00
1.1 -47.6 -21.2 0 1 1 prec 1000 2015-01-01 00:00:00
1.2 -47.4 -21.2 0 1 1 prec 1000 2015-01-01 00:00:00
1.3 -47.2 -21.2 0 1 1 prec 1000 2015-01-01 00:00:00
... ... ... ... ... ... <NA> ... <NA>
241.4185 -36.8 -7.2 0 241 241 prec 1000 2015-01-01 00:00:59
241.4186 -36.6 -7.2 0 241 241 prec 1000 2015-01-01 00:00:59
241.4187 -36.4 -7.2 0 241 241 prec 1000 2015-01-01 00:00:59
241.4188 -36.2 -7.2 0 241 241 prec 1000 2015-01-01 00:01:00
step 1: subset by tstep
prec_1 <- prec[prec$tstep %in% c(2, 3, 4, 5),]
prec_2 <- prec[prec$tstep %in% c(6, 7, 8, 9),]
prec_3 <- prec[prec$tstep %in% c(10, 11, 12, 13),]
...
prec_60 <- prec[prec$tstep %in% c( 238 , 239 , 240 , 241),]
step 2: coerce to data.table
prec_1_sum <- setDT(prec_1)[, list(prec_sum_1 = sum(prec*1000)), list(x, y)]
prec_2_sum <- setDT(prec_2)[, list(prec_sum_2 = sum(prec*1000)), list(x, y)]
prec_3_sum <- setDT(prec_3)[, list(prec_sum_3 = sum(prec*1000)), list(x, y)]
...
prec_60_sum <- setDT(prec_60)[, list(prec_sum_60 = sum(prec*1000)), list(x, y)]
step 3: create n raster layers
layer_1 <- rasterFromXYZ(prec_1_sum [,1:3], res = c(0.20, 0.20), crs = sp::CRS("+init=epsg:4326"))
layer_2 <- rasterFromXYZ(prec_2_sum [,1:3], res = c(0.20, 0.20), crs = sp::CRS("+init=epsg:4326"))
layer_3 <- rasterFromXYZ(prec_3_sum [,1:3], res = c(0.20, 0.20), crs = sp::CRS("+init=epsg:4326"))
...
layer_60 <- rasterFromXYZ(prec_60_sum [,1:3], res = c(0.20, 0.20), crs = sp::CRS("+init=epsg:4326"))
step 4: stack raster layers
stack_prec <- stack(layer_1, layer_2, layer_3, layer_4, layer_5, layer_6, layer_7, layer_8, layer_9, layer_10,
layer_11, layer_12, layer_13, layer_14, layer_15, layer_16, layer_17, layer_18, layer_19, layer_20,
layer_21, layer_22, layer_23, layer_24, layer_25, layer_26, layer_27, layer_28, layer_29, layer_30,
layer_31, layer_32, layer_33, layer_34, layer_35, layer_36, layer_37, layer_38, layer_39, layer_40,
layer_41, layer_42, layer_43, layer_44, layer_45, layer_46, layer_47, layer_48, layer_49, layer_50,
layer_51, layer_52, layer_53, layer_54, layer_55, layer_56, layer_57, layer_58, layer_59, layer_60)
当我们有可用的示例数据时,帮助总是容易得多。将来您可以使用 dput(prec) 并复制并粘贴该输出供人们使用。至少一些示例数据是有用的,特别是当您使用的函数对数据的外观有特定规范时。在这里,我们生成了一些要使用的数据。
library(raster)
#> Loading required package: sp
library(data.table)
#>
#> Attaching package: 'data.table'
#> The following object is masked from 'package:raster':
#>
#> shift
set.seed(1)
dat <-
data.frame(
x = rep(seq(-47.8, -47.2, by = 0.2), 241),
y = -21.2,
prec = runif(964),
tstep = rep(1:241, each = 4),
date = c(rep(as.Date("2015-01-01"), 4), rep(seq(as.Date("2015-01-01"), by = "day", length.out = 60), each = 16))
)
对于您的过程,将数据分组比分解数据似乎更直接一些。这样,您只需对一个数据集执行操作,而不必重复执行多次。这样一来,第 1 步和第 2 步就可以减少到只有几行。没有考虑太多优化这一点,我循环了第一步中创建的组来创建栅格图层。
raster_layers <- function(dat){
## some flexibility if there is a differing number of tsteps
## it will by default exclude the first tstep as in your example
min_tstep <- min(dat$tstep)
max_tstep <- max(dat$tstep)
breaks <- seq(min_tstep, max_tstep, by = 4)
## Step 1
dat$group <- cut(dat$tstep, breaks)
dat <- dat[!is.na(dat$group), ]
## Step 2
prec <- setDT(dat)[ , list(prec_sum = sum(prec * 1000)), by = list(group, x, y)]
## Step 3
layer <- list()
group <- unique(prec$group)
j <- 1
for (i in group){
raster_dat <- prec[prec$group %in% i , c("x", "y", "prec_sum")]
## looks like your plot uses the names for changing labels??
colnames(raster_dat)[colnames(raster_dat) == "prec_sum"] <- paste0("prec_sum_", j)
layer[[j]] <-
rasterFromXYZ(raster_dat,
res = c(0.20, 0.20),
crs = sp::CRS("+init=epsg:4326"))
j <- j + 1
}
## Step 4
stack_prec <- stack(unlist(layer))
return(stack_prec)
}
例子
stack_prec <- raster_layers(dat = dat)
stack_prec
#> class : RasterStack
#> dimensions : 1, 4, 4, 60 (nrow, ncol, ncell, nlayers)
#> resolution : 0.2, 0.2 (x, y)
#> extent : -47.9, -47.1, -21.3, -21.1 (xmin, xmax, ymin, ymax)
#> crs : +init=epsg:4326
#> names : prec_sum_1, prec_sum_2, prec_sum_3, prec_sum_4, prec_sum_5, prec_sum_6, prec_sum_7, prec_sum_8, prec_sum_9, prec_sum_10, prec_sum_11, prec_sum_12, prec_sum_13, prec_sum_14, prec_sum_15, ...
#> min values : 2112.4990, 1124.8232, 2007.5945, 1315.0517, 1729.9294, 1582.8684, 1524.0147, 1098.1529, 2008.5390, 1248.1860, 1680.0199, 1855.4024, 815.4047, 1204.8576, 1416.3943, ...
#> max values : 2336.186, 2565.158, 2877.219, 2318.115, 3017.609, 2540.536, 2569.019, 2690.884, 2327.706, 2288.046, 3104.792, 2639.530, 2358.953, 2599.245, 2618.676, ...