计算 R data.table 中前 3 行的总和(按网格方块分组)
calculating sum of previous 3 rows in R data.table (by grid-square)
我想为每个方格计算过去三天的降雨量,并将其作为新列添加到我的 data.table 中。明确地说,我想对每个气象网格方块,把当天和前两 (2) 天的降雨量相加。
library ( zoo )
library (data.table)
# example data: ten rainfall readings, nine in grid square 1 and one in grid square 2
rain <- c(rep(NA, 3), 0, 0, 5, 1, 0, 3, 10)
square <- rep(c(1, 2), times = c(9, 1)) # grid square of each reading
# expected 3-day totals; the final NA is the first day of the second grid square
desired_result <- c(rep(NA, 5), 5, 6, 6, 4, NA)
weather <- data.table(
  rain = rain,
  square = square,
  desired_result = desired_result
)
我的尝试:这行代码以前可以运行,但现在不行了
# attempt 1: one-sided filter with weights rep(1, 2), applied per grid square
weather[, rain_3 := filter(rain, rep(1, 2), sides = 1), by = list(square)]
所以我在这里尝试另一种方法:
# attempt 2: this line gets the numbers right, but sums the following values, not the preceding ones.
weather$rain_3 <- rollapply(zoo(weather$rain), list(seq(-2,0)), sum)
# attempt 3: same call with by = list(weather$square) added, but still no success
weather$rain_3 <- rollapply(zoo(weather$rain), list(seq(-2,0)), sum, by= list(weather$square))
如果您有任何见解或建议,我将不胜感激。
非常感谢!
这是一个使用最新 data.table
版本 (v 1.9.6+)
的快速高效的解决方案
# per grid square: build the 0-, 1- and 2-row lags of rain, then add them element-wise
weather[, rain_3 := {
  lagged <- shift(rain, n = 0:2, type = "lag")
  Reduce(`+`, lagged)
}, by = square]
weather
# rain square desired_result rain_3
# 1: NA 1 NA NA
# 2: NA 1 NA NA
# 3: NA 1 NA NA
# 4: 0 1 NA NA
# 5: 0 1 NA NA
# 6: 5 1 5 5
# 7: 1 1 6 6
# 8: 0 1 6 6
# 9: 3 1 4 4
# 10: 10 2 NA NA
这里的基本思想是 shift
rain
列两次,然后对行求和。
# A 3-wide filter fails here: grid square 2 has a single row, shorter than the filter
weather[, rain_3 := filter(rain, rep(1, 3), sides = 1), by = list(square)]
#Error in filter(rain, rep(1, 3), sides = 1) :
# 'filter' is longer than time series
# Fix: only filter groups with at least 3 rows and return NA for shorter groups.
# stats::filter is called explicitly so an attached dplyr cannot mask it with dplyr::filter.
weather[, rain_3 := if (.N > 2) stats::filter(rain, rep(1, 3), sides = 1) else NA_real_,
        by = square]
# rain square desired_result rain_3
# 1: NA 1 NA NA
# 2: NA 1 NA NA
# 3: NA 1 NA NA
# 4: 0 1 NA NA
# 5: 0 1 NA NA
# 6: 5 1 5 5
# 7: 1 1 6 6
# 8: 0 1 6 6
# 9: 3 1 4 4
#10: 10 2 NA NA
注意不要加载 dplyr,因为它屏蔽了 filter
。如果需要dplyr,可以显式调用stats::filter
。
你自己也快找到答案了。 rollsum
(或 rollapply
在您的情况下)为您提供长度为 N-2 的向量,因此您只需用 NA 填充所需的单元格。可以这样简单地完成:roll<-c(NA,NA,rollsum(yourvector,k=3))
这是我的做法。我正在使用 {RcppRoll} 包中的 roll_sum,因为它更快并且更容易处理 NA。来自 data.table 的简单 by
参数可让您按方格 (square) 对结果进行分组。
library(RcppRoll)
# Rolling 3-row sum per grid square. Two leading NAs pad the roll_sum() result back to
# the group length; groups with fewer than 3 rows get a single NA.
# NA_real_ (rather than logical NA) makes both branches return doubles, so the type of
# the new column does not depend on which group is processed first.
weather[, rain_3 := if (.N > 2) {
  c(NA_real_, NA_real_, roll_sum(rain, n = 3))
} else {
  NA_real_
}, by = square]
weather
rain square desired_result rain_3
1: NA 1 NA NA
2: NA 1 NA NA
3: NA 1 NA NA
4: 0 1 NA NA
5: 0 1 NA NA
6: 5 1 5 5
7: 1 1 6 6
8: 0 1 6 6
9: 3 1 4 4
10: 10 2 NA NA
一个dplyr
解决方案:
library(dplyr)
# per grid square: today's rain plus the previous two rows
# (lag() gives NA where a group has no earlier row);
# ungroup() so the grouping is not carried into later steps or a later assignment
weather %>%
  group_by(square) %>%
  mutate(rain_3 = rain + lag(rain, n = 1L) + lag(rain, n = 2L)) %>%
  ungroup()
结果:
Source: local data table [10 x 4]
rain square desired_result rain_3
(dbl) (dbl) (dbl) (dbl)
1 NA 1 NA NA
2 NA 1 NA NA
3 NA 1 NA NA
4 0 1 NA NA
5 0 1 NA NA
6 5 1 5 5
7 1 1 6 6
8 0 1 6 6
9 3 1 4 4
10 10 2 NA NA
如果要将 rain_3 写回数据集,可以在管道中使用 magrittr
中的 %<>%
符号:
library(magrittr)
weather %<>%
group_by......
rollapply
的解决方案是这样的:
# right-aligned rolling sum over 3 rows within each grid square, padded with NA
weather[, rain_3 := rollapply(rain, width = 3, FUN = sum, fill = NA_real_, align = "right"),
        by = square]
给予:
rain square desired_result rain_3
1: NA 1 NA NA
2: NA 1 NA NA
3: NA 1 NA NA
4: 0 1 NA NA
5: 0 1 NA NA
6: 5 1 5 5
7: 1 1 6 6
8: 0 1 6 6
9: 3 1 4 4
10: 10 2 NA NA
更新
根据最初提出此问题之后发布的 zoo 包版本进行了简化。
聚会晚了,但是更新版本的 data.table
软件包(对我来说是 1.12.8)具有 frollsum
函数,可以比之前的答案(它们仍然完全有效)更简洁地完成此操作:
library (data.table)
# example data: ten rainfall readings, nine in grid square 1 and one in grid square 2
rain <- c(rep(NA, 3), 0, 0, 5, 1, 0, 3, 10)
square <- rep(c(1, 2), times = c(9, 1)) # grid square of each reading
# expected 3-day totals; the final NA is the first day of the second grid square
desired_result <- c(rep(NA, 5), 5, 6, 6, 4, NA)
weather <- data.table(
  rain = rain,
  square = square,
  desired_result = desired_result
)
# right-aligned rolling sum over a 3-row window within each grid square;
# the trailing [] prints the updated table
weather[, rain3 := frollsum(rain, n = 3, align = "right"), by = square][]
#> rain square desired_result rain3
#> 1: NA 1 NA NA
#> 2: NA 1 NA NA
#> 3: NA 1 NA NA
#> 4: 0 1 NA NA
#> 5: 0 1 NA NA
#> 6: 5 1 5 5
#> 7: 1 1 6 6
#> 8: 0 1 6 6
#> 9: 3 1 4 4
#> 10: 10 2 NA NA
由 reprex package (v0.3.0)
于 2020-07-09 创建
我想为每个方格计算过去三天的降雨量,并将其作为新列添加到我的 data.table 中。明确地说,我想对每个气象网格方块,把当天和前两 (2) 天的降雨量相加。
library ( zoo )
library (data.table)
# example data: ten rainfall readings, nine in grid square 1 and one in grid square 2
rain <- c(rep(NA, 3), 0, 0, 5, 1, 0, 3, 10)
square <- rep(c(1, 2), times = c(9, 1)) # grid square of each reading
# expected 3-day totals; the final NA is the first day of the second grid square
desired_result <- c(rep(NA, 5), 5, 6, 6, 4, NA)
weather <- data.table(
  rain = rain,
  square = square,
  desired_result = desired_result
)
我的尝试:这行代码以前可以运行,但现在不行了
# attempt 1: one-sided filter with weights rep(1, 2), applied per grid square
weather[, rain_3 := filter(rain, rep(1, 2), sides = 1), by = list(square)]
所以我在这里尝试另一种方法:
# attempt 2: this line gets the numbers right, but sums the following values, not the preceding ones.
weather$rain_3 <- rollapply(zoo(weather$rain), list(seq(-2,0)), sum)
# attempt 3: same call with by = list(weather$square) added, but still no success
weather$rain_3 <- rollapply(zoo(weather$rain), list(seq(-2,0)), sum, by= list(weather$square))
如果您有任何见解或建议,我将不胜感激。
非常感谢!
这是一个使用最新 data.table
版本 (v 1.9.6+) 的快速高效的解决方案:
# per grid square: build the 0-, 1- and 2-row lags of rain, then add them element-wise
weather[, rain_3 := {
  lagged <- shift(rain, n = 0:2, type = "lag")
  Reduce(`+`, lagged)
}, by = square]
weather
# rain square desired_result rain_3
# 1: NA 1 NA NA
# 2: NA 1 NA NA
# 3: NA 1 NA NA
# 4: 0 1 NA NA
# 5: 0 1 NA NA
# 6: 5 1 5 5
# 7: 1 1 6 6
# 8: 0 1 6 6
# 9: 3 1 4 4
# 10: 10 2 NA NA
这里的基本思想是 shift
rain
列两次,然后对行求和。
# A 3-wide filter fails here: grid square 2 has a single row, shorter than the filter
weather[, rain_3 := filter(rain, rep(1, 3), sides = 1), by = list(square)]
#Error in filter(rain, rep(1, 3), sides = 1) :
# 'filter' is longer than time series
# Fix: only filter groups with at least 3 rows and return NA for shorter groups.
# stats::filter is called explicitly so an attached dplyr cannot mask it with dplyr::filter.
weather[, rain_3 := if (.N > 2) stats::filter(rain, rep(1, 3), sides = 1) else NA_real_,
        by = square]
# rain square desired_result rain_3
# 1: NA 1 NA NA
# 2: NA 1 NA NA
# 3: NA 1 NA NA
# 4: 0 1 NA NA
# 5: 0 1 NA NA
# 6: 5 1 5 5
# 7: 1 1 6 6
# 8: 0 1 6 6
# 9: 3 1 4 4
#10: 10 2 NA NA
注意不要加载 dplyr,因为它屏蔽了 filter
。如果需要dplyr,可以显式调用stats::filter
。
你自己也快找到答案了。 rollsum
(或 rollapply
在您的情况下)为您提供长度为 N-2 的向量,因此您只需用 NA 填充所需的单元格。可以这样简单地完成:roll<-c(NA,NA,rollsum(yourvector,k=3))
这是我的做法。我正在使用 {RcppRoll} 包中的 roll_sum,因为它更快并且更容易处理 NA。来自 data.table 的简单 by
参数可让您按方格 (square) 对结果进行分组。
library(RcppRoll)
# Rolling 3-row sum per grid square. Two leading NAs pad the roll_sum() result back to
# the group length; groups with fewer than 3 rows get a single NA.
# NA_real_ (rather than logical NA) makes both branches return doubles, so the type of
# the new column does not depend on which group is processed first.
weather[, rain_3 := if (.N > 2) {
  c(NA_real_, NA_real_, roll_sum(rain, n = 3))
} else {
  NA_real_
}, by = square]
weather
rain square desired_result rain_3
1: NA 1 NA NA
2: NA 1 NA NA
3: NA 1 NA NA
4: 0 1 NA NA
5: 0 1 NA NA
6: 5 1 5 5
7: 1 1 6 6
8: 0 1 6 6
9: 3 1 4 4
10: 10 2 NA NA
一个dplyr
解决方案:
library(dplyr)
# per grid square: today's rain plus the previous two rows
# (lag() gives NA where a group has no earlier row);
# ungroup() so the grouping is not carried into later steps or a later assignment
weather %>%
  group_by(square) %>%
  mutate(rain_3 = rain + lag(rain, n = 1L) + lag(rain, n = 2L)) %>%
  ungroup()
结果:
Source: local data table [10 x 4]
rain square desired_result rain_3
(dbl) (dbl) (dbl) (dbl)
1 NA 1 NA NA
2 NA 1 NA NA
3 NA 1 NA NA
4 0 1 NA NA
5 0 1 NA NA
6 5 1 5 5
7 1 1 6 6
8 0 1 6 6
9 3 1 4 4
10 10 2 NA NA
如果要将 rain_3 写回数据集,可以在管道中使用 magrittr
中的 %<>%
符号:
library(magrittr)
weather %<>%
group_by......
rollapply
的解决方案是这样的:
# right-aligned rolling sum over 3 rows within each grid square, padded with NA
weather[, rain_3 := rollapply(rain, width = 3, FUN = sum, fill = NA_real_, align = "right"),
        by = square]
给予:
rain square desired_result rain_3
1: NA 1 NA NA
2: NA 1 NA NA
3: NA 1 NA NA
4: 0 1 NA NA
5: 0 1 NA NA
6: 5 1 5 5
7: 1 1 6 6
8: 0 1 6 6
9: 3 1 4 4
10: 10 2 NA NA
更新
根据最初提出此问题之后发布的 zoo 包版本进行了简化。
聚会晚了,但是更新版本的 data.table
软件包(对我来说是 1.12.8)具有 frollsum
函数,可以比之前的答案(它们仍然完全有效)更简洁地完成此操作:
library (data.table)
# example data: ten rainfall readings, nine in grid square 1 and one in grid square 2
rain <- c(rep(NA, 3), 0, 0, 5, 1, 0, 3, 10)
square <- rep(c(1, 2), times = c(9, 1)) # grid square of each reading
# expected 3-day totals; the final NA is the first day of the second grid square
desired_result <- c(rep(NA, 5), 5, 6, 6, 4, NA)
weather <- data.table(
  rain = rain,
  square = square,
  desired_result = desired_result
)
# right-aligned rolling sum over a 3-row window within each grid square;
# the trailing [] prints the updated table
weather[, rain3 := frollsum(rain, n = 3, align = "right"), by = square][]
#> rain square desired_result rain3
#> 1: NA 1 NA NA
#> 2: NA 1 NA NA
#> 3: NA 1 NA NA
#> 4: 0 1 NA NA
#> 5: 0 1 NA NA
#> 6: 5 1 5 5
#> 7: 1 1 6 6
#> 8: 0 1 6 6
#> 9: 3 1 4 4
#> 10: 10 2 NA NA
由 reprex package (v0.3.0)
于 2020-07-09 创建