R 中按小时划分的移动窗口的最小值
Minimum of moving windows based on hours in R
我有一个数据框,其中第一列是日期,第三列是数据。像这样:
我需要创建移动窗口,例如 1:30:00–2:30:00、2:30:00–3:30:00,依此类推。
我需要在每个窗口内找出第三列的最小值。我找到了 runmin() 这个函数,但它没有得到我想要的结果,我也不明白它是如何工作的。
示例数据:
# Sample data: seven readings spaced 600 seconds (10 minutes) apart, starting
# at 01:30:00 UTC. `int` is the constant 1L on every row.
dat <- data.frame(
  timestamp = as.POSIXct("2020-01-19 01:30:00", tz = "UTC") + 600 * (0:6),
  int = rep(1L, 7L),
  val = c(25, 70, 68, 53, 63, 65, 52)
)
# Alternative: recreate the data from a dput()-style dump (for your own data,
# paste the output of e.g. dput(head(dat, 6))). The literal below holds 7 rows;
# `timestamp` is stored as numeric POSIXct values tagged with tzone = "UTC".
dat <- structure(list(timestamp = structure(c(1579397400, 1579398000, 1579398600, 1579399200, 1579399800, 1579400400, 1579401000), class = c("POSIXct", "POSIXt"), tzone = "UTC"), int = c(1L, 1L, 1L, 1L, 1L, 1L, 1L), val = c(25, 70, 68, 53, 63, 65, 52)), class = "data.frame", row.names = c(NA, -7L))
基础 R
# Window boundaries: five hourly break points starting at 00:30:00 UTC, so each
# window begins at half past the hour.
bins <- seq(
  from = as.POSIXct("2020-01-19 00:30:00", tz = "UTC"),
  by = "1 hour",
  length.out = 5L
)
bins
# [1] "2020-01-19 00:30:00 UTC" "2020-01-19 01:30:00 UTC" "2020-01-19 02:30:00 UTC"
# [4] "2020-01-19 03:30:00 UTC" "2020-01-19 04:30:00 UTC"
# Label every row with the start of its window: findInterval() returns, for
# each timestamp, the index of the last boundary in `bins` that is <= it.
dat[["bin"]] <- bins[findInterval(dat[["timestamp"]], bins)]
dat
# timestamp int val bin
# 1 2020-01-19 01:30:00 1 25 2020-01-19 01:30:00
# 2 2020-01-19 01:40:00 1 70 2020-01-19 01:30:00
# 3 2020-01-19 01:50:00 1 68 2020-01-19 01:30:00
# 4 2020-01-19 02:00:00 1 53 2020-01-19 01:30:00
# 5 2020-01-19 02:10:00 1 63 2020-01-19 01:30:00
# 6 2020-01-19 02:20:00 1 65 2020-01-19 01:30:00
# 7 2020-01-19 02:30:00 1 52 2020-01-19 02:30:00
# One row per window: the formula `val ~ bin` groups `val` by `bin`, and
# FUN = min keeps the smallest value of each group.
aggregate(val ~ bin, data = dat, FUN = min)
# bin val
# 1 2020-01-19 01:30:00 25
# 2 2020-01-19 02:30:00 52
如果您需要保留所有行,并添加一列给出每行所属时间窗口内的最小值,则:
# Keep every row: split `dat` by window, append that window's minimum of `val`
# as `minval` to each piece, then stack the pieces back into one data frame.
do.call(
  rbind,
  lapply(
    split(dat, dat$bin),
    function(piece) transform(piece, minval = min(val))
  )
)
# timestamp int val bin minval
# 2020-01-19 01:30:00.1 2020-01-19 01:30:00 1 25 2020-01-19 01:30:00 25
# 2020-01-19 01:30:00.2 2020-01-19 01:40:00 1 70 2020-01-19 01:30:00 25
# 2020-01-19 01:30:00.3 2020-01-19 01:50:00 1 68 2020-01-19 01:30:00 25
# 2020-01-19 01:30:00.4 2020-01-19 02:00:00 1 53 2020-01-19 01:30:00 25
# 2020-01-19 01:30:00.5 2020-01-19 02:10:00 1 63 2020-01-19 01:30:00 25
# 2020-01-19 01:30:00.6 2020-01-19 02:20:00 1 65 2020-01-19 01:30:00 25
# 2020-01-19 02:30:00 2020-01-19 02:30:00 1 52 2020-01-19 02:30:00 52
tidyverse
library(dplyr)
# Reuses `bins` from the base R section. group_by() both creates the `bin`
# column (window start of each row) and groups on it; summarise() then keeps
# the minimum `val` per window and drops the grouping.
dat %>%
  group_by(bin = bins[findInterval(timestamp, bins)]) %>%
  summarise(val = min(val), .groups = "drop")
# # A tibble: 2 x 2
# bin val
# <dttm> <dbl>
# 1 2020-01-19 01:30:00 25
# 2 2020-01-19 02:30:00 52
或
# Same grouping as above, but keep all rows and attach each window's minimum
# as `minval`; ungroup() returns an ungrouped tibble.
dat %>%
  group_by(bin = bins[findInterval(timestamp, bins)]) %>%
  mutate(minval = min(val)) %>%
  ungroup()
# # A tibble: 7 x 5
# timestamp int val bin minval
# <dttm> <int> <dbl> <dttm> <dbl>
# 1 2020-01-19 01:30:00 1 25 2020-01-19 01:30:00 25
# 2 2020-01-19 01:40:00 1 70 2020-01-19 01:30:00 25
# 3 2020-01-19 01:50:00 1 68 2020-01-19 01:30:00 25
# 4 2020-01-19 02:00:00 1 53 2020-01-19 01:30:00 25
# 5 2020-01-19 02:10:00 1 63 2020-01-19 01:30:00 25
# 6 2020-01-19 02:20:00 1 65 2020-01-19 01:30:00 25
# 7 2020-01-19 02:30:00 1 52 2020-01-19 02:30:00 52
data.table
library(data.table)
# Reuses `dat` and `bins` from above. `datDT` has to be created before it is
# used; as.data.table() returns a data.table copy, so the by-reference `:=`
# below does not touch `dat`.
datDT <- as.data.table(dat)
# Add the window start as `bin` (by reference), then reduce to one row per
# window holding the minimum `val`.
datDT[, bin := bins[findInterval(timestamp, bins)]]
datDT[, .(val = min(val)), by = .(bin)]
# bin val
# <POSc> <num>
# 1: 2020-01-19 01:30:00 25
# 2: 2020-01-19 02:30:00 52
或
# Keep all rows: (re)compute `bin`, then add each window's minimum of `val` as
# `minval`. Both columns are assigned by reference with `:=`.
datDT[, bin := bins[findInterval(timestamp, bins)]]
datDT[, minval := min(val), by = "bin"]
datDT
# timestamp int val bin minval
# <POSc> <int> <num> <POSc> <num>
# 1: 2020-01-19 01:30:00 1 25 2020-01-19 01:30:00 25
# 2: 2020-01-19 01:40:00 1 70 2020-01-19 01:30:00 25
# 3: 2020-01-19 01:50:00 1 68 2020-01-19 01:30:00 25
# 4: 2020-01-19 02:00:00 1 53 2020-01-19 01:30:00 25
# 5: 2020-01-19 02:10:00 1 63 2020-01-19 01:30:00 25
# 6: 2020-01-19 02:20:00 1 65 2020-01-19 01:30:00 25
# 7: 2020-01-19 02:30:00 1 52 2020-01-19 02:30:00 52
我有一个数据框,其中第一列是日期,第三列是数据。像这样:
我需要创建移动窗口,例如 1:30:00–2:30:00、2:30:00–3:30:00,依此类推。我需要在每个窗口内找出第三列的最小值。我找到了 runmin() 这个函数,但它没有得到我想要的结果,我也不明白它是如何工作的。
示例数据:
# Sample data: seven readings spaced 600 seconds (10 minutes) apart, starting
# at 01:30:00 UTC. `int` is the constant 1L on every row.
dat <- data.frame(
  timestamp = as.POSIXct("2020-01-19 01:30:00", tz = "UTC") + 600 * (0:6),
  int = rep(1L, 7L),
  val = c(25, 70, 68, 53, 63, 65, 52)
)
# Alternative: recreate the data from a dput()-style dump (for your own data,
# paste the output of e.g. dput(head(dat, 6))). The literal below holds 7 rows;
# `timestamp` is stored as numeric POSIXct values tagged with tzone = "UTC".
dat <- structure(list(timestamp = structure(c(1579397400, 1579398000, 1579398600, 1579399200, 1579399800, 1579400400, 1579401000), class = c("POSIXct", "POSIXt"), tzone = "UTC"), int = c(1L, 1L, 1L, 1L, 1L, 1L, 1L), val = c(25, 70, 68, 53, 63, 65, 52)), class = "data.frame", row.names = c(NA, -7L))
基础 R
# Window boundaries: five hourly break points starting at 00:30:00 UTC, so each
# window begins at half past the hour.
bins <- seq(
  from = as.POSIXct("2020-01-19 00:30:00", tz = "UTC"),
  by = "1 hour",
  length.out = 5L
)
bins
# [1] "2020-01-19 00:30:00 UTC" "2020-01-19 01:30:00 UTC" "2020-01-19 02:30:00 UTC"
# [4] "2020-01-19 03:30:00 UTC" "2020-01-19 04:30:00 UTC"
# Label every row with the start of its window: findInterval() returns, for
# each timestamp, the index of the last boundary in `bins` that is <= it.
dat[["bin"]] <- bins[findInterval(dat[["timestamp"]], bins)]
dat
# timestamp int val bin
# 1 2020-01-19 01:30:00 1 25 2020-01-19 01:30:00
# 2 2020-01-19 01:40:00 1 70 2020-01-19 01:30:00
# 3 2020-01-19 01:50:00 1 68 2020-01-19 01:30:00
# 4 2020-01-19 02:00:00 1 53 2020-01-19 01:30:00
# 5 2020-01-19 02:10:00 1 63 2020-01-19 01:30:00
# 6 2020-01-19 02:20:00 1 65 2020-01-19 01:30:00
# 7 2020-01-19 02:30:00 1 52 2020-01-19 02:30:00
# One row per window: the formula `val ~ bin` groups `val` by `bin`, and
# FUN = min keeps the smallest value of each group.
aggregate(val ~ bin, data = dat, FUN = min)
# bin val
# 1 2020-01-19 01:30:00 25
# 2 2020-01-19 02:30:00 52
如果您需要保留所有行,并添加一列给出每行所属时间窗口内的最小值,则:
# Keep every row: split `dat` by window, append that window's minimum of `val`
# as `minval` to each piece, then stack the pieces back into one data frame.
do.call(
  rbind,
  lapply(
    split(dat, dat$bin),
    function(piece) transform(piece, minval = min(val))
  )
)
# timestamp int val bin minval
# 2020-01-19 01:30:00.1 2020-01-19 01:30:00 1 25 2020-01-19 01:30:00 25
# 2020-01-19 01:30:00.2 2020-01-19 01:40:00 1 70 2020-01-19 01:30:00 25
# 2020-01-19 01:30:00.3 2020-01-19 01:50:00 1 68 2020-01-19 01:30:00 25
# 2020-01-19 01:30:00.4 2020-01-19 02:00:00 1 53 2020-01-19 01:30:00 25
# 2020-01-19 01:30:00.5 2020-01-19 02:10:00 1 63 2020-01-19 01:30:00 25
# 2020-01-19 01:30:00.6 2020-01-19 02:20:00 1 65 2020-01-19 01:30:00 25
# 2020-01-19 02:30:00 2020-01-19 02:30:00 1 52 2020-01-19 02:30:00 52
tidyverse
library(dplyr)
# Reuses `bins` from the base R section. group_by() both creates the `bin`
# column (window start of each row) and groups on it; summarise() then keeps
# the minimum `val` per window and drops the grouping.
dat %>%
  group_by(bin = bins[findInterval(timestamp, bins)]) %>%
  summarise(val = min(val), .groups = "drop")
# # A tibble: 2 x 2
# bin val
# <dttm> <dbl>
# 1 2020-01-19 01:30:00 25
# 2 2020-01-19 02:30:00 52
或
# Same grouping as above, but keep all rows and attach each window's minimum
# as `minval`; ungroup() returns an ungrouped tibble.
dat %>%
  group_by(bin = bins[findInterval(timestamp, bins)]) %>%
  mutate(minval = min(val)) %>%
  ungroup()
# # A tibble: 7 x 5
# timestamp int val bin minval
# <dttm> <int> <dbl> <dttm> <dbl>
# 1 2020-01-19 01:30:00 1 25 2020-01-19 01:30:00 25
# 2 2020-01-19 01:40:00 1 70 2020-01-19 01:30:00 25
# 3 2020-01-19 01:50:00 1 68 2020-01-19 01:30:00 25
# 4 2020-01-19 02:00:00 1 53 2020-01-19 01:30:00 25
# 5 2020-01-19 02:10:00 1 63 2020-01-19 01:30:00 25
# 6 2020-01-19 02:20:00 1 65 2020-01-19 01:30:00 25
# 7 2020-01-19 02:30:00 1 52 2020-01-19 02:30:00 52
data.table
library(data.table)
# Reuses `dat` and `bins` from above. `datDT` has to be created before it is
# used; as.data.table() returns a data.table copy, so the by-reference `:=`
# below does not touch `dat`.
datDT <- as.data.table(dat)
# Add the window start as `bin` (by reference), then reduce to one row per
# window holding the minimum `val`.
datDT[, bin := bins[findInterval(timestamp, bins)]]
datDT[, .(val = min(val)), by = .(bin)]
# bin val
# <POSc> <num>
# 1: 2020-01-19 01:30:00 25
# 2: 2020-01-19 02:30:00 52
或
# Keep all rows: (re)compute `bin`, then add each window's minimum of `val` as
# `minval`. Both columns are assigned by reference with `:=`.
datDT[, bin := bins[findInterval(timestamp, bins)]]
datDT[, minval := min(val), by = "bin"]
datDT
# timestamp int val bin minval
# <POSc> <int> <num> <POSc> <num>
# 1: 2020-01-19 01:30:00 1 25 2020-01-19 01:30:00 25
# 2: 2020-01-19 01:40:00 1 70 2020-01-19 01:30:00 25
# 3: 2020-01-19 01:50:00 1 68 2020-01-19 01:30:00 25
# 4: 2020-01-19 02:00:00 1 53 2020-01-19 01:30:00 25
# 5: 2020-01-19 02:10:00 1 63 2020-01-19 01:30:00 25
# 6: 2020-01-19 02:20:00 1 65 2020-01-19 01:30:00 25
# 7: 2020-01-19 02:30:00 1 52 2020-01-19 02:30:00 52