计算 R 中 2 个时间戳之间的秒数,不包括周末
Calculate seconds between 2 timestamps in R excluding weekends
如果我有一个包含 2 列的 YMD HMS 数据框,我如何计算两者之间的秒数差异(不包括周末)?
col 2 - col 1 = 以秒为单位的时间;需要排除周末秒数
Dates1 <- as.POSIXct("2011-01-30 12:00:00") + rep(0, 10)
Dates2 <- as.POSIXct("2011-02-04") + seq(0, 9, 1)
df <- data.frame(Dates1 = Dates1, Dates2 = Dates2)
我需要它给我 (388800 - 43200) = 345600;我减去 43200 的原因是因为那是从中午到午夜时钟停止的周日周末时间。
这是一个适用于矢量的剪辑:
#' Seconds difference without weekends
#'
#' @param a, b POSIXt
#' @param weekends 'character', day of the week (see
#' [base::strptime()] for the "%w" argument), "0" is Sunday, "6" is
#' Saturday; defaults to `c("0","6")`: Saturday and Sunday
#' @param units 'character', legal values for [base::units()], such as
#' "secs", "mins", "hours"
#' @return 'difftime' object
#' @md
secs_no_weekend <- function(a, b, weekends = c("0", "6"), units = "secs") {
out <- mapply(function(a0, b0) {
astart <- as.POSIXct(format(a0, "%Y-%m-%d 00:00:00"))
aend <- as.POSIXct(format(a0, "%Y-%m-%d 24:00:00"))
bstart <- as.POSIXct(format(b0, "%Y-%m-%d 00:00:00"))
days <- seq.POSIXt(astart, bstart, by = "day")
ndays <- length(days)
if (ndays == 1) {
d <- b0 - a0
units(d) <- "secs"
} else {
d <- rep(60 * 60 * 24, ndays) # secs
d[1] <- `units<-`(aend - a0, "secs")
d[ndays] <- `units<-`(b0 - bstart, "secs")
wkend <- format(days, "%w")
d[ wkend %in% weekends ] <- 0
}
sum(pmax(0, d))
}, a, b)
out <- structure(out, class = "difftime", units = units)
out
}
Testing/validation:
可能会更新,因为出现的示例与我的假设不符。
为了透视,这是本月(2019 年 6 月)的日历,ISO-8601(右)和 US/not-ISO(左):
week <- c("Mon","Tue","Wed","Thu","Fri","Sat","Sun")
# sunfirst <- ... calculated
monfirst <- tibble(dt = seq(as.Date("2019-06-01"), as.Date("2019-06-30"), by="days")) %>%
mutate(
dow = factor(format(dt, format = "%a"), levels = week),
dom = as.integer(format(dt, format = "%e")),
wom = format(dt, format = "%V") # %U for sunfirst, %V for monfirst
) %>%
select(-dt) %>%
spread(dow, dom) %>%
select(-wom)
monfirst <- rbind(monfirst, NA)
cbind(sunfirst, ` `=" ", monfirst )
# Sun Mon Tue Wed Thu Fri Sat Mon Tue Wed Thu Fri Sat Sun
# 1 NA NA NA NA NA NA 1 NA NA NA NA NA 1 2
# 2 2 3 4 5 6 7 8 3 4 5 6 7 8 9
# 3 9 10 11 12 13 14 15 10 11 12 13 14 15 16
# 4 16 17 18 19 20 21 22 17 18 19 20 21 22 23
# 5 23 24 25 26 27 28 29 24 25 26 27 28 29 30
# 6 30 NA NA NA NA NA NA NA NA NA NA NA NA NA
一些数据和预期。 (我这里用dplyr
代替simplicity/readability,上面的函数不需要。)
dh <- 43200 # day-half, 60*60*12
d1 <- 86400 # day=1, 60*60*24
d4 <- 345600 # days=4, 4*d1
d5 <- 432000 # days=5
d7 <- 432000 # 7 days minus weekend
d <- tribble(
~x , ~y , ~expect, ~description
, "2019-06-03 12:00:00", "2019-06-03 12:00:05", 5 , "same day"
, "2019-06-03 12:00:00", "2019-06-04 12:00:05", d1+5 , "next day"
, "2019-06-03 12:00:00", "2019-06-07 12:00:05", d4+5 , "4d + 5"
, "2019-06-03 12:00:00", "2019-06-08 12:00:05", d4+dh , "start weekday, end weekend, no 5"
, "2019-06-03 12:00:00", "2019-06-09 12:00:05", d4+dh , "start weekday, end weekend+, no 5, same"
, "2019-06-03 12:00:00", "2019-06-10 12:00:05", d7+5 , "start/end weekday, 1 full week"
, "2019-06-02 12:00:00", "2019-06-03 12:00:05", dh+5 , "start weekend, end weekday, 1/2 day"
, "2019-06-02 12:00:00", "2019-06-08 12:00:05", d7 , "start/end weekend, no 5"
) %>% mutate_at(vars(x, y), as.POSIXct)
(out <- secs_no_weekend(d$x, d$y))
# Time differences in secs
# [1] 5 86405 345605 388800 388800 432005 43205 432000
all(out == d$expect)
# [1] TRUE
这是一个使用 lubridate
和其他 tidyverse
软件包的解决方案。 lubridate
的好处在于,它可以非常无缝地处理许多与时间有关的古怪问题,从时区到闰年,再到夏令时的切换。 (如果您关心这些,只需确保您的数据有时区。)
我在这里使用的概念是 lubridate 中的 intervals
(使用 %--%
运算符创建)。间隔字面上就是它听起来的样子:一个非常有用的 class,基本上有一个开始日期时间和一个结束日期时间。
我生成了两个数据集:一个用于你的开始和结束时间,另一个用于周末开始和结束时间,每个都有它的自己的区间列。在周末数据集中,请注意开始和结束时间被任意设置为一年的星期六和星期日。您应该设置对您有意义的值,或者想出一种方法从数据中设置它。 :)
从那里,我们将使用 lubridate 的 intersect
函数找到您的间隔和周末间隔之间的重叠,因此稍后我们可以计算相关的周末秒数并将其减去。
但首先我们使用 tidyr
中的 crossing
来确保我们在 weekends
数据集中检查每个周末的每个间隔。它只是运行两个数据集的笛卡尔积(参见 )。
最后我们使用 int_length
来计算周末秒数,将每个间隔的周末秒数加起来,计算每个间隔的总秒数,然后减去 weekend 秒,距离 总 秒。瞧!我们有总秒数,不包括周末。
此解决方案的另一个好处是它非常灵活。我将周末定义为 0:00 周六到 0:00 周一...但是您可以删除周五晚上、周一凌晨,只要您喜欢并满足您的分析要求。
library(dplyr)
library(tidyr)
library(tibble)
library(lubridate) # makes dates and times easier!
test <- tribble(
~start_time, ~end_time,
"2019-05-22 12:35:42", "2019-05-23 12:35:42", # same week no weekends
"2019-05-22 12:35:42", "2019-05-26 12:35:42", # ends during weekend
"2019-05-22 12:35:42", "2019-05-28 12:35:42", # next week full weekend
"2019-05-26 12:35:42", "2019-05-29 12:35:42", # starts during weekend
"2019-05-22 12:35:42", "2019-06-05 12:35:42" # two weeks two weekends
) %>%
mutate(
id = row_number(),
timespan = start_time %--% end_time
)
weekend_beginnings <- ymd_hms("2019-05-18 00:00:00") + weeks(0:51)
weekend_endings <- ymd_hms("2019-05-20 00:00:00") + weeks(0:51)
weekends <- weekend_beginnings %--% weekend_endings
final_answer <- crossing(test, weekends) %>%
mutate(
weekend_intersection = intersect(timespan, weekends),
weekend_seconds = int_length(weekend_intersection)
) %>%
group_by(id, start_time, end_time, timespan) %>%
summarise(
weekend_seconds = sum(weekend_seconds, na.rm = TRUE)
) %>%
mutate(
total_seconds = int_length(timespan),
weekday_seconds = total_seconds - weekend_seconds
)
glimpse(final_answer)
如果我有一个包含 2 列的 YMD HMS 数据框,我如何计算两者之间的秒数差异(不包括周末)?
col 2 - col 1 = 以秒为单位的时间;需要排除周末秒数
Dates1 <- as.POSIXct("2011-01-30 12:00:00") + rep(0, 10)
Dates2 <- as.POSIXct("2011-02-04") + seq(0, 9, 1)
df <- data.frame(Dates1 = Dates1, Dates2 = Dates2)
我需要它给我 (388800 - 43200) = 345600;我减去 43200 的原因是因为那是从中午到午夜时钟停止的周日周末时间。
这是一个适用于矢量的剪辑:
#' Seconds difference without weekends
#'
#' @param a, b POSIXt
#' @param weekends 'character', day of the week (see
#' [base::strptime()] for the "%w" argument), "0" is Sunday, "6" is
#' Saturday; defaults to `c("0","6")`: Saturday and Sunday
#' @param units 'character', legal values for [base::units()], such as
#' "secs", "mins", "hours"
#' @return 'difftime' object
#' @md
secs_no_weekend <- function(a, b, weekends = c("0", "6"), units = "secs") {
out <- mapply(function(a0, b0) {
astart <- as.POSIXct(format(a0, "%Y-%m-%d 00:00:00"))
aend <- as.POSIXct(format(a0, "%Y-%m-%d 24:00:00"))
bstart <- as.POSIXct(format(b0, "%Y-%m-%d 00:00:00"))
days <- seq.POSIXt(astart, bstart, by = "day")
ndays <- length(days)
if (ndays == 1) {
d <- b0 - a0
units(d) <- "secs"
} else {
d <- rep(60 * 60 * 24, ndays) # secs
d[1] <- `units<-`(aend - a0, "secs")
d[ndays] <- `units<-`(b0 - bstart, "secs")
wkend <- format(days, "%w")
d[ wkend %in% weekends ] <- 0
}
sum(pmax(0, d))
}, a, b)
out <- structure(out, class = "difftime", units = units)
out
}
Testing/validation:
可能会更新,因为出现的示例与我的假设不符。
为了透视,这是本月(2019 年 6 月)的日历,ISO-8601(右)和 US/not-ISO(左):
week <- c("Mon","Tue","Wed","Thu","Fri","Sat","Sun")
# sunfirst <- ... calculated
monfirst <- tibble(dt = seq(as.Date("2019-06-01"), as.Date("2019-06-30"), by="days")) %>%
mutate(
dow = factor(format(dt, format = "%a"), levels = week),
dom = as.integer(format(dt, format = "%e")),
wom = format(dt, format = "%V") # %U for sunfirst, %V for monfirst
) %>%
select(-dt) %>%
spread(dow, dom) %>%
select(-wom)
monfirst <- rbind(monfirst, NA)
cbind(sunfirst, ` `=" ", monfirst )
# Sun Mon Tue Wed Thu Fri Sat Mon Tue Wed Thu Fri Sat Sun
# 1 NA NA NA NA NA NA 1 NA NA NA NA NA 1 2
# 2 2 3 4 5 6 7 8 3 4 5 6 7 8 9
# 3 9 10 11 12 13 14 15 10 11 12 13 14 15 16
# 4 16 17 18 19 20 21 22 17 18 19 20 21 22 23
# 5 23 24 25 26 27 28 29 24 25 26 27 28 29 30
# 6 30 NA NA NA NA NA NA NA NA NA NA NA NA NA
一些数据和预期。 (我这里用dplyr
代替simplicity/readability,上面的函数不需要。)
dh <- 43200 # day-half, 60*60*12
d1 <- 86400 # day=1, 60*60*24
d4 <- 345600 # days=4, 4*d1
d5 <- 432000 # days=5
d7 <- 432000 # 7 days minus weekend
d <- tribble(
~x , ~y , ~expect, ~description
, "2019-06-03 12:00:00", "2019-06-03 12:00:05", 5 , "same day"
, "2019-06-03 12:00:00", "2019-06-04 12:00:05", d1+5 , "next day"
, "2019-06-03 12:00:00", "2019-06-07 12:00:05", d4+5 , "4d + 5"
, "2019-06-03 12:00:00", "2019-06-08 12:00:05", d4+dh , "start weekday, end weekend, no 5"
, "2019-06-03 12:00:00", "2019-06-09 12:00:05", d4+dh , "start weekday, end weekend+, no 5, same"
, "2019-06-03 12:00:00", "2019-06-10 12:00:05", d7+5 , "start/end weekday, 1 full week"
, "2019-06-02 12:00:00", "2019-06-03 12:00:05", dh+5 , "start weekend, end weekday, 1/2 day"
, "2019-06-02 12:00:00", "2019-06-08 12:00:05", d7 , "start/end weekend, no 5"
) %>% mutate_at(vars(x, y), as.POSIXct)
(out <- secs_no_weekend(d$x, d$y))
# Time differences in secs
# [1] 5 86405 345605 388800 388800 432005 43205 432000
all(out == d$expect)
# [1] TRUE
这是一个使用 lubridate
和其他 tidyverse
软件包的解决方案。 lubridate
的好处在于,它可以非常无缝地处理许多与时间有关的古怪问题,从时区到闰年,再到夏令时的切换。 (如果您关心这些,只需确保您的数据有时区。)
我在这里使用的概念是 lubridate 中的 intervals
(使用 %--%
运算符创建)。间隔字面上就是它听起来的样子:一个非常有用的 class,基本上有一个开始日期时间和一个结束日期时间。
我生成了两个数据集:一个用于你的开始和结束时间,另一个用于周末开始和结束时间,每个都有它的自己的区间列。在周末数据集中,请注意开始和结束时间被任意设置为一年的星期六和星期日。您应该设置对您有意义的值,或者想出一种方法从数据中设置它。 :)
从那里,我们将使用 lubridate 的 intersect
函数找到您的间隔和周末间隔之间的重叠,因此稍后我们可以计算相关的周末秒数并将其减去。
但首先我们使用 tidyr
中的 crossing
来确保我们在 weekends
数据集中检查每个周末的每个间隔。它只是运行两个数据集的笛卡尔积(参见
最后我们使用 int_length
来计算周末秒数,将每个间隔的周末秒数加起来,计算每个间隔的总秒数,然后减去 weekend 秒,距离 总 秒。瞧!我们有总秒数,不包括周末。
此解决方案的另一个好处是它非常灵活。我将周末定义为 0:00 周六到 0:00 周一...但是您可以删除周五晚上、周一凌晨,只要您喜欢并满足您的分析要求。
library(dplyr)
library(tidyr)
library(tibble)
library(lubridate) # makes dates and times easier!
test <- tribble(
~start_time, ~end_time,
"2019-05-22 12:35:42", "2019-05-23 12:35:42", # same week no weekends
"2019-05-22 12:35:42", "2019-05-26 12:35:42", # ends during weekend
"2019-05-22 12:35:42", "2019-05-28 12:35:42", # next week full weekend
"2019-05-26 12:35:42", "2019-05-29 12:35:42", # starts during weekend
"2019-05-22 12:35:42", "2019-06-05 12:35:42" # two weeks two weekends
) %>%
mutate(
id = row_number(),
timespan = start_time %--% end_time
)
weekend_beginnings <- ymd_hms("2019-05-18 00:00:00") + weeks(0:51)
weekend_endings <- ymd_hms("2019-05-20 00:00:00") + weeks(0:51)
weekends <- weekend_beginnings %--% weekend_endings
final_answer <- crossing(test, weekends) %>%
mutate(
weekend_intersection = intersect(timespan, weekends),
weekend_seconds = int_length(weekend_intersection)
) %>%
group_by(id, start_time, end_time, timespan) %>%
summarise(
weekend_seconds = sum(weekend_seconds, na.rm = TRUE)
) %>%
mutate(
total_seconds = int_length(timespan),
weekday_seconds = total_seconds - weekend_seconds
)
glimpse(final_answer)