计算不同行的列之间的时间差(difftime)
Calculate time difference (difftime) between columns of different rows
我有关于不同工作的 'Start' 和 'End' 时间的数据,按 'owner' 分组:
# Example jobs: one row per job with its owner and Start/End timestamps.
# No tz argument is passed, so the strings are parsed in the session time zone.
Data <- data.frame(
  job = as.numeric(1:5),
  owner = c("name1", "name2", "name1", "name1", "name2"),
  Start = as.POSIXct(
    c(
      "2015-01-01 15:00:00",
      "2015-01-01 15:01:00",
      "2015-01-01 15:13:00",
      "2015-01-01 15:20:00",
      "2015-01-01 15:39:02"
    ),
    format = "%Y-%m-%d %H:%M:%S"
  ),
  End = as.POSIXct(
    c(
      "2015-01-01 15:11:11",
      "2015-01-01 15:17:21",
      "2015-01-01 15:17:00",
      "2015-01-01 15:31:21",
      "2015-01-01 15:40:11"
    ),
    format = "%Y-%m-%d %H:%M:%S"
  )
)
对于每个所有者,我想计算其相邻作业之间的空闲时间,即上一个作业的 'End' 时间与下一个作业的 'Start' 时间之间的差值。
如何使用 difftime() 来计算位于不同行、不同列的时间之间的差值?
结果应如下所示:
job, owner, idletime
1, name1, NA
2, name2, NA
3, name1, 1.816667 # Start of row 3 minus End of row 1
4, name1, 3.0 # Start of row 4 minus End of row 3
...
library(dplyr)
# Example jobs: one row per job with its owner and Start/End timestamps.
# No tz argument is passed, so the strings are parsed in the session time zone.
Data <- data.frame(
  job = as.numeric(1:5),
  owner = c("name1", "name2", "name1", "name1", "name2"),
  Start = as.POSIXct(
    c(
      "2015-01-01 15:00:00",
      "2015-01-01 15:01:00",
      "2015-01-01 15:13:00",
      "2015-01-01 15:20:00",
      "2015-01-01 15:39:02"
    ),
    format = "%Y-%m-%d %H:%M:%S"
  ),
  End = as.POSIXct(
    c(
      "2015-01-01 15:11:11",
      "2015-01-01 15:17:21",
      "2015-01-01 15:17:00",
      "2015-01-01 15:31:21",
      "2015-01-01 15:40:11"
    ),
    format = "%Y-%m-%d %H:%M:%S"
  )
)
# Idle time = a job's Start minus the End of the same owner's previous job.
# Rows are put in Start order before lagging, then restored to job order and
# reduced to the three reported columns.
Data %>%
  arrange(Start) %>%
  group_by(owner) %>%
  mutate(idletime = difftime(Start, lag(End), units = "mins")) %>%
  ungroup() %>%
  arrange(job) %>%
  select(job, owner, idletime)
# job owner idletime
# 1 1 name1 NA mins
# 2 2 name2 NA mins
# 3 3 name1 1.816667 mins
# 4 4 name1 3.000000 mins
# 5 5 name2 21.683333 mins
这是一个可能的解决方案,使用 data.table
library(data.table) # v 1.9.5+
# Convert Data to a data.table by reference, then add idletime in place:
# each job's Start minus the previous End within the same owner.
setDT(Data)
Data[
  ,
  idletime := difftime(Start, shift(End, n = 1L, type = "lag"), units = "mins"),
  by = owner
]
# job owner Start End idletime
# 1: 1 name1 2015-01-01 15:00:00 2015-01-01 15:11:11 NA mins
# 2: 2 name2 2015-01-01 15:01:00 2015-01-01 15:17:21 NA mins
# 3: 3 name1 2015-01-01 15:13:00 2015-01-01 15:17:00 1.816667 mins
# 4: 4 name1 2015-01-01 15:20:00 2015-01-01 15:31:21 3.000000 mins
# 5: 5 name2 2015-01-01 15:39:02 2015-01-01 15:40:11 21.683333 mins
或使用dplyr
library(dplyr)
# Per-owner idle time: Start minus the previous row's End within each owner.
# The result is left grouped by owner.
mutate(
  group_by(Data, owner),
  idletime = difftime(Start, lag(End, n = 1L), units = "mins")
)
# Source: local data frame [5 x 5]
# Groups: owner
#
# job owner Start End idletime
# 1 1 name1 2015-01-01 15:00:00 2015-01-01 15:11:11 NA mins
# 2 2 name2 2015-01-01 15:01:00 2015-01-01 15:17:21 NA mins
# 3 3 name1 2015-01-01 15:13:00 2015-01-01 15:17:00 1.816667 mins
# 4 4 name1 2015-01-01 15:20:00 2015-01-01 15:31:21 3.000000 mins
# 5 5 name2 2015-01-01 15:39:02 2015-01-01 15:40:11 21.683333 mins
如果我们使用 base R,ave 将是一个选项。我们使用 ave 按 'owner' 分组得到 'End' 的滞后值(即同一所有者上一个作业的 'End'),并将它作为 difftime 的第二个参数来创建 'idtime'。
# Base-R idle time: each job's Start minus the previous End of the same owner.
# The per-group shift is written out explicitly because stats::lag() only
# changes a series' time attributes and leaves the values where they are, so
# FUN = lag works only while another package masks it. Prepending an NA of the
# same class and truncating keeps End a POSIXct and makes each owner's first
# job NA. Rows are used in their existing order (assumed sorted by Start
# within owner).
Data$idtime <- with(Data, difftime(Start, ave(End, owner, FUN = function(x) c(x[NA_integer_], x)[seq_along(x)]), units = "mins"))
Data
# job owner Start End idtime
#1 1 name1 2015-01-01 15:00:00 2015-01-01 15:11:11 NA mins
#2 2 name2 2015-01-01 15:01:00 2015-01-01 15:17:21 NA mins
#3 3 name1 2015-01-01 15:13:00 2015-01-01 15:17:00 1.816667 mins
#4 4 name1 2015-01-01 15:20:00 2015-01-01 15:31:21 3.000000 mins
#5 5 name2 2015-01-01 15:39:02 2015-01-01 15:40:11 21.683333 mins
注意:我将列名称命名为 'idtime' 以使代码保持在一行中:-)
我有关于不同工作的 'Start' 和 'End' 时间的数据,按 'owner' 分组:
# Example jobs: one row per job with its owner and Start/End timestamps.
# No tz argument is passed, so the strings are parsed in the session time zone.
Data <- data.frame(
  job = as.numeric(1:5),
  owner = c("name1", "name2", "name1", "name1", "name2"),
  Start = as.POSIXct(
    c(
      "2015-01-01 15:00:00",
      "2015-01-01 15:01:00",
      "2015-01-01 15:13:00",
      "2015-01-01 15:20:00",
      "2015-01-01 15:39:02"
    ),
    format = "%Y-%m-%d %H:%M:%S"
  ),
  End = as.POSIXct(
    c(
      "2015-01-01 15:11:11",
      "2015-01-01 15:17:21",
      "2015-01-01 15:17:00",
      "2015-01-01 15:31:21",
      "2015-01-01 15:40:11"
    ),
    format = "%Y-%m-%d %H:%M:%S"
  )
)
对于每个所有者,我想计算其相邻作业之间的空闲时间,即上一个作业的 'End' 时间与下一个作业的 'Start' 时间之间的差值。
如何使用 difftime() 来计算位于不同行、不同列的时间之间的差值?
结果应如下所示:
job, owner, idletime
1, name1, NA
2, name2, NA
3, name1, 1.816667 # Start of row 3 minus End of row 1
4, name1, 3.0 # Start of row 4 minus End of row 3
...
library(dplyr)
# Example jobs: one row per job with its owner and Start/End timestamps.
# No tz argument is passed, so the strings are parsed in the session time zone.
Data <- data.frame(
  job = as.numeric(1:5),
  owner = c("name1", "name2", "name1", "name1", "name2"),
  Start = as.POSIXct(
    c(
      "2015-01-01 15:00:00",
      "2015-01-01 15:01:00",
      "2015-01-01 15:13:00",
      "2015-01-01 15:20:00",
      "2015-01-01 15:39:02"
    ),
    format = "%Y-%m-%d %H:%M:%S"
  ),
  End = as.POSIXct(
    c(
      "2015-01-01 15:11:11",
      "2015-01-01 15:17:21",
      "2015-01-01 15:17:00",
      "2015-01-01 15:31:21",
      "2015-01-01 15:40:11"
    ),
    format = "%Y-%m-%d %H:%M:%S"
  )
)
# Idle time = a job's Start minus the End of the same owner's previous job.
# Rows are put in Start order before lagging, then restored to job order and
# reduced to the three reported columns.
Data %>%
  arrange(Start) %>%
  group_by(owner) %>%
  mutate(idletime = difftime(Start, lag(End), units = "mins")) %>%
  ungroup() %>%
  arrange(job) %>%
  select(job, owner, idletime)
# job owner idletime
# 1 1 name1 NA mins
# 2 2 name2 NA mins
# 3 3 name1 1.816667 mins
# 4 4 name1 3.000000 mins
# 5 5 name2 21.683333 mins
这是一个可能的解决方案,使用 data.table
library(data.table) # v 1.9.5+
# Convert Data to a data.table by reference, then add idletime in place:
# each job's Start minus the previous End within the same owner.
setDT(Data)
Data[
  ,
  idletime := difftime(Start, shift(End, n = 1L, type = "lag"), units = "mins"),
  by = owner
]
# job owner Start End idletime
# 1: 1 name1 2015-01-01 15:00:00 2015-01-01 15:11:11 NA mins
# 2: 2 name2 2015-01-01 15:01:00 2015-01-01 15:17:21 NA mins
# 3: 3 name1 2015-01-01 15:13:00 2015-01-01 15:17:00 1.816667 mins
# 4: 4 name1 2015-01-01 15:20:00 2015-01-01 15:31:21 3.000000 mins
# 5: 5 name2 2015-01-01 15:39:02 2015-01-01 15:40:11 21.683333 mins
或使用dplyr
library(dplyr)
# Per-owner idle time: Start minus the previous row's End within each owner.
# The result is left grouped by owner.
mutate(
  group_by(Data, owner),
  idletime = difftime(Start, lag(End, n = 1L), units = "mins")
)
# Source: local data frame [5 x 5]
# Groups: owner
#
# job owner Start End idletime
# 1 1 name1 2015-01-01 15:00:00 2015-01-01 15:11:11 NA mins
# 2 2 name2 2015-01-01 15:01:00 2015-01-01 15:17:21 NA mins
# 3 3 name1 2015-01-01 15:13:00 2015-01-01 15:17:00 1.816667 mins
# 4 4 name1 2015-01-01 15:20:00 2015-01-01 15:31:21 3.000000 mins
# 5 5 name2 2015-01-01 15:39:02 2015-01-01 15:40:11 21.683333 mins
如果我们使用 base R,ave 将是一个选项。我们使用 ave 按 'owner' 分组得到 'End' 的滞后值(即同一所有者上一个作业的 'End'),并将它作为 difftime 的第二个参数来创建 'idtime'。
# Base-R idle time: each job's Start minus the previous End of the same owner.
# The per-group shift is written out explicitly because stats::lag() only
# changes a series' time attributes and leaves the values where they are, so
# FUN = lag works only while another package masks it. Prepending an NA of the
# same class and truncating keeps End a POSIXct and makes each owner's first
# job NA. Rows are used in their existing order (assumed sorted by Start
# within owner).
Data$idtime <- with(Data, difftime(Start, ave(End, owner, FUN = function(x) c(x[NA_integer_], x)[seq_along(x)]), units = "mins"))
Data
# job owner Start End idtime
#1 1 name1 2015-01-01 15:00:00 2015-01-01 15:11:11 NA mins
#2 2 name2 2015-01-01 15:01:00 2015-01-01 15:17:21 NA mins
#3 3 name1 2015-01-01 15:13:00 2015-01-01 15:17:00 1.816667 mins
#4 4 name1 2015-01-01 15:20:00 2015-01-01 15:31:21 3.000000 mins
#5 5 name2 2015-01-01 15:39:02 2015-01-01 15:40:11 21.683333 mins
注意:我将列名称命名为 'idtime' 以使代码保持在一行中:-)