计算时间差结果全为0

Calculate time difference and results are all 0

我需要计算时间差,做一个变量'duration'。示例数据如下所示:

                endTime                   startTime     user_id      categories duration
1      2019-02-22T19:17:02.618 2019-02-22T19:16:58.377   10224   communication   0 secs
2      2019-02-23T12:19:01.055 2019-02-23T12:18:44.414   10224   communication   0 secs
3      2019-02-25T21:03:15.771 2019-02-25T21:03:06.961   10224 utility & tools   0 secs
4      2019-02-27T19:22:41.174 2019-02-27T19:22:32.246   10224   communication   0 secs

endTime 和 startTime 都使用 as.POSIXct 设置为日期格式。我在 base R 中使用 difftime,代码如下所示:

dat$duration <- difftime(dat$startTime,dat$endTime)

结果所有 duration 的值都是 0,我不明白为什么会这样。我也查看了其他一些用于计算时间差的包(chron、lubridate),但它们似乎只接受一个同时包含两个时间的字符串,而不是两个变量。把两个变量合并成一个字符串在我看来并不明智……有没有更简单的方法?谢谢!!

输出:

structure(list(battery = c(47L, 41L, 18L, 94L, 94L, 93L, 73L, 
73L, 47L, 49L), endTime = c("2019-02-22T19:17:02.618", "2019-02-23T12:19:01.055", 
"2019-02-25T21:03:15.771", "2019-02-27T19:22:41.174", "2019-02-27T19:22:53.256", 
"2019-02-27T23:51:16.407", "2019-03-02T20:18:28.090", "2019-03-02T20:18:43.488", 
"2019-03-19T13:07:16.993", "2019-03-19T12:16:36.962"), session = c(1550859371L, 
1550920714L, 1551124876L, 1551291720L, 1551291720L, 1551307871L, 
1551554295L, 1551554295L, 1552997232L, 1552994133L), startTime = c("2019-02-22T19:16:58.377", 
"2019-02-23T12:18:44.414", "2019-02-25T21:03:06.961", "2019-02-27T19:22:32.246", 
"2019-02-27T19:22:45.404", "2019-02-27T23:51:15.270", "2019-03-02T20:18:21.362", 
"2019-03-02T20:18:37.066", "2019-03-19T13:07:15.348", "2019-03-19T12:15:38.440"
), user_id = c(10224L, 10224L, 10224L, 10224L, 10224L, 10224L, 
10224L, 10224L, 10224L, 10224L), categories = structure(c(1L, 
1L, 6L, 1L, 2L, 2L, 6L, 1L, 2L, 1L), .Label = c("communication", 
"games & entertainment", "lifestyle", "news & information outlet", 
"social network", "utility & tools"), class = "factor"), duration = structure(c(0, 
0, 0, 0, 0, 0, 0, 0, 0, 0), class = "difftime", units = "secs")), row.names = c(NA, 
10L), class = "data.frame")

以下方法对我有效,difftime 的结果不为零。

# Show millisecond precision when printing date-times. options() returns the
# previous settings as a list, which is kept so they can be restored below.
op_digs <- options(digits.secs = 3)

# Parse the "T"-separated timestamp strings; %OS (rather than %S) keeps the
# fractional seconds. No tz is supplied, so the session time zone is used.
dat$endTime <- as.POSIXct(dat$endTime, format = "%Y-%m-%dT%H:%M:%OS")
dat$startTime <- as.POSIXct(dat$startTime, format = "%Y-%m-%dT%H:%M:%OS")

# End minus start gives a positive duration. The units are fixed so the column
# does not switch to minutes or hours when the differences are larger.
difftime(dat$endTime, dat$startTime, units = "secs")
#Time differences in secs
#[1]  4.241 16.641  8.810  8.928

dat$duration <- difftime(dat$endTime, dat$startTime, units = "secs")

# Restore the saved settings by passing the saved list itself;
# `options(digits.secs = op_digs)` would store that list in the option.
options(op_digs)

数据.

# Rebuild the four sample rows shown in the question.
# The header names five columns while every data line has six fields, so
# read.table() takes the first field as row names. Quoted values keep their
# embedded spaces.
dat <- local({
  sample_text <- "
                endTime                   startTime     user_id      categories duration
1      2019-02-22T19:17:02.618 2019-02-22T19:16:58.377   10224   communication   '0 secs'
2      2019-02-23T12:19:01.055 2019-02-23T12:18:44.414   10224   communication   '0 secs'
3      2019-02-25T21:03:15.771 2019-02-25T21:03:06.961   10224 'utility & tools'   '0 secs'
4      2019-02-27T19:22:41.174 2019-02-27T19:22:32.246   10224   communication   '0 secs'
"
  read.table(header = TRUE, text = sample_text)
})

这是一个基于 dplyr 的解决方案,它具有更流畅的工作流程:

library(dplyr)

# Convert both timestamp columns to POSIXct, then derive the session length.
# across() replaces the superseded mutate_at(); %OS keeps fractional seconds.
# difftime() with fixed units is used instead of `-`, which would choose
# secs/mins/hours depending on the data.
dat %>%
  mutate(across(
    c(startTime, endTime),
    ~ as.POSIXct(.x, format = "%Y-%m-%dT%H:%M:%OS")
  )) %>%
  mutate(duration = difftime(endTime, startTime, units = "secs"))

   battery             endTime    session           startTime user_id            categories    duration
1       47 2019-02-22 19:17:02 1550859371 2019-02-22 19:16:58   10224         communication  4.241 secs
2       41 2019-02-23 12:19:01 1550920714 2019-02-23 12:18:44   10224         communication 16.641 secs
3       18 2019-02-25 21:03:15 1551124876 2019-02-25 21:03:06   10224       utility & tools  8.810 secs
4       94 2019-02-27 19:22:41 1551291720 2019-02-27 19:22:32   10224         communication  8.928 secs
5       94 2019-02-27 19:22:53 1551291720 2019-02-27 19:22:45   10224 games & entertainment  7.852 secs
6       93 2019-02-27 23:51:16 1551307871 2019-02-27 23:51:15   10224 games & entertainment  1.137 secs
7       73 2019-03-02 20:18:28 1551554295 2019-03-02 20:18:21   10224       utility & tools  6.728 secs
8       73 2019-03-02 20:18:43 1551554295 2019-03-02 20:18:37   10224         communication  6.422 secs
9       47 2019-03-19 13:07:16 1552997232 2019-03-19 13:07:15   10224 games & entertainment  1.645 secs
10      49 2019-03-19 12:16:36 1552994133 2019-03-19 12:15:38   10224         communication 58.522 secs

基本上,您首先将 endTime 和 startTime 两列转换为正确的格式 (POSIXct),然后使用简单的减法。

下面是如何使用 lubridate 和 dplyr 包解决这个问题。它将得到与上面相同的结果,只是方式不同。

# Sample data, rebuilt from the dput() output in the question: the two
# timestamp columns are still character strings and `duration` is the
# all-zero difftime column that needs to be recomputed.
df<-structure(list(battery = c(47L, 41L, 18L, 94L, 94L, 93L, 73L, 73L, 47L, 49L), 
                   endTime = c("2019-02-22T19:17:02.618", "2019-02-23T12:19:01.055", "2019-02-25T21:03:15.771", "2019-02-27T19:22:41.174", "2019-02-27T19:22:53.256", "2019-02-27T23:51:16.407", "2019-03-02T20:18:28.090", "2019-03-02T20:18:43.488", "2019-03-19T13:07:16.993", "2019-03-19T12:16:36.962"), 
                   session = c(1550859371L,1550920714L, 1551124876L, 1551291720L, 1551291720L, 1551307871L,1551554295L, 1551554295L, 1552997232L, 1552994133L),
                   startTime = c("2019-02-22T19:16:58.377",  "2019-02-23T12:18:44.414", "2019-02-25T21:03:06.961", "2019-02-27T19:22:32.246", "2019-02-27T19:22:45.404", "2019-02-27T23:51:15.270", "2019-03-02T20:18:21.362", "2019-03-02T20:18:37.066", "2019-03-19T13:07:15.348", "2019-03-19T12:15:38.440"),
                   user_id = c(10224L, 10224L, 10224L, 10224L, 10224L, 10224L, 10224L, 10224L, 10224L, 10224L), 
                   categories = structure(c(1L, 1L, 6L, 1L, 2L, 2L, 6L, 1L, 2L, 1L), .Label = c("communication", "games & entertainment", "lifestyle", "news & information outlet", "social network", "utility & tools"), class = "factor"), 
                   duration = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), class = "difftime", units = "secs")), row.names = c(NA, 10L), class = "data.frame")

library(dplyr)
library(lubridate)

# Parse the "T"-separated timestamp strings with ymd_hms(), then compute the
# session length as end minus start. The units are fixed to seconds so the
# column does not change units with the data.
df <- df %>%
  mutate(endTime = ymd_hms(endTime), startTime = ymd_hms(startTime)) %>%
  mutate(duration = difftime(endTime, startTime, units = "secs"))

# The result is stored back in `df`, so print `df`; no `df2` is ever created.
df

这是输出

   battery             endTime    session           startTime user_id            categories    duration
1       47 2019-02-22 19:17:02 1550859371 2019-02-22 19:16:58   10224         communication  4.241 secs
2       41 2019-02-23 12:19:01 1550920714 2019-02-23 12:18:44   10224         communication 16.641 secs
3       18 2019-02-25 21:03:15 1551124876 2019-02-25 21:03:06   10224       utility & tools  8.810 secs
4       94 2019-02-27 19:22:41 1551291720 2019-02-27 19:22:32   10224         communication  8.928 secs
5       94 2019-02-27 19:22:53 1551291720 2019-02-27 19:22:45   10224 games & entertainment  7.852 secs
6       93 2019-02-27 23:51:16 1551307871 2019-02-27 23:51:15   10224 games & entertainment  1.137 secs
7       73 2019-03-02 20:18:28 1551554295 2019-03-02 20:18:21   10224       utility & tools  6.728 secs
8       73 2019-03-02 20:18:43 1551554295 2019-03-02 20:18:37   10224         communication  6.422 secs
9       47 2019-03-19 13:07:16 1552997232 2019-03-19 13:07:15   10224 games & entertainment  1.645 secs
10      49 2019-03-19 12:16:36 1552994133 2019-03-19 12:15:38   10224         communication 58.522 secs