计算时间差结果全为0
Calculate time difference and results are all 0
我需要计算时间差,做一个变量'duration'。示例数据如下所示:
endTime startTime user_id categories duration
1 2019-02-22T19:17:02.618 2019-02-22T19:16:58.377 10224 communication 0 secs
2 2019-02-23T12:19:01.055 2019-02-23T12:18:44.414 10224 communication 0 secs
3 2019-02-25T21:03:15.771 2019-02-25T21:03:06.961 10224 utility & tools 0 secs
4 2019-02-27T19:22:41.174 2019-02-27T19:22:32.246 10224 communication 0 secs
endTime 和 startTime 都使用 as.POSIXct 设置为日期格式。我在 base R 中使用 difftime,代码如下所示:
dat$duration <- difftime(dat$startTime,dat$endTime)
并且所有持续时间的值为 0。我不明白为什么会这样。
我也查看了其他一些库(chron、lubridate)来计算这个差值,但它们似乎只接受一个同时包含两个时间的字符串,而不是两个变量。把两个变量合并成一个字符串在我看来并不合理……有没有更简单的方法?谢谢!
输出:
structure(list(battery = c(47L, 41L, 18L, 94L, 94L, 93L, 73L,
73L, 47L, 49L), endTime = c("2019-02-22T19:17:02.618", "2019-02-23T12:19:01.055",
"2019-02-25T21:03:15.771", "2019-02-27T19:22:41.174", "2019-02-27T19:22:53.256",
"2019-02-27T23:51:16.407", "2019-03-02T20:18:28.090", "2019-03-02T20:18:43.488",
"2019-03-19T13:07:16.993", "2019-03-19T12:16:36.962"), session = c(1550859371L,
1550920714L, 1551124876L, 1551291720L, 1551291720L, 1551307871L,
1551554295L, 1551554295L, 1552997232L, 1552994133L), startTime = c("2019-02-22T19:16:58.377",
"2019-02-23T12:18:44.414", "2019-02-25T21:03:06.961", "2019-02-27T19:22:32.246",
"2019-02-27T19:22:45.404", "2019-02-27T23:51:15.270", "2019-03-02T20:18:21.362",
"2019-03-02T20:18:37.066", "2019-03-19T13:07:15.348", "2019-03-19T12:15:38.440"
), user_id = c(10224L, 10224L, 10224L, 10224L, 10224L, 10224L,
10224L, 10224L, 10224L, 10224L), categories = structure(c(1L,
1L, 6L, 1L, 2L, 2L, 6L, 1L, 2L, 1L), .Label = c("communication",
"games & entertainment", "lifestyle", "news & information outlet",
"social network", "utility & tools"), class = "factor"), duration = structure(c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0), class = "difftime", units = "secs")), row.names = c(NA,
10L), class = "data.frame")
下面的代码对我有效,difftime 的结果不为零。
# options() returns the previous values as a named list; keep it so the
# original setting can be restored once we are done.
op_digs <- options(digits.secs = 3)
# %OS also reads the fractional part of the seconds (e.g. "02.618").
dat$endTime <- as.POSIXct(dat$endTime, format = "%Y-%m-%dT%H:%M:%OS")
dat$startTime <- as.POSIXct(dat$startTime, format = "%Y-%m-%dT%H:%M:%OS")
# difftime(a, b) computes a - b, so the end time goes first to obtain a
# positive duration; fixing the units keeps the column in seconds even when
# every interval is longer than a minute.
dat$duration <- difftime(dat$endTime, dat$startTime, units = "secs")
dat$duration
#Time differences in secs
#[1]  4.241 16.641  8.810  8.928
# Restore by handing the saved list back to options();
# options(digits.secs = op_digs) would store the list itself as the value.
options(op_digs)
数据.
# Rebuild the four-row sample shown in the question. Quoted fields keep
# values that contain spaces (e.g. 'utility & tools') in a single column.
# The header has one field fewer than the data rows, so read.table() uses
# the leading index of each row as the row names.
dat <- read.table(
  header = TRUE,
  text = "
endTime startTime user_id categories duration
1 2019-02-22T19:17:02.618 2019-02-22T19:16:58.377 10224 communication '0 secs'
2 2019-02-23T12:19:01.055 2019-02-23T12:18:44.414 10224 communication '0 secs'
3 2019-02-25T21:03:15.771 2019-02-25T21:03:06.961 10224 'utility & tools' '0 secs'
4 2019-02-27T19:22:41.174 2019-02-27T19:22:32.246 10224 communication '0 secs'
"
)
这是一个基于 dplyr
的解决方案,它具有更流畅的工作流程:
library(dplyr)
# Parse both timestamp columns to POSIXct (%OS keeps the fractional seconds),
# then subtract to get the duration. across() replaces the superseded
# mutate_at(); as.POSIXct() accepts `format` directly, so no strptime() call
# is needed.
dat %>%
  mutate(
    across(
      c(startTime, endTime),
      ~ as.POSIXct(.x, format = "%Y-%m-%dT%H:%M:%OS")
    )
  ) %>%
  mutate(duration = endTime - startTime)
battery endTime session startTime user_id categories duration
1 47 2019-02-22 19:17:02 1550859371 2019-02-22 19:16:58 10224 communication 4.241 secs
2 41 2019-02-23 12:19:01 1550920714 2019-02-23 12:18:44 10224 communication 16.641 secs
3 18 2019-02-25 21:03:15 1551124876 2019-02-25 21:03:06 10224 utility & tools 8.810 secs
4 94 2019-02-27 19:22:41 1551291720 2019-02-27 19:22:32 10224 communication 8.928 secs
5 94 2019-02-27 19:22:53 1551291720 2019-02-27 19:22:45 10224 games & entertainment 7.852 secs
6 93 2019-02-27 23:51:16 1551307871 2019-02-27 23:51:15 10224 games & entertainment 1.137 secs
7 73 2019-03-02 20:18:28 1551554295 2019-03-02 20:18:21 10224 utility & tools 6.728 secs
8 73 2019-03-02 20:18:43 1551554295 2019-03-02 20:18:37 10224 communication 6.422 secs
9 47 2019-03-19 13:07:16 1552997232 2019-03-19 13:07:15 10224 games & entertainment 1.645 secs
10 49 2019-03-19 12:16:36 1552994133 2019-03-19 12:15:38 10224 communication 58.522 secs
基本上,您首先将 endTime
和 startTime
列转换为正确的格式 (POSIXct),然后使用简单的减法。
下面是如何使用 lubridate
和 dplyr
包解决这个问题。它将提供与上述相同的结果。只是方式不同
# Sample data as posted in the question (dput output); `duration` still holds
# the all-zero values the question complains about.
df <- structure(list(battery = c(47L, 41L, 18L, 94L, 94L, 93L, 73L, 73L, 47L, 49L),
endTime = c("2019-02-22T19:17:02.618", "2019-02-23T12:19:01.055", "2019-02-25T21:03:15.771", "2019-02-27T19:22:41.174", "2019-02-27T19:22:53.256", "2019-02-27T23:51:16.407", "2019-03-02T20:18:28.090", "2019-03-02T20:18:43.488", "2019-03-19T13:07:16.993", "2019-03-19T12:16:36.962"),
session = c(1550859371L, 1550920714L, 1551124876L, 1551291720L, 1551291720L, 1551307871L, 1551554295L, 1551554295L, 1552997232L, 1552994133L),
startTime = c("2019-02-22T19:16:58.377", "2019-02-23T12:18:44.414", "2019-02-25T21:03:06.961", "2019-02-27T19:22:32.246", "2019-02-27T19:22:45.404", "2019-02-27T23:51:15.270", "2019-03-02T20:18:21.362", "2019-03-02T20:18:37.066", "2019-03-19T13:07:15.348", "2019-03-19T12:15:38.440"),
user_id = c(10224L, 10224L, 10224L, 10224L, 10224L, 10224L, 10224L, 10224L, 10224L, 10224L),
categories = structure(c(1L, 1L, 6L, 1L, 2L, 2L, 6L, 1L, 2L, 1L), .Label = c("communication", "games & entertainment", "lifestyle", "news & information outlet", "social network", "utility & tools"), class = "factor"),
duration = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), class = "difftime", units = "secs")), row.names = c(NA, 10L), class = "data.frame")
library(dplyr)
library(lubridate)
# ymd_hms() parses the ISO-8601 strings (including fractional seconds) into
# POSIXct; subtracting the parsed columns overwrites the stale duration.
df <- df %>%
  mutate(endTime = ymd_hms(endTime), startTime = ymd_hms(startTime)) %>%
  mutate(duration = endTime - startTime)
# Print the updated data frame; the result was assigned to `df`, and no
# object named `df2` is ever created.
df
这是输出
battery endTime session startTime user_id categories duration
1 47 2019-02-22 19:17:02 1550859371 2019-02-22 19:16:58 10224 communication 4.241 secs
2 41 2019-02-23 12:19:01 1550920714 2019-02-23 12:18:44 10224 communication 16.641 secs
3 18 2019-02-25 21:03:15 1551124876 2019-02-25 21:03:06 10224 utility & tools 8.810 secs
4 94 2019-02-27 19:22:41 1551291720 2019-02-27 19:22:32 10224 communication 8.928 secs
5 94 2019-02-27 19:22:53 1551291720 2019-02-27 19:22:45 10224 games & entertainment 7.852 secs
6 93 2019-02-27 23:51:16 1551307871 2019-02-27 23:51:15 10224 games & entertainment 1.137 secs
7 73 2019-03-02 20:18:28 1551554295 2019-03-02 20:18:21 10224 utility & tools 6.728 secs
8 73 2019-03-02 20:18:43 1551554295 2019-03-02 20:18:37 10224 communication 6.422 secs
9 47 2019-03-19 13:07:16 1552997232 2019-03-19 13:07:15 10224 games & entertainment 1.645 secs
10 49 2019-03-19 12:16:36 1552994133 2019-03-19 12:15:38 10224 communication 58.522 secs
我需要计算时间差,做一个变量'duration'。示例数据如下所示:
endTime startTime user_id categories duration
1 2019-02-22T19:17:02.618 2019-02-22T19:16:58.377 10224 communication 0 secs
2 2019-02-23T12:19:01.055 2019-02-23T12:18:44.414 10224 communication 0 secs
3 2019-02-25T21:03:15.771 2019-02-25T21:03:06.961 10224 utility & tools 0 secs
4 2019-02-27T19:22:41.174 2019-02-27T19:22:32.246 10224 communication 0 secs
endTime 和 startTime 都使用 as.POSIXct 设置为日期格式。我在 base R 中使用 difftime,代码如下所示:
dat$duration <- difftime(dat$startTime,dat$endTime)
并且所有持续时间的值为 0。我不明白为什么会这样。
我也查看了其他一些库(chron、lubridate)来计算这个差值,但它们似乎只接受一个同时包含两个时间的字符串,而不是两个变量。把两个变量合并成一个字符串在我看来并不合理……有没有更简单的方法?谢谢!
输出:
structure(list(battery = c(47L, 41L, 18L, 94L, 94L, 93L, 73L,
73L, 47L, 49L), endTime = c("2019-02-22T19:17:02.618", "2019-02-23T12:19:01.055",
"2019-02-25T21:03:15.771", "2019-02-27T19:22:41.174", "2019-02-27T19:22:53.256",
"2019-02-27T23:51:16.407", "2019-03-02T20:18:28.090", "2019-03-02T20:18:43.488",
"2019-03-19T13:07:16.993", "2019-03-19T12:16:36.962"), session = c(1550859371L,
1550920714L, 1551124876L, 1551291720L, 1551291720L, 1551307871L,
1551554295L, 1551554295L, 1552997232L, 1552994133L), startTime = c("2019-02-22T19:16:58.377",
"2019-02-23T12:18:44.414", "2019-02-25T21:03:06.961", "2019-02-27T19:22:32.246",
"2019-02-27T19:22:45.404", "2019-02-27T23:51:15.270", "2019-03-02T20:18:21.362",
"2019-03-02T20:18:37.066", "2019-03-19T13:07:15.348", "2019-03-19T12:15:38.440"
), user_id = c(10224L, 10224L, 10224L, 10224L, 10224L, 10224L,
10224L, 10224L, 10224L, 10224L), categories = structure(c(1L,
1L, 6L, 1L, 2L, 2L, 6L, 1L, 2L, 1L), .Label = c("communication",
"games & entertainment", "lifestyle", "news & information outlet",
"social network", "utility & tools"), class = "factor"), duration = structure(c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0), class = "difftime", units = "secs")), row.names = c(NA,
10L), class = "data.frame")
下面的代码对我有效,difftime 的结果不为零。
# options() returns the previous values as a named list; keep it so the
# original setting can be restored once we are done.
op_digs <- options(digits.secs = 3)
# %OS also reads the fractional part of the seconds (e.g. "02.618").
dat$endTime <- as.POSIXct(dat$endTime, format = "%Y-%m-%dT%H:%M:%OS")
dat$startTime <- as.POSIXct(dat$startTime, format = "%Y-%m-%dT%H:%M:%OS")
# difftime(a, b) computes a - b, so the end time goes first to obtain a
# positive duration; fixing the units keeps the column in seconds even when
# every interval is longer than a minute.
dat$duration <- difftime(dat$endTime, dat$startTime, units = "secs")
dat$duration
#Time differences in secs
#[1]  4.241 16.641  8.810  8.928
# Restore by handing the saved list back to options();
# options(digits.secs = op_digs) would store the list itself as the value.
options(op_digs)
数据.
# Rebuild the four-row sample shown in the question. Quoted fields keep
# values that contain spaces (e.g. 'utility & tools') in a single column.
# The header has one field fewer than the data rows, so read.table() uses
# the leading index of each row as the row names.
dat <- read.table(
  header = TRUE,
  text = "
endTime startTime user_id categories duration
1 2019-02-22T19:17:02.618 2019-02-22T19:16:58.377 10224 communication '0 secs'
2 2019-02-23T12:19:01.055 2019-02-23T12:18:44.414 10224 communication '0 secs'
3 2019-02-25T21:03:15.771 2019-02-25T21:03:06.961 10224 'utility & tools' '0 secs'
4 2019-02-27T19:22:41.174 2019-02-27T19:22:32.246 10224 communication '0 secs'
"
)
这是一个基于 dplyr
的解决方案,它具有更流畅的工作流程:
library(dplyr)
# Parse both timestamp columns to POSIXct (%OS keeps the fractional seconds),
# then subtract to get the duration. across() replaces the superseded
# mutate_at(); as.POSIXct() accepts `format` directly, so no strptime() call
# is needed.
dat %>%
  mutate(
    across(
      c(startTime, endTime),
      ~ as.POSIXct(.x, format = "%Y-%m-%dT%H:%M:%OS")
    )
  ) %>%
  mutate(duration = endTime - startTime)
battery endTime session startTime user_id categories duration
1 47 2019-02-22 19:17:02 1550859371 2019-02-22 19:16:58 10224 communication 4.241 secs
2 41 2019-02-23 12:19:01 1550920714 2019-02-23 12:18:44 10224 communication 16.641 secs
3 18 2019-02-25 21:03:15 1551124876 2019-02-25 21:03:06 10224 utility & tools 8.810 secs
4 94 2019-02-27 19:22:41 1551291720 2019-02-27 19:22:32 10224 communication 8.928 secs
5 94 2019-02-27 19:22:53 1551291720 2019-02-27 19:22:45 10224 games & entertainment 7.852 secs
6 93 2019-02-27 23:51:16 1551307871 2019-02-27 23:51:15 10224 games & entertainment 1.137 secs
7 73 2019-03-02 20:18:28 1551554295 2019-03-02 20:18:21 10224 utility & tools 6.728 secs
8 73 2019-03-02 20:18:43 1551554295 2019-03-02 20:18:37 10224 communication 6.422 secs
9 47 2019-03-19 13:07:16 1552997232 2019-03-19 13:07:15 10224 games & entertainment 1.645 secs
10 49 2019-03-19 12:16:36 1552994133 2019-03-19 12:15:38 10224 communication 58.522 secs
基本上,您首先将 endTime
和 startTime
列转换为正确的格式 (POSIXct),然后使用简单的减法。
下面是如何使用 lubridate
和 dplyr
包解决这个问题。它将提供与上述相同的结果。只是方式不同
# Sample data as posted in the question (dput output); `duration` still holds
# the all-zero values the question complains about.
df <- structure(list(battery = c(47L, 41L, 18L, 94L, 94L, 93L, 73L, 73L, 47L, 49L),
endTime = c("2019-02-22T19:17:02.618", "2019-02-23T12:19:01.055", "2019-02-25T21:03:15.771", "2019-02-27T19:22:41.174", "2019-02-27T19:22:53.256", "2019-02-27T23:51:16.407", "2019-03-02T20:18:28.090", "2019-03-02T20:18:43.488", "2019-03-19T13:07:16.993", "2019-03-19T12:16:36.962"),
session = c(1550859371L, 1550920714L, 1551124876L, 1551291720L, 1551291720L, 1551307871L, 1551554295L, 1551554295L, 1552997232L, 1552994133L),
startTime = c("2019-02-22T19:16:58.377", "2019-02-23T12:18:44.414", "2019-02-25T21:03:06.961", "2019-02-27T19:22:32.246", "2019-02-27T19:22:45.404", "2019-02-27T23:51:15.270", "2019-03-02T20:18:21.362", "2019-03-02T20:18:37.066", "2019-03-19T13:07:15.348", "2019-03-19T12:15:38.440"),
user_id = c(10224L, 10224L, 10224L, 10224L, 10224L, 10224L, 10224L, 10224L, 10224L, 10224L),
categories = structure(c(1L, 1L, 6L, 1L, 2L, 2L, 6L, 1L, 2L, 1L), .Label = c("communication", "games & entertainment", "lifestyle", "news & information outlet", "social network", "utility & tools"), class = "factor"),
duration = structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), class = "difftime", units = "secs")), row.names = c(NA, 10L), class = "data.frame")
library(dplyr)
library(lubridate)
# ymd_hms() parses the ISO-8601 strings (including fractional seconds) into
# POSIXct; subtracting the parsed columns overwrites the stale duration.
df <- df %>%
  mutate(endTime = ymd_hms(endTime), startTime = ymd_hms(startTime)) %>%
  mutate(duration = endTime - startTime)
# Print the updated data frame; the result was assigned to `df`, and no
# object named `df2` is ever created.
df
这是输出
battery endTime session startTime user_id categories duration
1 47 2019-02-22 19:17:02 1550859371 2019-02-22 19:16:58 10224 communication 4.241 secs
2 41 2019-02-23 12:19:01 1550920714 2019-02-23 12:18:44 10224 communication 16.641 secs
3 18 2019-02-25 21:03:15 1551124876 2019-02-25 21:03:06 10224 utility & tools 8.810 secs
4 94 2019-02-27 19:22:41 1551291720 2019-02-27 19:22:32 10224 communication 8.928 secs
5 94 2019-02-27 19:22:53 1551291720 2019-02-27 19:22:45 10224 games & entertainment 7.852 secs
6 93 2019-02-27 23:51:16 1551307871 2019-02-27 23:51:15 10224 games & entertainment 1.137 secs
7 73 2019-03-02 20:18:28 1551554295 2019-03-02 20:18:21 10224 utility & tools 6.728 secs
8 73 2019-03-02 20:18:43 1551554295 2019-03-02 20:18:37 10224 communication 6.422 secs
9 47 2019-03-19 13:07:16 1552997232 2019-03-19 13:07:15 10224 games & entertainment 1.645 secs
10 49 2019-03-19 12:16:36 1552994133 2019-03-19 12:15:38 10224 communication 58.522 secs