R:按因子分组计算相邻行之间的时间差
difference Time between rows by factor R
我有这个样本:
# Sample data: 20 position reports for two vessels (mmsi), already sorted by
# mmsi. The `.internal.selfref = <pointer: ...>` attribute that dput() emits
# for a data.table has been dropped because it is not valid R syntax and made
# the snippet impossible to paste and run; data.table(data) / setDT(data)
# re-creates the self-reference when needed.
data <- structure(list(timestamp = c(1401581040991, 1401581230769, 1401581410907,
1401581591597, 1401581960830, 1401582002091, 1401582140958, 1401582330515,
1401585071017, 1401585432174, 1401585641225, 1401586011911, 1401587120695,
1401588721173, 1401589081689, 1401581041819, 1401585363131, 1401586083812,
1401586983743, 1401588785148), timestamp_pretty = structure(c(1L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 12L, 13L, 14L, 17L, 18L, 20L,
2L, 11L, 15L, 16L, 19L), .Label = c("01/06/2014 00:04:00", "01/06/2014 00:04:01",
"01/06/2014 00:07:10", "01/06/2014 00:10:10", "01/06/2014 00:13:11",
"01/06/2014 00:19:20", "01/06/2014 00:20:02", "01/06/2014 00:22:20",
"01/06/2014 00:25:30", "01/06/2014 01:11:11", "01/06/2014 01:16:03",
"01/06/2014 01:17:12", "01/06/2014 01:20:41", "01/06/2014 01:26:51",
"01/06/2014 01:28:03", "01/06/2014 01:43:03", "01/06/2014 01:45:20",
"01/06/2014 02:12:01", "01/06/2014 02:13:05", "01/06/2014 02:18:01"
), class = "factor"), mmsi = c(205477000L, 205477000L, 205477000L,
205477000L, 205477000L, 205477000L, 205477000L, 205477000L, 205477000L,
205477000L, 205477000L, 205477000L, 205477000L, 205477000L, 205477000L,
205482000L, 205482000L, 205482000L, 205482000L, 205482000L),
diff_time_seconds = c(NA, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 1L, 1L, 3L, 1L, 2L, NA, 9L, 4L, 1L, 3L)), .Names = c("timestamp",
"timestamp_pretty", "mmsi", "diff_time_seconds"), row.names = c(NA,
-20L), class = c("data.table", "data.frame"), sorted = "mmsi")
我想计算每个因子(mmsi)内相邻行之间的时间差(以秒为单位)。例如,某个因子第一次出现时,时间差为 0。
我可以用 timestamp
列(纪元时间)来完成。但是当我尝试使用 timestamp_pretty
时,结果就完全不对了。我到处搜索,但没有找到解决方案,尽管我记得几天前曾经找到过……
这是我的输出示例:你可以清楚地看到 diff_time_seconds_timestamp_pretty
不对...
structure(list(timestamp = c(1401581040991, 1401581230769, 1401581410907,
1401581591597, 1401581960830, 1401582002091, 1401582140958, 1401582330515,
1401585071017, 1401585432174, 1401585641225, 1401586011911, 1401587120695,
1401588721173, 1401589081689, 1401581041819, 1401585363131, 1401586083812,
1401586983743, 1401588785148), timestamp_pretty = structure(c(1L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 12L, 13L, 14L, 17L, 18L, 20L,
2L, 11L, 15L, 16L, 19L), .Label = c("01/06/2014 00:04:00", "01/06/2014 00:04:01",
"01/06/2014 00:07:10", "01/06/2014 00:10:10", "01/06/2014 00:13:11",
"01/06/2014 00:19:20", "01/06/2014 00:20:02", "01/06/2014 00:22:20",
"01/06/2014 00:25:30", "01/06/2014 01:11:11", "01/06/2014 01:16:03",
"01/06/2014 01:17:12", "01/06/2014 01:20:41", "01/06/2014 01:26:51",
"01/06/2014 01:28:03", "01/06/2014 01:43:03", "01/06/2014 01:45:20",
"01/06/2014 02:12:01", "01/06/2014 02:13:05", "01/06/2014 02:18:01"
), class = "factor"), mmsi = c(205477000L, 205477000L, 205477000L,
205477000L, 205477000L, 205477000L, 205477000L, 205477000L, 205477000L,
205477000L, 205477000L, 205477000L, 205477000L, 205477000L, 205477000L,
205482000L, 205482000L, 205482000L, 205482000L, 205482000L),
diff_time_seconds_timestamp_pretty = c(NA, 2L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 1L, 1L, 3L, 1L, 2L, NA, 9L, 4L, 1L, 3L
), diff_time_seconds_timestamp = c(NA, 189778, 180138, 180690,
369233, 41261, 138867, 189557, 2740502, 361157, 209051, 370686,
1108784, 1600478, 360516, NA, 4321312, 720681, 899931, 1801405
)), .Names = c("timestamp", "timestamp_pretty", "mmsi", "diff_time_seconds_timestamp_pretty",
"diff_time_seconds_timestamp"), row.names = c(NA, -20L), class = c("data.table",
"data.frame"), .internal.selfref = <pointer: 0x00000000001e0788>, sorted = "mmsi")
我使用了以下代码:
# Asker's original attempt, kept verbatim: it computes per-mmsi row-to-row
# differences twice, once from timestamp_pretty and once from timestamp.

# Keep only the three input columns.
data <- data[,c("timestamp", "timestamp_pretty","mmsi")]
# Sort rows by vessel, then by timestamp_pretty.
# NOTE(review): timestamp_pretty is a factor in the sample, so this ordering
# presumably follows the factor's level order rather than parsed dates.
data <- data[order(data$mmsi, data$timestamp_pretty),]
library("data.table")
data<-data.table(data)
setkey(data,mmsi)
# diff() on a factor subtracts the underlying integer level codes, which is
# why this column shows small integers (2, 1, 1, ...) instead of seconds.
data[,diff_time_seconds_timestamp_pretty:=c(NA,diff(timestamp_pretty)),by=mmsi]
# Print numbers with up to 12 significant digits.
options(digits=12)
# Difference of the raw numeric timestamps within each mmsi.
# NOTE(review): the sample values look like epoch milliseconds, so this result
# is presumably in ms despite the "seconds" column name - confirm.
data[,diff_time_seconds_timestamp:=c(NA,diff(timestamp)),by=mmsi]
我认为 diff_time_seconds_timestamp_pretty
的格式有问题,但我卡在这里了!
谢谢!
diff(timestamp_pretty)
作用于因子的整数编码,而不是日期(无论如何,它都不能用于字符向量)。
可以像下面这样把该列转换为 POSIXct,或者新建一个 POSIXct 列(我选择了第二种方式):
# Add a POSIXct column parsed from the day/month/year strings, pinned to UTC.
data[, pos_time := as.POSIXct(
  timestamp_pretty,
  format = "%d/%m/%Y %H:%M:%S",
  tz = "UTC"
)]
现在 diff 可以工作了:
# Gap to the previous row within each mmsi, always expressed in seconds
# (NA for the first row of a group).
# diff() on POSIXct returns a difftime whose unit is auto-selected per call
# (secs, mins, hours, ...). With by = "mmsi" every group would pick its own
# unit, and c() then drops the unit, silently mixing scales across groups.
# Converting with units = "secs" before combining keeps one consistent scale.
data[, diff_time_seconds_timestamp_pretty := c(
  NA_real_,
  as.numeric(diff(pos_time), units = "secs")
), by = "mmsi"]
给出:
> head(data)
timestamp timestamp_pretty mmsi diff_time_seconds pos_time diff_time_seconds_timestamp_pretty
1: 1.401581e+12 01/06/2014 00:04:00 205477000 NA 2014-06-01 00:04:00 NA
2: 1.401581e+12 01/06/2014 00:07:10 205477000 2 2014-06-01 00:07:10 190
3: 1.401581e+12 01/06/2014 00:10:10 205477000 1 2014-06-01 00:10:10 180
4: 1.401582e+12 01/06/2014 00:13:11 205477000 1 2014-06-01 00:13:11 181
5: 1.401582e+12 01/06/2014 00:19:20 205477000 1 2014-06-01 00:19:20 369
6: 1.401582e+12 01/06/2014 00:20:02 205477000 1 2014-06-01 00:20:02 42
我有这个样本:
# Sample data: 20 position reports for two vessels (mmsi), already sorted by
# mmsi. The `.internal.selfref = <pointer: ...>` attribute that dput() emits
# for a data.table has been dropped because it is not valid R syntax and made
# the snippet impossible to paste and run; data.table(data) / setDT(data)
# re-creates the self-reference when needed.
data <- structure(list(timestamp = c(1401581040991, 1401581230769, 1401581410907,
1401581591597, 1401581960830, 1401582002091, 1401582140958, 1401582330515,
1401585071017, 1401585432174, 1401585641225, 1401586011911, 1401587120695,
1401588721173, 1401589081689, 1401581041819, 1401585363131, 1401586083812,
1401586983743, 1401588785148), timestamp_pretty = structure(c(1L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 12L, 13L, 14L, 17L, 18L, 20L,
2L, 11L, 15L, 16L, 19L), .Label = c("01/06/2014 00:04:00", "01/06/2014 00:04:01",
"01/06/2014 00:07:10", "01/06/2014 00:10:10", "01/06/2014 00:13:11",
"01/06/2014 00:19:20", "01/06/2014 00:20:02", "01/06/2014 00:22:20",
"01/06/2014 00:25:30", "01/06/2014 01:11:11", "01/06/2014 01:16:03",
"01/06/2014 01:17:12", "01/06/2014 01:20:41", "01/06/2014 01:26:51",
"01/06/2014 01:28:03", "01/06/2014 01:43:03", "01/06/2014 01:45:20",
"01/06/2014 02:12:01", "01/06/2014 02:13:05", "01/06/2014 02:18:01"
), class = "factor"), mmsi = c(205477000L, 205477000L, 205477000L,
205477000L, 205477000L, 205477000L, 205477000L, 205477000L, 205477000L,
205477000L, 205477000L, 205477000L, 205477000L, 205477000L, 205477000L,
205482000L, 205482000L, 205482000L, 205482000L, 205482000L),
diff_time_seconds = c(NA, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 1L, 1L, 3L, 1L, 2L, NA, 9L, 4L, 1L, 3L)), .Names = c("timestamp",
"timestamp_pretty", "mmsi", "diff_time_seconds"), row.names = c(NA,
-20L), class = c("data.table", "data.frame"), sorted = "mmsi")
我想计算每个因子(mmsi)内相邻行之间的时间差(以秒为单位)。例如,某个因子第一次出现时,时间差为 0。
我可以用 timestamp
列(纪元时间)来完成。但是当我尝试使用 timestamp_pretty
时,结果就完全不对了。我到处搜索,但没有找到解决方案,尽管我记得几天前曾经找到过……
这是我的输出示例:你可以清楚地看到 diff_time_seconds_timestamp_pretty
不对...
structure(list(timestamp = c(1401581040991, 1401581230769, 1401581410907,
1401581591597, 1401581960830, 1401582002091, 1401582140958, 1401582330515,
1401585071017, 1401585432174, 1401585641225, 1401586011911, 1401587120695,
1401588721173, 1401589081689, 1401581041819, 1401585363131, 1401586083812,
1401586983743, 1401588785148), timestamp_pretty = structure(c(1L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 12L, 13L, 14L, 17L, 18L, 20L,
2L, 11L, 15L, 16L, 19L), .Label = c("01/06/2014 00:04:00", "01/06/2014 00:04:01",
"01/06/2014 00:07:10", "01/06/2014 00:10:10", "01/06/2014 00:13:11",
"01/06/2014 00:19:20", "01/06/2014 00:20:02", "01/06/2014 00:22:20",
"01/06/2014 00:25:30", "01/06/2014 01:11:11", "01/06/2014 01:16:03",
"01/06/2014 01:17:12", "01/06/2014 01:20:41", "01/06/2014 01:26:51",
"01/06/2014 01:28:03", "01/06/2014 01:43:03", "01/06/2014 01:45:20",
"01/06/2014 02:12:01", "01/06/2014 02:13:05", "01/06/2014 02:18:01"
), class = "factor"), mmsi = c(205477000L, 205477000L, 205477000L,
205477000L, 205477000L, 205477000L, 205477000L, 205477000L, 205477000L,
205477000L, 205477000L, 205477000L, 205477000L, 205477000L, 205477000L,
205482000L, 205482000L, 205482000L, 205482000L, 205482000L),
diff_time_seconds_timestamp_pretty = c(NA, 2L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 1L, 1L, 3L, 1L, 2L, NA, 9L, 4L, 1L, 3L
), diff_time_seconds_timestamp = c(NA, 189778, 180138, 180690,
369233, 41261, 138867, 189557, 2740502, 361157, 209051, 370686,
1108784, 1600478, 360516, NA, 4321312, 720681, 899931, 1801405
)), .Names = c("timestamp", "timestamp_pretty", "mmsi", "diff_time_seconds_timestamp_pretty",
"diff_time_seconds_timestamp"), row.names = c(NA, -20L), class = c("data.table",
"data.frame"), .internal.selfref = <pointer: 0x00000000001e0788>, sorted = "mmsi")
我使用了以下代码:
# Asker's original attempt, kept verbatim: it computes per-mmsi row-to-row
# differences twice, once from timestamp_pretty and once from timestamp.

# Keep only the three input columns.
data <- data[,c("timestamp", "timestamp_pretty","mmsi")]
# Sort rows by vessel, then by timestamp_pretty.
# NOTE(review): timestamp_pretty is a factor in the sample, so this ordering
# presumably follows the factor's level order rather than parsed dates.
data <- data[order(data$mmsi, data$timestamp_pretty),]
library("data.table")
data<-data.table(data)
setkey(data,mmsi)
# diff() on a factor subtracts the underlying integer level codes, which is
# why this column shows small integers (2, 1, 1, ...) instead of seconds.
data[,diff_time_seconds_timestamp_pretty:=c(NA,diff(timestamp_pretty)),by=mmsi]
# Print numbers with up to 12 significant digits.
options(digits=12)
# Difference of the raw numeric timestamps within each mmsi.
# NOTE(review): the sample values look like epoch milliseconds, so this result
# is presumably in ms despite the "seconds" column name - confirm.
data[,diff_time_seconds_timestamp:=c(NA,diff(timestamp)),by=mmsi]
我认为 diff_time_seconds_timestamp_pretty
的格式有问题,但我卡在这里了!
谢谢!
diff(timestamp_pretty)
作用于因子的整数编码,而不是日期(无论如何,它都不能用于字符向量)。
可以像下面这样把该列转换为 POSIXct,或者新建一个 POSIXct 列(我选择了第二种方式):
# Add a POSIXct column parsed from the day/month/year strings, pinned to UTC.
data[, pos_time := as.POSIXct(
  timestamp_pretty,
  format = "%d/%m/%Y %H:%M:%S",
  tz = "UTC"
)]
现在 diff 可以工作了:
# Gap to the previous row within each mmsi, always expressed in seconds
# (NA for the first row of a group).
# diff() on POSIXct returns a difftime whose unit is auto-selected per call
# (secs, mins, hours, ...). With by = "mmsi" every group would pick its own
# unit, and c() then drops the unit, silently mixing scales across groups.
# Converting with units = "secs" before combining keeps one consistent scale.
data[, diff_time_seconds_timestamp_pretty := c(
  NA_real_,
  as.numeric(diff(pos_time), units = "secs")
), by = "mmsi"]
给出:
> head(data)
timestamp timestamp_pretty mmsi diff_time_seconds pos_time diff_time_seconds_timestamp_pretty
1: 1.401581e+12 01/06/2014 00:04:00 205477000 NA 2014-06-01 00:04:00 NA
2: 1.401581e+12 01/06/2014 00:07:10 205477000 2 2014-06-01 00:07:10 190
3: 1.401581e+12 01/06/2014 00:10:10 205477000 1 2014-06-01 00:10:10 180
4: 1.401582e+12 01/06/2014 00:13:11 205477000 1 2014-06-01 00:13:11 181
5: 1.401582e+12 01/06/2014 00:19:20 205477000 1 2014-06-01 00:19:20 369
6: 1.401582e+12 01/06/2014 00:20:02 205477000 1 2014-06-01 00:20:02 42