基于五分钟时间间隔统计和聚合数据
Counting and aggregating data based on five minutes time interval
我的数据df
是这样的
ID <- c(1,1,1,2,2,2,3,3,3,3,4,4)
Time <- c("7:44:29","7:44:30","7:45:31","7:46:35","7:47:37","7:48:34","7:49:35","7:50:36",'7:51:13',"7:52:14","7:52:15","7:53:16")
Speed_kmh <- c(2,5,8,2,4,6,9,8,6,8,10,14)
Distance_m <- c(162,162,162,162,162,162,162,157,150,137,122,102)
df <- data.frame(ID, Time, Speed_kmh, Distance_m)
df
ID Time Speed_kmh Distance_m
1 1 7:44:29 2 162
2 1 7:44:30 5 162
3 1 7:45:31 8 162
4 2 7:46:35 2 162
5 2 7:47:37 4 162
6 2 7:48:34 6 162
7 3 7:49:35 9 162
8 3 7:50:36 8 157
9 3 7:51:13 6 150
10 3 7:52:14 8 137
11 4 7:52:15 10 122
12 4 7:53:16 14 102
我想按五分钟间隔做两件事
1-统计唯一ID值的个数
2- 平均 Speed_kmh 和 Distance_m 值
所需的输出应如下所示。
ID Time Speed_kmh Distance_m
1 2 7:48:34 5 162
2 2 7:53:16 9 138
你可以试试
library(lubridate)
lilibrary(data.table)
df1 %>%
mutate(Time = hms(Time)) %>%
mutate(tt = floor(lubridate::minute(Time - Time[1])/5) ) %>%
group_by(tt) %>%
summarize(ID = length(unique(ID)), Time = last(Time), Speed_kmh = mean(Speed_kmh), Distance_m = mean(Distance_m)) %>%
select(-tt)
ID Time Speed_kmh Distance_m
<dbl> <Period> <dbl> <dbl>
1 2 7H 48M 34S 4.5 162
2 2 7H 53M 16S 9.17 138.
df1 %>%
mutate(Time = hms(Time)) %>%
mutate(ttt= (lubridate::minute(Time) + lubridate::hour(Time)* 60)) %>%
mutate(tt = floor(ttt/5) ) %>%
group_by(tt) %>%
summarize(ID = length(unique(ID)), Time = last(Time), Speed_kmh = mean(Speed_kmh), Distance_m = mean(Distance_m))
tt ID Time Speed_kmh Distance_m
<dbl> <int> <Period> <dbl> <dbl>
1 92 1 7H 44M 30S 3.5 162
2 93 3 7H 49M 35S 5.8 162
3 94 2 7H 53M 16S 9.2 134.
我的数据df
是这样的
ID <- c(1,1,1,2,2,2,3,3,3,3,4,4)
Time <- c("7:44:29","7:44:30","7:45:31","7:46:35","7:47:37","7:48:34","7:49:35","7:50:36",'7:51:13',"7:52:14","7:52:15","7:53:16")
Speed_kmh <- c(2,5,8,2,4,6,9,8,6,8,10,14)
Distance_m <- c(162,162,162,162,162,162,162,157,150,137,122,102)
df <- data.frame(ID, Time, Speed_kmh, Distance_m)
df
ID Time Speed_kmh Distance_m
1 1 7:44:29 2 162
2 1 7:44:30 5 162
3 1 7:45:31 8 162
4 2 7:46:35 2 162
5 2 7:47:37 4 162
6 2 7:48:34 6 162
7 3 7:49:35 9 162
8 3 7:50:36 8 157
9 3 7:51:13 6 150
10 3 7:52:14 8 137
11 4 7:52:15 10 122
12 4 7:53:16 14 102
我想按五分钟间隔做两件事
1-统计唯一ID值的个数
2- 平均 Speed_kmh 和 Distance_m 值
所需的输出应如下所示。
ID Time Speed_kmh Distance_m
1 2 7:48:34 5 162
2 2 7:53:16 9 138
你可以试试
library(lubridate)
lilibrary(data.table)
df1 %>%
mutate(Time = hms(Time)) %>%
mutate(tt = floor(lubridate::minute(Time - Time[1])/5) ) %>%
group_by(tt) %>%
summarize(ID = length(unique(ID)), Time = last(Time), Speed_kmh = mean(Speed_kmh), Distance_m = mean(Distance_m)) %>%
select(-tt)
ID Time Speed_kmh Distance_m
<dbl> <Period> <dbl> <dbl>
1 2 7H 48M 34S 4.5 162
2 2 7H 53M 16S 9.17 138.
df1 %>%
mutate(Time = hms(Time)) %>%
mutate(ttt= (lubridate::minute(Time) + lubridate::hour(Time)* 60)) %>%
mutate(tt = floor(ttt/5) ) %>%
group_by(tt) %>%
summarize(ID = length(unique(ID)), Time = last(Time), Speed_kmh = mean(Speed_kmh), Distance_m = mean(Distance_m))
tt ID Time Speed_kmh Distance_m
<dbl> <int> <Period> <dbl> <dbl>
1 92 1 7H 44M 30S 3.5 162
2 93 3 7H 49M 35S 5.8 162
3 94 2 7H 53M 16S 9.2 134.