如何计算 R 中 2 个时间戳之间的观察值(给出的示例)?
How to count observations between 2 timestamps in R (Example given)?
我有 my_df 涵盖不同设备的数据及其各自的开始和结束时间戳,我想根据我的 expected_df [=15] 的解释计算给定时间戳上的设备的确切数量=].
my_df<-data.frame(customer=rep("XYZ",2),device=c("x","a"),
start_timestamp=c("2020-05-13 07:50:06","2020-05-13 08:01:06"),
end_startstamp=c("2020-05-13 08:05:06","2020-05-13 08:10:06"),
start_date=c("2020-05-13","2020-05-13"),start_hour=c(7,8),start_minute=c(50,1),
end_hour=c(8,8),end_minute=c(5,10))
my_df
customer device start_timestamp end_startstamp start_date start_hour start_minute end_hour end_minute
1 XYZ x 2020-05-13 07:50:06 2020-05-13 08:05:06 2020-05-13 7 50 8 5
2 XYZ a 2020-05-13 08:01:06 2020-05-13 08:10:06 2020-05-13 8 1 8 10
expected_df<-data.frame(customer=rep("XYZ",21),time_stamp=c("2020-05-13 07:50:00","2020-05-13 07:51:00","2020-05-13 07:52:00","2020-05-13 07:53:00",
"2020-05-13 07:54:00","2020-05-13 07:55:00","2020-05-13 07:56:00","2020-05-13 07:57:00",
"2020-05-13 07:58:00","2020-05-13 07:59:00","2020-05-13 08:00:00","2020-05-13 08:01:00",
"2020-05-13 08:02:00","2020-05-13 08:03:00","2020-05-13 08:04:00","2020-05-13 08:05:00",
"2020-05-13 08:06:00","2020-05-13 08:07:00","2020-05-13 08:08:00","2020-05-13 08:09:00",
"2020-05-13 08:10:00"),
no_devices_seen=c(1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,1,1,1,1,1))
这是使用 tidyverse
函数的方法:
library(dplyr)
library(lubridate)
my_df %>%
mutate(across(c(start_timestamp, end_startstamp),
~floor_date(ymd_hms(.x), 'minute'))) %>%
mutate(timestamp = purrr::map2(start_timestamp, end_startstamp,
seq, by = '1 min')) %>%
tidyr::unnest(timestamp) %>%
count(customer, timestamp, name = 'no_of_device_seen')
# customer timestamp no_of_device_seen
#1 XYZ 2020-05-13 07:50:00 1
#2 XYZ 2020-05-13 07:51:00 1
#3 XYZ 2020-05-13 07:52:00 1
#4 XYZ 2020-05-13 07:53:00 1
#5 XYZ 2020-05-13 07:54:00 1
#6 XYZ 2020-05-13 07:55:00 1
#7 XYZ 2020-05-13 07:56:00 1
#8 XYZ 2020-05-13 07:57:00 1
#9 XYZ 2020-05-13 07:58:00 1
#10 XYZ 2020-05-13 07:59:00 1
#11 XYZ 2020-05-13 08:00:00 1
#12 XYZ 2020-05-13 08:01:00 2
#13 XYZ 2020-05-13 08:02:00 2
#14 XYZ 2020-05-13 08:03:00 2
#15 XYZ 2020-05-13 08:04:00 2
#16 XYZ 2020-05-13 08:05:00 2
#17 XYZ 2020-05-13 08:06:00 1
#18 XYZ 2020-05-13 08:07:00 1
#19 XYZ 2020-05-13 08:08:00 1
#20 XYZ 2020-05-13 08:09:00 1
#21 XYZ 2020-05-13 08:10:00 1
我有 my_df 涵盖不同设备的数据及其各自的开始和结束时间戳,我想根据我的 expected_df [=15] 的解释计算给定时间戳上的设备的确切数量=].
my_df<-data.frame(customer=rep("XYZ",2),device=c("x","a"),
start_timestamp=c("2020-05-13 07:50:06","2020-05-13 08:01:06"),
end_startstamp=c("2020-05-13 08:05:06","2020-05-13 08:10:06"),
start_date=c("2020-05-13","2020-05-13"),start_hour=c(7,8),start_minute=c(50,1),
end_hour=c(8,8),end_minute=c(5,10))
my_df
customer device start_timestamp end_startstamp start_date start_hour start_minute end_hour end_minute
1 XYZ x 2020-05-13 07:50:06 2020-05-13 08:05:06 2020-05-13 7 50 8 5
2 XYZ a 2020-05-13 08:01:06 2020-05-13 08:10:06 2020-05-13 8 1 8 10
expected_df<-data.frame(customer=rep("XYZ",21),time_stamp=c("2020-05-13 07:50:00","2020-05-13 07:51:00","2020-05-13 07:52:00","2020-05-13 07:53:00",
"2020-05-13 07:54:00","2020-05-13 07:55:00","2020-05-13 07:56:00","2020-05-13 07:57:00",
"2020-05-13 07:58:00","2020-05-13 07:59:00","2020-05-13 08:00:00","2020-05-13 08:01:00",
"2020-05-13 08:02:00","2020-05-13 08:03:00","2020-05-13 08:04:00","2020-05-13 08:05:00",
"2020-05-13 08:06:00","2020-05-13 08:07:00","2020-05-13 08:08:00","2020-05-13 08:09:00",
"2020-05-13 08:10:00"),
no_devices_seen=c(1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,1,1,1,1,1))
这是使用 tidyverse
函数的方法:
library(dplyr)
library(lubridate)
my_df %>%
mutate(across(c(start_timestamp, end_startstamp),
~floor_date(ymd_hms(.x), 'minute'))) %>%
mutate(timestamp = purrr::map2(start_timestamp, end_startstamp,
seq, by = '1 min')) %>%
tidyr::unnest(timestamp) %>%
count(customer, timestamp, name = 'no_of_device_seen')
# customer timestamp no_of_device_seen
#1 XYZ 2020-05-13 07:50:00 1
#2 XYZ 2020-05-13 07:51:00 1
#3 XYZ 2020-05-13 07:52:00 1
#4 XYZ 2020-05-13 07:53:00 1
#5 XYZ 2020-05-13 07:54:00 1
#6 XYZ 2020-05-13 07:55:00 1
#7 XYZ 2020-05-13 07:56:00 1
#8 XYZ 2020-05-13 07:57:00 1
#9 XYZ 2020-05-13 07:58:00 1
#10 XYZ 2020-05-13 07:59:00 1
#11 XYZ 2020-05-13 08:00:00 1
#12 XYZ 2020-05-13 08:01:00 2
#13 XYZ 2020-05-13 08:02:00 2
#14 XYZ 2020-05-13 08:03:00 2
#15 XYZ 2020-05-13 08:04:00 2
#16 XYZ 2020-05-13 08:05:00 2
#17 XYZ 2020-05-13 08:06:00 1
#18 XYZ 2020-05-13 08:07:00 1
#19 XYZ 2020-05-13 08:08:00 1
#20 XYZ 2020-05-13 08:09:00 1
#21 XYZ 2020-05-13 08:10:00 1