如何在 R 中统计两个时间戳之间的观测值数量(附示例)?

How to count observations between 2 timestamps in R (Example given)?

我有一个数据框 my_df,其中包含不同设备的数据及其各自的开始和结束时间戳。我想按照 expected_df 所示的方式,统计在每个给定时间戳(按分钟)上出现的设备数量。

# Sample input: one row per device of customer "XYZ", carrying the device's
# start and end timestamps (as character strings) together with the date,
# hour and minute parts already split out.
# Note: `end_startstamp` holds the END timestamp; the column name is kept
# as-is because the code further down refers to it by this name.
my_df <- data.frame(
  customer = c("XYZ", "XYZ"),
  device = c("x", "a"),
  start_timestamp = c("2020-05-13 07:50:06", "2020-05-13 08:01:06"),
  end_startstamp = c("2020-05-13 08:05:06", "2020-05-13 08:10:06"),
  start_date = c("2020-05-13", "2020-05-13"),
  start_hour = c(7, 8),
  start_minute = c(50, 1),
  end_hour = c(8, 8),
  end_minute = c(5, 10)
)
# Echo the data frame so its contents are visible.
my_df
  customer device     start_timestamp      end_startstamp start_date start_hour start_minute end_hour end_minute
1      XYZ      x 2020-05-13 07:50:06 2020-05-13 08:05:06 2020-05-13          7           50        8          5
2      XYZ      a 2020-05-13 08:01:06 2020-05-13 08:10:06 2020-05-13          8            1        8         10



# Desired result: one row per minute from 07:50 to 08:10 (21 rows) with the
# number of devices seen during that minute.
# The minute labels are generated in UTC and formatted back to character so
# they are the same strings regardless of the session's local time zone.
expected_df <- data.frame(
  customer = rep("XYZ", 21),
  time_stamp = format(
    seq(
      as.POSIXct("2020-05-13 07:50:00", tz = "UTC"),
      by = "1 min",
      length.out = 21
    ),
    "%Y-%m-%d %H:%M:%S"
  ),
  # 07:50-08:00 -> 1 device, 08:01-08:05 -> 2 devices, 08:06-08:10 -> 1 device
  no_devices_seen = rep(c(1, 2, 1), times = c(11, 5, 5))
)

这是使用 tidyverse 函数的方法:

library(dplyr)
library(lubridate)

# Count, for every minute, how many device rows cover that minute.
#   1. Parse both timestamp columns and round them down to the whole minute.
#   2. Build, per row, the sequence of minutes from start to end (inclusive)
#      and store it as a list-column.
#   3. Unnest the list-column so each device/minute pair becomes its own row.
#   4. Tally the rows per customer and minute.
# Step 4 counts rows, not distinct devices: a device that appears in several
# overlapping rows of `my_df` is counted once per row.
my_df %>%
  mutate(
    start_timestamp = floor_date(ymd_hms(start_timestamp), "minute"),
    end_startstamp = floor_date(ymd_hms(end_startstamp), "minute"),
    # Later expressions in the same mutate() see the floored columns above.
    timestamp = purrr::map2(
      start_timestamp, end_startstamp,
      function(from, to) seq(from, to, by = "1 min")
    )
  ) %>%
  tidyr::unnest(timestamp) %>%
  count(customer, timestamp, name = "no_of_device_seen")


#  customer           timestamp no_of_device_seen
#1       XYZ 2020-05-13 07:50:00                 1
#2       XYZ 2020-05-13 07:51:00                 1
#3       XYZ 2020-05-13 07:52:00                 1
#4       XYZ 2020-05-13 07:53:00                 1
#5       XYZ 2020-05-13 07:54:00                 1
#6       XYZ 2020-05-13 07:55:00                 1
#7       XYZ 2020-05-13 07:56:00                 1
#8       XYZ 2020-05-13 07:57:00                 1
#9       XYZ 2020-05-13 07:58:00                 1
#10      XYZ 2020-05-13 07:59:00                 1
#11      XYZ 2020-05-13 08:00:00                 1
#12      XYZ 2020-05-13 08:01:00                 2
#13      XYZ 2020-05-13 08:02:00                 2
#14      XYZ 2020-05-13 08:03:00                 2
#15      XYZ 2020-05-13 08:04:00                 2
#16      XYZ 2020-05-13 08:05:00                 2
#17      XYZ 2020-05-13 08:06:00                 1
#18      XYZ 2020-05-13 08:07:00                 1
#19      XYZ 2020-05-13 08:08:00                 1
#20      XYZ 2020-05-13 08:09:00                 1
#21      XYZ 2020-05-13 08:10:00                 1