在 R 中按组查找和汇总时间戳之间的同时发生的事件

Find and summarise co-occurring events between timestamps by group in R

我正在处理点播电视日志数据集,我想识别和汇总两个不同时间戳之间的活动设备数量。我在下面创建了一个模拟数据框;每行都是一个“会话”,包含唯一的客户标识符、唯一的设备标识符、正在观看的节目类型以及该会话的开始/结束时间:-


df<-structure(list(CustomerID = c("0289d477-427e-4b91-bd4d-8fd579ef2b87", 
"0289d477-427e-4b91-bd4d-8fd579ef2b87", "0289d477-427e-4b91-bd4d-8fd579ef2b87", 
"0289d477-427e-4b91-bd4d-8fd579ef2b87", "0289d477-427e-4b91-bd4d-8fd579ef2b87", 
"0289d477-427e-4b91-bd4d-8fd579ef2b87", "0289d477-427e-4b91-bd4d-8fd579ef2b87", 
"0289d477-427e-4b91-bd4d-8fd579ef2b87", "0289d477-427e-4b91-bd4d-8fd579ef2b87", 
"0289d477-427e-4b91-bd4d-8fd579ef2b87", "0289d477-427e-4b91-bd4d-8fd579ef2b87", 
"0289d477-427e-4b91-bd4d-8fd579ef2b87", "0289d477-427e-4b91-bd4d-8fd579ef2b87", 
"0289d477-427e-4b91-bd4d-8fd579ef2b87", "0289d477-427e-4b91-bd4d-8fd579ef2b87", 
"0289d477-427e-4b91-bd4d-8fd579ef2b87", "0289d477-427e-4b91-bd4d-8fd579ef2b87", 
"12a6dd1e-484c-4c94-a7ab-6443a58b4159", "12a6dd1e-484c-4c94-a7ab-6443a58b4159", 
"12a6dd1e-484c-4c94-a7ab-6443a58b4159", "12a6dd1e-484c-4c94-a7ab-6443a58b4159", 
"12a6dd1e-484c-4c94-a7ab-6443a58b4159", "12a6dd1e-484c-4c94-a7ab-6443a58b4159", 
"12a6dd1e-484c-4c94-a7ab-6443a58b4159", "12a6dd1e-484c-4c94-a7ab-6443a58b4159", 
"12a6dd1e-484c-4c94-a7ab-6443a58b4159", "12a6dd1e-484c-4c94-a7ab-6443a58b4159", 
"12a6dd1e-484c-4c94-a7ab-6443a58b4159", "12a6dd1e-484c-4c94-a7ab-6443a58b4159", 
"12a6dd1e-484c-4c94-a7ab-6443a58b4159", "12a6dd1e-484c-4c94-a7ab-6443a58b4159", 
"12a6dd1e-484c-4c94-a7ab-6443a58b4159", "12a6dd1e-484c-4c94-a7ab-6443a58b4159", 
"12a6dd1e-484c-4c94-a7ab-6443a58b4159", "fb442c22-2595-4245-9f49-a2ea3581ee88", 
"fb442c22-2595-4245-9f49-a2ea3581ee88", "fb442c22-2595-4245-9f49-a2ea3581ee88", 
"fb442c22-2595-4245-9f49-a2ea3581ee88", "fb442c22-2595-4245-9f49-a2ea3581ee88", 
"fb442c22-2595-4245-9f49-a2ea3581ee88", "fb442c22-2595-4245-9f49-a2ea3581ee88", 
"fb442c22-2595-4245-9f49-a2ea3581ee88", "fb442c22-2595-4245-9f49-a2ea3581ee88", 
"fb442c22-2595-4245-9f49-a2ea3581ee88", "fb442c22-2595-4245-9f49-a2ea3581ee88", 
"fb442c22-2595-4245-9f49-a2ea3581ee88", "fb442c22-2595-4245-9f49-a2ea3581ee88", 
"fb442c22-2595-4245-9f49-a2ea3581ee88", "fb442c22-2595-4245-9f49-a2ea3581ee88", 
"fb442c22-2595-4245-9f49-a2ea3581ee88", "fb442c22-2595-4245-9f49-a2ea3581ee88", 
"fc20bfb6-172e-4f55-9467-12ed99579503", "fc20bfb6-172e-4f55-9467-12ed99579503", 
"fc20bfb6-172e-4f55-9467-12ed99579503", "fc20bfb6-172e-4f55-9467-12ed99579503", 
"fc20bfb6-172e-4f55-9467-12ed99579503", "fc20bfb6-172e-4f55-9467-12ed99579503", 
"fc20bfb6-172e-4f55-9467-12ed99579503", "fc20bfb6-172e-4f55-9467-12ed99579503", 
"fc20bfb6-172e-4f55-9467-12ed99579503", "fc20bfb6-172e-4f55-9467-12ed99579503", 
"fc20bfb6-172e-4f55-9467-12ed99579503", "fc20bfb6-172e-4f55-9467-12ed99579503", 
"fc20bfb6-172e-4f55-9467-12ed99579503", "fc20bfb6-172e-4f55-9467-12ed99579503", 
"fc20bfb6-172e-4f55-9467-12ed99579503", "fc20bfb6-172e-4f55-9467-12ed99579503", 
"fc20bfb6-172e-4f55-9467-12ed99579503", "e4f0a5ef-f808-4869-9370-c7fcee63ea98", 
"e4f0a5ef-f808-4869-9370-c7fcee63ea98", "e4f0a5ef-f808-4869-9370-c7fcee63ea98", 
"e4f0a5ef-f808-4869-9370-c7fcee63ea98", "e4f0a5ef-f808-4869-9370-c7fcee63ea98", 
"e4f0a5ef-f808-4869-9370-c7fcee63ea98", "e4f0a5ef-f808-4869-9370-c7fcee63ea98", 
"e4f0a5ef-f808-4869-9370-c7fcee63ea98", "e4f0a5ef-f808-4869-9370-c7fcee63ea98", 
"e4f0a5ef-f808-4869-9370-c7fcee63ea98", "e4f0a5ef-f808-4869-9370-c7fcee63ea98", 
"e4f0a5ef-f808-4869-9370-c7fcee63ea98", "e4f0a5ef-f808-4869-9370-c7fcee63ea98", 
"e4f0a5ef-f808-4869-9370-c7fcee63ea98", "e4f0a5ef-f808-4869-9370-c7fcee63ea98", 
"e4f0a5ef-f808-4869-9370-c7fcee63ea98", "e4f0a5ef-f808-4869-9370-c7fcee63ea98", 
"aeffea0a-fdbf-4c88-8a47-8eaeee4339ef", "aeffea0a-fdbf-4c88-8a47-8eaeee4339ef", 
"aeffea0a-fdbf-4c88-8a47-8eaeee4339ef", "aeffea0a-fdbf-4c88-8a47-8eaeee4339ef", 
"aeffea0a-fdbf-4c88-8a47-8eaeee4339ef", "aeffea0a-fdbf-4c88-8a47-8eaeee4339ef", 
"aeffea0a-fdbf-4c88-8a47-8eaeee4339ef", "aeffea0a-fdbf-4c88-8a47-8eaeee4339ef", 
"aeffea0a-fdbf-4c88-8a47-8eaeee4339ef", "aeffea0a-fdbf-4c88-8a47-8eaeee4339ef", 
"aeffea0a-fdbf-4c88-8a47-8eaeee4339ef", "aeffea0a-fdbf-4c88-8a47-8eaeee4339ef", 
"aeffea0a-fdbf-4c88-8a47-8eaeee4339ef", "aeffea0a-fdbf-4c88-8a47-8eaeee4339ef", 
"aeffea0a-fdbf-4c88-8a47-8eaeee4339ef", "aeffea0a-fdbf-4c88-8a47-8eaeee4339ef", 
"aeffea0a-fdbf-4c88-8a47-8eaeee4339ef"), DeviceID = c("b8d7b4ab-3d1e-40a1-ba9e-13b7d82d519d", 
"b8136ab5-3e81-4ead-a52b-f23609bc4899", "420dc9bf-c14e-4bcd-9559-e1b491f05182", 
"b8136ab5-3e81-4ead-a52b-f23609bc4899", "ccb94e13-2004-4642-82fb-73fd2cdd979e", 
"b8136ab5-3e81-4ead-a52b-f23609bc4899", "b8d7b4ab-3d1e-40a1-ba9e-13b7d82d519d", 
"b8d7b4ab-3d1e-40a1-ba9e-13b7d82d519d", "ccb94e13-2004-4642-82fb-73fd2cdd979e", 
"b8d7b4ab-3d1e-40a1-ba9e-13b7d82d519d", "420dc9bf-c14e-4bcd-9559-e1b491f05182", 
"b8d7b4ab-3d1e-40a1-ba9e-13b7d82d519d", "420dc9bf-c14e-4bcd-9559-e1b491f05182", 
"b8136ab5-3e81-4ead-a52b-f23609bc4899", "ccb94e13-2004-4642-82fb-73fd2cdd979e", 
"b8136ab5-3e81-4ead-a52b-f23609bc4899", "420dc9bf-c14e-4bcd-9559-e1b491f05182", 
"66a9e7dd-57ee-4c8a-a090-950cae9b02a1", "a8f4bb78-a0f2-476b-9303-2761b06a65fc", 
"66a9e7dd-57ee-4c8a-a090-950cae9b02a1", "66a9e7dd-57ee-4c8a-a090-950cae9b02a1", 
"a8f4bb78-a0f2-476b-9303-2761b06a65fc", "a8f4bb78-a0f2-476b-9303-2761b06a65fc", 
"c293d135-800e-4a62-898d-f0959bf0870d", "66a9e7dd-57ee-4c8a-a090-950cae9b02a1", 
"a8f4bb78-a0f2-476b-9303-2761b06a65fc", "09109879-1061-4325-ae85-9c853dbf7882", 
"09109879-1061-4325-ae85-9c853dbf7882", "c293d135-800e-4a62-898d-f0959bf0870d", 
"66a9e7dd-57ee-4c8a-a090-950cae9b02a1", "a8f4bb78-a0f2-476b-9303-2761b06a65fc", 
"66a9e7dd-57ee-4c8a-a090-950cae9b02a1", "09109879-1061-4325-ae85-9c853dbf7882", 
"66a9e7dd-57ee-4c8a-a090-950cae9b02a1", "6caaebb0-775f-4da9-9d34-414e2cb02ef6", 
"6caaebb0-775f-4da9-9d34-414e2cb02ef6", "6caaebb0-775f-4da9-9d34-414e2cb02ef6", 
"6caaebb0-775f-4da9-9d34-414e2cb02ef6", "6caaebb0-775f-4da9-9d34-414e2cb02ef6", 
"6caaebb0-775f-4da9-9d34-414e2cb02ef6", "6caaebb0-775f-4da9-9d34-414e2cb02ef6", 
"6caaebb0-775f-4da9-9d34-414e2cb02ef6", "6caaebb0-775f-4da9-9d34-414e2cb02ef6", 
"6caaebb0-775f-4da9-9d34-414e2cb02ef6", "6caaebb0-775f-4da9-9d34-414e2cb02ef6", 
"6caaebb0-775f-4da9-9d34-414e2cb02ef6", "6caaebb0-775f-4da9-9d34-414e2cb02ef6", 
"6caaebb0-775f-4da9-9d34-414e2cb02ef6", "6caaebb0-775f-4da9-9d34-414e2cb02ef6", 
"6caaebb0-775f-4da9-9d34-414e2cb02ef6", "6caaebb0-775f-4da9-9d34-414e2cb02ef6", 
"41f98340-0724-4dcc-b9bd-1bdd12307f87", "41f98340-0724-4dcc-b9bd-1bdd12307f87", 
"a481ffe1-9bfe-4cac-9da8-553c4da2e224", "41f98340-0724-4dcc-b9bd-1bdd12307f87", 
"a481ffe1-9bfe-4cac-9da8-553c4da2e224", "07d11a53-9b21-4fa1-b055-41f0247c642f", 
"66b65dcb-5416-4bcc-ac9e-2222e2d50a28", "c1ceebbb-24cf-4b1d-8576-8bcb3aaa4534", 
"29277218-9798-406e-b9ee-717184bf6f0e", "41f98340-0724-4dcc-b9bd-1bdd12307f87", 
"41f98340-0724-4dcc-b9bd-1bdd12307f87", "41f98340-0724-4dcc-b9bd-1bdd12307f87", 
"66b65dcb-5416-4bcc-ac9e-2222e2d50a28", "66b65dcb-5416-4bcc-ac9e-2222e2d50a28", 
"29277218-9798-406e-b9ee-717184bf6f0e", "c1ceebbb-24cf-4b1d-8576-8bcb3aaa4534", 
"66b65dcb-5416-4bcc-ac9e-2222e2d50a28", "04ba4776-8afc-4e86-86de-7b85668bf075", 
"a58aa3cc-a231-4a82-8377-56b34306a446", "04ba4776-8afc-4e86-86de-7b85668bf075", 
"7c193822-4ce9-4086-b274-d013e2180ae1", "d2832ac1-f3fd-468a-ace9-efa6a4e25e41", 
"d2832ac1-f3fd-468a-ace9-efa6a4e25e41", "7c193822-4ce9-4086-b274-d013e2180ae1", 
"a58aa3cc-a231-4a82-8377-56b34306a446", "7c193822-4ce9-4086-b274-d013e2180ae1", 
"7c193822-4ce9-4086-b274-d013e2180ae1", "d2832ac1-f3fd-468a-ace9-efa6a4e25e41", 
"7c193822-4ce9-4086-b274-d013e2180ae1", "7c193822-4ce9-4086-b274-d013e2180ae1", 
"4eb8bf81-1f5c-4593-8205-2d0a0d77d0d0", "7c193822-4ce9-4086-b274-d013e2180ae1", 
"7c193822-4ce9-4086-b274-d013e2180ae1", "7c193822-4ce9-4086-b274-d013e2180ae1", 
"58286c18-2df6-461b-8a04-096625f678d2", "58286c18-2df6-461b-8a04-096625f678d2", 
"58286c18-2df6-461b-8a04-096625f678d2", "fc5d9b88-a545-4f69-9c55-7b57103a165c", 
"3007c886-8fde-4b05-8ae5-b4f8df0467a1", "fc5d9b88-a545-4f69-9c55-7b57103a165c", 
"fc5d9b88-a545-4f69-9c55-7b57103a165c", "fc5d9b88-a545-4f69-9c55-7b57103a165c", 
"3007c886-8fde-4b05-8ae5-b4f8df0467a1", "58286c18-2df6-461b-8a04-096625f678d2", 
"fc5d9b88-a545-4f69-9c55-7b57103a165c", "fc5d9b88-a545-4f69-9c55-7b57103a165c", 
"3007c886-8fde-4b05-8ae5-b4f8df0467a1", "58286c18-2df6-461b-8a04-096625f678d2", 
"3007c886-8fde-4b05-8ae5-b4f8df0467a1", "3007c886-8fde-4b05-8ae5-b4f8df0467a1", 
"3007c886-8fde-4b05-8ae5-b4f8df0467a1"), ShowGenre = c("Music", 
"Music", "Sport", "Drama", "Kids", "Documentary", "News", "Movie", 
"Drama", "News", "News", "Kids", "Documentary", "Movie", "Movie", 
"Documentary", "Movie", "Music", "Sport", "Movie", "Movie", "Movie", 
"Drama", "News", "Movie", "Movie", "Documentary", "Movie", "Music", 
"Drama", "News", "News", "Movie", "Drama", "News", "Documentary", 
"Documentary", "Drama", "Music", "Sport", "Sport", "Movie", "Music", 
"Drama", "Sport", "Drama", "Drama", "Kids", "Drama", "Documentary", 
"Sport", "Music", "Music", "Documentary", "Drama", "News", "Music", 
"Music", "Movie", "Documentary", "Documentary", "Documentary", 
"Sport", "Music", "News", "News", "Sport", "Documentary", "Music", 
"Documentary", "News", "Drama", "Drama", "Documentary", "News", 
"Music", "Kids", "Drama", "Documentary", "News", "Drama", "Documentary", 
"Movie", "News", "Kids", "Movie", "Music", "Kids", "Kids", "Movie", 
"Music", "News", "Movie", "Kids", "Music", "Music", "Kids", "Kids", 
"News", "Kids", "Movie", "Documentary"), SessionStart = structure(c(1612132904, 
1612133106, 1612136282, 1612139373, 1612139378, 1612140041, 1612140405, 
1612143192, 1612143292, 1612143854, 1612143976, 1612144065, 1612144220, 
1612144263, 1612144334, 1612144356, 1612146166, 1612146226, 1612146248, 
1612146440, 1612146989, 1612147206, 1612148624, 1612152735, 1612153241, 
1612153475, 1612154929, 1612155104, 1612155562, 1612155992, 1612159668, 
1612159851, 1612160073, 1612165858, 1612168664, 1612169607, 1612169662, 
1612169779, 1612171481, 1612172015, 1612172166, 1612172358, 1612172446, 
1612172505, 1612172544, 1612172601, 1612172607, 1612172969, 1612173898, 
1612175729, 1612177333, 1612178891, 1612180467, 1612180651, 1612181087, 
1612181168, 1612181233, 1612186335, 1612186358, 1612186740, 1612187098, 
1612187181, 1612187519, 1612187704, 1612187730, 1612187890, 1612187936, 
1612188139, 1612188486, 1612188494, 1612188580, 1612192309, 1612192504, 
1612193382, 1612194334, 1612194365, 1612194396, 1612194579, 1612194762, 
1612194984, 1612195094, 1612195096, 1612195252, 1612195837, 1612196401, 
1612199002, 1612200677, 1612200762, 1612200829, 1612201556, 1612201802, 
1612202166, 1612202555, 1612202852, 1612203272, 1612204749, 1612204989, 
1612205005, 1612205067, 1612206077, 1612206260, 1612206263), tzone = "Europe/London", class = c("POSIXct", 
"POSIXt")), SessionEnd = structure(c(1612137925, 1612139792, 
1612140039, 1612141093, 1612139380, 1612143136, 1612140640, 1612143256, 
1612146067, 1612144022, 1612152403, 1612144131, 1612144270, 1612144284, 
1612144337, 1612144652, 1612146227, 1612146238, 1612146439, 1612146493, 
1612152522, 1612148610, 1612149051, 1612153217, 1612153464, 1612154778, 
1612155086, 1612155551, 1612155877, 1612156110, 1612159851, 1612160072, 
1612160227, 1612168654, 1612171480, 1612169607, 1612169740, 1612172007, 
1612172194, 1612172104, 1612172337, 1612172465, 1612172496, 1612172520, 
1612172599, 1612172604, 1612172653, 1612175721, 1612174311, 1612177318, 
1612177340, 1612178923, 1612180650, 1612180839, 1612181167, 1612181232, 
1612181276, 1612186398, 1612186358, 1612186885, 1612187809, 1612187184, 
1612187704, 1612187890, 1612187789, 1612187899, 1612188138, 1612188485, 
1612188498, 1612189623, 1612188597, 1612192404, 1612193479, 1612195723, 
1612194375, 1612194396, 1612194578, 1612194761, 1612194984, 1612195044, 
1612195251, 1612195517, 1612195252, 1612195864, 1612196431, 1612200445, 
1612201368, 1612200786, 1612200896, 1612201633, 1612202122, 1612204649, 
1612205037, 1612203929, 1612203278, 1612204749, 1612205014, 1612205834, 
1612205067, 1612206261, 1612206305, 1612206343), tzone = "Europe/London", class = c("POSIXct", 
"POSIXt"))), class = "data.frame", row.names = c(NA, -102L))

所以在这个数据框中,有 6 个唯一的客户标识符,每个标识符都有不同数量的唯一设备。以下是客户的独特设备计数:-

* <chr>                                <int>
1 0289d477-427e-4b91-bd4d-8fd579ef2b87     4
2 12a6dd1e-484c-4c94-a7ab-6443a58b4159     4
3 aeffea0a-fdbf-4c88-8a47-8eaeee4339ef     3
4 e4f0a5ef-f808-4869-9370-c7fcee63ea98     5
5 fb442c22-2595-4245-9f49-a2ea3581ee88     1
6 fc20bfb6-172e-4f55-9467-12ed99579503     6

你可以想象,在现代点播电视服务中,家里有人可能正在大屏幕电视上观看节目,而与此同时,另一个房间里有人正在平板电脑、手机等设备上观看别的内容。

鉴于此背景,我希望能够总结以下内容:-

  1. 每位客户有多少个会话同时处于活动状态?使用第一个活动会话的“SessionStart”时间戳和最后一个活动会话的“SessionEnd”时间戳作为定义的周期。
  2. 在此会话期间处于活动状态的唯一设备数
  3. 在活动会话中观看的独特类型的数量

期望输出

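这是数据框的前两行(同一位客户 0289d477-427e-4b91-bd4d-8fd579ef2b87,两台不同的设备):-

DeviceID                               ShowGenre   SessionStart          SessionEnd
b8d7b4ab-3d1e-40a1-ba9e-13b7d82d519d   Music       2021-01-31 22:41:44   2021-02-01 00:05:25
b8136ab5-3e81-4ead-a52b-f23609bc4899   Music       2021-01-31 22:45:06   2021-02-01 00:36:32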

如果这些是我们必须处理的仅有的两行,那么所需的输出将如下所示:-

CustomerID                                Num_Unique_Devices     Num_Unique_Genre       Genres       
0289d477-427e-4b91-bd4d-8fd579ef2b87               2                    1                 Music

StartTime_FirstSession      EndTime_LastSession
2021-01-31 22:41:44         2021-02-01 00:36:32


任何人都可以帮助想出一个解决方案来获得这个结果吗?目前它需要一些超出我专业知识的东西,因此非常感谢任何帮助。谢谢:)

library(tidyverse)

我们可以从添加一个指示器开始,该指示器显示当前会话是否与前面的会话重叠。可以这样做:

# Step 1: flag, per customer, every session that starts while an earlier
# session of the same customer is still running.
#
# Comparing only against the immediately preceding row (lag(SessionEnd)) is
# not enough: a very short session sitting between two long ones breaks the
# chain. In this data, the session starting at 00:40:41 follows a 2-second
# session that ended at 00:29:40, but an earlier session is still running
# until 00:58:13, so it does overlap. The comparison is therefore made against
# the running maximum of all earlier end times within the customer.
#
# cummax() is not defined for POSIXct, so it is applied to the underlying
# numeric seconds; the start time is converted the same way for the comparison.
#
# arrange() ignores grouping by default, so rows are sorted globally by start
# time, which still leaves each customer's rows in chronological order.
#
# `overlap` is NA for the first session of each customer (nothing precedes
# it). The result stays grouped by CustomerID.
#
# NOTE: the printed output shown after this block was produced with the
# row-to-row lag(SessionEnd) comparison; with the running maximum, row 6 is
# TRUE rather than FALSE.
(step1 <- df %>%
  as_tibble() %>%
  group_by(CustomerID) %>%
  arrange(SessionStart) %>%
  mutate(
    overlap = as.numeric(SessionStart) < lag(cummax(as.numeric(SessionEnd)))
  )
)
#> # A tibble: 102 x 6
#> # Groups:   CustomerID [6]
#>    CustomerID DeviceID ShowGenre SessionStart        SessionEnd          overlap
#>    <chr>      <chr>    <chr>     <dttm>              <dttm>              <lgl>  
#>  1 0289d477-~ b8d7b4a~ Music     2021-01-31 22:41:44 2021-02-01 00:05:25 NA     
#>  2 0289d477-~ b8136ab~ Music     2021-01-31 22:45:06 2021-02-01 00:36:32 TRUE   
#>  3 0289d477-~ 420dc9b~ Sport     2021-01-31 23:38:02 2021-02-01 00:40:39 TRUE   
#>  4 0289d477-~ b8136ab~ Drama     2021-02-01 00:29:33 2021-02-01 00:58:13 TRUE   
#>  5 0289d477-~ ccb94e1~ Kids      2021-02-01 00:29:38 2021-02-01 00:29:40 TRUE   
#>  6 0289d477-~ b8136ab~ Document~ 2021-02-01 00:40:41 2021-02-01 01:32:16 FALSE  
#>  7 0289d477-~ b8d7b4a~ News      2021-02-01 00:46:45 2021-02-01 00:50:40 TRUE   
#>  8 0289d477-~ b8d7b4a~ Movie     2021-02-01 01:33:12 2021-02-01 01:34:16 FALSE  
#>  9 0289d477-~ ccb94e1~ Drama     2021-02-01 01:34:52 2021-02-01 02:21:07 FALSE  
#> 10 0289d477-~ b8d7b4a~ News      2021-02-01 01:44:14 2021-02-01 01:47:02 TRUE   
#> # ... with 92 more rows

overlap 列中的 NA 应更改为 FALSE,这可以用 coalesce() 来完成。接下来我们要创建一个 SessionID,每当一个不与之前会话重叠的新会话开始时就递增。一种方法是 cumsum(!overlap)。

# Step 2: number the viewing periods within each customer.
# step1 is still grouped by CustomerID, so the running count restarts for
# every customer. A row opens a new period when its `overlap` flag is either
# missing (first row of the customer) or FALSE; summing those "new period"
# markers cumulatively yields an integer SessionID.
(step2 <- mutate(
  step1,
  SessionID = cumsum(is.na(overlap) | !overlap)
))
#> # A tibble: 102 x 7
#> # Groups:   CustomerID [6]
#>    CustomerID DeviceID ShowGenre SessionStart        SessionEnd          overlap
#>    <chr>      <chr>    <chr>     <dttm>              <dttm>              <lgl>  
#>  1 0289d477-~ b8d7b4a~ Music     2021-01-31 22:41:44 2021-02-01 00:05:25 NA     
#>  2 0289d477-~ b8136ab~ Music     2021-01-31 22:45:06 2021-02-01 00:36:32 TRUE   
#>  3 0289d477-~ 420dc9b~ Sport     2021-01-31 23:38:02 2021-02-01 00:40:39 TRUE   
#>  4 0289d477-~ b8136ab~ Drama     2021-02-01 00:29:33 2021-02-01 00:58:13 TRUE   
#>  5 0289d477-~ ccb94e1~ Kids      2021-02-01 00:29:38 2021-02-01 00:29:40 TRUE   
#>  6 0289d477-~ b8136ab~ Document~ 2021-02-01 00:40:41 2021-02-01 01:32:16 FALSE  
#>  7 0289d477-~ b8d7b4a~ News      2021-02-01 00:46:45 2021-02-01 00:50:40 TRUE   
#>  8 0289d477-~ b8d7b4a~ Movie     2021-02-01 01:33:12 2021-02-01 01:34:16 FALSE  
#>  9 0289d477-~ ccb94e1~ Drama     2021-02-01 01:34:52 2021-02-01 02:21:07 FALSE  
#> 10 0289d477-~ b8d7b4a~ News      2021-02-01 01:44:14 2021-02-01 01:47:02 TRUE   
#> # ... with 92 more rows, and 1 more variable: SessionID <int>

最后,我们按 CustomerID 和 SessionID 分组(group_by),并创建带有汇总结果的最终数据框。

# Step 3: one summary row per customer and viewing period (SessionID).
#
# Columns:
#   n_unique_devices     distinct DeviceID values seen in the period
#   n_unique_genres      distinct ShowGenre values seen in the period
#   first_session_start  earliest SessionStart in the period
#   last_session_end     latest SessionEnd in the period
#   n_sessions           number of log rows (sessions) merged into the period
#   genres               the distinct genres, sorted and comma-separated
#
# n_sessions and genres are appended after the original four columns so that
# existing column names and positions are unchanged; they supply the session
# count and the "Genres" column asked for in the question. The printed output
# shown after this block lists only the original summary columns.
#
# .groups = "drop" returns an ungrouped tibble.
(step3 <- step2 %>%
    group_by(CustomerID, SessionID) %>%
    summarise(
      n_unique_devices = n_distinct(DeviceID),
      n_unique_genres = n_distinct(ShowGenre),
      first_session_start = min(SessionStart),
      last_session_end = max(SessionEnd),
      n_sessions = n(),
      genres = paste(sort(unique(ShowGenre)), collapse = ", "),
      .groups = "drop"
    )
)
#> # A tibble: 69 x 6
#>    CustomerID     SessionID n_unique_devices n_unique_genres first_session_start
#>  * <chr>              <int>            <int>           <int> <dttm>             
#>  1 0289d477-427e~         1                4               4 2021-01-31 22:41:44
#>  2 0289d477-427e~         2                2               2 2021-02-01 00:40:41
#>  3 0289d477-427e~         3                1               1 2021-02-01 01:33:12
#>  4 0289d477-427e~         4                3               3 2021-02-01 01:34:52
#>  5 0289d477-427e~         5                2               2 2021-02-01 01:50:20
#>  6 0289d477-427e~         6                1               1 2021-02-01 01:52:14
#>  7 0289d477-427e~         7                1               1 2021-02-01 01:52:36
#>  8 0289d477-427e~         8                1               1 2021-02-01 02:22:46
#>  9 12a6dd1e-484c~         1                1               1 2021-02-01 02:23:46
#> 10 12a6dd1e-484c~         2                1               1 2021-02-01 02:24:08
#> # ... with 59 more rows, and 1 more variable: last_session_end <dttm>

由 reprex 包 (v1.0.0) 于 2021-07-14 创建