SQL:获取注册流程之前的最后一个来源页面和注册之后的下一个页面

SQL: Get last referring and post referring page during a signup process

我正在尝试编写一个高效的 SQL 查询,用于选出注册之前('before')和注册之后('after')的页面。我目前有一个使用 for 循环的解决方案,但它无法扩展,所以希望得到一个纯 SQL 的解决方案。

对于每个 clientId,我想获取紧邻注册流程之前的那个页面,以及紧邻注册流程之后的那个页面(注册流程的前后两侧各只取 1 个)。

注册流程的完成页面始终是 /join/complete。

输入:

clientId    time    path
1           0       /page1
1           10      /page2
1           20      /join/<random_token_id>
1           30      /join/<random_token_id>/step2
1           40      /join/complete
1           50      /page2
2           0       /page3
2           10      /join/complete

输出

ClientId   Before     After
1          /page2     /page2
2          /page3     null

如果 SQL 中有简单的解决方案,我将不胜感激。如果太复杂,那就算了,我会让现有代码运行一整夜。

#standardSQL
-- For every clientId, report:
--   before: the page viewed immediately before the client's first '/join/' page
--   after:  the page viewed immediately after a '/join/complete' page
-- A side with no such neighbouring page view comes back as NULL.
WITH
-- Number each client's page views in ascending order of time (1-based).
-- NOTE(review): rows of one client sharing the same time have no defined
-- relative order here -- confirm time is unique per client.
lineup AS (
  SELECT
    clientId,
    time,
    path,
    ROW_NUMBER() OVER (PARTITION BY clientId ORDER BY time) AS pos
  FROM `project.dataset.table`
),
-- Position of each client's earliest page view under '/join/'.
start AS (
  SELECT
    clientId,
    MIN(pos) AS pos
  FROM lineup
  WHERE STARTS_WITH(path, '/join/')
  GROUP BY clientId
),
-- Position of every '/join/complete' page view.
-- NOTE(review): there is no aggregation here, so a client with several
-- '/join/complete' rows contributes several rows to the final result.
complete AS (
  SELECT
    clientId,
    pos
  FROM lineup
  WHERE path = '/join/complete'
),
-- Page view sitting one position ahead of the start of the join flow.
-- Clients whose first page view is already a '/join/' page have no such row.
before AS (
  SELECT
    lineup.clientId,
    lineup.path
  FROM lineup
  INNER JOIN start
    ON start.clientId = lineup.clientId
    AND start.pos - 1 = lineup.pos
),
-- Page view sitting one position past '/join/complete'.
after AS (
  SELECT
    lineup.clientId,
    lineup.path
  FROM lineup
  INNER JOIN complete
    ON complete.clientId = lineup.clientId
    AND complete.pos + 1 = lineup.pos
)
-- FULL OUTER JOIN keeps clients that have only one of the two sides.
SELECT
  clientId,
  before.path AS before,
  after.path AS after
FROM before
FULL OUTER JOIN after USING (clientId)

您可以使用问题中的示例数据来测试/试验上面的查询,如下所示:

#standardSQL
-- Self-contained demo: the inline CTE below stands in for the real table so
-- the query can be run as-is. For every clientId it reports:
--   before: the page viewed immediately before the client's first '/join/' page
--   after:  the page viewed immediately after a '/join/complete' page
WITH
-- Dummy page views (clientId, time, path) used in place of the real table.
`project.dataset.table` AS (
  SELECT 1 AS clientId, 0 AS time, '/page1' AS path
  UNION ALL SELECT 1, 10, '/page2'
  UNION ALL SELECT 1, 20, '/join/<random_token_id>'
  UNION ALL SELECT 1, 30, '/join/<random_token_id>/step2'
  UNION ALL SELECT 1, 40, '/join/complete'
  UNION ALL SELECT 1, 50, '/page2'
  UNION ALL SELECT 2, 0, '/page3'
  UNION ALL SELECT 2, 10, '/join/complete'
  UNION ALL SELECT 3, 0, '/join/complete'
  UNION ALL SELECT 3, 10, '/page4'
),
-- Number each client's page views in ascending order of time (1-based).
lineup AS (
  SELECT
    clientId,
    time,
    path,
    ROW_NUMBER() OVER (PARTITION BY clientId ORDER BY time) AS pos
  FROM `project.dataset.table`
),
-- Position of each client's earliest page view under '/join/'.
start AS (
  SELECT
    clientId,
    MIN(pos) AS pos
  FROM lineup
  WHERE STARTS_WITH(path, '/join/')
  GROUP BY clientId
),
-- Position of every '/join/complete' page view.
-- NOTE(review): there is no aggregation here, so a client with several
-- '/join/complete' rows contributes several rows to the final result.
complete AS (
  SELECT
    clientId,
    pos
  FROM lineup
  WHERE path = '/join/complete'
),
-- Page view sitting one position ahead of the start of the join flow.
-- Clients whose first page view is already a '/join/' page (client 3 in the
-- dummy data) have no such row.
before AS (
  SELECT
    lineup.clientId,
    lineup.path
  FROM lineup
  INNER JOIN start
    ON start.clientId = lineup.clientId
    AND start.pos - 1 = lineup.pos
),
-- Page view sitting one position past '/join/complete'.
-- Clients whose last page view is '/join/complete' (client 2 in the dummy
-- data) have no such row.
after AS (
  SELECT
    lineup.clientId,
    lineup.path
  FROM lineup
  INNER JOIN complete
    ON complete.clientId = lineup.clientId
    AND complete.pos + 1 = lineup.pos
)
-- FULL OUTER JOIN keeps clients that have only one of the two sides.
SELECT
  clientId,
  before.path AS before,
  after.path AS after
FROM before
FULL OUTER JOIN after USING (clientId)

结果为

Row clientId    before      after    
1   1           /page2      /page2   
2   2           /page3      null     
3   3           null        /page4