如何在 SQL Bigquery 中计算另一个事件之前特定事件的数量?
How to count number of a particular event before another event in SQL Bigquery?
我有一个 table 包含日期、事件和用户。有一个名为 'A' 的事件。我想知道在 Sql Bigquery 中的事件 'A' 之前和之后特定事件发生了多少次。例如,
User Date Events
123 2018-02-13 X.Y.A
123 2018-02-12 X.Y.B
134 2018-02-10 Y.Z.A
123 2018-02-11 A
123 2018-02-01 X.Y.Z
134 2018-02-05 X.Y.B
134 2018-02-04 A
输出会是这样的
User Event Before After
123 A 1 3
134 A 0 1
我必须计算的事件包含一个特定的前缀。意味着我必须检查以( X.Y.then 某些事件名称)开头的事件。因此,X.Y.SomeEvent 是我必须为其设置计数器的事件。有什么建议吗?
用户window函数查找"A"发生的日期。然后使用条件聚合统计前后事件:
-- Per user, count the 'X.Y.'-prefixed events dated before and after event 'A'.
-- The inner query tags every row with a_date: the earliest date on which that
-- user logged event 'A' (NULL when the user has no 'A', which makes both
-- comparisons fail and both counts come out as 0).
select userid,
       -- prefixed events strictly earlier than the user's 'A'
       sum(case when date < a_date and event like 'X.Y.%' then 1 else 0 end) as before,
       -- prefixed events strictly later than the user's 'A'
       sum(case when date > a_date and event like 'X.Y.%' then 1 else 0 end) as after
from (select t.*,
             min(case when event = 'A' then date end) over (partition by userid) as a_date
      from t
     ) t
group by userid
以下适用于 BigQuery SQL
#standardSQL
-- For each row whose event is 'A', report how many 'X.Y.'-prefixed events of
-- the same user come earlier (before) and later (after) in dt order.
WITH counted AS (
  SELECT
    user,
    event,
    -- every row ahead of the current one, current row excluded
    COUNTIF(event LIKE 'X.Y.%') OVER (
      per_user ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
    ) AS before,
    -- every row behind the current one, current row excluded
    COUNTIF(event LIKE 'X.Y.%') OVER (
      per_user ROWS BETWEEN 1 FOLLOWING AND UNBOUNDED FOLLOWING
    ) AS after
  FROM `project.dataset.events`
  WINDOW per_user AS (PARTITION BY user ORDER BY dt)
)
SELECT user, event, before, after
FROM counted
WHERE event = 'A'
-- append "ORDER BY user" here if a stable row order is wanted
你可以在你的问题中用虚拟数据测试它
#standardSQL
-- Self-contained demo: the CTE shadows `project.dataset.events` with the
-- sample rows from the question, so the query runs without a real table.
-- With this fixture the result is (123, 'A', 1, 2) and (134, 'A', 0, 1).
WITH `project.dataset.events` AS (
  SELECT *
  FROM UNNEST([
    STRUCT(123 AS user, '2018-02-13' AS dt, 'X.Y.A' AS event),
    (123, '2018-02-12', 'X.Y.B'),
    (123, '2018-02-11', 'A'),
    (134, '2018-02-10', 'Y.Z.A'),
    (134, '2018-02-05', 'X.Y.B'),
    (134, '2018-02-04', 'A'),
    (123, '2018-02-01', 'X.Y.Z')
  ])
),
counted AS (
  SELECT
    user,
    event,
    -- 'X.Y.'-prefixed rows strictly ahead of the current row in dt order
    COUNTIF(event LIKE 'X.Y.%') OVER (
      per_user ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
    ) AS before,
    -- 'X.Y.'-prefixed rows strictly behind the current row in dt order
    COUNTIF(event LIKE 'X.Y.%') OVER (
      per_user ROWS BETWEEN 1 FOLLOWING AND UNBOUNDED FOLLOWING
    ) AS after
  FROM `project.dataset.events`
  WINDOW per_user AS (PARTITION BY user ORDER BY dt)
)
SELECT user, event, before, after
FROM counted
WHERE event = 'A'
ORDER BY user
我有一个 table 包含日期、事件和用户。有一个名为 'A' 的事件。我想知道在 Sql Bigquery 中的事件 'A' 之前和之后特定事件发生了多少次。例如,
User Date Events
123 2018-02-13 X.Y.A
123 2018-02-12 X.Y.B
134 2018-02-10 Y.Z.A
123 2018-02-11 A
123 2018-02-01 X.Y.Z
134 2018-02-05 X.Y.B
134 2018-02-04 A
输出会是这样的
User Event Before After
123 A 1 3
134 A 0 1
我必须计算的事件包含一个特定的前缀。意味着我必须检查以( X.Y.then 某些事件名称)开头的事件。因此,X.Y.SomeEvent 是我必须为其设置计数器的事件。有什么建议吗?
用户window函数查找"A"发生的日期。然后使用条件聚合统计前后事件:
-- Per user, count the 'X.Y.'-prefixed events dated before and after event 'A'.
-- The inner query tags every row with a_date: the earliest date on which that
-- user logged event 'A' (NULL when the user has no 'A', which makes both
-- comparisons fail and both counts come out as 0).
select userid,
       -- prefixed events strictly earlier than the user's 'A'
       sum(case when date < a_date and event like 'X.Y.%' then 1 else 0 end) as before,
       -- prefixed events strictly later than the user's 'A'
       sum(case when date > a_date and event like 'X.Y.%' then 1 else 0 end) as after
from (select t.*,
             min(case when event = 'A' then date end) over (partition by userid) as a_date
      from t
     ) t
group by userid
以下适用于 BigQuery SQL
#standardSQL
-- Keeps only the 'A' rows, each annotated with the number of 'X.Y.'-prefixed
-- events the same user has earlier (before) and later (after) in dt order.
WITH annotated AS (
  SELECT
    user,
    event,
    COUNTIF(event LIKE 'X.Y.%') OVER (
      PARTITION BY user
      ORDER BY dt
      ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING  -- current row excluded
    ) AS before,
    COUNTIF(event LIKE 'X.Y.%') OVER (
      PARTITION BY user
      ORDER BY dt
      ROWS BETWEEN 1 FOLLOWING AND UNBOUNDED FOLLOWING  -- current row excluded
    ) AS after
  FROM `project.dataset.events`
)
SELECT user, event, before, after
FROM annotated
WHERE event = 'A'
-- append "ORDER BY user" here if a stable row order is wanted
你可以在你的问题中用虚拟数据测试它
#standardSQL
-- Self-contained demo: the CTE shadows `project.dataset.events` with the
-- sample rows from the question, so the query runs without a real table.
-- With this fixture the result is (123, 'A', 1, 2) and (134, 'A', 0, 1).
WITH `project.dataset.events` AS (
  SELECT *
  FROM UNNEST([
    STRUCT(123 AS user, '2018-02-13' AS dt, 'X.Y.A' AS event),
    (123, '2018-02-12', 'X.Y.B'),
    (123, '2018-02-11', 'A'),
    (134, '2018-02-10', 'Y.Z.A'),
    (134, '2018-02-05', 'X.Y.B'),
    (134, '2018-02-04', 'A'),
    (123, '2018-02-01', 'X.Y.Z')
  ])
),
annotated AS (
  SELECT
    user,
    event,
    COUNTIF(event LIKE 'X.Y.%') OVER (
      PARTITION BY user
      ORDER BY dt
      ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING  -- current row excluded
    ) AS before,
    COUNTIF(event LIKE 'X.Y.%') OVER (
      PARTITION BY user
      ORDER BY dt
      ROWS BETWEEN 1 FOLLOWING AND UNBOUNDED FOLLOWING  -- current row excluded
    ) AS after
  FROM `project.dataset.events`
)
SELECT user, event, before, after
FROM annotated
WHERE event = 'A'
ORDER BY user