BigQuery:使用标准 SQL 进行查询
BigQuery: Querying with standard sql
我有这样一张表:
client_id session_id time action transaction_id
------------------------------------------------------
1 1 15:01 view NULL
1 1 15:02 basket NULL
1 1 15:03 basket NULL
1 1 15:04 purchase 1
1 2 15:05 basket NULL
1 2 15:06 purchase 2
1 2 15:07 view NULL
我想在每个会话内,把 transaction_id 登记到此前各操作的第一次出现上(同一操作重复出现时保持 NULL,因此 15:03 的 transaction_id = NULL)
session_id time transaction_id
------------------------------------
1 15:01 1
1 15:02 1
1 15:03 NULL
1 15:04 1
2 15:05 2
2 15:06 2
2 15:07 NULL
嗯……假设每个会话只有一个事务 ID,那么您可以使用窗口函数:
-- Assumes at most one transaction id per (client_id, session_id).
-- The earliest row (by time) of each action within a session receives the
-- session's transaction id; later repeats of the same action get NULL.
-- Caveat: the MAX window spans the whole session, so the first occurrence of
-- an action that happens AFTER the purchase also receives the id.
select t.*,
       (case when row_number() over (partition by client_id, session_id, action
                                     order by time) = 1
             then max(transaction_id) over (partition by client_id, session_id)
        end) as new_transaction_id
from t
以下适用于 BigQuery 标准 SQL
#standardSQL
-- Step 1 (events_with_grp): grp is the number of non-NULL transaction_id
-- values on earlier rows of the same client/session, so each row carrying a
-- transaction id closes a group and the following row opens the next one.
-- Step 2: inside each group, the earliest row of every action receives the
-- group's transaction id; every other row gets NULL.
WITH events_with_grp AS (
  SELECT
    *,
    COUNTIF(transaction_id IS NOT NULL) OVER (
      PARTITION BY client_id, session_id
      ORDER BY time
      ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
    ) AS grp
  FROM YourTable
)
SELECT
  client_id,
  session_id,
  time,
  action,
  CASE
    WHEN ROW_NUMBER() OVER (
      PARTITION BY client_id, session_id, grp, action
      ORDER BY time
    ) = 1
      THEN MAX(transaction_id) OVER (PARTITION BY client_id, session_id, grp)
    ELSE NULL
  END AS transaction_id
FROM events_with_grp
-- ORDER BY client_id, session_id, time
您可以使用如下虚拟数据进行测试
#standardSQL
-- Self-contained check: YourTable is an inline fixture with two sessions,
-- the first of which contains two purchases (transaction ids 1 and 3).
WITH YourTable AS (
  SELECT 1 AS client_id, 1 AS session_id, '15:01' AS time, 'view' AS action, NULL AS transaction_id
  UNION ALL SELECT 1, 1, '15:02', 'basket', NULL
  UNION ALL SELECT 1, 1, '15:03', 'basket', NULL
  UNION ALL SELECT 1, 1, '15:04', 'purchase', 1
  UNION ALL SELECT 1, 1, '15:05', 'basket', NULL
  UNION ALL SELECT 1, 1, '15:06', 'basket', NULL
  UNION ALL SELECT 1, 1, '15:07', 'purchase', 3
  UNION ALL SELECT 1, 2, '15:08', 'basket', NULL
  UNION ALL SELECT 1, 2, '15:09', 'purchase', 2
  UNION ALL SELECT 1, 2, '15:10', 'view', NULL
),
-- grp is the number of non-NULL transaction_id values on earlier rows of the
-- same client/session, so each purchase row ends its group.
events_with_grp AS (
  SELECT
    *,
    COUNTIF(transaction_id IS NOT NULL) OVER (
      PARTITION BY client_id, session_id
      ORDER BY time
      ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
    ) AS grp
  FROM YourTable
)
-- The earliest row of each action inside a group receives the group's
-- transaction id; every other row gets NULL.
SELECT
  client_id,
  session_id,
  time,
  action,
  CASE
    WHEN ROW_NUMBER() OVER (
      PARTITION BY client_id, session_id, grp, action
      ORDER BY time
    ) = 1
      THEN MAX(transaction_id) OVER (PARTITION BY client_id, session_id, grp)
    ELSE NULL
  END AS transaction_id
FROM events_with_grp
-- ORDER BY client_id, session_id, time
输出符合预期
client_id session_id time action transaction_id
1 1 15:01 view 1
1 1 15:02 basket 1
1 1 15:03 basket null
1 1 15:04 purchase 1
1 1 15:05 basket 3
1 1 15:06 basket null
1 1 15:07 purchase 3
1 2 15:08 basket 2
1 2 15:09 purchase 2
1 2 15:10 view null
我有这样一张表:
client_id session_id time action transaction_id
------------------------------------------------------
1 1 15:01 view NULL
1 1 15:02 basket NULL
1 1 15:03 basket NULL
1 1 15:04 purchase 1
1 2 15:05 basket NULL
1 2 15:06 purchase 2
1 2 15:07 view NULL
我想在每个会话内,把 transaction_id 登记到此前各操作的第一次出现上(同一操作重复出现时保持 NULL,因此 15:03 的 transaction_id = NULL)
session_id time transaction_id
------------------------------------
1 15:01 1
1 15:02 1
1 15:03 NULL
1 15:04 1
2 15:05 2
2 15:06 2
2 15:07 NULL
嗯……假设每个会话只有一个事务 ID,那么您可以使用窗口函数:
-- Assumes at most one transaction id per (client_id, session_id).
-- The earliest row (by time) of each action within a session receives the
-- session's transaction id; later repeats of the same action get NULL.
-- Caveat: the MAX window spans the whole session, so the first occurrence of
-- an action that happens AFTER the purchase also receives the id.
select t.*,
       (case when row_number() over (partition by client_id, session_id, action
                                     order by time) = 1
             then max(transaction_id) over (partition by client_id, session_id)
        end) as new_transaction_id
from t
以下适用于 BigQuery 标准 SQL
#standardSQL
-- Step 1 (events_with_grp): grp is the number of non-NULL transaction_id
-- values on earlier rows of the same client/session, so each row carrying a
-- transaction id closes a group and the following row opens the next one.
-- Step 2: inside each group, the earliest row of every action receives the
-- group's transaction id; every other row gets NULL.
WITH events_with_grp AS (
  SELECT
    *,
    COUNTIF(transaction_id IS NOT NULL) OVER (
      PARTITION BY client_id, session_id
      ORDER BY time
      ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
    ) AS grp
  FROM YourTable
)
SELECT
  client_id,
  session_id,
  time,
  action,
  CASE
    WHEN ROW_NUMBER() OVER (
      PARTITION BY client_id, session_id, grp, action
      ORDER BY time
    ) = 1
      THEN MAX(transaction_id) OVER (PARTITION BY client_id, session_id, grp)
    ELSE NULL
  END AS transaction_id
FROM events_with_grp
-- ORDER BY client_id, session_id, time
您可以使用如下虚拟数据进行测试
#standardSQL
-- Self-contained check: YourTable is an inline fixture with two sessions,
-- the first of which contains two purchases (transaction ids 1 and 3).
WITH YourTable AS (
  SELECT 1 AS client_id, 1 AS session_id, '15:01' AS time, 'view' AS action, NULL AS transaction_id
  UNION ALL SELECT 1, 1, '15:02', 'basket', NULL
  UNION ALL SELECT 1, 1, '15:03', 'basket', NULL
  UNION ALL SELECT 1, 1, '15:04', 'purchase', 1
  UNION ALL SELECT 1, 1, '15:05', 'basket', NULL
  UNION ALL SELECT 1, 1, '15:06', 'basket', NULL
  UNION ALL SELECT 1, 1, '15:07', 'purchase', 3
  UNION ALL SELECT 1, 2, '15:08', 'basket', NULL
  UNION ALL SELECT 1, 2, '15:09', 'purchase', 2
  UNION ALL SELECT 1, 2, '15:10', 'view', NULL
),
-- grp is the number of non-NULL transaction_id values on earlier rows of the
-- same client/session, so each purchase row ends its group.
events_with_grp AS (
  SELECT
    *,
    COUNTIF(transaction_id IS NOT NULL) OVER (
      PARTITION BY client_id, session_id
      ORDER BY time
      ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
    ) AS grp
  FROM YourTable
)
-- The earliest row of each action inside a group receives the group's
-- transaction id; every other row gets NULL.
SELECT
  client_id,
  session_id,
  time,
  action,
  CASE
    WHEN ROW_NUMBER() OVER (
      PARTITION BY client_id, session_id, grp, action
      ORDER BY time
    ) = 1
      THEN MAX(transaction_id) OVER (PARTITION BY client_id, session_id, grp)
    ELSE NULL
  END AS transaction_id
FROM events_with_grp
-- ORDER BY client_id, session_id, time
输出符合预期
client_id session_id time action transaction_id
1 1 15:01 view 1
1 1 15:02 basket 1
1 1 15:03 basket null
1 1 15:04 purchase 1
1 1 15:05 basket 3
1 1 15:06 basket null
1 1 15:07 purchase 3
1 2 15:08 basket 2
1 2 15:09 purchase 2
1 2 15:10 view null