当 table 太大时 ROW_NUMBER() 失败
ROW_NUMBER() fails when table is too big
我正在使用 BigQuery,需要借助 ROW_NUMBER() 只获取符合某些条件的第一行。
示例:
-- Number the rows of each id by descending timedate, keep only the row
-- numbered 1 in every id partition, and drop the helper column rn.
WITH ranked AS (
  SELECT
    *,
    ROW_NUMBER() OVER (PARTITION BY id ORDER BY timedate DESC) AS rn
  FROM
    table
)
SELECT * EXCEPT (rn)
FROM ranked
WHERE rn = 1
但是,由于表太大,查询会失败。如何在不耗尽资源的情况下实现这种逻辑?
以下适用于 BigQuery 标准 SQL
#standardSQL
-- For every id, aggregate whole rows into an array ordered by timedate
-- descending and capped at one element, then emit that single row.
SELECT AS VALUE
  ARRAY_AGG(src ORDER BY src.timedate DESC LIMIT 1)[OFFSET(0)]
FROM
  `project.dataset.table` AS src
GROUP BY
  src.id
你可以使用如下虚拟数据来测试、试验上面的查询:
#standardSQL
-- Inline sample rows shadow `project.dataset.table` so the query can be run
-- without a real table: three rows for id 1 and two rows for id 2.
WITH `project.dataset.table` AS (
  SELECT id, timedate, z
  FROM UNNEST(ARRAY<STRUCT<id INT64, timedate INT64, z INT64>>[
    (1, 2, 3),
    (1, 4, 5),
    (1, 6, 7),
    (2, 8, 9),
    (2, 10, 11)
  ])
)
-- For every id, keep only the row with the highest timedate.
SELECT AS VALUE
  ARRAY_AGG(src ORDER BY src.timedate DESC LIMIT 1)[OFFSET(0)]
FROM
  `project.dataset.table` AS src
GROUP BY
  src.id
结果是
Row id timedate z
1 1 6 7
2 2 10 11
我正在使用 BigQuery,需要借助 ROW_NUMBER() 只获取符合某些条件的第一行。
示例:
-- Number the rows of each id by descending timedate, keep only the row
-- numbered 1 in every id partition, and drop the helper column rn.
WITH ranked AS (
  SELECT
    *,
    ROW_NUMBER() OVER (PARTITION BY id ORDER BY timedate DESC) AS rn
  FROM
    table
)
SELECT * EXCEPT (rn)
FROM ranked
WHERE rn = 1
但是,由于表太大,查询会失败。如何在不耗尽资源的情况下实现这种逻辑?
以下适用于 BigQuery 标准 SQL
#standardSQL
-- For every id, aggregate whole rows into an array ordered by timedate
-- descending and capped at one element, then emit that single row.
SELECT AS VALUE
  ARRAY_AGG(src ORDER BY src.timedate DESC LIMIT 1)[OFFSET(0)]
FROM
  `project.dataset.table` AS src
GROUP BY
  src.id
你可以使用如下虚拟数据来测试、试验上面的查询:
#standardSQL
-- Inline sample rows shadow `project.dataset.table` so the query can be run
-- without a real table: three rows for id 1 and two rows for id 2.
WITH `project.dataset.table` AS (
  SELECT id, timedate, z
  FROM UNNEST(ARRAY<STRUCT<id INT64, timedate INT64, z INT64>>[
    (1, 2, 3),
    (1, 4, 5),
    (1, 6, 7),
    (2, 8, 9),
    (2, 10, 11)
  ])
)
-- For every id, keep only the row with the highest timedate.
SELECT AS VALUE
  ARRAY_AGG(src ORDER BY src.timedate DESC LIMIT 1)[OFFSET(0)]
FROM
  `project.dataset.table` AS src
GROUP BY
  src.id
结果是
Row id timedate z
1 1 6 7
2 2 10 11