GroupBy + 条件
GroupBy + Condition
假设我有一张如下所示的表:
ID | Start | End | Name | Code
1 | 1000 | 1001 | ABC1 | A
1 | 1000 | 1001 | FSD1 | B
2 | 1200 | 1201 | CC12 | A
3 | 4000 | 4002 | GF41 | B
3 | 4000 | 4002 | ML65 | A
使用标准 SQL,能否实现如下条件逻辑:
对于同一组 (ID, Start, End),如果同时存在 Code 为 A 和 Code 为 B 的记录,则只输出 Code 为 B 的那条记录;
否则,原样输出该组的记录。期望的输出如下:
ID | Start | End | Name | Code
1 | 1000 | 1001 | FSD1 | B
2 | 1200 | 1201 | CC12 | A
3 | 4000 | 4002 | GF41 | B
谢谢!
以下适用于 BigQuery Standard SQL
#standardSQL
-- Inline sample rows stand in for the real table so the query runs as-is.
WITH `project.dataset.your_table` AS (
  SELECT 1 AS id, 1000 AS start, 1001 AS `end`, 'ABC1' AS name, 'A' AS code
  UNION ALL
  SELECT 1, 1000, 1001, 'FSD1', 'B'
  UNION ALL
  SELECT 2, 1200, 1201, 'CC12', 'A'
  UNION ALL
  SELECT 3, 4000, 4002, 'GF41', 'B'
  UNION ALL
  SELECT 3, 4000, 4002, 'ML65', 'A'
)
-- One output row per (id, start, end) group: the aggregate sorts the group's
-- (name, code) pairs by code descending and keeps only the first one, so a
-- 'B' row is chosen over an 'A' row whenever both exist in the group.
SELECT
  id,
  start,
  `end`,
  ARRAY_AGG(
    STRUCT(name, code)
    ORDER BY code DESC
    LIMIT 1
  )[OFFSET(0)].*  -- expand the surviving struct back into name, code columns
FROM `project.dataset.your_table`
GROUP BY
  id,
  start,
  `end`
-- ORDER BY id  (optional: uncomment to get the rows back sorted by id)
结果应该是
Row id start end name code
1 1 1000 1001 FSD1 B
2 2 1200 1201 CC12 A
3 3 4000 4002 GF41 B
下面是结果相同的另一种写法;如果您的表中还有更多或其他字段需要包含在输出中,
这种写法可能会更有用:
#standardSQL
-- Inline sample rows stand in for the real table so the query runs as-is.
WITH `project.dataset.your_table` AS (
  SELECT 1 AS id, 1000 AS start, 1001 AS `end`, 'ABC1' AS name, 'A' AS code
  UNION ALL
  SELECT 1, 1000, 1001, 'FSD1', 'B'
  UNION ALL
  SELECT 2, 1200, 1201, 'CC12', 'A'
  UNION ALL
  SELECT 3, 4000, 4002, 'GF41', 'B'
  UNION ALL
  SELECT 3, 4000, 4002, 'ML65', 'A'
),
-- For each (id, start, end) group keep the whole source row with the highest
-- code. Aggregating the row itself (rather than a hand-picked STRUCT) carries
-- every column of the table along without listing them.
top_per_group AS (
  SELECT
    ARRAY_AGG(src ORDER BY code DESC LIMIT 1)[OFFSET(0)] AS top_row
  FROM `project.dataset.your_table` AS src
  GROUP BY
    id,
    start,
    `end`
)
-- Expand the chosen row struct back into ordinary columns.
SELECT top_row.*
FROM top_per_group
-- ORDER BY id  (optional: uncomment to get the rows back sorted by id)
您也可以使用 row_number():
-- Keep one row per (id, start, end) group. Rows are numbered inside each
-- group by code descending, so when a group has both an 'A' and a 'B' row the
-- 'B' row receives seqnum = 1 and is the only one that survives the filter.
-- The partition uses all three key columns (not just id) so that groups which
-- share an id but differ in start/end are not collapsed into a single row.
-- `end` is back-quoted here, as in the other queries in this document.
-- NOTE: the result also contains the helper column seqnum, because the outer
-- select returns every column of the derived table.
-- NOTE(review): t is presumably a placeholder for the real table name.
select t.*
from (select t.*,
             row_number() over (partition by id, start, `end`
                                order by code desc) as seqnum
      from t
     ) t
where seqnum = 1;
这种方法相对于聚合方法的优势在于它很容易包含所有列,而没有聚合开销。
假设我有一张如下所示的表:
ID | Start | End | Name | Code
1 | 1000 | 1001 | ABC1 | A
1 | 1000 | 1001 | FSD1 | B
2 | 1200 | 1201 | CC12 | A
3 | 4000 | 4002 | GF41 | B
3 | 4000 | 4002 | ML65 | A
使用标准 SQL,能否实现如下条件逻辑:
对于同一组 (ID, Start, End),如果同时存在 Code 为 A 和 Code 为 B 的记录,则只输出 Code 为 B 的那条记录;否则,原样输出该组的记录。期望的输出如下:
ID | Start | End | Name | Code
1 | 1000 | 1001 | FSD1 | B
2 | 1200 | 1201 | CC12 | A
3 | 4000 | 4002 | GF41 | B
谢谢!
以下适用于 BigQuery Standard SQL
#standardSQL
-- Inline sample rows stand in for the real table so the query runs as-is.
WITH `project.dataset.your_table` AS (
  SELECT 1 AS id, 1000 AS start, 1001 AS `end`, 'ABC1' AS name, 'A' AS code
  UNION ALL
  SELECT 1, 1000, 1001, 'FSD1', 'B'
  UNION ALL
  SELECT 2, 1200, 1201, 'CC12', 'A'
  UNION ALL
  SELECT 3, 4000, 4002, 'GF41', 'B'
  UNION ALL
  SELECT 3, 4000, 4002, 'ML65', 'A'
)
-- One output row per (id, start, end) group: the aggregate sorts the group's
-- (name, code) pairs by code descending and keeps only the first one, so a
-- 'B' row is chosen over an 'A' row whenever both exist in the group.
SELECT
  id,
  start,
  `end`,
  ARRAY_AGG(
    STRUCT(name, code)
    ORDER BY code DESC
    LIMIT 1
  )[OFFSET(0)].*  -- expand the surviving struct back into name, code columns
FROM `project.dataset.your_table`
GROUP BY
  id,
  start,
  `end`
-- ORDER BY id  (optional: uncomment to get the rows back sorted by id)
结果应该是
Row id start end name code
1 1 1000 1001 FSD1 B
2 2 1200 1201 CC12 A
3 3 4000 4002 GF41 B
下面是结果相同的另一种写法;如果您的表中还有更多或其他字段需要包含在输出中,这种写法可能会更有用:
#standardSQL
-- Inline sample rows stand in for the real table so the query runs as-is.
WITH `project.dataset.your_table` AS (
  SELECT 1 AS id, 1000 AS start, 1001 AS `end`, 'ABC1' AS name, 'A' AS code
  UNION ALL
  SELECT 1, 1000, 1001, 'FSD1', 'B'
  UNION ALL
  SELECT 2, 1200, 1201, 'CC12', 'A'
  UNION ALL
  SELECT 3, 4000, 4002, 'GF41', 'B'
  UNION ALL
  SELECT 3, 4000, 4002, 'ML65', 'A'
),
-- For each (id, start, end) group keep the whole source row with the highest
-- code. Aggregating the row itself (rather than a hand-picked STRUCT) carries
-- every column of the table along without listing them.
top_per_group AS (
  SELECT
    ARRAY_AGG(src ORDER BY code DESC LIMIT 1)[OFFSET(0)] AS top_row
  FROM `project.dataset.your_table` AS src
  GROUP BY
    id,
    start,
    `end`
)
-- Expand the chosen row struct back into ordinary columns.
SELECT top_row.*
FROM top_per_group
-- ORDER BY id  (optional: uncomment to get the rows back sorted by id)
您也可以使用 row_number():
-- Keep one row per (id, start, end) group. Rows are numbered inside each
-- group by code descending, so when a group has both an 'A' and a 'B' row the
-- 'B' row receives seqnum = 1 and is the only one that survives the filter.
-- The partition uses all three key columns (not just id) so that groups which
-- share an id but differ in start/end are not collapsed into a single row.
-- `end` is back-quoted here, as in the other queries in this document.
-- NOTE: the result also contains the helper column seqnum, because the outer
-- select returns every column of the derived table.
-- NOTE(review): t is presumably a placeholder for the real table name.
select t.*
from (select t.*,
             row_number() over (partition by id, start, `end`
                                order by code desc) as seqnum
      from t
     ) t
where seqnum = 1;
这种方法相对于聚合方法的优势在于它很容易包含所有列,而没有聚合开销。