使用 BigQuery 根据给定条件,统计 column_2 中每个不同值所对应的 column_1 值的数量
count of column_1 values for every distinct value in column_2 based on a given condition using BigQuery
我有一个如下所示的表:
date_1 | date_2
2018-12-08 | 2018-12-07
2018-12-09 | 2018-12-07
2018-12-13 | 2018-12-07
2018-12-16 | 2018-12-07
2018-12-14 | 2018-12-09
输出表应该如下所示:
date_2 | count_of_date_1_after_date_2
2018-12-07 | 5
2018-12-09 | 3
我想要统计晚于('after')date_2 的 date_1 的数量(统计范围是表中所有行的 date_1,而不仅限于该 date_2 所在的行)。
我尝试并想出了这个答案,但我不确定这是否是最好的方法。有更好的方法吗?
-- For every distinct date_2, count how many date_1 values in the whole sample
-- are strictly later than that date_2.
WITH
sample_table AS (
  -- Inline sample rows. uid is the same constant on every row and is used
  -- only as the join key below.
  SELECT DATE('2018-12-08') AS date_1, DATE('2018-12-07') AS date_2, 'AAA' AS uid UNION ALL
  SELECT DATE('2018-12-09') AS date_1, DATE('2018-12-07') AS date_2, 'AAA' AS uid UNION ALL
  SELECT DATE('2018-12-13') AS date_1, DATE('2018-12-07') AS date_2, 'AAA' AS uid UNION ALL
  SELECT DATE('2018-12-16') AS date_1, DATE('2018-12-07') AS date_2, 'AAA' AS uid UNION ALL
  SELECT DATE('2018-12-14') AS date_1, DATE('2018-12-09') AS date_2, 'AAA' AS uid
),
distinct_date_2 AS (
  -- One row per distinct (date_2, uid) pair; DISTINCT applies to the whole row.
  SELECT DISTINCT
    date_2 AS distinct_date,
    uid
  FROM sample_table
)
SELECT
  distinct_date,
  -- Named explicitly so the output column matches the expected result table
  -- instead of an auto-generated column name.
  COUNTIF(date_1 > distinct_date) AS count_of_date_1_after_date_2
FROM sample_table
-- Because uid is identical on every sample row, this join pairs each sample
-- row with every distinct date_2.
LEFT JOIN distinct_date_2
  USING (uid)
GROUP BY distinct_date
ORDER BY distinct_date
以下适用于 BigQuery 标准 SQL
#standardSQL
-- For each distinct date_2, count the date_1 values (taken from every row of
-- the table) that are strictly later than it.
SELECT
  t.date_2,
  (
    SELECT COUNT(1)
    FROM UNNEST(t.arr) AS date_1
    WHERE date_1 > t.date_2
  ) AS count_of_date_1_after_date_2
FROM (
  -- arr holds the same values on every row, so ANY_VALUE simply keeps one
  -- copy per date_2 group.
  SELECT
    date_2,
    ANY_VALUE(arr) AS arr
  FROM (
    -- The empty OVER() makes the window span the whole table, so every row
    -- carries an array of all date_1 values.
    SELECT
      date_2,
      ARRAY_AGG(date_1) OVER() AS arr
    FROM `project.dataset.table`
  )
  GROUP BY date_2
) AS t
-- Without an explicit ORDER BY the row order of the result is not guaranteed.
ORDER BY date_2
您可以使用您问题中的样本数据来测试和使用上面的示例,如下例所示
#standardSQL
-- Same query as above, run against inline sample data: the CTE is named like
-- the real table so the main query text can stay unchanged.
WITH `project.dataset.table` AS (
  -- Every literal is an explicit DATE, so no row depends on implicit
  -- STRING-to-DATE coercion inside UNION ALL.
  SELECT DATE '2018-12-08' AS date_1, DATE '2018-12-07' AS date_2 UNION ALL
  SELECT DATE '2018-12-09', DATE '2018-12-07' UNION ALL
  SELECT DATE '2018-12-13', DATE '2018-12-07' UNION ALL
  SELECT DATE '2018-12-16', DATE '2018-12-07' UNION ALL
  SELECT DATE '2018-12-14', DATE '2018-12-09'
)
SELECT
  t.date_2,
  (
    SELECT COUNT(1)
    FROM UNNEST(t.arr) AS date_1
    WHERE date_1 > t.date_2
  ) AS count_of_date_1_after_date_2
FROM (
  -- arr holds the same values on every row, so ANY_VALUE simply keeps one
  -- copy per date_2 group.
  SELECT
    date_2,
    ANY_VALUE(arr) AS arr
  FROM (
    -- The empty OVER() makes the window span the whole table, so every row
    -- carries an array of all date_1 values.
    SELECT
      date_2,
      ARRAY_AGG(date_1) OVER() AS arr
    FROM `project.dataset.table`
  )
  GROUP BY date_2
) AS t
-- Without an explicit ORDER BY the row order of the result is not guaranteed.
ORDER BY date_2
结果
Row date_2 count_of_date_1_after_date_2
1 2018-12-07 5
2 2018-12-09 3
我有一个如下所示的表:
date_1 | date_2
2018-12-08 | 2018-12-07
2018-12-09 | 2018-12-07
2018-12-13 | 2018-12-07
2018-12-16 | 2018-12-07
2018-12-14 | 2018-12-09
输出表应该如下所示:
date_2 | count_of_date_1_after_date_2
2018-12-07 | 5
2018-12-09 | 3
我想要统计晚于('after')date_2 的 date_1 的数量(统计范围是表中所有行的 date_1,而不仅限于该 date_2 所在的行)。
我尝试并想出了这个答案,但我不确定这是否是最好的方法。有更好的方法吗?
-- For every distinct date_2, count how many date_1 values in the whole sample
-- are strictly later than that date_2.
WITH
sample_table AS (
  -- Inline sample rows. uid is the same constant on every row and is used
  -- only as the join key below.
  SELECT DATE('2018-12-08') AS date_1, DATE('2018-12-07') AS date_2, 'AAA' AS uid UNION ALL
  SELECT DATE('2018-12-09') AS date_1, DATE('2018-12-07') AS date_2, 'AAA' AS uid UNION ALL
  SELECT DATE('2018-12-13') AS date_1, DATE('2018-12-07') AS date_2, 'AAA' AS uid UNION ALL
  SELECT DATE('2018-12-16') AS date_1, DATE('2018-12-07') AS date_2, 'AAA' AS uid UNION ALL
  SELECT DATE('2018-12-14') AS date_1, DATE('2018-12-09') AS date_2, 'AAA' AS uid
),
distinct_date_2 AS (
  -- One row per distinct (date_2, uid) pair; DISTINCT applies to the whole row.
  SELECT DISTINCT
    date_2 AS distinct_date,
    uid
  FROM sample_table
)
SELECT
  distinct_date,
  -- Named explicitly so the output column matches the expected result table
  -- instead of an auto-generated column name.
  COUNTIF(date_1 > distinct_date) AS count_of_date_1_after_date_2
FROM sample_table
-- Because uid is identical on every sample row, this join pairs each sample
-- row with every distinct date_2.
LEFT JOIN distinct_date_2
  USING (uid)
GROUP BY distinct_date
ORDER BY distinct_date
以下适用于 BigQuery 标准 SQL
#standardSQL
-- For each distinct date_2, count the date_1 values (taken from every row of
-- the table) that are strictly later than it.
SELECT
  t.date_2,
  (
    SELECT COUNT(1)
    FROM UNNEST(t.arr) AS date_1
    WHERE date_1 > t.date_2
  ) AS count_of_date_1_after_date_2
FROM (
  -- arr holds the same values on every row, so ANY_VALUE simply keeps one
  -- copy per date_2 group.
  SELECT
    date_2,
    ANY_VALUE(arr) AS arr
  FROM (
    -- The empty OVER() makes the window span the whole table, so every row
    -- carries an array of all date_1 values.
    SELECT
      date_2,
      ARRAY_AGG(date_1) OVER() AS arr
    FROM `project.dataset.table`
  )
  GROUP BY date_2
) AS t
-- Without an explicit ORDER BY the row order of the result is not guaranteed.
ORDER BY date_2
您可以使用您问题中的样本数据来测试和使用上面的示例,如下例所示
#standardSQL
-- Same query as above, run against inline sample data: the CTE is named like
-- the real table so the main query text can stay unchanged.
WITH `project.dataset.table` AS (
  -- Every literal is an explicit DATE, so no row depends on implicit
  -- STRING-to-DATE coercion inside UNION ALL.
  SELECT DATE '2018-12-08' AS date_1, DATE '2018-12-07' AS date_2 UNION ALL
  SELECT DATE '2018-12-09', DATE '2018-12-07' UNION ALL
  SELECT DATE '2018-12-13', DATE '2018-12-07' UNION ALL
  SELECT DATE '2018-12-16', DATE '2018-12-07' UNION ALL
  SELECT DATE '2018-12-14', DATE '2018-12-09'
)
SELECT
  t.date_2,
  (
    SELECT COUNT(1)
    FROM UNNEST(t.arr) AS date_1
    WHERE date_1 > t.date_2
  ) AS count_of_date_1_after_date_2
FROM (
  -- arr holds the same values on every row, so ANY_VALUE simply keeps one
  -- copy per date_2 group.
  SELECT
    date_2,
    ANY_VALUE(arr) AS arr
  FROM (
    -- The empty OVER() makes the window span the whole table, so every row
    -- carries an array of all date_1 values.
    SELECT
      date_2,
      ARRAY_AGG(date_1) OVER() AS arr
    FROM `project.dataset.table`
  )
  GROUP BY date_2
) AS t
-- Without an explicit ORDER BY the row order of the result is not guaranteed.
ORDER BY date_2
结果
Row date_2 count_of_date_1_after_date_2
1 2018-12-07 5
2 2018-12-09 3