BigQuery:连接 2 个表,但仅根据日期列选择要匹配的行
BigQuery: join 2 tables but only choosing rows based on date column
我有 2 个表。
第一个表 table_new_data
如下:
date type data
2022-01 t1 0
2022-03 t2 1
2021-08 t1 1
第二个表 table_old_data
如下:
date type data
2021-10 t1 2
2022-04 t2 3
2021-07 t1 4
2021-06 t1 5
我想要一段 SQL 代码,对 table_new_data 和 table_old_data 执行 LEFT JOIN,
并产生以下结果。
new_date type new_data old_date old_data
2022-01 t1 0 2021-10 2
2022-03 t2 1 null null
2021-08 t1 1 2021-07 4
请注意:
- 仅连接 type 相同的行。
- 对于 table_new_data 中的每一行,仅与 table_old_data 中 date 更早且最接近的那一行连接。例如,对于 table_new_data 中的 2021-08 t1 1,我们只想连接 table_old_data 中的 2021-07 t1 4。
date 的格式为 YYYY-MM。
请参见下面的查询……同时请注意考虑边界情况。
-- ---------------------------------------------------------------------------------
-- create dummy NEW table
-- ---------------------------------------------------------------------------------
WITH
  table_new_data AS (
    SELECT
      '2022-01' AS date,
      't1' AS type,
      0 AS DATA
    UNION ALL
    SELECT
      '2022-03' AS date,
      't2' AS type,
      1 AS DATA
    UNION ALL
    SELECT
      '2021-08' AS date,
      't1' AS type,
      1 AS DATA
  ),
-- ---------------------------------------------------------------------------------
-- create dummy OLD table
-- ---------------------------------------------------------------------------------
  table_old_data AS (
    SELECT
      '2021-10' AS date,
      't1' AS type,
      2 AS DATA
    UNION ALL
    SELECT
      '2022-04' AS date,
      't2' AS type,
      3 AS DATA
    UNION ALL
    SELECT
      '2021-07' AS date,
      't1' AS type,
      4 AS DATA
    UNION ALL
    SELECT
      '2021-06' AS date,
      't1' AS type,
      5 AS DATA
  ),
-- ---------------------------------------------------------------------------------
-- Pair every NEW row with all OLD rows of the same type that are strictly earlier
-- (change `<` to `<=` if an OLD row in the same month should also qualify).
-- Dates are 'YYYY-MM' strings, so plain string comparison orders them
-- chronologically.
-- rn ranks the candidate OLD rows from latest to earliest, so rn = 1 is the
-- closest previous OLD row. A NEW row with no earlier OLD row still yields one
-- row (rn = 1) with NULL old_* columns because of the LEFT JOIN.
-- NOTE(review): assumes (type, date) is unique in table_new_data; duplicates
-- would share a partition and only one of them would survive rn = 1 -- confirm.
-- NOTE(review): if table_old_data holds several rows with the same type and
-- date, which one wins is arbitrary; add a tie-breaker to ORDER BY if needed.
-- ---------------------------------------------------------------------------------
  ordered AS (
    SELECT
      nd.date AS new_date,
      nd.type,
      nd.DATA AS new_data,
      od.date AS old_date,
      od.DATA AS old_data,
      ROW_NUMBER() OVER (
        PARTITION BY nd.type, nd.date
        ORDER BY od.date DESC
      ) AS rn
    FROM
      table_new_data AS nd
    LEFT JOIN
      table_old_data AS od
    ON
      nd.type = od.type
      AND od.date < nd.date
  )
-- ---------------------------------------------------------------------------------
-- final table to reproduce desired output in question
-- ---------------------------------------------------------------------------------
SELECT
  new_date,
  type,
  new_data,
  old_date,
  old_data
FROM
  ordered
WHERE
  rn = 1
输出:
我有 2 个表。
第一个表 table_new_data
如下:
date type data
2022-01 t1 0
2022-03 t2 1
2021-08 t1 1
第二个表 table_old_data
如下:
date type data
2021-10 t1 2
2022-04 t2 3
2021-07 t1 4
2021-06 t1 5
我想要一段 SQL 代码,对 table_new_data 和 table_old_data 执行 LEFT JOIN,
并产生以下结果。
new_date type new_data old_date old_data
2022-01 t1 0 2021-10 2
2022-03 t2 1 null null
2021-08 t1 1 2021-07 4
请注意:
- 仅连接 type 相同的行。
- 对于 table_new_data 中的每一行,仅与 table_old_data 中 date 更早且最接近的那一行连接。例如,对于 table_new_data 中的 2021-08 t1 1,我们只想连接 table_old_data 中的 2021-07 t1 4。
date 的格式为 YYYY-MM。
请参见下面的查询……同时请注意考虑边界情况。
-- ---------------------------------------------------------------------------------
-- create dummy NEW table
-- ---------------------------------------------------------------------------------
WITH
  table_new_data AS (
    SELECT
      '2022-01' AS date,
      't1' AS type,
      0 AS DATA
    UNION ALL
    SELECT
      '2022-03' AS date,
      't2' AS type,
      1 AS DATA
    UNION ALL
    SELECT
      '2021-08' AS date,
      't1' AS type,
      1 AS DATA
  ),
-- ---------------------------------------------------------------------------------
-- create dummy OLD table
-- ---------------------------------------------------------------------------------
  table_old_data AS (
    SELECT
      '2021-10' AS date,
      't1' AS type,
      2 AS DATA
    UNION ALL
    SELECT
      '2022-04' AS date,
      't2' AS type,
      3 AS DATA
    UNION ALL
    SELECT
      '2021-07' AS date,
      't1' AS type,
      4 AS DATA
    UNION ALL
    SELECT
      '2021-06' AS date,
      't1' AS type,
      5 AS DATA
  ),
-- ---------------------------------------------------------------------------------
-- Pair every NEW row with all OLD rows of the same type that are strictly earlier
-- (change `<` to `<=` if an OLD row in the same month should also qualify).
-- Dates are 'YYYY-MM' strings, so plain string comparison orders them
-- chronologically.
-- rn ranks the candidate OLD rows from latest to earliest, so rn = 1 is the
-- closest previous OLD row. A NEW row with no earlier OLD row still yields one
-- row (rn = 1) with NULL old_* columns because of the LEFT JOIN.
-- NOTE(review): assumes (type, date) is unique in table_new_data; duplicates
-- would share a partition and only one of them would survive rn = 1 -- confirm.
-- NOTE(review): if table_old_data holds several rows with the same type and
-- date, which one wins is arbitrary; add a tie-breaker to ORDER BY if needed.
-- ---------------------------------------------------------------------------------
  ordered AS (
    SELECT
      nd.date AS new_date,
      nd.type,
      nd.DATA AS new_data,
      od.date AS old_date,
      od.DATA AS old_data,
      ROW_NUMBER() OVER (
        PARTITION BY nd.type, nd.date
        ORDER BY od.date DESC
      ) AS rn
    FROM
      table_new_data AS nd
    LEFT JOIN
      table_old_data AS od
    ON
      nd.type = od.type
      AND od.date < nd.date
  )
-- ---------------------------------------------------------------------------------
-- final table to reproduce desired output in question
-- ---------------------------------------------------------------------------------
SELECT
  new_date,
  type,
  new_data,
  old_date,
  old_data
FROM
  ordered
WHERE
  rn = 1
输出: