根据扩展的日期范围连接表
Joining tables based on expanded date range
我有两个表:orders
和 currency_rates
。我需要使用 orders.received_at
和 currency_rates.valid_from
来连接这两个表(同时还要按公共列 currency_id 连接,但这部分很简单)。
我面临的问题是 currency_rates.valid_from
并不包含每一个日期,而只是各个日期范围的边界日期(即每个范围的起始日期,同时也是上一个范围的结束日期)。实际上,我有三个汇率有效期:'2019-12-01' - '2020-01-03'
、'2020-01-03' - '2020-01-09'
、'2020-01-09' - onward
。在 Postgres 中最优雅的实现方式是什么?是否可以使用 Select 语句来做到这一点?谢谢!
编辑:欢迎不手动指定范围的解决方案。
EDIT2:添加了一个 table orders_currency_rates,其中包含一些所需结果的示例行。
-- Sample orders. Each order carries the date it was received and the currency
-- it was placed in; both are used to look up the applicable exchange rate.
CREATE TABLE orders (
    id                  BIGINT,
    received_at         DATE,
    shipping_cost       DOUBLE PRECISION,
    currency_id         VARCHAR,
    invoice_address_id  BIGINT,
    delivery_address_id BIGINT
);

-- Note: the first order uses 'CZK', which has no row in currency_rates below.
INSERT INTO orders
    (id, received_at, shipping_cost, currency_id, invoice_address_id, delivery_address_id)
VALUES
    (385902, '2020-01-01', 0,     'CZK', 1,  11),
    (386278, '2020-01-02', 12.83, 'USD', 2,  NULL),
    (386279, '2020-01-03', 49.36, 'USD', 3,  12),
    (386280, '2020-01-03', 12.83, 'USD', 4,  13),
    (386281, '2020-01-05', 12.83, 'USD', 5,  14),
    (386282, '2020-01-06', 11.43, 'GBP', 6,  NULL),
    (386283, '2020-01-07', 12.83, 'USD', 7,  15),
    (386284, '2020-01-08', 44.03, 'EUR', 8,  NULL),
    (386285, '2020-01-11', 12.83, 'USD', 9,  NULL),
    (386286, '2020-02-12', 62.55, 'USD', 10, NULL);

-- Exchange rates. A row only records the date a rate starts to apply
-- (valid_from); there is no explicit end-date column.
CREATE TABLE currency_rates (
    currency_id VARCHAR,
    rate        DOUBLE PRECISION,
    valid_from  DATE
);

INSERT INTO currency_rates
    (currency_id, rate, valid_from)
VALUES
    -- rates starting 2019-12-01
    ('EUR', 24.165, '2019-12-01'),
    ('USD', 19.359, '2019-12-01'),
    ('GBP', 27.039, '2019-12-01'),
    ('PLN', 5.5,    '2019-12-01'),
    -- rates starting 2020-01-03
    ('EUR', 25.2,   '2020-01-03'),
    ('USD', 20.34,  '2020-01-03'),
    ('GBP', 28.4,   '2020-01-03'),
    ('PLN', 5.3,    '2020-01-03'),
    -- rates starting 2020-01-09
    ('EUR', 26.165, '2020-01-09'),
    ('USD', 21.359, '2020-01-09'),
    ('GBP', 29.039, '2020-01-09'),
    ('PLN', 5.8,    '2020-01-09');
连接所需结果的示例。
-- Shape of the desired join output: every column of an order followed by the
-- rate and valid_from of the exchange-rate row that applies to it.
CREATE TABLE orders_currency_rates (
    id                  BIGINT,
    received_at         DATE,
    shipping_cost       DOUBLE PRECISION,
    currency_id         VARCHAR,
    invoice_address_id  BIGINT,
    delivery_address_id BIGINT,
    rate                DOUBLE PRECISION,
    valid_from          DATE
);

-- Three example rows, one for each of the three rate start dates.
INSERT INTO orders_currency_rates
    (id, received_at, shipping_cost, currency_id, invoice_address_id, delivery_address_id, rate, valid_from)
VALUES
    (386278, '2020-01-02', 12.83, 'USD', 2,  NULL, 19.359, '2019-12-01'),
    (386279, '2020-01-03', 49.36, 'USD', 3,  12,   20.34,  '2020-01-03'),
    (386286, '2020-02-12', 62.55, 'USD', 10, NULL, 21.359, '2020-01-09');
我会用
-- Sketch only: the `...` entries appear to be placeholders for the remaining
-- columns, so this statement is not runnable as written.
-- Idea: map each order's received_at onto the start date of the hard-coded
-- rate period it falls into, then join currency_rates on that derived date.
-- NOTE(review): the join shown uses only valid_from; currency_id presumably
-- also has to be part of the join condition -- confirm before reusing.
SELECT ...
FROM (SELECT CASE WHEN received_at >= '2020-01-09'
THEN DATE '2020-01-09'
WHEN received_at >= '2020-01-03'
THEN DATE '2020-01-03'
ELSE DATE '2019-12-01'
END AS valid_from,
...
FROM orders
) AS orders_adj
JOIN currency_rates USING (valid_from);
如何实现这个范围查询?
关键是在 LEFT JOIN 的连接条件中对汇率数据做两次过滤:
- 先排除生效日期(valid_from)晚于订单日期(received_at)的汇率;
- 再用 max 取出剩余汇率中最接近订单日期的生效日期。
-- For every order, attach the rate of the same currency whose valid_from is
-- the latest date that is not after the order's received_at.
-- Orders without any applicable rate are kept, with NULL rate columns.
SELECT *
FROM orders o
LEFT JOIN currency_rates c
    ON c.currency_id = o.currency_id
   AND c.valid_from = (
        SELECT max(cr.valid_from)
        FROM currency_rates cr
        -- Correlate on the currency as well as the date: otherwise the max is
        -- taken over all currencies, and an order whose currency has no row on
        -- that exact date would silently get no rate at all.
        WHERE cr.currency_id = o.currency_id
          AND cr.valid_from <= o.received_at
   );
/* My result:
id | received_at | shipping_cost | currency_id | invoice_address_id | delivery_address_id | currency_id | rate | valid_from
--------+-------------+---------------+-------------+--------------------+---------------------+-------------+--------+------------
385902 | 2020-01-01 | 0 | CZK | 1 | 11 | | |
386284 | 2020-01-08 | 44.03 | EUR | 8 | | EUR | 25.2 | 2020-01-03
386282 | 2020-01-06 | 11.43 | GBP | 6 | | GBP | 28.4 | 2020-01-03
386278 | 2020-01-02 | 12.83 | USD | 2 | | USD | 19.359 | 2019-12-01
386281 | 2020-01-05 | 12.83 | USD | 5 | 14 | USD | 20.34 | 2020-01-03
386279 | 2020-01-03 | 49.36 | USD | 3 | 12 | USD | 20.34 | 2020-01-03
386280 | 2020-01-03 | 12.83 | USD | 4 | 13 | USD | 20.34 | 2020-01-03
386283 | 2020-01-07 | 12.83 | USD | 7 | 15 | USD | 20.34 | 2020-01-03
386285 | 2020-01-11 | 12.83 | USD | 9 | | USD | 21.359 | 2020-01-09
386286 | 2020-02-12 | 62.55 | USD | 10 | | USD | 21.359 | 2020-01-09
(10 rows)
*/
使用横向连接!
-- For each order, look up the single most recent rate of the same currency
-- that was already valid on the order's received_at date.
-- "left join ... on true" keeps orders for which no such rate exists.
select o.*, cr.*
from orders o
left join lateral (
    select cr.*
    from currency_rates cr
    where cr.currency_id = o.currency_id
      and cr.valid_from <= o.received_at
    order by cr.valid_from desc
    limit 1  -- newest applicable rate only; without this every earlier rate would match too
) cr on true;
我有两个表:orders
和 currency_rates
。我需要使用 orders.received_at
和 currency_rates.valid_from
来连接这两个表(同时还要按公共列 currency_id 连接,但这部分很简单)。
我面临的问题是 currency_rates.valid_from
并不包含每一个日期,而只是各个日期范围的边界日期(即每个范围的起始日期,同时也是上一个范围的结束日期)。实际上,我有三个汇率有效期:'2019-12-01' - '2020-01-03'
、'2020-01-03' - '2020-01-09'
、'2020-01-09' - onward
。在 Postgres 中最优雅的实现方式是什么?是否可以使用 Select 语句来做到这一点?谢谢!
编辑:欢迎不手动指定范围的解决方案。
EDIT2:添加了一个 table orders_currency_rates,其中包含一些所需结果的示例行。
-- Sample orders. Each order carries the date it was received and the currency
-- it was placed in; both are used to look up the applicable exchange rate.
CREATE TABLE orders (
    id                  BIGINT,
    received_at         DATE,
    shipping_cost       DOUBLE PRECISION,
    currency_id         VARCHAR,
    invoice_address_id  BIGINT,
    delivery_address_id BIGINT
);

-- Note: the first order uses 'CZK', which has no row in currency_rates below.
INSERT INTO orders
    (id, received_at, shipping_cost, currency_id, invoice_address_id, delivery_address_id)
VALUES
    (385902, '2020-01-01', 0,     'CZK', 1,  11),
    (386278, '2020-01-02', 12.83, 'USD', 2,  NULL),
    (386279, '2020-01-03', 49.36, 'USD', 3,  12),
    (386280, '2020-01-03', 12.83, 'USD', 4,  13),
    (386281, '2020-01-05', 12.83, 'USD', 5,  14),
    (386282, '2020-01-06', 11.43, 'GBP', 6,  NULL),
    (386283, '2020-01-07', 12.83, 'USD', 7,  15),
    (386284, '2020-01-08', 44.03, 'EUR', 8,  NULL),
    (386285, '2020-01-11', 12.83, 'USD', 9,  NULL),
    (386286, '2020-02-12', 62.55, 'USD', 10, NULL);

-- Exchange rates. A row only records the date a rate starts to apply
-- (valid_from); there is no explicit end-date column.
CREATE TABLE currency_rates (
    currency_id VARCHAR,
    rate        DOUBLE PRECISION,
    valid_from  DATE
);

INSERT INTO currency_rates
    (currency_id, rate, valid_from)
VALUES
    -- rates starting 2019-12-01
    ('EUR', 24.165, '2019-12-01'),
    ('USD', 19.359, '2019-12-01'),
    ('GBP', 27.039, '2019-12-01'),
    ('PLN', 5.5,    '2019-12-01'),
    -- rates starting 2020-01-03
    ('EUR', 25.2,   '2020-01-03'),
    ('USD', 20.34,  '2020-01-03'),
    ('GBP', 28.4,   '2020-01-03'),
    ('PLN', 5.3,    '2020-01-03'),
    -- rates starting 2020-01-09
    ('EUR', 26.165, '2020-01-09'),
    ('USD', 21.359, '2020-01-09'),
    ('GBP', 29.039, '2020-01-09'),
    ('PLN', 5.8,    '2020-01-09');
连接所需结果的示例。
-- Shape of the desired join output: every column of an order followed by the
-- rate and valid_from of the exchange-rate row that applies to it.
CREATE TABLE orders_currency_rates (
    id                  BIGINT,
    received_at         DATE,
    shipping_cost       DOUBLE PRECISION,
    currency_id         VARCHAR,
    invoice_address_id  BIGINT,
    delivery_address_id BIGINT,
    rate                DOUBLE PRECISION,
    valid_from          DATE
);

-- Three example rows, one for each of the three rate start dates.
INSERT INTO orders_currency_rates
    (id, received_at, shipping_cost, currency_id, invoice_address_id, delivery_address_id, rate, valid_from)
VALUES
    (386278, '2020-01-02', 12.83, 'USD', 2,  NULL, 19.359, '2019-12-01'),
    (386279, '2020-01-03', 49.36, 'USD', 3,  12,   20.34,  '2020-01-03'),
    (386286, '2020-02-12', 62.55, 'USD', 10, NULL, 21.359, '2020-01-09');
我会用
-- Sketch only: the `...` entries appear to be placeholders for the remaining
-- columns, so this statement is not runnable as written.
-- Idea: map each order's received_at onto the start date of the hard-coded
-- rate period it falls into, then join currency_rates on that derived date.
-- NOTE(review): the join shown uses only valid_from; currency_id presumably
-- also has to be part of the join condition -- confirm before reusing.
SELECT ...
FROM (SELECT CASE WHEN received_at >= '2020-01-09'
THEN DATE '2020-01-09'
WHEN received_at >= '2020-01-03'
THEN DATE '2020-01-03'
ELSE DATE '2019-12-01'
END AS valid_from,
...
FROM orders
) AS orders_adj
JOIN currency_rates USING (valid_from);
如何实现这个范围查询?
关键是在 LEFT JOIN 的连接条件中对汇率数据做两次过滤:
- 先排除生效日期(valid_from)晚于订单日期(received_at)的汇率;
- 再用 max 取出剩余汇率中最接近订单日期的生效日期。
-- For every order, attach the rate of the same currency whose valid_from is
-- the latest date that is not after the order's received_at.
-- Orders without any applicable rate are kept, with NULL rate columns.
SELECT *
FROM orders o
LEFT JOIN currency_rates c
    ON c.currency_id = o.currency_id
   AND c.valid_from = (
        SELECT max(cr.valid_from)
        FROM currency_rates cr
        -- Correlate on the currency as well as the date: otherwise the max is
        -- taken over all currencies, and an order whose currency has no row on
        -- that exact date would silently get no rate at all.
        WHERE cr.currency_id = o.currency_id
          AND cr.valid_from <= o.received_at
   );
/* My result:
id | received_at | shipping_cost | currency_id | invoice_address_id | delivery_address_id | currency_id | rate | valid_from
--------+-------------+---------------+-------------+--------------------+---------------------+-------------+--------+------------
385902 | 2020-01-01 | 0 | CZK | 1 | 11 | | |
386284 | 2020-01-08 | 44.03 | EUR | 8 | | EUR | 25.2 | 2020-01-03
386282 | 2020-01-06 | 11.43 | GBP | 6 | | GBP | 28.4 | 2020-01-03
386278 | 2020-01-02 | 12.83 | USD | 2 | | USD | 19.359 | 2019-12-01
386281 | 2020-01-05 | 12.83 | USD | 5 | 14 | USD | 20.34 | 2020-01-03
386279 | 2020-01-03 | 49.36 | USD | 3 | 12 | USD | 20.34 | 2020-01-03
386280 | 2020-01-03 | 12.83 | USD | 4 | 13 | USD | 20.34 | 2020-01-03
386283 | 2020-01-07 | 12.83 | USD | 7 | 15 | USD | 20.34 | 2020-01-03
386285 | 2020-01-11 | 12.83 | USD | 9 | | USD | 21.359 | 2020-01-09
386286 | 2020-02-12 | 62.55 | USD | 10 | | USD | 21.359 | 2020-01-09
(10 rows)
*/
使用横向连接!
-- For each order, look up the single most recent rate of the same currency
-- that was already valid on the order's received_at date.
-- "left join ... on true" keeps orders for which no such rate exists.
select o.*, cr.*
from orders o
left join lateral (
    select cr.*
    from currency_rates cr
    where cr.currency_id = o.currency_id
      and cr.valid_from <= o.received_at
    order by cr.valid_from desc
    limit 1  -- newest applicable rate only; without this every earlier rate would match too
) cr on true;