如何在多个 CTE 中引用日期数组?
How do I reference a date array in multiple CTE's?
我有一个工作查询,它根据截至该月底的客户交易计算摘要指标(在计算中使用该月的最后一天)。
我现在正尝试针对文件中的所有月份运行这些计算。
我有两个引用报告月份的 CTE,但我无法弄清楚如何以及在何处将日期数组加入查询。
如果有任何建议,我将不胜感激!
下面的工作查询将月末日期硬编码到查询中。
WITH input_data AS (
    -- sample orders; the first SELECT supplies the column names for every UNION ALL branch
    SELECT
        '#1238' AS order_id,
        DATE '2021-12-15' AS order_date,
        'c12345' AS cust_id,
        18 AS order_value
    UNION ALL SELECT '#1201', DATE '2021-10-10', 'c12345', 18
    UNION ALL SELECT '#1198', DATE '2021-07-05', 'c12345', 20
    UNION ALL SELECT '#1134', DATE '2020-10-15', 'c12345', 10
    UNION ALL SELECT '#1112', DATE '2019-08-10', 'c12345', 5
    UNION ALL SELECT '#1234', DATE '2021-07-05', 'c11111', 118
    UNION ALL SELECT '#1294', DATE '2021-01-05', 'c11111', 68
    UNION ALL SELECT '#1290', DATE '2021-01-01', 'c11111', 82
    UNION ALL SELECT '#1284', DATE '2020-01-15', 'c22222', 98
),

-- month-end date for each one-month step from the earliest to the latest order date;
-- intended as the grouping key of the final step, but no later CTE references it yet
dates AS (
    SELECT
        LAST_DAY(array_date, MONTH) AS mth_end_date
    FROM
        UNNEST(
            GENERATE_DATE_ARRAY(
                (SELECT MIN(order_date) FROM input_data),
                (SELECT MAX(order_date) FROM input_data),
                INTERVAL 1 MONTH
            )
        ) AS array_date
),

-- attach to every order the date of the same customer's previous order
-- (NULL for a customer's first order); only orders dated before the
-- hard-coded report date 2021-12-31 are kept
enhanced_orders AS (
    SELECT
        order_id,
        order_date,
        cust_id,
        order_value,
        LAG(order_date) OVER (PARTITION BY cust_id ORDER BY order_date) AS cust_prev_order_date
    FROM input_data
    WHERE order_date < DATE '2021-12-31'
),

-- per customer: most recent order date and the order date just before it
cust_calc AS (
    SELECT
        cust_id,
        MAX(order_date) AS max_order_date,
        MAX(cust_prev_order_date) AS max_prev_order_date
    FROM enhanced_orders
    GROUP BY cust_id
),

-- per customer: recency = days from the latest order to the report date,
-- latency = days between the two latest orders;
-- customers with a single order, or whose latest order is not within the
-- year before the report date, are dropped
cust_calc2 AS (
    SELECT
        cust_id,
        DATE_DIFF(DATE '2021-12-31', max_order_date, DAY) AS recency,
        DATE_DIFF(max_order_date, max_prev_order_date, DAY) AS latency
    FROM cust_calc
    WHERE max_prev_order_date IS NOT NULL
        AND max_order_date > DATE_SUB(DATE '2021-12-31', INTERVAL 1 YEAR)
)

-- average recency and latency over all remaining customers at the report date
-- TODO: add format_date('%Y-%m', eom_date) as the first column and GROUP BY it
--       once the report dates are wired into the CTEs above
SELECT
    AVG(recency) AS monthly_recency,
    AVG(latency) AS monthly_latency
FROM cust_calc2
以下是基于 PostgreSQL14 的测试。但是建议的更改应该可以直接应用。
以下结果仅包含数据中实际出现的月份:
计算所有 input_data 的先前订单日期,没有限制。我们稍后会添加该限制:
, enhanced_orders AS (
    -- attach to every order the date of the same customer's previous order
    -- (NULL for a customer's first order); no date filter is applied here
    SELECT
        order_id
      , order_date
      , cust_id
      , order_value
      , LAG(order_date) OVER (PARTITION BY cust_id ORDER BY order_date) AS cust_prev_order_date
    FROM input_data
)
现在引入报表日期列表(隐含交叉连接),并根据每个对应的mth_end_date限制行,在GROUP BY项中添加mth_end_date:
, cust_calc AS (
    -- For each (customer, report date) pair determine the most recent order date
    -- and the order date just before it, looking only at orders dated before
    -- that report date.
    -- CROSS JOIN spells out the implied cross join of `FROM enhanced_orders, dates`:
    -- every order is paired with every report date, then filtered per date.
    -- NOTE(review): the strict `<` leaves out orders placed exactly on
    -- mth_end_date, mirroring the hard-coded `order_date < Date('2021-12-31')`
    -- of the original query; use `<=` if month-end orders should count.
    SELECT
        o.cust_id
      , d.mth_end_date
      , MAX(o.order_date) AS max_order_date
      , MAX(o.cust_prev_order_date) AS max_prev_order_date
    FROM enhanced_orders AS o
    CROSS JOIN dates AS d
    WHERE o.order_date < d.mth_end_date
    GROUP BY o.cust_id, d.mth_end_date
)
根据需要在下一个 CTE 中带上 mth_end_date,并为其添加别名 eom_date。
注意:为适配 PostgreSQL 做了一些细微改动,请只关注与 mth_end_date 相关的调整:
, cust_calc2 AS (
    -- For each (customer, report date) pair:
    --   recency = days from the most recent order to the report date
    --   latency = days between the two most recent orders
    -- Rows are dropped when the customer has no order prior to the most recent
    -- one, or when the most recent order is not within the year before the
    -- report date.
    -- In PostgreSQL `date - date` yields an integer number of days, the same
    -- unit as date_diff(..., day) in the original query; age() would instead
    -- return a symbolic year/month/day interval whose average is not a day count.
    SELECT
        cust_id
      , mth_end_date AS eom_date
      , mth_end_date - max_order_date AS recency
      , max_order_date - max_prev_order_date AS latency
    FROM cust_calc
    WHERE max_prev_order_date IS NOT NULL
      AND max_order_date > (mth_end_date - INTERVAL '1' YEAR)
)
最后,最后一个查询表达式按eom_date分组:
-- one output row per report date: mean recency and mean latency over the
-- customers that remain in cust_calc2 for that date
-- (format_date('%Y-%m', eom_date) can be added here for a year-month label)
SELECT
    eom_date
  , AVG(recency) AS monthly_recency
  , AVG(latency) AS monthly_latency
FROM cust_calc2
GROUP BY eom_date
ORDER BY eom_date
;
最终PGSQL:
CREATE OR REPLACE FUNCTION last_day(date)
RETURNS date AS
$$
    -- Returns the last calendar day of the month containing the argument ($1):
    -- truncate to the first day of the month, add one month, step back one day.
    -- STRICT: a NULL argument returns NULL without evaluating the body.
    SELECT (date_trunc('MONTH', $1) + INTERVAL '1 MONTH - 1 day')::date;
$$ LANGUAGE sql IMMUTABLE STRICT;
WITH input_data AS (
    -- sample orders; the first SELECT supplies the column names for every branch
    -- (no CTE here refers to itself, so the RECURSIVE keyword is not needed)
    SELECT '#1238' AS order_id, DATE '2021-12-15' AS order_date, 'c12345' AS cust_id, 18 AS order_value UNION ALL
    SELECT '#1201', DATE '2021-10-10', 'c12345', 18 UNION ALL
    SELECT '#1198', DATE '2021-07-05', 'c12345', 20 UNION ALL
    SELECT '#1134', DATE '2020-10-15', 'c12345', 10 UNION ALL
    SELECT '#1112', DATE '2019-08-10', 'c12345', 5 UNION ALL
    SELECT '#1234', DATE '2021-07-05', 'c11111', 118 UNION ALL
    SELECT '#1294', DATE '2021-01-05', 'c11111', 68 UNION ALL
    SELECT '#1290', DATE '2021-01-01', 'c11111', 82 UNION ALL
    SELECT '#1284', DATE '2020-01-15', 'c22222', 98
)
, dates AS (
    -- report dates: the month-end date of every month that has at least one order
    SELECT DISTINCT last_day(order_date) AS mth_end_date
    FROM input_data
)
, enhanced_orders AS (
    -- attach to every order the date of the same customer's previous order
    -- (NULL for a customer's first order); left unfiltered on purpose, the
    -- per-report-date cut-off is applied in cust_calc
    SELECT
        order_id
      , order_date
      , cust_id
      , order_value
      , LAG(order_date) OVER (PARTITION BY cust_id ORDER BY order_date) AS cust_prev_order_date
    FROM input_data
)
, cust_calc AS (
    -- for each (customer, report date) pair: most recent order date and the
    -- order date just before it, looking only at orders dated before that
    -- report date; every order is paired with every report date, then filtered
    -- NOTE(review): the strict `<` leaves out orders placed exactly on
    -- mth_end_date; use `<=` if month-end orders should count
    SELECT
        o.cust_id
      , d.mth_end_date
      , MAX(o.order_date) AS max_order_date
      , MAX(o.cust_prev_order_date) AS max_prev_order_date
    FROM enhanced_orders AS o
    CROSS JOIN dates AS d
    WHERE o.order_date < d.mth_end_date
    GROUP BY o.cust_id, d.mth_end_date
)
, cust_calc2 AS (
    -- recency = days from the most recent order to the report date
    -- latency = days between the two most recent orders
    -- rows are dropped when there is no order prior to the most recent one, or
    -- when the most recent order is not within the year before the report date
    -- `date - date` yields an integer number of days in PostgreSQL, so the
    -- averages below are day counts; age() would return a symbolic
    -- year/month/day interval instead
    SELECT
        cust_id
      , mth_end_date AS eom_date
      , mth_end_date - max_order_date AS recency
      , max_order_date - max_prev_order_date AS latency
    FROM cust_calc
    WHERE max_prev_order_date IS NOT NULL
      AND max_order_date > (mth_end_date - INTERVAL '1' YEAR)
)
-- one output row per report date: mean recency and mean latency (in days)
-- over the customers that qualify for that date
SELECT
    eom_date
  , AVG(recency) AS monthly_recency
  , AVG(latency) AS monthly_latency
FROM cust_calc2
GROUP BY eom_date
ORDER BY eom_date
;
我有一个工作查询,它根据截至该月底的客户交易计算摘要指标(在计算中使用该月的最后一天)。
我现在正尝试针对文件中的所有月份运行这些计算。
我有两个引用报告月份的 CTE,但我无法弄清楚如何以及在何处将日期数组加入查询。
如果有任何建议,我将不胜感激!
下面的工作查询将月末日期硬编码到查询中。
WITH input_data AS (
    -- sample orders; the first SELECT supplies the column names for every UNION ALL branch
    SELECT
        '#1238' AS order_id,
        DATE '2021-12-15' AS order_date,
        'c12345' AS cust_id,
        18 AS order_value
    UNION ALL SELECT '#1201', DATE '2021-10-10', 'c12345', 18
    UNION ALL SELECT '#1198', DATE '2021-07-05', 'c12345', 20
    UNION ALL SELECT '#1134', DATE '2020-10-15', 'c12345', 10
    UNION ALL SELECT '#1112', DATE '2019-08-10', 'c12345', 5
    UNION ALL SELECT '#1234', DATE '2021-07-05', 'c11111', 118
    UNION ALL SELECT '#1294', DATE '2021-01-05', 'c11111', 68
    UNION ALL SELECT '#1290', DATE '2021-01-01', 'c11111', 82
    UNION ALL SELECT '#1284', DATE '2020-01-15', 'c22222', 98
),

-- month-end date for each one-month step from the earliest to the latest order date;
-- intended as the grouping key of the final step, but no later CTE references it yet
dates AS (
    SELECT
        LAST_DAY(array_date, MONTH) AS mth_end_date
    FROM
        UNNEST(
            GENERATE_DATE_ARRAY(
                (SELECT MIN(order_date) FROM input_data),
                (SELECT MAX(order_date) FROM input_data),
                INTERVAL 1 MONTH
            )
        ) AS array_date
),

-- attach to every order the date of the same customer's previous order
-- (NULL for a customer's first order); only orders dated before the
-- hard-coded report date 2021-12-31 are kept
enhanced_orders AS (
    SELECT
        order_id,
        order_date,
        cust_id,
        order_value,
        LAG(order_date) OVER (PARTITION BY cust_id ORDER BY order_date) AS cust_prev_order_date
    FROM input_data
    WHERE order_date < DATE '2021-12-31'
),

-- per customer: most recent order date and the order date just before it
cust_calc AS (
    SELECT
        cust_id,
        MAX(order_date) AS max_order_date,
        MAX(cust_prev_order_date) AS max_prev_order_date
    FROM enhanced_orders
    GROUP BY cust_id
),

-- per customer: recency = days from the latest order to the report date,
-- latency = days between the two latest orders;
-- customers with a single order, or whose latest order is not within the
-- year before the report date, are dropped
cust_calc2 AS (
    SELECT
        cust_id,
        DATE_DIFF(DATE '2021-12-31', max_order_date, DAY) AS recency,
        DATE_DIFF(max_order_date, max_prev_order_date, DAY) AS latency
    FROM cust_calc
    WHERE max_prev_order_date IS NOT NULL
        AND max_order_date > DATE_SUB(DATE '2021-12-31', INTERVAL 1 YEAR)
)

-- average recency and latency over all remaining customers at the report date
-- TODO: add format_date('%Y-%m', eom_date) as the first column and GROUP BY it
--       once the report dates are wired into the CTEs above
SELECT
    AVG(recency) AS monthly_recency,
    AVG(latency) AS monthly_latency
FROM cust_calc2
以下是基于 PostgreSQL14 的测试。但是建议的更改应该可以直接应用。
以下结果仅包含数据中实际出现的月份:
计算所有 input_data 的先前订单日期,没有限制。我们稍后会添加该限制:
, enhanced_orders AS (
    -- attach to every order the date of the same customer's previous order
    -- (NULL for a customer's first order); no date filter is applied here
    SELECT
        order_id
      , order_date
      , cust_id
      , order_value
      , LAG(order_date) OVER (PARTITION BY cust_id ORDER BY order_date) AS cust_prev_order_date
    FROM input_data
)
现在引入报表日期列表(隐含交叉连接),并根据每个对应的mth_end_date限制行,在GROUP BY项中添加mth_end_date:
, cust_calc AS (
    -- For each (customer, report date) pair determine the most recent order date
    -- and the order date just before it, looking only at orders dated before
    -- that report date.
    -- CROSS JOIN spells out the implied cross join of `FROM enhanced_orders, dates`:
    -- every order is paired with every report date, then filtered per date.
    -- NOTE(review): the strict `<` leaves out orders placed exactly on
    -- mth_end_date, mirroring the hard-coded `order_date < Date('2021-12-31')`
    -- of the original query; use `<=` if month-end orders should count.
    SELECT
        o.cust_id
      , d.mth_end_date
      , MAX(o.order_date) AS max_order_date
      , MAX(o.cust_prev_order_date) AS max_prev_order_date
    FROM enhanced_orders AS o
    CROSS JOIN dates AS d
    WHERE o.order_date < d.mth_end_date
    GROUP BY o.cust_id, d.mth_end_date
)
根据需要在下一个 CTE 中带上 mth_end_date,并为其添加别名 eom_date。
注意:为适配 PostgreSQL 做了一些细微改动,请只关注与 mth_end_date 相关的调整:
, cust_calc2 AS (
    -- For each (customer, report date) pair:
    --   recency = days from the most recent order to the report date
    --   latency = days between the two most recent orders
    -- Rows are dropped when the customer has no order prior to the most recent
    -- one, or when the most recent order is not within the year before the
    -- report date.
    -- In PostgreSQL `date - date` yields an integer number of days, the same
    -- unit as date_diff(..., day) in the original query; age() would instead
    -- return a symbolic year/month/day interval whose average is not a day count.
    SELECT
        cust_id
      , mth_end_date AS eom_date
      , mth_end_date - max_order_date AS recency
      , max_order_date - max_prev_order_date AS latency
    FROM cust_calc
    WHERE max_prev_order_date IS NOT NULL
      AND max_order_date > (mth_end_date - INTERVAL '1' YEAR)
)
最后,最后一个查询表达式按eom_date分组:
-- one output row per report date: mean recency and mean latency over the
-- customers that remain in cust_calc2 for that date
-- (format_date('%Y-%m', eom_date) can be added here for a year-month label)
SELECT
    eom_date
  , AVG(recency) AS monthly_recency
  , AVG(latency) AS monthly_latency
FROM cust_calc2
GROUP BY eom_date
ORDER BY eom_date
;
最终PGSQL:
CREATE OR REPLACE FUNCTION last_day(date)
RETURNS date AS
$$
    -- Returns the last calendar day of the month containing the argument ($1):
    -- truncate to the first day of the month, add one month, step back one day.
    -- STRICT: a NULL argument returns NULL without evaluating the body.
    SELECT (date_trunc('MONTH', $1) + INTERVAL '1 MONTH - 1 day')::date;
$$ LANGUAGE sql IMMUTABLE STRICT;
WITH input_data AS (
    -- sample orders; the first SELECT supplies the column names for every branch
    -- (no CTE here refers to itself, so the RECURSIVE keyword is not needed)
    SELECT '#1238' AS order_id, DATE '2021-12-15' AS order_date, 'c12345' AS cust_id, 18 AS order_value UNION ALL
    SELECT '#1201', DATE '2021-10-10', 'c12345', 18 UNION ALL
    SELECT '#1198', DATE '2021-07-05', 'c12345', 20 UNION ALL
    SELECT '#1134', DATE '2020-10-15', 'c12345', 10 UNION ALL
    SELECT '#1112', DATE '2019-08-10', 'c12345', 5 UNION ALL
    SELECT '#1234', DATE '2021-07-05', 'c11111', 118 UNION ALL
    SELECT '#1294', DATE '2021-01-05', 'c11111', 68 UNION ALL
    SELECT '#1290', DATE '2021-01-01', 'c11111', 82 UNION ALL
    SELECT '#1284', DATE '2020-01-15', 'c22222', 98
)
, dates AS (
    -- report dates: the month-end date of every month that has at least one order
    SELECT DISTINCT last_day(order_date) AS mth_end_date
    FROM input_data
)
, enhanced_orders AS (
    -- attach to every order the date of the same customer's previous order
    -- (NULL for a customer's first order); left unfiltered on purpose, the
    -- per-report-date cut-off is applied in cust_calc
    SELECT
        order_id
      , order_date
      , cust_id
      , order_value
      , LAG(order_date) OVER (PARTITION BY cust_id ORDER BY order_date) AS cust_prev_order_date
    FROM input_data
)
, cust_calc AS (
    -- for each (customer, report date) pair: most recent order date and the
    -- order date just before it, looking only at orders dated before that
    -- report date; every order is paired with every report date, then filtered
    -- NOTE(review): the strict `<` leaves out orders placed exactly on
    -- mth_end_date; use `<=` if month-end orders should count
    SELECT
        o.cust_id
      , d.mth_end_date
      , MAX(o.order_date) AS max_order_date
      , MAX(o.cust_prev_order_date) AS max_prev_order_date
    FROM enhanced_orders AS o
    CROSS JOIN dates AS d
    WHERE o.order_date < d.mth_end_date
    GROUP BY o.cust_id, d.mth_end_date
)
, cust_calc2 AS (
    -- recency = days from the most recent order to the report date
    -- latency = days between the two most recent orders
    -- rows are dropped when there is no order prior to the most recent one, or
    -- when the most recent order is not within the year before the report date
    -- `date - date` yields an integer number of days in PostgreSQL, so the
    -- averages below are day counts; age() would return a symbolic
    -- year/month/day interval instead
    SELECT
        cust_id
      , mth_end_date AS eom_date
      , mth_end_date - max_order_date AS recency
      , max_order_date - max_prev_order_date AS latency
    FROM cust_calc
    WHERE max_prev_order_date IS NOT NULL
      AND max_order_date > (mth_end_date - INTERVAL '1' YEAR)
)
-- one output row per report date: mean recency and mean latency (in days)
-- over the customers that qualify for that date
SELECT
    eom_date
  , AVG(recency) AS monthly_recency
  , AVG(latency) AS monthly_latency
FROM cust_calc2
GROUP BY eom_date
ORDER BY eom_date
;