Presto SQL 查找当前交易前一年的交易数量
Presto SQL to find number of transactions in the year before the current transaction
我有一个(简化的)交易 table 客户和订单日期。对于每个 row/order,我想查找当前订单前一年的订单数。我可以通过自连接来做到这一点,但是当我的事务 table 大得多时,它会变得低效。我想我真的想在日期字段上使用一个范围介于 between 之间的 window 函数,但这还没有在 Presto 中实现。关于如何更有效地执行此操作的任何想法?
with
transactions as (
select
1 as customer,
date '2020-01-01' as order_date
union all
select
1 as customer,
date '2020-01-26' as order_date
union all
select
1 as customer,
date '2020-02-01' as order_date
union all
select
1 as customer,
date '2020-02-02' as order_date
)
select
t1.*,
count(case when t2.order_date between date_add('day', -14, t1.order_date) and date_add('day', -1, t1.order_date) then t2.order_date else null end) as orders_14_days_before
from
transactions t1
left join
transactions t2 on t1.customer = t2.customer
group by
t1.customer,
t1.order_date
结果:
customer order_date orders_14_days_before
1 2020-01-01 0
1 2020-01-26 0
1 2020-02-01 1
1 2020-02-02 2
Presto 似乎不完全支持 range
window 规范。所以你可以用另一种方式做到这一点。 . .通过做事的来龙去脉:
with cd as (
select customer, order_date as dte, 1 as inc
from transactions
union all
select customer, order_date + interval '1' year, -1 inc
from transactions
)
select t.*, cd.one_year_count
from (select customer, dte,
sum(sum(inc)) over (partition by customer order by dte) as one_year_count
from cd
group by customer, date
) cd join
transactions t
on cd.dte = t.order_date;
您应该会发现这要快得多。
感谢 Gordon Linoff 上面的回答,我对其进行了调整以获得正确答案(至少在 Athena 中)。您不需要 sum(sum()) over ...
,只需 sum() over ...
就足够了。
with
transactions as (
select
1 as customer,
date '2020-01-01' as order_date
union all
select
1 as customer,
date '2020-01-26' as order_date
union all
select
1 as customer,
date '2020-02-01' as order_date
union all
select
1 as customer,
date '2020-02-02' as order_date
),
cd as (
select
customer,
order_date as dte,
1 as inc
from
transactions
union all
select
customer,
order_date + interval '13' day,
-1 inc
from
transactions
),
cd2 as (
select
customer,
dte,
inc,
sum(inc) over (partition by customer order by dte rows between unbounded preceding and 1 preceding) as one_year_count
from
cd
)
select
t.*,
coalesce(cd2.one_year_count, 0) as one_year_count
from
cd2
inner join
transactions t
on cd2.dte = t.order_date
where
cd2.inc = 1
order by
2 asc
我有一个(简化的)交易 table 客户和订单日期。对于每个 row/order,我想查找当前订单前一年的订单数。我可以通过自连接来做到这一点,但是当我的事务 table 大得多时,它会变得低效。我想我真的想在日期字段上使用一个范围介于 between 之间的 window 函数,但这还没有在 Presto 中实现。关于如何更有效地执行此操作的任何想法?
with
transactions as (
select
1 as customer,
date '2020-01-01' as order_date
union all
select
1 as customer,
date '2020-01-26' as order_date
union all
select
1 as customer,
date '2020-02-01' as order_date
union all
select
1 as customer,
date '2020-02-02' as order_date
)
select
t1.*,
count(case when t2.order_date between date_add('day', -14, t1.order_date) and date_add('day', -1, t1.order_date) then t2.order_date else null end) as orders_14_days_before
from
transactions t1
left join
transactions t2 on t1.customer = t2.customer
group by
t1.customer,
t1.order_date
结果:
customer order_date orders_14_days_before
1 2020-01-01 0
1 2020-01-26 0
1 2020-02-01 1
1 2020-02-02 2
Presto 似乎不完全支持 range
window 规范。所以你可以用另一种方式做到这一点。 . .通过做事的来龙去脉:
with cd as (
select customer, order_date as dte, 1 as inc
from transactions
union all
select customer, order_date + interval '1' year, -1 inc
from transactions
)
select t.*, cd.one_year_count
from (select customer, dte,
sum(sum(inc)) over (partition by customer order by dte) as one_year_count
from cd
group by customer, date
) cd join
transactions t
on cd.dte = t.order_date;
您应该会发现这要快得多。
感谢 Gordon Linoff 上面的回答,我对其进行了调整以获得正确答案(至少在 Athena 中)。您不需要 sum(sum()) over ...
,只需 sum() over ...
就足够了。
with
transactions as (
select
1 as customer,
date '2020-01-01' as order_date
union all
select
1 as customer,
date '2020-01-26' as order_date
union all
select
1 as customer,
date '2020-02-01' as order_date
union all
select
1 as customer,
date '2020-02-02' as order_date
),
cd as (
select
customer,
order_date as dte,
1 as inc
from
transactions
union all
select
customer,
order_date + interval '13' day,
-1 inc
from
transactions
),
cd2 as (
select
customer,
dte,
inc,
sum(inc) over (partition by customer order by dte rows between unbounded preceding and 1 preceding) as one_year_count
from
cd
)
select
t.*,
coalesce(cd2.one_year_count, 0) as one_year_count
from
cd2
inner join
transactions t
on cd2.dte = t.order_date
where
cd2.inc = 1
order by
2 asc