在 PostgreSQL 中,能否使用窗口函数统计连续出现的记录数量?
Is it possible to find out the number of items in a row by using window functions in PostgreSQL?
如何统计有多少卖家存在这样的情况:相邻两次付款的时间间隔小于 1 分钟,并且这样的付款至少连续出现 3 次?(答案是 2 个卖家)
另外,如何计算这类付款的总次数?(答案是 10 次付款)
这类问题似乎可以用窗口函数解决,但我以前没有处理过类似的问题。
-- Sample data: one row per payment.
--   seller_id    : seller that received the payment
--   payment_id   : payment label, 'p1' .. 'p21'
--   payment_time : time of the payment
--   second_diff  : seconds until the same seller's next payment;
--                  NULL on each seller's last payment
CREATE TABLE T (seller_id int, payment_id varchar(3), payment_time timestamp, second_diff int);
INSERT INTO T (seller_id, payment_id, payment_time, second_diff)
VALUES
-- Seller 1 ('p1' was previously misspelled with a letter l instead of the digit 1).
(1, 'p1', '2015-01-08 09:23:04', 151),
(1, 'p2', '2015-01-08 09:25:35', 50),
(1, 'p3', '2015-01-08 09:26:25', 48),
(1, 'p4', '2015-01-08 09:27:23', 36),
(1, 'p5', '2015-01-08 09:27:59', 41),
(1, 'p6', '2015-01-08 09:28:40', 70),
(1, 'p7', '2015-01-08 09:29:50', 50),
(1, 'p8', '2015-01-08 09:30:40', 45),
(1, 'p9', '2015-01-08 09:31:25', 35),
(1, 'p10', '2015-01-08 09:32:00', null),
-- Seller 2 ('p11' was previously misspelled with letter l characters).
(2, 'p11', '2015-01-08 09:25:35', 25),
(2, 'p12', '2015-01-08 09:26:00', 55),
(2, 'p13', '2015-01-08 09:26:55', 30),
(2, 'p14', '2015-01-08 09:27:25', 95),
(2, 'p15', '2015-01-08 09:29:00', null),
-- Seller 3.
-- NOTE(review): p18 and p19 carry the same payment_time although p18 has
-- second_diff = 90; left unchanged here - confirm against the data source.
(3, 'p16', '2015-01-08 10:41:00', 65),
(3, 'p17', '2015-01-08 10:42:05', 75),
(3, 'p18', '2015-01-08 10:43:20', 90),
(3, 'p19', '2015-01-08 10:43:20', 39),
(3, 'p20', '2015-01-08 10:43:59', 50),
(3, 'p21', '2015-01-08 10:44:49', null);
使用带 OVER 子句的聚合函数,按 second_diff 整除 60 的结果进行分区,然后筛选出整除结果为 0 的行:
-- Sketch only, not runnable as written: "..." and "...." are placeholders
-- for the remaining select-list columns and for the source table.
-- NOTE(review): the CTE is named T, the same name as the sample table created
-- earlier in this file - confirm which source the inner FROM is meant to read.
WITH T AS
(
-- CNT is the number of rows sharing the same second_diff / 60 quotient.
-- NOTE(review): the window has no ORDER BY and no seller_id, so CNT does not
-- depend on which rows are adjacent in time or on the seller.
SELECT ... COUNT(*) OVER(PARTITION BY second_diff / 60) AS CNT, second_diff / 60 AS GRP
FROM ....
)
SELECT * FROM T
-- second_diff is declared int, so "/" is integer division; GRP = 0 keeps the
-- rows whose quotient is 0 (0..59 for non-negative second_diff values).
WHERE GRP = 0
-- Option 1: number each run of consecutive payments by counting transitions.
-- Output: one row with
--   sellers  - number of sellers having at least one run of three or more
--              consecutive payments with second_diff < 60
--   payments - total number of payments inside such runs
-- For the sample data in this file the result is sellers = 2, payments = 10.
with A as (
    -- transition is 1 on the first row of a seller (lag() is NULL there) and on
    -- every row whose "second_diff < 60" flag differs from the previous row's;
    -- otherwise 0. A NULL second_diff is treated as "not < 60".
    -- payment_id is added to the ordering so rows with equal payment_time
    -- (the sample data has such a pair for seller 3) are ordered
    -- deterministically; the string order is only a tie-breaker.
    select
        seller_id,
        payment_id,
        payment_time,
        second_diff,
        case
            when lag(case when second_diff < 60 then 1 else 0 end)
                     over (partition by seller_id order by payment_time, payment_id)
                 = case when second_diff < 60 then 1 else 0 end
            then 0
            else 1
        end as transition
    from T
), B as (
    -- Running total of transitions = run number within the seller.
    -- The explicit ROWS frame matters: the default RANGE frame would add the
    -- transitions of all peer rows (equal ordering values) to each of them.
    select
        seller_id,
        second_diff,
        sum(transition) over (
            partition by seller_id
            order by payment_time, payment_id
            rows between unbounded preceding and current row
        ) as grp
    from A
), C as (
    -- One row per qualifying run: only runs of "< 60" rows, at least 3 long.
    select seller_id, count(*) as p
    from B
    where second_diff < 60
    group by seller_id, grp
    having count(*) >= 3
)
select count(distinct seller_id) as sellers, sum(p) as payments
from C;
此方法先找出取值发生变化(即"转换")的位置,再对这些转换做累计求和,从而为每一段连续区间编号。
内部两个 case 表达式具体输出什么值并不重要,只要二者保持一致即可。
https://dbfiddle.uk/?rdbms=postgres_9.6&fiddle=606b796d793248336a95637f02ce117b
以下是同一思路的几种变体:
选项 #1b:
-- Option 1b: same run numbering as option 1, but the distinct-seller count and
-- the payment total are produced in the same step with window functions.
-- Output: at most one row (sellers, payments); no row when no run qualifies.
-- For the sample data in this file the result is sellers = 2, payments = 10.
with A as (
    -- transition is 1 on the first row of a seller (lag() is NULL there) and on
    -- every row whose "second_diff < 60" flag differs from the previous row's;
    -- otherwise 0. A NULL second_diff is treated as "not < 60".
    -- payment_id is added to the ordering so rows with equal payment_time
    -- are ordered deterministically; the string order is only a tie-breaker.
    select
        seller_id,
        payment_id,
        payment_time,
        second_diff,
        case
            when lag(case when second_diff < 60 then 1 else 0 end)
                     over (partition by seller_id order by payment_time, payment_id)
                 = case when second_diff < 60 then 1 else 0 end
            then 0
            else 1
        end as transition
    from T
), B as (
    -- Running total of transitions = run number within the seller.
    -- The explicit ROWS frame keeps peer rows (equal ordering values) from
    -- being summed together, which the default RANGE frame would do.
    select
        seller_id,
        second_diff,
        sum(transition) over (
            partition by seller_id
            order by payment_time, payment_id
            rows between unbounded preceding and current row
        ) as grp
    from A
)
select
    -- Ascending rank + descending rank - 1 equals the number of distinct
    -- seller_id values among the grouped rows.
    dense_rank() over (order by seller_id)
        + dense_rank() over (order by seller_id desc) - 1 as sellers,
    -- Window sum over the per-run counts = total payments in qualifying runs.
    sum(count(*)) over () as payments
from B
where second_diff < 60
group by seller_id, grp
having count(*) >= 3
-- Every grouped row carries the same two values, so any single row will do.
limit 1;
这只是在同一步中完成 count(distinct) 的另一种写法。
选项 #2:
-- Option 2: find runs from the difference of two row numberings.
-- Output: one row with
--   sellers  - number of sellers having at least one run of three or more
--              consecutive payments with second_diff < 60
--   payments - total number of payments inside such runs
-- For the sample data in this file the result is sellers = 2, payments = 10.
with A as (
    -- rn numbers all payments of a seller before any filtering.
    -- payment_id is added to the ordering so rows with equal payment_time get
    -- the same relative order here and in B; the string order is only a
    -- tie-breaker.
    select
        seller_id,
        payment_id,
        payment_time,
        second_diff,
        row_number() over (partition by seller_id order by payment_time, payment_id) as rn
    from T
), B as (
    -- The second row_number() is computed after the WHERE filter, so it has no
    -- gaps. rn minus it stays constant inside a run of consecutive "< 60" rows
    -- and changes whenever a filtered-out row lies between two kept rows.
    select
        seller_id,
        rn - row_number() over (partition by seller_id order by payment_time, payment_id) as grp
    from A
    where second_diff < 60
), C as (
    -- One row per qualifying run (at least 3 payments long).
    select seller_id, count(*) as p
    from B
    group by seller_id, grp
    having count(*) >= 3
)
select count(distinct seller_id) as sellers, sum(p) as payments
from C;
此方法分别在过滤前和过滤后计算行号;两个行号之差在同一段连续区间内保持不变,在序列出现中断时发生变化,由此即可划分出各段连续区间。
如何统计有多少卖家存在这样的情况:相邻两次付款的时间间隔小于 1 分钟,并且这样的付款至少连续出现 3 次?(答案是 2 个卖家)另外,如何计算这类付款的总次数?(答案是 10 次付款)这类问题似乎可以用窗口函数解决,但我以前没有处理过类似的问题。
-- Sample data: one row per payment.
--   seller_id    : seller that received the payment
--   payment_id   : payment label, 'p1' .. 'p21'
--   payment_time : time of the payment
--   second_diff  : seconds until the same seller's next payment;
--                  NULL on each seller's last payment
CREATE TABLE T (seller_id int, payment_id varchar(3), payment_time timestamp, second_diff int);
INSERT INTO T (seller_id, payment_id, payment_time, second_diff)
VALUES
-- Seller 1 ('p1' was previously misspelled with a letter l instead of the digit 1).
(1, 'p1', '2015-01-08 09:23:04', 151),
(1, 'p2', '2015-01-08 09:25:35', 50),
(1, 'p3', '2015-01-08 09:26:25', 48),
(1, 'p4', '2015-01-08 09:27:23', 36),
(1, 'p5', '2015-01-08 09:27:59', 41),
(1, 'p6', '2015-01-08 09:28:40', 70),
(1, 'p7', '2015-01-08 09:29:50', 50),
(1, 'p8', '2015-01-08 09:30:40', 45),
(1, 'p9', '2015-01-08 09:31:25', 35),
(1, 'p10', '2015-01-08 09:32:00', null),
-- Seller 2 ('p11' was previously misspelled with letter l characters).
(2, 'p11', '2015-01-08 09:25:35', 25),
(2, 'p12', '2015-01-08 09:26:00', 55),
(2, 'p13', '2015-01-08 09:26:55', 30),
(2, 'p14', '2015-01-08 09:27:25', 95),
(2, 'p15', '2015-01-08 09:29:00', null),
-- Seller 3.
-- NOTE(review): p18 and p19 carry the same payment_time although p18 has
-- second_diff = 90; left unchanged here - confirm against the data source.
(3, 'p16', '2015-01-08 10:41:00', 65),
(3, 'p17', '2015-01-08 10:42:05', 75),
(3, 'p18', '2015-01-08 10:43:20', 90),
(3, 'p19', '2015-01-08 10:43:20', 39),
(3, 'p20', '2015-01-08 10:43:59', 50),
(3, 'p21', '2015-01-08 10:44:49', null);
使用带 OVER 子句的聚合函数,按 second_diff 整除 60 的结果进行分区,然后筛选出整除结果为 0 的行:
-- Sketch only, not runnable as written: "..." and "...." are placeholders
-- for the remaining select-list columns and for the source table.
-- NOTE(review): the CTE is named T, the same name as the sample table created
-- earlier in this file - confirm which source the inner FROM is meant to read.
WITH T AS
(
-- CNT is the number of rows sharing the same second_diff / 60 quotient.
-- NOTE(review): the window has no ORDER BY and no seller_id, so CNT does not
-- depend on which rows are adjacent in time or on the seller.
SELECT ... COUNT(*) OVER(PARTITION BY second_diff / 60) AS CNT, second_diff / 60 AS GRP
FROM ....
)
SELECT * FROM T
-- second_diff is declared int, so "/" is integer division; GRP = 0 keeps the
-- rows whose quotient is 0 (0..59 for non-negative second_diff values).
WHERE GRP = 0
-- Option 1: number each run of consecutive payments by counting transitions.
-- Output: one row with
--   sellers  - number of sellers having at least one run of three or more
--              consecutive payments with second_diff < 60
--   payments - total number of payments inside such runs
-- For the sample data in this file the result is sellers = 2, payments = 10.
with A as (
    -- transition is 1 on the first row of a seller (lag() is NULL there) and on
    -- every row whose "second_diff < 60" flag differs from the previous row's;
    -- otherwise 0. A NULL second_diff is treated as "not < 60".
    -- payment_id is added to the ordering so rows with equal payment_time
    -- (the sample data has such a pair for seller 3) are ordered
    -- deterministically; the string order is only a tie-breaker.
    select
        seller_id,
        payment_id,
        payment_time,
        second_diff,
        case
            when lag(case when second_diff < 60 then 1 else 0 end)
                     over (partition by seller_id order by payment_time, payment_id)
                 = case when second_diff < 60 then 1 else 0 end
            then 0
            else 1
        end as transition
    from T
), B as (
    -- Running total of transitions = run number within the seller.
    -- The explicit ROWS frame matters: the default RANGE frame would add the
    -- transitions of all peer rows (equal ordering values) to each of them.
    select
        seller_id,
        second_diff,
        sum(transition) over (
            partition by seller_id
            order by payment_time, payment_id
            rows between unbounded preceding and current row
        ) as grp
    from A
), C as (
    -- One row per qualifying run: only runs of "< 60" rows, at least 3 long.
    select seller_id, count(*) as p
    from B
    where second_diff < 60
    group by seller_id, grp
    having count(*) >= 3
)
select count(distinct seller_id) as sellers, sum(p) as payments
from C;
此方法先找出取值发生变化(即"转换")的位置,再对这些转换做累计求和,从而为每一段连续区间编号。
内部两个 case 表达式具体输出什么值并不重要,只要二者保持一致即可。
https://dbfiddle.uk/?rdbms=postgres_9.6&fiddle=606b796d793248336a95637f02ce117b
以下是同一思路的几种变体:
选项 #1b:
-- Option 1b: same run numbering as option 1, but the distinct-seller count and
-- the payment total are produced in the same step with window functions.
-- Output: at most one row (sellers, payments); no row when no run qualifies.
-- For the sample data in this file the result is sellers = 2, payments = 10.
with A as (
    -- transition is 1 on the first row of a seller (lag() is NULL there) and on
    -- every row whose "second_diff < 60" flag differs from the previous row's;
    -- otherwise 0. A NULL second_diff is treated as "not < 60".
    -- payment_id is added to the ordering so rows with equal payment_time
    -- are ordered deterministically; the string order is only a tie-breaker.
    select
        seller_id,
        payment_id,
        payment_time,
        second_diff,
        case
            when lag(case when second_diff < 60 then 1 else 0 end)
                     over (partition by seller_id order by payment_time, payment_id)
                 = case when second_diff < 60 then 1 else 0 end
            then 0
            else 1
        end as transition
    from T
), B as (
    -- Running total of transitions = run number within the seller.
    -- The explicit ROWS frame keeps peer rows (equal ordering values) from
    -- being summed together, which the default RANGE frame would do.
    select
        seller_id,
        second_diff,
        sum(transition) over (
            partition by seller_id
            order by payment_time, payment_id
            rows between unbounded preceding and current row
        ) as grp
    from A
)
select
    -- Ascending rank + descending rank - 1 equals the number of distinct
    -- seller_id values among the grouped rows.
    dense_rank() over (order by seller_id)
        + dense_rank() over (order by seller_id desc) - 1 as sellers,
    -- Window sum over the per-run counts = total payments in qualifying runs.
    sum(count(*)) over () as payments
from B
where second_diff < 60
group by seller_id, grp
having count(*) >= 3
-- Every grouped row carries the same two values, so any single row will do.
limit 1;
这只是在同一步中完成 count(distinct) 的另一种写法。
选项 #2:
-- Option 2: find runs from the difference of two row numberings.
-- Output: one row with
--   sellers  - number of sellers having at least one run of three or more
--              consecutive payments with second_diff < 60
--   payments - total number of payments inside such runs
-- For the sample data in this file the result is sellers = 2, payments = 10.
with A as (
    -- rn numbers all payments of a seller before any filtering.
    -- payment_id is added to the ordering so rows with equal payment_time get
    -- the same relative order here and in B; the string order is only a
    -- tie-breaker.
    select
        seller_id,
        payment_id,
        payment_time,
        second_diff,
        row_number() over (partition by seller_id order by payment_time, payment_id) as rn
    from T
), B as (
    -- The second row_number() is computed after the WHERE filter, so it has no
    -- gaps. rn minus it stays constant inside a run of consecutive "< 60" rows
    -- and changes whenever a filtered-out row lies between two kept rows.
    select
        seller_id,
        rn - row_number() over (partition by seller_id order by payment_time, payment_id) as grp
    from A
    where second_diff < 60
), C as (
    -- One row per qualifying run (at least 3 payments long).
    select seller_id, count(*) as p
    from B
    group by seller_id, grp
    having count(*) >= 3
)
select count(distinct seller_id) as sellers, sum(p) as payments
from C;
此方法分别在过滤前和过滤后计算行号;两个行号之差在同一段连续区间内保持不变,在序列出现中断时发生变化,由此即可划分出各段连续区间。