SQL 查找日期列中间隔的查询
SQL query to find gaps within a column of dates
我有一个表,其中记录了每天的日期和状态。我想找出状态何时发生变化,以及每段连续状态持续了多少天、各段之间是否存在间隔。
+------+------------+--------+
| user | date       | status |
+------+------------+--------+
| 1    | 12/01/2017 | open   |
| 1    | 12/02/2017 | open   |
| 1    | 12/03/2017 | open   |
| 1    | 12/04/2017 | closed |
| 1    | 12/05/2017 | closed |
| 1    | 12/06/2017 | open   |
| 1    | 12/07/2017 | open   |
+------+------------+--------+
预期输出:
+------+--------+----------------+------------+------------+
| user | status | days_in_status | min        | max        |
+------+--------+----------------+------------+------------+
| 1    | open   | 3              | 12/01/2017 | 12/03/2017 |
| 1    | closed | 2              | 12/04/2017 | 12/05/2017 |
| 1    | open   | 2              | 12/06/2017 | 12/07/2017 |
+------+--------+----------------+------------+------------+
这是一种“间隙与孤岛”(gaps-and-islands)问题。在这种情况下,从每个日期中减去一个按天计的序号,可能是识别“孤岛”的最简单方法:同一孤岛内的连续日期减去各自的序号后会得到相同的值。
-- Gaps-and-islands: collapse every run of consecutive days that share the same
-- "user" and status into one row holding the run length and its first/last date.
-- "user" is double-quoted on purpose: the bare word is reserved, and in PostgreSQL
-- an unquoted user evaluates to the session user name instead of the column.
SELECT
    "user",
    status,
    count(*) AS num_days,
    min(date) AS min,
    max(date) AS max
FROM (
    SELECT
        t.*,
        -- Running day counter inside each ("user", status) partition.
        row_number() OVER (PARTITION BY "user", status ORDER BY date) AS seqnum
    FROM t
) AS t
-- Within one run the date and the counter both advance by one per row, so
-- date minus counter is constant for the run and serves as the island key.
GROUP BY "user", status, date - seqnum * interval '1 day'
ORDER BY "user", min(date);
在 Postgres 中,可以直接从 date 值中减去 integer:
-- Gaps-and-islands using PostgreSQL's date - integer arithmetic.
-- Step 1: tag every row with an anchor date (its date minus its position within
-- the ("user", status) partition); consecutive days share the same anchor.
WITH anchored AS (
    SELECT
        "user",
        status,
        date,
        date - CAST(
            row_number() OVER (PARTITION BY "user", status ORDER BY date) AS int
        ) AS grp_date
    FROM tbl
)
-- Step 2: one output row per ("user", status, anchor) island.
SELECT
    "user",
    status,
    count(*) AS days_in_status,
    min(date) AS min,
    max(date) AS max
FROM anchored
GROUP BY "user", status, grp_date
ORDER BY "user", min;
db<>fiddle here
参见:
- Select longest continuous sequence
- How do I determine the last day of the previous month using PostgreSQL?
旁白:user 是一个保留字(reserved word),应避免将其用作实际的列名。
SQL 查询以查找日期列中日期 GAPS 的宽度
下面是借用上面 Gordon 的答案写成的完整示例,同时显示各岛屿的宽度和岛屿之间的间隙宽度,可直接复制粘贴到 PostgreSQL 中运行。
-- Self-contained demo: builds a small table of dates for one ticker, then
-- reports every island (run of consecutive dates) with its width and the
-- distance in days from the previous island.
DROP TABLE IF EXISTS foobar;
CREATE TABLE foobar (tick text, date_val date);

INSERT INTO foobar (tick, date_val) VALUES
    ('XYZ', '2021-01-03'),  -- island 1 has width 2
    ('XYZ', '2021-01-04'),  -- island 1
    ('XYZ', '2021-05-09'),  -- island 2 has width 3
    ('XYZ', '2021-05-10'),  -- island 2
    ('XYZ', '2021-05-11'),  -- island 2
    ('XYZ', '2021-07-07'),  -- island 3 has width 4
    ('XYZ', '2021-07-08'),  -- island 3
    ('XYZ', '2021-07-09'),  -- island 3
    ('XYZ', '2021-07-10'),  -- island 3
    ('XYZ', '2022-10-10');  -- island 4 has width 1

-- numbered: position of each date within its ticker, in date order.
WITH numbered AS (
    SELECT
        tick,
        date_val,
        row_number() OVER (PARTITION BY tick ORDER BY date_val) AS seqnum
    FROM foobar
    WHERE tick = 'XYZ'
),
-- islands: consecutive dates share the same (date_val - seqnum days) value,
-- so grouping by that value yields one row per island.
islands AS (
    SELECT
        tick,
        count(*) AS island_width,
        min(date_val) AS min_val,
        max(date_val) AS max_val
    FROM numbered
    GROUP BY tick, date_val - seqnum * interval '1 day'
)
SELECT
    tick,
    island_width,
    min_val,
    max_val,
    -- Start of this island minus end of the previous island of the same ticker;
    -- NULL for the earliest island. This is a date difference, i.e. one more
    -- than the number of missing days (2021-05-09 - 2021-01-04 = 125).
    min_val - lag(max_val) OVER (PARTITION BY tick ORDER BY max_val) AS gap_width
FROM islands
ORDER BY max_val DESC;
打印:
┌──────┬──────────────┬────────────┬────────────┬───────────┐
│ tick │ island_width │ min_val │ max_val │ gap_width │
├──────┼──────────────┼────────────┼────────────┼───────────┤
│ XYZ │ 1 │ 2022-10-10 │ 2022-10-10 │ 457 │
│ XYZ │ 4 │ 2021-07-07 │ 2021-07-10 │ 57 │
│ XYZ │ 3 │ 2021-05-09 │ 2021-05-11 │ 125 │
│ XYZ │ 2 │ 2021-01-03 │ 2021-01-04 │ ¤ │
└──────┴──────────────┴────────────┴────────────┴───────────┘
详情:这份数据中有 4 个岛屿,4 个岛屿之间有 3 个间隙。除最早的岛屿外,每个岛屿的 gap_width 是它与前一个岛屿之间的间隙;最早的岛屿之前没有岛屿,因此其 gap_width 为 NULL(输出中显示为 ¤)。
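该 SQL 的工作方式:先用 row_number() 在每个 tick 分区(partition)内按日期为各行编号,再从每个日期中减去“序号 × 1 天”;同一岛屿内的连续日期会得到相同的结果,按该结果分组即可得到每个岛屿的 min_val、max_val 以及其中的天数(island_width)。随后在 over 窗口(window)中使用 lag(默认向后回看 1 行)取得前一个岛屿的 max_val,用当前岛屿的 min_val 减去它,就得到间隙宽度 gap_width。看似黑盒魔法,其实只是这两步的组合。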
我有一个表,其中记录了每天的日期和状态。我想找出状态何时发生变化,以及每段连续状态持续了多少天、各段之间是否存在间隔。
+------+------------+--------+
| user | date       | status |
+------+------------+--------+
| 1    | 12/01/2017 | open   |
| 1    | 12/02/2017 | open   |
| 1    | 12/03/2017 | open   |
| 1    | 12/04/2017 | closed |
| 1    | 12/05/2017 | closed |
| 1    | 12/06/2017 | open   |
| 1    | 12/07/2017 | open   |
+------+------------+--------+
预期输出:
+------+--------+----------------+------------+------------+
| user | status | days_in_status | min        | max        |
+------+--------+----------------+------------+------------+
| 1    | open   | 3              | 12/01/2017 | 12/03/2017 |
| 1    | closed | 2              | 12/04/2017 | 12/05/2017 |
| 1    | open   | 2              | 12/06/2017 | 12/07/2017 |
+------+--------+----------------+------------+------------+
这是一种“间隙与孤岛”(gaps-and-islands)问题。在这种情况下,从每个日期中减去一个按天计的序号,可能是识别“孤岛”的最简单方法:同一孤岛内的连续日期减去各自的序号后会得到相同的值。
-- Gaps-and-islands: collapse every run of consecutive days that share the same
-- "user" and status into one row holding the run length and its first/last date.
-- "user" is double-quoted on purpose: the bare word is reserved, and in PostgreSQL
-- an unquoted user evaluates to the session user name instead of the column.
SELECT
    "user",
    status,
    count(*) AS num_days,
    min(date) AS min,
    max(date) AS max
FROM (
    SELECT
        t.*,
        -- Running day counter inside each ("user", status) partition.
        row_number() OVER (PARTITION BY "user", status ORDER BY date) AS seqnum
    FROM t
) AS t
-- Within one run the date and the counter both advance by one per row, so
-- date minus counter is constant for the run and serves as the island key.
GROUP BY "user", status, date - seqnum * interval '1 day'
ORDER BY "user", min(date);
在 Postgres 中,可以直接从 date 值中减去 integer:
-- Gaps-and-islands using PostgreSQL's date - integer arithmetic.
-- Step 1: tag every row with an anchor date (its date minus its position within
-- the ("user", status) partition); consecutive days share the same anchor.
WITH anchored AS (
    SELECT
        "user",
        status,
        date,
        date - CAST(
            row_number() OVER (PARTITION BY "user", status ORDER BY date) AS int
        ) AS grp_date
    FROM tbl
)
-- Step 2: one output row per ("user", status, anchor) island.
SELECT
    "user",
    status,
    count(*) AS days_in_status,
    min(date) AS min,
    max(date) AS max
FROM anchored
GROUP BY "user", status, grp_date
ORDER BY "user", min;
db<>fiddle here
参见:
- Select longest continuous sequence
- How do I determine the last day of the previous month using PostgreSQL?
旁白:user 是一个保留字(reserved word),应避免将其用作实际的列名。
SQL 查询以查找日期列中日期 GAPS 的宽度
下面是借用上面 Gordon 的答案写成的完整示例,同时显示各岛屿的宽度和岛屿之间的间隙宽度,可直接复制粘贴到 PostgreSQL 中运行。
-- Self-contained demo: builds a small table of dates for one ticker, then
-- reports every island (run of consecutive dates) with its width and the
-- distance in days from the previous island.
DROP TABLE IF EXISTS foobar;
CREATE TABLE foobar (tick text, date_val date);

INSERT INTO foobar (tick, date_val) VALUES
    ('XYZ', '2021-01-03'),  -- island 1 has width 2
    ('XYZ', '2021-01-04'),  -- island 1
    ('XYZ', '2021-05-09'),  -- island 2 has width 3
    ('XYZ', '2021-05-10'),  -- island 2
    ('XYZ', '2021-05-11'),  -- island 2
    ('XYZ', '2021-07-07'),  -- island 3 has width 4
    ('XYZ', '2021-07-08'),  -- island 3
    ('XYZ', '2021-07-09'),  -- island 3
    ('XYZ', '2021-07-10'),  -- island 3
    ('XYZ', '2022-10-10');  -- island 4 has width 1

-- numbered: position of each date within its ticker, in date order.
WITH numbered AS (
    SELECT
        tick,
        date_val,
        row_number() OVER (PARTITION BY tick ORDER BY date_val) AS seqnum
    FROM foobar
    WHERE tick = 'XYZ'
),
-- islands: consecutive dates share the same (date_val - seqnum days) value,
-- so grouping by that value yields one row per island.
islands AS (
    SELECT
        tick,
        count(*) AS island_width,
        min(date_val) AS min_val,
        max(date_val) AS max_val
    FROM numbered
    GROUP BY tick, date_val - seqnum * interval '1 day'
)
SELECT
    tick,
    island_width,
    min_val,
    max_val,
    -- Start of this island minus end of the previous island of the same ticker;
    -- NULL for the earliest island. This is a date difference, i.e. one more
    -- than the number of missing days (2021-05-09 - 2021-01-04 = 125).
    min_val - lag(max_val) OVER (PARTITION BY tick ORDER BY max_val) AS gap_width
FROM islands
ORDER BY max_val DESC;
打印:
┌──────┬──────────────┬────────────┬────────────┬───────────┐
│ tick │ island_width │ min_val │ max_val │ gap_width │
├──────┼──────────────┼────────────┼────────────┼───────────┤
│ XYZ │ 1 │ 2022-10-10 │ 2022-10-10 │ 457 │
│ XYZ │ 4 │ 2021-07-07 │ 2021-07-10 │ 57 │
│ XYZ │ 3 │ 2021-05-09 │ 2021-05-11 │ 125 │
│ XYZ │ 2 │ 2021-01-03 │ 2021-01-04 │ ¤ │
└──────┴──────────────┴────────────┴────────────┴───────────┘
详情:这份数据中有 4 个岛屿,4 个岛屿之间有 3 个间隙。除最早的岛屿外,每个岛屿的 gap_width 是它与前一个岛屿之间的间隙;最早的岛屿之前没有岛屿,因此其 gap_width 为 NULL(输出中显示为 ¤)。
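该 SQL 的工作方式:先用 row_number() 在每个 tick 分区(partition)内按日期为各行编号,再从每个日期中减去“序号 × 1 天”;同一岛屿内的连续日期会得到相同的结果,按该结果分组即可得到每个岛屿的 min_val、max_val 以及其中的天数(island_width)。随后在 over 窗口(window)中使用 lag(默认向后回看 1 行)取得前一个岛屿的 max_val,用当前岛屿的 min_val 减去它,就得到间隙宽度 gap_width。看似黑盒魔法,其实只是这两步的组合。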