创建包含 2 列的时间范围 date_time
create time range with 2 columns date_time
我面临的问题是如何从 Teradata ANSI 中重叠的多个时间段中找到不同的时间段 SQL。
例如,附表包含多个重叠的时间段,我如何在 Teradata 中将这些时间段组合成 3 个唯一的时间段 SQL???
我想我可以在 python 中使用循环函数来完成,但不确定如何在 SQL
中完成
ID
Start Date
End Date
001
2005-01-01
2006-01-01
001
2005-01-01
2007-01-01
001
2008-01-01
2008-06-01
001
2008-04-01
2008-12-01
001
2010-01-01
2010-05-01
001
2010-04-01
2010-12-01
001
2010-11-01
2012-01-01
我的预期结果是:
ID
start_Date
end_date
001
2005-01-01
2007-01-01
001
2008-01-01
2008-12-01
001
2010-01-01
2012-01-01
这是一个缺口和孤岛问题。试试这个:
with u as
(select ID, start_date, end_date,
case
when start_date <= lag(end_date) over(partition by ID order by start_date, end_date) then 0
else 1 end as grp
from table_name),
v as
(select ID, start_date, end_date,
sum(grp) over(partition by ID order by start_date, end_date) as island
from u)
select ID, min(start_date) as start_Date, max(end_date) as end_date
from v
group by ID, island;
基本上,您可以通过比较当前行的 start_date
和前一行的 end_date
(按 start_date、end_date 排序)来识别“岛屿”,如果它先于那么它就是同一个岛。然后你可以滚动 sum()
来获取岛屿编号。最后 select 来自每个岛的 min(start_date) 和 max(end_date) 以获得所需的输出。
我在 Dbeaver 中尝试了它,但功能稍有变化,这可能会起作用:
select ID,Start_Date,End_Date
from
(
select t.*,
dense_rank () over(partition by extract (year from Start_Date) order BY End_Date desc) drnk
from testing_123 t
) temp
where temp.drnk = 1
ORDER BY Start_Date;
试试这个
WITH a as (
SELECT
ID,
LEFT(Start_Date, 4) as Year,
MIN(Start_Date) as New_Start_Date
FROM
TAB1
GROUP BY
ID,
LEFT(Start_Date, 4)
), b as (
SELECT
a.ID,
Year,
New_Start_Date,
End_Date
FROM
a
LEFT JOIN
TAB1
ON LEFT(a.New_Start_Date, 4) = LEFT(TAB1.Start_Date, 4)
)
select
ID,
New_Start_Date as Start_Date,
MAX(End_Date)
from
b
GROUP BY
ID,
New_Start_Date;
示例:https://dbfiddle.uk/?rdbms=mysql_8.0&fiddle=97f91b68c635aebfb752538cdd752ace
从 Oracle 12 开始,您可以使用 MATCH_RECOGNIZE
执行 row-by-row 比较:
SELECT *
FROM table_name
MATCH_RECOGNIZE(
PARTITION BY id
ORDER BY start_date
MEASURES
FIRST(start_date) AS start_date,
MAX(end_date) AS end_date
ONE ROW PER MATCH
PATTERN (overlapping_ranges* last_range)
DEFINE overlapping_ranges AS NEXT(start_date) <= MAX(end_date)
)
其中,对于示例数据:
CREATE TABLE table_name (ID, Start_Date, End_Date) AS
SELECT '001', DATE '2005-01-01', DATE '2006-01-01' FROM DUAL UNION ALL
SELECT '001', DATE '2005-01-01', DATE '2007-01-01' FROM DUAL UNION ALL
SELECT '001', DATE '2008-01-01', DATE '2008-06-01' FROM DUAL UNION ALL
SELECT '001', DATE '2008-04-01', DATE '2008-12-01' FROM DUAL UNION ALL
SELECT '001', DATE '2010-01-01', DATE '2010-05-01' FROM DUAL UNION ALL
SELECT '001', DATE '2010-04-01', DATE '2010-12-01' FROM DUAL UNION ALL
SELECT '001', DATE '2010-11-01', DATE '2012-01-01' FROM DUAL;
输出:
ID
START_DATE
END_DATE
001
2005-01-01 00:00:00
2007-01-01 00:00:00
001
2008-01-01 00:00:00
2008-12-01 00:00:00
001
2010-01-01 00:00:00
2012-01-01 00:00:00
db<>fiddle here
更新:替代查询
SELECT id,
start_date,
end_date
FROM (
SELECT id,
dt,
SUM(cnt) OVER (PARTITION BY id ORDER BY dt) AS grp,
cnt
FROM (
SELECT ID,
dt,
SUM(type) OVER (PARTITION BY id ORDER BY dt, ROWNUM) * type AS cnt
FROM table_name
UNPIVOT (dt FOR type IN (start_date AS 1, end_date AS -1))
)
WHERE cnt IN (1,0)
)
PIVOT (MAX(dt) FOR cnt IN (1 AS start_date, 0 AS end_date))
或者,不使用 UNPIVOT
、PIVOT
或 ROWNUM
并且在 Oracle 和 PostgreSQL 中都有效的等效项:
SELECT id,
MAX(CASE cnt WHEN 1 THEN dt END) AS start_date,
MAX(CASE cnt WHEN 0 THEN dt END) AS end_date
FROM (
SELECT id,
dt,
SUM(cnt) OVER (PARTITION BY id ORDER BY dt) AS grp,
cnt
FROM (
SELECT ID,
dt,
SUM(type) OVER (PARTITION BY id ORDER BY dt, rn) * type AS cnt
FROM (
SELECT r.*,
ROW_NUMBER() OVER (PARTITION BY id ORDER BY dt ASC, type DESC) AS rn
FROM (
SELECT id, 1 AS type, start_date AS dt FROM table_name
UNION ALL
SELECT id, -1 AS type, end_date AS dt FROM table_name
) r
) p
) s
WHERE cnt IN (1,0)
) t
GROUP BY id, grp
更新 2:另一种选择
SELECT id,
MIN(start_date) AS start_date,
MAX(end_Date) AS end_date
FROM (
SELECT t.*,
SUM(CASE WHEN start_date <= prev_max THEN 0 ELSE 1 END)
OVER (PARTITION BY id ORDER BY start_date) AS grp
FROM (
SELECT t.*,
MAX(end_date) OVER (
PARTITION BY id ORDER BY start_date
ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
) AS prev_max
FROM table_name t
) t
) t
GROUP BY id, grp
db<>fiddle Oracle PostgreSQL
我面临的问题是如何从 Teradata ANSI 中重叠的多个时间段中找到不同的时间段 SQL。
例如,附表包含多个重叠的时间段,我如何在 Teradata 中将这些时间段组合成 3 个唯一的时间段 SQL???
我想我可以在 python 中使用循环函数来完成,但不确定如何在 SQL
中完成ID | Start Date | End Date |
---|---|---|
001 | 2005-01-01 | 2006-01-01 |
001 | 2005-01-01 | 2007-01-01 |
001 | 2008-01-01 | 2008-06-01 |
001 | 2008-04-01 | 2008-12-01 |
001 | 2010-01-01 | 2010-05-01 |
001 | 2010-04-01 | 2010-12-01 |
001 | 2010-11-01 | 2012-01-01 |
我的预期结果是:
ID | start_Date | end_date |
---|---|---|
001 | 2005-01-01 | 2007-01-01 |
001 | 2008-01-01 | 2008-12-01 |
001 | 2010-01-01 | 2012-01-01 |
这是一个缺口和孤岛问题。试试这个:
with u as
(select ID, start_date, end_date,
case
when start_date <= lag(end_date) over(partition by ID order by start_date, end_date) then 0
else 1 end as grp
from table_name),
v as
(select ID, start_date, end_date,
sum(grp) over(partition by ID order by start_date, end_date) as island
from u)
select ID, min(start_date) as start_Date, max(end_date) as end_date
from v
group by ID, island;
基本上,您可以通过比较当前行的 start_date
和前一行的 end_date
(按 start_date、end_date 排序)来识别“岛屿”,如果它先于那么它就是同一个岛。然后你可以滚动 sum()
来获取岛屿编号。最后 select 来自每个岛的 min(start_date) 和 max(end_date) 以获得所需的输出。
我在 Dbeaver 中尝试了它,但功能稍有变化,这可能会起作用:
select ID,Start_Date,End_Date
from
(
select t.*,
dense_rank () over(partition by extract (year from Start_Date) order BY End_Date desc) drnk
from testing_123 t
) temp
where temp.drnk = 1
ORDER BY Start_Date;
试试这个
WITH a as (
SELECT
ID,
LEFT(Start_Date, 4) as Year,
MIN(Start_Date) as New_Start_Date
FROM
TAB1
GROUP BY
ID,
LEFT(Start_Date, 4)
), b as (
SELECT
a.ID,
Year,
New_Start_Date,
End_Date
FROM
a
LEFT JOIN
TAB1
ON LEFT(a.New_Start_Date, 4) = LEFT(TAB1.Start_Date, 4)
)
select
ID,
New_Start_Date as Start_Date,
MAX(End_Date)
from
b
GROUP BY
ID,
New_Start_Date;
示例:https://dbfiddle.uk/?rdbms=mysql_8.0&fiddle=97f91b68c635aebfb752538cdd752ace
从 Oracle 12 开始,您可以使用 MATCH_RECOGNIZE
执行 row-by-row 比较:
SELECT *
FROM table_name
MATCH_RECOGNIZE(
PARTITION BY id
ORDER BY start_date
MEASURES
FIRST(start_date) AS start_date,
MAX(end_date) AS end_date
ONE ROW PER MATCH
PATTERN (overlapping_ranges* last_range)
DEFINE overlapping_ranges AS NEXT(start_date) <= MAX(end_date)
)
其中,对于示例数据:
CREATE TABLE table_name (ID, Start_Date, End_Date) AS
SELECT '001', DATE '2005-01-01', DATE '2006-01-01' FROM DUAL UNION ALL
SELECT '001', DATE '2005-01-01', DATE '2007-01-01' FROM DUAL UNION ALL
SELECT '001', DATE '2008-01-01', DATE '2008-06-01' FROM DUAL UNION ALL
SELECT '001', DATE '2008-04-01', DATE '2008-12-01' FROM DUAL UNION ALL
SELECT '001', DATE '2010-01-01', DATE '2010-05-01' FROM DUAL UNION ALL
SELECT '001', DATE '2010-04-01', DATE '2010-12-01' FROM DUAL UNION ALL
SELECT '001', DATE '2010-11-01', DATE '2012-01-01' FROM DUAL;
输出:
ID START_DATE END_DATE 001 2005-01-01 00:00:00 2007-01-01 00:00:00 001 2008-01-01 00:00:00 2008-12-01 00:00:00 001 2010-01-01 00:00:00 2012-01-01 00:00:00
db<>fiddle here
更新:替代查询
SELECT id,
start_date,
end_date
FROM (
SELECT id,
dt,
SUM(cnt) OVER (PARTITION BY id ORDER BY dt) AS grp,
cnt
FROM (
SELECT ID,
dt,
SUM(type) OVER (PARTITION BY id ORDER BY dt, ROWNUM) * type AS cnt
FROM table_name
UNPIVOT (dt FOR type IN (start_date AS 1, end_date AS -1))
)
WHERE cnt IN (1,0)
)
PIVOT (MAX(dt) FOR cnt IN (1 AS start_date, 0 AS end_date))
或者,不使用 UNPIVOT
、PIVOT
或 ROWNUM
并且在 Oracle 和 PostgreSQL 中都有效的等效项:
SELECT id,
MAX(CASE cnt WHEN 1 THEN dt END) AS start_date,
MAX(CASE cnt WHEN 0 THEN dt END) AS end_date
FROM (
SELECT id,
dt,
SUM(cnt) OVER (PARTITION BY id ORDER BY dt) AS grp,
cnt
FROM (
SELECT ID,
dt,
SUM(type) OVER (PARTITION BY id ORDER BY dt, rn) * type AS cnt
FROM (
SELECT r.*,
ROW_NUMBER() OVER (PARTITION BY id ORDER BY dt ASC, type DESC) AS rn
FROM (
SELECT id, 1 AS type, start_date AS dt FROM table_name
UNION ALL
SELECT id, -1 AS type, end_date AS dt FROM table_name
) r
) p
) s
WHERE cnt IN (1,0)
) t
GROUP BY id, grp
更新 2:另一种选择
SELECT id,
MIN(start_date) AS start_date,
MAX(end_Date) AS end_date
FROM (
SELECT t.*,
SUM(CASE WHEN start_date <= prev_max THEN 0 ELSE 1 END)
OVER (PARTITION BY id ORDER BY start_date) AS grp
FROM (
SELECT t.*,
MAX(end_date) OVER (
PARTITION BY id ORDER BY start_date
ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
) AS prev_max
FROM table_name t
) t
) t
GROUP BY id, grp
db<>fiddle Oracle PostgreSQL