需要按连续日期对记录进行分组 SQL
Need to group records by consecutive date SQL
我正在编写一个查询,用来把日期连续的记录合并成一个日期区间:
-- Demo script: loads sample date ranges, then merges rows whose ranges are
-- back-to-back (next startdate = previous enddate + 1 day) into a single
-- range per sgid/metric by walking the rows with a recursive CTE.
-- Sample input table: one row per (sgid, metric) date range.
create table #consecutivedates (
sgid nvarchar(max),
metric nvarchar(max),
targetvalue nvarchar(max),
startdate datetime,
enddate datetime
)
-- Three rows for the same sgid/metric covering July, August and September 2019.
insert into #consecutivedates values
('2177', '515818', '18', '2019-09-01', '2019-09-30'),
('2177', '515818', '125', '2019-08-01', '2019-08-31'),
('2177', '515818', '15', '2019-07-01', '2019-07-31')
-- Copy the rows into #temp with start/end cast to DATE and a single global
-- sequence number rn ordered by sgid, metric, startdate.
-- targetvalue is not carried over.
SELECT sgid,metric, CAST(startdate AS DATE) startdate, CAST(enddate AS DATE) enddate,
ROW_NUMBER() OVER ( ORDER BY sgid, metric, startdate ) rn
INTO #temp
FROM #consecutivedates
-- Walk #temp one row at a time in rn order. GroupingColumn is kept while a row
-- starts exactly one day after the previous row's enddate for the same sgid and
-- metric; otherwise it is incremented, which opens a new group.
-- NOTE(review): every row adds one recursion level; SQL Server's default
-- MAXRECURSION limit is 100, so larger inputs presumably need an
-- OPTION (MAXRECURSION n) hint -- confirm against the target server.
;WITH cte
-- Anchor member: the first row (rn = 1) opens group 1.
AS ( SELECT sgid ,
metric ,
startdate ,
enddate ,
1 AS GroupingColumn ,
rn
FROM #temp
WHERE rn = 1
UNION ALL
-- Recursive member: compare row rn + 1 with the row produced in the previous step.
SELECT t2.sgid ,
t2.metric,
t2.startdate,
t2.enddate ,
CASE WHEN t2.startdate = DATEADD(day, 1, cte.enddate)
AND cte.sgid = t2.sgid AND cte.metric=t2.metric
THEN cte.GroupingColumn
ELSE cte.GroupingColumn + 1
END AS GroupingColumn ,
t2.rn
FROM #temp t2
INNER JOIN cte ON t2.rn = cte.rn + 1
)
-- Collapse every group to its earliest startdate and latest enddate.
SELECT sgid,metric, MIN(startdate) AS startdate, MAX(enddate) AS enddate
FROM cte
GROUP BY sgid,metric, GroupingColumn
-- Clean up the temp tables created by this script.
DROP TABLE #temp
DROP TABLE #consecutivedates
但我遇到了两个问题。
当文件很大时查询成本太高。
如果日期类似于:
开始日期       结束日期
'2019-08-01'  '2019-09-30'
'2019-10-01'  '2019-10-31'
那么查询不会把这些日期合并为一组,我需要它足够智能,能够处理这种情况。
任何帮助都会很好,
谢谢。
我的理解是:您想把 sgid 和 metric 都相同、并且日期相邻的记录合并在一起(相邻是指下一条记录恰好在当前记录结束后的第二天开始)。
这是一个典型的“间隙与孤岛”(gaps-and-islands)问题,下面的解法使用窗口累计求和来划分分组:
-- Gaps-and-islands: merge rows of the same sgid/metric whose date ranges are
-- exactly adjacent (startdate = previous enddate + 1 day).
;WITH with_previous_end AS (
    -- Attach to each row the enddate of the row that precedes it (by startdate)
    -- within the same sgid/metric; the first row of a partition gets NULL.
    SELECT
        src.sgid,
        src.metric,
        src.startdate,
        src.enddate,
        LAG(src.enddate) OVER (
            PARTITION BY src.sgid, src.metric
            ORDER BY src.startdate
        ) AS lag_enddate
    FROM #consecutivedates AS src
),
with_island AS (
    -- Running count of "island starts". A row continues the current island only
    -- when it begins one day after the preceding row ended; anything else
    -- (including a NULL lag_enddate) counts as the start of a new island.
    SELECT
        prev.sgid,
        prev.metric,
        prev.startdate,
        prev.enddate,
        SUM(CASE WHEN prev.startdate = DATEADD(day, 1, prev.lag_enddate) THEN 0 ELSE 1 END)
            OVER (PARTITION BY prev.sgid, prev.metric ORDER BY prev.startdate) AS grp
    FROM with_previous_end AS prev
)
-- One output row per island: its earliest start and latest end.
SELECT
    isl.sgid,
    isl.metric,
    MIN(isl.startdate) AS startdate,
    MAX(isl.enddate) AS enddate
FROM with_island AS isl
GROUP BY isl.sgid, isl.metric, isl.grp
对于您的样本数据,所有三个记录都相邻,这会产生:
sgid | metric | startdate | enddate
:--- | :----- | :---------------------- | :----------------------
2177 | 515818 | 2019-07-01 00:00:00.000 | 2019-09-30 00:00:00.000
请注意,该查询使用了 SQL Server 的日期函数(我猜您用的正是 SQL Server);在其他数据库中也有相应的替代写法。
使用窗口帧(window frame)的写法性能可能更好,但需要您自行测试。下面是另一种解决方案:
-- Gaps-and-islands using a window frame: a row joins the current island unless
-- it starts more than one day after the latest enddate seen so far.
;WITH running_end AS (
    -- prev_enddate = the greatest enddate among all earlier rows (by startdate)
    -- of the same sgid/metric; the frame stops one row before the current row,
    -- so the first row of each partition gets NULL.
    SELECT
        src.sgid,
        src.metric,
        src.startdate,
        src.enddate,
        MAX(src.enddate) OVER (
            PARTITION BY src.sgid, src.metric
            ORDER BY src.startdate
            ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
        ) AS prev_enddate
    FROM #consecutivedates AS src
),
islands AS (
    -- Running count of gaps. A gap is flagged only when startdate lies more than
    -- one day past prev_enddate; a NULL prev_enddate never flags a gap.
    SELECT
        re.sgid,
        re.metric,
        re.startdate,
        re.enddate,
        SUM(CASE WHEN re.startdate > DATEADD(day, 1, re.prev_enddate) THEN 1 ELSE 0 END)
            OVER (PARTITION BY re.sgid, re.metric ORDER BY re.startdate) AS grp
    FROM running_end AS re
)
-- One output row per island: its earliest start and latest end.
SELECT
    isl.sgid,
    isl.metric,
    MIN(isl.startdate) AS mindate,
    MAX(isl.enddate) AS maxdate
FROM islands AS isl
GROUP BY isl.sgid, isl.metric, isl.grp
结果(基于一份更全面的测试数据脚本):
sgid metric mindate maxdate
---- ------ --------------------- ---------------------
2177 515818 2019-03-01 00:00:00.0 2019-03-31 00:00:00.0
2177 515818 2019-07-01 00:00:00.0 2019-09-30 00:00:00.0
2177 515818 2019-11-01 00:00:00.0 2019-11-30 00:00:00.0
2177 515820 2019-10-01 00:00:00.0 2019-10-31 00:00:00.0
这是我使用的数据脚本:
-- Extended sample data. Metric 515818 has an isolated March range, a contiguous
-- July-September run and an isolated November range; metric 515820 has a single
-- October range.
CREATE TABLE #consecutivedates (
    sgid        NVARCHAR(MAX),
    metric      NVARCHAR(MAX),
    targetvalue NVARCHAR(MAX),
    startdate   DATETIME,
    enddate     DATETIME
);
INSERT INTO #consecutivedates (sgid, metric, targetvalue, startdate, enddate)
VALUES
    ('2177', '515818', '18', '2019-09-01', '2019-09-30'),
    ('2177', '515818', '125', '2019-08-01', '2019-08-31'),
    ('2177', '515818', '15', '2019-07-01', '2019-07-31'),
    ('2177', '515820', '15', '2019-10-01', '2019-10-31'),
    ('2177', '515818', '15', '2019-03-01', '2019-03-31'),
    ('2177', '515818', '15', '2019-11-01', '2019-11-30')
我正在编写一个查询,用来把日期连续的记录合并成一个日期区间:
-- Demo script: loads sample date ranges, then merges rows whose ranges are
-- back-to-back (next startdate = previous enddate + 1 day) into a single
-- range per sgid/metric by walking the rows with a recursive CTE.
-- Sample input table: one row per (sgid, metric) date range.
create table #consecutivedates (
sgid nvarchar(max),
metric nvarchar(max),
targetvalue nvarchar(max),
startdate datetime,
enddate datetime
)
-- Three rows for the same sgid/metric covering July, August and September 2019.
insert into #consecutivedates values
('2177', '515818', '18', '2019-09-01', '2019-09-30'),
('2177', '515818', '125', '2019-08-01', '2019-08-31'),
('2177', '515818', '15', '2019-07-01', '2019-07-31')
-- Copy the rows into #temp with start/end cast to DATE and a single global
-- sequence number rn ordered by sgid, metric, startdate.
-- targetvalue is not carried over.
SELECT sgid,metric, CAST(startdate AS DATE) startdate, CAST(enddate AS DATE) enddate,
ROW_NUMBER() OVER ( ORDER BY sgid, metric, startdate ) rn
INTO #temp
FROM #consecutivedates
-- Walk #temp one row at a time in rn order. GroupingColumn is kept while a row
-- starts exactly one day after the previous row's enddate for the same sgid and
-- metric; otherwise it is incremented, which opens a new group.
-- NOTE(review): every row adds one recursion level; SQL Server's default
-- MAXRECURSION limit is 100, so larger inputs presumably need an
-- OPTION (MAXRECURSION n) hint -- confirm against the target server.
;WITH cte
-- Anchor member: the first row (rn = 1) opens group 1.
AS ( SELECT sgid ,
metric ,
startdate ,
enddate ,
1 AS GroupingColumn ,
rn
FROM #temp
WHERE rn = 1
UNION ALL
-- Recursive member: compare row rn + 1 with the row produced in the previous step.
SELECT t2.sgid ,
t2.metric,
t2.startdate,
t2.enddate ,
CASE WHEN t2.startdate = DATEADD(day, 1, cte.enddate)
AND cte.sgid = t2.sgid AND cte.metric=t2.metric
THEN cte.GroupingColumn
ELSE cte.GroupingColumn + 1
END AS GroupingColumn ,
t2.rn
FROM #temp t2
INNER JOIN cte ON t2.rn = cte.rn + 1
)
-- Collapse every group to its earliest startdate and latest enddate.
SELECT sgid,metric, MIN(startdate) AS startdate, MAX(enddate) AS enddate
FROM cte
GROUP BY sgid,metric, GroupingColumn
-- Clean up the temp tables created by this script.
DROP TABLE #temp
DROP TABLE #consecutivedates
但我遇到了两个问题。
当文件很大时查询成本太高。
如果日期类似于:
开始日期       结束日期
'2019-08-01'  '2019-09-30'
'2019-10-01'  '2019-10-31'
那么查询不会把这些日期合并为一组,我需要它足够智能,能够处理这种情况。
任何帮助都会很好,
谢谢。
我的理解是:您想把 sgid 和 metric 都相同、并且日期相邻的记录合并在一起(相邻是指下一条记录恰好在当前记录结束后的第二天开始)。
这是一个典型的“间隙与孤岛”(gaps-and-islands)问题,下面的解法使用窗口累计求和来划分分组:
-- Gaps-and-islands: merge rows of the same sgid/metric whose date ranges are
-- exactly adjacent (startdate = previous enddate + 1 day).
;WITH with_previous_end AS (
    -- Attach to each row the enddate of the row that precedes it (by startdate)
    -- within the same sgid/metric; the first row of a partition gets NULL.
    SELECT
        src.sgid,
        src.metric,
        src.startdate,
        src.enddate,
        LAG(src.enddate) OVER (
            PARTITION BY src.sgid, src.metric
            ORDER BY src.startdate
        ) AS lag_enddate
    FROM #consecutivedates AS src
),
with_island AS (
    -- Running count of "island starts". A row continues the current island only
    -- when it begins one day after the preceding row ended; anything else
    -- (including a NULL lag_enddate) counts as the start of a new island.
    SELECT
        prev.sgid,
        prev.metric,
        prev.startdate,
        prev.enddate,
        SUM(CASE WHEN prev.startdate = DATEADD(day, 1, prev.lag_enddate) THEN 0 ELSE 1 END)
            OVER (PARTITION BY prev.sgid, prev.metric ORDER BY prev.startdate) AS grp
    FROM with_previous_end AS prev
)
-- One output row per island: its earliest start and latest end.
SELECT
    isl.sgid,
    isl.metric,
    MIN(isl.startdate) AS startdate,
    MAX(isl.enddate) AS enddate
FROM with_island AS isl
GROUP BY isl.sgid, isl.metric, isl.grp
对于您的样本数据,所有三个记录都相邻,这会产生:
sgid | metric | startdate               | enddate
:--- | :----- | :---------------------- | :----------------------
2177 | 515818 | 2019-07-01 00:00:00.000 | 2019-09-30 00:00:00.000
请注意,该查询使用了 SQL Server 的日期函数(我猜您用的正是 SQL Server);在其他数据库中也有相应的替代写法。
使用窗口帧(window frame)的写法性能可能更好,但需要您自行测试。下面是另一种解决方案:
-- Gaps-and-islands using a window frame: a row joins the current island unless
-- it starts more than one day after the latest enddate seen so far.
;WITH running_end AS (
    -- prev_enddate = the greatest enddate among all earlier rows (by startdate)
    -- of the same sgid/metric; the frame stops one row before the current row,
    -- so the first row of each partition gets NULL.
    SELECT
        src.sgid,
        src.metric,
        src.startdate,
        src.enddate,
        MAX(src.enddate) OVER (
            PARTITION BY src.sgid, src.metric
            ORDER BY src.startdate
            ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
        ) AS prev_enddate
    FROM #consecutivedates AS src
),
islands AS (
    -- Running count of gaps. A gap is flagged only when startdate lies more than
    -- one day past prev_enddate; a NULL prev_enddate never flags a gap.
    SELECT
        re.sgid,
        re.metric,
        re.startdate,
        re.enddate,
        SUM(CASE WHEN re.startdate > DATEADD(day, 1, re.prev_enddate) THEN 1 ELSE 0 END)
            OVER (PARTITION BY re.sgid, re.metric ORDER BY re.startdate) AS grp
    FROM running_end AS re
)
-- One output row per island: its earliest start and latest end.
SELECT
    isl.sgid,
    isl.metric,
    MIN(isl.startdate) AS mindate,
    MAX(isl.enddate) AS maxdate
FROM islands AS isl
GROUP BY isl.sgid, isl.metric, isl.grp
结果(基于一份更全面的测试数据脚本):
sgid metric mindate maxdate
---- ------ --------------------- ---------------------
2177 515818 2019-03-01 00:00:00.0 2019-03-31 00:00:00.0
2177 515818 2019-07-01 00:00:00.0 2019-09-30 00:00:00.0
2177 515818 2019-11-01 00:00:00.0 2019-11-30 00:00:00.0
2177 515820 2019-10-01 00:00:00.0 2019-10-31 00:00:00.0
这是我使用的数据脚本:
-- Extended sample data. Metric 515818 has an isolated March range, a contiguous
-- July-September run and an isolated November range; metric 515820 has a single
-- October range.
CREATE TABLE #consecutivedates (
    sgid        NVARCHAR(MAX),
    metric      NVARCHAR(MAX),
    targetvalue NVARCHAR(MAX),
    startdate   DATETIME,
    enddate     DATETIME
);
INSERT INTO #consecutivedates (sgid, metric, targetvalue, startdate, enddate)
VALUES
    ('2177', '515818', '18', '2019-09-01', '2019-09-30'),
    ('2177', '515818', '125', '2019-08-01', '2019-08-31'),
    ('2177', '515818', '15', '2019-07-01', '2019-07-31'),
    ('2177', '515820', '15', '2019-10-01', '2019-10-31'),
    ('2177', '515818', '15', '2019-03-01', '2019-03-31'),
    ('2177', '515818', '15', '2019-11-01', '2019-11-30')