需要按连续日期对记录进行分组 SQL

Question

我正在构建查询以构建连续的日期

-- Sample temp table for the question: one row per sgid/metric with a
-- startdate..enddate range and a target value.
CREATE TABLE #consecutivedates
(
    sgid        NVARCHAR(MAX),
    metric      NVARCHAR(MAX),
    targetvalue NVARCHAR(MAX),
    startdate   DATETIME,
    enddate     DATETIME
);

-- Three sample rows for the same sgid/metric: the ranges cover July, August
-- and September 2019 back to back.
-- The column list is spelled out so the insert does not depend on the
-- physical column order of the table.
INSERT INTO #consecutivedates (sgid, metric, targetvalue, startdate, enddate)
VALUES
    ('2177', '515818', '18',  '2019-09-01', '2019-09-30'),
    ('2177', '515818', '125', '2019-08-01', '2019-08-31'),
    ('2177', '515818', '15',  '2019-07-01', '2019-07-31');

-- Stage the rows in #temp: dates are cast to DATE (time-of-day dropped) and
-- every row gets a sequential number rn, ordered by sgid, metric, startdate.
SELECT
    src.sgid,
    src.metric,
    CAST(src.startdate AS DATE) AS startdate,
    CAST(src.enddate AS DATE)   AS enddate,
    ROW_NUMBER() OVER (ORDER BY src.sgid, src.metric, src.startdate) AS rn
INTO #temp
FROM #consecutivedates AS src;

-- Walk #temp in rn order with a recursive CTE.
-- GroupingColumn identifies runs of consecutive dates: it is carried over
-- unchanged while a row starts exactly one day after the previous row's
-- enddate for the same sgid and metric, and is incremented otherwise.
;WITH cte AS (
    -- Anchor member: the first row opens group 1.
    SELECT
        sgid,
        metric,
        startdate,
        enddate,
        1 AS GroupingColumn,
        rn
    FROM #temp
    WHERE rn = 1

    UNION ALL

    -- Recursive member: compare row rn + 1 with the row processed just before it.
    SELECT
        t2.sgid,
        t2.metric,
        t2.startdate,
        t2.enddate,
        CASE
            WHEN t2.startdate = DATEADD(day, 1, cte.enddate)
                 AND cte.sgid = t2.sgid
                 AND cte.metric = t2.metric
            THEN cte.GroupingColumn
            ELSE cte.GroupingColumn + 1
        END AS GroupingColumn,
        t2.rn
    FROM #temp AS t2
    INNER JOIN cte
        ON t2.rn = cte.rn + 1
)
SELECT
    sgid,
    metric,
    MIN(startdate) AS startdate,
    MAX(enddate)   AS enddate
FROM cte
GROUP BY
    sgid,
    metric,
    GroupingColumn
-- The recursion descends one level per row of #temp. SQL Server aborts a
-- recursive CTE after 100 levels by default (error 530), so the limit is
-- lifted here; otherwise the query fails once #temp holds more than 101 rows.
OPTION (MAXRECURSION 0);

-- Clean up the temporary tables used by the script.
DROP TABLE #temp;
DROP TABLE #consecutivedates;

但我遇到了两个问题。

1. 当文件很大时，查询成本太高。
2. 如果日期类似于

开始日期        结束日期
'2019-08-01'    '2019-09-30'
'2019-10-01'    '2019-10-31'

那么查询不会把这些日期合并为一组；我需要它足够智能，能够处理这种情况。

任何帮助都会很好，

谢谢。

Answer 1

我理解您想将具有相同 sgid 和 metric 且相邻的记录组合在一起（即下一条记录在当前记录结束后一天开始）。

这是一个“间隙与孤岛”（gaps-and-islands）问题的解法，它使用窗口函数的累计求和（window sum）来定义分组：

-- Gaps-and-islands with LAG + running sum.
-- Innermost query: fetch the previous row's enddate within each sgid/metric.
-- Middle query: a row scores 0 when it starts exactly one day after that
-- previous enddate and 1 otherwise (including the first row, whose
-- lag_enddate is NULL); the running sum of these scores is the island id grp.
-- Outer query: collapse each island to its earliest start and latest end.
SELECT
    islands.sgid,
    islands.metric,
    MIN(islands.startdate) AS startdate,
    MAX(islands.enddate)   AS enddate
FROM (
    SELECT
        prev.sgid,
        prev.metric,
        prev.startdate,
        prev.enddate,
        SUM(CASE
                WHEN prev.startdate = DATEADD(day, 1, prev.lag_enddate) THEN 0
                ELSE 1
            END)
            OVER (PARTITION BY prev.sgid, prev.metric ORDER BY prev.startdate) AS grp
    FROM (
        SELECT
            src.sgid,
            src.metric,
            src.startdate,
            src.enddate,
            LAG(src.enddate)
                OVER (PARTITION BY src.sgid, src.metric ORDER BY src.startdate) AS lag_enddate
        FROM #consecutivedates AS src
    ) AS prev
) AS islands
GROUP BY
    islands.sgid,
    islands.metric,
    islands.grp;

对于您的样本数据，所有三个记录都相邻，这会产生：

sgid | metric | startdate               | enddate                
:--- | :----- | :---------------------- | :----------------------
2177 | 515818 | 2019-07-01 00:00:00.000 | 2019-09-30 00:00:00.000

Demo on DB Fiddle

请注意，该查询使用了 SQL Server 的日期函数（我猜您使用的正是 SQL Server）；其他数据库中有对应的替代函数。

Answer 2

使用窗口帧（window frame）的写法性能可能更高，但您需要自行测试。下面是另一种解决方案：

-- Gaps-and-islands with a running MAX.
-- Innermost query: prev_enddate is the latest enddate among all earlier rows
-- of the same sgid/metric (NULL for the first row of a partition).
-- Middle query: a row scores 1 only when it starts more than one day after
-- prev_enddate; the comparison against NULL on the first row falls through to
-- 0. The running sum of these scores is the island id grp.
-- Outer query: collapse each island to its earliest start and latest end.
SELECT
    islands.sgid,
    islands.metric,
    MIN(islands.startdate) AS mindate,
    MAX(islands.enddate)   AS maxdate
FROM (
    SELECT
        seen.sgid,
        seen.metric,
        seen.startdate,
        seen.enddate,
        seen.prev_enddate,
        SUM(CASE
                WHEN seen.startdate > DATEADD(day, 1, seen.prev_enddate) THEN 1
                ELSE 0
            END)
            OVER (PARTITION BY seen.sgid, seen.metric ORDER BY seen.startdate) AS grp
    FROM (
        SELECT
            src.sgid,
            src.metric,
            src.startdate,
            src.enddate,
            MAX(src.enddate)
                OVER (PARTITION BY src.sgid, src.metric
                      ORDER BY src.startdate
                      ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) AS prev_enddate
        FROM #consecutivedates AS src
    ) AS seen
) AS islands
GROUP BY
    islands.sgid,
    islands.metric,
    islands.grp;

结果（使用了更全面的测试数据，脚本见下文）：

sgid  metric  mindate                maxdate              
----  ------  ---------------------  ---------------------
2177  515818  2019-03-01 00:00:00.0  2019-03-31 00:00:00.0
2177  515818  2019-07-01 00:00:00.0  2019-09-30 00:00:00.0
2177  515818  2019-11-01 00:00:00.0  2019-11-30 00:00:00.0
2177  515820  2019-10-01 00:00:00.0  2019-10-31 00:00:00.0

这是我使用的数据脚本：

-- Temp table for the extended test data; same columns as in the question.
CREATE TABLE #consecutivedates
(
    sgid        NVARCHAR(MAX),
    metric      NVARCHAR(MAX),
    targetvalue NVARCHAR(MAX),
    startdate   DATETIME,
    enddate     DATETIME
);

-- Extended test data: for metric 515818 a standalone March range, a
-- July-September run of adjacent ranges and a standalone November range,
-- plus one October range for a second metric (515820).
-- The column list is spelled out so the insert does not depend on the
-- physical column order of the table.
INSERT INTO #consecutivedates (sgid, metric, targetvalue, startdate, enddate)
VALUES
    ('2177', '515818', '18',  '2019-09-01', '2019-09-30'),
    ('2177', '515818', '125', '2019-08-01', '2019-08-31'),
    ('2177', '515818', '15',  '2019-07-01', '2019-07-31'),
    ('2177', '515820', '15',  '2019-10-01', '2019-10-31'),
    ('2177', '515818', '15',  '2019-03-01', '2019-03-31'),
    ('2177', '515818', '15',  '2019-11-01', '2019-11-30');

需要按连续日期对记录进行分组 SQL

Need to group records by consecutive date SQL

sql

tsql

date

query-optimization

gaps-and-islands