当时间在给定的分钟间隔内连续时对记录进行分组

Group records when times are sequential within a given minute interval

在 SQL Server 2017 中,当 teacherid 和 custormerid 相同,并且预定时间是前一条记录之后的 50 分钟时,我想把多条记录合并成一条记录,例如:

表:Schedule

----------------------------------------------------------------
id  custormerid teacherid   schedule
----------------------------------------------------------------
571654  1085        46      2022-02-22 07:00:00.000
571657  1085        46      2022-02-25 07:00:00.000
571658  1085        46      2022-02-26 07:00:00.000
571659  1085        46      2022-02-26 07:50:00.000
571660  1085        46      2022-02-26 08:40:00.000
571666  1085        46      2022-02-26 10:20:00.000
571661  1085        46      2022-02-28 07:00:00.000
571662  1085        46      2022-02-28 07:50:00.000
571663  1085        11      2022-02-28 08:40:00.000
571664  1085        46      2022-02-24 07:00:00.000
571665  1085        46      2022-02-24 07:50:00.000

想要的结果

    --------------------------------------------------------------------------
    custormerid     teacherid   schedule            Qty
    --------------------------------------------------------------------------
    1085        46      2022-02-22 07:00:00.000     1
    1085        46      2022-02-25 07:00:00.000     1
    1085        46      2022-02-26 07:00:00.000     3
    1085        46      2022-02-26 10:20:00.000     1
    1085        46      2022-02-28 07:00:00.000     2
    1085        11      2022-02-28 08:40:00.000     1
    1085        46      2022-02-24 07:00:00.000     2

DDL 脚本:

-- Table that holds the question's sample rows.
-- Only [id] is mandatory; the other three columns accept NULL.
CREATE TABLE [dbo].[Schedule]
(
    [id]          INT      NOT NULL,
    [custormerid] INT      NULL,  -- spelled this way in every query that reads the table
    [teacherid]   INT      NULL,
    [schedule]    DATETIME NULL
);
    

-- Sample rows for [dbo].[Schedule].
-- Row 571666 is dated 2022-02-26 10:20 so that it agrees with the sample table and
-- the expected result, where 2022-02-26 10:20 is a group of its own (Qty 1) and
-- 2022-02-24 has only two rows. It was previously inserted as 2022-02-24.
INSERT INTO [dbo].[Schedule] ([id], [custormerid], [teacherid], [schedule])
VALUES
    (571654, 1085, 46, CAST(N'2022-02-22T07:00:00.000' AS DATETIME)),
    (571657, 1085, 46, CAST(N'2022-02-25T07:00:00.000' AS DATETIME)),
    (571658, 1085, 46, CAST(N'2022-02-26T07:00:00.000' AS DATETIME)),
    (571659, 1085, 46, CAST(N'2022-02-26T07:50:00.000' AS DATETIME)),
    (571660, 1085, 46, CAST(N'2022-02-26T08:40:00.000' AS DATETIME)),
    (571661, 1085, 46, CAST(N'2022-02-28T07:00:00.000' AS DATETIME)),
    (571662, 1085, 46, CAST(N'2022-02-28T07:50:00.000' AS DATETIME)),
    (571663, 1085, 11, CAST(N'2022-02-28T08:40:00.000' AS DATETIME)),
    (571664, 1085, 46, CAST(N'2022-02-24T07:00:00.000' AS DATETIME)),
    (571665, 1085, 46, CAST(N'2022-02-24T07:50:00.000' AS DATETIME)),
    (571666, 1085, 46, CAST(N'2022-02-26T10:20:00.000' AS DATETIME));

我看到了一些使用 CTE 的例子,但我不太明白如何得到那个结果。

我认为您不需要 CTE 来解决该问题。通过使用 Group by 你可以做到这一点 =>

-- One output row per customer / teacher / calendar day:
-- the earliest schedule of that day and the number of rows on that day.
-- The bucket is the date part of [schedule]; gaps between rows within a day
-- are not examined.
SELECT
    s.[custormerid],
    s.[teacherid],
    MIN(s.[schedule]) AS [schedule],
    COUNT(*)          AS Qty
FROM [dbo].[Schedule] AS s
GROUP BY
    s.[custormerid],
    s.[teacherid],
    CAST(s.[schedule] AS DATE)

更新: 现在,如果你想保持你在问题中提供的输出顺序,那么你可以像下面那样使用 CTE

-- Same per-day grouping as a plain GROUP BY, but each group also carries its
-- highest id so the final result can be sorted by it.
WITH per_day AS
(
    SELECT
        MAX(s.[id])       AS last_id,   -- sort key only; not returned
        s.[custormerid],
        s.[teacherid],
        MIN(s.[schedule]) AS [schedule],
        COUNT(*)          AS Qty
    FROM [dbo].[Schedule] AS s
    GROUP BY
        s.[custormerid],
        s.[teacherid],
        CAST(s.[schedule] AS DATE)
)
SELECT
    d.[custormerid],
    d.[teacherid],
    d.[schedule],
    d.Qty
FROM per_day AS d
ORDER BY d.last_id

输出:

最终更新: 现在,我已经使用了几种技术来实现所要求的。你在评论中说你需要用 50 分钟的差异来计算它们。所以,我用多个 CTE 做到了这一点。该查询为少量数据提供了完美的输出,但我没有测试大量数据。另一件事是,对于大量数据,查询性能可能会很慢。所以,请尝试并告诉我 =>

-- Collapse consecutive rows into one row per run ("gaps and islands").
-- A row continues the previous row's run when both rows have the same
-- custormerid and teacherid and the row is scheduled no more than 50 minutes
-- after the previous one. Returns custormerid, teacherid, the first schedule
-- of each run and the number of rows in it (Qty).
--
-- Differences from the earlier recursive version of this query:
--   * LAG() used OVER (ORDER BY (SELECT NULL)), which leaves the row order
--     undefined, so the "previous row" was arbitrary. The window is now
--     partitioned by custormerid/teacherid and ordered by schedule, with id
--     as a tie-breaker.
--   * The recursive CTE needed one recursion level per row in a run and would
--     stop at the default MAXRECURSION limit of 100. A running SUM over a
--     "starts a new run" flag replaces it.
--   * The DENSE_RANK column was never read and has been removed.
WITH ordered AS
(
    -- Attach the previous schedule of the same customer/teacher to every row.
    SELECT
        s.[id],
        s.[custormerid],
        s.[teacherid],
        s.[schedule],
        LAG(s.[schedule]) OVER (
            PARTITION BY s.[custormerid], s.[teacherid]
            ORDER BY s.[schedule], s.[id]
        ) AS prev_schedule
    FROM [dbo].[Schedule] AS s
),
flagged AS
(
    -- starts_run = 0 when the row continues the previous run, 1 otherwise.
    -- A NULL prev_schedule or schedule makes the comparison UNKNOWN, so such
    -- rows fall through to ELSE and start a run of their own. Rows with a NULL
    -- custormerid or teacherid never join a run, as with the original '=' test.
    SELECT
        o.[id],
        o.[custormerid],
        o.[teacherid],
        o.[schedule],
        CASE
            WHEN o.[custormerid] IS NOT NULL
             AND o.[teacherid] IS NOT NULL
             AND DATEADD(MINUTE, -50, o.[schedule]) <= o.prev_schedule
            THEN 0
            ELSE 1
        END AS starts_run
    FROM ordered AS o
),
runs AS
(
    -- The running total of the flag gives every row of a run the same number.
    -- ROWS (not the default RANGE) keeps rows with equal sort keys apart.
    SELECT
        f.[custormerid],
        f.[teacherid],
        f.[schedule],
        SUM(f.starts_run) OVER (
            PARTITION BY f.[custormerid], f.[teacherid]
            ORDER BY f.[schedule], f.[id]
            ROWS UNBOUNDED PRECEDING
        ) AS run_no
    FROM flagged AS f
)
SELECT
    r.[custormerid],
    r.[teacherid],
    MIN(r.[schedule]) AS [schedule],
    COUNT(*)          AS Qty
FROM runs AS r
GROUP BY
    r.[custormerid],
    r.[teacherid],
    r.run_no
ORDER BY
    MIN(r.[schedule]),
    r.[custormerid],
    r.[teacherid];

请尝试以下解决方案。

SQL

-- DDL and sample data population, start
-- Table variable with the same four columns as the Schedule table.
-- Ten rows are loaded; id 571666 from the question's sample is not among them.
DECLARE @Schedule TABLE
(
    id          INT      NOT NULL,
    custormerid INT      NULL,
    teacherid   INT      NULL,
    schedule    DATETIME NULL
);

INSERT INTO @Schedule (id, custormerid, teacherid, schedule)
VALUES
    (571654, 1085, 46, '2022-02-22T07:00:00.000'),
    (571657, 1085, 46, '2022-02-25T07:00:00.000'),
    (571658, 1085, 46, '2022-02-26T07:00:00.000'),
    (571659, 1085, 46, '2022-02-26T07:50:00.000'),
    (571660, 1085, 46, '2022-02-26T08:40:00.000'),
    (571661, 1085, 46, '2022-02-28T07:00:00.000'),
    (571662, 1085, 46, '2022-02-28T07:50:00.000'),
    (571663, 1085, 11, '2022-02-28T08:40:00.000'),
    (571664, 1085, 46, '2022-02-24T07:00:00.000'),
    (571665, 1085, 46, '2022-02-24T07:50:00.000');
-- DDL and sample data population, end

-- Group rows of @Schedule into runs: a row joins the previous row's run when
-- it has the same custormerid and teacherid and is scheduled no more than
-- 50 minutes after it. Returns the first schedule and row count of each run.
--
-- This replaces bucketing by DATEDIFF(minute, '1900-01-01', schedule) % 50.
-- That remainder is identical for any two rows a multiple of 50 minutes apart
-- (100 minutes, or 5 days = 7200 minutes), so unrelated rows of the same
-- customer/teacher were merged into one group.
WITH ordered AS
(
    -- Previous schedule of the same customer/teacher; id breaks ties.
    SELECT
        s.id,
        s.custormerid,
        s.teacherid,
        s.schedule,
        LAG(s.schedule) OVER (
            PARTITION BY s.custormerid, s.teacherid
            ORDER BY s.schedule, s.id
        ) AS prev_schedule
    FROM @Schedule AS s
),
flagged AS
(
    -- 0 = continues the previous run, 1 = starts a new one.
    -- A NULL prev_schedule or schedule yields UNKNOWN and falls through to ELSE.
    SELECT
        o.id,
        o.custormerid,
        o.teacherid,
        o.schedule,
        CASE
            WHEN o.schedule <= DATEADD(MINUTE, 50, o.prev_schedule) THEN 0
            ELSE 1
        END AS starts_run
    FROM ordered AS o
),
runs AS
(
    -- Running total of the flag numbers the runs inside each partition.
    SELECT
        f.custormerid,
        f.teacherid,
        f.schedule,
        SUM(f.starts_run) OVER (
            PARTITION BY f.custormerid, f.teacherid
            ORDER BY f.schedule, f.id
            ROWS UNBOUNDED PRECEDING
        ) AS run_no
    FROM flagged AS f
)
SELECT
    r.custormerid,
    r.teacherid,
    MIN(r.schedule) AS schedule,
    COUNT(*)        AS Qty
FROM runs AS r
GROUP BY
    r.custormerid,
    r.teacherid,
    r.run_no
ORDER BY MIN(r.schedule);

输出

+-------------+-----------+-------------------------+-----+
| custormerid | teacherid |        schedule         | Qty |
+-------------+-----------+-------------------------+-----+
|        1085 |        46 | 2022-02-22 07:00:00.000 |   1 |
|        1085 |        46 | 2022-02-24 07:00:00.000 |   2 |
|        1085 |        46 | 2022-02-25 07:00:00.000 |   1 |
|        1085 |        46 | 2022-02-26 07:00:00.000 |   3 |
|        1085 |        46 | 2022-02-28 07:00:00.000 |   2 |
|        1085 |        11 | 2022-02-28 08:40:00.000 |   1 |
+-------------+-----------+-------------------------+-----+

方法#2

太丑了,我不喜欢,但试一试。

-- Method #2, rewritten as a gaps-and-islands query over @Schedule.
-- A row joins the previous row's run when it has the same custormerid and
-- teacherid and is scheduled no more than 50 minutes after it. Each run yields
-- its first schedule and its row count (Qty).
--
-- The earlier form grouped by (schedule minutes % 50, legit). Separate runs
-- that happened to share both values -- for example two breaks on one day at
-- 08:40 and 10:20, or rows exactly 5 days apart -- were still merged.
-- Qty was SUM(IIF(legit=0,1,legit)), which adds 1 per row; COUNT(*) is the
-- same value. The gap test compares datetimes directly, so it is exact to the
-- datetime value rather than counting minute boundaries as DATEDIFF does.
;WITH ordered AS
(
    -- Previous schedule of the same customer/teacher; id breaks ties.
    SELECT
        s.id,
        s.custormerid,
        s.teacherid,
        s.schedule,
        LAG(s.schedule, 1) OVER (
            PARTITION BY s.custormerid, s.teacherid
            ORDER BY s.schedule, s.id
        ) AS prev_schedule
    FROM @Schedule AS s
), flagged AS (
    -- 0 = continues the previous run, 1 = starts a new one
    -- (first row of a partition, a gap over 50 minutes, or a NULL schedule).
    SELECT
        o.id,
        o.custormerid,
        o.teacherid,
        o.schedule,
        CASE
            WHEN o.schedule <= DATEADD(MINUTE, 50, o.prev_schedule) THEN 0
            ELSE 1
        END AS starts_run
    FROM ordered AS o
), runs AS (
    -- Running total of the flag numbers the runs inside each partition.
    SELECT
        f.custormerid,
        f.teacherid,
        f.schedule,
        SUM(f.starts_run) OVER (
            PARTITION BY f.custormerid, f.teacherid
            ORDER BY f.schedule, f.id
            ROWS UNBOUNDED PRECEDING
        ) AS run_no
    FROM flagged AS f
)
SELECT r.custormerid, r.teacherid, MIN(r.schedule) AS schedule
    , Qty = COUNT(*)
FROM runs AS r
GROUP BY r.custormerid, r.teacherid, r.run_no
ORDER BY r.custormerid, r.teacherid, MIN(r.schedule);