SQL :在分区上使用 LEAD/LAG and/or ROW_NUMBER 查找 2 个事件之间的持续时间
SQL : Finding duration between 2 events using LEAD/LAG and/or ROW_NUMBER over a partition
我有一个 table,其中包含按车辆和时间划分的一系列事件。
车辆在这段时间内连接或断开连接。
我有兴趣计算连接事件 (connected = 1) 和随后的断开连接事件 (connected = 0) 之间的持续时间
我想在分区上使用 SQL 的 LEAD and LAG。
我想知道如何对我的数据进行分区以实现此目标。
当然VehicleId是第一候选。第二个计算域是什么?
示例数据:
-- Sample data: one row per connection-state event of a vehicle.
-- connected = 1 is a connect event, connected = 0 is a disconnect event.
CREATE TABLE #Events
(
    VehicleId int,
    connected bit,
    [Time]    datetime
);

INSERT INTO #Events
    (VehicleId, connected, [Time])
VALUES
    (25931, 0, '2020-10-13 16:02:10.117'),
    (25931, 1, '2020-10-13 11:32:39.213'),
    (25931, 1, '2020-10-13 10:04:29.470'),
    (25925, 1, '2020-10-13 07:41:31.637'),
    (25925, 1, '2020-10-13 06:06:22.600'),
    (25931, 1, '2020-10-13 05:23:19.433'),
    (25927, 1, '2020-10-13 01:01:36.460'),
    (25931, 0, '2020-10-13 17:55:10.380'),
    (25931, 0, '2020-10-13 12:14:10.837'),
    (25931, 0, '2020-10-13 10:53:54.527'),
    (25925, 0, '2020-10-13 09:06:52.063'),
    (25931, 0, '2020-10-13 08:32:45.230'),
    (25925, 0, '2020-10-13 06:42:37.627'),
    (25925, 0, '2020-10-13 05:12:08.070'),
    (25927, 0, '2020-10-13 04:42:23.887'),
    (25927, 0, '2020-10-13 00:56:36.090');

-- List every event in chronological order.
SELECT
    VehicleId,
    connected,
    [Time]
FROM #Events
ORDER BY [Time];

-- Clean up the temporary table.
DROP TABLE #Events;
SELECT查询结果:
VehicleId connected Time
25927 0 2020-10-13 00:56:36.090
25927 1 2020-10-13 01:01:36.460
25927 0 2020-10-13 04:42:23.887
25925 0 2020-10-13 05:12:08.070
25931 1 2020-10-13 05:23:19.433
25925 1 2020-10-13 06:06:22.600
25925 0 2020-10-13 06:42:37.627
25925 1 2020-10-13 07:41:31.637
25931 0 2020-10-13 08:32:45.230
25925 0 2020-10-13 09:06:52.063
25931 1 2020-10-13 10:04:29.470
25931 0 2020-10-13 10:53:54.527
25931 1 2020-10-13 11:32:39.213
25931 0 2020-10-13 12:14:10.837
25931 0 2020-10-13 16:02:10.117
25931 0 2020-10-13 17:55:10.380
编辑:
我期待的结果集类似于
VehicleId, Duration (min)
25927, 221
以下两个事件:
25927 1 2020-10-13 01:01:36.460
25927 0 2020-10-13 04:42:23.887
每个 vehicleid 和一对 connected/disconnected.
依此类推
提前致谢。
编辑 2:根据评论 FIRST_VALUE/LAST_VALUE 不适合。问题已更新。
看起来您想要对“过渡”行的时间差求和,即车辆从连接状态变为断开连接状态的那些行。如果是这样,您可以使用 lag():
-- Total minutes per vehicle between a connect event and the disconnect event
-- that directly follows it.
SELECT
    prev.vehicleid,
    SUM(DATEDIFF(MINUTE, prev.lag_time, prev.time)) AS sum_diff
FROM (
    -- Attach to each event the state and time of the same vehicle's previous event.
    SELECT
        src.vehicleid,
        src.connected,
        src.time,
        LAG(src.connected) OVER (PARTITION BY src.vehicleid ORDER BY src.time) AS lag_connected,
        LAG(src.time)      OVER (PARTITION BY src.vehicleid ORDER BY src.time) AS lag_time
    FROM #events AS src
) AS prev
-- Keep only disconnect rows (0) whose previous event was a connect (1).
WHERE prev.lag_connected = 1
  AND prev.connected = 0
GROUP BY prev.vehicleid;
对于您的示例数据,返回结果如下:
vehicleid | sum_diff
--------: | -------:
25925 | 121
25927 | 221
25931 | 280
假设对于该车辆,1 之后总是跟着 0,那么您可以使用 LEAD 获取下一个事件的时间,然后对 DATEDIFF 的结果求和(SUM):
-- Connected minutes for vehicle 25927.
-- Relies on every connect event (1) of the vehicle being followed by a
-- disconnect event (0): each connect row is paired with the time of the
-- vehicle's next event.
SELECT
    nxt.VehicleID,
    SUM(DATEDIFF(MINUTE, nxt.[Time], nxt.NextTime)) AS Duration
FROM (
    SELECT
        E.VehicleId,
        E.connected,
        E.[Time], -- "Time" is also the name of a data type (one without a date portion); a different column name would be clearer
        LEAD(E.[Time]) OVER (PARTITION BY E.VehicleID ORDER BY E.[Time]) AS NextTime
    FROM #Events AS E
    WHERE E.VehicleID = 25927
) AS nxt
WHERE nxt.Connected = 1
GROUP BY nxt.VehicleId;
此解决方案不使用 FIRST_VALUE/LAST_VALUE。
;
-- Pair every connect event with the next event of the same vehicle and report
-- the elapsed seconds between the two.
WITH Ranked AS
(
    -- Number all events chronologically.
    -- VehicleId and connected are tie-breakers: Ranked is referenced twice
    -- below, and ROW_NUMBER() is only repeatable when its ORDER BY is unique.
    -- Ordering by Time alone could number simultaneous events differently in
    -- each reference and make the join pick the wrong row.
    SELECT
        VehicleId,
        connected,
        Time,
        DateRowNumber = ROW_NUMBER() OVER (ORDER BY Time, VehicleId, connected)
    FROM #Events
)
,Joined AS
(
    -- For connect rows (connected = 1) remember the row number of the
    -- vehicle's next event; all other rows get NULL and drop out of the join.
    SELECT
        VehicleId,
        connected,
        Time,
        DateRowNumber,
        JoiningId = CASE
                        WHEN connected = 1
                            THEN LEAD(DateRowNumber) OVER (PARTITION BY VehicleId ORDER BY Time, DateRowNumber)
                    END
    FROM Ranked
)
SELECT
    J.VehicleId,
    J.Time,
    R.Time,
    DifferencetInSeconds = DATEDIFF(SECOND, J.Time, R.Time)
FROM Joined AS J
INNER JOIN Ranked AS R
    ON R.DateRowNumber = J.JoiningId
ORDER BY
    J.VehicleId,
    J.Time;

-- Raw events per vehicle, for comparison with the pairs above.
SELECT
    VehicleId,
    connected,
    Time
FROM #Events
ORDER BY VehicleID, Time;
VehicleId Time Time DifferencetInSeconds
----------- ----------------------- ----------------------- --------------------
25925 2020-10-13 06:06:22.600 2020-10-13 06:42:37.627 2175
25925 2020-10-13 07:41:31.637 2020-10-13 09:06:52.063 5121
25927 2020-10-13 01:01:36.460 2020-10-13 04:42:23.887 13247
25931 2020-10-13 05:23:19.433 2020-10-13 08:32:45.230 11366
25931 2020-10-13 10:04:29.470 2020-10-13 10:53:54.527 2965
25931 2020-10-13 11:32:39.213 2020-10-13 12:14:10.837 2491
我有一个 table,其中包含按车辆和时间划分的一系列事件。 车辆在这段时间内连接或断开连接。
我有兴趣计算连接事件 (connected = 1) 和随后的断开连接事件 (connected = 0) 之间的持续时间
我想在分区上使用 SQL 的 LEAD and LAG。 我想知道如何对我的数据进行分区以实现此目标。
当然VehicleId是第一候选。第二个计算域是什么?
示例数据:
-- Sample data: one row per connection-state event of a vehicle.
-- connected = 1 is a connect event, connected = 0 is a disconnect event.
CREATE TABLE #Events
(
    VehicleId int,
    connected bit,
    [Time]    datetime
);

INSERT INTO #Events
    (VehicleId, connected, [Time])
VALUES
    (25931, 0, '2020-10-13 16:02:10.117'),
    (25931, 1, '2020-10-13 11:32:39.213'),
    (25931, 1, '2020-10-13 10:04:29.470'),
    (25925, 1, '2020-10-13 07:41:31.637'),
    (25925, 1, '2020-10-13 06:06:22.600'),
    (25931, 1, '2020-10-13 05:23:19.433'),
    (25927, 1, '2020-10-13 01:01:36.460'),
    (25931, 0, '2020-10-13 17:55:10.380'),
    (25931, 0, '2020-10-13 12:14:10.837'),
    (25931, 0, '2020-10-13 10:53:54.527'),
    (25925, 0, '2020-10-13 09:06:52.063'),
    (25931, 0, '2020-10-13 08:32:45.230'),
    (25925, 0, '2020-10-13 06:42:37.627'),
    (25925, 0, '2020-10-13 05:12:08.070'),
    (25927, 0, '2020-10-13 04:42:23.887'),
    (25927, 0, '2020-10-13 00:56:36.090');

-- List every event in chronological order.
SELECT
    VehicleId,
    connected,
    [Time]
FROM #Events
ORDER BY [Time];

-- Clean up the temporary table.
DROP TABLE #Events;
SELECT查询结果:
VehicleId connected Time
25927 0 2020-10-13 00:56:36.090
25927 1 2020-10-13 01:01:36.460
25927 0 2020-10-13 04:42:23.887
25925 0 2020-10-13 05:12:08.070
25931 1 2020-10-13 05:23:19.433
25925 1 2020-10-13 06:06:22.600
25925 0 2020-10-13 06:42:37.627
25925 1 2020-10-13 07:41:31.637
25931 0 2020-10-13 08:32:45.230
25925 0 2020-10-13 09:06:52.063
25931 1 2020-10-13 10:04:29.470
25931 0 2020-10-13 10:53:54.527
25931 1 2020-10-13 11:32:39.213
25931 0 2020-10-13 12:14:10.837
25931 0 2020-10-13 16:02:10.117
25931 0 2020-10-13 17:55:10.380
编辑: 我期待的结果集类似于
VehicleId, Duration (min)
25927, 221
以下两个事件:
25927 1 2020-10-13 01:01:36.460
25927 0 2020-10-13 04:42:23.887
每个 vehicleid 和一对 connected/disconnected.
依此类推。提前致谢。
编辑 2:根据评论 FIRST_VALUE/LAST_VALUE 不适合。问题已更新。
看起来您想要对“过渡”行的时间差求和,即车辆从连接状态变为断开连接状态的那些行。如果是这样,您可以使用 lag():
-- Total minutes per vehicle between a connect event and the disconnect event
-- that directly follows it.
SELECT
    prev.vehicleid,
    SUM(DATEDIFF(MINUTE, prev.lag_time, prev.time)) AS sum_diff
FROM (
    -- Attach to each event the state and time of the same vehicle's previous event.
    SELECT
        src.vehicleid,
        src.connected,
        src.time,
        LAG(src.connected) OVER (PARTITION BY src.vehicleid ORDER BY src.time) AS lag_connected,
        LAG(src.time)      OVER (PARTITION BY src.vehicleid ORDER BY src.time) AS lag_time
    FROM #events AS src
) AS prev
-- Keep only disconnect rows (0) whose previous event was a connect (1).
WHERE prev.lag_connected = 1
  AND prev.connected = 0
GROUP BY prev.vehicleid;
对于您的示例数据,返回结果如下:
vehicleid | sum_diff
--------: | -------:
25925 | 121
25927 | 221
25931 | 280
假设对于该车辆,1 之后总是跟着 0,那么您可以使用 LEAD 获取下一个事件的时间,然后对 DATEDIFF 的结果求和(SUM):
-- Connected minutes for vehicle 25927.
-- Relies on every connect event (1) of the vehicle being followed by a
-- disconnect event (0): each connect row is paired with the time of the
-- vehicle's next event.
SELECT
    nxt.VehicleID,
    SUM(DATEDIFF(MINUTE, nxt.[Time], nxt.NextTime)) AS Duration
FROM (
    SELECT
        E.VehicleId,
        E.connected,
        E.[Time], -- "Time" is also the name of a data type (one without a date portion); a different column name would be clearer
        LEAD(E.[Time]) OVER (PARTITION BY E.VehicleID ORDER BY E.[Time]) AS NextTime
    FROM #Events AS E
    WHERE E.VehicleID = 25927
) AS nxt
WHERE nxt.Connected = 1
GROUP BY nxt.VehicleId;
此解决方案不使用 FIRST_VALUE/LAST_VALUE。
;
-- Pair every connect event with the next event of the same vehicle and report
-- the elapsed seconds between the two.
WITH Ranked AS
(
    -- Number all events chronologically.
    -- VehicleId and connected are tie-breakers: Ranked is referenced twice
    -- below, and ROW_NUMBER() is only repeatable when its ORDER BY is unique.
    -- Ordering by Time alone could number simultaneous events differently in
    -- each reference and make the join pick the wrong row.
    SELECT
        VehicleId,
        connected,
        Time,
        DateRowNumber = ROW_NUMBER() OVER (ORDER BY Time, VehicleId, connected)
    FROM #Events
)
,Joined AS
(
    -- For connect rows (connected = 1) remember the row number of the
    -- vehicle's next event; all other rows get NULL and drop out of the join.
    SELECT
        VehicleId,
        connected,
        Time,
        DateRowNumber,
        JoiningId = CASE
                        WHEN connected = 1
                            THEN LEAD(DateRowNumber) OVER (PARTITION BY VehicleId ORDER BY Time, DateRowNumber)
                    END
    FROM Ranked
)
SELECT
    J.VehicleId,
    J.Time,
    R.Time,
    DifferencetInSeconds = DATEDIFF(SECOND, J.Time, R.Time)
FROM Joined AS J
INNER JOIN Ranked AS R
    ON R.DateRowNumber = J.JoiningId
ORDER BY
    J.VehicleId,
    J.Time;

-- Raw events per vehicle, for comparison with the pairs above.
SELECT
    VehicleId,
    connected,
    Time
FROM #Events
ORDER BY VehicleID, Time;
VehicleId Time Time DifferencetInSeconds
----------- ----------------------- ----------------------- --------------------
25925 2020-10-13 06:06:22.600 2020-10-13 06:42:37.627 2175
25925 2020-10-13 07:41:31.637 2020-10-13 09:06:52.063 5121
25927 2020-10-13 01:01:36.460 2020-10-13 04:42:23.887 13247
25931 2020-10-13 05:23:19.433 2020-10-13 08:32:45.230 11366
25931 2020-10-13 10:04:29.470 2020-10-13 10:53:54.527 2965
25931 2020-10-13 11:32:39.213 2020-10-13 12:14:10.837 2491