使用 SQL 对具有共同状态的连续项目进行分组(包括虚拟数据)
Using SQL to group consecutive items that share a common status (dummy data included)
给定一张表,其中同一分组(本例中为“车辆”)内的状态有时会连续重复出现,我想把这些连续重复的状态合并成一行,并对 status_seconds 求和。数据大致如下(下面附有一段 T-SQL,可将虚拟数据插入临时表,方便复现此示例):
例如,我想将 T101 车辆 table 的前三行合并为 status_seconds = 1+1+2(4 秒)的一行。对于虚拟数据,这些是需要合并的具有连续状态行的车辆。
请注意,在第 5-7 行中,T101 的两条 TRAVELLING 状态行之间夹着一条 T102 的状态行。
对我来说,这似乎是递归 CTE 的问题,但我发现它很难解决。
到目前为止,我已经能够识别出上述数据的锚节点,即:对于每辆车,我可以确定某个车辆状态的最后一次出现。下面是虚拟数据以及用于标识锚节点的 CTE。
-- Demo fixture: one row per recorded status of a vehicle.
-- `##` makes this a global temporary table, so it is visible to other sessions
-- until it is dropped.
CREATE TABLE ##vehiclesAndStates
(
     id              INT
    ,vehicle_name    VARCHAR(30)
    ,vehicle_status  VARCHAR(30)
    ,status_end_time DATETIME      -- instant at which the status ended
    ,status_seconds  INT           -- duration of the status, in seconds
);

-- The timestamps use the ISO 8601 form 'YYYY-MM-DDThh:mm:ss.mmm'. For DATETIME,
-- the space-separated form 'YYYY-MM-DD hh:mm:ss' is interpreted according to the
-- session's language / DATEFORMAT and can silently swap month and day
-- (4 December would become 12 April); the 'T' form is language-neutral.
-- NOTE(review): 'STOPPPED' (three P's, ids 808 and 707) looks like a typo for
-- 'STOPPED', but it is kept verbatim because the result table shown with the
-- answer lists it as a status of its own.
INSERT INTO ##vehiclesAndStates
    (id, vehicle_name, vehicle_status, status_end_time, status_seconds)
VALUES
     (100, 'T101', 'STOPPED',    '2020-12-04T09:43:18.000', 1)
    ,(801, 'T101', 'STOPPED',    '2020-12-04T09:43:19.000', 1)
    ,(745, 'T101', 'STOPPED',    '2020-12-04T09:43:20.000', 2)
    ,(925, 'T101', 'TURNING',    '2020-12-04T09:43:22.000', 1)
    ,(626, 'T101', 'TRAVELLING', '2020-12-04T09:43:23.000', 10)
    ,(401, 'T102', 'STOPPED',    '2020-12-04T09:43:23.000', 10)
    ,(201, 'T101', 'TRAVELLING', '2020-12-04T09:43:33.000', 1)
    ,(808, 'T102', 'STOPPPED',   '2020-12-04T09:43:33.000', 3)
    ,(707, 'T102', 'STOPPPED',   '2020-12-04T09:43:35.000', 7)
    ,(888, 'T101', 'TURNING',    '2020-12-04T09:43:34.000', 1)
    ,(42,  'T101', 'STOPPED',    '2020-12-04T09:43:35.000', 3)
    ,(2,   'T102', 'PARKED',     '2020-12-04T09:43:35.000', 10)
    ,(911, 'T101', 'TRAVELLING', '2020-12-04T09:43:35.000', 1);

-- Show the raw fixture rows (no ORDER BY, so the row order is not guaranteed).
SELECT
     id
    ,vehicle_name
    ,vehicle_status
    ,status_end_time
    ,status_seconds
FROM ##vehiclesAndStates;
-- Identify anchor nodes: the first row of every run of consecutive identical
-- statuses of a vehicle, i.e. rows whose previous status FOR THE SAME VEHICLE is
-- different, plus each vehicle's very first row (which has no previous row).
;WITH cte_AnchorNodes AS
(
    SELECT
         i.ID
        ,i.vehicle_name
        ,i.vehicle_status
        ,i.status_end_time
        ,i.status_seconds
        ,i.previous_vehicle_status
        ,i.previous_ID
    FROM (
        SELECT
             a.ID
            ,a.vehicle_name
            ,a.vehicle_status
            ,a.status_end_time
            ,a.status_seconds
            -- PARTITION BY vehicle_name keeps the look-back inside one vehicle.
            -- Without it, the first row of a vehicle is compared with the last
            -- row of the preceding vehicle and is dropped whenever the two
            -- statuses happen to match.
            ,LAG(a.vehicle_status, 1) OVER (
                 PARTITION BY a.vehicle_name
                 ORDER BY a.status_end_time
             ) AS previous_vehicle_status
            -- Same window as above so both columns describe the same previous row.
            ,LAG(a.ID, 1) OVER (
                 PARTITION BY a.vehicle_name
                 ORDER BY a.status_end_time
             ) AS previous_ID
            -- NOTE(review): rows of one vehicle that share a status_end_time are
            -- ordered arbitrarily; the table has no column that is known to
            -- break such ties chronologically - confirm before relying on it.
        FROM ##vehiclesAndStates AS a
    ) AS i
    -- An explicit NULL test replaces the former 'Handle Nulls' sentinel string,
    -- which would have misbehaved for a status literally named 'Handle Nulls'.
    WHERE i.previous_vehicle_status IS NULL
       OR i.vehicle_status <> i.previous_vehicle_status
)
结果
但是,我正在努力使递归 CTE 起作用:
--Select * From cte_AnchorNodes a Order By a.vehicle_name, a.status_end_time
-- Unfinished recursive CTE; the leading comma continues a WITH clause that
-- already defines cte_AnchorNodes. The `??` placeholders mark the parts that
-- have not been written yet, so this statement does not run as-is.
,cteRecursiveStatuses (Id, VehicleName, VehicleStatus, StatusEndTime, recursionDepth) AS
(
-- Anchor member: every row of cte_AnchorNodes, at recursion depth 0.
SELECT
a.ID
,a.vehicle_name
,a.vehicle_status
,a.status_end_time
,0 recursionDepth
FROM cte_AnchorNodes a
UNION ALL
-- Recursive member: joins rows of ##vehiclesAndStates to rows already produced
-- by this CTE.
SELECT
-- Placeholder: the select list still has to be written; it needs five columns
-- to match the column list declared for this CTE.
??
FROM
##vehiclesAndStates b
JOIN
-- Placeholder: the expression that links the already-produced row r to row b is
-- still missing.
cteRecursiveStatuses r ON r.Id = ??
-- Only rows with the same status as the already-produced row are joined.
-- NOTE(review): vehicle_name is not compared here; presumably the missing
-- `r.Id = ??` condition is meant to keep the join within one vehicle - confirm.
AND b.vehicle_status = r.VehicleStatus
)
-- Return every row produced by the recursive CTE.
Select * From cteRecursiveStatuses
-- Clean up the global temporary table used by this example.
DROP TABLE ##vehiclesAndStates
这是一个典型的间隙和孤岛问题,您希望将共享相同车辆和状态(孤岛)的“相邻”行组合在一起。
您不需要为此使用递归查询:窗口函数就能完成这项工作。在这里,最简单的方法可能是利用两个行号之间的差值来标识分组。
-- Gaps-and-islands via the difference of two row numbers.
-- seq_in_vehicle numbers all rows of a vehicle; seq_in_status numbers the rows of
-- one (vehicle, status) pair. While a status repeats on consecutive rows both
-- counters advance together, so their difference is constant within an island
-- and is used as the grouping key.
-- NOTE(review): rows of one vehicle that share a status_end_time are numbered in
-- an arbitrary order, so islands touching such ties may vary between runs.
SELECT
    t.vehicle_name,
    t.vehicle_status,
    MIN(t.status_end_time) AS min_status_end_time,
    MAX(t.status_end_time) AS max_status_end_time,
    SUM(t.status_seconds)  AS sum_status_seconds
FROM (
    SELECT
        vs.vehicle_name,
        vs.vehicle_status,
        vs.status_end_time,
        vs.status_seconds,
        ROW_NUMBER() OVER (
            PARTITION BY vs.vehicle_name
            ORDER BY vs.status_end_time
        ) AS seq_in_vehicle,
        ROW_NUMBER() OVER (
            PARTITION BY vs.vehicle_name, vs.vehicle_status
            ORDER BY vs.status_end_time
        ) AS seq_in_status
    FROM ##vehiclesAndStates AS vs
) AS t
GROUP BY
    t.vehicle_name,
    t.vehicle_status,
    t.seq_in_vehicle - t.seq_in_status
ORDER BY
    t.vehicle_name,
    MIN(t.status_end_time);
您可以单独运行子查询,观察行号如何变化,以进一步理解其原理。
对于您的示例数据,该查询返回:
vehicle_name | vehicle_status | min_status_end_time | max_status_end_time | sum_status_seconds
:----------- | :------------- | :---------------------- | :---------------------- | -----------------:
T101 | STOPPED | 2020-12-04 09:43:18.000 | 2020-12-04 09:43:20.000 | 4
T101 | TURNING | 2020-12-04 09:43:22.000 | 2020-12-04 09:43:22.000 | 1
T101 | TRAVELLING | 2020-12-04 09:43:23.000 | 2020-12-04 09:43:33.000 | 11
T101 | TURNING | 2020-12-04 09:43:34.000 | 2020-12-04 09:43:34.000 | 1
T101 | TRAVELLING | 2020-12-04 09:43:35.000 | 2020-12-04 09:43:35.000 | 1
T101 | STOPPED | 2020-12-04 09:43:35.000 | 2020-12-04 09:43:35.000 | 3
T102 | STOPPED | 2020-12-04 09:43:23.000 | 2020-12-04 09:43:23.000 | 10
T102 | STOPPPED | 2020-12-04 09:43:33.000 | 2020-12-04 09:43:35.000 | 10
T102 | PARKED | 2020-12-04 09:43:35.000 | 2020-12-04 09:43:35.000 | 10
给定一张表,其中同一分组(本例中为“车辆”)内的状态有时会连续重复出现,我想把这些连续重复的状态合并成一行,并对 status_seconds 求和。数据大致如下(下面附有一段 T-SQL,可将虚拟数据插入临时表,方便复现此示例):
例如,我想将 T101 车辆 table 的前三行合并为 status_seconds = 1+1+2(4 秒)的一行。对于虚拟数据,这些是需要合并的具有连续状态行的车辆。
请注意,在第 5-7 行中,T101 的两条 TRAVELLING 状态行之间夹着一条 T102 的状态行。
对我来说,这似乎是递归 CTE 的问题,但我发现它很难解决。
到目前为止,我已经能够识别出上述数据的锚节点,即:对于每辆车,我可以确定某个车辆状态的最后一次出现。下面是虚拟数据以及用于标识锚节点的 CTE。
-- Demo fixture: one row per recorded status of a vehicle.
-- `##` makes this a global temporary table, so it is visible to other sessions
-- until it is dropped.
CREATE TABLE ##vehiclesAndStates
(
     id              INT
    ,vehicle_name    VARCHAR(30)
    ,vehicle_status  VARCHAR(30)
    ,status_end_time DATETIME      -- instant at which the status ended
    ,status_seconds  INT           -- duration of the status, in seconds
);

-- The timestamps use the ISO 8601 form 'YYYY-MM-DDThh:mm:ss.mmm'. For DATETIME,
-- the space-separated form 'YYYY-MM-DD hh:mm:ss' is interpreted according to the
-- session's language / DATEFORMAT and can silently swap month and day
-- (4 December would become 12 April); the 'T' form is language-neutral.
-- NOTE(review): 'STOPPPED' (three P's, ids 808 and 707) looks like a typo for
-- 'STOPPED', but it is kept verbatim because the result table shown with the
-- answer lists it as a status of its own.
INSERT INTO ##vehiclesAndStates
    (id, vehicle_name, vehicle_status, status_end_time, status_seconds)
VALUES
     (100, 'T101', 'STOPPED',    '2020-12-04T09:43:18.000', 1)
    ,(801, 'T101', 'STOPPED',    '2020-12-04T09:43:19.000', 1)
    ,(745, 'T101', 'STOPPED',    '2020-12-04T09:43:20.000', 2)
    ,(925, 'T101', 'TURNING',    '2020-12-04T09:43:22.000', 1)
    ,(626, 'T101', 'TRAVELLING', '2020-12-04T09:43:23.000', 10)
    ,(401, 'T102', 'STOPPED',    '2020-12-04T09:43:23.000', 10)
    ,(201, 'T101', 'TRAVELLING', '2020-12-04T09:43:33.000', 1)
    ,(808, 'T102', 'STOPPPED',   '2020-12-04T09:43:33.000', 3)
    ,(707, 'T102', 'STOPPPED',   '2020-12-04T09:43:35.000', 7)
    ,(888, 'T101', 'TURNING',    '2020-12-04T09:43:34.000', 1)
    ,(42,  'T101', 'STOPPED',    '2020-12-04T09:43:35.000', 3)
    ,(2,   'T102', 'PARKED',     '2020-12-04T09:43:35.000', 10)
    ,(911, 'T101', 'TRAVELLING', '2020-12-04T09:43:35.000', 1);

-- Show the raw fixture rows (no ORDER BY, so the row order is not guaranteed).
SELECT
     id
    ,vehicle_name
    ,vehicle_status
    ,status_end_time
    ,status_seconds
FROM ##vehiclesAndStates;
-- Identify anchor nodes: the first row of every run of consecutive identical
-- statuses of a vehicle, i.e. rows whose previous status FOR THE SAME VEHICLE is
-- different, plus each vehicle's very first row (which has no previous row).
;WITH cte_AnchorNodes AS
(
    SELECT
         i.ID
        ,i.vehicle_name
        ,i.vehicle_status
        ,i.status_end_time
        ,i.status_seconds
        ,i.previous_vehicle_status
        ,i.previous_ID
    FROM (
        SELECT
             a.ID
            ,a.vehicle_name
            ,a.vehicle_status
            ,a.status_end_time
            ,a.status_seconds
            -- PARTITION BY vehicle_name keeps the look-back inside one vehicle.
            -- Without it, the first row of a vehicle is compared with the last
            -- row of the preceding vehicle and is dropped whenever the two
            -- statuses happen to match.
            ,LAG(a.vehicle_status, 1) OVER (
                 PARTITION BY a.vehicle_name
                 ORDER BY a.status_end_time
             ) AS previous_vehicle_status
            -- Same window as above so both columns describe the same previous row.
            ,LAG(a.ID, 1) OVER (
                 PARTITION BY a.vehicle_name
                 ORDER BY a.status_end_time
             ) AS previous_ID
            -- NOTE(review): rows of one vehicle that share a status_end_time are
            -- ordered arbitrarily; the table has no column that is known to
            -- break such ties chronologically - confirm before relying on it.
        FROM ##vehiclesAndStates AS a
    ) AS i
    -- An explicit NULL test replaces the former 'Handle Nulls' sentinel string,
    -- which would have misbehaved for a status literally named 'Handle Nulls'.
    WHERE i.previous_vehicle_status IS NULL
       OR i.vehicle_status <> i.previous_vehicle_status
)
结果
但是,我正在努力使递归 CTE 起作用:
--Select * From cte_AnchorNodes a Order By a.vehicle_name, a.status_end_time
,cteRecursiveStatuses (Id, VehicleName, VehicleStatus, StatusEndTime, recursionDepth) AS
-- Body of the unfinished recursive CTE cteRecursiveStatuses. The `??`
-- placeholders mark the parts that have not been written yet, so this statement
-- does not run as-is.
(
-- Anchor member: every row of cte_AnchorNodes, at recursion depth 0.
SELECT
a.ID
,a.vehicle_name
,a.vehicle_status
,a.status_end_time
,0 recursionDepth
FROM cte_AnchorNodes a
UNION ALL
-- Recursive member: joins rows of ##vehiclesAndStates to rows already produced
-- by this CTE.
SELECT
-- Placeholder: the select list still has to be written.
??
FROM
##vehiclesAndStates b
JOIN
-- Placeholder: the expression that links the already-produced row r to row b is
-- still missing.
cteRecursiveStatuses r ON r.Id = ??
-- Only rows with the same status as the already-produced row are joined.
-- NOTE(review): vehicle_name is not compared here; presumably the missing
-- `r.Id = ??` condition is meant to keep the join within one vehicle - confirm.
AND b.vehicle_status = r.VehicleStatus
)
-- Return every row produced by the recursive CTE.
Select * From cteRecursiveStatuses
-- Clean up the global temporary table used by this example.
DROP TABLE ##vehiclesAndStates
这是一个典型的间隙和孤岛问题,您希望将共享相同车辆和状态(孤岛)的“相邻”行组合在一起。
您不需要为此使用递归查询:窗口函数就能完成这项工作。在这里,最简单的方法可能是利用两个行号之间的差值来标识分组。
-- Gaps-and-islands via the difference of two row numbers.
-- seq_in_vehicle numbers all rows of a vehicle; seq_in_status numbers the rows of
-- one (vehicle, status) pair. While a status repeats on consecutive rows both
-- counters advance together, so their difference is constant within an island
-- and is used as the grouping key.
-- NOTE(review): rows of one vehicle that share a status_end_time are numbered in
-- an arbitrary order, so islands touching such ties may vary between runs.
SELECT
    t.vehicle_name,
    t.vehicle_status,
    MIN(t.status_end_time) AS min_status_end_time,
    MAX(t.status_end_time) AS max_status_end_time,
    SUM(t.status_seconds)  AS sum_status_seconds
FROM (
    SELECT
        vs.vehicle_name,
        vs.vehicle_status,
        vs.status_end_time,
        vs.status_seconds,
        ROW_NUMBER() OVER (
            PARTITION BY vs.vehicle_name
            ORDER BY vs.status_end_time
        ) AS seq_in_vehicle,
        ROW_NUMBER() OVER (
            PARTITION BY vs.vehicle_name, vs.vehicle_status
            ORDER BY vs.status_end_time
        ) AS seq_in_status
    FROM ##vehiclesAndStates AS vs
) AS t
GROUP BY
    t.vehicle_name,
    t.vehicle_status,
    t.seq_in_vehicle - t.seq_in_status
ORDER BY
    t.vehicle_name,
    MIN(t.status_end_time);
您可以单独运行子查询,观察行号如何变化,以进一步理解其原理。
对于您的示例数据,该查询返回:
vehicle_name | vehicle_status | min_status_end_time | max_status_end_time | sum_status_seconds
:----------- | :------------- | :---------------------- | :---------------------- | -----------------:
T101 | STOPPED | 2020-12-04 09:43:18.000 | 2020-12-04 09:43:20.000 | 4
T101 | TURNING | 2020-12-04 09:43:22.000 | 2020-12-04 09:43:22.000 | 1
T101 | TRAVELLING | 2020-12-04 09:43:23.000 | 2020-12-04 09:43:33.000 | 11
T101 | TURNING | 2020-12-04 09:43:34.000 | 2020-12-04 09:43:34.000 | 1
T101 | TRAVELLING | 2020-12-04 09:43:35.000 | 2020-12-04 09:43:35.000 | 1
T101 | STOPPED | 2020-12-04 09:43:35.000 | 2020-12-04 09:43:35.000 | 3
T102 | STOPPED | 2020-12-04 09:43:23.000 | 2020-12-04 09:43:23.000 | 10
T102 | STOPPPED | 2020-12-04 09:43:33.000 | 2020-12-04 09:43:35.000 | 10
T102 | PARKED | 2020-12-04 09:43:35.000 | 2020-12-04 09:43:35.000 | 10