计算某些记录不完整时登录和注销时间之间的持续时间
Calculate duration between login and logout time when some records are incomplete
我想计算用户在网站上花费的总时间。有3种情况。
存在用户登录时间和退出时间的记录。
-->总时间应该是登录和退出的时间差之和
有用户登录时间记录,但没有登出时间。
-->总时间应该标记为-1.
用户多次登录,只有1次退出
-->总时间应该是最早登录时间和退出时间的时间差之和。
我的table
-- Sample data: one row per login. logout_time is NULL when no logout was recorded.
CREATE TABLE #my_table
(
    id          BIGINT IDENTITY PRIMARY KEY,
    userID      INT,
    login_time  DATETIME,
    logout_time DATETIME
);

-- id is left out of the column list: the IDENTITY column numbers the rows itself.
INSERT INTO #my_table (userID, login_time, logout_time)
VALUES
    (222222, '2016-05-19 01:06:00.000', '2016-05-19 01:10:00.000'),
    (222222, '2016-05-19 01:12:00.000', '2016-05-19 01:20:00.000'),
    (333333, '2016-05-24 14:44:00.000', '2016-05-24 14:47:00.000'),
    (333333, '2016-05-24 14:59:00.000', NULL),
    (444444, '2016-05-24 14:48:00.000', '2016-05-24 14:49:00.000'),
    (444444, '2016-05-24 14:50:00.000', NULL),
    (444444, '2016-05-24 14:51:00.000', NULL),
    (444444, '2016-05-24 14:53:00.000', '2016-05-24 14:59:00.000');
预期结果
对于大多数情况,数据库中捕获的记录将是情况1,但有时也会捕获情况2和情况3。我需要一个脚本来计算所有情况下的总登录时间。
如何查询?
-- Demo script: total minutes each user spent logged in, one result row per user.
--   * A login that has a logout contributes DATEDIFF(minute, login, logout).
--   * Consecutive logins without a logout are merged into the next logout of
--     the same user; that session starts at the earliest of those logins.
--   * A user whose most recent login has no logout is reported as -1.
-- The logic is set-based and ordered by login_time, so it does not depend on
-- id values being contiguous or grouped by user.
IF OBJECT_ID('tempdb..#my_table') IS NOT NULL
    DROP TABLE #my_table;

CREATE TABLE #my_table
(
    id BIGINT IDENTITY PRIMARY KEY
    ,userID INT
    ,login_time DATETIME
    ,logout_time DATETIME
);

-- Users whose most recent login is still open.
DECLARE @StillIn TABLE (
    userID INT
);

-- One row per finished session, or a single -1 marker row for a still-in user.
DECLARE @DRes TABLE (
    userID INT,
    logtime INT
);

INSERT INTO #my_table (userID, login_time, logout_time) VALUES
(222222, '2016-05-19 01:06:00.000', '2016-05-19 01:10:00.000')
,(222222, '2016-05-19 01:12:00.000', '2016-05-19 01:20:00.000')
,(333333, '2016-05-24 14:44:00.000', '2016-05-24 14:47:00.000')
,(333333, '2016-05-24 14:59:00.000', NULL)
,(444444, '2016-05-24 14:48:00.000', '2016-05-24 14:49:00.000')
,(444444, '2016-05-24 14:50:00.000', NULL)
,(444444, '2016-05-24 14:51:00.000', NULL)
,(444444, '2016-05-24 14:53:00.000', '2016-05-24 14:59:00.000');

-- A user is still logged in when an open login has no later login after it.
-- DISTINCT guards against several open rows sharing the same latest login_time.
INSERT INTO @StillIn (userID)
SELECT DISTINCT cur.userID
FROM #my_table AS cur
WHERE cur.logout_time IS NULL
    AND NOT EXISTS (
        SELECT 1
        FROM #my_table AS later
        WHERE later.userID = cur.userID
            AND later.login_time > cur.login_time
    );

-- Still-in users get the -1 marker only; their finished sessions are ignored.
INSERT INTO @DRes (userID, logtime)
SELECT si.userID, -1
FROM @StillIn AS si;

-- Finished sessions of everyone else. For each row that has a logout, the
-- session starts at the earliest login that is not separated from it by an
-- earlier finished session, i.e. the first of the open logins directly before
-- it, or the row's own login when there are none.
INSERT INTO @DRes (userID, logtime)
SELECT
    closed.userID
    ,DATEDIFF(mi, s.session_start, closed.logout_time)
FROM #my_table AS closed
CROSS APPLY (
    SELECT MIN(opened.login_time) AS session_start
    FROM #my_table AS opened
    WHERE opened.userID = closed.userID
        AND opened.login_time <= closed.login_time
        AND NOT EXISTS (
            SELECT 1
            FROM #my_table AS prior
            WHERE prior.userID = closed.userID
                AND prior.logout_time IS NOT NULL
                AND prior.login_time >= opened.login_time
                AND prior.login_time < closed.login_time
        )
) AS s
WHERE closed.logout_time IS NOT NULL
    AND NOT EXISTS (
        SELECT 1
        FROM @StillIn AS si
        WHERE si.userID = closed.userID
    );

SELECT userID, SUM(logtime) AS logtime
FROM @DRes
GROUP BY userID
ORDER BY userID;
下面的查询多次使用 ROW_NUMBER 函数来选择所需的行,并在 logout_time 为 NULL 时使用 LEAD 函数“向前查看”(look ahead)下一行的值。LEAD 自 SQL Server 2012 起可用。
请逐步运行查询,逐个 CTE 检查中间结果,以了解其工作原理。
CTE_Groups 是经典的 gaps-and-islands 查询,用于标记 logout_time 中具有连续 NULL 的行。
CTE_RN 以这样的方式为行编号:logout_time 中连续的 NULL 得到连续的编号。此结果在 CTE_Fixed 中被过滤,只保留每组 NULL 的第一行。如果 logout_time 为 NULL,则使用 LEAD 函数从下一行取值,生成 fixed_logout_time。
logout_time 为 NULL 的行和其后 logout_time 非 NULL 的行会在 CTE_Fixed 中一起列出(二者的 fixed_logout_time 相同)。每一对中我们只需要保留一行。方法相同:在 CTE_FixedRN 中使用 ROW_NUMBER,并在 CTE_Sum 中只选择第一行。
然后我们就可以以分钟为单位计算 Duration,并按 userID 分组求和。
如果没有非 NULL 的 logout_time,DATEDIFF 将返回 NULL,该值会被替换为一个很大的负数。在最后的 SELECT 中,负的 Duration 会被替换为 -1,以表示最后一个时间段仍未结束。
-- Total minutes each user spent logged in, read from #my_table.
-- Consecutive logins without a logout are merged into the next logout of the
-- same user. A user whose last interval has no logout is reported as -1.
WITH
CTE_Groups
AS
(
    -- Gaps-and-islands: the difference of the two row numbers stays constant
    -- inside each run of consecutive rows whose logout_time is NULL.
    SELECT
        userID
        ,login_time
        ,logout_time
        ,ROW_NUMBER()
            OVER(PARTITION BY userID ORDER BY login_time)
        - ROW_NUMBER()
            OVER(PARTITION BY userID, logout_time ORDER BY login_time) AS GroupNumber
    FROM #my_table
)
,CTE_RN
AS
(
    -- Number the rows inside each run. NULL-ness is part of the partition
    -- because a closed row can have the same GroupNumber as a later run of
    -- open logins (e.g. open, closed, open, closed); without it the open login
    -- would get rn = 2 and be dropped below.
    SELECT
        userID
        ,login_time
        ,logout_time
        ,ROW_NUMBER()
            OVER(PARTITION BY
                    userID
                    ,GroupNumber
                    ,CASE WHEN logout_time IS NULL THEN 1 ELSE 0 END
                 ORDER BY login_time) AS rn
    FROM CTE_Groups
)
,CTE_Fixed
AS
(
    -- Keep the first row of every run; an open login borrows the logout_time
    -- of the row that follows it.
    SELECT
        userID
        ,login_time
        ,ISNULL(logout_time, LEAD(logout_time)
            OVER(PARTITION BY userID ORDER BY login_time)) AS fixed_logout_time
    FROM CTE_RN
    WHERE rn = 1
)
,CTE_FixedRN
AS
(
    -- An open login and the closed row it borrowed from now share
    -- fixed_logout_time; rn = 1 is the earlier login of the pair.
    SELECT
        userID
        ,login_time
        ,fixed_logout_time
        ,ROW_NUMBER()
            OVER(PARTITION BY userID, fixed_logout_time ORDER BY login_time) AS rn
    FROM CTE_Fixed
)
,CTE_Sum
AS
(
    -- COUNT(*) > COUNT(fixed_logout_time) means at least one interval is
    -- still open. The large negative sentinel is returned as the whole total
    -- rather than added to the real minutes, so long histories cannot
    -- outweigh it.
    SELECT
        userID
        ,CASE
            WHEN COUNT(*) > COUNT(fixed_logout_time) THEN -1000000
            ELSE SUM(DATEDIFF(minute, login_time, fixed_logout_time))
         END AS Duration
    FROM CTE_FixedRN
    WHERE rn = 1
    GROUP BY userID
)
SELECT
    userID
    ,CASE WHEN Duration < 0 THEN -1 ELSE Duration END AS Duration
FROM CTE_Sum
ORDER BY userID;
结果
+--------+----------+
| userID | Duration |
+--------+----------+
| 222222 | 12 |
| 333333 | -1 |
| 444444 | 10 |
+--------+----------+
我想计算用户在网站上花费的总时间。有3种情况。
存在用户登录时间和退出时间的记录。
-->总时间应该是登录和退出的时间差之和
有用户登录时间记录,但没有登出时间。
-->总时间应该标记为-1.
用户多次登录,只有1次退出
-->总时间应该是最早登录时间和退出时间的时间差之和。
我的table
-- Sample data: one row per login. logout_time is NULL when no logout was recorded.
CREATE TABLE #my_table
(
    id          BIGINT IDENTITY PRIMARY KEY,
    userID      INT,
    login_time  DATETIME,
    logout_time DATETIME
);

-- id is left out of the column list: the IDENTITY column numbers the rows itself.
INSERT INTO #my_table (userID, login_time, logout_time)
VALUES
    (222222, '2016-05-19 01:06:00.000', '2016-05-19 01:10:00.000'),
    (222222, '2016-05-19 01:12:00.000', '2016-05-19 01:20:00.000'),
    (333333, '2016-05-24 14:44:00.000', '2016-05-24 14:47:00.000'),
    (333333, '2016-05-24 14:59:00.000', NULL),
    (444444, '2016-05-24 14:48:00.000', '2016-05-24 14:49:00.000'),
    (444444, '2016-05-24 14:50:00.000', NULL),
    (444444, '2016-05-24 14:51:00.000', NULL),
    (444444, '2016-05-24 14:53:00.000', '2016-05-24 14:59:00.000');
预期结果
对于大多数情况,数据库中捕获的记录将是情况1,但有时也会捕获情况2和情况3。我需要一个脚本来计算所有情况下的总登录时间。
如何查询?
-- Demo script: total minutes each user spent logged in, one result row per user.
--   * A login that has a logout contributes DATEDIFF(minute, login, logout).
--   * Consecutive logins without a logout are merged into the next logout of
--     the same user; that session starts at the earliest of those logins.
--   * A user whose most recent login has no logout is reported as -1.
-- The logic is set-based and ordered by login_time, so it does not depend on
-- id values being contiguous or grouped by user.
IF OBJECT_ID('tempdb..#my_table') IS NOT NULL
    DROP TABLE #my_table;

CREATE TABLE #my_table
(
    id BIGINT IDENTITY PRIMARY KEY
    ,userID INT
    ,login_time DATETIME
    ,logout_time DATETIME
);

-- Users whose most recent login is still open.
DECLARE @StillIn TABLE (
    userID INT
);

-- One row per finished session, or a single -1 marker row for a still-in user.
DECLARE @DRes TABLE (
    userID INT,
    logtime INT
);

INSERT INTO #my_table (userID, login_time, logout_time) VALUES
(222222, '2016-05-19 01:06:00.000', '2016-05-19 01:10:00.000')
,(222222, '2016-05-19 01:12:00.000', '2016-05-19 01:20:00.000')
,(333333, '2016-05-24 14:44:00.000', '2016-05-24 14:47:00.000')
,(333333, '2016-05-24 14:59:00.000', NULL)
,(444444, '2016-05-24 14:48:00.000', '2016-05-24 14:49:00.000')
,(444444, '2016-05-24 14:50:00.000', NULL)
,(444444, '2016-05-24 14:51:00.000', NULL)
,(444444, '2016-05-24 14:53:00.000', '2016-05-24 14:59:00.000');

-- A user is still logged in when an open login has no later login after it.
-- DISTINCT guards against several open rows sharing the same latest login_time.
INSERT INTO @StillIn (userID)
SELECT DISTINCT cur.userID
FROM #my_table AS cur
WHERE cur.logout_time IS NULL
    AND NOT EXISTS (
        SELECT 1
        FROM #my_table AS later
        WHERE later.userID = cur.userID
            AND later.login_time > cur.login_time
    );

-- Still-in users get the -1 marker only; their finished sessions are ignored.
INSERT INTO @DRes (userID, logtime)
SELECT si.userID, -1
FROM @StillIn AS si;

-- Finished sessions of everyone else. For each row that has a logout, the
-- session starts at the earliest login that is not separated from it by an
-- earlier finished session, i.e. the first of the open logins directly before
-- it, or the row's own login when there are none.
INSERT INTO @DRes (userID, logtime)
SELECT
    closed.userID
    ,DATEDIFF(mi, s.session_start, closed.logout_time)
FROM #my_table AS closed
CROSS APPLY (
    SELECT MIN(opened.login_time) AS session_start
    FROM #my_table AS opened
    WHERE opened.userID = closed.userID
        AND opened.login_time <= closed.login_time
        AND NOT EXISTS (
            SELECT 1
            FROM #my_table AS prior
            WHERE prior.userID = closed.userID
                AND prior.logout_time IS NOT NULL
                AND prior.login_time >= opened.login_time
                AND prior.login_time < closed.login_time
        )
) AS s
WHERE closed.logout_time IS NOT NULL
    AND NOT EXISTS (
        SELECT 1
        FROM @StillIn AS si
        WHERE si.userID = closed.userID
    );

SELECT userID, SUM(logtime) AS logtime
FROM @DRes
GROUP BY userID
ORDER BY userID;
下面的查询多次使用 ROW_NUMBER 函数来选择所需的行,并在 logout_time 为 NULL 时使用 LEAD 函数“向前查看”(look ahead)下一行的值。LEAD 自 SQL Server 2012 起可用。
请逐步运行查询,逐个 CTE 检查中间结果,以了解其工作原理。
CTE_Groups 是经典的 gaps-and-islands 查询,用于标记 logout_time 中具有连续 NULL 的行。
CTE_RN 以这样的方式为行编号:logout_time 中连续的 NULL 得到连续的编号。此结果在 CTE_Fixed 中被过滤,只保留每组 NULL 的第一行。如果 logout_time 为 NULL,则使用 LEAD 函数从下一行取值,生成 fixed_logout_time。
logout_time 为 NULL 的行和其后 logout_time 非 NULL 的行会在 CTE_Fixed 中一起列出(二者的 fixed_logout_time 相同)。每一对中我们只需要保留一行。方法相同:在 CTE_FixedRN 中使用 ROW_NUMBER,并在 CTE_Sum 中只选择第一行。
然后我们就可以以分钟为单位计算 Duration,并按 userID 分组求和。
如果没有非 NULL 的 logout_time,DATEDIFF 将返回 NULL,该值会被替换为一个很大的负数。在最后的 SELECT 中,负的 Duration 会被替换为 -1,以表示最后一个时间段仍未结束。
-- Total minutes each user spent logged in, read from #my_table.
-- Consecutive logins without a logout are merged into the next logout of the
-- same user. A user whose last interval has no logout is reported as -1.
WITH
CTE_Groups
AS
(
    -- Gaps-and-islands: the difference of the two row numbers stays constant
    -- inside each run of consecutive rows whose logout_time is NULL.
    SELECT
        userID
        ,login_time
        ,logout_time
        ,ROW_NUMBER()
            OVER(PARTITION BY userID ORDER BY login_time)
        - ROW_NUMBER()
            OVER(PARTITION BY userID, logout_time ORDER BY login_time) AS GroupNumber
    FROM #my_table
)
,CTE_RN
AS
(
    -- Number the rows inside each run. NULL-ness is part of the partition
    -- because a closed row can have the same GroupNumber as a later run of
    -- open logins (e.g. open, closed, open, closed); without it the open login
    -- would get rn = 2 and be dropped below.
    SELECT
        userID
        ,login_time
        ,logout_time
        ,ROW_NUMBER()
            OVER(PARTITION BY
                    userID
                    ,GroupNumber
                    ,CASE WHEN logout_time IS NULL THEN 1 ELSE 0 END
                 ORDER BY login_time) AS rn
    FROM CTE_Groups
)
,CTE_Fixed
AS
(
    -- Keep the first row of every run; an open login borrows the logout_time
    -- of the row that follows it.
    SELECT
        userID
        ,login_time
        ,ISNULL(logout_time, LEAD(logout_time)
            OVER(PARTITION BY userID ORDER BY login_time)) AS fixed_logout_time
    FROM CTE_RN
    WHERE rn = 1
)
,CTE_FixedRN
AS
(
    -- An open login and the closed row it borrowed from now share
    -- fixed_logout_time; rn = 1 is the earlier login of the pair.
    SELECT
        userID
        ,login_time
        ,fixed_logout_time
        ,ROW_NUMBER()
            OVER(PARTITION BY userID, fixed_logout_time ORDER BY login_time) AS rn
    FROM CTE_Fixed
)
,CTE_Sum
AS
(
    -- COUNT(*) > COUNT(fixed_logout_time) means at least one interval is
    -- still open. The large negative sentinel is returned as the whole total
    -- rather than added to the real minutes, so long histories cannot
    -- outweigh it.
    SELECT
        userID
        ,CASE
            WHEN COUNT(*) > COUNT(fixed_logout_time) THEN -1000000
            ELSE SUM(DATEDIFF(minute, login_time, fixed_logout_time))
         END AS Duration
    FROM CTE_FixedRN
    WHERE rn = 1
    GROUP BY userID
)
SELECT
    userID
    ,CASE WHEN Duration < 0 THEN -1 ELSE Duration END AS Duration
FROM CTE_Sum
ORDER BY userID;
结果
+--------+----------+
| userID | Duration |
+--------+----------+
| 222222 | 12 |
| 333333 | -1 |
| 444444 | 10 |
+--------+----------+