标记记录级别的差异
Mark differences on record level
真正的智力题就在这里。
我想标记 X1 值之间的转换,即 A->B、B->A、B->C 和 C->B 这样的变化。
这是为了将我的数据集减少到只有那些有转换的记录。
最终目标是计算 X1 A 最后一次发生和 X1 B 第一次发生之间的天数。
例如:
ID 1 -> 从 A 转换到 B,日期分别为 2018-01-20 和 2018-02-01。那么答案应当是这两个日期之间的天数(12 天)。
或
示例 2:ID 1 -> 从 A 转换到 B,日期分别为 2018-02-16 和 2018-03-01。那么答案应当是这两个日期之间的天数(13 天)。
ID X1 Date
1 A 2018-01-01
1 A 2018-01-20
1 B 2018-02-01
1 A 2018-02-15
1 A 2018-02-16
1 B 2018-03-01
2 B 2018-01-01
2 C 2018-03-05
2 C 2018-03-06
2 C 2018-03-08
2 B 2018-03-20
我已经写了下面这个循环,但它运行得太慢了(每条记录约 0.3 秒):
-- Row-by-row pass over Y_Table that flags transitions between consecutive rows.
-- For each pair of rows (LIFE_CYCLE = @Loop, LIFE_CYCLE = @Loop + 1) a transition
-- exists when both rows share the same ID, the second row has a later Date and
-- the two rows carry different X1 values.
--   Test_MIN is written on the first row of the pair,
--   Test_MAX is written on the second row of the pair.
-- NOTE(review): this relies on LIFE_CYCLE numbering the rows 1..COUNT(*) without
-- duplicates; a duplicate would make the scalar subqueries fail - confirm.
DECLARE @Loop INT, @MAXROWS INT;

SELECT @Loop = 1,
       @MAXROWS = COUNT(*)
FROM Y_Table;

WHILE @Loop <= @MAXROWS
BEGIN
    -- Flag the current row when the next row is a transition away from it.
    -- A missing next row makes every comparison UNKNOWN, so the flag is '0'.
    UPDATE Y_Table
    SET Test_MIN =
        CASE
            WHEN (SELECT cur.ID FROM Y_Table AS cur WHERE cur.LIFE_CYCLE = @Loop)
               = (SELECT nxt.ID FROM Y_Table AS nxt WHERE nxt.LIFE_CYCLE = @Loop + 1)
             AND (SELECT nxt.[Date] FROM Y_Table AS nxt WHERE nxt.LIFE_CYCLE = @Loop + 1)
               > (SELECT cur.[Date] FROM Y_Table AS cur WHERE cur.LIFE_CYCLE = @Loop)
             AND (SELECT cur.X1 FROM Y_Table AS cur WHERE cur.LIFE_CYCLE = @Loop)
              <> (SELECT nxt.X1 FROM Y_Table AS nxt WHERE nxt.LIFE_CYCLE = @Loop + 1)
            THEN '1'
            ELSE '0'
        END
    WHERE LIFE_CYCLE = @Loop;

    -- Write the same flag on the following row (the row the transition leads to).
    UPDATE Y_Table
    SET Test_MAX =
        CASE
            WHEN (SELECT cur.ID FROM Y_Table AS cur WHERE cur.LIFE_CYCLE = @Loop)
               = (SELECT nxt.ID FROM Y_Table AS nxt WHERE nxt.LIFE_CYCLE = @Loop + 1)
             AND (SELECT nxt.[Date] FROM Y_Table AS nxt WHERE nxt.LIFE_CYCLE = @Loop + 1)
               > (SELECT cur.[Date] FROM Y_Table AS cur WHERE cur.LIFE_CYCLE = @Loop)
             AND (SELECT cur.X1 FROM Y_Table AS cur WHERE cur.LIFE_CYCLE = @Loop)
              <> (SELECT nxt.X1 FROM Y_Table AS nxt WHERE nxt.LIFE_CYCLE = @Loop + 1)
            THEN '1'
            ELSE '0'
        END
    WHERE LIFE_CYCLE = @Loop + 1;

    -- Progress output: one message per processed row.
    PRINT @Loop;

    SET @Loop = @Loop + 1;
END;
如何解决?
您可以使用 LAG 获取前一行的 X1 和 Date 值。如果当前行的 X1 值与前一行的值不同,就说明发生了转换,此时可以计算两个日期之间相差的天数。
-- Sample data: one row per (ID, Date) observation of the X1 state.
DECLARE @DataSource TABLE
(
    [ID]   SMALLINT
   ,[X1]   CHAR(1)
   ,[Date] DATE
);

-- IDs are written as numeric literals so no implicit string-to-SMALLINT
-- conversion is needed.
INSERT INTO @DataSource ([ID], [X1], [Date])
VALUES (1, 'A', '2018-01-01')
      ,(1, 'A', '2018-01-20')
      ,(1, 'B', '2018-02-01')
      ,(1, 'A', '2018-02-15')
      ,(1, 'A', '2018-02-16')
      ,(1, 'B', '2018-03-01')
      ,(2, 'B', '2018-01-01')
      ,(2, 'C', '2018-03-05')
      ,(2, 'C', '2018-03-06')
      ,(2, 'C', '2018-03-08')
      ,(2, 'B', '2018-03-20');

-- Pair every row with the previous row of the same ID (ordered by Date).
WITH WithPrevious AS
(
    SELECT [ID]
          ,[X1]
          ,[Date]
          ,LAG([X1])   OVER (PARTITION BY [ID] ORDER BY [Date]) AS [PrevX1]
          ,LAG([Date]) OVER (PARTITION BY [ID] ORDER BY [Date]) AS [PrevDate]
    FROM @DataSource
)
-- TransitionInDays is the number of days since the previous row when X1
-- changed, and NULL otherwise. The first row of each ID has no previous row,
-- so the comparison is UNKNOWN and the result is NULL as well.
SELECT [ID]
      ,[X1]
      ,[Date]
      ,CASE
           WHEN [X1] <> [PrevX1] THEN DATEDIFF(DAY, [PrevDate], [Date])
       END AS [TransitionInDays]
FROM WithPrevious
ORDER BY [ID], [Date]; -- make the output order deterministic
请注意,LAG 需要 SQL Server 2012 或更高版本。如果您使用的版本更低,请告诉我。
- 使用 LEAD 获取下一行
- 筛选出下一行的 X1 与当前行不同的记录(即发生转换的记录)
像这样
-- Sample data: one row per (ID, myDate) observation of the X1 state.
DECLARE @data TABLE (ID INT, X1 CHAR(1), myDate DATE);

INSERT INTO @data (ID, X1, myDate)
VALUES (1, 'A', '2018-01-01'),
       (1, 'A', '2018-01-20'),
       (1, 'B', '2018-02-01'),
       (1, 'A', '2018-02-15'),
       (1, 'A', '2018-02-16'),
       (1, 'B', '2018-03-01'),
       (2, 'B', '2018-01-01'),
       (2, 'C', '2018-03-05'),
       (2, 'C', '2018-03-06'),
       (2, 'C', '2018-03-08'),
       (2, 'B', '2018-03-20');

-- Pair every row with the next row of the same ID (ordered by myDate).
WITH GetNextRows AS (
    SELECT
        ID,
        X1,
        myDate,
        LEAD(X1)     OVER (PARTITION BY ID ORDER BY myDate) AS NextX1,
        LEAD(myDate) OVER (PARTITION BY ID ORDER BY myDate) AS NextDate
    FROM
        @data
)
-- Keep only rows whose successor has a different X1, i.e. the last row before
-- each transition. The last row of every ID has NextX1 = NULL, so the
-- comparison is UNKNOWN and that row is filtered out.
SELECT
    ID,
    X1,
    myDate,
    NextX1,
    NextDate,
    CONCAT(X1, ' -> ', NextX1)      AS Transation, -- column name spelling kept as in the published output
    DATEDIFF(DAY, myDate, NextDate) AS DaysGap
FROM
    GetNextRows
WHERE
    NextX1 <> X1
ORDER BY
    ID,
    myDate; -- make the output order deterministic
ID X1 myDate NextX1 NextDate Transation DaysGap
1 A 2018-01-20 B 2018-02-01 A -> B 12
1 B 2018-02-01 A 2018-02-15 B -> A 14
1 A 2018-02-16 B 2018-03-01 A -> B 13
2 B 2018-01-01 C 2018-03-05 B -> C 63
2 C 2018-03-08 B 2018-03-20 C -> B 12
真正的智力题就在这里。
我想标记 X1 值之间的转换,即 A->B、B->A、B->C 和 C->B 这样的变化。
这是为了将我的数据集减少到只有那些有转换的记录。
最终目标是计算 X1 A 最后一次发生和 X1 B 第一次发生之间的天数。
例如:
ID 1 -> 从 A 转换到 B,日期分别为 2018-01-20 和 2018-02-01。那么答案应当是这两个日期之间的天数(12 天)。
或
示例 2:ID 1 -> 从 A 转换到 B,日期分别为 2018-02-16 和 2018-03-01。那么答案应当是这两个日期之间的天数(13 天)。
ID X1 Date
1 A 2018-01-01
1 A 2018-01-20
1 B 2018-02-01
1 A 2018-02-15
1 A 2018-02-16
1 B 2018-03-01
2 B 2018-01-01
2 C 2018-03-05
2 C 2018-03-06
2 C 2018-03-08
2 B 2018-03-20
我已经写了下面这个循环,但它运行得太慢了(每条记录约 0.3 秒):
-- Row-by-row pass over Y_Table that flags transitions between consecutive rows.
-- For each pair of rows (LIFE_CYCLE = @Loop, LIFE_CYCLE = @Loop + 1) a transition
-- exists when both rows share the same ID, the second row has a later Date and
-- the two rows carry different X1 values.
--   Test_MIN is written on the first row of the pair,
--   Test_MAX is written on the second row of the pair.
-- NOTE(review): this relies on LIFE_CYCLE numbering the rows 1..COUNT(*) without
-- duplicates; a duplicate would make the scalar subqueries fail - confirm.
DECLARE @Loop INT, @MAXROWS INT;

SELECT @Loop = 1,
       @MAXROWS = COUNT(*)
FROM Y_Table;

WHILE @Loop <= @MAXROWS
BEGIN
    -- Flag the current row when the next row is a transition away from it.
    -- A missing next row makes every comparison UNKNOWN, so the flag is '0'.
    UPDATE Y_Table
    SET Test_MIN =
        CASE
            WHEN (SELECT cur.ID FROM Y_Table AS cur WHERE cur.LIFE_CYCLE = @Loop)
               = (SELECT nxt.ID FROM Y_Table AS nxt WHERE nxt.LIFE_CYCLE = @Loop + 1)
             AND (SELECT nxt.[Date] FROM Y_Table AS nxt WHERE nxt.LIFE_CYCLE = @Loop + 1)
               > (SELECT cur.[Date] FROM Y_Table AS cur WHERE cur.LIFE_CYCLE = @Loop)
             AND (SELECT cur.X1 FROM Y_Table AS cur WHERE cur.LIFE_CYCLE = @Loop)
              <> (SELECT nxt.X1 FROM Y_Table AS nxt WHERE nxt.LIFE_CYCLE = @Loop + 1)
            THEN '1'
            ELSE '0'
        END
    WHERE LIFE_CYCLE = @Loop;

    -- Write the same flag on the following row (the row the transition leads to).
    UPDATE Y_Table
    SET Test_MAX =
        CASE
            WHEN (SELECT cur.ID FROM Y_Table AS cur WHERE cur.LIFE_CYCLE = @Loop)
               = (SELECT nxt.ID FROM Y_Table AS nxt WHERE nxt.LIFE_CYCLE = @Loop + 1)
             AND (SELECT nxt.[Date] FROM Y_Table AS nxt WHERE nxt.LIFE_CYCLE = @Loop + 1)
               > (SELECT cur.[Date] FROM Y_Table AS cur WHERE cur.LIFE_CYCLE = @Loop)
             AND (SELECT cur.X1 FROM Y_Table AS cur WHERE cur.LIFE_CYCLE = @Loop)
              <> (SELECT nxt.X1 FROM Y_Table AS nxt WHERE nxt.LIFE_CYCLE = @Loop + 1)
            THEN '1'
            ELSE '0'
        END
    WHERE LIFE_CYCLE = @Loop + 1;

    -- Progress output: one message per processed row.
    PRINT @Loop;

    SET @Loop = @Loop + 1;
END;
如何解决?
您可以使用 LAG 获取前一行的 X1 和 Date 值。如果当前行的 X1 值与前一行的值不同,就说明发生了转换,此时可以计算两个日期之间相差的天数。
-- Sample data: one row per (ID, Date) observation of the X1 state.
DECLARE @DataSource TABLE
(
    [ID]   SMALLINT
   ,[X1]   CHAR(1)
   ,[Date] DATE
);

-- IDs are written as numeric literals so no implicit string-to-SMALLINT
-- conversion is needed.
INSERT INTO @DataSource ([ID], [X1], [Date])
VALUES (1, 'A', '2018-01-01')
      ,(1, 'A', '2018-01-20')
      ,(1, 'B', '2018-02-01')
      ,(1, 'A', '2018-02-15')
      ,(1, 'A', '2018-02-16')
      ,(1, 'B', '2018-03-01')
      ,(2, 'B', '2018-01-01')
      ,(2, 'C', '2018-03-05')
      ,(2, 'C', '2018-03-06')
      ,(2, 'C', '2018-03-08')
      ,(2, 'B', '2018-03-20');

-- Pair every row with the previous row of the same ID (ordered by Date).
WITH WithPrevious AS
(
    SELECT [ID]
          ,[X1]
          ,[Date]
          ,LAG([X1])   OVER (PARTITION BY [ID] ORDER BY [Date]) AS [PrevX1]
          ,LAG([Date]) OVER (PARTITION BY [ID] ORDER BY [Date]) AS [PrevDate]
    FROM @DataSource
)
-- TransitionInDays is the number of days since the previous row when X1
-- changed, and NULL otherwise. The first row of each ID has no previous row,
-- so the comparison is UNKNOWN and the result is NULL as well.
SELECT [ID]
      ,[X1]
      ,[Date]
      ,CASE
           WHEN [X1] <> [PrevX1] THEN DATEDIFF(DAY, [PrevDate], [Date])
       END AS [TransitionInDays]
FROM WithPrevious
ORDER BY [ID], [Date]; -- make the output order deterministic
请注意,LAG 需要 SQL Server 2012 或更高版本。如果您使用的版本更低,请告诉我。
- 使用 LEAD 获取下一行
- 筛选出下一行的 X1 与当前行不同的记录(即发生转换的记录)
像这样
-- Sample data: one row per (ID, myDate) observation of the X1 state.
DECLARE @data TABLE (ID INT, X1 CHAR(1), myDate DATE);

INSERT INTO @data (ID, X1, myDate)
VALUES (1, 'A', '2018-01-01'),
       (1, 'A', '2018-01-20'),
       (1, 'B', '2018-02-01'),
       (1, 'A', '2018-02-15'),
       (1, 'A', '2018-02-16'),
       (1, 'B', '2018-03-01'),
       (2, 'B', '2018-01-01'),
       (2, 'C', '2018-03-05'),
       (2, 'C', '2018-03-06'),
       (2, 'C', '2018-03-08'),
       (2, 'B', '2018-03-20');

-- Pair every row with the next row of the same ID (ordered by myDate).
WITH GetNextRows AS (
    SELECT
        ID,
        X1,
        myDate,
        LEAD(X1)     OVER (PARTITION BY ID ORDER BY myDate) AS NextX1,
        LEAD(myDate) OVER (PARTITION BY ID ORDER BY myDate) AS NextDate
    FROM
        @data
)
-- Keep only rows whose successor has a different X1, i.e. the last row before
-- each transition. The last row of every ID has NextX1 = NULL, so the
-- comparison is UNKNOWN and that row is filtered out.
SELECT
    ID,
    X1,
    myDate,
    NextX1,
    NextDate,
    CONCAT(X1, ' -> ', NextX1)      AS Transation, -- column name spelling kept as in the published output
    DATEDIFF(DAY, myDate, NextDate) AS DaysGap
FROM
    GetNextRows
WHERE
    NextX1 <> X1
ORDER BY
    ID,
    myDate; -- make the output order deterministic
ID X1 myDate NextX1 NextDate Transation DaysGap
1 A 2018-01-20 B 2018-02-01 A -> B 12
1 B 2018-02-01 A 2018-02-15 B -> A 14
1 A 2018-02-16 B 2018-03-01 A -> B 13
2 B 2018-01-01 C 2018-03-05 B -> C 63
2 C 2018-03-08 B 2018-03-20 C -> B 12