根据累计值排名
Rank based on cumulative value
我想根据 UID 的升序对 ID 和值列进行排名。一旦值列的值与先前的值不同,预期输出就必须更改。排名必须在每个新 ID 上重新开始
UID ID Value Expected Output
1 1 0 1
2 1 0 1
3 1 1 2
4 1 1 2
5 1 1 2
6 1 0 3
7 1 1 4
8 1 0 5
9 1 0 5
10 1 0 5
11 2 1 1
12 2 1 1
13 2 0 2
14 2 0 2
15 2 1 3
这是我创建的示例数据集:
CREATE TABLE [dbo].[Data] (
[UID] [int] NOT NULL,
[ID] [int] NULL,
[Value] [int] NULL
);
INSERT [dbo].[Data] ([UID], [ID], [Value]) VALUES (1, 1, 0);
INSERT [dbo].[Data] ([UID], [ID], [Value]) VALUES (2, 1, 0);
INSERT [dbo].[Data] ([UID], [ID], [Value]) VALUES (3, 1, 1);
INSERT [dbo].[Data] ([UID], [ID], [Value]) VALUES (4, 1, 1);
INSERT [dbo].[Data] ([UID], [ID], [Value]) VALUES (5, 1, 1);
INSERT [dbo].[Data] ([UID], [ID], [Value]) VALUES (6, 1, 0);
INSERT [dbo].[Data] ([UID], [ID], [Value]) VALUES (7, 1, 1);
INSERT [dbo].[Data] ([UID], [ID], [Value]) VALUES (8, 1, 0);
INSERT [dbo].[Data] ([UID], [ID], [Value]) VALUES (9, 1, 0);
INSERT [dbo].[Data] ([UID], [ID], [Value]) VALUES (10, 1, 0);
INSERT [dbo].[Data] ([UID], [ID], [Value]) VALUES (11, 2, 1);
INSERT [dbo].[Data] ([UID], [ID], [Value]) VALUES (12, 2, 1);
INSERT [dbo].[Data] ([UID], [ID], [Value]) VALUES (13, 2, 0);
INSERT [dbo].[Data] ([UID], [ID], [Value]) VALUES (14, 2, 0);
INSERT [dbo].[Data] ([UID], [ID], [Value]) VALUES (15, 2, 1);
我认为解决此 gaps-and-islands 问题的最简单方法是使用 lag()
检索“先前”值,然后每次值更改时递增的 window 总和.
select uid, id, value,
1 + sum(case when value <> lag_value then 1 else 0 end)
over(partition by id order by uid) grp
from (
select d.*, lag(value, 1, value) over(partition by id order by uid) lag_value
from data d
) d
order by uid
uid | id | value | grp
--: | -: | ----: | --:
1 | 1 | 0 | 1
2 | 1 | 0 | 1
3 | 1 | 1 | 2
4 | 1 | 1 | 2
5 | 1 | 1 | 2
6 | 1 | 0 | 3
7 | 1 | 1 | 4
8 | 1 | 0 | 5
9 | 1 | 0 | 5
10 | 1 | 0 | 5
11 | 2 | 1 | 1
12 | 2 | 1 | 1
13 | 2 | 0 | 2
14 | 2 | 0 | 2
15 | 2 | 1 | 3
这是一个缺口和孤岛问题。我认为最简单的方法是使用行号差异方法:
WITH cte AS (
SELECT *, ROW_NUMBER() OVER (PARTITION BY ID ORDER BY UID) rn1,
ROW_NUMBER() OVER (PARTITION BY ID, [Value] ORDER BY UID) rn2
FROM Data
)
SELECT *, DENSE_RANK() OVER (PARTITION BY ID ORDER BY rn1 - rn2, [Value]) AS output
FROM cte
ORDER BY UID;
我想根据 UID 的升序对 ID 和值列进行排名。一旦值列的值与先前的值不同,预期输出就必须更改。排名必须在每个新 ID 上重新开始
UID ID Value Expected Output
1 1 0 1
2 1 0 1
3 1 1 2
4 1 1 2
5 1 1 2
6 1 0 3
7 1 1 4
8 1 0 5
9 1 0 5
10 1 0 5
11 2 1 1
12 2 1 1
13 2 0 2
14 2 0 2
15 2 1 3
这是我创建的示例数据集:
CREATE TABLE [dbo].[Data] (
[UID] [int] NOT NULL,
[ID] [int] NULL,
[Value] [int] NULL
);
INSERT [dbo].[Data] ([UID], [ID], [Value]) VALUES (1, 1, 0);
INSERT [dbo].[Data] ([UID], [ID], [Value]) VALUES (2, 1, 0);
INSERT [dbo].[Data] ([UID], [ID], [Value]) VALUES (3, 1, 1);
INSERT [dbo].[Data] ([UID], [ID], [Value]) VALUES (4, 1, 1);
INSERT [dbo].[Data] ([UID], [ID], [Value]) VALUES (5, 1, 1);
INSERT [dbo].[Data] ([UID], [ID], [Value]) VALUES (6, 1, 0);
INSERT [dbo].[Data] ([UID], [ID], [Value]) VALUES (7, 1, 1);
INSERT [dbo].[Data] ([UID], [ID], [Value]) VALUES (8, 1, 0);
INSERT [dbo].[Data] ([UID], [ID], [Value]) VALUES (9, 1, 0);
INSERT [dbo].[Data] ([UID], [ID], [Value]) VALUES (10, 1, 0);
INSERT [dbo].[Data] ([UID], [ID], [Value]) VALUES (11, 2, 1);
INSERT [dbo].[Data] ([UID], [ID], [Value]) VALUES (12, 2, 1);
INSERT [dbo].[Data] ([UID], [ID], [Value]) VALUES (13, 2, 0);
INSERT [dbo].[Data] ([UID], [ID], [Value]) VALUES (14, 2, 0);
INSERT [dbo].[Data] ([UID], [ID], [Value]) VALUES (15, 2, 1);
我认为解决此 gaps-and-islands 问题的最简单方法是使用 lag()
检索“先前”值,然后每次值更改时递增的 window 总和.
select uid, id, value,
1 + sum(case when value <> lag_value then 1 else 0 end)
over(partition by id order by uid) grp
from (
select d.*, lag(value, 1, value) over(partition by id order by uid) lag_value
from data d
) d
order by uid
uid | id | value | grp --: | -: | ----: | --: 1 | 1 | 0 | 1 2 | 1 | 0 | 1 3 | 1 | 1 | 2 4 | 1 | 1 | 2 5 | 1 | 1 | 2 6 | 1 | 0 | 3 7 | 1 | 1 | 4 8 | 1 | 0 | 5 9 | 1 | 0 | 5 10 | 1 | 0 | 5 11 | 2 | 1 | 1 12 | 2 | 1 | 1 13 | 2 | 0 | 2 14 | 2 | 0 | 2 15 | 2 | 1 | 3
这是一个缺口和孤岛问题。我认为最简单的方法是使用行号差异方法:
WITH cte AS (
SELECT *, ROW_NUMBER() OVER (PARTITION BY ID ORDER BY UID) rn1,
ROW_NUMBER() OVER (PARTITION BY ID, [Value] ORDER BY UID) rn2
FROM Data
)
SELECT *, DENSE_RANK() OVER (PARTITION BY ID ORDER BY rn1 - rn2, [Value]) AS output
FROM cte
ORDER BY UID;