如何在值更改后创建新的顺序标志(在 SQL 中)
How to create a new sequential flag after a change in value (in SQL)
我正在尝试根据 section 列取值的变化创建一个连续的数字标志。
当学生加入某个 section 时,该标志应为 1,并保持为 1,直到 section 发生变化。
第一次变化时标志应为 2,第二次变化时应为 3,依此类推。
由于同一个 section 在变化之后可能再次出现,我很难得到想要的结果。
如有任何帮助,我们将不胜感激。
示例数据
-- Sample data for the sequential-flag question: dated section records per student.
CREATE TABLE dbo.cluster_test (
    student_id  INT         NOT NULL,
    record_date DATE        NOT NULL,
    section     VARCHAR(30) NULL      -- nullable: several sample rows carry no section
);

INSERT INTO cluster_test (student_id, record_date, section)
VALUES
    -- student 123: NULL, A, A, B, A, A, NULL, NULL
    (123, '2020-02-06', NULL),
    (123, '2020-05-14', 'A'),
    (123, '2020-08-12', 'A'),
    (123, '2020-09-01', 'B'),
    (123, '2020-09-15', 'A'),
    (123, '2020-09-29', 'A'),
    (123, '2020-11-02', NULL),
    (123, '2020-11-30', NULL),
    -- student 789: NULL, A, NULL, A, B, B, A (a NULL row sits between two 'A' rows)
    (789, '2020-01-12', NULL),
    (789, '2020-04-12', 'A'),
    (789, '2020-05-03', NULL),
    (789, '2020-06-13', 'A'),
    (789, '2020-06-30', 'B'),
    (789, '2020-07-01', 'B'),
    (789, '2020-07-22', 'A');
想要的结果
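| student_id | record_date | section | flag |
|---|---|---|---|
| 123 | 2020-02-06 | NULL | NULL |
| 123 | 2020-05-14 | A | 1 |
| 123 | 2020-08-12 | A | 1 |
| 123 | 2020-09-01 | B | 2 |
| 123 | 2020-09-15 | A | 3 |
| 123 | 2020-09-29 | A | 3 |
| 123 | 2020-11-02 | NULL | NULL |
| 123 | 2020-11-30 | NULL | NULL |
| 789 | 2020-01-12 | NULL | NULL |
| 789 | 2020-04-12 | A | 1 |
| 789 | 2020-05-03 | NULL | NULL |
| 789 | 2020-06-13 | A | 2 |
| 789 | 2020-06-30 | B | 3 |
| 789 | 2020-07-01 | B | 3 |
| 789 | 2020-07-22 | A | 4 |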
尝试:
-- Asker's attempt: emits per-row helper columns (it does not yet accumulate them
-- into the sequential flag).
--   row#     : row number within each (student_id, section) pair, NULL for NULL sections
--   chk_txt  : 'start' when a section follows a NULL/absent previous section,
--              'change' when it differs from a non-NULL previous section, else NULL
--   chk_val2 : 1 on a 'start' or 'change' row, 0 on a continuing row, NULL for NULL sections
select
    t.student_id
    ,t.record_date
    ,t.section
    ,case when t.section is not null
          then row_number() over (partition by t.student_id, t.section order by t.record_date asc)
     end as row#
    ,case
        when t.section is not null and t.prev_section is null then 'start'
        when t.prev_section is not null and t.section <> t.prev_section then 'change'
     end as chk_txt
    ,case when t.section is not null
          then case
                   when t.prev_section is null then 1
                   when t.section <> t.prev_section then 1
                   else 0
               end
     end as chk_val2
from (
    -- The previous row's section is computed once here instead of repeating the
    -- same LAG window expression in every CASE branch.
    select
        student_id
        ,record_date
        ,section
        ,lag(section, 1) over (partition by student_id order by record_date asc) as prev_section
    from cluster_test
) as t
-- Ordered by column name rather than by select-list position.
order by t.student_id, t.record_date
这是一个典型的 gaps-and-islands(间隙与孤岛)问题。您可以按如下方式使用分析(窗口)函数:
-- Gaps-and-islands solution.
-- Inner query: fetch the previous row's section (lgs) per student in date order.
-- Outer query: mark each non-NULL row whose section differs from lgs, or whose
-- lgs is NULL, with 1, and take a running sum of those marks per student.
-- Rows with a NULL section get a NULL flag.
select
    t.student_id,
    t.record_date,
    t.section,
    case
        when t.section is not null then
            sum(case
                    when t.section is not null
                     and (t.section <> t.lgs or t.lgs is null)
                    then 1
                end)
                over (partition by t.student_id
                      order by t.record_date
                      -- Explicit ROWS frame: the implicit default is a RANGE frame,
                      -- which treats rows with equal record_date as one peer group.
                      rows unbounded preceding)
    end as flag
from (
    select
        student_id,
        record_date,
        section,
        lag(section) over (partition by student_id order by record_date) as lgs
    from cluster_test
) as t
order by t.student_id, t.record_date;
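您也可以使用多个 CTE(其中一个是递归 CTE)来获得如下所示的数据。注意:该查询在比较之前先过滤掉了 section 为 NULL 的行,因此中间隔着 NULL 的两段相同 section 会被视为同一段;对学生 789,它给出的标志是 1、1、2、2、3,而不是想要的 1、2、3、3、4: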
-- Recursive-CTE solution.
-- NOTE: rows with a NULL section are removed before consecutive rows are compared,
-- so two runs of the same section separated only by NULL rows share one flag value.
with cte_student_section as (
    -- Rows that have a section, numbered per student in date order.
    select
        student_id,
        record_date,
        section,
        row_number() over (partition by student_id order by record_date) as rnk
    from dbo.cluster_test
    where section is not null
),
cte_student_section_flag as (
    -- Anchor: each student's first sectioned row starts at flag 1.
    select
        student_id,
        record_date,
        section,
        rnk,
        1 as flag
    from cte_student_section
    where rnk = 1
    union all
    -- Step: move to the next numbered row; keep the flag when the section is
    -- unchanged, otherwise increment it.
    select
        nxt.student_id,
        nxt.record_date,
        nxt.section,
        nxt.rnk,
        case when nxt.section = prev.section then prev.flag else prev.flag + 1 end
    from cte_student_section as nxt
    inner join cte_student_section_flag as prev
        on prev.student_id = nxt.student_id
       and prev.rnk + 1 = nxt.rnk
)
select student_id, record_date, section, flag
from cte_student_section_flag
union all
-- NULL-section rows are appended back with a NULL flag.
select student_id, record_date, section, null as flag
from dbo.cluster_test
where section is null
order by student_id, record_date
-- One recursion level is used per sectioned row of a student; lift the default
-- limit of 100 levels. The recursion still terminates because rnk strictly increases.
option (maxrecursion 0);
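| student_id | record_date | section | flag |
|---|---|---|---|
| 123 | 2020-02-06 | NULL | NULL |
| 123 | 2020-05-14 | A | 1 |
| 123 | 2020-08-12 | A | 1 |
| 123 | 2020-09-01 | B | 2 |
| 123 | 2020-09-15 | A | 3 |
| 123 | 2020-09-29 | A | 3 |
| 123 | 2020-11-02 | NULL | NULL |
| 123 | 2020-11-30 | NULL | NULL |
| 789 | 2020-01-12 | NULL | NULL |
| 789 | 2020-04-12 | A | 1 |
| 789 | 2020-05-03 | NULL | NULL |
| 789 | 2020-06-13 | A | 1 |
| 789 | 2020-06-30 | B | 2 |
| 789 | 2020-07-01 | B | 2 |
| 789 | 2020-07-22 | A | 3 |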
我正在尝试根据 section 列取值的变化创建一个连续的数字标志。
当学生加入某个 section 时,该标志应为 1,并保持为 1,直到 section 发生变化。 第一次变化时标志应为 2,第二次变化时应为 3,依此类推。
由于同一个 section 在变化之后可能再次出现,我很难得到想要的结果。
如有任何帮助,我们将不胜感激。
示例数据
-- Sample data for the sequential-flag question: dated section records per student.
CREATE TABLE dbo.cluster_test (
    student_id  INT         NOT NULL,
    record_date DATE        NOT NULL,
    section     VARCHAR(30) NULL      -- nullable: several sample rows carry no section
);

INSERT INTO cluster_test (student_id, record_date, section)
VALUES
    -- student 123: NULL, A, A, B, A, A, NULL, NULL
    (123, '2020-02-06', NULL),
    (123, '2020-05-14', 'A'),
    (123, '2020-08-12', 'A'),
    (123, '2020-09-01', 'B'),
    (123, '2020-09-15', 'A'),
    (123, '2020-09-29', 'A'),
    (123, '2020-11-02', NULL),
    (123, '2020-11-30', NULL),
    -- student 789: NULL, A, NULL, A, B, B, A (a NULL row sits between two 'A' rows)
    (789, '2020-01-12', NULL),
    (789, '2020-04-12', 'A'),
    (789, '2020-05-03', NULL),
    (789, '2020-06-13', 'A'),
    (789, '2020-06-30', 'B'),
    (789, '2020-07-01', 'B'),
    (789, '2020-07-22', 'A');
想要的结果
student_id | record_date | section | flag |
---|---|---|---|
123 | 2020-02-06 | NULL | NULL |
123 | 2020-05-14 | A | 1 |
123 | 2020-08-12 | A | 1 |
123 | 2020-09-01 | B | 2 |
123 | 2020-09-15 | A | 3 |
123 | 2020-09-29 | A | 3 |
123 | 2020-11-02 | NULL | NULL |
123 | 2020-11-30 | NULL | NULL |
789 | 2020-01-12 | NULL | NULL |
789 | 2020-04-12 | A | 1 |
789 | 2020-05-03 | NULL | NULL |
789 | 2020-06-13 | A | 2 |
789 | 2020-06-30 | B | 3 |
789 | 2020-07-01 | B | 3 |
789 | 2020-07-22 | A | 4 |
尝试:
-- Asker's attempt: emits per-row helper columns (it does not yet accumulate them
-- into the sequential flag).
--   row#     : row number within each (student_id, section) pair, NULL for NULL sections
--   chk_txt  : 'start' when a section follows a NULL/absent previous section,
--              'change' when it differs from a non-NULL previous section, else NULL
--   chk_val2 : 1 on a 'start' or 'change' row, 0 on a continuing row, NULL for NULL sections
select
    t.student_id
    ,t.record_date
    ,t.section
    ,case when t.section is not null
          then row_number() over (partition by t.student_id, t.section order by t.record_date asc)
     end as row#
    ,case
        when t.section is not null and t.prev_section is null then 'start'
        when t.prev_section is not null and t.section <> t.prev_section then 'change'
     end as chk_txt
    ,case when t.section is not null
          then case
                   when t.prev_section is null then 1
                   when t.section <> t.prev_section then 1
                   else 0
               end
     end as chk_val2
from (
    -- The previous row's section is computed once here instead of repeating the
    -- same LAG window expression in every CASE branch.
    select
        student_id
        ,record_date
        ,section
        ,lag(section, 1) over (partition by student_id order by record_date asc) as prev_section
    from cluster_test
) as t
-- Ordered by column name rather than by select-list position.
order by t.student_id, t.record_date
这是一个典型的 gaps-and-islands(间隙与孤岛)问题。您可以按如下方式使用分析(窗口)函数:
-- Gaps-and-islands solution.
-- Inner query: fetch the previous row's section (lgs) per student in date order.
-- Outer query: mark each non-NULL row whose section differs from lgs, or whose
-- lgs is NULL, with 1, and take a running sum of those marks per student.
-- Rows with a NULL section get a NULL flag.
select
    t.student_id,
    t.record_date,
    t.section,
    case
        when t.section is not null then
            sum(case
                    when t.section is not null
                     and (t.section <> t.lgs or t.lgs is null)
                    then 1
                end)
                over (partition by t.student_id
                      order by t.record_date
                      -- Explicit ROWS frame: the implicit default is a RANGE frame,
                      -- which treats rows with equal record_date as one peer group.
                      rows unbounded preceding)
    end as flag
from (
    select
        student_id,
        record_date,
        section,
        lag(section) over (partition by student_id order by record_date) as lgs
    from cluster_test
) as t
order by t.student_id, t.record_date;
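您也可以使用多个 CTE(其中一个是递归 CTE)来获得如下所示的数据。注意:该查询在比较之前先过滤掉了 section 为 NULL 的行,因此中间隔着 NULL 的两段相同 section 会被视为同一段;对学生 789,它给出的标志是 1、1、2、2、3,而不是想要的 1、2、3、3、4: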
-- Recursive-CTE solution.
-- NOTE: rows with a NULL section are removed before consecutive rows are compared,
-- so two runs of the same section separated only by NULL rows share one flag value.
with cte_student_section as (
    -- Rows that have a section, numbered per student in date order.
    select
        student_id,
        record_date,
        section,
        row_number() over (partition by student_id order by record_date) as rnk
    from dbo.cluster_test
    where section is not null
),
cte_student_section_flag as (
    -- Anchor: each student's first sectioned row starts at flag 1.
    select
        student_id,
        record_date,
        section,
        rnk,
        1 as flag
    from cte_student_section
    where rnk = 1
    union all
    -- Step: move to the next numbered row; keep the flag when the section is
    -- unchanged, otherwise increment it.
    select
        nxt.student_id,
        nxt.record_date,
        nxt.section,
        nxt.rnk,
        case when nxt.section = prev.section then prev.flag else prev.flag + 1 end
    from cte_student_section as nxt
    inner join cte_student_section_flag as prev
        on prev.student_id = nxt.student_id
       and prev.rnk + 1 = nxt.rnk
)
select student_id, record_date, section, flag
from cte_student_section_flag
union all
-- NULL-section rows are appended back with a NULL flag.
select student_id, record_date, section, null as flag
from dbo.cluster_test
where section is null
order by student_id, record_date
-- One recursion level is used per sectioned row of a student; lift the default
-- limit of 100 levels. The recursion still terminates because rnk strictly increases.
option (maxrecursion 0);
student_id | record_date | section | flag |
---|---|---|---|
123 | 2020-02-06 | NULL | NULL |
123 | 2020-05-14 | A | 1 |
123 | 2020-08-12 | A | 1 |
123 | 2020-09-01 | B | 2 |
123 | 2020-09-15 | A | 3 |
123 | 2020-09-29 | A | 3 |
123 | 2020-11-02 | NULL | NULL |
123 | 2020-11-30 | NULL | NULL |
789 | 2020-01-12 | NULL | NULL |
789 | 2020-04-12 | A | 1 |
789 | 2020-05-03 | NULL | NULL |
789 | 2020-06-13 | A | 1 |
789 | 2020-06-30 | B | 2 |
789 | 2020-07-01 | B | 2 |
789 | 2020-07-22 | A | 3 |