为特定子群体创建连续注册孤岛
Create Continuous Enrollment islands for a certain sub-population
我的最终目标是针对单个子群体 'Adult Expansion',为每个 CLIENTID 创建 2019 和 2020 日历年内连续注册天数的孤岛。一个 CLIENTID 在一个日历年内可以与多个子群体相关联,但永远不会同时与多个子群体相关联(注册期没有重叠)。我的数据可以追溯到 2016 年,但我只对 2019 年和 2020 年感兴趣。数据的结构是:每一行是一个注册期,包含注册的开始日期和结束日期,并与一个子群体相关联。
我在下面包含了一些虚拟数据和所需的输出以更好地说明我的目标:
-- Demo fixture: each row is one enrollment period (ELIGBEGIN..ELIGEND) of a
-- client in a single demonstration sub-population.
-- Drop any copy left over from an earlier run so the script can be re-executed
-- in the same session without a "table already exists" error.
IF OBJECT_ID('tempdb..#t') IS NOT NULL
    DROP TABLE #t;

CREATE TABLE #t (
    CLIENTID NVARCHAR(9),
    DEMONSTRATION_POPULATION NVARCHAR(30),
    ELIGBEGIN DATE,
    ELIGEND DATE,
    AGE INT
);

-- Explicit column list: the INSERT stays valid if #t later gains or reorders columns.
INSERT INTO #t (CLIENTID, DEMONSTRATION_POPULATION, ELIGBEGIN, ELIGEND, AGE)
VALUES
('123456789', 'Adult Expansion', '2019-12-16', '2019-12-31', 52)
, ('123456789', 'Adult Expansion', '2020-01-01', '2020-01-15', 52)
, ('123456789', 'Adult Expansion', '2020-03-01', '2020-03-31', 52)
, ('123456789', 'Adult Expansion', '2020-04-01', '2020-04-30', 52)
, ('123456789', 'Adult Expansion', '2020-05-01', '2020-05-31', 52)
, ('123456789', 'Adult Expansion', '2020-06-01', '2020-06-30', 52)
, ('123456789', 'Adult Expansion', '2020-07-01', '2020-07-31', 52)
, ('123456789', 'Adult Expansion', '2020-08-01', '2020-08-31', 52)
, ('123456789', 'Adult Expansion', '2020-09-01', '2020-09-30', 52)
, ('123456789', 'Adult Expansion', '2020-10-01', '2020-10-31', 52)
, ('123456789', 'Adult Expansion', '2020-11-01', '2020-11-30', 52)
, ('123456789', 'Adult Expansion', '2020-12-01', '2020-12-31', 52)
------------------------NEW CLIENTID-----------------------------
,('012345678', 'Demonstration Population #3', '2019-10-01', '2019-10-31', 52)
,('012345678', 'Demonstration Population #3', '2019-11-01', '2019-11-30', 52)
,('012345678', 'Demonstration Population #3', '2019-12-01', '2019-12-31', 52)
,('012345678', 'Demonstration Population #3', '2020-01-01', '2020-01-31', 52)
,('012345678', 'Adult Expansion', '2020-02-01', '2020-02-28', 52)
,('012345678', 'Demonstration Population #3', '2020-02-29', '2020-02-29', 52)
,('012345678', 'Adult Expansion', '2020-03-01', '2020-03-31', 52)
,('012345678', 'Adult Expansion', '2020-04-01', '2020-04-30', 52)
,('012345678', 'Adult Expansion', '2020-05-01', '2020-05-31', 52)
,('012345678', 'Adult Expansion', '2020-06-01', '2020-06-30', 52)
,('012345678', 'Adult Expansion', '2020-07-01', '2020-07-31', 52)
,('012345678', 'Adult Expansion', '2020-08-01', '2020-08-31', 52)
,('012345678', 'Adult Expansion', '2020-09-01', '2020-09-30', 52)
,('012345678', 'Adult Expansion', '2020-10-01', '2020-10-31', 52)
,('012345678', 'Adult Expansion', '2020-11-01', '2020-11-30', 52)
,('012345678', 'Adult Expansion', '2020-12-01', '2020-12-31', 52)
---------------------------NEW CLIENTID---------------------------
,('020234587', 'Adult Expansion', '2019-06-01', '2019-06-30', 36)
,('020234587', 'Adult Expansion', '2019-08-01', '2019-08-31', 36)
,('020234587', 'Adult Expansion', '2019-09-01', '2019-09-30', 36)
,('020234587', 'Adult Expansion', '2019-10-01', '2019-10-31', 36)
,('020234587', 'Adult Expansion', '2019-11-01', '2019-11-30', 36)
,('020234587', 'Non-1115-Waiver', '2019-12-01', '2019-12-31', 36)
,('020234587', 'Non-1115-Waiver', '2020-01-01', '2020-01-31', 36)
,('020234587', 'Non-1115-Waiver', '2020-02-01', '2020-02-29', 36)
;
期望输出:
CLIENTID | AGE | ELIGBEGIN | ELIGEND | Sequence_ID |
---|---|---|---|---|
123456789 | 52 | 2019-12-19 | 2019-12-31 | 1 |
123456789 | 52 | 2020-01-01 | 2020-01-15 | 2 |
123456789 | 52 | 2020-03-01 | 2020-12-31 | 3 |
012345678 | 52 | 2020-02-01 | 2020-02-28 | 1 |
012345678 | 52 | 2020-03-01 | 2020-03-31 | 2 |
020234587 | 36 | 2019-06-01 | 2019-06-30 | 1 |
020234587 | 36 | 2019-08-01 | 2019-11-30 | 2 |
然后这是我用来尝试解决这个问题的当前代码。
-- 2019 pass of the question's attempt: stage the rows of #t whose ELIGBEGIN
-- falls in 2019 into #y1, then collapse them into islands stored in #CY19.
-- YEAR() is compared with the string literal '2019', which relies on implicit conversion.
SELECT * INTO #y1 FROM #t
WHERE YEAR(ELIGBEGIN) = '2019'
-- Self-join of #y1: s1 supplies candidate island-start rows, t1 supplies
-- candidate island-end rows of the same client ending on or after s1 begins.
-- MIN(t1.ELIGEND) keeps the earliest such end for each start row.
SELECT s1.CLIENTID
, s1.AGE
, CAST(s1.ELIGBEGIN AS DATETIME) AS ELIGBEGIN
, MIN(CAST(t1.ELIGEND AS DATETIME)) AS ELIGEND
-- Numbers the resulting islands per client in order of their begin date.
, ROW_NUMBER() OVER(PARTITION BY s1.CLIENTID ORDER BY CAST(s1.ELIGBEGIN AS DATETIME)) AS Sequence_ID
INTO #CY19
FROM --[dbo].[Eligs]
--#t s1
#y1 s1
INNER JOIN #y1--#t
t1 ON t1.CLIENTID = s1.CLIENTID
AND CAST(s1.ELIGBEGIN AS DATETIME) <= CAST(t1.ELIGEND AS DATETIME)
-- Only the end row (t1) is restricted to 'Adult Expansion' in this pass;
-- the equivalent filter on the start row (s1) is commented out below.
AND t1.DEMONSTRATION_POPULATION = 'Adult Expansion'
-- t1 counts as an island end only when no other row (t2) of the same client
-- begins on or before the day after t1 ends and ends later than t1.
-- NOTE(review): t2 is not filtered by population, so an adjacent row from a
-- different sub-population prevents t1 from being treated as an end.
AND NOT EXISTS
(
SELECT * FROM #y1 t2--#t t2
WHERE t2.CLIENTID = t1.CLIENTID
AND (CAST(t1.ELIGEND AS DATETIME) + 1) >= CAST(t2.ELIGBEGIN AS DATETIME)
AND CAST(t1.ELIGEND AS DATETIME) < CAST(t2.ELIGEND AS DATETIME)
)
WHERE --s1.DEMONSTRATION_POPULATION = 'Adult Expansion' AND
-- s1 counts as an island start only when no earlier-beginning row (s2) of the
-- same client ends on or after the day before s1 begins.
-- NOTE(review): s2 is not filtered by population either, so a preceding
-- adjacent row from another sub-population hides an 'Adult Expansion' start.
NOT EXISTS
(
SELECT * FROM #y1 s2--#t s2
WHERE s2.CLIENTID = s1.CLIENTID
AND CAST(s1.ELIGBEGIN AS DATETIME) > CAST(s2.ELIGBEGIN AS DATETIME)
AND (CAST(s1.ELIGBEGIN AS DATETIME) - 1) <= CAST(s2.ELIGEND AS DATETIME)
)
--AND s1.DEMONSTRATION_POPLUATION = 'Adult Expansion'
--AND s1.UMIC = '1'
-- One output row per start row; this pass also groups by s1.ELIGBEGIN and
-- s1.DEMONSTRATION_POPULATION, which the 2020 pass does not.
GROUP BY s1.CLIENTID, CAST(s1.ELIGBEGIN AS DATETIME), s1.AGE, s1.ELIGBEGIN, s1.DEMONSTRATION_POPULATION
ORDER BY s1.CLIENTID, CAST(s1.ELIGBEGIN AS DATETIME)
--do the same for the 2020 calendar year:
-- Stage the rows of #t whose ELIGBEGIN falls in 2020 into #y2, then collapse
-- them into islands stored in #CY20.
-- YEAR() is compared with the string literal '2020', which relies on implicit conversion.
SELECT * INTO #y2
FROM #t
WHERE YEAR(ELIGBEGIN) = '2020'
-- Self-join of #y2: s1 supplies candidate island-start rows, t1 supplies
-- candidate island-end rows of the same client ending on or after s1 begins.
-- MIN(t1.ELIGEND) keeps the earliest such end for each start row.
SELECT s1.CLIENTID
, s1.AGE
, CAST(s1.ELIGBEGIN AS DATETIME) AS ELIGBEGIN
, MIN(CAST(t1.ELIGEND AS DATETIME)) AS ELIGEND
-- Numbers the resulting islands per client in order of their begin date.
, ROW_NUMBER() OVER(PARTITION BY s1.CLIENTID ORDER BY CAST(s1.ELIGBEGIN AS DATETIME)) AS Sequence_ID
INTO #CY20
FROM --[dbo].[Eligs]
--#t s1
#y2 s1
INNER JOIN #y2--#t
t1 ON t1.CLIENTID = s1.CLIENTID
AND CAST(s1.ELIGBEGIN AS DATETIME) <= CAST(t1.ELIGEND AS DATETIME)
-- Unlike the 2019 pass, the population filter here is on the start row (s1),
-- not on the end row (t1); t1 may belong to any sub-population.
AND s1.DEMONSTRATION_POPULATION = 'Adult Expansion'
-- t1 counts as an island end only when no other row (t2) of the same client
-- begins on or before the day after t1 ends and ends later than t1.
-- NOTE(review): t2 is not filtered by population.
AND NOT EXISTS
(
SELECT * FROM #y2 t2--#t t2
WHERE t2.CLIENTID = t1.CLIENTID
AND (CAST(t1.ELIGEND AS DATETIME) + 1) >= CAST(t2.ELIGBEGIN AS DATETIME)
AND CAST(t1.ELIGEND AS DATETIME) < CAST(t2.ELIGEND AS DATETIME)
)
WHERE --s1.DEMONSTRATION_POPLUATION = 'Adult Expansion'
--AND
-- s1 counts as an island start only when no earlier-beginning row (s2) of the
-- same client ends on or after the day before s1 begins.
-- NOTE(review): s2 is not filtered by population, so an 'Adult Expansion' row
-- that directly follows a row from another sub-population is never a start.
NOT EXISTS
(
SELECT * FROM #y2 s2--#t s2
WHERE s2.CLIENTID = s1.CLIENTID
AND CAST(s1.ELIGBEGIN AS DATETIME) > CAST(s2.ELIGBEGIN AS DATETIME)
AND (CAST(s1.ELIGBEGIN AS DATETIME) - 1) <= CAST(s2.ELIGEND AS DATETIME)
)
--AND s1.DEMONSTRATION_POPLUATION = 'Adult Expansion'
--AND s1.UMIC = '1'
-- One output row per (client, start date, age).
GROUP BY s1.CLIENTID, CAST(s1.ELIGBEGIN AS DATETIME), s1.AGE
--ORDER BY s1.CLIENTID, CAST(s1.ELIGBEGIN AS DATETIME)
;
-- Merge the per-year island tables, renumber the islands per client by begin
-- date, store the result in #testcase, and display it.
;WITH both_years AS
(
    -- UNION (not UNION ALL) removes rows that are identical in every column.
    SELECT CLIENTID, AGE, ELIGBEGIN, ELIGEND, Sequence_ID
    FROM #CY19
    UNION
    SELECT CLIENTID, AGE, ELIGBEGIN, ELIGEND, Sequence_ID
    FROM #CY20
)
SELECT
    merged.CLIENTID,
    merged.AGE,
    merged.ELIGBEGIN,
    merged.ELIGEND,
    -- The per-year Sequence_ID is discarded and recomputed across both years.
    ROW_NUMBER() OVER (PARTITION BY merged.CLIENTID ORDER BY merged.ELIGBEGIN) AS Sequence_ID
INTO #testcase
FROM both_years AS merged;

SELECT
    result.CLIENTID,
    result.AGE,
    result.ELIGBEGIN,
    result.ELIGEND,
    result.Sequence_ID
FROM #testcase AS result
ORDER BY result.CLIENTID;
然而,最终SELECT * FROM #testcase
的实际输出如下:
实际结果:
CLIENTID | AGE | ELIGBEGIN | ELIGEND | Sequence_ID |
---|---|---|---|---|
123456789 | 52 | 2019-12-19 | 2019-12-31 | 1 |
123456789 | 52 | 2020-01-01 | 2020-01-15 | 2 |
123456789 | 52 | 2020-03-01 | 2020-12-31 | 3 |
020234587 | 36 | 2019-06-01 | 2019-06-30 | 1 |
020234587 | 36 | 2019-08-01 | 2019-11-30 | 2 |
如您所见,我面临三个主要问题,即实际输出与所需输出不匹配。
- 我必须针对不同的年份把同一个查询运行两次,因为我不知道如何对从 2019-12-31 连续注册到 2020-01-31 及以后的会员按年份拆分注册期。如果我对表 #t 而不是表 #y1 运行代码,第一个 CLIENTID 的输出将是 12/16/19 到 1/15/20,这不是我想要的。
- 我完全失去了一个 CLIENTID,因为他们在 'Adult Expansion' 子群体中,然后在 'Demonstration Population #3' 中,然后又回到 'Adult Expansion'
- 出于某种原因,代码会正确忽略客户的 DEMONSTRATION_POPULATION != 'Adult Expansion' 的前置行;但如果某个 CLIENTID 从 'Adult Expansion' 变更为另一个子群体,实际输出仍会包含来自该子群体的注册日期,这与我想要的输出直接相悖。我只想要一个人注册在 'Adult Expansion' 期间的那部分注册记录。
如有哪位大侠能协助解决以上三个问题,将不胜感激!
提前感谢任何提示!
这是一个使用间隙和孤岛方法的解决方案:
-- Gaps-and-islands over the 'Adult Expansion' rows of #t for calendar years
-- 2019 and 2020. Consecutive periods are merged into one island when the next
-- period begins no later than the day after the previous one ends; an island
-- is always cut at a calendar-year boundary.
-- Output: one row per island with CLIENTID, AGE, ELIGBEGIN, ELIGEND, sequence_no.
;WITH neighbourCTE
AS
(
    -- Look up each row's previous end date and next begin date once.
    SELECT CLIENTID,
           DEMONSTRATION_POPULATION,
           ELIGBEGIN,
           ELIGEND,
           AGE,
           LAG(ELIGEND,1)    OVER (PARTITION BY CLIENTID ORDER BY ELIGBEGIN) AS rawPrevELIGEND,
           LEAD(ELIGBEGIN,1) OVER (PARTITION BY CLIENTID ORDER BY ELIGBEGIN) AS rawNextELIGBEGIN
    FROM #t
    WHERE DEMONSTRATION_POPULATION = 'Adult Expansion'
      -- Only periods beginning in 2019 or 2020 are of interest (same rule as
      -- the question's YEAR(ELIGBEGIN) filters), written as a half-open range
      -- so no function is applied to the column.
      AND ELIGBEGIN >= '20190101'
      AND ELIGBEGIN <  '20210101'
)
,prevNextCTE
AS
(
    SELECT CLIENTID,
           DEMONSTRATION_POPULATION,
           ELIGBEGIN,
           ELIGEND,
           AGE,
           -- compare the year of the previous record to the current to create split at year end
           CASE WHEN YEAR(rawPrevELIGEND) = YEAR(ELIGBEGIN)
                THEN rawPrevELIGEND
           END AS prevELIGEND,
           -- compare the year of the next record to the current to create split at year end
           CASE WHEN YEAR(rawNextELIGBEGIN) = YEAR(ELIGEND)
                THEN rawNextELIGBEGIN
           END AS nextELIGBEGIN
    FROM neighbourCTE
)
,islandStartCTE
AS
(
    -- Rows that open an island: no usable predecessor, or a gap of at least
    -- one uncovered day since the predecessor ended.
    SELECT CLIENTID,
           AGE,
           ELIGBEGIN,
           ROW_NUMBER() OVER (PARTITION BY CLIENTID ORDER BY ELIGBEGIN) AS sequence_no
    FROM prevNextCTE
    WHERE prevELIGEND IS NULL
       OR DATEADD(DAY,1,prevELIGEND) < ELIGBEGIN
)
,islandEndCTE
AS
(
    -- Rows that close an island: no usable successor, or a gap of at least
    -- one uncovered day before the successor begins.
    SELECT CLIENTID,
           ELIGEND,
           ROW_NUMBER() OVER (PARTITION BY CLIENTID ORDER BY ELIGBEGIN) AS sequence_no
    FROM prevNextCTE
    WHERE nextELIGBEGIN IS NULL
       OR DATEADD(DAY,1,ELIGEND) < nextELIGBEGIN
)
-- The n-th start of a client pairs with the n-th end of the same client.
SELECT iss.CLIENTID,
       iss.AGE,
       iss.ELIGBEGIN,
       ise.ELIGEND,
       iss.sequence_no
FROM islandStartCTE AS iss
-- left join here is not necessary but makes it easier to spot errors
-- when a start position has no matching end (which should not occur)
LEFT
JOIN islandEndCTE AS ise
  ON ise.CLIENTID = iss.CLIENTID
 AND ise.sequence_no = iss.sequence_no
-- Explicit ordering makes the result deterministic.
ORDER BY iss.CLIENTID,
         iss.sequence_no;
我的最终目标是针对单个子群体 'Adult Expansion',为每个 CLIENTID 创建 2019 和 2020 日历年内连续注册天数的孤岛。一个 CLIENTID 在一个日历年内可以与多个子群体相关联,但永远不会同时与多个子群体相关联(注册期没有重叠)。我的数据可以追溯到 2016 年,但我只对 2019 年和 2020 年感兴趣。数据的结构是:每一行是一个注册期,包含注册的开始日期和结束日期,并与一个子群体相关联。
我在下面包含了一些虚拟数据和所需的输出以更好地说明我的目标:
-- Demo fixture: each row is one enrollment period (ELIGBEGIN..ELIGEND) of a
-- client in a single demonstration sub-population.
-- Drop any copy left over from an earlier run so the script can be re-executed
-- in the same session without a "table already exists" error.
IF OBJECT_ID('tempdb..#t') IS NOT NULL
    DROP TABLE #t;

CREATE TABLE #t (
    CLIENTID NVARCHAR(9),
    DEMONSTRATION_POPULATION NVARCHAR(30),
    ELIGBEGIN DATE,
    ELIGEND DATE,
    AGE INT
);

-- Explicit column list: the INSERT stays valid if #t later gains or reorders columns.
INSERT INTO #t (CLIENTID, DEMONSTRATION_POPULATION, ELIGBEGIN, ELIGEND, AGE)
VALUES
('123456789', 'Adult Expansion', '2019-12-16', '2019-12-31', 52)
, ('123456789', 'Adult Expansion', '2020-01-01', '2020-01-15', 52)
, ('123456789', 'Adult Expansion', '2020-03-01', '2020-03-31', 52)
, ('123456789', 'Adult Expansion', '2020-04-01', '2020-04-30', 52)
, ('123456789', 'Adult Expansion', '2020-05-01', '2020-05-31', 52)
, ('123456789', 'Adult Expansion', '2020-06-01', '2020-06-30', 52)
, ('123456789', 'Adult Expansion', '2020-07-01', '2020-07-31', 52)
, ('123456789', 'Adult Expansion', '2020-08-01', '2020-08-31', 52)
, ('123456789', 'Adult Expansion', '2020-09-01', '2020-09-30', 52)
, ('123456789', 'Adult Expansion', '2020-10-01', '2020-10-31', 52)
, ('123456789', 'Adult Expansion', '2020-11-01', '2020-11-30', 52)
, ('123456789', 'Adult Expansion', '2020-12-01', '2020-12-31', 52)
------------------------NEW CLIENTID-----------------------------
,('012345678', 'Demonstration Population #3', '2019-10-01', '2019-10-31', 52)
,('012345678', 'Demonstration Population #3', '2019-11-01', '2019-11-30', 52)
,('012345678', 'Demonstration Population #3', '2019-12-01', '2019-12-31', 52)
,('012345678', 'Demonstration Population #3', '2020-01-01', '2020-01-31', 52)
,('012345678', 'Adult Expansion', '2020-02-01', '2020-02-28', 52)
,('012345678', 'Demonstration Population #3', '2020-02-29', '2020-02-29', 52)
,('012345678', 'Adult Expansion', '2020-03-01', '2020-03-31', 52)
,('012345678', 'Adult Expansion', '2020-04-01', '2020-04-30', 52)
,('012345678', 'Adult Expansion', '2020-05-01', '2020-05-31', 52)
,('012345678', 'Adult Expansion', '2020-06-01', '2020-06-30', 52)
,('012345678', 'Adult Expansion', '2020-07-01', '2020-07-31', 52)
,('012345678', 'Adult Expansion', '2020-08-01', '2020-08-31', 52)
,('012345678', 'Adult Expansion', '2020-09-01', '2020-09-30', 52)
,('012345678', 'Adult Expansion', '2020-10-01', '2020-10-31', 52)
,('012345678', 'Adult Expansion', '2020-11-01', '2020-11-30', 52)
,('012345678', 'Adult Expansion', '2020-12-01', '2020-12-31', 52)
---------------------------NEW CLIENTID---------------------------
,('020234587', 'Adult Expansion', '2019-06-01', '2019-06-30', 36)
,('020234587', 'Adult Expansion', '2019-08-01', '2019-08-31', 36)
,('020234587', 'Adult Expansion', '2019-09-01', '2019-09-30', 36)
,('020234587', 'Adult Expansion', '2019-10-01', '2019-10-31', 36)
,('020234587', 'Adult Expansion', '2019-11-01', '2019-11-30', 36)
,('020234587', 'Non-1115-Waiver', '2019-12-01', '2019-12-31', 36)
,('020234587', 'Non-1115-Waiver', '2020-01-01', '2020-01-31', 36)
,('020234587', 'Non-1115-Waiver', '2020-02-01', '2020-02-29', 36)
;
期望输出:
CLIENTID | AGE | ELIGBEGIN | ELIGEND | Sequence_ID |
---|---|---|---|---|
123456789 | 52 | 2019-12-19 | 2019-12-31 | 1 |
123456789 | 52 | 2020-01-01 | 2020-01-15 | 2 |
123456789 | 52 | 2020-03-01 | 2020-12-31 | 3 |
012345678 | 52 | 2020-02-01 | 2020-02-28 | 1 |
012345678 | 52 | 2020-03-01 | 2020-03-31 | 2 |
020234587 | 36 | 2019-06-01 | 2019-06-30 | 1 |
020234587 | 36 | 2019-08-01 | 2019-11-30 | 2 |
然后这是我用来尝试解决这个问题的当前代码。
-- 2019 pass of the question's attempt: stage the rows of #t whose ELIGBEGIN
-- falls in 2019 into #y1, then collapse them into islands stored in #CY19.
-- YEAR() is compared with the string literal '2019', which relies on implicit conversion.
SELECT * INTO #y1 FROM #t
WHERE YEAR(ELIGBEGIN) = '2019'
-- Self-join of #y1: s1 supplies candidate island-start rows, t1 supplies
-- candidate island-end rows of the same client ending on or after s1 begins.
-- MIN(t1.ELIGEND) keeps the earliest such end for each start row.
SELECT s1.CLIENTID
, s1.AGE
, CAST(s1.ELIGBEGIN AS DATETIME) AS ELIGBEGIN
, MIN(CAST(t1.ELIGEND AS DATETIME)) AS ELIGEND
-- Numbers the resulting islands per client in order of their begin date.
, ROW_NUMBER() OVER(PARTITION BY s1.CLIENTID ORDER BY CAST(s1.ELIGBEGIN AS DATETIME)) AS Sequence_ID
INTO #CY19
FROM --[dbo].[Eligs]
--#t s1
#y1 s1
INNER JOIN #y1--#t
t1 ON t1.CLIENTID = s1.CLIENTID
AND CAST(s1.ELIGBEGIN AS DATETIME) <= CAST(t1.ELIGEND AS DATETIME)
-- Only the end row (t1) is restricted to 'Adult Expansion' in this pass;
-- the equivalent filter on the start row (s1) is commented out below.
AND t1.DEMONSTRATION_POPULATION = 'Adult Expansion'
-- t1 counts as an island end only when no other row (t2) of the same client
-- begins on or before the day after t1 ends and ends later than t1.
-- NOTE(review): t2 is not filtered by population, so an adjacent row from a
-- different sub-population prevents t1 from being treated as an end.
AND NOT EXISTS
(
SELECT * FROM #y1 t2--#t t2
WHERE t2.CLIENTID = t1.CLIENTID
AND (CAST(t1.ELIGEND AS DATETIME) + 1) >= CAST(t2.ELIGBEGIN AS DATETIME)
AND CAST(t1.ELIGEND AS DATETIME) < CAST(t2.ELIGEND AS DATETIME)
)
WHERE --s1.DEMONSTRATION_POPULATION = 'Adult Expansion' AND
-- s1 counts as an island start only when no earlier-beginning row (s2) of the
-- same client ends on or after the day before s1 begins.
-- NOTE(review): s2 is not filtered by population either, so a preceding
-- adjacent row from another sub-population hides an 'Adult Expansion' start.
NOT EXISTS
(
SELECT * FROM #y1 s2--#t s2
WHERE s2.CLIENTID = s1.CLIENTID
AND CAST(s1.ELIGBEGIN AS DATETIME) > CAST(s2.ELIGBEGIN AS DATETIME)
AND (CAST(s1.ELIGBEGIN AS DATETIME) - 1) <= CAST(s2.ELIGEND AS DATETIME)
)
--AND s1.DEMONSTRATION_POPLUATION = 'Adult Expansion'
--AND s1.UMIC = '1'
-- One output row per start row; this pass also groups by s1.ELIGBEGIN and
-- s1.DEMONSTRATION_POPULATION, which the 2020 pass does not.
GROUP BY s1.CLIENTID, CAST(s1.ELIGBEGIN AS DATETIME), s1.AGE, s1.ELIGBEGIN, s1.DEMONSTRATION_POPULATION
ORDER BY s1.CLIENTID, CAST(s1.ELIGBEGIN AS DATETIME)
--do the same for the 2020 calendar year:
-- Stage the rows of #t whose ELIGBEGIN falls in 2020 into #y2, then collapse
-- them into islands stored in #CY20.
-- YEAR() is compared with the string literal '2020', which relies on implicit conversion.
SELECT * INTO #y2
FROM #t
WHERE YEAR(ELIGBEGIN) = '2020'
-- Self-join of #y2: s1 supplies candidate island-start rows, t1 supplies
-- candidate island-end rows of the same client ending on or after s1 begins.
-- MIN(t1.ELIGEND) keeps the earliest such end for each start row.
SELECT s1.CLIENTID
, s1.AGE
, CAST(s1.ELIGBEGIN AS DATETIME) AS ELIGBEGIN
, MIN(CAST(t1.ELIGEND AS DATETIME)) AS ELIGEND
-- Numbers the resulting islands per client in order of their begin date.
, ROW_NUMBER() OVER(PARTITION BY s1.CLIENTID ORDER BY CAST(s1.ELIGBEGIN AS DATETIME)) AS Sequence_ID
INTO #CY20
FROM --[dbo].[Eligs]
--#t s1
#y2 s1
INNER JOIN #y2--#t
t1 ON t1.CLIENTID = s1.CLIENTID
AND CAST(s1.ELIGBEGIN AS DATETIME) <= CAST(t1.ELIGEND AS DATETIME)
-- Unlike the 2019 pass, the population filter here is on the start row (s1),
-- not on the end row (t1); t1 may belong to any sub-population.
AND s1.DEMONSTRATION_POPULATION = 'Adult Expansion'
-- t1 counts as an island end only when no other row (t2) of the same client
-- begins on or before the day after t1 ends and ends later than t1.
-- NOTE(review): t2 is not filtered by population.
AND NOT EXISTS
(
SELECT * FROM #y2 t2--#t t2
WHERE t2.CLIENTID = t1.CLIENTID
AND (CAST(t1.ELIGEND AS DATETIME) + 1) >= CAST(t2.ELIGBEGIN AS DATETIME)
AND CAST(t1.ELIGEND AS DATETIME) < CAST(t2.ELIGEND AS DATETIME)
)
WHERE --s1.DEMONSTRATION_POPLUATION = 'Adult Expansion'
--AND
-- s1 counts as an island start only when no earlier-beginning row (s2) of the
-- same client ends on or after the day before s1 begins.
-- NOTE(review): s2 is not filtered by population, so an 'Adult Expansion' row
-- that directly follows a row from another sub-population is never a start.
NOT EXISTS
(
SELECT * FROM #y2 s2--#t s2
WHERE s2.CLIENTID = s1.CLIENTID
AND CAST(s1.ELIGBEGIN AS DATETIME) > CAST(s2.ELIGBEGIN AS DATETIME)
AND (CAST(s1.ELIGBEGIN AS DATETIME) - 1) <= CAST(s2.ELIGEND AS DATETIME)
)
--AND s1.DEMONSTRATION_POPLUATION = 'Adult Expansion'
--AND s1.UMIC = '1'
-- One output row per (client, start date, age).
GROUP BY s1.CLIENTID, CAST(s1.ELIGBEGIN AS DATETIME), s1.AGE
--ORDER BY s1.CLIENTID, CAST(s1.ELIGBEGIN AS DATETIME)
;
-- Merge the per-year island tables, renumber the islands per client by begin
-- date, store the result in #testcase, and display it.
;WITH both_years AS
(
    -- UNION (not UNION ALL) removes rows that are identical in every column.
    SELECT CLIENTID, AGE, ELIGBEGIN, ELIGEND, Sequence_ID
    FROM #CY19
    UNION
    SELECT CLIENTID, AGE, ELIGBEGIN, ELIGEND, Sequence_ID
    FROM #CY20
)
SELECT
    merged.CLIENTID,
    merged.AGE,
    merged.ELIGBEGIN,
    merged.ELIGEND,
    -- The per-year Sequence_ID is discarded and recomputed across both years.
    ROW_NUMBER() OVER (PARTITION BY merged.CLIENTID ORDER BY merged.ELIGBEGIN) AS Sequence_ID
INTO #testcase
FROM both_years AS merged;

SELECT
    result.CLIENTID,
    result.AGE,
    result.ELIGBEGIN,
    result.ELIGEND,
    result.Sequence_ID
FROM #testcase AS result
ORDER BY result.CLIENTID;
然而,最终SELECT * FROM #testcase
的实际输出如下:
实际结果:
CLIENTID | AGE | ELIGBEGIN | ELIGEND | Sequence_ID |
---|---|---|---|---|
123456789 | 52 | 2019-12-19 | 2019-12-31 | 1 |
123456789 | 52 | 2020-01-01 | 2020-01-15 | 2 |
123456789 | 52 | 2020-03-01 | 2020-12-31 | 3 |
020234587 | 36 | 2019-06-01 | 2019-06-30 | 1 |
020234587 | 36 | 2019-08-01 | 2019-11-30 | 2 |
如您所见,我面临三个主要问题,即实际输出与所需输出不匹配。
- 我必须针对不同的年份把同一个查询运行两次,因为我不知道如何对从 2019-12-31 连续注册到 2020-01-31 及以后的会员按年份拆分注册期。如果我对表 #t 而不是表 #y1 运行代码,第一个 CLIENTID 的输出将是 12/16/19 到 1/15/20,这不是我想要的。
- 我完全丢失了一个 CLIENTID,因为他们先在 'Adult Expansion' 子群体中,然后在 'Demonstration Population #3' 中,之后又回到 'Adult Expansion'。
- 出于某种原因,代码会正确忽略客户的 DEMONSTRATION_POPULATION != 'Adult Expansion' 的前置行;但如果某个 CLIENTID 从 'Adult Expansion' 变更为另一个子群体,实际输出仍会包含来自该子群体的注册日期,这与我想要的输出直接相悖。我只想要一个人注册在 'Adult Expansion' 期间的那部分注册记录。
如有哪位大侠能协助解决以上三个问题,将不胜感激!
提前感谢任何提示!
这是一个使用间隙和孤岛方法的解决方案:
-- Gaps-and-islands over the 'Adult Expansion' rows of #t for calendar years
-- 2019 and 2020. Consecutive periods are merged into one island when the next
-- period begins no later than the day after the previous one ends; an island
-- is always cut at a calendar-year boundary.
-- Output: one row per island with CLIENTID, AGE, ELIGBEGIN, ELIGEND, sequence_no.
;WITH neighbourCTE
AS
(
    -- Look up each row's previous end date and next begin date once.
    SELECT CLIENTID,
           DEMONSTRATION_POPULATION,
           ELIGBEGIN,
           ELIGEND,
           AGE,
           LAG(ELIGEND,1)    OVER (PARTITION BY CLIENTID ORDER BY ELIGBEGIN) AS rawPrevELIGEND,
           LEAD(ELIGBEGIN,1) OVER (PARTITION BY CLIENTID ORDER BY ELIGBEGIN) AS rawNextELIGBEGIN
    FROM #t
    WHERE DEMONSTRATION_POPULATION = 'Adult Expansion'
      -- Only periods beginning in 2019 or 2020 are of interest (same rule as
      -- the question's YEAR(ELIGBEGIN) filters), written as a half-open range
      -- so no function is applied to the column.
      AND ELIGBEGIN >= '20190101'
      AND ELIGBEGIN <  '20210101'
)
,prevNextCTE
AS
(
    SELECT CLIENTID,
           DEMONSTRATION_POPULATION,
           ELIGBEGIN,
           ELIGEND,
           AGE,
           -- compare the year of the previous record to the current to create split at year end
           CASE WHEN YEAR(rawPrevELIGEND) = YEAR(ELIGBEGIN)
                THEN rawPrevELIGEND
           END AS prevELIGEND,
           -- compare the year of the next record to the current to create split at year end
           CASE WHEN YEAR(rawNextELIGBEGIN) = YEAR(ELIGEND)
                THEN rawNextELIGBEGIN
           END AS nextELIGBEGIN
    FROM neighbourCTE
)
,islandStartCTE
AS
(
    -- Rows that open an island: no usable predecessor, or a gap of at least
    -- one uncovered day since the predecessor ended.
    SELECT CLIENTID,
           AGE,
           ELIGBEGIN,
           ROW_NUMBER() OVER (PARTITION BY CLIENTID ORDER BY ELIGBEGIN) AS sequence_no
    FROM prevNextCTE
    WHERE prevELIGEND IS NULL
       OR DATEADD(DAY,1,prevELIGEND) < ELIGBEGIN
)
,islandEndCTE
AS
(
    -- Rows that close an island: no usable successor, or a gap of at least
    -- one uncovered day before the successor begins.
    SELECT CLIENTID,
           ELIGEND,
           ROW_NUMBER() OVER (PARTITION BY CLIENTID ORDER BY ELIGBEGIN) AS sequence_no
    FROM prevNextCTE
    WHERE nextELIGBEGIN IS NULL
       OR DATEADD(DAY,1,ELIGEND) < nextELIGBEGIN
)
-- The n-th start of a client pairs with the n-th end of the same client.
SELECT iss.CLIENTID,
       iss.AGE,
       iss.ELIGBEGIN,
       ise.ELIGEND,
       iss.sequence_no
FROM islandStartCTE AS iss
-- left join here is not necessary but makes it easier to spot errors
-- when a start position has no matching end (which should not occur)
LEFT
JOIN islandEndCTE AS ise
  ON ise.CLIENTID = iss.CLIENTID
 AND ise.sequence_no = iss.sequence_no
-- Explicit ordering makes the result deterministic.
ORDER BY iss.CLIENTID,
         iss.sequence_no;