SQL islands - 查找连续的地址范围
SQL islands - find consecutive address ranges
我有代表街道地址范围的数据。当它们连续时,我需要对范围进行分组。我已经尝试了一些针对 SQL 间隙和孤岛的其他解决方案,但似乎无济于事。
FullName | City | FromRight | ToRight | FromLeft | ToLeft
-----------------------------------------------------------------
W Main St | Townsville | 100 | 198 | 101 | 199
W Main St | Townsville | 200 | 298 | 201 | 299
W Main St | Townsville | 500 | 598 | 501 | 599
W Main St | Cityburg | 600 | 698 | 601 | 699
E 1st Ave | Townsville | 100 | 398 | 301 | 399
E 1st Ave | Townsville | 400 | 416 | 401 | 417
E 1st Ave | Townsville | 418 | 458 | 419 | 459
当街道名称和城市相同时,我需要能够根据低地址(From Right)和高地址(To Left)找到连续的值。所以我的结果 table 看起来像:
FullName | City | FromRight | ToLeft
--------------------------------------------
W Main St | Townsville | 100 | 299
W Main St | Townsville | 500 | 599
W Main St | Cityburg | 600 | 699
E 1st Ave | Townsville | 100 | 459
如有任何帮助,我们将不胜感激!
已更新以处理行中的空白
我敢肯定这不是最好的方法,但它适用于您的测试数据。无论如何,写起来很有趣。我不太清楚它的实际用途——能做到并不意味着就应该这样做——但这仍然是一个有趣的挑战 :)
-- Gaps-and-islands over #cities: within each (FullName, City), merge rows whose
-- ranges are consecutive, i.e. the previous row's ToLeft + 1 equals this row's
-- FromRight. Returns one row per island: FullName, City, FromRight, ToLeft.
--
-- Islands are numbered with a running total of "start" flags instead of pairing
-- start/end rows through LAG/LEAD on adjacent rows; the adjacent-row pairing
-- only reaches one row away and silently loses islands of four or more rows.
;with flagged as (
    select
        FullName,
        City,
        FromRight,
        ToLeft,
        -- 1 when the row opens a new island: either it is the first row of its
        -- partition (LAG yields NULL, so the comparison is not true) or it does
        -- not start immediately after the previous row's ToLeft.
        case
            when lag(ToLeft) over (partition by FullName, City order by FromRight) + 1 = FromRight then 0
            else 1
        end as IsIslandStart
    from #cities),
numbered as (
    select
        FullName,
        City,
        FromRight,
        ToLeft,
        -- Running count of island starts = island number inside the partition.
        sum(IsIslandStart) over (partition by FullName, City order by FromRight rows unbounded preceding) as IslandId
    from flagged)
select
    FullName,
    City,
    min(FromRight) as FromRight,
    max(ToLeft) as ToLeft
from numbered
group by FullName, City, IslandId
order by FullName, FromRight
如果有人想试试这个查询,这里有一些测试数据(上面第一个 CTE 已经从 #cities 读取;若使用自己的表,请把 #cities 换成您的表名)。
-- Sample data for the island queries: creates the session temp table #cities
-- (via SELECT ... INTO) and fills it with ten address-range rows.
select
    v.FullName,
    v.City,
    v.FromRight,
    v.ToRight,
    v.FromLeft,
    v.ToLeft
into #cities
from (values
    ('W Main St', 'Townsville', 100, 198, 101, 199),
    ('W Main St', 'Townsville', 200, 298, 201, 299),
    ('W Main St', 'Townsville', 500, 598, 501, 599),
    ('W Main St', 'Cityburg',   600, 698, 601, 699),
    ('E 1st Ave', 'Townsville', 100, 398, 301, 399),
    ('E 1st Ave', 'Townsville', 400, 416, 401, 417),
    ('E 1st Ave', 'Townsville', 418, 458, 419, 459),
    -- The last three rows reuse ToRight/FromLeft from the row above; only
    -- FromRight and ToLeft differ.
    ('E 1st Ave', 'Townsville', 470, 458, 419, 479),
    ('E 1st Ave', 'Townsville', 490, 458, 419, 499),
    ('E 1st Ave', 'Townsville', 500, 458, 419, 501)
) as v (FullName, City, FromRight, ToRight, FromLeft, ToLeft)
-- Sample data: one row per street segment with its right/left address ranges.
Declare @YourTable Table (FullName varchar(100),City varchar(100),FromRight int,ToRight int,FromLeft int, ToLeft int)
Insert Into @YourTable (FullName,City,FromRight,ToRight,FromLeft,ToLeft) values
('W Main St' , 'Townsville' , 100 , 198 , 101 , 199),
('W Main St' , 'Townsville' , 200 , 298 , 201 , 299),
('W Main St' , 'Townsville' , 500 , 598 , 501 , 599),
('W Main St' , 'Cityburg' , 600 , 698 , 601 , 699),
('E 1st Ave' , 'Townsville' , 100 , 398 , 301 , 399),
('E 1st Ave' , 'Townsville' , 400 , 416 , 401 , 417),
('E 1st Ave' , 'Townsville' , 418 , 458 , 419 , 459)

-- Merge ranges (R1 = FromRight, R2 = ToLeft) that touch or overlap within the
-- same FullName and City, and return one row per merged island.
-- A fixed number of self-join passes only merges chains up to a limited length;
-- comparing each row with the running maximum of all earlier R2 values merges
-- chains of any length in a single pass.
;with cteBase as (
    -- Distinct keeps (R1,R2) unique per street so the window ordering below is deterministic.
    Select Distinct FullName,City,R1=FromRight,R2=ToLeft From @YourTable
),cteFlag as (
    Select FullName,City,R1,R2
          -- 0 when an earlier range reaches at least R1-1 (touching or overlapping);
          -- 1 otherwise, including the first row where the running max is NULL.
          ,IsStart = Case When max(R2) over (Partition By FullName,City Order By R1,R2 Rows Between Unbounded Preceding And 1 Preceding) >= R1-1 Then 0 Else 1 End
    From cteBase
),cteGrp as (
    Select FullName,City,R1,R2
          -- Running count of island starts = island number inside the partition.
          ,Grp = sum(IsStart) over (Partition By FullName,City Order By R1,R2 Rows Unbounded Preceding)
    From cteFlag
)
Select FullName
      ,City
      ,FromRight = min(R1)
      ,ToLeft    = max(R2)
From cteGrp
Group By FullName,City,Grp
Order By FullName Desc,City Desc,FromRight
Returns
FullName City FromRight ToLeft
W Main St Townsville 100 299
W Main St Townsville 500 599
W Main St Cityburg 600 699
E 1st Ave Townsville 100 459
此解决方案依赖于一张计数表(tally table,即数字表)。不过一旦有了这张表,写法就非常简单。(感谢上面的 John,他在另一个答案中以易于使用的格式提供了示例数据。)
-- Sample data: one row per street segment with its right/left address ranges.
Declare @YourTable Table (FullName varchar(100),City varchar(100),FromRight int,ToRight int,FromLeft int, ToLeft int)
Insert Into @YourTable (FullName,City,FromRight,ToRight,FromLeft,ToLeft) values
('W Main St' , 'Townsville' , 100 , 198 , 101 , 199),
('W Main St' , 'Townsville' , 200 , 298 , 201 , 299),
('W Main St' , 'Townsville' , 500 , 598 , 501 , 599),
('W Main St' , 'Cityburg' , 600 , 698 , 601 , 699),
('E 1st Ave' , 'Townsville' , 100 , 398 , 301 , 399),
('E 1st Ave' , 'Townsville' , 400 , 416 , 401 , 417),
('E 1st Ave' , 'Townsville' , 418 , 458 , 419 , 459);

-- Expand every range into its individual address numbers via a tally table,
-- then regroup consecutive numbers into islands.
-- NOTE(review): assumes [Util].[dbo].[Numbers] exposes a column n that contains
-- every integer in the address ranges being expanded - confirm against that table.
WITH cte AS (
    SELECT [yt].[FullName], [yt].[City], [n].[n],
        -- Within a run of consecutive numbers both n and its rank grow by one per
        -- distinct value, so the difference is constant per island.
        -- DENSE_RANK is used in place of ROW_NUMBER so that a number covered by
        -- more than one input range (overlapping or repeated rows) does not
        -- shift the difference and split the island.
        [n].[n] - DENSE_RANK() OVER (PARTITION BY [yt].[FullName], [yt].[City] ORDER BY [n].[n]) AS [grp]
    FROM [Util].[dbo].[Numbers] AS [n]
    INNER JOIN @YourTable AS [yt]
        ON [n].[n] BETWEEN [yt].[FromRight] AND [yt].[ToLeft]
)
SELECT [FullName], [City], MIN([n]) AS [FromRight], MAX([n]) AS [ToLeft]
FROM [cte]
GROUP BY [FullName] ,
    [City], [grp]
ORDER BY [FullName], [City], MIN([n]);
这里的关键观察是:在一个连续的范围内,row_number() 和计数表中的数值都以相同的速率递增(即每行加一),因此对于同一组内的所有行,二者之差保持不变。
如果您使用的是较新的 SQL Server 版本(支持 LAG/LEAD),并且假设:1) 根据您的评论,只有 FromRight 和 ToLeft 重要;2) 区间永远不会重叠,那么可以这样写:
-- Sample data: one row per street segment with its right/left address ranges.
Declare @YourTable Table (FullName varchar(100),City varchar(100),FromRight int,ToRight int,FromLeft int,ToLeft int)
Insert Into @YourTable (FullName,City,FromRight,ToRight,FromLeft,ToLeft) values
('W Main St' , 'Townsville' , 100 , 198 , 101 , 199),
('W Main St' , 'Townsville' , 200 , 298 , 201 , 299),
('W Main St' , 'Townsville' , 500 , 598 , 501 , 599),
('W Main St' , 'Cityburg' , 600 , 698 , 601 , 699),
('E 1st Ave' , 'Townsville' , 100 , 398 , 301 , 399),
('E 1st Ave' , 'Townsville' , 400 , 416 , 401 , 417),
('E 1st Ave' , 'Townsville' , 418 , 458 , 419 , 459);

-- Gaps-and-islands on (FromRight, ToLeft) per FullName and City:
--   flags  : mark rows that start / end an island,
--   groups : number the islands with a running total of start flags,
--   outer  : take FromRight from the start row and ToLeft from the end row.
-- LAG/LEAD are called without a default so the first/last row of a partition
-- compares against NULL and is always flagged; a numeric sentinel such as -1
-- would be mistaken for a real neighbour when a range starts at 0.
select FullName, City
     , FromRight = max(case startFlag when 1 then FromRight end)
     , ToLeft = max(case endFlag when 1 then ToLeft end)
from (
    select FullName, City, FromRight, ToLeft, startFlag, endFlag
         -- ROWS frame: plain running total, one island number per start flag.
         , grp = sum(startFlag) over (partition by FullName, City order by FromRight rows unbounded preceding)
    from (
        select FullName, City, FromRight, ToLeft
             -- 0 only when this row begins right after the previous row's ToLeft.
             , startFlag = case when lag(ToLeft) over (partition by FullName, City order by FromRight) + 1 = FromRight then 0 else 1 end
             -- 0 only when the next row begins right after this row's ToLeft.
             , endFlag = case when lead(FromRight) over (partition by FullName, City order by ToLeft) - 1 = ToLeft then 0 else 1 end
        from @YourTable
    ) flags
) groups
group by FullName, City, grp
order by FullName, City, FromRight;
我有代表街道地址范围的数据。当它们连续时,我需要对范围进行分组。我已经尝试了一些针对 SQL 间隙和孤岛的其他解决方案,但似乎无济于事。
FullName | City | FromRight | ToRight | FromLeft | ToLeft
-----------------------------------------------------------------
W Main St | Townsville | 100 | 198 | 101 | 199
W Main St | Townsville | 200 | 298 | 201 | 299
W Main St | Townsville | 500 | 598 | 501 | 599
W Main St | Cityburg | 600 | 698 | 601 | 699
E 1st Ave | Townsville | 100 | 398 | 301 | 399
E 1st Ave | Townsville | 400 | 416 | 401 | 417
E 1st Ave | Townsville | 418 | 458 | 419 | 459
当街道名称和城市相同时,我需要能够根据低地址(From Right)和高地址(To Left)找到连续的值。所以我的结果 table 看起来像:
FullName | City | FromRight | ToLeft
--------------------------------------------
W Main St | Townsville | 100 | 299
W Main St | Townsville | 500 | 599
W Main St | Cityburg | 600 | 699
E 1st Ave | Townsville | 100 | 459
如有任何帮助,我们将不胜感激!
已更新以处理行中的空白
我敢肯定这不是最好的方法,但它适用于您的测试数据。无论如何,写起来很有趣。我不太清楚它的实际用途——能做到并不意味着就应该这样做——但这仍然是一个有趣的挑战 :)
-- Gaps-and-islands over #cities: within each (FullName, City), merge rows whose
-- ranges are consecutive, i.e. the previous row's ToLeft + 1 equals this row's
-- FromRight. Returns one row per island: FullName, City, FromRight, ToLeft.
--
-- Islands are numbered with a running total of "start" flags instead of pairing
-- start/end rows through LAG/LEAD on adjacent rows; the adjacent-row pairing
-- only reaches one row away and silently loses islands of four or more rows.
;with flagged as (
    select
        FullName,
        City,
        FromRight,
        ToLeft,
        -- 1 when the row opens a new island: either it is the first row of its
        -- partition (LAG yields NULL, so the comparison is not true) or it does
        -- not start immediately after the previous row's ToLeft.
        case
            when lag(ToLeft) over (partition by FullName, City order by FromRight) + 1 = FromRight then 0
            else 1
        end as IsIslandStart
    from #cities),
numbered as (
    select
        FullName,
        City,
        FromRight,
        ToLeft,
        -- Running count of island starts = island number inside the partition.
        sum(IsIslandStart) over (partition by FullName, City order by FromRight rows unbounded preceding) as IslandId
    from flagged)
select
    FullName,
    City,
    min(FromRight) as FromRight,
    max(ToLeft) as ToLeft
from numbered
group by FullName, City, IslandId
order by FullName, FromRight
如果有人想试试这个查询,这里有一些测试数据(上面第一个 CTE 已经从 #cities 读取;若使用自己的表,请把 #cities 换成您的表名)。
-- Sample data for the island queries: creates the session temp table #cities
-- (via SELECT ... INTO) and fills it with ten address-range rows.
select
    v.FullName,
    v.City,
    v.FromRight,
    v.ToRight,
    v.FromLeft,
    v.ToLeft
into #cities
from (values
    ('W Main St', 'Townsville', 100, 198, 101, 199),
    ('W Main St', 'Townsville', 200, 298, 201, 299),
    ('W Main St', 'Townsville', 500, 598, 501, 599),
    ('W Main St', 'Cityburg',   600, 698, 601, 699),
    ('E 1st Ave', 'Townsville', 100, 398, 301, 399),
    ('E 1st Ave', 'Townsville', 400, 416, 401, 417),
    ('E 1st Ave', 'Townsville', 418, 458, 419, 459),
    -- The last three rows reuse ToRight/FromLeft from the row above; only
    -- FromRight and ToLeft differ.
    ('E 1st Ave', 'Townsville', 470, 458, 419, 479),
    ('E 1st Ave', 'Townsville', 490, 458, 419, 499),
    ('E 1st Ave', 'Townsville', 500, 458, 419, 501)
) as v (FullName, City, FromRight, ToRight, FromLeft, ToLeft)
-- Sample data: one row per street segment with its right/left address ranges.
Declare @YourTable Table (FullName varchar(100),City varchar(100),FromRight int,ToRight int,FromLeft int, ToLeft int)
Insert Into @YourTable (FullName,City,FromRight,ToRight,FromLeft,ToLeft) values
('W Main St' , 'Townsville' , 100 , 198 , 101 , 199),
('W Main St' , 'Townsville' , 200 , 298 , 201 , 299),
('W Main St' , 'Townsville' , 500 , 598 , 501 , 599),
('W Main St' , 'Cityburg' , 600 , 698 , 601 , 699),
('E 1st Ave' , 'Townsville' , 100 , 398 , 301 , 399),
('E 1st Ave' , 'Townsville' , 400 , 416 , 401 , 417),
('E 1st Ave' , 'Townsville' , 418 , 458 , 419 , 459)

-- Merge ranges (R1 = FromRight, R2 = ToLeft) that touch or overlap within the
-- same FullName and City, and return one row per merged island.
-- A fixed number of self-join passes only merges chains up to a limited length;
-- comparing each row with the running maximum of all earlier R2 values merges
-- chains of any length in a single pass.
;with cteBase as (
    -- Distinct keeps (R1,R2) unique per street so the window ordering below is deterministic.
    Select Distinct FullName,City,R1=FromRight,R2=ToLeft From @YourTable
),cteFlag as (
    Select FullName,City,R1,R2
          -- 0 when an earlier range reaches at least R1-1 (touching or overlapping);
          -- 1 otherwise, including the first row where the running max is NULL.
          ,IsStart = Case When max(R2) over (Partition By FullName,City Order By R1,R2 Rows Between Unbounded Preceding And 1 Preceding) >= R1-1 Then 0 Else 1 End
    From cteBase
),cteGrp as (
    Select FullName,City,R1,R2
          -- Running count of island starts = island number inside the partition.
          ,Grp = sum(IsStart) over (Partition By FullName,City Order By R1,R2 Rows Unbounded Preceding)
    From cteFlag
)
Select FullName
      ,City
      ,FromRight = min(R1)
      ,ToLeft    = max(R2)
From cteGrp
Group By FullName,City,Grp
Order By FullName Desc,City Desc,FromRight
Returns
FullName City FromRight ToLeft
W Main St Townsville 100 299
W Main St Townsville 500 599
W Main St Cityburg 600 699
E 1st Ave Townsville 100 459
此解决方案依赖于一张计数表(tally table,即数字表)。不过一旦有了这张表,写法就非常简单。(感谢上面的 John,他在另一个答案中以易于使用的格式提供了示例数据。)
-- Sample data: one row per street segment with its right/left address ranges.
Declare @YourTable Table (FullName varchar(100),City varchar(100),FromRight int,ToRight int,FromLeft int, ToLeft int)
Insert Into @YourTable (FullName,City,FromRight,ToRight,FromLeft,ToLeft) values
('W Main St' , 'Townsville' , 100 , 198 , 101 , 199),
('W Main St' , 'Townsville' , 200 , 298 , 201 , 299),
('W Main St' , 'Townsville' , 500 , 598 , 501 , 599),
('W Main St' , 'Cityburg' , 600 , 698 , 601 , 699),
('E 1st Ave' , 'Townsville' , 100 , 398 , 301 , 399),
('E 1st Ave' , 'Townsville' , 400 , 416 , 401 , 417),
('E 1st Ave' , 'Townsville' , 418 , 458 , 419 , 459);

-- Expand every range into its individual address numbers via a tally table,
-- then regroup consecutive numbers into islands.
-- NOTE(review): assumes [Util].[dbo].[Numbers] exposes a column n that contains
-- every integer in the address ranges being expanded - confirm against that table.
WITH cte AS (
    SELECT [yt].[FullName], [yt].[City], [n].[n],
        -- Within a run of consecutive numbers both n and its rank grow by one per
        -- distinct value, so the difference is constant per island.
        -- DENSE_RANK is used in place of ROW_NUMBER so that a number covered by
        -- more than one input range (overlapping or repeated rows) does not
        -- shift the difference and split the island.
        [n].[n] - DENSE_RANK() OVER (PARTITION BY [yt].[FullName], [yt].[City] ORDER BY [n].[n]) AS [grp]
    FROM [Util].[dbo].[Numbers] AS [n]
    INNER JOIN @YourTable AS [yt]
        ON [n].[n] BETWEEN [yt].[FromRight] AND [yt].[ToLeft]
)
SELECT [FullName], [City], MIN([n]) AS [FromRight], MAX([n]) AS [ToLeft]
FROM [cte]
GROUP BY [FullName] ,
    [City], [grp]
ORDER BY [FullName], [City], MIN([n]);
这里的关键观察是:在一个连续的范围内,row_number() 和计数表中的数值都以相同的速率递增(即每行加一),因此对于同一组内的所有行,二者之差保持不变。
如果您使用的是较新的 SQL Server 版本(支持 LAG/LEAD),并且假设:1) 根据您的评论,只有 FromRight 和 ToLeft 重要;2) 区间永远不会重叠,那么可以这样写:
-- Sample data: one row per street segment with its right/left address ranges.
Declare @YourTable Table (FullName varchar(100),City varchar(100),FromRight int,ToRight int,FromLeft int,ToLeft int)
Insert Into @YourTable (FullName,City,FromRight,ToRight,FromLeft,ToLeft) values
('W Main St' , 'Townsville' , 100 , 198 , 101 , 199),
('W Main St' , 'Townsville' , 200 , 298 , 201 , 299),
('W Main St' , 'Townsville' , 500 , 598 , 501 , 599),
('W Main St' , 'Cityburg' , 600 , 698 , 601 , 699),
('E 1st Ave' , 'Townsville' , 100 , 398 , 301 , 399),
('E 1st Ave' , 'Townsville' , 400 , 416 , 401 , 417),
('E 1st Ave' , 'Townsville' , 418 , 458 , 419 , 459);

-- Gaps-and-islands on (FromRight, ToLeft) per FullName and City:
--   flags  : mark rows that start / end an island,
--   groups : number the islands with a running total of start flags,
--   outer  : take FromRight from the start row and ToLeft from the end row.
-- LAG/LEAD are called without a default so the first/last row of a partition
-- compares against NULL and is always flagged; a numeric sentinel such as -1
-- would be mistaken for a real neighbour when a range starts at 0.
select FullName, City
     , FromRight = max(case startFlag when 1 then FromRight end)
     , ToLeft = max(case endFlag when 1 then ToLeft end)
from (
    select FullName, City, FromRight, ToLeft, startFlag, endFlag
         -- ROWS frame: plain running total, one island number per start flag.
         , grp = sum(startFlag) over (partition by FullName, City order by FromRight rows unbounded preceding)
    from (
        select FullName, City, FromRight, ToLeft
             -- 0 only when this row begins right after the previous row's ToLeft.
             , startFlag = case when lag(ToLeft) over (partition by FullName, City order by FromRight) + 1 = FromRight then 0 else 1 end
             -- 0 only when the next row begins right after this row's ToLeft.
             , endFlag = case when lead(FromRight) over (partition by FullName, City order by ToLeft) - 1 = ToLeft then 0 else 1 end
        from @YourTable
    ) flags
) groups
group by FullName, City, grp
order by FullName, City, FromRight;