SQL 几何 VS decimal(8,6) Lat, Long Performance
SQL Geometry VS decimal(8,6) Lat, Long Performance
我正在研究:在给定坐标的特定邻近范围内选择最近点时的性能。
可选方案有两种:使用两个 decimal(8,6) 列(纬度、经度),
或者使用单个 geography 列。
我只关心哪个更快?
TL;DR:(强制使用空间索引后)geography 方案大约快 10 倍。
好的,我已经设置了测试:
两张表,一张包含 id、lat、long(int、decimal(8,6)、decimal(8,6)),
另一张包含 id、coord(int、geography)。
然后插入 4.7 万条随机数据。
对于第一张表,我在纬度、经度上建立了非聚集升序索引,填充因子为 95。
对于第二张表,空间索引使用 GRIDS =(LEVEL_1 = LOW,LEVEL_2 = MEDIUM,LEVEL_3 = LOW,LEVEL_4 = LOW),
填充因子为 95。
-- Geography test table: an identity key plus one geography value per row.
-- The clustered primary key PK_Temp is declared inline with the table.
CREATE TABLE [dbo].[Temp]
(
    [Id]    int       IDENTITY(1, 1) NOT NULL,
    [Coord] geography NOT NULL,
    CONSTRAINT [PK_Temp] PRIMARY KEY CLUSTERED ([Id])
        WITH (
            STATISTICS_NORECOMPUTE = OFF,
            IGNORE_DUP_KEY         = OFF,
            ALLOW_ROW_LOCKS        = ON,
            ALLOW_PAGE_LOCKS       = ON
        )
        ON [PRIMARY]
)
ON [PRIMARY]
TEXTIMAGE_ON [PRIMARY]
GO
-- Populate dbo.Temp with 47,000 random points (SRID 4326).
-- (0.9 - RAND() * 1.8) * 100 stays within -90..90, so each value fits
-- decimal(8,6) and is usable both as a latitude and as a longitude.
DECLARE @i    int          = 0;
DECLARE @lat  decimal(8,6) = 0.0;
DECLARE @long decimal(8,6) = 0.0;

WHILE (@i < 47000)
BEGIN
    -- A fresh pair of random coordinates is drawn on every pass.
    SET @lat  = (0.9 - RAND() * 1.8) * 100;
    SET @long = (0.9 - RAND() * 1.8) * 100;

    -- Explicit schema and column list: the insert does not depend on the
    -- caller's default schema or on the table's column order.
    INSERT INTO dbo.Temp (Coord)
    VALUES (geography::Point(@lat, @long, 4326));

    SET @i = @i + 1;
END
GO
-- Spatial (geography grid) index over dbo.Temp.coord.
CREATE SPATIAL INDEX [SpatialIndex_1]
ON [dbo].[Temp] ([coord])
USING GEOGRAPHY_GRID
WITH (
    GRIDS = (LEVEL_1 = LOW, LEVEL_2 = MEDIUM, LEVEL_3 = LOW, LEVEL_4 = LOW),
    CELLS_PER_OBJECT       = 16,
    FILLFACTOR             = 95,
    PAD_INDEX              = OFF,
    STATISTICS_NORECOMPUTE = OFF,
    SORT_IN_TEMPDB         = OFF,
    DROP_EXISTING          = OFF,
    ONLINE                 = OFF,
    ALLOW_ROW_LOCKS        = OFF,  -- row locks are disabled on this index only
    ALLOW_PAGE_LOCKS       = ON
)
ON [PRIMARY]
GO
-- Latitude/longitude test table: an identity key plus two decimal columns.
-- decimal(8, 6) keeps six decimal places and two integer digits,
-- i.e. values up to +/-99.999999.
CREATE TABLE dbo.Temp2
(
    Id     int           IDENTITY(1, 1) NOT NULL,
    Lat    decimal(8, 6) NOT NULL,
    [Long] decimal(8, 6) NOT NULL,
    CONSTRAINT PK_Temp2 PRIMARY KEY CLUSTERED (Id ASC)
        WITH (
            PAD_INDEX              = OFF,
            STATISTICS_NORECOMPUTE = OFF,
            IGNORE_DUP_KEY         = OFF,
            ALLOW_ROW_LOCKS        = ON,
            ALLOW_PAGE_LOCKS       = ON
        )
        ON [PRIMARY]
)
ON [PRIMARY]
GO
-- Populate dbo.Temp2 with 47,000 random latitude/longitude pairs.
-- (0.9 - RAND() * 1.8) * 100 stays within -90..90, which fits decimal(8,6).
DECLARE @i    int          = 0;
DECLARE @lat  decimal(8,6) = 0;
DECLARE @long decimal(8,6) = 0;

WHILE (@i < 47000)
BEGIN
    -- A fresh pair of random coordinates is drawn on every pass.
    SET @lat  = (0.9 - (RAND() * 1.8)) * 100;
    SET @long = (0.9 - (RAND() * 1.8)) * 100;

    -- Explicit schema and column list: the insert does not depend on the
    -- caller's default schema or on the table's column order.
    INSERT INTO dbo.Temp2 (Lat, [Long])
    VALUES (@lat, @long);

    SET @i = @i + 1;
END
GO
-- Composite nonclustered index on (Lat, Long), both ascending, fill factor 95.
CREATE NONCLUSTERED INDEX Coord_IX
ON dbo.Temp2 (Lat ASC, [Long] ASC)
WITH (
    FILLFACTOR             = 95,
    PAD_INDEX              = OFF,
    STATISTICS_NORECOMPUTE = OFF,
    SORT_IN_TEMPDB         = OFF,
    IGNORE_DUP_KEY         = OFF,
    DROP_EXISTING          = OFF,
    ONLINE                 = OFF,
    ALLOW_ROW_LOCKS        = ON,
    ALLOW_PAGE_LOCKS       = ON
)
ON [PRIMARY]
GO
然后我运行了几个测试:
第一个针对纬度/经度列。
-- Benchmark 1: for 100 random probe points, return the 1000 nearest rows of
-- dbo.Temp2 ordered by a distance computed from the Lat/Long columns, then
-- print the total elapsed time (style 108 = hh:mi:ss).
DECLARE @probe_lat  decimal(8,6) = 0.0,
        @probe_lon  decimal(8,6) = 0.0,
        @run        int          = 0,
        @started_at datetime     = GETDATE(),
        @sin_lat    float,
        @cos_lat    float;

WHILE (@run < 100)
BEGIN
    SET @probe_lat = (0.9 - RAND() * 1.8) * 100;
    SET @probe_lon = (0.9 - (RAND() * 1.8)) * 100.0;

    -- Sine and cosine of the probe latitude do not vary per row, so they are
    -- computed once per probe.
    SET @sin_lat = SIN(@probe_lat * PI() / 180);
    SET @cos_lat = COS(@probe_lat * PI() / 180);

    -- NOTE(review): the trailing * 60 * 1.1515 presumably converts degrees of
    -- arc to statute miles; confirm the intended unit.
    WITH distances AS (
        SELECT
            lat,
            long,
            ((ACOS(@sin_lat * SIN(lat * PI() / 180)
                   + @cos_lat * COS(lat * PI() / 180)
                     * COS((@probe_lon - long) * PI() / 180))
              * 180 / PI()) * 60 * 1.1515) AS dist
        FROM dbo.Temp2
    )
    SELECT DISTINCT TOP 1000
        @probe_lat,
        @probe_lon,
        lat,
        long,
        dist
    FROM distances
    ORDER BY dist;

    SET @run = @run + 1;
END
PRINT CONVERT(varchar, (GETDATE() - @started_at), 108);
GO
第二个针对 geography 列。
-- Benchmark 2: for 100 random probe points, return the 1000 rows of dbo.Temp
-- nearest to the probe according to STDistance, then print the total elapsed
-- time (style 108 = hh:mi:ss).
-- The query has no index hint and no WHERE clause on the distance.
DECLARE @g     geography;
DECLARE @i     int          = 0,
        @lat   decimal(8,6) = 0.0,
        @long  decimal(8,6) = 0.0,
        @start datetime     = GETDATE();

WHILE (@i < 100)
BEGIN
    SET @lat  = (0.9 - RAND() * 1.8) * 100;
    SET @long = (0.9 - RAND() * 1.8) * 100;

    -- Build the probe with geography::Point(latitude, longitude, srid), the
    -- same constructor used to load the table. Well-known text is
    -- 'POINT(x y)', i.e. longitude first, so concatenating
    -- 'POINT(<lat> <long>)' transposes the coordinates.
    SET @g = geography::Point(@lat, @long, 4326);

    SELECT TOP 1000
        @lat,
        @long,
        @g.STDistance(st.[coord]) AS [DistanceFromPoint (in meters)],
        st.[coord],
        st.id
    FROM dbo.Temp AS st
    ORDER BY @g.STDistance(st.[coord]) ASC;

    SET @i = @i + 1;
END
PRINT CONVERT(varchar, (GETDATE() - @start), 108);
GO
结果:
- 经纬度 - 00:00:10
- 地理 - 00:02:21
对于那些想知道为什么 geography 表现如此糟糕的人:
这是执行计划——请注意它没有使用空间索引,并且排序耗时很长,因为行大小为 4047 字节(decimal 方案为 25 字节)。尝试……
P.S. 我也针对平面距离做了一次测试,但与球面距离的差异非常小,约 0.5 秒(在 9.5–10.0 秒后返回,似乎稍快一点)。不过为了把所有内容放在一处,脚本如下:
-- Benchmark 3 ("flat"): same loop as the lat/long benchmark, but the distance
-- is the plain Euclidean distance between the coordinate pairs.
-- Prints the total elapsed time (style 108 = hh:mi:ss).
PRINT 'flat';

DECLARE @probe_lat  decimal(8,6) = 0.0,
        @probe_lon  decimal(8,6) = 0.0,
        @run        int          = 0,
        @started_at datetime     = GETDATE();

WHILE (@run < 100)
BEGIN
    SET @probe_lat = (0.9 - RAND() * 1.8) * 100;
    SET @probe_lon = (0.9 - (RAND() * 1.8)) * 100.0;

    WITH distances AS (
        SELECT
            lat,
            long,
            SQRT(POWER((@probe_lat - lat), 2) + (POWER((@probe_lon - long), 2))) AS dist
        FROM dbo.Temp2
    )
    SELECT DISTINCT TOP 1000
        @probe_lat,
        @probe_lon,
        lat,
        long,
        dist
    FROM distances
    ORDER BY dist;

    SET @run = @run + 1;
END
PRINT CONVERT(varchar, (GETDATE() - @started_at), 108);
GO
更新:
切换到 SQL 2014,并在 1000 万条记录上强制使用索引后:
- 经纬度 00:00:22.935
- 平面 00:00:22.988
- 地理 00:00:02.427
使用的地理脚本:
-- Single nearest-neighbour probe against dbo.Temp with the spatial index
-- forced through a table hint.
DECLARE @g     geography;
DECLARE @lat   decimal(8,6) = 0.0,
        @long  decimal(8,6) = 0.0,
        @start datetime     = GETDATE();  -- captured start time; not read in this script

SET @lat  = (0.9 - RAND() * 1.8) * 100;
SET @long = (0.9 - RAND() * 1.8) * 100;

-- Build the probe with geography::Point(latitude, longitude, srid), the same
-- constructor used to load the table. Well-known text is 'POINT(x y)', i.e.
-- longitude first, so concatenating 'POINT(<lat> <long>)' transposes the
-- coordinates.
SET @g = geography::Point(@lat, @long, 4326);

SELECT TOP 1000
    @lat,
    @long,
    @g.STDistance(st.[coord]) AS [DistanceFromPoint (in meters)],
    st.[coord],
    st.id
FROM dbo.Temp AS st WITH (INDEX([SpatialIndex_1]))
-- NOTE(review): the IS NOT NULL predicate is presumably what lets the
-- optimizer treat this as a nearest-neighbour query; confirm against the
-- SQL Server spatial documentation.
WHERE @g.STDistance(st.[coord]) IS NOT NULL
ORDER BY @g.STDistance(st.[coord]) ASC
我正在研究:在给定坐标的特定邻近范围内选择最近点时的性能。
可选方案有两种:使用两个 decimal(8,6) 列(纬度、经度),
或者使用单个 geography 列。
我只关心哪个更快?
TL;DR:(强制使用空间索引后)geography 方案大约快 10 倍。
好的,我已经设置了测试:
两张表,一张包含 id、lat、long(int、decimal(8,6)、decimal(8,6)),
另一张包含 id、coord(int、geography)。
然后插入 4.7 万条随机数据。
对于第一张表,我在纬度、经度上建立了非聚集升序索引,填充因子为 95。
对于第二张表,空间索引使用 GRIDS =(LEVEL_1 = LOW,LEVEL_2 = MEDIUM,LEVEL_3 = LOW,LEVEL_4 = LOW),
填充因子为 95。
-- Geography test table: an identity key plus one geography value per row.
-- The clustered primary key PK_Temp is declared inline with the table.
CREATE TABLE [dbo].[Temp]
(
    [Id]    int       IDENTITY(1, 1) NOT NULL,
    [Coord] geography NOT NULL,
    CONSTRAINT [PK_Temp] PRIMARY KEY CLUSTERED ([Id])
        WITH (
            STATISTICS_NORECOMPUTE = OFF,
            IGNORE_DUP_KEY         = OFF,
            ALLOW_ROW_LOCKS        = ON,
            ALLOW_PAGE_LOCKS       = ON
        )
        ON [PRIMARY]
)
ON [PRIMARY]
TEXTIMAGE_ON [PRIMARY]
GO
-- Populate dbo.Temp with 47,000 random points (SRID 4326).
-- (0.9 - RAND() * 1.8) * 100 stays within -90..90, so each value fits
-- decimal(8,6) and is usable both as a latitude and as a longitude.
DECLARE @i    int          = 0;
DECLARE @lat  decimal(8,6) = 0.0;
DECLARE @long decimal(8,6) = 0.0;

WHILE (@i < 47000)
BEGIN
    -- A fresh pair of random coordinates is drawn on every pass.
    SET @lat  = (0.9 - RAND() * 1.8) * 100;
    SET @long = (0.9 - RAND() * 1.8) * 100;

    -- Explicit schema and column list: the insert does not depend on the
    -- caller's default schema or on the table's column order.
    INSERT INTO dbo.Temp (Coord)
    VALUES (geography::Point(@lat, @long, 4326));

    SET @i = @i + 1;
END
GO
-- Spatial (geography grid) index over dbo.Temp.coord.
CREATE SPATIAL INDEX [SpatialIndex_1]
ON [dbo].[Temp] ([coord])
USING GEOGRAPHY_GRID
WITH (
    GRIDS = (LEVEL_1 = LOW, LEVEL_2 = MEDIUM, LEVEL_3 = LOW, LEVEL_4 = LOW),
    CELLS_PER_OBJECT       = 16,
    FILLFACTOR             = 95,
    PAD_INDEX              = OFF,
    STATISTICS_NORECOMPUTE = OFF,
    SORT_IN_TEMPDB         = OFF,
    DROP_EXISTING          = OFF,
    ONLINE                 = OFF,
    ALLOW_ROW_LOCKS        = OFF,  -- row locks are disabled on this index only
    ALLOW_PAGE_LOCKS       = ON
)
ON [PRIMARY]
GO
-- Latitude/longitude test table: an identity key plus two decimal columns.
-- decimal(8, 6) keeps six decimal places and two integer digits,
-- i.e. values up to +/-99.999999.
CREATE TABLE dbo.Temp2
(
    Id     int           IDENTITY(1, 1) NOT NULL,
    Lat    decimal(8, 6) NOT NULL,
    [Long] decimal(8, 6) NOT NULL,
    CONSTRAINT PK_Temp2 PRIMARY KEY CLUSTERED (Id ASC)
        WITH (
            PAD_INDEX              = OFF,
            STATISTICS_NORECOMPUTE = OFF,
            IGNORE_DUP_KEY         = OFF,
            ALLOW_ROW_LOCKS        = ON,
            ALLOW_PAGE_LOCKS       = ON
        )
        ON [PRIMARY]
)
ON [PRIMARY]
GO
-- Populate dbo.Temp2 with 47,000 random latitude/longitude pairs.
-- (0.9 - RAND() * 1.8) * 100 stays within -90..90, which fits decimal(8,6).
DECLARE @i    int          = 0;
DECLARE @lat  decimal(8,6) = 0;
DECLARE @long decimal(8,6) = 0;

WHILE (@i < 47000)
BEGIN
    -- A fresh pair of random coordinates is drawn on every pass.
    SET @lat  = (0.9 - (RAND() * 1.8)) * 100;
    SET @long = (0.9 - (RAND() * 1.8)) * 100;

    -- Explicit schema and column list: the insert does not depend on the
    -- caller's default schema or on the table's column order.
    INSERT INTO dbo.Temp2 (Lat, [Long])
    VALUES (@lat, @long);

    SET @i = @i + 1;
END
GO
-- Composite nonclustered index on (Lat, Long), both ascending, fill factor 95.
CREATE NONCLUSTERED INDEX Coord_IX
ON dbo.Temp2 (Lat ASC, [Long] ASC)
WITH (
    FILLFACTOR             = 95,
    PAD_INDEX              = OFF,
    STATISTICS_NORECOMPUTE = OFF,
    SORT_IN_TEMPDB         = OFF,
    IGNORE_DUP_KEY         = OFF,
    DROP_EXISTING          = OFF,
    ONLINE                 = OFF,
    ALLOW_ROW_LOCKS        = ON,
    ALLOW_PAGE_LOCKS       = ON
)
ON [PRIMARY]
GO
然后我运行了几个测试:
第一个针对纬度/经度列。
-- Benchmark 1: for 100 random probe points, return the 1000 nearest rows of
-- dbo.Temp2 ordered by a distance computed from the Lat/Long columns, then
-- print the total elapsed time (style 108 = hh:mi:ss).
DECLARE @probe_lat  decimal(8,6) = 0.0,
        @probe_lon  decimal(8,6) = 0.0,
        @run        int          = 0,
        @started_at datetime     = GETDATE(),
        @sin_lat    float,
        @cos_lat    float;

WHILE (@run < 100)
BEGIN
    SET @probe_lat = (0.9 - RAND() * 1.8) * 100;
    SET @probe_lon = (0.9 - (RAND() * 1.8)) * 100.0;

    -- Sine and cosine of the probe latitude do not vary per row, so they are
    -- computed once per probe.
    SET @sin_lat = SIN(@probe_lat * PI() / 180);
    SET @cos_lat = COS(@probe_lat * PI() / 180);

    -- NOTE(review): the trailing * 60 * 1.1515 presumably converts degrees of
    -- arc to statute miles; confirm the intended unit.
    WITH distances AS (
        SELECT
            lat,
            long,
            ((ACOS(@sin_lat * SIN(lat * PI() / 180)
                   + @cos_lat * COS(lat * PI() / 180)
                     * COS((@probe_lon - long) * PI() / 180))
              * 180 / PI()) * 60 * 1.1515) AS dist
        FROM dbo.Temp2
    )
    SELECT DISTINCT TOP 1000
        @probe_lat,
        @probe_lon,
        lat,
        long,
        dist
    FROM distances
    ORDER BY dist;

    SET @run = @run + 1;
END
PRINT CONVERT(varchar, (GETDATE() - @started_at), 108);
GO
第二个针对 geography 列。
-- Benchmark 2: for 100 random probe points, return the 1000 rows of dbo.Temp
-- nearest to the probe according to STDistance, then print the total elapsed
-- time (style 108 = hh:mi:ss).
-- The query has no index hint and no WHERE clause on the distance.
DECLARE @g     geography;
DECLARE @i     int          = 0,
        @lat   decimal(8,6) = 0.0,
        @long  decimal(8,6) = 0.0,
        @start datetime     = GETDATE();

WHILE (@i < 100)
BEGIN
    SET @lat  = (0.9 - RAND() * 1.8) * 100;
    SET @long = (0.9 - RAND() * 1.8) * 100;

    -- Build the probe with geography::Point(latitude, longitude, srid), the
    -- same constructor used to load the table. Well-known text is
    -- 'POINT(x y)', i.e. longitude first, so concatenating
    -- 'POINT(<lat> <long>)' transposes the coordinates.
    SET @g = geography::Point(@lat, @long, 4326);

    SELECT TOP 1000
        @lat,
        @long,
        @g.STDistance(st.[coord]) AS [DistanceFromPoint (in meters)],
        st.[coord],
        st.id
    FROM dbo.Temp AS st
    ORDER BY @g.STDistance(st.[coord]) ASC;

    SET @i = @i + 1;
END
PRINT CONVERT(varchar, (GETDATE() - @start), 108);
GO
结果:
- 经纬度 - 00:00:10
- 地理 - 00:02:21
对于那些想知道为什么 geography 表现如此糟糕的人:
这是执行计划——请注意它没有使用空间索引,并且排序耗时很长,因为行大小为 4047 字节(decimal 方案为 25 字节)。尝试……
P.S. 我也针对平面距离做了一次测试,但与球面距离的差异非常小,约 0.5 秒(在 9.5–10.0 秒后返回,似乎稍快一点)。不过为了把所有内容放在一处,脚本如下:
-- Benchmark 3 ("flat"): same loop as the lat/long benchmark, but the distance
-- is the plain Euclidean distance between the coordinate pairs.
-- Prints the total elapsed time (style 108 = hh:mi:ss).
PRINT 'flat';

DECLARE @probe_lat  decimal(8,6) = 0.0,
        @probe_lon  decimal(8,6) = 0.0,
        @run        int          = 0,
        @started_at datetime     = GETDATE();

WHILE (@run < 100)
BEGIN
    SET @probe_lat = (0.9 - RAND() * 1.8) * 100;
    SET @probe_lon = (0.9 - (RAND() * 1.8)) * 100.0;

    WITH distances AS (
        SELECT
            lat,
            long,
            SQRT(POWER((@probe_lat - lat), 2) + (POWER((@probe_lon - long), 2))) AS dist
        FROM dbo.Temp2
    )
    SELECT DISTINCT TOP 1000
        @probe_lat,
        @probe_lon,
        lat,
        long,
        dist
    FROM distances
    ORDER BY dist;

    SET @run = @run + 1;
END
PRINT CONVERT(varchar, (GETDATE() - @started_at), 108);
GO
更新:
切换到 SQL 2014,并在 1000 万条记录上强制使用索引后:
- 经纬度 00:00:22.935
- 平面 00:00:22.988
- 地理 00:00:02.427
使用的地理脚本:
-- Single nearest-neighbour probe against dbo.Temp with the spatial index
-- forced through a table hint.
DECLARE @g     geography;
DECLARE @lat   decimal(8,6) = 0.0,
        @long  decimal(8,6) = 0.0,
        @start datetime     = GETDATE();  -- captured start time; not read in this script

SET @lat  = (0.9 - RAND() * 1.8) * 100;
SET @long = (0.9 - RAND() * 1.8) * 100;

-- Build the probe with geography::Point(latitude, longitude, srid), the same
-- constructor used to load the table. Well-known text is 'POINT(x y)', i.e.
-- longitude first, so concatenating 'POINT(<lat> <long>)' transposes the
-- coordinates.
SET @g = geography::Point(@lat, @long, 4326);

SELECT TOP 1000
    @lat,
    @long,
    @g.STDistance(st.[coord]) AS [DistanceFromPoint (in meters)],
    st.[coord],
    st.id
FROM dbo.Temp AS st WITH (INDEX([SpatialIndex_1]))
-- NOTE(review): the IS NOT NULL predicate is presumably what lets the
-- optimizer treat this as a nearest-neighbour query; confirm against the
-- SQL Server spatial documentation.
WHERE @g.STDistance(st.[coord]) IS NOT NULL
ORDER BY @g.STDistance(st.[coord]) ASC