SQL 几何 VS decimal(8,6) Lat, Long Performance

SQL Geometry VS decimal(8,6) Lat, Long Performance

我正在研究在给定坐标的特定邻近范围内选择最近点的性能。

可选方案有两种:使用两个 decimal(8,6) 列(纬度、经度),或者使用单个 geography 列。

我只关心哪个更快?

TL;DR:geography 类型大约快 10 倍。

好的,我已经设置了测试:

两张表:一张包含 id、lat、long(int、decimal(8,6)、decimal(8,6)),另一张包含 id、coord(int、geography)。

然后向每张表插入 4.7 万行随机数据。

为第一张表建立索引时,我在 Lat、Long 两列上使用了非聚集升序索引,填充因子为 95。第二张表使用空间索引,GRIDS =(LEVEL_1 = LOW,LEVEL_2 = MEDIUM,LEVEL_3 = LOW,LEVEL_4 = LOW),填充因子同样为 95。

-- Test table for the geography approach: an identity key plus one
-- geography column holding each point.
CREATE TABLE dbo.Temp
(
    Id    int       IDENTITY (1, 1) NOT NULL,
    Coord geography NOT NULL,

    -- Clustered primary key on the identity column, declared inline.
    CONSTRAINT PK_Temp PRIMARY KEY CLUSTERED (Id)
        WITH (
            STATISTICS_NORECOMPUTE = OFF,
            IGNORE_DUP_KEY = OFF,
            ALLOW_ROW_LOCKS = ON,
            ALLOW_PAGE_LOCKS = ON
        ) ON [PRIMARY]
) ON [PRIMARY]
  TEXTIMAGE_ON [PRIMARY]
GO


-- Populate dbo.Temp with 47,000 random points, one row per loop iteration.
-- Each coordinate is (0.9 - RAND() * 1.8) * 100, i.e. a value between
-- -90 and 90, which fits in decimal(8,6).
SET NOCOUNT ON;  -- avoid emitting a row-count message for every single-row insert

DECLARE @i    int          = 0,
        @lat  decimal(8,6) = 0.0,
        @long decimal(8,6) = 0.0;

WHILE (@i < 47000)
BEGIN
    SET @lat  = (0.9 - RAND() * 1.8) * 100;
    SET @long = (0.9 - RAND() * 1.8) * 100;

    -- Explicit column list so the insert does not depend on column order.
    -- geography::Point takes (latitude, longitude, SRID).
    INSERT INTO dbo.Temp (Coord)
    VALUES (geography::Point(@lat, @long, 4326));

    SET @i = @i + 1;
END

SET NOCOUNT OFF;  -- turn row-count messages back on for later batches
GO


-- Spatial index over the geography column of dbo.Temp.
CREATE SPATIAL INDEX [SpatialIndex_1]
ON [dbo].Temp ([coord])
USING GEOGRAPHY_GRID
WITH (
    -- Grid density chosen for each of the four grid levels.
    GRIDS = (
        LEVEL_1 = LOW,
        LEVEL_2 = MEDIUM,
        LEVEL_3 = LOW,
        LEVEL_4 = LOW
    ),
    CELLS_PER_OBJECT = 16,
    PAD_INDEX = OFF,
    STATISTICS_NORECOMPUTE = OFF,
    SORT_IN_TEMPDB = OFF,
    DROP_EXISTING = OFF,
    ONLINE = OFF,
    ALLOW_ROW_LOCKS = OFF,
    ALLOW_PAGE_LOCKS = ON,
    FILLFACTOR = 95
) ON [PRIMARY]
GO

-- Test table for the plain-columns approach: an identity key plus separate
-- decimal(8,6) latitude and longitude columns.
CREATE TABLE [dbo].[Temp2]
(
    [Id]   int           IDENTITY (1, 1) NOT NULL,
    [Lat]  decimal(8, 6) NOT NULL,
    [Long] decimal(8, 6) NOT NULL,

    -- Clustered primary key on the identity column.
    CONSTRAINT [PK_Temp2] PRIMARY KEY CLUSTERED ([Id] ASC)
        WITH (
            PAD_INDEX = OFF,
            STATISTICS_NORECOMPUTE = OFF,
            IGNORE_DUP_KEY = OFF,
            ALLOW_ROW_LOCKS = ON,
            ALLOW_PAGE_LOCKS = ON
        ) ON [PRIMARY]
) ON [PRIMARY]
GO


-- Populate dbo.Temp2 with 47,000 random coordinate pairs, one row per loop
-- iteration. Each coordinate is (0.9 - RAND() * 1.8) * 100, i.e. a value
-- between -90 and 90, which fits in decimal(8,6).
SET NOCOUNT ON;  -- avoid emitting a row-count message for every single-row insert

DECLARE @i    int          = 0,
        @lat  decimal(8,6) = 0,
        @long decimal(8,6) = 0;

WHILE (@i < 47000)
BEGIN
    SET @lat  = (0.9 - (RAND() * 1.8)) * 100;
    SET @long = (0.9 - (RAND() * 1.8)) * 100;

    -- Explicit column list so the insert does not depend on column order
    -- (Id is an identity column and is generated by the server).
    INSERT INTO dbo.Temp2 (Lat, Long)
    VALUES (@lat, @long);

    SET @i = @i + 1;
END

SET NOCOUNT OFF;  -- turn row-count messages back on for later batches
GO
-- Composite nonclustered index on (Lat, Long) for dbo.Temp2.
CREATE NONCLUSTERED INDEX [Coord_IX]
ON [dbo].[Temp2] ([Lat] ASC, [Long] ASC)
WITH (
    PAD_INDEX = OFF,
    STATISTICS_NORECOMPUTE = OFF,
    SORT_IN_TEMPDB = OFF,
    IGNORE_DUP_KEY = OFF,
    DROP_EXISTING = OFF,
    ONLINE = OFF,
    ALLOW_ROW_LOCKS = ON,
    ALLOW_PAGE_LOCKS = ON,
    FILLFACTOR = 95
) ON [PRIMARY]
GO

然后我运行了几个测试:

第一个是纬度,经度。

-- Benchmark: for 100 random origins, return the 1000 rows of dbo.Temp2 with
-- the smallest spherical distance to the origin, then print the elapsed time.
DECLARE @lat   decimal(8,6) = 0.0,
        @lon   decimal(8,6) = 0.0,
        @i     int          = 0,
        @start datetime     = GETDATE(),
        @lat_s float,   -- sine of the origin latitude, in radians
        @lat_c float;   -- cosine of the origin latitude, in radians

WHILE (@i < 100)
BEGIN
    SET @lat = (0.9 - RAND() * 1.8) * 100;
    SET @lon = (0.9 - (RAND() * 1.8)) * 100.0;

    -- Computed once per origin rather than once per row.
    SET @lat_s = SIN(@lat * PI() / 180);
    SET @lat_c = COS(@lat * PI() / 180);

    SELECT DISTINCT TOP 1000
        @lat,
        @lon,
        t.lat,
        t.long,
        t.dist
    FROM (
        SELECT
            c.lat,
            c.long,
            -- Float rounding can push cos_angle marginally outside [-1, 1]
            -- when a row is at (or extremely close to) the origin; ACOS then
            -- fails with "An invalid floating point operation occurred", so
            -- the argument is clamped first.
            -- NOTE(review): * 60 * 1.1515 presumably converts degrees of arc
            -- to statute miles - confirm the intended unit.
            ((ACOS(CASE
                       WHEN c.cos_angle > 1.0  THEN 1.0
                       WHEN c.cos_angle < -1.0 THEN -1.0
                       ELSE c.cos_angle
                   END) * 180 / PI()) * 60 * 1.1515) AS dist
        FROM (
            SELECT
                lat,
                long,
                @lat_s * SIN(lat * PI() / 180)
                    + @lat_c * COS(lat * PI() / 180) * COS((@lon - long) * PI() / 180) AS cos_angle
            FROM dbo.Temp2
        ) AS c
    ) AS t
    ORDER BY t.dist;

    SET @i = @i + 1;
END

-- Style 108 formats the elapsed datetime difference as hh:mi:ss.
PRINT CONVERT(varchar, (GETDATE() - @start), 108);
GO

第二个是 geography。

 -- Benchmark: for 100 random origins, return the 1000 rows of Temp with the
 -- smallest STDistance to the origin, then print the elapsed time.
 DECLARE @g geography;

 DECLARE @i     int          = 0,
         @lat   decimal(8,6) = 0.0,
         @long  decimal(8,6) = 0.0,
         @start datetime     = GETDATE();

 WHILE (@i < 100)
 BEGIN
     SET @lat  = (0.9 - RAND() * 1.8) * 100;
     SET @long = (0.9 - RAND() * 1.8) * 100;

     -- Build the origin with geography::Point(latitude, longitude, SRID).
     -- Geography WKT is POINT(longitude latitude), so concatenating the text
     -- as "lat long" would swap the two axes of the origin.
     SET @g = geography::Point(@lat, @long, 4326);

     SELECT TOP 1000
         @lat,
         @long,
         @g.STDistance(st.[coord]) AS [DistanceFromPoint (in meters)],
         st.[coord],
         st.id
     FROM Temp st
     ORDER BY @g.STDistance(st.[coord]) ASC;

     SET @i = @i + 1;
 END

 -- Style 108 formats the elapsed datetime difference as hh:mi:ss.
 PRINT CONVERT(varchar, (GETDATE() - @start), 108);
 GO

结果:

  • 经纬度 - 00:00:10
  • 地理 - 00:02:21

对于想知道 geography 为什么表现如此糟糕的人:这是执行计划——请注意它没有使用空间索引,并且排序耗时很长,因为行大小为 4047 字节(而 decimal 方案为 25 字节)。尝试强制使用该索引只会导致运行时错误。

P.S. 我也测试了平面距离的版本,但它与球面版本的差异非常小,约 0.5 秒(平面版本在 9.5–10.0 秒后返回,似乎稍快一点)。为了把所有内容放在一处,脚本如下:

-- Benchmark: same shape as the spherical lat/long test, but ranks rows of
-- dbo.Temp2 by straight-line distance in degree space.
PRINT 'flat'

DECLARE @lat   decimal(8,6) = 0.0;
DECLARE @lon   decimal(8,6) = 0.0;
DECLARE @i     int          = 0;
DECLARE @start datetime     = GETDATE();

WHILE (@i < 100)
BEGIN
    -- New random origin for every iteration.
    SET @lat = (0.9 - RAND() * 1.8) * 100;
    SET @lon = (0.9 - (RAND() * 1.8)) * 100.0;

    -- Distance from the origin to every row, then the 1000 smallest.
    WITH planar AS (
        SELECT
            lat,
            long,
            SQRT(POWER((@lat - lat), 2) + (POWER((@lon - long), 2))) AS dist
        FROM dbo.Temp2
    )
    SELECT DISTINCT TOP 1000
        @lat,
        @lon,
        planar.lat,
        planar.long,
        planar.dist
    FROM planar
    ORDER BY planar.dist;

    SET @i = @i + 1;
END

-- Style 108 formats the elapsed datetime difference as hh:mi:ss.
PRINT CONVERT(varchar, (GETDATE() - @start), 108)
GO

更新:

切换到 SQL Server 2014,并在 1000 万条记录上强制使用索引之后:

  • 经纬度 00:00:22.935
  • 平面 00:00:22.988
  • 地理 00:00:02.427

使用的地理脚本:

-- Nearest-neighbour query against Temp that forces the spatial index:
-- returns the 1000 rows closest to one random origin.
DECLARE @g geography;

-- @i and @start are not used in the visible part of this script; they are
-- kept in case the rest of the batch still refers to them.
DECLARE @i     int          = 0,
        @lat   decimal(8,6) = 0.0,
        @long  decimal(8,6) = 0.0,
        @start datetime     = GETDATE();

SET @lat  = (0.9 - RAND() * 1.8) * 100;
SET @long = (0.9 - RAND() * 1.8) * 100;

-- Build the origin with geography::Point(latitude, longitude, SRID).
-- Geography WKT is POINT(longitude latitude), so concatenating the text as
-- "lat long" would swap the two axes of the origin.
SET @g = geography::Point(@lat, @long, 4326);

SELECT TOP 1000
    @lat,
    @long,
    @g.STDistance(st.[coord]) AS [DistanceFromPoint (in meters)],
    st.[coord],
    st.id
FROM Temp st WITH (INDEX([SpatialIndex_1]))
-- The IS NOT NULL predicate on STDistance, combined with TOP and an
-- ascending ORDER BY on the same expression, is the query shape the spatial
-- index can serve for nearest-neighbour searches.
WHERE @g.STDistance(st.[coord]) IS NOT NULL
ORDER BY @g.STDistance(st.[coord]) ASC