将带缓冲区的 GEOGRAPHY 点不重复地插入目标表

Insert buffered GEOGRAPHY Points into target table without duplicates

我想根据源表创建一个目标表,其中只包含互不重复(unique/distinct)的点,即任意两个点之间的距离都不应小于 1000 米。

下面是一个起点(使用临时表的简化模型):

-- Drop any left-over scratch tables so the script can be re-run in one session.
IF OBJECT_ID('tempdb..#Source') IS NOT NULL
    DROP TABLE #Source;
IF OBJECT_ID('tempdb..#TargetSeeded') IS NOT NULL
    DROP TABLE #TargetSeeded;
IF OBJECT_ID('tempdb..#TargetEmpty') IS NOT NULL
    DROP TABLE #TargetEmpty;

-- Candidate points.
CREATE TABLE #Source (
    Id    INT IDENTITY(1, 1) PRIMARY KEY,
    Point GEOGRAPHY
);

-- Target table that the script pre-populates with some points.
CREATE TABLE #TargetSeeded (
    Id    INT IDENTITY(1, 1) PRIMARY KEY,
    Point GEOGRAPHY
);

-- Target table that the script leaves without rows.
CREATE TABLE #TargetEmpty (
    Id    INT IDENTITY(1, 1) PRIMARY KEY,
    Point GEOGRAPHY
);

-- Buffer radius in metres: two points closer than this count as duplicates.
DECLARE @PointBufferDistanceInMeters INT = 1000;

-- Four sample points (SRID 4326, WKT order is "longitude latitude").
-- @Point3 and @Point4 are deliberately identical.
DECLARE
    @Point1 GEOGRAPHY = GEOGRAPHY::STPointFromText('POINT(1 52.50)', 4326),
    @Point2 GEOGRAPHY = GEOGRAPHY::STPointFromText('POINT(1 52.51)', 4326),
    @Point3 GEOGRAPHY = GEOGRAPHY::STPointFromText('POINT(1 52.52)', 4326),
    @Point4 GEOGRAPHY = GEOGRAPHY::STPointFromText('POINT(1 52.52)', 4326);

-- To inspect the pairwise distances, run for example:
--   SELECT @Point1.STDistance(@Point2);
--   SELECT @Point1.STDistance(@Point3);
--   SELECT @Point1.STDistance(@Point4);
--   SELECT @Point2.STDistance(@Point3);

-- All four points are candidates.
INSERT INTO #Source (Point)
VALUES
    (@Point1),
    (@Point2),
    (@Point3),
    (@Point4);

-- The seeded target already holds the first two points.
INSERT INTO #TargetSeeded (Point)
VALUES
    (@Point1),
    (@Point2);

-- One spatial index per table on its Point column.
CREATE SPATIAL INDEX SpatialIndex ON #Source (Point);
CREATE SPATIAL INDEX SpatialIndex ON #TargetEmpty (Point);
CREATE SPATIAL INDEX SpatialIndex ON #TargetSeeded (Point);

-- Identify the #Source rows that may be inserted: those for which no
-- #TargetSeeded point lies closer than @PointBufferDistanceInMeters.
-- A correlated NOT EXISTS replaces NOT IN over a subquery that re-joined
-- #Source; it returns the same rows without the extra join.
-- NOTE: this does not remove rows of #Source that duplicate each other.
SELECT
    So.Id,
    So.Point
FROM #Source AS So
WHERE NOT EXISTS
(
    SELECT 1
    FROM #TargetSeeded AS Ta
    WHERE So.Point.STDistance(Ta.Point) < @PointBufferDistanceInMeters
);

我可以用基于集合(set-based)的方式,以一种(依我看)还算高效(?)的做法从 #Source 中找出待插入的候选行。我只是不知道如何去除 #Source 表内部的重复项(即上述意义上的重复:彼此相距不足 1000 米的点)。最终我希望把 Id 3 或 Id 4 其中之一插入 #TargetSeeded(具体哪一个无关紧要)。有什么想法吗?

PS:

下面是一次使用相关子查询(correlated subquery)的尝试,也许可以作为一种解决方案:

-- Drop any left-over scratch tables so the script can be re-run in one session.
IF OBJECT_ID('tempdb..#Source') IS NOT NULL
    DROP TABLE #Source;
IF OBJECT_ID('tempdb..#TargetSeeded') IS NOT NULL
    DROP TABLE #TargetSeeded;
IF OBJECT_ID('tempdb..#TargetEmpty') IS NOT NULL
    DROP TABLE #TargetEmpty;

-- Candidate points.
CREATE TABLE #Source (
    Id    INT IDENTITY(1, 1) PRIMARY KEY,
    Point GEOGRAPHY
);

-- Target table that the script pre-populates with some points.
CREATE TABLE #TargetSeeded (
    Id    INT IDENTITY(1, 1) PRIMARY KEY,
    Point GEOGRAPHY
);

-- Target table that the script leaves without rows.
CREATE TABLE #TargetEmpty (
    Id    INT IDENTITY(1, 1) PRIMARY KEY,
    Point GEOGRAPHY
);

-- Buffer radius in metres: two points closer than this count as duplicates.
DECLARE @PointBufferDistanceInMeters INT = 1000;

-- Four sample points (SRID 4326, WKT order is "longitude latitude").
-- @Point3 and @Point4 are deliberately identical.
DECLARE
    @Point1 GEOGRAPHY = GEOGRAPHY::STPointFromText('POINT(1 52.50)', 4326),
    @Point2 GEOGRAPHY = GEOGRAPHY::STPointFromText('POINT(1 52.51)', 4326),
    @Point3 GEOGRAPHY = GEOGRAPHY::STPointFromText('POINT(1 52.52)', 4326),
    @Point4 GEOGRAPHY = GEOGRAPHY::STPointFromText('POINT(1 52.52)', 4326);

-- To inspect the pairwise distances, run for example:
--   SELECT @Point1.STDistance(@Point2);
--   SELECT @Point1.STDistance(@Point3);
--   SELECT @Point1.STDistance(@Point4);
--   SELECT @Point2.STDistance(@Point3);

-- All four points are candidates.
INSERT INTO #Source (Point)
VALUES
    (@Point1),
    (@Point2),
    (@Point3),
    (@Point4);

-- The seeded target already holds the first two points.
INSERT INTO #TargetSeeded (Point)
VALUES
    (@Point1),
    (@Point2);

-- One spatial index per table on its Point column.
CREATE SPATIAL INDEX SpatialIndex ON #Source (Point);
CREATE SPATIAL INDEX SpatialIndex ON #TargetEmpty (Point);
CREATE SPATIAL INDEX SpatialIndex ON #TargetSeeded (Point);

-- Remove from #Source every row that lies closer than
-- @PointBufferDistanceInMeters to a point already in #TargetSeeded.
-- This is destructive: only rows that do not conflict with the target remain
-- in #Source afterwards.
-- The original expressed this as NOT IN (... NOT IN (...)) around a self-join
-- of #Source; a direct EXISTS deletes exactly the same rows.
DELETE So
FROM #Source AS So
WHERE EXISTS
(
    SELECT 1
    FROM #TargetSeeded AS Ta
    WHERE So.Point.STDistance(Ta.Point) < @PointBufferDistanceInMeters
);

-- From the rows currently in #Source, keep one row per cluster: a row is
-- returned only when its Id is the highest Id among all #Source rows that lie
-- closer than @PointBufferDistanceInMeters to it.
SELECT
    Cand.Id,
    Cand.Point
FROM #Source AS Cand
WHERE Cand.Id =
(
    SELECT MAX(Near.Id)
    FROM #Source AS Near
    WHERE Near.Point.STDistance(Cand.Point) < @PointBufferDistanceInMeters
);

这有帮助吗?

-- Sample data.

-- Candidate points; rows 3 and 4 are the same location.
declare @Source as Table (
  Id    Int Identity( 1, 1 ) Primary Key,
  Point Geography
);
insert into @Source ( Point )
values
  ( Geography::STPointFromText( 'Point( 1 52.50 )', 4326 ) ),
  ( Geography::STPointFromText( 'Point( 1 52.51 )', 4326 ) ),
  ( Geography::STPointFromText( 'Point( 1 52.52 )', 4326 ) ),
  ( Geography::STPointFromText( 'Point( 1 52.52 )', 4326 ) );
-- Show the candidates with their points rendered as text.
select Id, Point, Point.ToString() as DecodedPoint
  from @Source;

-- Points already present in the target.
declare @Target as Table (
  Id    Int Identity( 1, 1 ) Primary Key,
  Point Geography
);
insert into @Target ( Point )
values
  ( Geography::STPointFromText( 'Point( 1 52.50 )', 4326 ) ),
  ( Geography::STPointFromText( 'Point( 1 52.51 )', 4326 ) );
-- Show the target before the merge.
select Id, Point, Point.ToString() as DecodedPoint
  from @Target;

-- Buffer radius in metres: points closer than this count as duplicates.
declare @PointBufferDistanceInMeters as Int = 1000;

-- Merge the data.
-- Insert the   Source   points that keep   Target   free of any pair of points
-- closer than @PointBufferDistanceInMeters.  A   Source   row is inserted when:
--   1. no existing   Target   row lies within the buffer, and
--   2. no higher-Id   Source   row that itself satisfies rule 1 lies within the
--      buffer (the highest Id of a group of close candidates wins).
-- Rule 2 ignores   Source   rows already rejected by rule 1.  Without that
-- restriction a valid point could be dropped because of a neighbour that is
-- never inserted.
-- Limitation: the result is duplicate-free but not necessarily maximal.  In a
-- chain A - B - C where only adjacent points are within the buffer, A and B
-- are both dropped and only C is inserted.
insert into @Target ( Point )
  select S.Point
    from @Source as S
    where
      -- Remove rows that conflict with an existing   Target   row.
      not exists (
        select 42
          from @Target as T
          where S.Point.STDistance( T.Point ) < @PointBufferDistanceInMeters ) and
      -- Remove rows that conflict with another, still eligible,   Source   row.
      not exists (
        select 42
          from @Source as O
          where S.Point.STDistance( O.Point ) < @PointBufferDistanceInMeters
            and S.Id < O.Id
            and not exists (
              select 42
                from @Target as OT
                where O.Point.STDistance( OT.Point ) < @PointBufferDistanceInMeters ) );
-- Show the target after the merge.
select Id, Point, Point.ToString() as DecodedPoint from @Target;