优化包含 3 个 FOR 循环的 SQL 查询

Optimize SQL query with 3 FOR loops

我有一个完全可用的 SQL 查询。但是,它非常非常慢。我正在寻找优化它的方法。

-- Line geometry per trajectory, together with the owning user's id.
CREATE TABLE trajectory_geom (
    id            SERIAL PRIMARY KEY,          -- surrogate key
    trajectory_id BIGINT,
    user_id       BIGINT,
    geom          GEOMETRY(LineString, 4326)   -- LineString in SRID 4326
);

-- Collapse the points of every trajectory into a single line and store it
-- in trajectory_geom.
INSERT INTO trajectory_geom (trajectory_id, user_id, geom)
SELECT
    p.trajectory_id,
    p.user_id,
    -- An aggregate without ORDER BY receives its rows in arbitrary order, so
    -- the vertex order of the line has to be spelled out.
    -- NOTE(review): assumes (date, "time") is the travel order of the points,
    -- with id used only as a tie-breaker -- confirm against the data.
    ST_Transform(
        ST_MakeLine(p.geom ORDER BY p.date, p."time", p.id),
        4326
    )
FROM point AS p
-- user_id is selected un-aggregated, so PostgreSQL requires it in GROUP BY;
-- grouping by trajectory_id alone is rejected with
-- "column p.user_id must appear in the GROUP BY clause".
GROUP BY
    p.trajectory_id,
    p.user_id
;

-- Row-by-row version: for every user, for every trajectory of that user,
-- find the start polygons (of the same user) that intersect the trajectory's
-- line and record each pair, skipping a trajectory paired with itself.
DO $$
DECLARE
  urow record;  -- trajectory_start_end_geom row whose start_geom intersects
  vrow record;  -- trajectory currently being examined
  wrow record;  -- user currently being examined
BEGIN
  -- Outer loop: each distinct user_id present in point.
  FOR wrow IN
    SELECT DISTINCT p.user_id
    FROM point AS p
  LOOP
    RAISE NOTICE 'User id: %', wrow.user_id;

    -- Middle loop: each distinct trajectory_id recorded for this user.
    FOR vrow IN
      SELECT DISTINCT p.trajectory_id
      FROM point AS p
      WHERE p.user_id = wrow.user_id
    LOOP

      -- Inner loop: this user's start polygons that intersect the line
      -- stored in trajectory_geom for the current trajectory.
      FOR urow IN
        SELECT analyzed_tr.trajectory_id
        FROM trajectory_start_end_geom AS analyzed_tr
        WHERE analyzed_tr.user_id = wrow.user_id
          AND ST_Intersects(
                analyzed_tr.start_geom,
                (
                  SELECT g.geom
                  FROM trajectory_geom AS g
                  WHERE g.trajectory_id = vrow.trajectory_id
                )
              )
      LOOP
        -- The WHERE clause drops the pairing of a trajectory with itself.
        INSERT INTO trajectories_intercepting_with_starting_point (initial_trajectory_id, mathced_trajectory_id, user_id)
        SELECT
          vrow.trajectory_id,
          urow.trajectory_id,
          wrow.user_id
        WHERE urow.trajectory_id <> vrow.trajectory_id;
      END LOOP;

    END LOOP;
  END LOOP;
END;
$$;

它有 3 个循环...我怎样才能避免它们?

基本上,我遍历所有用户 ID,对每个用户再遍历其所有轨迹,并检查该轨迹是否与该用户的任何其他轨迹相交。

架构:

-- Per-trajectory start_geom / end_geom polygons (SRID 4326), with the
-- trajectory and user ids they belong to.
CREATE TABLE public.trajectory_start_end_geom (
    -- Key values are drawn from trajectory_start_end_geom_id_seq.
    id            integer NOT NULL
                  DEFAULT nextval('trajectory_start_end_geom_id_seq'::regclass),
    trajectory_id bigint,
    user_id       bigint,
    start_geom    geometry(Polygon, 4326),
    end_geom      geometry(Polygon, 4326),
    CONSTRAINT trajectory_start_end_geom_pkey PRIMARY KEY (id)
)
WITH (OIDS = FALSE);

-- LineString geometry (SRID 4326) per trajectory, with the owning user's id.
CREATE TABLE public.trajectory_geom (
    -- Key values are drawn from trajectory_geom_id_seq.
    id            integer NOT NULL
                  DEFAULT nextval('trajectory_geom_id_seq'::regclass),
    trajectory_id bigint,
    user_id       bigint,
    geom          geometry(LineString, 4326),
    CONSTRAINT trajectory_geom_pkey PRIMARY KEY (id)
)
WITH (OIDS = FALSE);

-- Individual recorded points; each row carries the user and trajectory it
-- belongs to, a date/time pair, lat/lon values and a geometry in SRID 4326.
CREATE TABLE public.point (
    -- Key values are drawn from point_id_seq.
    id            integer NOT NULL
                  DEFAULT nextval('point_id_seq'::regclass),
    user_id       bigint,
    date          date,
    "time"        time without time zone,
    lat           double precision,
    lon           double precision,
    -- integer here, whereas the other tables declare trajectory_id as bigint.
    trajectory_id integer,
    geom          geometry(Geometry, 4326),
    CONSTRAINT point_pkey PRIMARY KEY (id)
)
WITH (OIDS = FALSE);

这应该可以解决问题:

-- Set-based replacement for the three nested loops: build the trajectory
-- lines and record the intersecting pairs in one statement.
WITH vrow AS (
    -- Data-modifying CTE: the rows inserted into trajectory_geom are handed
    -- to the outer query through RETURNING.
    INSERT INTO trajectory_geom (trajectory_id, user_id, geom)
    SELECT
        p.trajectory_id,
        p.user_id,
        -- Without ORDER BY the aggregate receives points in arbitrary order.
        -- NOTE(review): assumes (date, "time") is the travel order of the
        -- points, with id as a tie-breaker -- confirm against the data.
        ST_Transform(
            ST_MakeLine(p.geom ORDER BY p.date, p."time", p.id),
            4326
        ) AS geom
    FROM point AS p
    -- user_id is selected un-aggregated, so it must be grouped as well;
    -- PostgreSQL rejects GROUP BY p.trajectory_id on its own.
    GROUP BY
        p.trajectory_id,
        p.user_id
    RETURNING trajectory_id, user_id, geom
)
INSERT INTO trajectories_intercepting_with_starting_point (initial_trajectory_id, mathced_trajectory_id, user_id)
SELECT
    vrow.trajectory_id,   -- trajectory whose line is tested
    urow.trajectory_id,   -- trajectory whose start polygon intersects it
    vrow.user_id
FROM trajectory_start_end_geom AS urow
INNER JOIN vrow
    ON urow.user_id = vrow.user_id
    -- never pair a trajectory with itself
    AND urow.trajectory_id <> vrow.trajectory_id
    AND ST_Intersects(urow.start_geom, vrow.geom)
;

如果您不需要向 trajectory_geom 插入数据,去掉这条 INSERT(以及整个 CTE)可以加快查询速度。

试试这个 SQL 查询。希望这有帮助。

-- Set-based equivalent of the nested loops, reading the lines already stored
-- in trajectory_geom.
--
-- For every trajectory line (tg), find the start polygons (tse) of the same
-- user's other trajectories that intersect it.  The relation is directional
-- (start polygon of one trajectory vs. line of another), so:
--   * the pair filter is <>; using < would drop half of the ordered pairs;
--   * the line's trajectory is the "initial" id and the start polygon's
--     trajectory is the "matched" id, as in the loop version.
-- trajectory_start_end_geom carries user_id itself, so no self-join on
-- trajectory_geom is needed.
INSERT INTO trajectories_intercepting_with_starting_point
    (initial_trajectory_id, mathced_trajectory_id, user_id)
SELECT
    tg.trajectory_id  AS initial_trajectory_id,
    tse.trajectory_id AS matched_trajectory_id,
    tg.user_id
FROM trajectory_geom AS tg
INNER JOIN trajectory_start_end_geom AS tse
    ON tse.user_id = tg.user_id
    AND tse.trajectory_id <> tg.trajectory_id
WHERE ST_Intersects(tse.start_geom, tg.geom)
;