使用 3 个 FOR 循环优化 SQL 查询
Optimize SQL query with 3 FOR loops
我有一个完全可用的 SQL 查询。但是,它非常非常慢。我正在寻找优化它的方法。
CREATE TABLE trajectory_geom (
id SERIAL PRIMARY KEY,
trajectory_id BIGINT,
user_id BIGINT,
geom GEOMETRY(Linestring, 4326)
);
INSERT INTO trajectory_geom (trajectory_id, user_id, geom)
SELECT
p.trajectory_id,
p.user_id,
ST_Transform(ST_MakeLine(p.geom), 4326)
FROM point p
GROUP BY p.trajectory_id
;
DO $$
DECLARE
urow record;
vrow record;
wrow record;
BEGIN
FOR wrow IN
SELECT DISTINCT(p.user_id) FROM point p
LOOP
raise notice 'User id: %', wrow.user_id;
FOR vrow IN
SELECT DISTINCT(p.trajectory_id) FROM point p WHERE p.user_id = wrow.user_id
LOOP
FOR urow IN
SELECT
analyzed_tr.*
FROM trajectory_start_end_geom analyzed_tr
WHERE
analyzed_tr.user_id = wrow.user_id
AND
ST_Intersects (
(
analyzed_tr.start_geom
)
,
(
SELECT g.geom
FROM trajectory_geom g
WHERE g.trajectory_id = vrow.trajectory_id
)
) = TRUE
LOOP
INSERT INTO trajectories_intercepting_with_starting_point (initial_trajectory_id, mathced_trajectory_id, user_id)
SELECT
vrow.trajectory_id,
urow.trajectory_id,
wrow.user_id
WHERE urow.trajectory_id <> vrow.trajectory_id
;
END LOOP;
END LOOP;
END LOOP;
END;
$$;
它有 3 个循环...我怎样才能避免它们?
基本上,我循环所有用户 ID,对于每个用户循环所有轨迹并检查轨迹是否与该用户的任何其他轨迹交互。
架构:
CREATE TABLE public.trajectory_start_end_geom
(
id integer NOT NULL DEFAULT nextval('trajectory_start_end_geom_id_seq'::regclass),
trajectory_id bigint,
user_id bigint,
start_geom geometry(Polygon,4326),
end_geom geometry(Polygon,4326),
CONSTRAINT trajectory_start_end_geom_pkey PRIMARY KEY (id)
)
WITH (
OIDS=FALSE
);
CREATE TABLE public.trajectory_geom
(
id integer NOT NULL DEFAULT nextval('trajectory_geom_id_seq'::regclass),
trajectory_id bigint,
user_id bigint,
geom geometry(LineString,4326),
CONSTRAINT trajectory_geom_pkey PRIMARY KEY (id)
)
WITH (
OIDS=FALSE
);
CREATE TABLE public.point
(
id integer NOT NULL DEFAULT nextval('point_id_seq'::regclass),
user_id bigint,
date date,
"time" time without time zone,
lat double precision,
lon double precision,
trajectory_id integer,
geom geometry(Geometry,4326),
CONSTRAINT point_pkey PRIMARY KEY (id)
)
WITH (
OIDS=FALSE
);
这应该可以解决问题:
WITH vrow AS(
INSERT INTO trajectory_geom (trajectory_id, user_id, geom)
SELECT
p.trajectory_id,
p.user_id,
ST_Transform(ST_MakeLine(p.geom), 4326) AS geom
FROM point p
GROUP BY p.trajectory_id
RETURNING trajectory_id, user_id, geom
)
INSERT INTO trajectories_intercepting_with_starting_point (initial_trajectory_id, mathced_trajectory_id, user_id)
SELECT
vrow.trajectory_id,
urow.trajectory_id,
vrow.user_id
FROM trajectory_start_end_geom AS urow
JOIN vrow
ON urow.user_id = vrow.user_id
AND urow.trajectory_id <> vrow.trajectory_id
AND ST_Intersects(urow.start_geom, vrow.geom)
如果您不需要插入 trajectory_geom
删除它(和 CTE)会加快它的速度
试试这个 SQL 查询。希望这有帮助。
INSERT INTO trajectories_intercepting_with_starting_point
(initial_trajectory_id, mathced_trajectory_id, user_id)
SELECT
TG.trajectory_id AS first_trajectory_id,
TG2.trajectory_id AS last_trajectory_id,
TG.user_id
FROM Trajectory_geom AS TG
JOIN Trajectory_geom AS TG2 ON TG.user_id = TG2.user_id
AND TG.trajectory_id < TG2.trajectory_id
JOIN Trajectory_start_end_geom AS TSE ON TSE.trajectory_id = TG.trajectory_id
WHERE ST_Intersects(TSE.start_geom, TG2.geom) = TRUE
我有一个完全可用的 SQL 查询。但是,它非常非常慢。我正在寻找优化它的方法。
CREATE TABLE trajectory_geom (
id SERIAL PRIMARY KEY,
trajectory_id BIGINT,
user_id BIGINT,
geom GEOMETRY(Linestring, 4326)
);
INSERT INTO trajectory_geom (trajectory_id, user_id, geom)
SELECT
p.trajectory_id,
p.user_id,
ST_Transform(ST_MakeLine(p.geom), 4326)
FROM point p
GROUP BY p.trajectory_id
;
DO $$
DECLARE
urow record;
vrow record;
wrow record;
BEGIN
FOR wrow IN
SELECT DISTINCT(p.user_id) FROM point p
LOOP
raise notice 'User id: %', wrow.user_id;
FOR vrow IN
SELECT DISTINCT(p.trajectory_id) FROM point p WHERE p.user_id = wrow.user_id
LOOP
FOR urow IN
SELECT
analyzed_tr.*
FROM trajectory_start_end_geom analyzed_tr
WHERE
analyzed_tr.user_id = wrow.user_id
AND
ST_Intersects (
(
analyzed_tr.start_geom
)
,
(
SELECT g.geom
FROM trajectory_geom g
WHERE g.trajectory_id = vrow.trajectory_id
)
) = TRUE
LOOP
INSERT INTO trajectories_intercepting_with_starting_point (initial_trajectory_id, mathced_trajectory_id, user_id)
SELECT
vrow.trajectory_id,
urow.trajectory_id,
wrow.user_id
WHERE urow.trajectory_id <> vrow.trajectory_id
;
END LOOP;
END LOOP;
END LOOP;
END;
$$;
它有 3 个循环...我怎样才能避免它们?
基本上,我循环所有用户 ID,对于每个用户循环所有轨迹并检查轨迹是否与该用户的任何其他轨迹交互。
架构:
CREATE TABLE public.trajectory_start_end_geom
(
id integer NOT NULL DEFAULT nextval('trajectory_start_end_geom_id_seq'::regclass),
trajectory_id bigint,
user_id bigint,
start_geom geometry(Polygon,4326),
end_geom geometry(Polygon,4326),
CONSTRAINT trajectory_start_end_geom_pkey PRIMARY KEY (id)
)
WITH (
OIDS=FALSE
);
CREATE TABLE public.trajectory_geom
(
id integer NOT NULL DEFAULT nextval('trajectory_geom_id_seq'::regclass),
trajectory_id bigint,
user_id bigint,
geom geometry(LineString,4326),
CONSTRAINT trajectory_geom_pkey PRIMARY KEY (id)
)
WITH (
OIDS=FALSE
);
CREATE TABLE public.point
(
id integer NOT NULL DEFAULT nextval('point_id_seq'::regclass),
user_id bigint,
date date,
"time" time without time zone,
lat double precision,
lon double precision,
trajectory_id integer,
geom geometry(Geometry,4326),
CONSTRAINT point_pkey PRIMARY KEY (id)
)
WITH (
OIDS=FALSE
);
这应该可以解决问题:
WITH vrow AS(
INSERT INTO trajectory_geom (trajectory_id, user_id, geom)
SELECT
p.trajectory_id,
p.user_id,
ST_Transform(ST_MakeLine(p.geom), 4326) AS geom
FROM point p
GROUP BY p.trajectory_id
RETURNING trajectory_id, user_id, geom
)
INSERT INTO trajectories_intercepting_with_starting_point (initial_trajectory_id, mathced_trajectory_id, user_id)
SELECT
vrow.trajectory_id,
urow.trajectory_id,
vrow.user_id
FROM trajectory_start_end_geom AS urow
JOIN vrow
ON urow.user_id = vrow.user_id
AND urow.trajectory_id <> vrow.trajectory_id
AND ST_Intersects(urow.start_geom, vrow.geom)
如果您不需要插入 trajectory_geom
删除它(和 CTE)会加快它的速度
试试这个 SQL 查询。希望这有帮助。
INSERT INTO trajectories_intercepting_with_starting_point
(initial_trajectory_id, mathced_trajectory_id, user_id)
SELECT
TG.trajectory_id AS first_trajectory_id,
TG2.trajectory_id AS last_trajectory_id,
TG.user_id
FROM Trajectory_geom AS TG
JOIN Trajectory_geom AS TG2 ON TG.user_id = TG2.user_id
AND TG.trajectory_id < TG2.trajectory_id
JOIN Trajectory_start_end_geom AS TSE ON TSE.trajectory_id = TG.trajectory_id
WHERE ST_Intersects(TSE.start_geom, TG2.geom) = TRUE