尝试在多个表中查找特定列的最新记录时优化查询
Optimizing query when trying to find latest record in multiple tables for specific column
问题:在多个表中,针对每个 linked_id,根据 created 列查找最新的一条记录;结果应包含 (user_id, MAX(created), linked_id)。该查询还必须能够与 WHERE 子句一起使用,以便根据 linked_id 查找单条记录。
实际涉及的表有好几个,这里列出其中 3 个,便于了解其结构(每个表中还有其他若干列,因为不会出现在查询结果中,此处已省略)。
-- Adjustment log table in schema "em". Only the columns used by the
-- latest-record-per-linked_id query are listed here.
CREATE TABLE em._logs_adjustments (
    id        serial      NOT NULL,               -- auto-incrementing surrogate key
    user_id   integer,                            -- nullable
    created   timestamptz NOT NULL DEFAULT now(), -- filled with now() when not supplied
    linked_id integer,                            -- nullable; the queries group/deduplicate on this
    CONSTRAINT _logs_adjustments_pkey PRIMARY KEY (id)
)
WITH (OIDS = FALSE);
-- Asset log table in schema "em". Only the columns used by the
-- latest-record-per-linked_id query are listed here.
-- NOTE(review): the queries in this document read asset_id from this table,
-- but the column declared here is linked_id -- confirm which name is correct.
CREATE TABLE em._logs_assets (
    id        serial      NOT NULL,               -- auto-incrementing surrogate key
    user_id   integer,                            -- nullable
    created   timestamptz NOT NULL DEFAULT now(), -- filled with now() when not supplied
    linked_id integer,                            -- nullable
    CONSTRAINT _logs_assets_pkey PRIMARY KEY (id)
)
WITH (OIDS = FALSE);
-- Condition-assessment log table in schema "em". Only the columns used by the
-- latest-record-per-linked_id query are listed here.
CREATE TABLE em._logs_condition_assessments (
    id        serial      NOT NULL,               -- auto-incrementing surrogate key
    user_id   integer,                            -- nullable
    created   timestamptz NOT NULL DEFAULT now(), -- filled with now() when not supplied
    linked_id integer,                            -- nullable; the queries group/deduplicate on this
    CONSTRAINT _logs_condition_assessments_pkey PRIMARY KEY (id)
)
WITH (OIDS = FALSE);
我目前使用的查询用了一个小技巧(array_agg),以避免把 user_id 放进 GROUP BY 子句;如果可能,希望去掉 array_agg。
-- Latest log entry per linked_id across the three log tables (asker's version).
-- Each UNION ALL branch reduces its table to one row per (linked_id, user_id)
-- carrying that pair's newest created value. The outer query then keeps the
-- newest value per linked_id and takes the first element of
-- array_agg(user_id ORDER BY ... DESC) to report the matching user_id without
-- listing user_id in the outer GROUP BY.
-- NOTE(review): _logs_assets is read through asset_id, while the DDL shown in
-- this document declares linked_id for that table -- confirm the column name.
SELECT
    MAX(per_user.max_created),
    per_user.linked_id,
    (array_agg(per_user.user_id ORDER BY per_user.max_created DESC))[1] AS user_id
FROM (
    SELECT user_id, MAX(created) AS max_created, asset_id AS linked_id
    FROM _logs_assets
    GROUP BY asset_id, user_id
    UNION ALL
    SELECT user_id, MAX(created) AS max_created, linked_id
    FROM _logs_adjustments
    GROUP BY linked_id, user_id
    UNION ALL
    SELECT user_id, MAX(created) AS max_created, linked_id
    FROM _logs_condition_assessments
    GROUP BY linked_id, user_id
) AS per_user
GROUP BY per_user.linked_id
ORDER BY per_user.linked_id DESC;
我得到了想要的结果,但我认为这不是正确的做法,尤其是用了本不该使用的 array_agg;而且有些表的记录可能超过 150 万条,导致查询需要 10–15 秒以上才能运行完。如能指点正确的方向,不胜感激。
SELECT DISTINCT ON ( expression [, ...] ) keeps only the first row of each set of rows where the given expressions evaluate to equal. The DISTINCT ON expressions are interpreted using the same rules as for ORDER BY (see above). Note that the "first row" of each set is unpredictable unless ORDER BY is used to ensure that the desired row appears first
-- Latest log entry per linked_id across the three log tables.
--
-- DISTINCT ON (linked_id) keeps the first row of every linked_id group in
-- ORDER BY order, so sorting each group by created DESC surfaces its newest
-- entry; neither GROUP BY nor array_agg is needed.
--
-- user_id is appended to the ORDER BY as a tie-breaker: when several rows of
-- one linked_id share the newest created value, ordering by created alone
-- leaves the returned user_id unpredictable. NULLS LAST prefers a known user
-- over a NULL user_id in such a tie.
--
-- To fetch a single record, add a "WHERE linked_id = <value>" clause between
-- the subquery and the ORDER BY.
-- NOTE(review): _logs_assets is read through asset_id, while the DDL shown in
-- this document declares linked_id for that table -- confirm the column name.
SELECT DISTINCT ON (linked_id)
    created,
    linked_id,
    user_id
FROM (
    SELECT user_id, created, asset_id AS linked_id
    FROM _logs_assets
    UNION ALL
    SELECT user_id, created, linked_id
    FROM _logs_adjustments
    UNION ALL
    SELECT user_id, created, linked_id
    FROM _logs_condition_assessments
) AS all_logs
ORDER BY
    linked_id DESC,
    created DESC,
    user_id DESC NULLS LAST;
问题:在多个表中,针对每个 linked_id,根据 created 列查找最新的一条记录;结果应包含 (user_id, MAX(created), linked_id)。该查询还必须能够与 WHERE 子句一起使用,以便根据 linked_id 查找单条记录。
实际涉及的表有好几个,这里列出其中 3 个,便于了解其结构(每个表中还有其他若干列,因为不会出现在查询结果中,此处已省略)。
-- Adjustment log table in schema "em". Only the columns used by the
-- latest-record-per-linked_id query are listed here.
CREATE TABLE em._logs_adjustments (
    id        serial      NOT NULL,               -- auto-incrementing surrogate key
    user_id   integer,                            -- nullable
    created   timestamptz NOT NULL DEFAULT now(), -- filled with now() when not supplied
    linked_id integer,                            -- nullable; the queries group/deduplicate on this
    CONSTRAINT _logs_adjustments_pkey PRIMARY KEY (id)
)
WITH (OIDS = FALSE);
-- Asset log table in schema "em". Only the columns used by the
-- latest-record-per-linked_id query are listed here.
-- NOTE(review): the queries in this document read asset_id from this table,
-- but the column declared here is linked_id -- confirm which name is correct.
CREATE TABLE em._logs_assets (
    id        serial      NOT NULL,               -- auto-incrementing surrogate key
    user_id   integer,                            -- nullable
    created   timestamptz NOT NULL DEFAULT now(), -- filled with now() when not supplied
    linked_id integer,                            -- nullable
    CONSTRAINT _logs_assets_pkey PRIMARY KEY (id)
)
WITH (OIDS = FALSE);
-- Condition-assessment log table in schema "em". Only the columns used by the
-- latest-record-per-linked_id query are listed here.
CREATE TABLE em._logs_condition_assessments (
    id        serial      NOT NULL,               -- auto-incrementing surrogate key
    user_id   integer,                            -- nullable
    created   timestamptz NOT NULL DEFAULT now(), -- filled with now() when not supplied
    linked_id integer,                            -- nullable; the queries group/deduplicate on this
    CONSTRAINT _logs_condition_assessments_pkey PRIMARY KEY (id)
)
WITH (OIDS = FALSE);
我目前使用的查询用了一个小技巧(array_agg),以避免把 user_id 放进 GROUP BY 子句;如果可能,希望去掉 array_agg。
-- Latest log entry per linked_id across the three log tables (asker's version).
-- Each UNION ALL branch reduces its table to one row per (linked_id, user_id)
-- carrying that pair's newest created value. The outer query then keeps the
-- newest value per linked_id and takes the first element of
-- array_agg(user_id ORDER BY ... DESC) to report the matching user_id without
-- listing user_id in the outer GROUP BY.
-- NOTE(review): _logs_assets is read through asset_id, while the DDL shown in
-- this document declares linked_id for that table -- confirm the column name.
SELECT
    MAX(per_user.max_created),
    per_user.linked_id,
    (array_agg(per_user.user_id ORDER BY per_user.max_created DESC))[1] AS user_id
FROM (
    SELECT user_id, MAX(created) AS max_created, asset_id AS linked_id
    FROM _logs_assets
    GROUP BY asset_id, user_id
    UNION ALL
    SELECT user_id, MAX(created) AS max_created, linked_id
    FROM _logs_adjustments
    GROUP BY linked_id, user_id
    UNION ALL
    SELECT user_id, MAX(created) AS max_created, linked_id
    FROM _logs_condition_assessments
    GROUP BY linked_id, user_id
) AS per_user
GROUP BY per_user.linked_id
ORDER BY per_user.linked_id DESC;
我得到了想要的结果,但我认为这不是正确的做法,尤其是用了本不该使用的 array_agg;而且有些表的记录可能超过 150 万条,导致查询需要 10–15 秒以上才能运行完。如能指点正确的方向,不胜感激。
SELECT DISTINCT ON ( expression [, ...] ) keeps only the first row of each set of rows where the given expressions evaluate to equal. The DISTINCT ON expressions are interpreted using the same rules as for ORDER BY (see above). Note that the "first row" of each set is unpredictable unless ORDER BY is used to ensure that the desired row appears first
-- Latest log entry per linked_id across the three log tables.
--
-- DISTINCT ON (linked_id) keeps the first row of every linked_id group in
-- ORDER BY order, so sorting each group by created DESC surfaces its newest
-- entry; neither GROUP BY nor array_agg is needed.
--
-- user_id is appended to the ORDER BY as a tie-breaker: when several rows of
-- one linked_id share the newest created value, ordering by created alone
-- leaves the returned user_id unpredictable. NULLS LAST prefers a known user
-- over a NULL user_id in such a tie.
--
-- To fetch a single record, add a "WHERE linked_id = <value>" clause between
-- the subquery and the ORDER BY.
-- NOTE(review): _logs_assets is read through asset_id, while the DDL shown in
-- this document declares linked_id for that table -- confirm the column name.
SELECT DISTINCT ON (linked_id)
    created,
    linked_id,
    user_id
FROM (
    SELECT user_id, created, asset_id AS linked_id
    FROM _logs_assets
    UNION ALL
    SELECT user_id, created, linked_id
    FROM _logs_adjustments
    UNION ALL
    SELECT user_id, created, linked_id
    FROM _logs_condition_assessments
) AS all_logs
ORDER BY
    linked_id DESC,
    created DESC,
    user_id DESC NULLS LAST;