MySQL 内连接和子查询需要很长时间
MySQL Inner join and subqueries takes a long time
我正在尝试从以下查询中获取结果,但计算需要很长时间
我该如何改进它?
请帮忙
-- For agent 1234: the in_client row carrying the newest `modified` value per
-- id_number, kept only when no in_policy row points at that client.
-- NOTE(review): the outer `ic` is matched purely on (id_number, modified); it
-- is not itself filtered by agent_id.
SELECT
    ic.id,
    ic.id_number,
    ia.phone_number
FROM in_client AS ic
-- Derived table: newest `modified` per id_number among agent 1234's rows,
-- dropping groups whose newest value is older than the cutoff literal
-- (presumably a placeholder for a real DATETIME).
INNER JOIN (
    SELECT
        id_number,
        MAX(modified) AS modified
    FROM in_client
    WHERE agent_id = 1234
        AND id_number IS NOT NULL
    GROUP BY id_number
    HAVING MAX(modified) >= '2021-mm-dd hh:mm:ss'
) AS max USING (id_number, modified)
INNER JOIN in_agent AS ia
    ON ia.id = ic.agent_id
-- Anti-join: together with the IS NULL test below, keeps only clients that
-- have no matching in_policy row.
LEFT JOIN in_policy AS ip
    ON ip.client_id = ic.id
WHERE ip.client_id IS NULL
架构:
in_client
id - PK (INT)
agent_id - FK (INT)
id_number - VARCHAR
modified - DATETIME
in_agent
id - PK (INT)
name - VARCHAR
in_policy
id - PK (INT)
client_id - FK (INT)
注意:每张表还有更多其他字段,但它们与此查询无关
总体思路:
此查询从客户表(in_client)中获取同一代理名下、且在保单表(in_policy)中没有记录的所有客户。
每个客户可能被创建多次,因此我按 id_number 使用 GROUP BY 分组,然后取每组中最新的一条记录(按 modified 字段)。
每个客户都通过外键(FK)与代理相关联,因此在子查询上使用了 INNER JOIN 以实现精确匹配。为了得到所有在保单表中没有记录的客户,我使用了 LEFT OUTER JOIN。
查询说明:
在 in_client 表中,尝试添加以下索引:
INDEX idx1_client (id_number, modified)
, INDEX idx2_client (agent_id, id_number, modified)
选择这些索引,是为了满足派生表 max 中查询表达式的需要(便于按 agent_id、id_number 和 modified 访问),以及通过 USING (id_number, modified) 连接回 in_client 的需要。这只是初步的猜测,因为我没有这些表的完整结构。
我用 MariaDB 和 MySQL 做了测试,并使用了较新的版本,以便通过递归 CTE 动态生成测试数据。
为评估问题而创建的简单测试用例:
The fiddle (runs with MariaDB 10.3+ and MySQL 8+)
-- Agent table for the test case: auto-increment key, a name and a phone number.
CREATE TABLE in_agent (
    id           INT AUTO_INCREMENT PRIMARY KEY,
    name         VARCHAR(30),
    phone_number VARCHAR(20)
);
-- Client table for the test case. `modified` defaults to the insert time.
CREATE TABLE in_client (
    id        INT AUTO_INCREMENT PRIMARY KEY,
    agent_id  INT,
    id_number VARCHAR(30),
    modified  DATETIME DEFAULT CURRENT_TIMESTAMP,
    -- Covers the join back from the derived table USING (id_number, modified).
    INDEX idx1 (id_number, modified),
    -- Covers the per-agent GROUP BY id_number / MAX(modified) derived table.
    INDEX idx2 (agent_id, id_number, modified),
    -- Declared at table level on purpose: MySQL parses but ignores a
    -- column-level `REFERENCES` clause, so the inline form creates no
    -- foreign key there. idx2 (leading column agent_id) backs this constraint.
    CONSTRAINT in_client_agent_id_fk
        FOREIGN KEY (agent_id) REFERENCES in_agent (id)
);
-- Policy table for the test case; each row points at one client.
CREATE TABLE in_policy (
    id        INT AUTO_INCREMENT PRIMARY KEY,
    client_id INT,
    -- Declared at table level on purpose: MySQL parses but ignores a
    -- column-level `REFERENCES` clause. A real foreign key also makes InnoDB
    -- create an index on client_id, so the anti-join probe on
    -- ip.client_id = ic.id is no longer an unindexed lookup.
    CONSTRAINT in_policy_client_id_fk
        FOREIGN KEY (client_id) REFERENCES in_client (id)
);
-- Seed 100 agents named name1 .. name100 (phone_number is left NULL).
INSERT INTO in_agent (name)
WITH RECURSIVE seq (n) AS (
    SELECT 1
    UNION ALL
    SELECT n + 1
    FROM seq
    WHERE n < 100
)
SELECT CONCAT('name', n)
FROM seq
;
-- Seed 100 clients idnum1 .. idnum100: the first 50 are assigned agent_id 1,
-- the remaining 50 agent_id 2; `modified` takes its column default.
INSERT INTO in_client (agent_id, id_number)
WITH RECURSIVE seq (n) AS (
    SELECT 1
    UNION ALL
    SELECT n + 1
    FROM seq
    WHERE n < 100
)
SELECT
    TRUNCATE((n - 1) / 50, 0) + 1,
    CONCAT('idnum', n)
FROM seq
;
-- Sanity-check how many rows the two seed statements produced.
SELECT COUNT(*)
FROM in_agent;
SELECT COUNT(*)
FROM in_client;
-- For agent 1: the in_client row carrying the newest `modified` value per
-- id_number (newest value on/after 2021-01-01 01:00:00), kept only when no
-- in_policy row points at that client.
SELECT
    ic.id,
    ic.id_number,
    ia.phone_number
FROM in_client AS ic
-- Derived table: newest `modified` per id_number among agent 1's rows.
INNER JOIN (
    SELECT
        id_number,
        MAX(modified) AS modified
    FROM in_client
    WHERE agent_id = 1
        AND id_number IS NOT NULL
    GROUP BY id_number
    HAVING MAX(modified) >= '2021-01-01 01:00:00'
) AS max USING (id_number, modified)
INNER JOIN in_agent AS ia
    ON ia.id = ic.agent_id
-- Anti-join: with the IS NULL test below, keeps only clients without a policy.
LEFT JOIN in_policy AS ip
    ON ip.client_id = ic.id
-- The outer row must belong to the same agent as the derived table: without
-- this predicate, another agent's row sharing the same (id_number, modified)
-- pair would also be returned. It also gives the optimizer the option of
-- using idx2 (agent_id, id_number, modified) for the lookup.
WHERE ic.agent_id = 1
    AND ip.client_id IS NULL
;
-- Execution plan for exactly the query above; keep the two in sync.
EXPLAIN
SELECT
    ic.id,
    ic.id_number,
    ia.phone_number
FROM in_client AS ic
INNER JOIN (
    SELECT
        id_number,
        MAX(modified) AS modified
    FROM in_client
    WHERE agent_id = 1
        AND id_number IS NOT NULL
    GROUP BY id_number
    HAVING MAX(modified) >= '2021-01-01 01:00:00'
) AS max USING (id_number, modified)
INNER JOIN in_agent AS ia
    ON ia.id = ic.agent_id
LEFT JOIN in_policy AS ip
    ON ip.client_id = ic.id
WHERE ic.agent_id = 1
    AND ip.client_id IS NULL
;
所选索引的效果似乎比原始问题详情中展示的更好。
这应该有所帮助。
我正在尝试从以下查询中获取结果,但计算需要很长时间
我该如何改进它?
请帮忙
-- For agent 1234: the in_client row carrying the newest `modified` value per
-- id_number, kept only when no in_policy row points at that client.
-- NOTE(review): the outer `ic` is matched purely on (id_number, modified); it
-- is not itself filtered by agent_id.
SELECT
    ic.id,
    ic.id_number,
    ia.phone_number
FROM in_client AS ic
-- Derived table: newest `modified` per id_number among agent 1234's rows,
-- dropping groups whose newest value is older than the cutoff literal
-- (presumably a placeholder for a real DATETIME).
INNER JOIN (
    SELECT
        id_number,
        MAX(modified) AS modified
    FROM in_client
    WHERE agent_id = 1234
        AND id_number IS NOT NULL
    GROUP BY id_number
    HAVING MAX(modified) >= '2021-mm-dd hh:mm:ss'
) AS max USING (id_number, modified)
INNER JOIN in_agent AS ia
    ON ia.id = ic.agent_id
-- Anti-join: together with the IS NULL test below, keeps only clients that
-- have no matching in_policy row.
LEFT JOIN in_policy AS ip
    ON ip.client_id = ic.id
WHERE ip.client_id IS NULL
架构:
in_client
id - PK (INT)
agent_id - FK (INT)
id_number - VARCHAR
modified - DATETIME
in_agent
id - PK (INT)
name - VARCHAR
in_policy
id - PK (INT)
client_id - FK (INT)
注意:每张表还有更多其他字段,但它们与此查询无关
总体思路:
此查询从客户表(in_client)中获取同一代理名下、且在保单表(in_policy)中没有记录的所有客户。
每个客户可能被创建多次,因此我按 id_number 使用 GROUP BY 分组,然后取每组中最新的一条记录(按 modified 字段)。
每个客户都通过外键(FK)与代理相关联,因此在子查询上使用了 INNER JOIN 以实现精确匹配。为了得到所有在保单表中没有记录的客户,我使用了 LEFT OUTER JOIN。
查询说明:
在 in_client 表中,尝试添加以下索引:
INDEX idx1_client (id_number, modified)
, INDEX idx2_client (agent_id, id_number, modified)
选择这些索引,是为了满足派生表 max 中查询表达式的需要(便于按 agent_id、id_number 和 modified 访问),以及通过 USING (id_number, modified) 连接回 in_client 的需要。这只是初步的猜测,因为我没有这些表的完整结构。
我用 MariaDB 和 MySQL 做了测试,并使用了较新的版本,以便通过递归 CTE 动态生成测试数据。
为评估问题而创建的简单测试用例:
The fiddle (runs with MariaDB 10.3+ and MySQL 8+)
-- Agent table for the test case: auto-increment key, a name and a phone number.
CREATE TABLE in_agent (
    id           INT AUTO_INCREMENT PRIMARY KEY,
    name         VARCHAR(30),
    phone_number VARCHAR(20)
);
-- Client table for the test case. `modified` defaults to the insert time.
CREATE TABLE in_client (
    id        INT AUTO_INCREMENT PRIMARY KEY,
    agent_id  INT,
    id_number VARCHAR(30),
    modified  DATETIME DEFAULT CURRENT_TIMESTAMP,
    -- Covers the join back from the derived table USING (id_number, modified).
    INDEX idx1 (id_number, modified),
    -- Covers the per-agent GROUP BY id_number / MAX(modified) derived table.
    INDEX idx2 (agent_id, id_number, modified),
    -- Declared at table level on purpose: MySQL parses but ignores a
    -- column-level `REFERENCES` clause, so the inline form creates no
    -- foreign key there. idx2 (leading column agent_id) backs this constraint.
    CONSTRAINT in_client_agent_id_fk
        FOREIGN KEY (agent_id) REFERENCES in_agent (id)
);
-- Policy table for the test case; each row points at one client.
CREATE TABLE in_policy (
    id        INT AUTO_INCREMENT PRIMARY KEY,
    client_id INT,
    -- Declared at table level on purpose: MySQL parses but ignores a
    -- column-level `REFERENCES` clause. A real foreign key also makes InnoDB
    -- create an index on client_id, so the anti-join probe on
    -- ip.client_id = ic.id is no longer an unindexed lookup.
    CONSTRAINT in_policy_client_id_fk
        FOREIGN KEY (client_id) REFERENCES in_client (id)
);
-- Seed 100 agents named name1 .. name100 (phone_number is left NULL).
INSERT INTO in_agent (name)
WITH RECURSIVE seq (n) AS (
    SELECT 1
    UNION ALL
    SELECT n + 1
    FROM seq
    WHERE n < 100
)
SELECT CONCAT('name', n)
FROM seq
;
-- Seed 100 clients idnum1 .. idnum100: the first 50 are assigned agent_id 1,
-- the remaining 50 agent_id 2; `modified` takes its column default.
INSERT INTO in_client (agent_id, id_number)
WITH RECURSIVE seq (n) AS (
    SELECT 1
    UNION ALL
    SELECT n + 1
    FROM seq
    WHERE n < 100
)
SELECT
    TRUNCATE((n - 1) / 50, 0) + 1,
    CONCAT('idnum', n)
FROM seq
;
-- Sanity-check how many rows the two seed statements produced.
SELECT COUNT(*)
FROM in_agent;
SELECT COUNT(*)
FROM in_client;
-- For agent 1: the in_client row carrying the newest `modified` value per
-- id_number (newest value on/after 2021-01-01 01:00:00), kept only when no
-- in_policy row points at that client.
SELECT
    ic.id,
    ic.id_number,
    ia.phone_number
FROM in_client AS ic
-- Derived table: newest `modified` per id_number among agent 1's rows.
INNER JOIN (
    SELECT
        id_number,
        MAX(modified) AS modified
    FROM in_client
    WHERE agent_id = 1
        AND id_number IS NOT NULL
    GROUP BY id_number
    HAVING MAX(modified) >= '2021-01-01 01:00:00'
) AS max USING (id_number, modified)
INNER JOIN in_agent AS ia
    ON ia.id = ic.agent_id
-- Anti-join: with the IS NULL test below, keeps only clients without a policy.
LEFT JOIN in_policy AS ip
    ON ip.client_id = ic.id
-- The outer row must belong to the same agent as the derived table: without
-- this predicate, another agent's row sharing the same (id_number, modified)
-- pair would also be returned. It also gives the optimizer the option of
-- using idx2 (agent_id, id_number, modified) for the lookup.
WHERE ic.agent_id = 1
    AND ip.client_id IS NULL
;
-- Execution plan for exactly the query above; keep the two in sync.
EXPLAIN
SELECT
    ic.id,
    ic.id_number,
    ia.phone_number
FROM in_client AS ic
INNER JOIN (
    SELECT
        id_number,
        MAX(modified) AS modified
    FROM in_client
    WHERE agent_id = 1
        AND id_number IS NOT NULL
    GROUP BY id_number
    HAVING MAX(modified) >= '2021-01-01 01:00:00'
) AS max USING (id_number, modified)
INNER JOIN in_agent AS ia
    ON ia.id = ic.agent_id
LEFT JOIN in_policy AS ip
    ON ip.client_id = ic.id
WHERE ic.agent_id = 1
    AND ip.client_id IS NULL
;
所选索引的效果似乎比原始问题详情中展示的更好。
这应该有所帮助。