MySQL:WHERE IN 多个子查询中的任意一个
MySQL: WHERE IN any of subqueries
如何重组此查询:
SELECT * FROM tbl t
WHERE (
t.id IN <subquery1>
OR t.id IN <subquery2>
OR t.id IN <subquery3>
)
……改写成类似下面这样的形式:
SELECT * FROM tbl t
WHERE t.id IN (<subquery1> OR <subquery2> OR <subquery3>)
注意:所有 3 个子查询都从同一张表 tbl 中查询,但各自选择不同的列。
通过一些具体示例进一步阐明子查询:
- 子查询 1:
SELECT col1 FROM tbl WHERE value=100
- 子查询 2:
SELECT col2 FROM tbl WHERE value=200
- 子查询 3:
SELECT col3 FROM tbl WHERE value=300
表结构:
-- Sample table for the question: an integer primary key, three mandatory
-- integer columns (col1..col3) and a mandatory integer filter column (value).
CREATE TABLE tbl (
    id    INTEGER PRIMARY KEY,
    col1  INTEGER NOT NULL,
    col2  INTEGER NOT NULL,
    col3  INTEGER NOT NULL,
    value INTEGER NOT NULL
);
针对只有一个字段 i(共 2621441 行)的表 integers 做的快速测试:
-- Membership test written as three independent IN subqueries joined by OR;
-- the result is sorted by i.
SELECT i
FROM integers
WHERE i IN (SELECT i FROM integers WHERE i = 100)
   OR i IN (SELECT i FROM integers WHERE i = 200)
   OR i IN (SELECT i FROM integers WHERE i = 1000)
ORDER BY i;
+----+-------------+----------+------------+-------+---------------+---------+---------+-------+---------+----------+--------------------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+-------------+----------+------------+-------+---------------+---------+---------+-------+---------+----------+--------------------------+
| 1 | PRIMARY | integers | NULL | index | NULL | PRIMARY | 4 | NULL | 2615753 | 100.00 | Using where; Using index |
| 4 | SUBQUERY | integers | NULL | const | PRIMARY | PRIMARY | 4 | const | 1 | 100.00 | Using index |
| 3 | SUBQUERY | integers | NULL | const | PRIMARY | PRIMARY | 4 | const | 1 | 100.00 | Using index |
| 2 | SUBQUERY | integers | NULL | const | PRIMARY | PRIMARY | 4 | const | 1 | 100.00 | Using index |
+----+-------------+----------+------------+-------+---------------+---------+---------+-------+---------+----------+--------------------------+
4 rows in set, 1 warning (0.01 sec)
以上查询大约 2 秒返回结果。
-- Single IN whose subquery concatenates the three lookups with UNION ALL
-- (no duplicate elimination between the branches); the result is sorted by i.
SELECT i
FROM integers
WHERE i IN (
        SELECT i FROM integers WHERE i = 100
    UNION ALL
        SELECT i FROM integers WHERE i = 200
    UNION ALL
        SELECT i FROM integers WHERE i = 1000
)
ORDER BY i;
+----+--------------------+----------+------------+-------+---------------+---------+---------+-------+---------+----------+--------------------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+--------------------+----------+------------+-------+---------------+---------+---------+-------+---------+----------+--------------------------+
| 1 | PRIMARY | integers | NULL | index | NULL | PRIMARY | 4 | NULL | 2615753 | 100.00 | Using where; Using index |
| 2 | DEPENDENT SUBQUERY | integers | NULL | const | PRIMARY | PRIMARY | 4 | const | 1 | 100.00 | Using index |
| 3 | DEPENDENT UNION | integers | NULL | const | PRIMARY | PRIMARY | 4 | const | 1 | 100.00 | Using index |
| 4 | DEPENDENT UNION | integers | NULL | const | PRIMARY | PRIMARY | 4 | const | 1 | 100.00 | Using index |
+----+--------------------+----------+------------+-------+---------------+---------+---------+-------+---------+----------+--------------------------+
4 rows in set, 1 warning (0.00 sec)
以上查询大约 1.35 秒返回结果。
-- Single IN whose subquery merges the three lookups with UNION, which also
-- removes duplicates between the branches; the result is sorted by i.
SELECT i
FROM integers
WHERE i IN (
        SELECT i FROM integers WHERE i = 100
    UNION
        SELECT i FROM integers WHERE i = 200
    UNION
        SELECT i FROM integers WHERE i = 1000
)
ORDER BY i;
+----+--------------------+--------------+------------+-------+---------------+---------+---------+-------+---------+----------+--------------------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+--------------------+--------------+------------+-------+---------------+---------+---------+-------+---------+----------+--------------------------+
| 1 | PRIMARY | integers | NULL | index | NULL | PRIMARY | 4 | NULL | 2615753 | 100.00 | Using where; Using index |
| 2 | DEPENDENT SUBQUERY | integers | NULL | const | PRIMARY | PRIMARY | 4 | const | 1 | 100.00 | Using index |
| 3 | DEPENDENT UNION | integers | NULL | const | PRIMARY | PRIMARY | 4 | const | 1 | 100.00 | Using index |
| 4 | DEPENDENT UNION | integers | NULL | const | PRIMARY | PRIMARY | 4 | const | 1 | 100.00 | Using index |
| NULL | UNION RESULT | <union2,3,4> | NULL | ALL | NULL | NULL | NULL | NULL | NULL | NULL | Using temporary |
+----+--------------------+--------------+------------+-------+---------------+---------+---------+-------+---------+----------+--------------------------+
5 rows in set, 1 warning (0.00 sec)
以上查询耗时 1.6 秒返回结果。
“胜出者”是 UNION ALL。
我测试了很多变体(合成表,10kk(1000 万)行,colX 为 1..10kk 之间的随机数,value 为 1..1kk(100 万)之间的随机数)。最快的是:
-- Secondary index on the column that every subquery below filters on.
CREATE INDEX idx ON test (value);

-- One outer SELECT per column lookup. UNION merges the three id lists and
-- drops duplicates; the trailing ORDER BY sorts the combined result.
SELECT id
FROM test
WHERE id IN (
    SELECT col1 FROM test WHERE value = 100
)
UNION
SELECT id
FROM test
WHERE id IN (
    SELECT col2 FROM test WHERE value = 200
)
UNION
SELECT id
FROM test
WHERE id IN (
    SELECT col3 FROM test WHERE value = 1000
)
ORDER BY id;
mysql> SELECT id
-> FROM test
-> WHERE id in (SELECT col1 FROM test WHERE value = 100)
-> UNION
-> SELECT id
-> FROM test
-> WHERE id in (SELECT col2 FROM test WHERE value = 200)
-> UNION
-> SELECT id
-> FROM test
-> WHERE id in (SELECT col3 FROM test WHERE value = 1000)
-> ORDER BY id;
-- <output skipped>
36 rows in set (1.60 sec)
mysql> SELECT id
-> FROM test
-> WHERE (
-> id in (SELECT col1 FROM test WHERE value = 100)
-> OR
-> id in (SELECT col2 FROM test WHERE value = 200)
-> OR
-> id in (SELECT col3 FROM test WHERE value = 1000)
-> )
-> ORDER BY id;
-- <output skipped>
36 rows in set (29.18 sec)
实际上可以用一个子查询来完成:
-- Collapses the three lookups into one subquery: rows are restricted to the
-- three values of interest and CASE picks the column that belongs to each.
-- NOTE(review): the question's third subquery filters on value=300, while
-- this query uses 1000 -- confirm which value is intended.
SELECT *
FROM tbl t1
WHERE t1.id IN (
    SELECT
        CASE
            WHEN t2.value = 100  THEN t2.col1
            WHEN t2.value = 200  THEN t2.col2
            WHEN t2.value = 1000 THEN t2.col3
        END AS id
    FROM tbl t2
    WHERE t2.value IN (100, 200, 1000)
)
在 2474003 行数据上进行了测试,value 列上建有索引 v4:
+----+--------------+-------------+------------+--------+---------------+---------+---------+--------------------+------+----------+-----------------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+--------------+-------------+------------+--------+---------------+---------+---------+--------------------+------+----------+-----------------------+
| 1 | SIMPLE | <subquery2> | NULL | ALL | NULL | NULL | NULL | NULL | NULL | 100.00 | NULL |
| 1 | SIMPLE | t1 | NULL | eq_ref | PRIMARY | PRIMARY | 4 | <subquery2>.id | 1 | 100.00 | Using where |
| 2 | MATERIALIZED | t2 | NULL | range | v4 | v4 | 4 | NULL | 7 | 100.00 | Using index condition |
+----+--------------+-------------+------------+--------+---------------+---------+---------+--------------------+------+----------+-----------------------+
或者完全不用子查询:如果 col1、col2 和 col3 的取值都是 id 的子集,那么子查询本身就是答案。
如何重组此查询:
SELECT * FROM tbl t
WHERE (
t.id IN <subquery1>
OR t.id IN <subquery2>
OR t.id IN <subquery3>
)
……改写成类似下面这样的形式:
SELECT * FROM tbl t
WHERE t.id IN (<subquery1> OR <subquery2> OR <subquery3>)
注意:所有 3 个子查询都从同一张表 tbl 中查询,但各自选择不同的列。
通过一些具体示例进一步阐明子查询:
- 子查询 1:
SELECT col1 FROM tbl WHERE value=100
- 子查询 2:
SELECT col2 FROM tbl WHERE value=200
- 子查询 3:
SELECT col3 FROM tbl WHERE value=300
表结构:
-- Sample table for the question: an integer primary key, three mandatory
-- integer columns (col1..col3) and a mandatory integer filter column (value).
CREATE TABLE tbl (
    id    INTEGER PRIMARY KEY,
    col1  INTEGER NOT NULL,
    col2  INTEGER NOT NULL,
    col3  INTEGER NOT NULL,
    value INTEGER NOT NULL
);
针对只有一个字段 i(共 2621441 行)的表 integers 做的快速测试:
-- Membership test written as three independent IN subqueries joined by OR;
-- the result is sorted by i.
SELECT i
FROM integers
WHERE i IN (SELECT i FROM integers WHERE i = 100)
   OR i IN (SELECT i FROM integers WHERE i = 200)
   OR i IN (SELECT i FROM integers WHERE i = 1000)
ORDER BY i;
+----+-------------+----------+------------+-------+---------------+---------+---------+-------+---------+----------+--------------------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+-------------+----------+------------+-------+---------------+---------+---------+-------+---------+----------+--------------------------+
| 1 | PRIMARY | integers | NULL | index | NULL | PRIMARY | 4 | NULL | 2615753 | 100.00 | Using where; Using index |
| 4 | SUBQUERY | integers | NULL | const | PRIMARY | PRIMARY | 4 | const | 1 | 100.00 | Using index |
| 3 | SUBQUERY | integers | NULL | const | PRIMARY | PRIMARY | 4 | const | 1 | 100.00 | Using index |
| 2 | SUBQUERY | integers | NULL | const | PRIMARY | PRIMARY | 4 | const | 1 | 100.00 | Using index |
+----+-------------+----------+------------+-------+---------------+---------+---------+-------+---------+----------+--------------------------+
4 rows in set, 1 warning (0.01 sec)
以上查询大约 2 秒返回结果。
-- Single IN whose subquery concatenates the three lookups with UNION ALL
-- (no duplicate elimination between the branches); the result is sorted by i.
SELECT i
FROM integers
WHERE i IN (
        SELECT i FROM integers WHERE i = 100
    UNION ALL
        SELECT i FROM integers WHERE i = 200
    UNION ALL
        SELECT i FROM integers WHERE i = 1000
)
ORDER BY i;
+----+--------------------+----------+------------+-------+---------------+---------+---------+-------+---------+----------+--------------------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+--------------------+----------+------------+-------+---------------+---------+---------+-------+---------+----------+--------------------------+
| 1 | PRIMARY | integers | NULL | index | NULL | PRIMARY | 4 | NULL | 2615753 | 100.00 | Using where; Using index |
| 2 | DEPENDENT SUBQUERY | integers | NULL | const | PRIMARY | PRIMARY | 4 | const | 1 | 100.00 | Using index |
| 3 | DEPENDENT UNION | integers | NULL | const | PRIMARY | PRIMARY | 4 | const | 1 | 100.00 | Using index |
| 4 | DEPENDENT UNION | integers | NULL | const | PRIMARY | PRIMARY | 4 | const | 1 | 100.00 | Using index |
+----+--------------------+----------+------------+-------+---------------+---------+---------+-------+---------+----------+--------------------------+
4 rows in set, 1 warning (0.00 sec)
以上查询大约 1.35 秒返回结果。
-- Single IN whose subquery merges the three lookups with UNION, which also
-- removes duplicates between the branches; the result is sorted by i.
SELECT i
FROM integers
WHERE i IN (
        SELECT i FROM integers WHERE i = 100
    UNION
        SELECT i FROM integers WHERE i = 200
    UNION
        SELECT i FROM integers WHERE i = 1000
)
ORDER BY i;
+----+--------------------+--------------+------------+-------+---------------+---------+---------+-------+---------+----------+--------------------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+--------------------+--------------+------------+-------+---------------+---------+---------+-------+---------+----------+--------------------------+
| 1 | PRIMARY | integers | NULL | index | NULL | PRIMARY | 4 | NULL | 2615753 | 100.00 | Using where; Using index |
| 2 | DEPENDENT SUBQUERY | integers | NULL | const | PRIMARY | PRIMARY | 4 | const | 1 | 100.00 | Using index |
| 3 | DEPENDENT UNION | integers | NULL | const | PRIMARY | PRIMARY | 4 | const | 1 | 100.00 | Using index |
| 4 | DEPENDENT UNION | integers | NULL | const | PRIMARY | PRIMARY | 4 | const | 1 | 100.00 | Using index |
| NULL | UNION RESULT | <union2,3,4> | NULL | ALL | NULL | NULL | NULL | NULL | NULL | NULL | Using temporary |
+----+--------------------+--------------+------------+-------+---------------+---------+---------+-------+---------+----------+--------------------------+
5 rows in set, 1 warning (0.00 sec)
以上查询耗时 1.6 秒返回结果。
“胜出者”是 UNION ALL。
我测试了很多变体(合成表,10kk(1000 万)行,colX 为 1..10kk 之间的随机数,value 为 1..1kk(100 万)之间的随机数)。最快的是:
-- Secondary index on the column that every subquery below filters on.
CREATE INDEX idx ON test (value);

-- One outer SELECT per column lookup. UNION merges the three id lists and
-- drops duplicates; the trailing ORDER BY sorts the combined result.
SELECT id
FROM test
WHERE id IN (
    SELECT col1 FROM test WHERE value = 100
)
UNION
SELECT id
FROM test
WHERE id IN (
    SELECT col2 FROM test WHERE value = 200
)
UNION
SELECT id
FROM test
WHERE id IN (
    SELECT col3 FROM test WHERE value = 1000
)
ORDER BY id;
mysql> SELECT id
-> FROM test
-> WHERE id in (SELECT col1 FROM test WHERE value = 100)
-> UNION
-> SELECT id
-> FROM test
-> WHERE id in (SELECT col2 FROM test WHERE value = 200)
-> UNION
-> SELECT id
-> FROM test
-> WHERE id in (SELECT col3 FROM test WHERE value = 1000)
-> ORDER BY id;
-- <output skipped>
36 rows in set (1.60 sec)
mysql> SELECT id
-> FROM test
-> WHERE (
-> id in (SELECT col1 FROM test WHERE value = 100)
-> OR
-> id in (SELECT col2 FROM test WHERE value = 200)
-> OR
-> id in (SELECT col3 FROM test WHERE value = 1000)
-> )
-> ORDER BY id;
-- <output skipped>
36 rows in set (29.18 sec)
实际上可以用一个子查询来完成:
-- Collapses the three lookups into one subquery: rows are restricted to the
-- three values of interest and CASE picks the column that belongs to each.
-- NOTE(review): the question's third subquery filters on value=300, while
-- this query uses 1000 -- confirm which value is intended.
SELECT *
FROM tbl t1
WHERE t1.id IN (
    SELECT
        CASE
            WHEN t2.value = 100  THEN t2.col1
            WHEN t2.value = 200  THEN t2.col2
            WHEN t2.value = 1000 THEN t2.col3
        END AS id
    FROM tbl t2
    WHERE t2.value IN (100, 200, 1000)
)
在 2474003 行数据上进行了测试,value 列上建有索引 v4:
+----+--------------+-------------+------------+--------+---------------+---------+---------+--------------------+------+----------+-----------------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+--------------+-------------+------------+--------+---------------+---------+---------+--------------------+------+----------+-----------------------+
| 1 | SIMPLE | <subquery2> | NULL | ALL | NULL | NULL | NULL | NULL | NULL | 100.00 | NULL |
| 1 | SIMPLE | t1 | NULL | eq_ref | PRIMARY | PRIMARY | 4 | <subquery2>.id | 1 | 100.00 | Using where |
| 2 | MATERIALIZED | t2 | NULL | range | v4 | v4 | 4 | NULL | 7 | 100.00 | Using index condition |
+----+--------------+-------------+------------+--------+---------------+---------+---------+--------------------+------+----------+-----------------------+
或者完全不用子查询:如果 col1、col2 和 col3 的取值都是 id 的子集,那么子查询本身就是答案。