适用于 SQL 的快速最近位置查找器(MySQL、PostgreSQL、SQL Server)
Fast nearest-location finder for SQL (MySQL, PostgreSQL, SQL Server)
有人可以帮我把下面的查询改写成使用连接(JOIN)而不是子查询吗?它来自本教程:http://www.plumislandmedia.net/mysql/haversine-mysql-nearest-loc/.
事实证明,在行数很大(400 万行)的情况下,这个实现非常慢。我认为子查询是根本原因,但我不知道如何把它改写成连接。
-- Return at most 15 rows of zip ordered by distance from the search point,
-- keeping only rows whose computed distance is within the search radius.
SELECT
    nearby.zip,
    nearby.primary_city,
    nearby.latitude,
    nearby.longitude,
    nearby.distance
FROM (
    SELECT
        loc.zip,
        loc.primary_city,
        loc.latitude,
        loc.longitude,
        params.radius,
        -- Angular separation in degrees (spherical law of cosines) multiplied
        -- by distance_unit, i.e. distance units per degree
        -- (111.045 is presumably km per degree -- confirm).
        -- NOTE(review): rounding may push the ACOS argument slightly above 1
        -- for a row identical to the search point; consider clamping -- confirm.
        params.distance_unit * DEGREES(
            ACOS(
                COS(RADIANS(params.latpoint))
                * COS(RADIANS(loc.latitude))
                * COS(RADIANS(params.longpoint - loc.longitude))
                + SIN(RADIANS(params.latpoint))
                * SIN(RADIANS(loc.latitude))
            )
        ) AS distance
    FROM zip AS loc
    CROSS JOIN (
        /* these are the query parameters */
        SELECT
            42.81 AS latpoint,
            -70.81 AS longpoint,
            50.0 AS radius,
            111.045 AS distance_unit
    ) AS params
    -- Rectangular pre-filter: latitude/longitude window around the search
    -- point, sized from radius, applied before the exact distance test.
    WHERE loc.latitude >= params.latpoint - (params.radius / params.distance_unit)
        AND loc.latitude <= params.latpoint + (params.radius / params.distance_unit)
        AND loc.longitude >= params.longpoint - (params.radius / (params.distance_unit * COS(RADIANS(params.latpoint))))
        AND loc.longitude <= params.longpoint + (params.radius / (params.distance_unit * COS(RADIANS(params.latpoint))))
) AS nearby
WHERE nearby.distance <= nearby.radius
ORDER BY nearby.distance
LIMIT 15
我认为你在这里做不了什么。我猜您的运行时间主要来自对 400 万条记录执行所有这些计算所需的大量 CPU 开销。
您的最内层子查询只有 4 个常量,它们被交叉连接到您的主子查询,因此在这里做任何改动都无法加快速度,结果不会有什么差别。
您的主子查询(那个大的 SELECT 语句)完成了所有工作,把它包装在外层查询中是为了节省处理开销:否则距离需要计算三次,除非 MySQL 的优化器创造某种奇迹,识别出同一个计算被使用了 3 次。
无论如何,这可能是一个性能较差的版本,它删除了最外层的查询:
-- Single-level variant of the nearest-location query: the outer wrapper is
-- removed, so the distance expression is repeated in the SELECT list, the
-- WHERE clause and the ORDER BY clause.
-- Returns at most 15 rows of zip within p.radius of the search point,
-- ordered by distance.
SELECT
    z.zip,
    z.primary_city,
    z.latitude,
    z.longitude,
    -- Angular separation in degrees (spherical law of cosines) multiplied by
    -- distance_unit, i.e. distance units per degree.
    p.distance_unit * DEGREES(ACOS(COS(RADIANS(p.latpoint)) * COS(RADIANS(z.latitude)) * COS(RADIANS(p.longpoint - z.longitude)) + SIN(RADIANS(p.latpoint)) * SIN(RADIANS(z.latitude)))) AS distance
FROM zip AS z
JOIN (
    /* these are the query parameters */
    SELECT
        42.81 AS latpoint,
        -70.81 AS longpoint,
        50.0 AS radius,
        111.045 AS distance_unit
) AS p ON 1 = 1
WHERE
    -- Rectangular pre-filter on latitude/longitude around the search point.
    z.latitude BETWEEN p.latpoint - (p.radius / p.distance_unit)
        AND p.latpoint + (p.radius / p.distance_unit)
    AND z.longitude BETWEEN p.longpoint - (p.radius / (p.distance_unit * COS(RADIANS(p.latpoint))))
        AND p.longpoint + (p.radius / (p.distance_unit * COS(RADIANS(p.latpoint))))
    -- A SELECT accepts only one WHERE clause, so the radius test is chained
    -- with AND rather than introduced by a second WHERE.
    AND p.distance_unit * DEGREES(ACOS(COS(RADIANS(p.latpoint)) * COS(RADIANS(z.latitude)) * COS(RADIANS(p.longpoint - z.longitude)) + SIN(RADIANS(p.latpoint)) * SIN(RADIANS(z.latitude)))) <= p.radius
ORDER BY p.distance_unit * DEGREES(ACOS(COS(RADIANS(p.latpoint)) * COS(RADIANS(z.latitude)) * COS(RADIANS(p.longpoint - z.longitude)) + SIN(RADIANS(p.latpoint)) * SIN(RADIANS(z.latitude))))
LIMIT 15
您可以看到,查询的 WHERE 和 ORDER BY 部分中所有对 distance 的引用都被替换成了用于计算距离的公式。
如果您想去掉保存常量的交叉连接子查询……也可以这样做,但这样一来,除了上一处改动带来的性能损失之外,您还会因为去掉交叉连接而损失可读性:
-- Nearest-location query with the parameter subquery removed: the search
-- point (42.81, -70.81), the radius (50.0) and the units-per-degree factor
-- (111.045) are written inline everywhere they are used.
-- Returns at most 15 rows of zip within the radius, ordered by distance.
SELECT
    z.zip,
    z.primary_city,
    z.latitude,
    z.longitude,
    -- Angular separation in degrees (spherical law of cosines) multiplied by
    -- the units-per-degree factor.
    111.045 * DEGREES(ACOS(COS(RADIANS(42.81)) * COS(RADIANS(z.latitude)) * COS(RADIANS(-70.81 - z.longitude)) + SIN(RADIANS(42.81)) * SIN(RADIANS(z.latitude)))) AS distance
FROM zip AS z
WHERE
    -- Rectangular pre-filter on latitude/longitude around the search point.
    z.latitude BETWEEN 42.81 - (50.0 / 111.045)
        AND 42.81 + (50.0 / 111.045)
    AND z.longitude BETWEEN -70.81 - (50.0 / (111.045 * COS(RADIANS(42.81))))
        AND -70.81 + (50.0 / (111.045 * COS(RADIANS(42.81))))
    -- A SELECT accepts only one WHERE clause, so the radius test is chained
    -- with AND rather than introduced by a second WHERE.
    AND 111.045 * DEGREES(ACOS(COS(RADIANS(42.81)) * COS(RADIANS(z.latitude)) * COS(RADIANS(-70.81 - z.longitude)) + SIN(RADIANS(42.81)) * SIN(RADIANS(z.latitude)))) <= 50.0
ORDER BY 111.045 * DEGREES(ACOS(COS(RADIANS(42.81)) * COS(RADIANS(z.latitude)) * COS(RADIANS(-70.81 - z.longitude)) + SIN(RADIANS(42.81)) * SIN(RADIANS(z.latitude))))
LIMIT 15
所以...最后,这是一个有趣的练习,但我认为这些更改没有任何优点,而且肯定有一些缺点。
有人可以帮我把下面的查询改写成使用连接(JOIN)而不是子查询吗?它来自本教程:http://www.plumislandmedia.net/mysql/haversine-mysql-nearest-loc/.
事实证明,在行数很大(400 万行)的情况下,这个实现非常慢。我认为子查询是根本原因,但我不知道如何把它改写成连接。
-- Return at most 15 rows of zip ordered by distance from the search point,
-- keeping only rows whose computed distance is within the search radius.
SELECT
    nearby.zip,
    nearby.primary_city,
    nearby.latitude,
    nearby.longitude,
    nearby.distance
FROM (
    SELECT
        loc.zip,
        loc.primary_city,
        loc.latitude,
        loc.longitude,
        params.radius,
        -- Angular separation in degrees (spherical law of cosines) multiplied
        -- by distance_unit, i.e. distance units per degree
        -- (111.045 is presumably km per degree -- confirm).
        -- NOTE(review): rounding may push the ACOS argument slightly above 1
        -- for a row identical to the search point; consider clamping -- confirm.
        params.distance_unit * DEGREES(
            ACOS(
                COS(RADIANS(params.latpoint))
                * COS(RADIANS(loc.latitude))
                * COS(RADIANS(params.longpoint - loc.longitude))
                + SIN(RADIANS(params.latpoint))
                * SIN(RADIANS(loc.latitude))
            )
        ) AS distance
    FROM zip AS loc
    CROSS JOIN (
        /* these are the query parameters */
        SELECT
            42.81 AS latpoint,
            -70.81 AS longpoint,
            50.0 AS radius,
            111.045 AS distance_unit
    ) AS params
    -- Rectangular pre-filter: latitude/longitude window around the search
    -- point, sized from radius, applied before the exact distance test.
    WHERE loc.latitude >= params.latpoint - (params.radius / params.distance_unit)
        AND loc.latitude <= params.latpoint + (params.radius / params.distance_unit)
        AND loc.longitude >= params.longpoint - (params.radius / (params.distance_unit * COS(RADIANS(params.latpoint))))
        AND loc.longitude <= params.longpoint + (params.radius / (params.distance_unit * COS(RADIANS(params.latpoint))))
) AS nearby
WHERE nearby.distance <= nearby.radius
ORDER BY nearby.distance
LIMIT 15
我认为你在这里做不了什么。我猜您的运行时间主要来自对 400 万条记录执行所有这些计算所需的大量 CPU 开销。
您的最内层子查询只有 4 个常量,它们被交叉连接到您的主子查询,因此在这里做任何改动都无法加快速度,结果不会有什么差别。
您的主子查询(那个大的 SELECT 语句)完成了所有工作,把它包装在外层查询中是为了节省处理开销:否则距离需要计算三次,除非 MySQL 的优化器创造某种奇迹,识别出同一个计算被使用了 3 次。
无论如何,这可能是一个性能较差的版本,它删除了最外层的查询:
-- Single-level variant of the nearest-location query: the outer wrapper is
-- removed, so the distance expression is repeated in the SELECT list, the
-- WHERE clause and the ORDER BY clause.
-- Returns at most 15 rows of zip within p.radius of the search point,
-- ordered by distance.
SELECT
    z.zip,
    z.primary_city,
    z.latitude,
    z.longitude,
    -- Angular separation in degrees (spherical law of cosines) multiplied by
    -- distance_unit, i.e. distance units per degree.
    p.distance_unit * DEGREES(ACOS(COS(RADIANS(p.latpoint)) * COS(RADIANS(z.latitude)) * COS(RADIANS(p.longpoint - z.longitude)) + SIN(RADIANS(p.latpoint)) * SIN(RADIANS(z.latitude)))) AS distance
FROM zip AS z
JOIN (
    /* these are the query parameters */
    SELECT
        42.81 AS latpoint,
        -70.81 AS longpoint,
        50.0 AS radius,
        111.045 AS distance_unit
) AS p ON 1 = 1
WHERE
    -- Rectangular pre-filter on latitude/longitude around the search point.
    z.latitude BETWEEN p.latpoint - (p.radius / p.distance_unit)
        AND p.latpoint + (p.radius / p.distance_unit)
    AND z.longitude BETWEEN p.longpoint - (p.radius / (p.distance_unit * COS(RADIANS(p.latpoint))))
        AND p.longpoint + (p.radius / (p.distance_unit * COS(RADIANS(p.latpoint))))
    -- A SELECT accepts only one WHERE clause, so the radius test is chained
    -- with AND rather than introduced by a second WHERE.
    AND p.distance_unit * DEGREES(ACOS(COS(RADIANS(p.latpoint)) * COS(RADIANS(z.latitude)) * COS(RADIANS(p.longpoint - z.longitude)) + SIN(RADIANS(p.latpoint)) * SIN(RADIANS(z.latitude)))) <= p.radius
ORDER BY p.distance_unit * DEGREES(ACOS(COS(RADIANS(p.latpoint)) * COS(RADIANS(z.latitude)) * COS(RADIANS(p.longpoint - z.longitude)) + SIN(RADIANS(p.latpoint)) * SIN(RADIANS(z.latitude))))
LIMIT 15
您可以看到,查询的 WHERE 和 ORDER BY 部分中所有对 distance 的引用都被替换成了用于计算距离的公式。
如果您想去掉保存常量的交叉连接子查询……也可以这样做,但这样一来,除了上一处改动带来的性能损失之外,您还会因为去掉交叉连接而损失可读性:
-- Nearest-location query with the parameter subquery removed: the search
-- point (42.81, -70.81), the radius (50.0) and the units-per-degree factor
-- (111.045) are written inline everywhere they are used.
-- Returns at most 15 rows of zip within the radius, ordered by distance.
SELECT
    z.zip,
    z.primary_city,
    z.latitude,
    z.longitude,
    -- Angular separation in degrees (spherical law of cosines) multiplied by
    -- the units-per-degree factor.
    111.045 * DEGREES(ACOS(COS(RADIANS(42.81)) * COS(RADIANS(z.latitude)) * COS(RADIANS(-70.81 - z.longitude)) + SIN(RADIANS(42.81)) * SIN(RADIANS(z.latitude)))) AS distance
FROM zip AS z
WHERE
    -- Rectangular pre-filter on latitude/longitude around the search point.
    z.latitude BETWEEN 42.81 - (50.0 / 111.045)
        AND 42.81 + (50.0 / 111.045)
    AND z.longitude BETWEEN -70.81 - (50.0 / (111.045 * COS(RADIANS(42.81))))
        AND -70.81 + (50.0 / (111.045 * COS(RADIANS(42.81))))
    -- A SELECT accepts only one WHERE clause, so the radius test is chained
    -- with AND rather than introduced by a second WHERE.
    AND 111.045 * DEGREES(ACOS(COS(RADIANS(42.81)) * COS(RADIANS(z.latitude)) * COS(RADIANS(-70.81 - z.longitude)) + SIN(RADIANS(42.81)) * SIN(RADIANS(z.latitude)))) <= 50.0
ORDER BY 111.045 * DEGREES(ACOS(COS(RADIANS(42.81)) * COS(RADIANS(z.latitude)) * COS(RADIANS(-70.81 - z.longitude)) + SIN(RADIANS(42.81)) * SIN(RADIANS(z.latitude))))
LIMIT 15
所以...最后,这是一个有趣的练习,但我认为这些更改没有任何优点,而且肯定有一些缺点。