Python - 计算矩阵的每个单元格之间的曼哈顿距离的有效方法?
Python - Efficient way to calculate the Manhattan distance between each cell of a matrix?
我试图在以下矩阵 X
中找到从一个单元格 w
的值到另一个单元格 v
的值的最小跳数 X
=27=].
有没有有效的方法来做到这一点?
import numpy as np
from numpy.typing import NDArray
def manhattan_distance(X: NDArray[int], w: int, v: int) -> int:
# something
return distance
X = np.array([
[1, 2, 2, 3, 3],
[1, 2, 2, 4, 4],
[5, 5, 6, 6, 6],
[7, 8, 9, 9, 9],
[10, 10, 10, 10, 10],
]).astype(np.int_)
# Expected result
manhattan_distance(X, 1, 2) # ①
-> 1
manhattan_distance(X, 2, 3) # ②
-> 1
manhattan_distance(X, 3, 6) # ③
-> 2
manhattan_distance(X, 3, 5) # ④
-> 4
我试过如下实现,但是好像挺慢的。
import numpy as np
from numpy.typing import NDArray
def manhattan_distance(X: NDArray[int], w: int, v: int) -> int:
xx, yy = np.where(X == w)
xx_, yy_ = np.where(X == v)
distance = int(min_dist(xx, xx_) + min_dist(yy, yy_))
return distance
def min_dist(xx, xx_):
min_dist = np.inf
for i in xx:
for j in xx_:
dist = np.sqrt((i - j)**2)
min_dist = dist if dist < min_dist else min_dist
return min_dist
有什么方法可以加快计算速度吗?
使用cdist
to compute all pairwise distances, to find the values' indices use np.argwhere
from scipy.spatial.distance import cdist
import numpy as np
from numpy.typing import NDArray
X = np.array([
[1, 2, 2, 3, 3],
[1, 2, 2, 4, 4],
[5, 5, 6, 6, 6],
[7, 8, 9, 9, 9],
[10, 10, 10, 10, 10],
]).astype(np.int32)
def manhattan_distance(X: NDArray[int], w: int, v: int) -> int:
return cdist(np.argwhere(X == w), np.argwhere(X == v), "cityblock").min()
print(manhattan_distance(X, 1, 2))
print(manhattan_distance(X, 2, 3))
print(manhattan_distance(X, 3, 6))
print(manhattan_distance(X, 3, 5))
输出
1.0
1.0
2.0
4.0
只需为不同的矩阵大小添加计时,即可向 OP 表明 @Dani Mesejo 的回答确实快得多。对于小矩阵,差异当然会很小。
def manhattan_distance_dani_masejo(X, w: int, v: int) -> int:
return cdist(np.argwhere(X == w), np.argwhere(X == v), "cityblock").min()
def manhattan_distance_ugen(X, w: int, v: int) -> int:
xx, yy = np.where(X == w)
xx_, yy_ = np.where(X == v)
distance = int(min_dist(xx, xx_) + min_dist(yy, yy_))
return distance
def min_dist(xx, xx_):
min_dist = np.inf
for i in xx:
for j in xx_:
dist = np.sqrt((i - j)**2)
min_dist = dist if dist < min_dist else min_dist
return min_dist
def gen_data(matrix_size):
return np.random.randint(0, matrix_size, (matrix_size, matrix_size), dtype=np.int32)
for matrix_size in [5, 50, 500]:
print('Matrix size: {}'.format(matrix_size))
X = gen_data(matrix_size)
print('ugen: ', timeit.timeit("manhattan_distance_ugen(X, np.random.randint(0, matrix_size), np.random.randint(0, matrix_size))",
"from __main__ import manhattan_distance_ugen, X, matrix_size; import numpy as np",
number=10))
print('dani masejo: ', timeit.timeit("manhattan_distance_dani_masejo(X, np.random.randint(0, matrix_size), np.random.randint(0, matrix_size))",
"from __main__ import manhattan_distance_dani_masejo, X, matrix_size; import numpy as np",
number=10))
结果:
Matrix size: 5
ugen: 0.0019634239999999914
dani masejo: 0.0006071939999999776
Matrix size: 50
ugen: 0.093326557
dani masejo: 0.0008874660000000034
Matrix size: 500
ugen: 9.112327058
dani masejo: 0.027754558000001595
我试图在以下矩阵 X
中找到从一个单元格 w
的值到另一个单元格 v
的值的最小跳数 X
=27=].
有没有有效的方法来做到这一点?
import numpy as np
from numpy.typing import NDArray
def manhattan_distance(X: NDArray[int], w: int, v: int) -> int:
# something
return distance
X = np.array([
[1, 2, 2, 3, 3],
[1, 2, 2, 4, 4],
[5, 5, 6, 6, 6],
[7, 8, 9, 9, 9],
[10, 10, 10, 10, 10],
]).astype(np.int_)
# Expected result
manhattan_distance(X, 1, 2) # ①
-> 1
manhattan_distance(X, 2, 3) # ②
-> 1
manhattan_distance(X, 3, 6) # ③
-> 2
manhattan_distance(X, 3, 5) # ④
-> 4
我试过如下实现,但是好像挺慢的。
import numpy as np
from numpy.typing import NDArray
def manhattan_distance(X: NDArray[int], w: int, v: int) -> int:
xx, yy = np.where(X == w)
xx_, yy_ = np.where(X == v)
distance = int(min_dist(xx, xx_) + min_dist(yy, yy_))
return distance
def min_dist(xx, xx_):
min_dist = np.inf
for i in xx:
for j in xx_:
dist = np.sqrt((i - j)**2)
min_dist = dist if dist < min_dist else min_dist
return min_dist
有什么方法可以加快计算速度吗?
使用cdist
to compute all pairwise distances, to find the values' indices use np.argwhere
from scipy.spatial.distance import cdist
import numpy as np
from numpy.typing import NDArray
X = np.array([
[1, 2, 2, 3, 3],
[1, 2, 2, 4, 4],
[5, 5, 6, 6, 6],
[7, 8, 9, 9, 9],
[10, 10, 10, 10, 10],
]).astype(np.int32)
def manhattan_distance(X: NDArray[int], w: int, v: int) -> int:
return cdist(np.argwhere(X == w), np.argwhere(X == v), "cityblock").min()
print(manhattan_distance(X, 1, 2))
print(manhattan_distance(X, 2, 3))
print(manhattan_distance(X, 3, 6))
print(manhattan_distance(X, 3, 5))
输出
1.0
1.0
2.0
4.0
只需为不同的矩阵大小添加计时,即可向 OP 表明 @Dani Mesejo 的回答确实快得多。对于小矩阵,差异当然会很小。
def manhattan_distance_dani_masejo(X, w: int, v: int) -> int:
return cdist(np.argwhere(X == w), np.argwhere(X == v), "cityblock").min()
def manhattan_distance_ugen(X, w: int, v: int) -> int:
xx, yy = np.where(X == w)
xx_, yy_ = np.where(X == v)
distance = int(min_dist(xx, xx_) + min_dist(yy, yy_))
return distance
def min_dist(xx, xx_):
min_dist = np.inf
for i in xx:
for j in xx_:
dist = np.sqrt((i - j)**2)
min_dist = dist if dist < min_dist else min_dist
return min_dist
def gen_data(matrix_size):
return np.random.randint(0, matrix_size, (matrix_size, matrix_size), dtype=np.int32)
for matrix_size in [5, 50, 500]:
print('Matrix size: {}'.format(matrix_size))
X = gen_data(matrix_size)
print('ugen: ', timeit.timeit("manhattan_distance_ugen(X, np.random.randint(0, matrix_size), np.random.randint(0, matrix_size))",
"from __main__ import manhattan_distance_ugen, X, matrix_size; import numpy as np",
number=10))
print('dani masejo: ', timeit.timeit("manhattan_distance_dani_masejo(X, np.random.randint(0, matrix_size), np.random.randint(0, matrix_size))",
"from __main__ import manhattan_distance_dani_masejo, X, matrix_size; import numpy as np",
number=10))
结果:
Matrix size: 5
ugen: 0.0019634239999999914
dani masejo: 0.0006071939999999776
Matrix size: 50
ugen: 0.093326557
dani masejo: 0.0008874660000000034
Matrix size: 500
ugen: 9.112327058
dani masejo: 0.027754558000001595