将二维数组中的值与附近的值进行比较
Compare value in a 2d array to nearby values
我正在寻找一种方法来将二维数组中的每个值与其周围的值进行比较,并返回哪些值接近感兴趣的值(在阈值内)。
我探索的方法涉及遍历二维数组的每个元素,但我觉得这不是最快或最佳的方法。
输入是一个二维数组(大小:i x j),输出是两个三维数组(k x i x j),其中“额外”的维度 k 用于存储与当前元素之差在阈值内的附近元素的 i 和 j 索引。
一些代码来说明我现在在做什么:
import numpy as np
from tqdm import tqdm
# Brute-force reference: for every element, collect the (row, col) indices of the
# elements inside its footprint whose value is within `threshold` of it.
np.random.seed(seed=10)
arr = np.random.random((100, 100))  # Some 2D input array
threshold = 0.5

# Arrays for the row and col indices
i_all, j_all = np.mgrid[0:arr.shape[0], 0:arr.shape[1]]

# Footprint around the current element (ie looking at the 8 elements around
# the central value). Must be odd.
footprint = (3, 3)
# np.prod replaces np.product, which was removed in NumPy 2.0.
footprint_size = int(np.prod(footprint))
# How far the window reaches on each side of the central element.
reach_i = (footprint[0] - 1) // 2
reach_j = (footprint[1] - 1) // 2

# Prepare output for i and j indices; slots that are not filled stay NaN.
output_i = np.full((footprint_size, *arr.shape), np.nan)
output_j = np.full((footprint_size, *arr.shape), np.nan)

# Iterate through each element (row-major order).
for (i, j), element in tqdm(np.ndenumerate(arr), total=arr.size):
    # Window of elements to compare to, clipped at the array edges. Slicing
    # selects the same elements, in the same order, as a full-array boolean
    # mask would, without scanning the whole array for every element.
    rows = slice(max(i - reach_i, 0), i + reach_i + 1)
    cols = slice(max(j - reach_j, 0), j + reach_j + 1)

    # Mask of window elements within the threshold
    close_mask = np.abs(arr[rows, cols] - element) <= threshold
    n_close = int(close_mask.sum())

    # Matches are packed at the END of the k axis; the leading slots keep
    # their NaN padding when fewer than footprint_size elements are close.
    output_i[footprint_size - n_close:, i, j] = i_all[rows, cols][close_mask]
    output_j[footprint_size - n_close:, i, j] = j_all[rows, cols][close_mask]
# Output: two 3D arrays of indices corresponding to elements within the threshold of the element of interest for rows and cols
这适用于小型数组,但当数组具有 ~10^6 个元素时速度非常慢。我的另一个想法是将数组滑过自身以比较值。这可能会更快,但我很好奇是否有任何其他想法或内置函数可以做类似的事情。
我不确定问题具体出在哪里,但我很确定你的方法存在错误:查看结果时,最后的 (100x100) 子数组中包含了所有索引。
我写的结果看起来更好,速度快了约 1000 倍,但仍需要您进行一些测试。我可能犯了一些错误。
def faster_method(arr, threshold, footprint):
    """Find, for every element of ``arr``, the neighbours within ``threshold`` of it.

    The array is padded with NaN and compared against shifted copies of
    itself, one whole-array comparison per footprint offset, instead of
    looping over individual elements.

    Args:
        arr: 2D NumPy array of values.
        threshold: Maximum absolute difference for a neighbour to count as close.
        footprint: ``(rows, cols)`` size of the search window. Both values
            should be odd so the window is centred on the element.

    Returns:
        Tuple ``(output_i, output_j)`` of float arrays with shape
        ``(rows * cols, *arr.shape)``. Slot ``v * cols + h`` holds the row
        (``output_i``) or column (``output_j``) index of the neighbour at
        window offset ``(v, h)`` when that neighbour is within ``threshold``,
        and NaN otherwise. The centre slot and offsets that fall outside
        ``arr`` are always NaN.
    """
    n_rows, n_cols = arr.shape
    half_v, half_h = footprint[0] // 2, footprint[1] // 2

    # NaN-padded copy so that shifted windows never run off the edges.
    temp_arr = np.full((n_rows + footprint[0] - 1, n_cols + footprint[1] - 1), np.nan)
    temp_arr[half_v:half_v + n_rows, half_h:half_h + n_cols] = arr

    # Index grids aligned with temp_arr; padding cells get out-of-range indices.
    temp_i_all, temp_j_all = np.mgrid[-half_v:n_rows + half_v,
                                      -half_h:n_cols + half_h]

    # np.prod replaces np.product, which was removed in NumPy 2.0.
    footprint_size = int(np.prod(footprint))
    output_i = np.full((footprint_size, n_rows, n_cols), np.nan)
    output_j = np.full((footprint_size, n_rows, n_cols), np.nan)

    for neighbour_vertical_position in range(footprint[0]):
        for neighbour_horizontal_position in range(footprint[1]):
            if (neighbour_vertical_position == half_v
                    and neighbour_horizontal_position == half_h):
                # center point, not a neighbour, so we can keep np.nan for it everywhere
                continue

            # One output slot per window offset, in row-major order.
            output_idx = (neighbour_vertical_position * footprint[1]
                          + neighbour_horizontal_position)
            # Vertical offset shifts rows, horizontal offset shifts columns.
            window = (
                slice(neighbour_vertical_position, neighbour_vertical_position + n_rows),
                slice(neighbour_horizontal_position, neighbour_horizontal_position + n_cols),
            )

            # Comparisons against the NaN padding are False, so out-of-bounds
            # neighbours are never reported as close.
            is_close_array = np.abs(arr - temp_arr[window]) <= threshold
            output_i[output_idx] = np.where(is_close_array, temp_i_all[window], np.nan)
            output_j[output_idx] = np.where(is_close_array, temp_j_all[window], np.nan)

    return output_i, output_j
使用 dankal444 的回答,我设法让它工作:
def slidingCompare(arr, footprint=(3, 3), threshold=0.5):
    """Return the indices of nearby elements whose value is close to each element.

    The input is padded with NaN and compared against shifted copies of
    itself, one whole-array comparison per footprint offset.

    arr: 2D array | input
    footprint: tuple | search window dimensions (must be odd)
    threshold: float | Threshold for neighbours to be close

    Returns a tuple ``(output_i, output_j)`` of float arrays with shape
    ``(footprint[0] * footprint[1], *arr.shape)``. Slot
    ``v * footprint[1] + h`` holds the row (``output_i``) or column
    (``output_j``) index of the element at window offset ``(v, h)`` when its
    value differs from the central element by at most ``threshold``, and NaN
    otherwise (including offsets that fall outside ``arr``). The centre slot
    refers to the element itself.

    Raises AssertionError if either footprint dimension is even.
    """
    import numpy as np

    assert footprint[0] % 2 == 1, "Footprint dimensions should be odd. "
    assert footprint[1] % 2 == 1, "Footprint dimensions should be odd. "

    n_rows, n_cols = arr.shape
    half_v, half_h = footprint[0] // 2, footprint[1] // 2

    # NaN-padded copy so that shifted windows never run off the edges.
    temp_arr = np.full((n_rows + footprint[0] - 1,
                        n_cols + footprint[1] - 1), np.nan)
    temp_arr[half_v:half_v + n_rows, half_h:half_h + n_cols] = arr

    # Arrays for the row and col indices, aligned with temp_arr
    i_all, j_all = np.mgrid[-half_v:n_rows + half_v,
                            -half_h:n_cols + half_h]

    # np.prod replaces np.product, which was removed in NumPy 2.0.
    footprint_size = int(np.prod(footprint))

    # Prepare output for i and j indices; slots that are not close stay NaN
    output_i = np.full((footprint_size, n_rows, n_cols), np.nan)
    output_j = np.full((footprint_size, n_rows, n_cols), np.nan)

    for vert_pos in range(footprint[0]):
        for horiz_pos in range(footprint[1]):
            # One output slot per window offset, in row-major order.
            slot = vert_pos * footprint[1] + horiz_pos
            window = (slice(vert_pos, vert_pos + n_rows),
                      slice(horiz_pos, horiz_pos + n_cols))

            # Comparisons against the NaN padding are False, so out-of-bounds
            # neighbours are never marked as close.
            close_mask = np.abs(arr - temp_arr[window]) <= threshold
            output_i[slot, close_mask] = i_all[window][close_mask]
            output_j[slot, close_mask] = j_all[window][close_mask]

    # Output: two 3D arrays of indices corresponding to elements within the
    # threshold of the element of interest for rows and cols
    return output_i, output_j
我正在寻找一种方法来将二维数组中的每个值与其周围的值进行比较,并返回哪些值接近感兴趣的值(在阈值内)。
我探索的方法涉及遍历二维数组的每个元素,但我觉得这不是最快或最佳的方法。
输入是一个二维数组(大小:i x j),输出是两个三维数组(k x i x j),其中“额外”的维度 k 用于存储与当前元素之差在阈值内的附近元素的 i 和 j 索引。
一些代码来说明我现在在做什么:
import numpy as np
from tqdm import tqdm
# Brute-force reference: for every element, collect the (row, col) indices of the
# elements inside its footprint whose value is within `threshold` of it.
np.random.seed(seed=10)
arr = np.random.random((100, 100))  # Some 2D input array
threshold = 0.5

# Arrays for the row and col indices
i_all, j_all = np.mgrid[0:arr.shape[0], 0:arr.shape[1]]

# Footprint around the current element (ie looking at the 8 elements around
# the central value). Must be odd.
footprint = (3, 3)
# np.prod replaces np.product, which was removed in NumPy 2.0.
footprint_size = int(np.prod(footprint))
# How far the window reaches on each side of the central element.
reach_i = (footprint[0] - 1) // 2
reach_j = (footprint[1] - 1) // 2

# Prepare output for i and j indices; slots that are not filled stay NaN.
output_i = np.full((footprint_size, *arr.shape), np.nan)
output_j = np.full((footprint_size, *arr.shape), np.nan)

# Iterate through each element (row-major order).
for (i, j), element in tqdm(np.ndenumerate(arr), total=arr.size):
    # Window of elements to compare to, clipped at the array edges. Slicing
    # selects the same elements, in the same order, as a full-array boolean
    # mask would, without scanning the whole array for every element.
    rows = slice(max(i - reach_i, 0), i + reach_i + 1)
    cols = slice(max(j - reach_j, 0), j + reach_j + 1)

    # Mask of window elements within the threshold
    close_mask = np.abs(arr[rows, cols] - element) <= threshold
    n_close = int(close_mask.sum())

    # Matches are packed at the END of the k axis; the leading slots keep
    # their NaN padding when fewer than footprint_size elements are close.
    output_i[footprint_size - n_close:, i, j] = i_all[rows, cols][close_mask]
    output_j[footprint_size - n_close:, i, j] = j_all[rows, cols][close_mask]
# Output: two 3D arrays of indices corresponding to elements within the threshold of the element of interest for rows and cols
这适用于小型数组,但当数组具有 ~10^6 个元素时速度非常慢。我的另一个想法是将数组滑过自身以比较值。这可能会更快,但我很好奇是否有任何其他想法或内置函数可以做类似的事情。
我不确定问题具体出在哪里,但我很确定你的方法存在错误:查看结果时,最后的 (100x100) 子数组中包含了所有索引。
我写的结果看起来更好,速度快了约 1000 倍,但仍需要您进行一些测试。我可能犯了一些错误。
def faster_method(arr, threshold, footprint):
    """Find, for every element of ``arr``, the neighbours within ``threshold`` of it.

    The array is padded with NaN and compared against shifted copies of
    itself, one whole-array comparison per footprint offset, instead of
    looping over individual elements.

    Args:
        arr: 2D NumPy array of values.
        threshold: Maximum absolute difference for a neighbour to count as close.
        footprint: ``(rows, cols)`` size of the search window. Both values
            should be odd so the window is centred on the element.

    Returns:
        Tuple ``(output_i, output_j)`` of float arrays with shape
        ``(rows * cols, *arr.shape)``. Slot ``v * cols + h`` holds the row
        (``output_i``) or column (``output_j``) index of the neighbour at
        window offset ``(v, h)`` when that neighbour is within ``threshold``,
        and NaN otherwise. The centre slot and offsets that fall outside
        ``arr`` are always NaN.
    """
    n_rows, n_cols = arr.shape
    half_v, half_h = footprint[0] // 2, footprint[1] // 2

    # NaN-padded copy so that shifted windows never run off the edges.
    temp_arr = np.full((n_rows + footprint[0] - 1, n_cols + footprint[1] - 1), np.nan)
    temp_arr[half_v:half_v + n_rows, half_h:half_h + n_cols] = arr

    # Index grids aligned with temp_arr; padding cells get out-of-range indices.
    temp_i_all, temp_j_all = np.mgrid[-half_v:n_rows + half_v,
                                      -half_h:n_cols + half_h]

    # np.prod replaces np.product, which was removed in NumPy 2.0.
    footprint_size = int(np.prod(footprint))
    output_i = np.full((footprint_size, n_rows, n_cols), np.nan)
    output_j = np.full((footprint_size, n_rows, n_cols), np.nan)

    for neighbour_vertical_position in range(footprint[0]):
        for neighbour_horizontal_position in range(footprint[1]):
            if (neighbour_vertical_position == half_v
                    and neighbour_horizontal_position == half_h):
                # center point, not a neighbour, so we can keep np.nan for it everywhere
                continue

            # One output slot per window offset, in row-major order.
            output_idx = (neighbour_vertical_position * footprint[1]
                          + neighbour_horizontal_position)
            # Vertical offset shifts rows, horizontal offset shifts columns.
            window = (
                slice(neighbour_vertical_position, neighbour_vertical_position + n_rows),
                slice(neighbour_horizontal_position, neighbour_horizontal_position + n_cols),
            )

            # Comparisons against the NaN padding are False, so out-of-bounds
            # neighbours are never reported as close.
            is_close_array = np.abs(arr - temp_arr[window]) <= threshold
            output_i[output_idx] = np.where(is_close_array, temp_i_all[window], np.nan)
            output_j[output_idx] = np.where(is_close_array, temp_j_all[window], np.nan)

    return output_i, output_j
使用 dankal444 的回答,我设法让它工作:
def slidingCompare(arr, footprint=(3, 3), threshold=0.5):
    """Return the indices of nearby elements whose value is close to each element.

    The input is padded with NaN and compared against shifted copies of
    itself, one whole-array comparison per footprint offset.

    arr: 2D array | input
    footprint: tuple | search window dimensions (must be odd)
    threshold: float | Threshold for neighbours to be close

    Returns a tuple ``(output_i, output_j)`` of float arrays with shape
    ``(footprint[0] * footprint[1], *arr.shape)``. Slot
    ``v * footprint[1] + h`` holds the row (``output_i``) or column
    (``output_j``) index of the element at window offset ``(v, h)`` when its
    value differs from the central element by at most ``threshold``, and NaN
    otherwise (including offsets that fall outside ``arr``). The centre slot
    refers to the element itself.

    Raises AssertionError if either footprint dimension is even.
    """
    import numpy as np

    assert footprint[0] % 2 == 1, "Footprint dimensions should be odd. "
    assert footprint[1] % 2 == 1, "Footprint dimensions should be odd. "

    n_rows, n_cols = arr.shape
    half_v, half_h = footprint[0] // 2, footprint[1] // 2

    # NaN-padded copy so that shifted windows never run off the edges.
    temp_arr = np.full((n_rows + footprint[0] - 1,
                        n_cols + footprint[1] - 1), np.nan)
    temp_arr[half_v:half_v + n_rows, half_h:half_h + n_cols] = arr

    # Arrays for the row and col indices, aligned with temp_arr
    i_all, j_all = np.mgrid[-half_v:n_rows + half_v,
                            -half_h:n_cols + half_h]

    # np.prod replaces np.product, which was removed in NumPy 2.0.
    footprint_size = int(np.prod(footprint))

    # Prepare output for i and j indices; slots that are not close stay NaN
    output_i = np.full((footprint_size, n_rows, n_cols), np.nan)
    output_j = np.full((footprint_size, n_rows, n_cols), np.nan)

    for vert_pos in range(footprint[0]):
        for horiz_pos in range(footprint[1]):
            # One output slot per window offset, in row-major order.
            slot = vert_pos * footprint[1] + horiz_pos
            window = (slice(vert_pos, vert_pos + n_rows),
                      slice(horiz_pos, horiz_pos + n_cols))

            # Comparisons against the NaN padding are False, so out-of-bounds
            # neighbours are never marked as close.
            close_mask = np.abs(arr - temp_arr[window]) <= threshold
            output_i[slot, close_mask] = i_all[window][close_mask]
            output_j[slot, close_mask] = j_all[window][close_mask]

    # Output: two 3D arrays of indices corresponding to elements within the
    # threshold of the element of interest for rows and cols
    return output_i, output_j