Python:完全不对称网格
Python: Complete asymmetric grid
我有一个 n 维点网格,但其中有空洞,我想获取缺少的网格点列表。但是,我不想扩展现有网格的边框。
例如在 2D 中,如果上下或左右有任何值,我只需要网格点坐标。这是一幅卡通片,其中 o
是一个现有点,x
是我想要的坐标。
o o o o o
o o x o o x o
o x x o o
o x o o
o o o o
虽然数据不在网格中。它只是一个坐标列表,即
coords = [(1000,3.5), (1000,4.0), (1000,4.5), (1000,5.0), (1000,5.5),
(1100,4.5), (1100,5.5), (1200,4.0), (1200,4.5), (1200,5.0), (1200,5.5),
(1300,3.5), (1300,4.0), (1300,4.5)]
所以我想要的值是[(1100,3.5), (1100,4.0), (1100,5.0), (1200,3.5)]
。
我尝试获取每个参数的最小值和最大值并制作一个新轴 numpy.arange(min(param1),max(param1),100)
,然后通过 numpy.setdiff1d()
将其与旧值进行比较,但这会使网格变为矩形必然。
关于如何高效执行此操作的任何想法?
这是您的示例的解决方案。但是,我不认为这可以很容易地推广到 n 维。
工作原理:
从成排的洞开始。将顶点列表转换为数组并使用字典顺序对行进行排序。
import numpy as np
import matplotlib.pyplot as plt
coords = np.asarray(
[(1000,3.5), (1000,4.0), (1000,4.5), (1000,5.0), (1000,5.5),
(1100,4.5), (1100, 6.5), (1200,4.0), (1200,5.5), (1200,7.0), (1200,5.5),
(1300,3.5), (1300,4.0), (1300,4.5), (1300, 5.5), (1700,5.0) ])
coords = coords[ np.lexsort(( coords[:,1], coords[:,0] )),:]
将网格大小确定为不为零的顶点之间的最小差异。
diffs = np.diff(coords, axis = 0)
dx = np.min(diffs[diffs[:,0] > 0.0, 0])
dy = np.min(diffs[diffs[:,1] > 0.0, 1])
网格包含空洞,其中 x 坐标没有变化,y 坐标的变化大于 dy
。
indices = (diffs[:,0] == 0.0) * (diffs[:,1] > dy)
将空洞扩展到缺失网格点列表,使用它们的索引提取起点和空洞长度。最后,串联
如果没有孔,则进入 numpy.array
或 return 空数组。
hole_list = [ np.asarray( [ [x, y] for y in np.arange( y + dy, y + Dy, dy )] )
for ((x, y), Dy) in zip ( coords[indices,:],
diffs[indices,1] ) ]
if len( hole_list ) > 0:
holes_x = np.concatenate( hole_list )
else:
holes_x = np.asarray( [] )
现在将找到的孔添加到网格中并在列中查找孔。只需要切换字典顺序,并在行中添加空洞,以避免找到它们两次。
# Holes in columns.
coords_x = np.append( coords, holes_x, axis = 0 )
coords_x = coords[ np.lexsort( ( coords[:,0], coords[:,1] ) ), : ]
diffs = np.diff( coords_x, axis = 0 )
indices = ( diffs[:,1] == 0.0 ) * ( diffs[:,0] > dx )
hole_list = [ np.asarray( [ [x, y] for x in np.arange( x + dx, x + Dx, dx )] )
for ((x, y), Dx) in zip ( coords_x[indices,:],
diffs[indices,0] ) ]
if len( hole_list ) > 0:
holes_y = np.concatenate( hole_list )
else:
holes_y = np.asarray( [] )
示例:
import numpy as np
import matplotlib.pyplot as plt
coords = np.asarray(
[(1000,3.5), (1000,4.0), (1000,4.5), (1000,5.0), (1000,5.5),
(1100,4.5), (1100, 6.5), (1200,4.0), (1200,5.5), (1200,7.0), (1200,5.5),
(1300,3.5), (1300,4.0), (1300,4.5), (1300, 5.5), (1700,5.0) ])
coords = coords[ np.lexsort(( coords[:,1], coords[:,0] )),:]
# Find x and y grid sizes.
diffs = np.diff(coords, axis = 0)
dx = np.min(diffs[diffs[:,0] > 0.0, 0])
dy = np.min(diffs[diffs[:,1] > 0.0, 1])
# Holes in rows.
indices = (diffs[:,0] == 0.0) * (diffs[:,1] > dy)
hole_list = [ np.asarray( [ [x, y] for y in np.arange( y + dy, y + Dy, dy )] )
for ((x, y), Dy) in zip ( coords[indices,:],
diffs[indices,1] ) ]
if len( hole_list ) > 0:
holes_x = np.concatenate( hole_list )
else:
holes_x = np.asarray( [] )
# Holes in columns.
coords_x = np.append( coords, holes_x, axis = 0 )
coords_x = coords[ np.lexsort( ( coords[:,0], coords[:,1] ) ), : ]
diffs = np.diff( coords_x, axis = 0 )
indices = ( diffs[:,1] == 0.0 ) * ( diffs[:,0] > dx )
hole_list = [ np.asarray( [ [x, y] for x in np.arange( x + dx, x + Dx, dx )] )
for ((x, y), Dx) in zip ( coords_x[indices,:],
diffs[indices,0] ) ]
if len( hole_list ) > 0:
holes_y = np.concatenate( hole_list )
else:
holes_y = np.asarray( [] )
# Plot holes.
f = plt.figure()
ax = f.add_subplot(111)
ax.scatter( coords[:,0], coords[:,1], c = 'g', s=200 )
ax.scatter( holes_x[:,0], holes_x[:,1], c = 'r', s=50 )
ax.scatter( holes_y[:,0], holes_y[:,1], c = 'b', s=50 )
不知道速度,但这里有一个适用于 D 维的,它是
受到现有评论和答案的启发。对于一组方形的
点宽度 w
,它循环了大约 D*w**(D-1)
次。它遍历每个
维度,查看沿该维度的投影并循环遍历该投影中维度的所有线,沿每条线执行 setdiff
。
import numpy as np
def grid_holes(coords):
coords = np.atleast_2d(coords)
N, D = coords.shape
coords = coords[np.lexsort(coords.T)]
diff = np.diff(coords, axis=0)
spacing = np.where(diff, np.abs(diff), np.inf).min(0)
missing = []
for d in xrange(D):
projection = np.delete(coords, d, 1)
order = np.lexsort(projection.T)
gridlines = np.split(coords[order],
np.diff(projection[order], axis=0).any(1).nonzero()[0] + 1)
for gridline in gridlines:
x = gridline[:, d]
s = spacing[d]
i = np.round(x/s).astype(int)
gaps = np.diff(i) - 1
gap_locs = gaps.nonzero()[0]
if not len(gap_locs):
continue
mx = [ x[loc] + s*(g+1) for loc in gap_locs
for g in xrange(gaps[loc])]
mcoords = np.repeat(gridline[:1], len(mx), 0)
mcoords[:, d] = mx
missing.append(mcoords)
return np.concatenate(missing)
一个函数来测试它:
def test_grid_holes(coords, known_holes=None, func=grid_holes):
ret = ()
if isinstance(coords, tuple) and len(coords)==2:
# Generate random coords
N, D = coords
coords = np.random.randint(0, int(N**(1./D)), coords)
ret += (coords, )
else:
coords = np.atleast_2d(coords)
N, D = coords.shape
found_holes = func(coords)
found_holes = np.unique(found_holes.view('f8,'*D)).view('f8').reshape(-1, D)
ret += (found_holes,)
if D <= 3:
import matplotlib.pyplot as plt
fig = plt.figure()
if D == 2:
ax = fig.add_subplot(111)
elif D == 3:
from mpl_toolkits.mplot3d import Axes3D
ax = fig.add_subplot(111, projection='3d')
if known_holes is not None:
known_holes = np.atleast_2d(known_holes)
ax.scatter(*known_holes.T, c='r', marker='o')
ax.scatter(*coords.T, c='k', marker='o')
ax.scatter(*found_holes.T, c='k', marker='x')
if known_holes is not None:
known_holes = np.unique(known_holes.view('f8,'*D)).view('f8').reshape(-1, D)
return np.allclose(found_holes, known_holes)
else:
return ret
这里我们可以针对您的数据和生成的数据进行测试:
coords = [(1000,3.5), (1000,4.0), (1000,4.5), (1000,5.0), (1000,5.5),
(1100,4.5), (1100,5.5), (1200,4.0), (1200,4.5), (1200,5.0),
(1200,5.5), (1300,3.5), (1300,4.0), (1300,4.5)]
holes = [(1100,3.5), (1100,4.0), (1100,5.0), (1200,3.5)]
test_grid_holes(coords, holes)
test_grid_holes((100, 3))
我认为最简单的方法是将网格映射到矩形阵列。因为这样就可以相对简单快速地确定哪些点属于标准。缺点是 RAM 使用最终可能会成为一个问题,尤其是对于稀疏网格。
还有一点仍有争议,那就是网格应该如何定义。其他答案目前使用元素之间沿维度的最小差异作为该方向上网格的步长。但是,这在极少数情况下会带来问题。例如。如果已知坐标是:
2, 4, 6, 9, 11
那么步长将等于2
,但显然这在9
处出错了。也许最好取连续差异的最大公约数?例如。在 this answer 的帮助下。在我的代码中,我采用了不同的方法:仅使用已知坐标中存在的 "ticks" 来构建网格。
对于 2D 情况,类似以下内容就足够了:
def find_holes_2d(coords):
coords = np.asanyarray(coords)
# determine grid and transform coordinates
uniq_x, labels_x = np.unique(coords[:,0], return_inverse=True)
uniq_y, labels_y = np.unique(coords[:,1], return_inverse=True)
# layout the known grid in an array
grid = np.zeros([len(uniq_x), len(uniq_y)], bool)
grid[labels_x, labels_y] = True
# see which grid points are inside known coordinates
x_fwd = np.logical_or.accumulate(grid, axis=0)
x_bkwd = np.logical_or.accumulate(grid[::-1], axis=0)[::-1]
y_fwd = np.logical_or.accumulate(grid, axis=1)
y_bkwd = np.logical_or.accumulate(grid[:,::-1], axis=1)[:,::-1]
# select the holes according to the criteria
holes = ~grid & (x_fwd & x_bkwd | y_fwd & y_bkwd)
# Transform positions back to original coordinates
I,J = np.where(holes)
return np.column_stack([uniq_x[I], uniq_y[J]])
相同的方法可以应用于 ND 情况,例如:
def find_holes(coords):
coords = np.asanyarray(coords)
uniq, labels = zip(*[np.unique(c, return_inverse=True) for c in coords.T])
grid = np.zeros(map(len, uniq), bool)
grid[labels] = True
candidates = np.zeros_like(grid)
for dim in range(grid.ndim):
grid0 = np.rollaxis(grid, dim)
inside = np.logical_or.accumulate(grid0, axis=0) &
np.logical_or.accumulate(grid0[::-1], axis=0)[::-1]
candidates |= np.rollaxis(inside, 0, dim+1)
holes = candidates & ~grid
hole_labels = np.where(holes)
return np.column_stack([u[h] for u, h in zip(uniq, hole_labels)])
最后,还有一个问题,如这个玩具示例所示:
o x o o
x x o
o o o o
这里还有一个洞"undetected"。通过将找到的孔(x
's)的坐标添加到原始坐标和 运行 第二次迭代,这很容易解决。
我有一个 n 维点网格,但其中有空洞,我想获取缺少的网格点列表。但是,我不想扩展现有网格的边框。
例如在 2D 中,如果上下或左右有任何值,我只需要网格点坐标。这是一幅卡通片,其中 o
是一个现有点,x
是我想要的坐标。
o o o o o
o o x o o x o
o x x o o
o x o o
o o o o
虽然数据不在网格中。它只是一个坐标列表,即
coords = [(1000,3.5), (1000,4.0), (1000,4.5), (1000,5.0), (1000,5.5),
(1100,4.5), (1100,5.5), (1200,4.0), (1200,4.5), (1200,5.0), (1200,5.5),
(1300,3.5), (1300,4.0), (1300,4.5)]
所以我想要的值是[(1100,3.5), (1100,4.0), (1100,5.0), (1200,3.5)]
。
我尝试获取每个参数的最小值和最大值并制作一个新轴 numpy.arange(min(param1),max(param1),100)
,然后通过 numpy.setdiff1d()
将其与旧值进行比较,但这会使网格变为矩形必然。
关于如何高效执行此操作的任何想法?
这是您的示例的解决方案。但是,我不认为这可以很容易地推广到 n 维。
工作原理:
从成排的洞开始。将顶点列表转换为数组并使用字典顺序对行进行排序。
import numpy as np
import matplotlib.pyplot as plt
coords = np.asarray(
[(1000,3.5), (1000,4.0), (1000,4.5), (1000,5.0), (1000,5.5),
(1100,4.5), (1100, 6.5), (1200,4.0), (1200,5.5), (1200,7.0), (1200,5.5),
(1300,3.5), (1300,4.0), (1300,4.5), (1300, 5.5), (1700,5.0) ])
coords = coords[ np.lexsort(( coords[:,1], coords[:,0] )),:]
将网格大小确定为不为零的顶点之间的最小差异。
diffs = np.diff(coords, axis = 0)
dx = np.min(diffs[diffs[:,0] > 0.0, 0])
dy = np.min(diffs[diffs[:,1] > 0.0, 1])
网格包含空洞,其中 x 坐标没有变化,y 坐标的变化大于 dy
。
indices = (diffs[:,0] == 0.0) * (diffs[:,1] > dy)
将空洞扩展到缺失网格点列表,使用它们的索引提取起点和空洞长度。最后,串联
如果没有孔,则进入 numpy.array
或 return 空数组。
hole_list = [ np.asarray( [ [x, y] for y in np.arange( y + dy, y + Dy, dy )] )
for ((x, y), Dy) in zip ( coords[indices,:],
diffs[indices,1] ) ]
if len( hole_list ) > 0:
holes_x = np.concatenate( hole_list )
else:
holes_x = np.asarray( [] )
现在将找到的孔添加到网格中并在列中查找孔。只需要切换字典顺序,并在行中添加空洞,以避免找到它们两次。
# Holes in columns.
coords_x = np.append( coords, holes_x, axis = 0 )
coords_x = coords[ np.lexsort( ( coords[:,0], coords[:,1] ) ), : ]
diffs = np.diff( coords_x, axis = 0 )
indices = ( diffs[:,1] == 0.0 ) * ( diffs[:,0] > dx )
hole_list = [ np.asarray( [ [x, y] for x in np.arange( x + dx, x + Dx, dx )] )
for ((x, y), Dx) in zip ( coords_x[indices,:],
diffs[indices,0] ) ]
if len( hole_list ) > 0:
holes_y = np.concatenate( hole_list )
else:
holes_y = np.asarray( [] )
示例:
import numpy as np
import matplotlib.pyplot as plt
coords = np.asarray(
[(1000,3.5), (1000,4.0), (1000,4.5), (1000,5.0), (1000,5.5),
(1100,4.5), (1100, 6.5), (1200,4.0), (1200,5.5), (1200,7.0), (1200,5.5),
(1300,3.5), (1300,4.0), (1300,4.5), (1300, 5.5), (1700,5.0) ])
coords = coords[ np.lexsort(( coords[:,1], coords[:,0] )),:]
# Find x and y grid sizes.
diffs = np.diff(coords, axis = 0)
dx = np.min(diffs[diffs[:,0] > 0.0, 0])
dy = np.min(diffs[diffs[:,1] > 0.0, 1])
# Holes in rows.
indices = (diffs[:,0] == 0.0) * (diffs[:,1] > dy)
hole_list = [ np.asarray( [ [x, y] for y in np.arange( y + dy, y + Dy, dy )] )
for ((x, y), Dy) in zip ( coords[indices,:],
diffs[indices,1] ) ]
if len( hole_list ) > 0:
holes_x = np.concatenate( hole_list )
else:
holes_x = np.asarray( [] )
# Holes in columns.
coords_x = np.append( coords, holes_x, axis = 0 )
coords_x = coords[ np.lexsort( ( coords[:,0], coords[:,1] ) ), : ]
diffs = np.diff( coords_x, axis = 0 )
indices = ( diffs[:,1] == 0.0 ) * ( diffs[:,0] > dx )
hole_list = [ np.asarray( [ [x, y] for x in np.arange( x + dx, x + Dx, dx )] )
for ((x, y), Dx) in zip ( coords_x[indices,:],
diffs[indices,0] ) ]
if len( hole_list ) > 0:
holes_y = np.concatenate( hole_list )
else:
holes_y = np.asarray( [] )
# Plot holes.
f = plt.figure()
ax = f.add_subplot(111)
ax.scatter( coords[:,0], coords[:,1], c = 'g', s=200 )
ax.scatter( holes_x[:,0], holes_x[:,1], c = 'r', s=50 )
ax.scatter( holes_y[:,0], holes_y[:,1], c = 'b', s=50 )
不知道速度,但这里有一个适用于 D 维的,它是
受到现有评论和答案的启发。对于一组方形的
点宽度 w
,它循环了大约 D*w**(D-1)
次。它遍历每个
维度,查看沿该维度的投影并循环遍历该投影中维度的所有线,沿每条线执行 setdiff
。
import numpy as np
def grid_holes(coords):
coords = np.atleast_2d(coords)
N, D = coords.shape
coords = coords[np.lexsort(coords.T)]
diff = np.diff(coords, axis=0)
spacing = np.where(diff, np.abs(diff), np.inf).min(0)
missing = []
for d in xrange(D):
projection = np.delete(coords, d, 1)
order = np.lexsort(projection.T)
gridlines = np.split(coords[order],
np.diff(projection[order], axis=0).any(1).nonzero()[0] + 1)
for gridline in gridlines:
x = gridline[:, d]
s = spacing[d]
i = np.round(x/s).astype(int)
gaps = np.diff(i) - 1
gap_locs = gaps.nonzero()[0]
if not len(gap_locs):
continue
mx = [ x[loc] + s*(g+1) for loc in gap_locs
for g in xrange(gaps[loc])]
mcoords = np.repeat(gridline[:1], len(mx), 0)
mcoords[:, d] = mx
missing.append(mcoords)
return np.concatenate(missing)
一个函数来测试它:
def test_grid_holes(coords, known_holes=None, func=grid_holes):
ret = ()
if isinstance(coords, tuple) and len(coords)==2:
# Generate random coords
N, D = coords
coords = np.random.randint(0, int(N**(1./D)), coords)
ret += (coords, )
else:
coords = np.atleast_2d(coords)
N, D = coords.shape
found_holes = func(coords)
found_holes = np.unique(found_holes.view('f8,'*D)).view('f8').reshape(-1, D)
ret += (found_holes,)
if D <= 3:
import matplotlib.pyplot as plt
fig = plt.figure()
if D == 2:
ax = fig.add_subplot(111)
elif D == 3:
from mpl_toolkits.mplot3d import Axes3D
ax = fig.add_subplot(111, projection='3d')
if known_holes is not None:
known_holes = np.atleast_2d(known_holes)
ax.scatter(*known_holes.T, c='r', marker='o')
ax.scatter(*coords.T, c='k', marker='o')
ax.scatter(*found_holes.T, c='k', marker='x')
if known_holes is not None:
known_holes = np.unique(known_holes.view('f8,'*D)).view('f8').reshape(-1, D)
return np.allclose(found_holes, known_holes)
else:
return ret
这里我们可以针对您的数据和生成的数据进行测试:
coords = [(1000,3.5), (1000,4.0), (1000,4.5), (1000,5.0), (1000,5.5),
(1100,4.5), (1100,5.5), (1200,4.0), (1200,4.5), (1200,5.0),
(1200,5.5), (1300,3.5), (1300,4.0), (1300,4.5)]
holes = [(1100,3.5), (1100,4.0), (1100,5.0), (1200,3.5)]
test_grid_holes(coords, holes)
test_grid_holes((100, 3))
我认为最简单的方法是将网格映射到矩形阵列。因为这样就可以相对简单快速地确定哪些点属于标准。缺点是 RAM 使用最终可能会成为一个问题,尤其是对于稀疏网格。
还有一点仍有争议,那就是网格应该如何定义。其他答案目前使用元素之间沿维度的最小差异作为该方向上网格的步长。但是,这在极少数情况下会带来问题。例如。如果已知坐标是:
2, 4, 6, 9, 11
那么步长将等于2
,但显然这在9
处出错了。也许最好取连续差异的最大公约数?例如。在 this answer 的帮助下。在我的代码中,我采用了不同的方法:仅使用已知坐标中存在的 "ticks" 来构建网格。
对于 2D 情况,类似以下内容就足够了:
def find_holes_2d(coords):
coords = np.asanyarray(coords)
# determine grid and transform coordinates
uniq_x, labels_x = np.unique(coords[:,0], return_inverse=True)
uniq_y, labels_y = np.unique(coords[:,1], return_inverse=True)
# layout the known grid in an array
grid = np.zeros([len(uniq_x), len(uniq_y)], bool)
grid[labels_x, labels_y] = True
# see which grid points are inside known coordinates
x_fwd = np.logical_or.accumulate(grid, axis=0)
x_bkwd = np.logical_or.accumulate(grid[::-1], axis=0)[::-1]
y_fwd = np.logical_or.accumulate(grid, axis=1)
y_bkwd = np.logical_or.accumulate(grid[:,::-1], axis=1)[:,::-1]
# select the holes according to the criteria
holes = ~grid & (x_fwd & x_bkwd | y_fwd & y_bkwd)
# Transform positions back to original coordinates
I,J = np.where(holes)
return np.column_stack([uniq_x[I], uniq_y[J]])
相同的方法可以应用于 ND 情况,例如:
def find_holes(coords):
coords = np.asanyarray(coords)
uniq, labels = zip(*[np.unique(c, return_inverse=True) for c in coords.T])
grid = np.zeros(map(len, uniq), bool)
grid[labels] = True
candidates = np.zeros_like(grid)
for dim in range(grid.ndim):
grid0 = np.rollaxis(grid, dim)
inside = np.logical_or.accumulate(grid0, axis=0) &
np.logical_or.accumulate(grid0[::-1], axis=0)[::-1]
candidates |= np.rollaxis(inside, 0, dim+1)
holes = candidates & ~grid
hole_labels = np.where(holes)
return np.column_stack([u[h] for u, h in zip(uniq, hole_labels)])
最后,还有一个问题,如这个玩具示例所示:
o x o o
x x o
o o o o
这里还有一个洞"undetected"。通过将找到的孔(x
's)的坐标添加到原始坐标和 运行 第二次迭代,这很容易解决。