通过列表推导式收集 np 数组中的相同行
Collecting identical rows of an np array by list comprehension
我有一个二维 np 数组,想通过列表推导式收集其中相同的行。
我的实现能返回想要的结果(更好的解决方案可以在这里找到):
import numpy as np
# Sample input data: a 9 x 6 matrix of 0/1 values in which several rows repeat.
A = np.array(
    [
        [1, 1, 1, 0, 0, 0],
        [0, 0, 1, 0, 1, 1],
        [0, 0, 1, 0, 1, 1],
        [1, 1, 1, 0, 0, 0],
        [0, 0, 1, 0, 1, 1],
        [1, 0, 0, 0, 0, 0],
        [1, 0, 1, 0, 0, 0],
        [1, 0, 0, 0, 1, 0],
        [1, 0, 0, 0, 0, 0],
    ]
)
def gr_id_rows(Matrix):
    """Group the row indices of ``Matrix`` by identical row content.

    Args:
        Matrix: NumPy array whose rows (entries along the first axis) are
            compared element-wise.

    Returns:
        A list of lists of row indices. Each inner list holds, in ascending
        order, the indices of all rows that are equal to one another; the
        groups themselves are ordered by the first row index they contain.
        An array with no rows yields an empty list.
    """
    groups = {}
    for index, row in enumerate(Matrix):
        # Key each row by its values as a tuple of Python scalars. The tuple is
        # hashable, so every row is visited exactly once instead of re-scanning
        # (and copying, via np.delete) the remaining rows for every group.
        key = tuple(row.ravel().tolist())
        # dicts keep insertion order, so groups come out ordered by the first
        # index they contain, and indices are appended in ascending order.
        groups.setdefault(key, []).append(index)
    return list(groups.values())
# Run the grouping on the sample matrix and show the groups of identical rows.
print(gr_id_rows(A))  # [[0, 3], [1, 2, 4], [5, 8], [6], [7]]
关于真实数据集的说明:
- 数据只包含二进制值(0 和 1)。
- 大小最大可达 1000 x 700,但大多数约为 60 x 40。
我们可以用列表推导式优雅地做到这一点吗?
我的尝试(显然)产生了错误的结果。
nbr_rows = len(A)
col_ind = range(nbr_rows)
# For every row h, list the *other* rows k whose content equals row h.
# This builds one list per row rather than one list per group, so each group
# shows up once for every member and never contains h itself -- which is why
# the printed result is not the desired grouping.
ind_eq = [
    [k for k in col_ind if k != h and np.array_equal(A[k], A[h])]
    for h in col_ind
]
print(ind_eq)  # [[3], [2, 4], [1, 4], [0], [1, 2], [8], [], [], [5]]
这是一个解决方案:用 numpy.equal 将 A 与其自身(通过广播)逐行比较,
再使用 itertools.groupby
重组输出:
from itertools import groupby
# Broadcast A against itself: the comparison has shape (rows, rows, cols) and
# .all(2) reduces it to a square boolean matrix that is True at [i, j] when
# row i equals row j. nonzero() returns the True positions as two index arrays
# (a = i, b = j) in row-major order, so `a` is already sorted for groupby.
a, b = np.equal(A, A[:, None]).all(2).nonzero()
# Group the positions by row index a[i] and collect the matching indices b[i].
# Every member of a group produces the same tuple, so the set keeps one copy.
# int() converts NumPy integer scalars to plain Python ints so the result
# prints as (0, 3) rather than (np.int64(0), np.int64(3)) on NumPy >= 2.
{tuple(int(b[i]) for i in g) for _, g in groupby(range(len(a)), lambda i: a[i])}
输出:
{(0, 3), (1, 2, 4), (5, 8), (6,), (7,)}
我有一个二维 np 数组,想通过列表推导式收集其中相同的行。 我的实现能返回想要的结果(更好的解决方案可以在这里找到):
import numpy as np
# Sample input data: a 9 x 6 matrix of 0/1 values in which several rows repeat.
A = np.array(
    [
        [1, 1, 1, 0, 0, 0],
        [0, 0, 1, 0, 1, 1],
        [0, 0, 1, 0, 1, 1],
        [1, 1, 1, 0, 0, 0],
        [0, 0, 1, 0, 1, 1],
        [1, 0, 0, 0, 0, 0],
        [1, 0, 1, 0, 0, 0],
        [1, 0, 0, 0, 1, 0],
        [1, 0, 0, 0, 0, 0],
    ]
)
def gr_id_rows(Matrix):
    """Group the row indices of ``Matrix`` by identical row content.

    Args:
        Matrix: NumPy array whose rows (entries along the first axis) are
            compared element-wise.

    Returns:
        A list of lists of row indices. Each inner list holds, in ascending
        order, the indices of all rows that are equal to one another; the
        groups themselves are ordered by the first row index they contain.
        An array with no rows yields an empty list.
    """
    groups = {}
    for index, row in enumerate(Matrix):
        # Key each row by its values as a tuple of Python scalars. The tuple is
        # hashable, so every row is visited exactly once instead of re-scanning
        # (and copying, via np.delete) the remaining rows for every group.
        key = tuple(row.ravel().tolist())
        # dicts keep insertion order, so groups come out ordered by the first
        # index they contain, and indices are appended in ascending order.
        groups.setdefault(key, []).append(index)
    return list(groups.values())
# Run the grouping on the sample matrix and show the groups of identical rows.
print(gr_id_rows(A))  # [[0, 3], [1, 2, 4], [5, 8], [6], [7]]
关于真实数据集的说明:
- 数据只包含二进制值(0 和 1)。
- 大小最大可达 1000 x 700,但大多数约为 60 x 40。
我们可以用列表推导式优雅地做到这一点吗?
我的尝试(显然)产生了错误的结果。
nbr_rows = len(A)
col_ind = range(nbr_rows)
# For every row h, list the *other* rows k whose content equals row h.
# This builds one list per row rather than one list per group, so each group
# shows up once for every member and never contains h itself -- which is why
# the printed result is not the desired grouping.
ind_eq = [
    [k for k in col_ind if k != h and np.array_equal(A[k], A[h])]
    for h in col_ind
]
print(ind_eq)  # [[3], [2, 4], [1, 4], [0], [1, 2], [8], [], [], [5]]
这是一个解决方案:用 numpy.equal 将 A 与其自身(通过广播)逐行比较,
再使用 itertools.groupby
重组输出:
from itertools import groupby
# Broadcast A against itself: the comparison has shape (rows, rows, cols) and
# .all(2) reduces it to a square boolean matrix that is True at [i, j] when
# row i equals row j. nonzero() returns the True positions as two index arrays
# (a = i, b = j) in row-major order, so `a` is already sorted for groupby.
a, b = np.equal(A, A[:, None]).all(2).nonzero()
# Group the positions by row index a[i] and collect the matching indices b[i].
# Every member of a group produces the same tuple, so the set keeps one copy.
# int() converts NumPy integer scalars to plain Python ints so the result
# prints as (0, 3) rather than (np.int64(0), np.int64(3)) on NumPy >= 2.
{tuple(int(b[i]) for i in g) for _, g in groupby(range(len(a)), lambda i: a[i])}
输出:
{(0, 3), (1, 2, 4), (5, 8), (6,), (7,)}