Python 中具有周期性边界的数组中的连续值
Consecutive values in array with periodic boundaries in Python
我有一些二维数组,其中包含 0
和 1
:
import numpy as np
a = np.random.randint(2, size=(20, 20))
b = np.random.randint(2, size=(20, 20))
c = np.random.randint(2, size=(20, 20))
d = np.random.randint(2, size=(20, 20))
并且我想计算具有周期性边界的连续出现次数。
这意味着(为了清晰起见,在 1D 中):
[1 1 0 0 1 1 0 1 1 1]
应该给我 5
(最后三个元素 + 前两个)。
二维数组应该是 compared/counted 在第三个(第二个,如果你从 0 开始)轴,就像首先在 axis=2
中堆叠数组然后应用与 1D 相同的算法。但是我不确定这是不是最简单的方法。
您可以使用 groupby
来自 itertools
:
from itertools import groupby
a = [1, 1, 0, 0, 1, 1, 0, 1, 1, 1]
def get_longest_seq(a):
if all(a):
return len(a)
a_lens = [len(list(it)) for k, it in groupby(a) if k != 0]
if a[0] == 1 and a[-1] == 1:
m = max(max(a_lens), a_lens[0] + a_lens[-1])
else:
m = max(a_lens)
return m
print(get_longest_seq(a))
这是 2D
的 ndarrays a
和更高的 dim 数组的一种方法,旨在提高性能效率 -
def count_periodic_boundary(a):
a = a.reshape(-1,a.shape[-1])
m = a==1
c0 = np.flip(m,axis=-1).argmin(axis=-1)+m.argmin(axis=-1)
z = np.zeros(a.shape[:-1]+(1,),dtype=bool)
p = np.hstack((z,m,z))
c = (p[:,:-1]<p[:,1:]).sum(1)
s = np.r_[0,c[:-1].cumsum()]
l = np.diff(np.flatnonzero(np.diff(p.ravel())))[::2]
d = np.maximum(c0,np.maximum.reduceat(l,s))
return np.where(m.all(-1),a.shape[-1],d)
样品运行 -
In [75]: np.random.seed(0)
...: a = np.random.randint(2, size=(5, 20))
In [76]: a
Out[76]:
array([[0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1],
[0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0],
[0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1],
[1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0],
[0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0]])
In [77]: count_periodic_boundary(a)
Out[77]: array([7, 4, 5, 2, 6])
In [72]: np.random.seed(0)
...: a = np.random.randint(2, size=(2, 5, 20))
In [73]: a
Out[73]:
array([[[0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1],
[0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0],
[0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1],
[1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0],
[0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0]],
[[1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0],
[1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0],
[1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1],
[0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0],
[1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0]]])
In [74]: count_periodic_boundary(a)
Out[74]: array([7, 4, 5, 2, 6, 2, 5, 4, 2, 1])
这是一个双线的,诚然包含一条相当长的线:
*m,n = a.shape
return np.minimum(n,(np.arange(1,2*n+1)-np.maximum.accumulate(np.where(a[...,None,:],0,np.arange(1,2*n+1).reshape(2,n)).reshape(*m,2*n),-1)).max(-1))
工作原理:
让我们先忽略环绕,考虑一个简单的例子:a = [1 0 0 1 1 0 1 1 1 0]
我们想将其转换为 b = [1 0 0 1 2 0 1 2 3 0],因此我们可以简单地取最大值。生成 b 的一种方法是采用排列 r = [1 2 3 4 5 6 7 8 9 10] 并减去 aux = [0 2 3 3 3 6 6 6 6 10]。 aux 我们通过将 r 乘以 (1-a) 得到 [0 2 3 0 0 6 0 0 0 10] 并取累积最大值来创建。
为了处理环绕,我们只需将 a 的两个副本并排放置,然后使用上面的副本。
这里是代码再次分解成更小的位并注释:
*m,n = a.shape
# r has length 2*n because of how we deal with the wrap around
r = np.arange(1,2*n+1)
# create r x (1-a) using essentially np.where(a,0,r)
# it's a bit more involved because we are cloning a in the same step
# a will be doubled along a new axis we insert before the last one
# this will happen by means of broadcasting against r which we distribute
# over two rows along the new axis
# in the very end we merge the new and the last axis
r1_a = np.where(a[...,None,:],0,r.reshape(2,n)).reshape(*m,2*n)
# take cumulative max
aux = np.maximum.accumulate(r1_a,-1)
# finally, take the row wise maximum and deal with all-one rows
return np.minimum(n,(r-aux).max(-1))
我有一些二维数组,其中包含 0
和 1
:
import numpy as np
a = np.random.randint(2, size=(20, 20))
b = np.random.randint(2, size=(20, 20))
c = np.random.randint(2, size=(20, 20))
d = np.random.randint(2, size=(20, 20))
并且我想计算具有周期性边界的连续出现次数。 这意味着(为了清晰起见,在 1D 中):
[1 1 0 0 1 1 0 1 1 1]
应该给我 5
(最后三个元素 + 前两个)。
二维数组应该是 compared/counted 在第三个(第二个,如果你从 0 开始)轴,就像首先在 axis=2
中堆叠数组然后应用与 1D 相同的算法。但是我不确定这是不是最简单的方法。
您可以使用 groupby
来自 itertools
:
from itertools import groupby
a = [1, 1, 0, 0, 1, 1, 0, 1, 1, 1]
def get_longest_seq(a):
if all(a):
return len(a)
a_lens = [len(list(it)) for k, it in groupby(a) if k != 0]
if a[0] == 1 and a[-1] == 1:
m = max(max(a_lens), a_lens[0] + a_lens[-1])
else:
m = max(a_lens)
return m
print(get_longest_seq(a))
这是 2D
的 ndarrays a
和更高的 dim 数组的一种方法,旨在提高性能效率 -
def count_periodic_boundary(a):
a = a.reshape(-1,a.shape[-1])
m = a==1
c0 = np.flip(m,axis=-1).argmin(axis=-1)+m.argmin(axis=-1)
z = np.zeros(a.shape[:-1]+(1,),dtype=bool)
p = np.hstack((z,m,z))
c = (p[:,:-1]<p[:,1:]).sum(1)
s = np.r_[0,c[:-1].cumsum()]
l = np.diff(np.flatnonzero(np.diff(p.ravel())))[::2]
d = np.maximum(c0,np.maximum.reduceat(l,s))
return np.where(m.all(-1),a.shape[-1],d)
样品运行 -
In [75]: np.random.seed(0)
...: a = np.random.randint(2, size=(5, 20))
In [76]: a
Out[76]:
array([[0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1],
[0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0],
[0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1],
[1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0],
[0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0]])
In [77]: count_periodic_boundary(a)
Out[77]: array([7, 4, 5, 2, 6])
In [72]: np.random.seed(0)
...: a = np.random.randint(2, size=(2, 5, 20))
In [73]: a
Out[73]:
array([[[0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1],
[0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0],
[0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1],
[1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0],
[0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0]],
[[1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0],
[1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0],
[1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1],
[0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0],
[1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0]]])
In [74]: count_periodic_boundary(a)
Out[74]: array([7, 4, 5, 2, 6, 2, 5, 4, 2, 1])
这是一个双线的,诚然包含一条相当长的线:
*m,n = a.shape
return np.minimum(n,(np.arange(1,2*n+1)-np.maximum.accumulate(np.where(a[...,None,:],0,np.arange(1,2*n+1).reshape(2,n)).reshape(*m,2*n),-1)).max(-1))
工作原理:
让我们先忽略环绕,考虑一个简单的例子:a = [1 0 0 1 1 0 1 1 1 0] 我们想将其转换为 b = [1 0 0 1 2 0 1 2 3 0],因此我们可以简单地取最大值。生成 b 的一种方法是采用排列 r = [1 2 3 4 5 6 7 8 9 10] 并减去 aux = [0 2 3 3 3 6 6 6 6 10]。 aux 我们通过将 r 乘以 (1-a) 得到 [0 2 3 0 0 6 0 0 0 10] 并取累积最大值来创建。
为了处理环绕,我们只需将 a 的两个副本并排放置,然后使用上面的副本。
这里是代码再次分解成更小的位并注释:
*m,n = a.shape
# r has length 2*n because of how we deal with the wrap around
r = np.arange(1,2*n+1)
# create r x (1-a) using essentially np.where(a,0,r)
# it's a bit more involved because we are cloning a in the same step
# a will be doubled along a new axis we insert before the last one
# this will happen by means of broadcasting against r which we distribute
# over two rows along the new axis
# in the very end we merge the new and the last axis
r1_a = np.where(a[...,None,:],0,r.reshape(2,n)).reshape(*m,2*n)
# take cumulative max
aux = np.maximum.accumulate(r1_a,-1)
# finally, take the row wise maximum and deal with all-one rows
return np.minimum(n,(r-aux).max(-1))