Python 中的快速组合生成器
Fast combinatoric generator in Python
作为 python 中一个大型项目的一部分,我需要一个快速生成器函数来生成小于 n
的所有可能的非负整数集,这样每个集最多有s
个元素且集合中最大数与最小数之差小于w
.
到目前为止我实现的最快的实现是使用 itertools
:
import itertools
def subsample(n, s, w):
nn = range(w)
for p in range(s):
o = list(itertools.combinations(nn, p+1))
for t in o:
yield t
for _ in range(0, n-w):
pt = o
o = [tuple([op + 1 for op in list(u)]) for u in pt]
for t in list((set(o) ^ set(pt)) & set(o)):
yield t
例如:
In [1]: list(subsample(6,3,3))
Out [1]: [(0,), (1,), (2,), (3,), (4,), (5,), (0, 1), (0, 2), (1, 2), (1, 3), (2, 3), (3, 4), (2, 4), (4, 5), (3, 5), (0, 1, 2), (1, 2, 3), (2, 3, 4), (3, 4, 5)]
但我相信一定有更有效的方法来做到这一点。有什么可以让它更快的吗?
以下是一些基于 Knuth 算法的生成器。
subsets4
生成 1..n 的所有子集 k 或更少的元素
subsets5
将 subsets4
生成的子集限制为最大差异为 w 的子集
subsets
生成长度为 k 的 1..n 的所有子集
subsets2
将 subsets
生成的子集限制为最大差异为 w 的子集
运行 megatest()
函数测试 subsets5
生成器的多个值 n, k 和 w.
# all subsets of k or less elements of 1..n
def subsets4(n,k):
a = [ 0 ] * k
i = 0
while i >= 0:
a[i] += 1
yield a[0:i+1]
r = a[i]+1
i += 1
while i < k and r <= n:
a[i] = r
yield a[0:i+1]
i += 1
r += 1
i -= 1
if a[i] >= n:
i -= 1
# all subsets of k or less elements of 1..n with max difference <= w
def subsets5(n,k,w):
a = [ 0 ] * k
i = 0
while i >= 0:
a[i] += 1
yield a[0:i+1]
r = a[i]+1
i += 1
while i < k and r <= n and r-a[0] <= w:
a[i] = r
yield a[0:i+1]
i += 1
r += 1
i -= 1
if a[i] >= n or a[i]+1-a[0] > w:
i -= 1
# all subsets of 1..n having exactly k elements
def subsets(n,k):
a = range(1,k+1)
while a[0] <= n+1-k:
yield a
# find i
i = k-1
while i >= 0 and a[i]+k-i >= n+1: i -= 1
r = a[i]
a[i] += 1
j = 2
i += 1
while i < k:
a[i] = r + j
i += 1
j += 1
# all subsets of 1..n having exactly k elements and whose max
# difference is w
def subsets2(n,k,w):
if k > w: return
a = range(1,k+1)
while a[0] <= n+1-k:
yield a
i = k-1
while i >= 0 and (a[i]+k-i >= n+1 or a[i]+k-i-a[0] > w) : i -= 1
r = a[i]
a[i] += 1
j = 2
i += 1
while i < k:
a[i] = r + j
i += 1
j += 1
def test(n,k,w):
s1 = [ s for s in subsets4(n,k) if s[-1] - s[0] <= w ]
s2 = [ s for s in subsets5(n,k,w) ]
if s1 == s2:
print "OK", n,k,w
return 0
else:
print "NOT OK", n, k, w
return 1
# for s in subsets2(10,3,4): print s
# for s in subsets(10,3): print s
def megatest():
failed = 0
for n in xrange(10,20):
for k in xrange(1,n+1):
for w in xrange(k,n+1):
failed += test(n,k,w)
print "failed:", failed
作为 python 中一个大型项目的一部分,我需要一个快速生成器函数来生成小于 n
的所有可能的非负整数集,这样每个集最多有s
个元素且集合中最大数与最小数之差小于w
.
到目前为止我实现的最快的实现是使用 itertools
:
import itertools
def subsample(n, s, w):
nn = range(w)
for p in range(s):
o = list(itertools.combinations(nn, p+1))
for t in o:
yield t
for _ in range(0, n-w):
pt = o
o = [tuple([op + 1 for op in list(u)]) for u in pt]
for t in list((set(o) ^ set(pt)) & set(o)):
yield t
例如:
In [1]: list(subsample(6,3,3))
Out [1]: [(0,), (1,), (2,), (3,), (4,), (5,), (0, 1), (0, 2), (1, 2), (1, 3), (2, 3), (3, 4), (2, 4), (4, 5), (3, 5), (0, 1, 2), (1, 2, 3), (2, 3, 4), (3, 4, 5)]
但我相信一定有更有效的方法来做到这一点。有什么可以让它更快的吗?
以下是一些基于 Knuth 算法的生成器。
subsets4
生成 1..n 的所有子集 k 或更少的元素subsets5
将subsets4
生成的子集限制为最大差异为 w 的子集
subsets
生成长度为 k 的 1..n 的所有子集
subsets2
将subsets
生成的子集限制为最大差异为 w 的子集
运行 megatest()
函数测试 subsets5
生成器的多个值 n, k 和 w.
# all subsets of k or less elements of 1..n
def subsets4(n,k):
a = [ 0 ] * k
i = 0
while i >= 0:
a[i] += 1
yield a[0:i+1]
r = a[i]+1
i += 1
while i < k and r <= n:
a[i] = r
yield a[0:i+1]
i += 1
r += 1
i -= 1
if a[i] >= n:
i -= 1
# all subsets of k or less elements of 1..n with max difference <= w
def subsets5(n,k,w):
a = [ 0 ] * k
i = 0
while i >= 0:
a[i] += 1
yield a[0:i+1]
r = a[i]+1
i += 1
while i < k and r <= n and r-a[0] <= w:
a[i] = r
yield a[0:i+1]
i += 1
r += 1
i -= 1
if a[i] >= n or a[i]+1-a[0] > w:
i -= 1
# all subsets of 1..n having exactly k elements
def subsets(n,k):
a = range(1,k+1)
while a[0] <= n+1-k:
yield a
# find i
i = k-1
while i >= 0 and a[i]+k-i >= n+1: i -= 1
r = a[i]
a[i] += 1
j = 2
i += 1
while i < k:
a[i] = r + j
i += 1
j += 1
# all subsets of 1..n having exactly k elements and whose max
# difference is w
def subsets2(n,k,w):
if k > w: return
a = range(1,k+1)
while a[0] <= n+1-k:
yield a
i = k-1
while i >= 0 and (a[i]+k-i >= n+1 or a[i]+k-i-a[0] > w) : i -= 1
r = a[i]
a[i] += 1
j = 2
i += 1
while i < k:
a[i] = r + j
i += 1
j += 1
def test(n,k,w):
s1 = [ s for s in subsets4(n,k) if s[-1] - s[0] <= w ]
s2 = [ s for s in subsets5(n,k,w) ]
if s1 == s2:
print "OK", n,k,w
return 0
else:
print "NOT OK", n, k, w
return 1
# for s in subsets2(10,3,4): print s
# for s in subsets(10,3): print s
def megatest():
failed = 0
for n in xrange(10,20):
for k in xrange(1,n+1):
for w in xrange(k,n+1):
failed += test(n,k,w)
print "failed:", failed