查找具有最少元素数的列表的所有无序分区
Find all unordered partitions of a list with a minimum number of elements
给出了一个元素列表。我想有所有的可能性把这个列表分成任意数量的分区,这样每个分区至少有 x 个元素。列表中分区的顺序和分区中元素的顺序无关紧要。
例如。:
List = [1,2,3,4] get_partitions(list,2) 应该 return:
[[1,2,3,4],
[[1,2],[3,4]],
[[1,3],[2,4]],
[[1,4],[2,3]]]
List = [1,2,3,4] get_partitions(list,1) 应该 return:
[[1,2,3,4],
[[1,2],[3,4]],
[[1,3],[2,4]],
[[1,4],[2,3]],
[[1],[2],[3,4],
...]
我已经开始在 Python 中递归地实现它,但是我创建了太多冗余案例。出于运行时的原因,我想提前减少这些情况,而不是事后用 frozensets 删除它们,例如。
from itertools import combinations
import numpy as np
def get_partitions(liste,min,max=None):
if max is None:
# Default setting
max = len(liste)
if len(liste) == min :
# Termination Criterium
yield [liste]
else:
for r in range(np.min([len(liste),max]),min-1,-1):
# max for avoiding cases like: [[1,2,3,4],[2,6]] and [[2,6],[1,2,3,4]]
for perm in combinations(liste,r):
rest = [i for i in liste if i not in perm]
if len(rest) >= min:
for recurse in get_partitions(rest,min,r):
yield [list(perm)] + list(recurse)
if len(rest) == 0:
# r == len(liste)
yield [list(perm)]
这导致:
[[[1, 2, 3, 4]],
[[1, 2], [3, 4]],
[[1, 3], [2, 4]],
[[1, 4], [2, 3]],
[[2, 3], [1, 4]],
[[2, 4], [1, 3]],
[[3, 4], [1, 2]]]
在此先感谢您的帮助。
尝试使用@mozway 的答案并将其扩展为递归版本导致我:
def get_partitions(iterable, minl=2):
s = set(iterable)
for r in range(minl, len(s)//2+1):
if len(s)//2 != r:
for c in combinations(s, r):
for recurse in get_partitions(list(s.difference(c)), minl):
yield [list(c),*recurse]
else:
for c in islice(combinations(s, r), comb(len(s),r)//2):
for recurse in get_partitions(list(s.difference(c)), minl):
yield [list(c),*recurse]
yield [list(s)]
对于示例 list = [1,2,3,4], x=1,它将可能性的数量从 47(我最初的尝试)减少到 19。仍然有很多多余的情况。
[[[1], [2], [3], [4]], <----
[[1], [2], [3, 4]],
[[1], [2, 3, 4]],
[[2], [1], [3], [4]], <----
[[2], [1], [3, 4]],
[[2], [1, 3, 4]],
[[3], [1], [2], [4]], <----
[[3], [1], [2, 4]],
[[3], [1, 2, 4]],
[[4], [1], [2], [3]], <----
[[4], [1], [2, 3]],
[[4], [1, 2, 3]],
[[1, 2], [3], [4]],
[[1, 2], [3, 4]],
[[1, 3], [2], [4]],
[[1, 3], [2, 4]],
[[1, 4], [2], [3]],
[[1, 4], [2, 3]],
[[1, 2, 3, 4]]]
这看起来像是带有补码的部分幂集。
您不需要定义最大值,设置最小值后它就固定了。
此外,包含全集是一个特例(从技术上讲,全集是集合+空集,因此违反了最小条件)
要限制组合的数量,只要你有总长度的前半部分就可以了。如果您有一个偶数输入并且正在选择一半分区,则只计算一半的组合(使用itertools.islice
):
您可以使用:
from itertools import combinations
from math import comb
def get_partitions(iterable, minl=2):
s = set(iterable)
return [list(s)]+\
[[list(c), list(s.difference(c))]
for r in range(minl, len(s)//2+1)
for c in ( combinations(s, r) if len(s)//2 != r else
islice(combinations(s, r), comb(len(s),r)//2))
]
out = get_partitions([1,2,3,4])
输出:
[[1, 2, 3, 4],
[[1, 2], [3, 4]],
[[1, 3], [2, 4]],
[[1, 4], [2, 3]]]
其他示例:
>>> get_partitions([1,2,3,4,5,6], 1)
[[1, 2, 3, 4, 5, 6],
[[1], [2, 3, 4, 5, 6]],
[[2], [1, 3, 4, 5, 6]],
[[3], [1, 2, 4, 5, 6]],
[[4], [1, 2, 3, 5, 6]],
[[5], [1, 2, 3, 4, 6]],
[[6], [1, 2, 3, 4, 5]],
[[1, 2], [3, 4, 5, 6]],
[[1, 3], [2, 4, 5, 6]],
[[1, 4], [2, 3, 5, 6]],
[[1, 5], [2, 3, 4, 6]],
[[1, 6], [2, 3, 4, 5]],
[[2, 3], [1, 4, 5, 6]],
[[2, 4], [1, 3, 5, 6]],
[[2, 5], [1, 3, 4, 6]],
[[2, 6], [1, 3, 4, 5]],
[[3, 4], [1, 2, 5, 6]],
[[3, 5], [1, 2, 4, 6]],
[[3, 6], [1, 2, 4, 5]],
[[4, 5], [1, 2, 3, 6]],
[[4, 6], [1, 2, 3, 5]],
[[5, 6], [1, 2, 3, 4]],
[[1, 2, 3], [4, 5, 6]],
[[1, 2, 4], [3, 5, 6]],
[[1, 2, 5], [3, 4, 6]],
[[1, 2, 6], [3, 4, 5]],
[[1, 3, 4], [2, 5, 6]],
[[1, 3, 5], [2, 4, 6]],
[[1, 3, 6], [2, 4, 5]],
[[1, 4, 5], [2, 3, 6]],
[[1, 4, 6], [2, 3, 5]],
[[1, 5, 6], [2, 3, 4]]]
这是一个 long-ish 解决方案。在生成分区时没有使用拒绝,所以从这个意义上说,这可能有点有效。不过,还有很多东西需要优化。
示例:
list(get_partitions(range(3), 1))
# [[[0, 1, 2]], [[0], [1, 2]], [[1], [0, 2]], [[2], [0, 1]], [[0], [1], [2]]]
以下是其工作原理的概述:
- 首先,定义一个辅助函数
split
是很有用的,它接受一个列表 lst
和一个整数 n
和 return 所有将列表拆分成的方法两组,一组大小为 n
,另一组大小为 len(lst) - n
.
- 其次,我们需要解决一个更简单的问题:如何将列表
lst
分成 n
个组,每个组的大小为 k
。当然,这只有在len(lst) = n * k
时才有可能。这是在 get_partitions_same_size
函数中实现的。这个想法是始终在第一组中包含 lst
的第一个元素并递归。
- 第三,我们需要找到
len(lst)
的所有整数个分区。我从 this 线程复制了代码。
- 第四,对于
len(lst)
的每个整数划分p
,我们需要找到所有划分lst
的方法p
。
- 例如,说
len(lst) == 7
和 p = 3 + 2 + 2
。在这种情况下,我们可以选择任意三个元素作为第一组,剩下的任意两个元素作为第二组,最后的第三组不做选择。
- 这会引入冗余,因为我们可以获得两个分区,它们仅在最后两组的顺序上有所不同。
- 为了解决这个问题,我们可以通过计算每个大小的组数来表示一个分区。在此示例中,
p
对应于 p_scheme = [(3, 1), (2, 2)]
。函数 get_partitions_helper
接收列表 lst
和“分区方案”p_scheme
,并且 return 没有 double-counting 所有相应的分区。这是使用第二步中的 get_partitions_same_size
的地方。
- 最后,一切都在
get_partitions
中:我们遍历 len(lst)
的整数分区和 return 对应于每个可能的整数分区的所有可能的列表分区。
这是一个有趣的问题,非常欢迎对错误和优化提出意见。
from itertools import combinations
from collections import Counter
# from this thread:
#
def partitions(n, I=1):
yield (n,)
for i in range(I, n//2 + 1):
for p in partitions(n-i, i):
yield (i,) + p
def split(lst, n):
'''
return all ways to split lst into two groups,
with n and len(lst) - n elements respectively
'''
assert len(lst) >= n
# handle special case of len(lst) == 2 * n
if len(lst) == 2 * n:
for first, second in split(lst[1:], n-1):
yield [lst[0], *first], second
else:
for comb in combinations(range(len(lst)), n):
comb = set(comb)
first = [x for i, x in enumerate(lst) if i in comb]
second = [x for i, x in enumerate(lst) if i not in comb]
yield first, second
def get_partitions_same_size(lst, n, k):
# print(lst, n, k)
'return all ways to partition lst into n parts each of size k up to order'
if len(lst) != n * k:
print(lst, n, k)
assert len(lst) == n * k
if n == 0 and len(lst) == 0:
yield []
# case when group size is one
elif k == 1:
yield [[x] for x in lst]
# otherwise, without loss, the first element of lst goes into the first group
else:
for first, rest in split(lst[1:], k-1):
for rec_call in get_partitions_same_size(rest, n-1, k):
yield [[lst[0], *first], *rec_call]
def get_partitions_helper(lst, p_scheme):
"""
return all ways to partition lst into groups according to a partition scheme p_scheme
p_scheme describes an integer partition of len(lst)
for example, if len(lst) == 5, then possible integer partitions are:
[(5,), (1, 4), (1, 1, 3), (1, 1, 1, 2), (1, 1, 1, 1, 1), (1, 2, 2), (2, 3)]
for each, we count the number of groups of a given size
the corresponding partition schemes are:
[[(5, 1)],
[(1, 1), (4, 1)],
[(1, 2), (3, 1)],
[(1, 3), (2, 1)],
[(1, 5)],
[(1, 1), (2, 2)],
[(2, 1), (3, 1)]]
"""
if not lst and not p_scheme:
yield []
return
assert len(lst) == sum(a * b for a, b in p_scheme)
group_size, group_count = p_scheme[0]
num_elts = group_size * group_count
for first, second in split(lst, num_elts):
for firsts in get_partitions_same_size(first, group_count, group_size):
for seconds in get_partitions_helper(second, p_scheme[1:]):
yield [*firsts, *seconds]
def get_partitions(lst, min_):
"""
get all partitions of lst into groups s.t. each group has at least min_ elements
up to order (of groups and elements within a group)
"""
for partition in partitions(len(lst), min_):
p_scheme = list(Counter(partition).items())
yield from get_partitions_helper(lst, p_scheme)
给出了一个元素列表。我想有所有的可能性把这个列表分成任意数量的分区,这样每个分区至少有 x 个元素。列表中分区的顺序和分区中元素的顺序无关紧要。 例如。: List = [1,2,3,4] get_partitions(list,2) 应该 return:
[[1,2,3,4],
[[1,2],[3,4]],
[[1,3],[2,4]],
[[1,4],[2,3]]]
List = [1,2,3,4] get_partitions(list,1) 应该 return:
[[1,2,3,4],
[[1,2],[3,4]],
[[1,3],[2,4]],
[[1,4],[2,3]],
[[1],[2],[3,4],
...]
我已经开始在 Python 中递归地实现它,但是我创建了太多冗余案例。出于运行时的原因,我想提前减少这些情况,而不是事后用 frozensets 删除它们,例如。
from itertools import combinations
import numpy as np
def get_partitions(liste,min,max=None):
if max is None:
# Default setting
max = len(liste)
if len(liste) == min :
# Termination Criterium
yield [liste]
else:
for r in range(np.min([len(liste),max]),min-1,-1):
# max for avoiding cases like: [[1,2,3,4],[2,6]] and [[2,6],[1,2,3,4]]
for perm in combinations(liste,r):
rest = [i for i in liste if i not in perm]
if len(rest) >= min:
for recurse in get_partitions(rest,min,r):
yield [list(perm)] + list(recurse)
if len(rest) == 0:
# r == len(liste)
yield [list(perm)]
这导致:
[[[1, 2, 3, 4]],
[[1, 2], [3, 4]],
[[1, 3], [2, 4]],
[[1, 4], [2, 3]],
[[2, 3], [1, 4]],
[[2, 4], [1, 3]],
[[3, 4], [1, 2]]]
在此先感谢您的帮助。
尝试使用@mozway 的答案并将其扩展为递归版本导致我:
def get_partitions(iterable, minl=2):
s = set(iterable)
for r in range(minl, len(s)//2+1):
if len(s)//2 != r:
for c in combinations(s, r):
for recurse in get_partitions(list(s.difference(c)), minl):
yield [list(c),*recurse]
else:
for c in islice(combinations(s, r), comb(len(s),r)//2):
for recurse in get_partitions(list(s.difference(c)), minl):
yield [list(c),*recurse]
yield [list(s)]
对于示例 list = [1,2,3,4], x=1,它将可能性的数量从 47(我最初的尝试)减少到 19。仍然有很多多余的情况。
[[[1], [2], [3], [4]], <----
[[1], [2], [3, 4]],
[[1], [2, 3, 4]],
[[2], [1], [3], [4]], <----
[[2], [1], [3, 4]],
[[2], [1, 3, 4]],
[[3], [1], [2], [4]], <----
[[3], [1], [2, 4]],
[[3], [1, 2, 4]],
[[4], [1], [2], [3]], <----
[[4], [1], [2, 3]],
[[4], [1, 2, 3]],
[[1, 2], [3], [4]],
[[1, 2], [3, 4]],
[[1, 3], [2], [4]],
[[1, 3], [2, 4]],
[[1, 4], [2], [3]],
[[1, 4], [2, 3]],
[[1, 2, 3, 4]]]
这看起来像是带有补码的部分幂集。
您不需要定义最大值,设置最小值后它就固定了。
此外,包含全集是一个特例(从技术上讲,全集是集合+空集,因此违反了最小条件)
要限制组合的数量,只要你有总长度的前半部分就可以了。如果您有一个偶数输入并且正在选择一半分区,则只计算一半的组合(使用itertools.islice
):
您可以使用:
from itertools import combinations
from math import comb
def get_partitions(iterable, minl=2):
s = set(iterable)
return [list(s)]+\
[[list(c), list(s.difference(c))]
for r in range(minl, len(s)//2+1)
for c in ( combinations(s, r) if len(s)//2 != r else
islice(combinations(s, r), comb(len(s),r)//2))
]
out = get_partitions([1,2,3,4])
输出:
[[1, 2, 3, 4],
[[1, 2], [3, 4]],
[[1, 3], [2, 4]],
[[1, 4], [2, 3]]]
其他示例:
>>> get_partitions([1,2,3,4,5,6], 1)
[[1, 2, 3, 4, 5, 6],
[[1], [2, 3, 4, 5, 6]],
[[2], [1, 3, 4, 5, 6]],
[[3], [1, 2, 4, 5, 6]],
[[4], [1, 2, 3, 5, 6]],
[[5], [1, 2, 3, 4, 6]],
[[6], [1, 2, 3, 4, 5]],
[[1, 2], [3, 4, 5, 6]],
[[1, 3], [2, 4, 5, 6]],
[[1, 4], [2, 3, 5, 6]],
[[1, 5], [2, 3, 4, 6]],
[[1, 6], [2, 3, 4, 5]],
[[2, 3], [1, 4, 5, 6]],
[[2, 4], [1, 3, 5, 6]],
[[2, 5], [1, 3, 4, 6]],
[[2, 6], [1, 3, 4, 5]],
[[3, 4], [1, 2, 5, 6]],
[[3, 5], [1, 2, 4, 6]],
[[3, 6], [1, 2, 4, 5]],
[[4, 5], [1, 2, 3, 6]],
[[4, 6], [1, 2, 3, 5]],
[[5, 6], [1, 2, 3, 4]],
[[1, 2, 3], [4, 5, 6]],
[[1, 2, 4], [3, 5, 6]],
[[1, 2, 5], [3, 4, 6]],
[[1, 2, 6], [3, 4, 5]],
[[1, 3, 4], [2, 5, 6]],
[[1, 3, 5], [2, 4, 6]],
[[1, 3, 6], [2, 4, 5]],
[[1, 4, 5], [2, 3, 6]],
[[1, 4, 6], [2, 3, 5]],
[[1, 5, 6], [2, 3, 4]]]
这是一个 long-ish 解决方案。在生成分区时没有使用拒绝,所以从这个意义上说,这可能有点有效。不过,还有很多东西需要优化。
示例:
list(get_partitions(range(3), 1))
# [[[0, 1, 2]], [[0], [1, 2]], [[1], [0, 2]], [[2], [0, 1]], [[0], [1], [2]]]
以下是其工作原理的概述:
- 首先,定义一个辅助函数
split
是很有用的,它接受一个列表lst
和一个整数n
和 return 所有将列表拆分成的方法两组,一组大小为n
,另一组大小为len(lst) - n
. - 其次,我们需要解决一个更简单的问题:如何将列表
lst
分成n
个组,每个组的大小为k
。当然,这只有在len(lst) = n * k
时才有可能。这是在get_partitions_same_size
函数中实现的。这个想法是始终在第一组中包含lst
的第一个元素并递归。 - 第三,我们需要找到
len(lst)
的所有整数个分区。我从 this 线程复制了代码。 - 第四,对于
len(lst)
的每个整数划分p
,我们需要找到所有划分lst
的方法p
。- 例如,说
len(lst) == 7
和p = 3 + 2 + 2
。在这种情况下,我们可以选择任意三个元素作为第一组,剩下的任意两个元素作为第二组,最后的第三组不做选择。 - 这会引入冗余,因为我们可以获得两个分区,它们仅在最后两组的顺序上有所不同。
- 为了解决这个问题,我们可以通过计算每个大小的组数来表示一个分区。在此示例中,
p
对应于p_scheme = [(3, 1), (2, 2)]
。函数get_partitions_helper
接收列表lst
和“分区方案”p_scheme
,并且 return 没有 double-counting 所有相应的分区。这是使用第二步中的get_partitions_same_size
的地方。
- 例如,说
- 最后,一切都在
get_partitions
中:我们遍历len(lst)
的整数分区和 return 对应于每个可能的整数分区的所有可能的列表分区。
这是一个有趣的问题,非常欢迎对错误和优化提出意见。
from itertools import combinations
from collections import Counter
# from this thread:
#
def partitions(n, I=1):
yield (n,)
for i in range(I, n//2 + 1):
for p in partitions(n-i, i):
yield (i,) + p
def split(lst, n):
'''
return all ways to split lst into two groups,
with n and len(lst) - n elements respectively
'''
assert len(lst) >= n
# handle special case of len(lst) == 2 * n
if len(lst) == 2 * n:
for first, second in split(lst[1:], n-1):
yield [lst[0], *first], second
else:
for comb in combinations(range(len(lst)), n):
comb = set(comb)
first = [x for i, x in enumerate(lst) if i in comb]
second = [x for i, x in enumerate(lst) if i not in comb]
yield first, second
def get_partitions_same_size(lst, n, k):
# print(lst, n, k)
'return all ways to partition lst into n parts each of size k up to order'
if len(lst) != n * k:
print(lst, n, k)
assert len(lst) == n * k
if n == 0 and len(lst) == 0:
yield []
# case when group size is one
elif k == 1:
yield [[x] for x in lst]
# otherwise, without loss, the first element of lst goes into the first group
else:
for first, rest in split(lst[1:], k-1):
for rec_call in get_partitions_same_size(rest, n-1, k):
yield [[lst[0], *first], *rec_call]
def get_partitions_helper(lst, p_scheme):
"""
return all ways to partition lst into groups according to a partition scheme p_scheme
p_scheme describes an integer partition of len(lst)
for example, if len(lst) == 5, then possible integer partitions are:
[(5,), (1, 4), (1, 1, 3), (1, 1, 1, 2), (1, 1, 1, 1, 1), (1, 2, 2), (2, 3)]
for each, we count the number of groups of a given size
the corresponding partition schemes are:
[[(5, 1)],
[(1, 1), (4, 1)],
[(1, 2), (3, 1)],
[(1, 3), (2, 1)],
[(1, 5)],
[(1, 1), (2, 2)],
[(2, 1), (3, 1)]]
"""
if not lst and not p_scheme:
yield []
return
assert len(lst) == sum(a * b for a, b in p_scheme)
group_size, group_count = p_scheme[0]
num_elts = group_size * group_count
for first, second in split(lst, num_elts):
for firsts in get_partitions_same_size(first, group_count, group_size):
for seconds in get_partitions_helper(second, p_scheme[1:]):
yield [*firsts, *seconds]
def get_partitions(lst, min_):
"""
get all partitions of lst into groups s.t. each group has at least min_ elements
up to order (of groups and elements within a group)
"""
for partition in partitions(len(lst), min_):
p_scheme = list(Counter(partition).items())
yield from get_partitions_helper(lst, p_scheme)