在 python 中重建集合和交集
rebuilding sets and intersection in python
我有以下问题,最初我有 3 套:
A [1,2,3],B [2,3,4,5] and C [2,5,6,7]
接下来我考虑两个集合的交集和所有集合的交集
AB [2,3],
AC [2],
BC [2,5] and
ABC [2] (Full intersection)
现在我想要的是在以下条件下对我的集合进行新的重新排序:
1. 保留每组的基数。
2. 保留所有可能交集的基数。
例如我应该得到
A [3,4,7],
B [1,3,7,5] and
C [2,6,5,7]
注意 A 和 B 的新交集(现在是 [3,7])与之前的交集一样有 2 个元素,类似于交集 AC 、BC 和完全交集 ABC,当然还有 A 的基数, B和C分别继续为3、4和4。
最后,我需要尽可能多地执行重组过程,我理解这取决于集合的基数和集合总数。
这可以帮助您入门吗?
import random
import itertools
def same_intersect_len(x, y, z):
return (len(x & y) == len(a & b) and
len(x & z) == len(a & c) and
len(z & y) == len(b & c) and
len(x & y & z) == len(a & b & c))
a = set(random.sample(range(0, 10), 1))
b = set(random.sample(range(0, 10), 2))
c = set(random.sample(range(0, 10), 3))
elements = list(a) + list(b) + list(c)
ps = set(itertools.permutations(elements))
new_a = list()
new_b = list()
new_c = list()
for p in ps:
candidate_a = {p[len(a) - 1]} - a
candidate_b = set(p[len(a):len(a) + len(b) - 1]) - b
candidate_c = set(p[len(a) + len(b):len(a) + len(b) + len(c) - 1]) - c
if same_intersect_len(
candidate_a,
candidate_b,
candidate_c):
new_a.append(candidate_a)
new_b.append(candidate_b)
new_c.append(candidate_c)
您可以生成原始元素集的所有可能排列,并将它们用作从原始元素到新配置的 "mappings"。因此,例如,2345671
将每个数字映射到下一个数字并环绕;这将创建集合:
A = {2, 3, 4} # from {1, 2, 3}
B = {3, 4, 5, 6} # from {2, 3, 4, 5}
C = {3, 6, 7, 1} # from {2, 5, 6, 7}
使用 itertools
:
非常简单
from itertools import permutations
def all_configurations(*sets):
elements = list(set.union(*sets))
for perm in permutations(elements):
map = {old: new for old, new in zip(elements, perm)}
new_sets = [{map[k] for k in old_set} for old_set in sets]
yield new_sets
A = {1, 2, 3}
B = {2, 3, 4, 5}
C = {2, 5, 6, 7}
confs = all_configurations(A, B, C)
for conf in confs:
print(conf)
# Or: Ax, Bx, Cx = conf
请注意我是如何使用 yield
语句的,这将一次生成一个新的排列,而不是一次创建它们,因此您可以将其用于大量元素而不会耗尽您的内存.此外,所写的函数适用于任意数量的输入集。
当然,这肯定会生成一些重复项(例如,在您的示例中,将 6 映射到 7 以及将 7 映射到 6 不会改变任何内容),但它肯定也会生成每个有效选项。一些示例输出:
[{2, 4, 6}, {3, 4, 5, 6}, {1, 3, 6, 7}]
[{4, 5, 7}, {1, 3, 4, 7}, {2, 3, 6, 7}]
[{3, 5, 6}, {1, 3, 5, 7}, {2, 3, 4, 7}]
[{1, 6, 7}, {1, 2, 4, 7}, {2, 3, 5, 7}]
EDIT:为了得到固定数量的不重复排列,可以把原来的代码改成return一个tuple
of frozensets
而不是集合列表,这样整个事情都可以散列,所以你只得到唯一的。然后您可以将内容添加到输出集中,直到它达到您想要的基数:
from itertools import permutations
def all_configurations(*sets):
elements = list(set.union(*sets))
for perm in permutations(elements):
map = {old: new for old, new in zip(elements, perm)}
new_sets = tuple(frozenset(map[k] for k in old_set) for old_set in sets)
yield new_sets
def n_configurations(n, *sets):
output = set()
confs = all_configurations(*sets)
for conf in confs:
output.add(conf)
if len(output) >= n:
break
return output
A = {1, 2, 3}
B = {2, 3, 4, 5}
C = {2, 5, 6, 7}
confs = n_configurations(10, A, B, C)
for a, b, c in confs:
print(a, b, c)
这会产生以下 10 个配置:
(frozenset([1, 2, 3]), frozenset([2, 3, 5, 6]), frozenset([2, 4, 6, 7]))
(frozenset([1, 2, 3]), frozenset([2, 3, 4, 6]), frozenset([2, 5, 6, 7]))
(frozenset([1, 2, 3]), frozenset([2, 3, 6, 7]), frozenset([2, 4, 5, 7]))
(frozenset([1, 2, 3]), frozenset([2, 3, 4, 6]), frozenset([2, 4, 5, 7]))
(frozenset([1, 2, 3]), frozenset([2, 3, 4, 5]), frozenset([2, 4, 6, 7]))
(frozenset([1, 2, 3]), frozenset([2, 3, 5, 6]), frozenset([2, 4, 5, 7]))
(frozenset([1, 2, 3]), frozenset([2, 3, 4, 7]), frozenset([2, 5, 6, 7]))
(frozenset([1, 2, 3]), frozenset([2, 3, 4, 5]), frozenset([2, 5, 6, 7]))
(frozenset([1, 2, 3]), frozenset([2, 3, 4, 7]), frozenset([2, 4, 5, 6]))
(frozenset([1, 2, 3]), frozenset([2, 3, 5, 7]), frozenset([2, 4, 6, 7]))
我有以下问题,最初我有 3 套:
A [1,2,3],B [2,3,4,5] and C [2,5,6,7]
接下来我考虑两个集合的交集和所有集合的交集
AB [2,3],
AC [2],
BC [2,5] and
ABC [2] (Full intersection)
现在我想要的是在以下条件下对我的集合进行新的重新排序: 1. 保留每组的基数。 2. 保留所有可能交集的基数。
例如我应该得到
A [3,4,7],
B [1,3,7,5] and
C [2,6,5,7]
注意 A 和 B 的新交集(现在是 [3,7])与之前的交集一样有 2 个元素,类似于交集 AC 、BC 和完全交集 ABC,当然还有 A 的基数, B和C分别继续为3、4和4。 最后,我需要尽可能多地执行重组过程,我理解这取决于集合的基数和集合总数。
这可以帮助您入门吗?
import random
import itertools
def same_intersect_len(x, y, z):
return (len(x & y) == len(a & b) and
len(x & z) == len(a & c) and
len(z & y) == len(b & c) and
len(x & y & z) == len(a & b & c))
a = set(random.sample(range(0, 10), 1))
b = set(random.sample(range(0, 10), 2))
c = set(random.sample(range(0, 10), 3))
elements = list(a) + list(b) + list(c)
ps = set(itertools.permutations(elements))
new_a = list()
new_b = list()
new_c = list()
for p in ps:
candidate_a = {p[len(a) - 1]} - a
candidate_b = set(p[len(a):len(a) + len(b) - 1]) - b
candidate_c = set(p[len(a) + len(b):len(a) + len(b) + len(c) - 1]) - c
if same_intersect_len(
candidate_a,
candidate_b,
candidate_c):
new_a.append(candidate_a)
new_b.append(candidate_b)
new_c.append(candidate_c)
您可以生成原始元素集的所有可能排列,并将它们用作从原始元素到新配置的 "mappings"。因此,例如,2345671
将每个数字映射到下一个数字并环绕;这将创建集合:
A = {2, 3, 4} # from {1, 2, 3}
B = {3, 4, 5, 6} # from {2, 3, 4, 5}
C = {3, 6, 7, 1} # from {2, 5, 6, 7}
使用 itertools
:
from itertools import permutations
def all_configurations(*sets):
elements = list(set.union(*sets))
for perm in permutations(elements):
map = {old: new for old, new in zip(elements, perm)}
new_sets = [{map[k] for k in old_set} for old_set in sets]
yield new_sets
A = {1, 2, 3}
B = {2, 3, 4, 5}
C = {2, 5, 6, 7}
confs = all_configurations(A, B, C)
for conf in confs:
print(conf)
# Or: Ax, Bx, Cx = conf
请注意我是如何使用 yield
语句的,这将一次生成一个新的排列,而不是一次创建它们,因此您可以将其用于大量元素而不会耗尽您的内存.此外,所写的函数适用于任意数量的输入集。
当然,这肯定会生成一些重复项(例如,在您的示例中,将 6 映射到 7 以及将 7 映射到 6 不会改变任何内容),但它肯定也会生成每个有效选项。一些示例输出:
[{2, 4, 6}, {3, 4, 5, 6}, {1, 3, 6, 7}]
[{4, 5, 7}, {1, 3, 4, 7}, {2, 3, 6, 7}]
[{3, 5, 6}, {1, 3, 5, 7}, {2, 3, 4, 7}]
[{1, 6, 7}, {1, 2, 4, 7}, {2, 3, 5, 7}]
EDIT:为了得到固定数量的不重复排列,可以把原来的代码改成return一个tuple
of frozensets
而不是集合列表,这样整个事情都可以散列,所以你只得到唯一的。然后您可以将内容添加到输出集中,直到它达到您想要的基数:
from itertools import permutations
def all_configurations(*sets):
elements = list(set.union(*sets))
for perm in permutations(elements):
map = {old: new for old, new in zip(elements, perm)}
new_sets = tuple(frozenset(map[k] for k in old_set) for old_set in sets)
yield new_sets
def n_configurations(n, *sets):
output = set()
confs = all_configurations(*sets)
for conf in confs:
output.add(conf)
if len(output) >= n:
break
return output
A = {1, 2, 3}
B = {2, 3, 4, 5}
C = {2, 5, 6, 7}
confs = n_configurations(10, A, B, C)
for a, b, c in confs:
print(a, b, c)
这会产生以下 10 个配置:
(frozenset([1, 2, 3]), frozenset([2, 3, 5, 6]), frozenset([2, 4, 6, 7]))
(frozenset([1, 2, 3]), frozenset([2, 3, 4, 6]), frozenset([2, 5, 6, 7]))
(frozenset([1, 2, 3]), frozenset([2, 3, 6, 7]), frozenset([2, 4, 5, 7]))
(frozenset([1, 2, 3]), frozenset([2, 3, 4, 6]), frozenset([2, 4, 5, 7]))
(frozenset([1, 2, 3]), frozenset([2, 3, 4, 5]), frozenset([2, 4, 6, 7]))
(frozenset([1, 2, 3]), frozenset([2, 3, 5, 6]), frozenset([2, 4, 5, 7]))
(frozenset([1, 2, 3]), frozenset([2, 3, 4, 7]), frozenset([2, 5, 6, 7]))
(frozenset([1, 2, 3]), frozenset([2, 3, 4, 5]), frozenset([2, 5, 6, 7]))
(frozenset([1, 2, 3]), frozenset([2, 3, 4, 7]), frozenset([2, 4, 5, 6]))
(frozenset([1, 2, 3]), frozenset([2, 3, 5, 7]), frozenset([2, 4, 6, 7]))