提高多个列表的笛卡尔积的性能
Improving the performance of cartesian product of multiple lists
我正在使用递归实现 python 中多个集合的笛卡尔积。
这是我的实现:
def car_two_sets(a, b):
result = []
for x in a:
for y in b:
result.append(str(x) + str(y))
return result
def car_multiple_sets(lists):
if len(lists) == 2:
return car_two_sets(lists[0], lists[1])
else:
return car_multiple_sets([car_two_sets(lists[0], lists[1])] + lists[2:])
a = [1, 2]
b = [3, 4]
c = [6, 7, 8]
lists = [a, b, c]
print(car_multiple_sets(lists))
代码工作正常,但对于较大数量的集合,速度很慢。
关于如何改进此实施的任何想法?
我想到了memoization,但是找不到任何重复的计算来缓存。
我不想使用 itertools 函数。
具有三倍以上列表的基准:
221 us 223 us 223 us h
225 us 227 us 227 us k3
228 us 229 us 229 us k2
267 us 267 us 267 us k
340 us 341 us 342 us g
1177 us 1185 us 1194 us car_multiple_sets
3057 us 3082 us 3084 us f
代码(Try it online!):
from timeit import repeat
from random import shuffle
from bisect import insort
from itertools import product, starmap
from operator import concat
def car_two_sets(a, b):
result = []
for x in a:
for y in b:
result.append(str(x) + str(y))
return result
def car_multiple_sets(lists):
if len(lists) == 2:
return car_two_sets(lists[0], lists[1])
else:
return car_multiple_sets([car_two_sets(lists[0], lists[1])] + lists[2:])
def f(lists):
return [''.join(map(str,a)) for a in product(*lists)]
def g(lists):
return [''.join(a) for a in product(*[map(str,a)for a in lists])]
def h(lists):
return list(map(''.join, product(*[map(str,a)for a in lists])))
def k(lists):
result = ['']
for lst in lists:
lst = [*map(str, lst)]
result = [S + s for S in result for s in lst]
return result
def k2(lists):
result = ['']
for lst in lists:
result = list(starmap(concat, product(result, map(str, lst))))
return result
def k3(lists):
result = ['']
for lst in lists:
result = starmap(concat, product(result, map(str, lst)))
return list(result)
funcs = [car_multiple_sets, f, g, h, k, k2, k3]
a = [1, 2]
b = [3, 4]
c = [6, 7, 8]
lists = [a, b, c]
for func in funcs:
print(func(lists), func.__name__)
times = {func: [] for func in funcs}
lists *= 3
for _ in range(50):
shuffle(funcs)
for func in funcs:
t = min(repeat(lambda: func(lists), number=1))
insort(times[func], t)
for func in sorted(funcs, key=times.get):
print(*('%4d us ' % (t * 1e6) for t in times[func][:3]), func.__name__)
(f
和 g
来自当前已删除的答案,k
功能来自我)
几点评论:
如果你仔细想想,car_multiple_sets
所做的就是迭代它的参数 lists
。您正在使用递归来执行此操作,但也可以使用 for
循环对列表进行迭代。碰巧递归有点慢,memory-inefficient in python,所以 for
-loops 更可取。
您无需转换为 str
即可将整数组合在一起。您可以使用 tuples
。这正是他们的目的。将 str(x)+str(y)
替换为 (x,y)
以获得一对两个整数而不是字符串。
def car_two_sets(a, b, unpack=False):
if unpack:
return [(*x, y) for x in a for y in b]
else:
return [(x, y) for x in a for y in b]
def car_multiple_sets(lists):
if len(lists) == 0:
return [()] # empty Cartesian product has one element, the empty tuple
elif len(lists) == 1:
return list(zip(lists[0])) # list of "1-uples" for homogeneity
else:
result = car_two_sets(lists[0], lists[1])
for l in lists[2:]:
result = car_two_sets(result, l, unpack=True)
return result
print( car_multiple_sets((range(3), 'abc', range(2))) )
# [(0, 'a', 0), (0, 'a', 1), (0, 'b', 0), (0, 'b', 1), (0, 'c', 0), (0, 'c', 1),
# (1, 'a', 0), (1, 'a', 1), (1, 'b', 0), (1, 'b', 1), (1, 'c', 0), (1, 'c', 1),
# (2, 'a', 0), (2, 'a', 1), (2, 'b', 0), (2, 'b', 1), (2, 'c', 0), (2, 'c', 1)]
我正在使用递归实现 python 中多个集合的笛卡尔积。
这是我的实现:
def car_two_sets(a, b):
result = []
for x in a:
for y in b:
result.append(str(x) + str(y))
return result
def car_multiple_sets(lists):
if len(lists) == 2:
return car_two_sets(lists[0], lists[1])
else:
return car_multiple_sets([car_two_sets(lists[0], lists[1])] + lists[2:])
a = [1, 2]
b = [3, 4]
c = [6, 7, 8]
lists = [a, b, c]
print(car_multiple_sets(lists))
代码工作正常,但对于较大数量的集合,速度很慢。 关于如何改进此实施的任何想法? 我想到了memoization,但是找不到任何重复的计算来缓存。
我不想使用 itertools 函数。
具有三倍以上列表的基准:
221 us 223 us 223 us h
225 us 227 us 227 us k3
228 us 229 us 229 us k2
267 us 267 us 267 us k
340 us 341 us 342 us g
1177 us 1185 us 1194 us car_multiple_sets
3057 us 3082 us 3084 us f
代码(Try it online!):
from timeit import repeat
from random import shuffle
from bisect import insort
from itertools import product, starmap
from operator import concat
def car_two_sets(a, b):
result = []
for x in a:
for y in b:
result.append(str(x) + str(y))
return result
def car_multiple_sets(lists):
if len(lists) == 2:
return car_two_sets(lists[0], lists[1])
else:
return car_multiple_sets([car_two_sets(lists[0], lists[1])] + lists[2:])
def f(lists):
return [''.join(map(str,a)) for a in product(*lists)]
def g(lists):
return [''.join(a) for a in product(*[map(str,a)for a in lists])]
def h(lists):
return list(map(''.join, product(*[map(str,a)for a in lists])))
def k(lists):
result = ['']
for lst in lists:
lst = [*map(str, lst)]
result = [S + s for S in result for s in lst]
return result
def k2(lists):
result = ['']
for lst in lists:
result = list(starmap(concat, product(result, map(str, lst))))
return result
def k3(lists):
result = ['']
for lst in lists:
result = starmap(concat, product(result, map(str, lst)))
return list(result)
funcs = [car_multiple_sets, f, g, h, k, k2, k3]
a = [1, 2]
b = [3, 4]
c = [6, 7, 8]
lists = [a, b, c]
for func in funcs:
print(func(lists), func.__name__)
times = {func: [] for func in funcs}
lists *= 3
for _ in range(50):
shuffle(funcs)
for func in funcs:
t = min(repeat(lambda: func(lists), number=1))
insort(times[func], t)
for func in sorted(funcs, key=times.get):
print(*('%4d us ' % (t * 1e6) for t in times[func][:3]), func.__name__)
(f
和 g
来自当前已删除的答案,k
功能来自我)
几点评论:
如果你仔细想想,
car_multiple_sets
所做的就是迭代它的参数lists
。您正在使用递归来执行此操作,但也可以使用for
循环对列表进行迭代。碰巧递归有点慢,memory-inefficient in python,所以for
-loops 更可取。您无需转换为
str
即可将整数组合在一起。您可以使用tuples
。这正是他们的目的。将str(x)+str(y)
替换为(x,y)
以获得一对两个整数而不是字符串。
def car_two_sets(a, b, unpack=False):
if unpack:
return [(*x, y) for x in a for y in b]
else:
return [(x, y) for x in a for y in b]
def car_multiple_sets(lists):
if len(lists) == 0:
return [()] # empty Cartesian product has one element, the empty tuple
elif len(lists) == 1:
return list(zip(lists[0])) # list of "1-uples" for homogeneity
else:
result = car_two_sets(lists[0], lists[1])
for l in lists[2:]:
result = car_two_sets(result, l, unpack=True)
return result
print( car_multiple_sets((range(3), 'abc', range(2))) )
# [(0, 'a', 0), (0, 'a', 1), (0, 'b', 0), (0, 'b', 1), (0, 'c', 0), (0, 'c', 1),
# (1, 'a', 0), (1, 'a', 1), (1, 'b', 0), (1, 'b', 1), (1, 'c', 0), (1, 'c', 1),
# (2, 'a', 0), (2, 'a', 1), (2, 'b', 0), (2, 'b', 1), (2, 'c', 0), (2, 'c', 1)]