将 itertools powerset 转换为列化的 numpy 数组
Convert itertools powerset into columnized numpy array
给定一个元组 items,我使用 itertools 得到它的幂集:
from itertools import chain, combinations


def powerset(iterable):
    """Return every non-empty combination of *iterable* as a tuple of tuples.

    Combinations are grouped by size (1, 2, ..., len) and, within a size,
    appear in ``itertools.combinations`` order.  The empty tuple is NOT
    included because sizes start at 1:

    powerset([1,2,3]) --> (1,) (2,) (3,) (1,2) (1,3) (2,3) (1,2,3)
    """
    s = list(iterable)
    return tuple(chain.from_iterable(combinations(s, r) for r in range(1, len(s) + 1)))


# The function must be defined before it is called; calling it first
# raises NameError when the snippet is run top to bottom.
items = ('a', 'b', 'c')
combos = powerset(items)
我的目标是转换 powerset 输出 combos
:
#combos =
(('a',),
('b',),
('c',),
('a', 'b'),
('a', 'c'),
('b', 'c'),
('a', 'b', 'c'))
转换为一个 np.array:在每一行中,每个项目都放在与它在原始元组 items 中的位置相对应的列(cols)中。
想要的结果:
#res =
[['a', '', ''],
['', 'b', ''],
['', '', 'c'],
['a', 'b', ''],
['a', '', 'c'],
['', 'b', 'c'],
['a', 'b', 'c']]
#shape = (7,3)
我能够使用下面的代码实现这一点。但是,我目前的做法是循环遍历数组,把每个项目逐一放到结果数组的正确列中;是否有更 pythonic、更快、更节省内存的方法来完成这种转换?
完整代码:
from itertools import combinations, chain
import numpy as np
def columnizePowerset(powerset, items):
    """Lay out each combination in a 2-D string array, one column per item.

    Args:
        powerset: Sized sequence of combinations (e.g. tuples) whose
            elements are taken from ``items``.
        items: Sequence of the original (hashable) items; an item's
            position here is the column it occupies in the result.

    Returns:
        ``np.ndarray`` of shape ``(len(powerset), len(items))``.  Row ``i``
        holds every element of ``powerset[i]`` in that element's column and
        ``''`` in all other columns.
    """
    # Build the item -> column lookup once instead of scanning ``items``
    # for every element.  A list per item keeps the original behaviour of
    # filling every matching column when ``items`` contains repeats.
    columns = {}
    for col, item in enumerate(items):
        columns.setdefault(item, []).append(col)

    # Size the string dtype for the longest item: a plain ``dtype=str`` is
    # one character wide and silently truncates anything longer.
    width = max([len(str(item)) for item in items] + [1])
    # np.full guarantees '' padding; np.empty leaves the cells unspecified.
    res = np.full((len(powerset), len(items)), '', dtype='<U{}'.format(width))

    for row, combo in enumerate(powerset):
        for item in combo:
            # Elements not present in ``items`` are skipped, as before.
            for col in columns.get(item, ()):
                res[row, col] = item
    return res
def powerset(iterable):
    """Return every non-empty combination of *iterable* as a tuple of tuples.

    Combinations are grouped by size (1, 2, ..., len) and, within a size,
    appear in ``itertools.combinations`` order.  The empty tuple is NOT
    included because sizes start at 1:

    powerset([1,2,3]) --> (1,) (2,) (3,) (1,2) (1,3) (2,3) (1,2,3)
    """
    s = list(iterable)
    return tuple(chain.from_iterable(combinations(s, r) for r in range(1, len(s) + 1)))
if __name__ == "__main__":
    # Demo: take three labels, build their power set, then print it with
    # every label aligned in its own column.
    items = ('a', 'b', 'c')
    combos = powerset(items)
    res = columnizePowerset(combos, items)
    print(res)
顺序与 powerset 生成的结果不完全相同,但这种模式让我想起了二进制——您可以通过查看递增整数的各个二进制位来得到同样的项目组合……
我不是 NumPy 高手,但这似乎相当 numpy-er。 (请参阅 post 编辑历史记录以获得更差的解决方案。)
import numpy as np
def powerset_numpy(items):
    """Build the non-empty power set of ``items`` as a column-aligned array.

    Row ``k`` (0-based) corresponds to the integer mask ``k + 1``: column
    ``j`` holds ``items[j]`` when bit ``j`` of that mask is set and ``''``
    otherwise.

    Args:
        items: Sized sequence of items; ``''`` is used as the filler, so
            string items are the intended input.

    Returns:
        ``np.ndarray`` of shape ``(2 ** n - 1, n)`` with ``n = len(items)``.
    """
    n = len(items)
    if n == 0:
        # No items means no non-empty subsets; return an empty 2-D array
        # rather than failing on an ambiguous reshape.
        return np.empty((0, 0), dtype=str)

    values = np.asarray(items)
    # One mask per row, as a column vector so it broadcasts against the
    # per-column bit positions without materialising repeated copies.
    masks = np.arange(1, 2 ** n)[:, np.newaxis]
    bit_is_set = ((masks >> np.arange(n)) & 1).astype(bool)
    return np.where(bit_is_set, values, '')


print(powerset_numpy(['a', 'b', 'c']))
打印出来
[['a' '' '']
['' 'b' '']
['a' 'b' '']
['' '' 'c']
['a' '' 'c']
['' 'b' 'c']
['a' 'b' 'c']]
您可以遍历所有位掩码:如果掩码中的某一位被置位,就把对应的项目放入当前列表,否则忽略它。
def powerset(s: list):
    """Return all non-empty subsets of ``s`` as rows padded with ``''``.

    Each integer mask from 1 to ``2 ** len(s) - 1`` produces one row;
    position ``pos`` of the row is ``s[pos]`` when bit ``pos`` of the mask
    is set and ``''`` otherwise.
    """
    size = len(s)
    return [
        [s[pos] if (mask >> pos) & 1 else '' for pos in range(size)]
        for mask in range(1, 1 << size)
    ]


print(powerset(['a', 'b', 'c']))
[['a', '', ''],
['', 'b', ''],
['a', 'b', ''],
['', '', 'c'],
['a', '', 'c'],
['', 'b', 'c'],
['a', 'b', 'c']]
给定一个元组 items,我使用 itertools 得到它的幂集:
from itertools import chain, combinations


def powerset(iterable):
    """Return every non-empty combination of *iterable* as a tuple of tuples.

    Combinations are grouped by size (1, 2, ..., len) and, within a size,
    appear in ``itertools.combinations`` order.  The empty tuple is NOT
    included because sizes start at 1:

    powerset([1,2,3]) --> (1,) (2,) (3,) (1,2) (1,3) (2,3) (1,2,3)
    """
    s = list(iterable)
    return tuple(chain.from_iterable(combinations(s, r) for r in range(1, len(s) + 1)))


# The function must be defined before it is called; calling it first
# raises NameError when the snippet is run top to bottom.
items = ('a', 'b', 'c')
combos = powerset(items)
我的目标是转换 powerset 输出 combos
:
#combos =
(('a',),
('b',),
('c',),
('a', 'b'),
('a', 'c'),
('b', 'c'),
('a', 'b', 'c'))
转换为一个 np.array:在每一行中,每个项目都放在与它在原始元组 items 中的位置相对应的列(cols)中。
想要的结果:
#res =
[['a', '', ''],
['', 'b', ''],
['', '', 'c'],
['a', 'b', ''],
['a', '', 'c'],
['', 'b', 'c'],
['a', 'b', 'c']]
#shape = (7,3)
我能够使用下面的代码实现这一点。但是,我目前的做法是循环遍历数组,把每个项目逐一放到结果数组的正确列中;是否有更 pythonic、更快、更节省内存的方法来完成这种转换?
完整代码:
from itertools import combinations, chain
import numpy as np
def columnizePowerset(powerset, items):
    """Lay out each combination in a 2-D string array, one column per item.

    Args:
        powerset: Sized sequence of combinations (e.g. tuples) whose
            elements are taken from ``items``.
        items: Sequence of the original (hashable) items; an item's
            position here is the column it occupies in the result.

    Returns:
        ``np.ndarray`` of shape ``(len(powerset), len(items))``.  Row ``i``
        holds every element of ``powerset[i]`` in that element's column and
        ``''`` in all other columns.
    """
    # Build the item -> column lookup once instead of scanning ``items``
    # for every element.  A list per item keeps the original behaviour of
    # filling every matching column when ``items`` contains repeats.
    columns = {}
    for col, item in enumerate(items):
        columns.setdefault(item, []).append(col)

    # Size the string dtype for the longest item: a plain ``dtype=str`` is
    # one character wide and silently truncates anything longer.
    width = max([len(str(item)) for item in items] + [1])
    # np.full guarantees '' padding; np.empty leaves the cells unspecified.
    res = np.full((len(powerset), len(items)), '', dtype='<U{}'.format(width))

    for row, combo in enumerate(powerset):
        for item in combo:
            # Elements not present in ``items`` are skipped, as before.
            for col in columns.get(item, ()):
                res[row, col] = item
    return res
def powerset(iterable):
    """Return every non-empty combination of *iterable* as a tuple of tuples.

    Combinations are grouped by size (1, 2, ..., len) and, within a size,
    appear in ``itertools.combinations`` order.  The empty tuple is NOT
    included because sizes start at 1:

    powerset([1,2,3]) --> (1,) (2,) (3,) (1,2) (1,3) (2,3) (1,2,3)
    """
    s = list(iterable)
    return tuple(chain.from_iterable(combinations(s, r) for r in range(1, len(s) + 1)))
if __name__ == "__main__":
    # Demo: take three labels, build their power set, then print it with
    # every label aligned in its own column.
    items = ('a', 'b', 'c')
    combos = powerset(items)
    res = columnizePowerset(combos, items)
    print(res)
顺序与 powerset 生成的结果不完全相同,但这种模式让我想起了二进制——您可以通过查看递增整数的各个二进制位来得到同样的项目组合……
我不是 NumPy 高手,但这似乎相当 numpy-er。 (请参阅 post 编辑历史记录以获得更差的解决方案。)
import numpy as np
def powerset_numpy(items):
    """Build the non-empty power set of ``items`` as a column-aligned array.

    Row ``k`` (0-based) corresponds to the integer mask ``k + 1``: column
    ``j`` holds ``items[j]`` when bit ``j`` of that mask is set and ``''``
    otherwise.

    Args:
        items: Sized sequence of items; ``''`` is used as the filler, so
            string items are the intended input.

    Returns:
        ``np.ndarray`` of shape ``(2 ** n - 1, n)`` with ``n = len(items)``.
    """
    n = len(items)
    if n == 0:
        # No items means no non-empty subsets; return an empty 2-D array
        # rather than failing on an ambiguous reshape.
        return np.empty((0, 0), dtype=str)

    values = np.asarray(items)
    # One mask per row, as a column vector so it broadcasts against the
    # per-column bit positions without materialising repeated copies.
    masks = np.arange(1, 2 ** n)[:, np.newaxis]
    bit_is_set = ((masks >> np.arange(n)) & 1).astype(bool)
    return np.where(bit_is_set, values, '')


print(powerset_numpy(['a', 'b', 'c']))
打印出来
[['a' '' '']
['' 'b' '']
['a' 'b' '']
['' '' 'c']
['a' '' 'c']
['' 'b' 'c']
['a' 'b' 'c']]
您可以遍历所有位掩码:如果掩码中的某一位被置位,就把对应的项目放入当前列表,否则忽略它。
def powerset(s: list):
    """Return all non-empty subsets of ``s`` as rows padded with ``''``.

    Each integer mask from 1 to ``2 ** len(s) - 1`` produces one row;
    position ``pos`` of the row is ``s[pos]`` when bit ``pos`` of the mask
    is set and ``''`` otherwise.
    """
    size = len(s)
    return [
        [s[pos] if (mask >> pos) & 1 else '' for pos in range(size)]
        for mask in range(1, 1 << size)
    ]


print(powerset(['a', 'b', 'c']))
[['a', '', ''],
['', 'b', ''],
['a', 'b', ''],
['', '', 'c'],
['a', '', 'c'],
['', 'b', 'c'],
['a', 'b', 'c']]