给定 n 个字符,生成不同长度的单词
Generate words of varying length given n characters
给定 n 个字符,我需要生成所有可能的 Ki 长度的单词,例如:
我想不出 len 5 这个词,但这就是我的想法
n = 11
k1 = 2
k2 = 4
k3 = 5
所以基本上所有长度为 2 4 和 5 的单词都没有重复使用字符。最好的方法是什么?
3: [{u'eit': u' "eit":0'}],
5: [{u'doosw': u' "woods": 4601, '}, {u'acenr': u' "caner": 0, '}, {u'acens': u' "canes": 0, '}, {u'acden': u' "caned": 0, '}, {u'aceln': u' "canel": 0,'}],
6: [{u'abeill': u' "alible": 0, '}, {u'cdeeit': u' "deciet":0,'}, {u'demoor': u' "mooder": 0, '}],
7: [{u'deiprss': u' "spiders": 0, '}, {u'deiprsy': u' "spidery": 0, '}, {u'cersttu': u' "scutter": 0, '}],
8: [{u'chiiilst': u' "chilitis": 0, '}, {u'agilnrtw': u' "trawling": 0, '}, {u'abdeemns': u' "beadsmen": 0, '}],
9: [{u'abeiilnns': u' "biennials": 0, '}, {u'bclooortu': u' "oblocutor": 0, '}, {u'aabfiinst': u' "fabianist": 0, '}, {u'acdeiituz': u' "diazeutic": 0, '}, {u'aabfiimns': u' "fabianism": 0, '}, {u'ehnoooppt': u' "optophone": 0, '}],
10: [{u'aiilnoprtt': u' "tripolitan": 0, '}, {u'eeilprrsty': u' "sperrylite": 0, '}, {u'gghhiilttt': u' "lighttight": 0, '}, {u'aeegilrruz': u' "regularize": 0, '}, {u'ellnprtuuy': u' "purulently": 0, '}],
11: [{u'cdgilnoostu': u' "outscolding": 0, '}],
12: [{u'ceeeilnostuy': u' "leucosyenite": 0, '}, {u'aacciloprsst': u' "sarcoplastic": 0, '}],
13: [{u'acdeimmoprrsu': u' "cardiospermum": 0, '}, {u'celnnooostuvy': u' "noncovetously": 0, '}],
14: [{u'adeejmnnoprrtu': u' "preadjournment": 0, '}]
wlen = self.table[pos]
if pos == 0:
# See if the letters remaining in the bag are a valid word
key = ''.join(sorted(bag.elements()))
for d in wlen:
if key in d.keys():
yield solution + [key]
pos -= 1
for dic in wlen:
for key in dic.keys():
# Tag each letter with the number of identical letters that still follow it,
# so repeated letters become distinct symbols (e.g. 'l1' and 'l0').
target = "LNDJOBEAWRL".lower()
symbols = sorted(
    letter + str(target.count(letter, idx + 1))
    for idx, letter in enumerate(target)
)
现在我们有了每个单词的标准表示,还需要一种快速的方法来检查是否有某种排列与它们匹配。为此,我们使用 trie(字典树)数据结构。下面是一段入门代码:
class Trie:
    """One node of a trie keyed by the symbols of a word's normalized form.

    Each node remembers the symbol that leads to it, the original words
    whose normalized form ends at this node, and its child nodes.
    """

    def __init__(self, symbol):
        """Create an empty node reached via `symbol`."""
        self.symbol = symbol
        # Original words whose normalized symbol sequence ends here.
        self.words = []
        # Maps a child's symbol to the child Trie node.
        self.children = dict()

    def add_word(self, word):
        """Store `word` at this node."""
        self.words.append(word)

    def add_child(self, symbol, trie):
        """Attach `trie` as the child reached via `symbol`.

        An existing child registered under the same symbol is replaced.
        """
        self.children[symbol] = trie
现在你需要做一个空的trie作为根,以任何东西作为符号,专门用来存放所有顶级的trie。然后遍历我们之前转换的每个单词,对于我们生成的第一个符号,检查根树是否有带有该符号的子树。如果没有,则为其创建一个 trie 并添加它。如果是,则继续下一个符号,并检查具有该符号的 trie 是否在前一个 trie 中。以这种方式进行,直到用尽所有符号,在这种情况下,当前的 trie 节点代表我们转换的那个词的标准化形式。把原来的单词存入这个trie,然后继续下一个单词。
完成后,您的整个单词列表将包含在此 trie 数据结构中。然后,您可以执行以下操作:
def print_words(symbols, node):
    """Print the words stored at `node` and at every reachable descendant.

    A child is visited only when the symbol it is registered under is
    contained in `symbols`; other subtrees are skipped entirely.

    Args:
        symbols: Container of symbols that may be followed.
        node: Node exposing `words` (iterable of words) and `children`
            (mapping of symbol to child node).
    """
    for word in node.words:
        print(word)
    for sym, child in node.children.items():
        if sym in symbols:
            print_words(symbols, child)
# Start at the root and descend only through children whose symbol is in `symbols`.
print_words(symbols, root_trie)
下面的代码使用递归生成器来构建解决方案。为了存储目标字母,我们使用 collections.Counter。
为了简化搜索,我们为每个需要的单词长度创建一个字典,并把这些字典存入一个名为 all_words 的字典中,以单词长度作为键。
每个子字典存储由相同字母组成的单词列表,以排序后的字母作为键,例如 'aet': ['ate', 'eat', 'tea']。
我使用标准的 Unix '/usr/share/dict/words' 单词文件。如果您使用其他格式的文件,可能需要修改把单词放入 all_words 的那部分代码。
#!/usr/bin/env python3
''' Create anagrams from a string of target letters and a list of word lengths '''
from collections import Counter
from itertools import product
# The Unix word list
fname = '/usr/share/dict/words'

# The target letters to use
target = 'lndjobeawrl'

# Word lengths, in descending order
wordlengths = [5, 4, 2]

# A dict to hold dicts for each word length.
# The inner dicts store lists of words containing the same letters,
# with the sorted letters as the key, eg 'aet': ['ate', 'eat', 'tea']
all_words = {i: {} for i in wordlengths}

# A method that tests if a word only contains letters in target.
# This compares letter sets only; how often each letter may be used
# is not checked here.
valid = set(target).issuperset

print('Scanning', fname, 'for valid words...')
count = 0
with open(fname) as f:
    for word in f:
        word = word.rstrip()
        wlen = len(word)
        # Only add words of the correct length, with no punctuation.
        # Using word.islower() eliminates most abbreviations.
        if (wlen in wordlengths and word.islower()
                and word.isalpha() and valid(word)):
            sorted_word = ''.join(sorted(word))
            # Add this word to the list in all_words[wlen],
            # creating the list if it doesn't exist
            all_words[wlen].setdefault(sorted_word, []).append(word)
            count += 1
print(count, 'words found')

# Report the number of distinct sorted-letter keys for each word length
for k, v in all_words.items():
    print(k, len(v))
def solve(pos, bag, solution):
    """Recursively yield lists of sorted-letter keys that use up `bag`.

    Word lengths are consumed from `wordlengths[pos]` down to
    `wordlengths[0]`. The final word (pos == 0) must consist of exactly
    the letters still left in the bag.

    Args:
        pos: Index into `wordlengths` of the word length to place next.
        bag: collections.Counter of the letters still available. It is
            not modified; each candidate works on a copy.
        solution: List of keys chosen so far.

    Yields:
        Lists of keys into `all_words`, in the order they were chosen.
    """
    wlen = wordlengths[pos]
    if pos == 0:
        # See if the letters remaining in the bag are a valid word
        key = ''.join(sorted(bag.elements()))
        if key in all_words[wlen]:
            yield solution + [key]
    else:
        pos -= 1
        for key in all_words[wlen]:
            # Test that all letters in key are in the bag: take them out
            # of a copy and reject the key if any count went negative.
            newbag = bag.copy()
            newbag.subtract(key)
            if all(v >= 0 for v in newbag.values()):
                # Add this key to the current solution and
                # recurse to find the next key
                yield from solve(pos, newbag, solution + [key])
# Find all lists of keys that produce valid combinations
for solution in solve(len(wordlengths) - 1, Counter(target), []):
    # Convert solutions to tuples of words
    t = [all_words[len(key)][key] for key in solution]
    # A key can stand for several anagrams, so print every combination
    for s in product(*t):
        print(s)
Scanning /usr/share/dict/words for valid words...
300 words found
5 110
4 112
2 11
('ad', 'jell', 'brown')
('do', 'jell', 'brawn')
('ow', 'jell', 'brand')
('re', 'jowl', 'bland')
target = 'nobigword'
wordlengths = [4, 3, 2]
Scanning /usr/share/dict/words for valid words...
83 words found
4 31
3 33
2 7
('do', 'big', 'worn')
('do', 'bin', 'grow')
('do', 'nib', 'grow')
('do', 'bow', 'grin')
('do', 'bow', 'ring')
('do', 'gin', 'brow')
('do', 'now', 'brig')
('do', 'own', 'brig')
('do', 'won', 'brig')
('do', 'orb', 'wing')
('do', 'rob', 'wing')
('do', 'rib', 'gown')
('do', 'wig', 'born')
('go', 'bid', 'worn')
('go', 'bin', 'word')
('go', 'nib', 'word')
('go', 'bow', 'rind')
('go', 'din', 'brow')
('go', 'now', 'bird')
('go', 'own', 'bird')
('go', 'won', 'bird')
('go', 'orb', 'wind')
('go', 'rob', 'wind')
('go', 'rib', 'down')
('go', 'row', 'bind')
('id', 'bog', 'worn')
('id', 'gob', 'worn')
('id', 'orb', 'gown')
('id', 'rob', 'gown')
('id', 'row', 'bong')
('in', 'bog', 'word')
('in', 'gob', 'word')
('in', 'dog', 'brow')
('in', 'god', 'brow')
('no', 'bid', 'grow')
('on', 'bid', 'grow')
('no', 'big', 'word')
('on', 'big', 'word')
('no', 'bow', 'gird')
('no', 'bow', 'grid')
('on', 'bow', 'gird')
('on', 'bow', 'grid')
('no', 'dig', 'brow')
('on', 'dig', 'brow')
('or', 'bid', 'gown')
('or', 'big', 'down')
('or', 'bog', 'wind')
('or', 'gob', 'wind')
('or', 'bow', 'ding')
('or', 'wig', 'bond')
('ow', 'bog', 'rind')
('ow', 'gob', 'rind')
('ow', 'dig', 'born')
('ow', 'don', 'brig')
('ow', 'nod', 'brig')
('ow', 'orb', 'ding')
('ow', 'rob', 'ding')
('ow', 'rid', 'bong')
('ow', 'rig', 'bond')
此代码是为 Python 3 编写的。您也可以在 Python 2.7 上使用它,但需要把下面第一行的 yield from 语句改为其后的两行 for 循环:
yield from solve(pos, newbag, solution + [key])
for result in solve(pos, newbag, solution + [key]):
yield result
给定 n 个字符,我需要生成所有可能的 Ki 长度的单词,例如:
do bear
我想不出 len 5 这个词,但这就是我的想法
n = 11
k1 = 2
k2 = 4
k3 = 5
所以基本上所有长度为 2 4 和 5 的单词都没有重复使用字符。最好的方法是什么?
3: [{u'eit': u' "eit":0'}],
5: [{u'doosw': u' "woods": 4601, '}, {u'acenr': u' "caner": 0, '}, {u'acens': u' "canes": 0, '}, {u'acden': u' "caned": 0, '}, {u'aceln': u' "canel": 0,'}],
6: [{u'abeill': u' "alible": 0, '}, {u'cdeeit': u' "deciet":0,'}, {u'demoor': u' "mooder": 0, '}],
7: [{u'deiprss': u' "spiders": 0, '}, {u'deiprsy': u' "spidery": 0, '}, {u'cersttu': u' "scutter": 0, '}],
8: [{u'chiiilst': u' "chilitis": 0, '}, {u'agilnrtw': u' "trawling": 0, '}, {u'abdeemns': u' "beadsmen": 0, '}],
9: [{u'abeiilnns': u' "biennials": 0, '}, {u'bclooortu': u' "oblocutor": 0, '}, {u'aabfiinst': u' "fabianist": 0, '}, {u'acdeiituz': u' "diazeutic": 0, '}, {u'aabfiimns': u' "fabianism": 0, '}, {u'ehnoooppt': u' "optophone": 0, '}],
10: [{u'aiilnoprtt': u' "tripolitan": 0, '}, {u'eeilprrsty': u' "sperrylite": 0, '}, {u'gghhiilttt': u' "lighttight": 0, '}, {u'aeegilrruz': u' "regularize": 0, '}, {u'ellnprtuuy': u' "purulently": 0, '}],
11: [{u'cdgilnoostu': u' "outscolding": 0, '}],
12: [{u'ceeeilnostuy': u' "leucosyenite": 0, '}, {u'aacciloprsst': u' "sarcoplastic": 0, '}],
13: [{u'acdeimmoprrsu': u' "cardiospermum": 0, '}, {u'celnnooostuvy': u' "noncovetously": 0, '}],
14: [{u'adeejmnnoprrtu': u' "preadjournment": 0, '}]
wlen = self.table[pos]
if pos == 0:
# See if the letters remaining in the bag are a valid word
key = ''.join(sorted(bag.elements()))
for d in wlen:
if key in d.keys():
yield solution + [key]
pos -= 1
for dic in wlen:
for key in dic.keys():
# Tag each letter with the number of identical letters that still follow it,
# so repeated letters become distinct symbols (e.g. 'l1' and 'l0').
target = "LNDJOBEAWRL".lower()
symbols = sorted(
    letter + str(target.count(letter, idx + 1))
    for idx, letter in enumerate(target)
)
现在我们有了每个单词的标准表示,还需要一种快速的方法来检查是否有某种排列与它们匹配。为此,我们使用 trie(字典树)数据结构。下面是一段入门代码:
class Trie:
    """One node of a trie keyed by the symbols of a word's normalized form.

    Each node remembers the symbol that leads to it, the original words
    whose normalized form ends at this node, and its child nodes.
    """

    def __init__(self, symbol):
        """Create an empty node reached via `symbol`."""
        self.symbol = symbol
        # Original words whose normalized symbol sequence ends here.
        self.words = []
        # Maps a child's symbol to the child Trie node.
        self.children = dict()

    def add_word(self, word):
        """Store `word` at this node."""
        self.words.append(word)

    def add_child(self, symbol, trie):
        """Attach `trie` as the child reached via `symbol`.

        An existing child registered under the same symbol is replaced.
        """
        self.children[symbol] = trie
现在你需要做一个空的trie作为根,以任何东西作为符号,专门用来存放所有顶级的trie。然后遍历我们之前转换的每个单词,对于我们生成的第一个符号,检查根树是否有带有该符号的子树。如果没有,则为其创建一个 trie 并添加它。如果是,则继续下一个符号,并检查具有该符号的 trie 是否在前一个 trie 中。以这种方式进行,直到用尽所有符号,在这种情况下,当前的 trie 节点代表我们转换的那个词的标准化形式。把原来的单词存入这个trie,然后继续下一个单词。
完成后,您的整个单词列表将包含在此 trie 数据结构中。然后,您可以执行以下操作:
def print_words(symbols, node):
    """Print the words stored at `node` and at every reachable descendant.

    A child is visited only when the symbol it is registered under is
    contained in `symbols`; other subtrees are skipped entirely.

    Args:
        symbols: Container of symbols that may be followed.
        node: Node exposing `words` (iterable of words) and `children`
            (mapping of symbol to child node).
    """
    for word in node.words:
        print(word)
    for sym, child in node.children.items():
        if sym in symbols:
            print_words(symbols, child)
# Start at the root and descend only through children whose symbol is in `symbols`.
print_words(symbols, root_trie)
下面的代码使用递归生成器来构建解决方案。为了存储目标字母,我们使用 collections.Counter。
为了简化搜索,我们为每个需要的单词长度创建一个字典,并把这些字典存入一个名为 all_words 的字典中,以单词长度作为键。
每个子字典存储由相同字母组成的单词列表,以排序后的字母作为键,例如 'aet': ['ate', 'eat', 'tea']。
我使用标准的 Unix '/usr/share/dict/words' 单词文件。如果您使用其他格式的文件,可能需要修改把单词放入 all_words 的那部分代码。
#!/usr/bin/env python3
''' Create anagrams from a string of target letters and a list of word lengths '''
from collections import Counter
from itertools import product
# The Unix word list
fname = '/usr/share/dict/words'

# The target letters to use
target = 'lndjobeawrl'

# Word lengths, in descending order
wordlengths = [5, 4, 2]

# A dict to hold dicts for each word length.
# The inner dicts store lists of words containing the same letters,
# with the sorted letters as the key, eg 'aet': ['ate', 'eat', 'tea']
all_words = {i: {} for i in wordlengths}

# A method that tests if a word only contains letters in target.
# This compares letter sets only; how often each letter may be used
# is not checked here.
valid = set(target).issuperset

print('Scanning', fname, 'for valid words...')
count = 0
with open(fname) as f:
    for word in f:
        word = word.rstrip()
        wlen = len(word)
        # Only add words of the correct length, with no punctuation.
        # Using word.islower() eliminates most abbreviations.
        if (wlen in wordlengths and word.islower()
                and word.isalpha() and valid(word)):
            sorted_word = ''.join(sorted(word))
            # Add this word to the list in all_words[wlen],
            # creating the list if it doesn't exist
            all_words[wlen].setdefault(sorted_word, []).append(word)
            count += 1
print(count, 'words found')

# Report the number of distinct sorted-letter keys for each word length
for k, v in all_words.items():
    print(k, len(v))
def solve(pos, bag, solution):
    """Recursively yield lists of sorted-letter keys that use up `bag`.

    Word lengths are consumed from `wordlengths[pos]` down to
    `wordlengths[0]`. The final word (pos == 0) must consist of exactly
    the letters still left in the bag.

    Args:
        pos: Index into `wordlengths` of the word length to place next.
        bag: collections.Counter of the letters still available. It is
            not modified; each candidate works on a copy.
        solution: List of keys chosen so far.

    Yields:
        Lists of keys into `all_words`, in the order they were chosen.
    """
    wlen = wordlengths[pos]
    if pos == 0:
        # See if the letters remaining in the bag are a valid word
        key = ''.join(sorted(bag.elements()))
        if key in all_words[wlen]:
            yield solution + [key]
    else:
        pos -= 1
        for key in all_words[wlen]:
            # Test that all letters in key are in the bag: take them out
            # of a copy and reject the key if any count went negative.
            newbag = bag.copy()
            newbag.subtract(key)
            if all(v >= 0 for v in newbag.values()):
                # Add this key to the current solution and
                # recurse to find the next key
                yield from solve(pos, newbag, solution + [key])
# Find all lists of keys that produce valid combinations
for solution in solve(len(wordlengths) - 1, Counter(target), []):
    # Convert solutions to tuples of words
    t = [all_words[len(key)][key] for key in solution]
    # A key can stand for several anagrams, so print every combination
    for s in product(*t):
        print(s)
Scanning /usr/share/dict/words for valid words...
300 words found
5 110
4 112
2 11
('ad', 'jell', 'brown')
('do', 'jell', 'brawn')
('ow', 'jell', 'brand')
('re', 'jowl', 'bland')
下面是使用以下设置时的结果:
target = 'nobigword'
wordlengths = [4, 3, 2]
Scanning /usr/share/dict/words for valid words...
83 words found
4 31
3 33
2 7
('do', 'big', 'worn')
('do', 'bin', 'grow')
('do', 'nib', 'grow')
('do', 'bow', 'grin')
('do', 'bow', 'ring')
('do', 'gin', 'brow')
('do', 'now', 'brig')
('do', 'own', 'brig')
('do', 'won', 'brig')
('do', 'orb', 'wing')
('do', 'rob', 'wing')
('do', 'rib', 'gown')
('do', 'wig', 'born')
('go', 'bid', 'worn')
('go', 'bin', 'word')
('go', 'nib', 'word')
('go', 'bow', 'rind')
('go', 'din', 'brow')
('go', 'now', 'bird')
('go', 'own', 'bird')
('go', 'won', 'bird')
('go', 'orb', 'wind')
('go', 'rob', 'wind')
('go', 'rib', 'down')
('go', 'row', 'bind')
('id', 'bog', 'worn')
('id', 'gob', 'worn')
('id', 'orb', 'gown')
('id', 'rob', 'gown')
('id', 'row', 'bong')
('in', 'bog', 'word')
('in', 'gob', 'word')
('in', 'dog', 'brow')
('in', 'god', 'brow')
('no', 'bid', 'grow')
('on', 'bid', 'grow')
('no', 'big', 'word')
('on', 'big', 'word')
('no', 'bow', 'gird')
('no', 'bow', 'grid')
('on', 'bow', 'gird')
('on', 'bow', 'grid')
('no', 'dig', 'brow')
('on', 'dig', 'brow')
('or', 'bid', 'gown')
('or', 'big', 'down')
('or', 'bog', 'wind')
('or', 'gob', 'wind')
('or', 'bow', 'ding')
('or', 'wig', 'bond')
('ow', 'bog', 'rind')
('ow', 'gob', 'rind')
('ow', 'dig', 'born')
('ow', 'don', 'brig')
('ow', 'nod', 'brig')
('ow', 'orb', 'ding')
('ow', 'rob', 'ding')
('ow', 'rid', 'bong')
('ow', 'rig', 'bond')
此代码是为 Python 3 编写的。您也可以在 Python 2.7 上使用它,但需要把下面第一行的 yield from 语句改为其后的两行 for 循环:
yield from solve(pos, newbag, solution + [key])
for result in solve(pos, newbag, solution + [key]):
yield result