在 python 树中存储字数
Storing word count in the python trie
我拿了一个单词列表并将其放入一个 trie 中。我还想在里面存储字数以供进一步分析。最好的方法是什么?这是 class 我认为频率将被收集和存储的地方,但我不确定如何去做。你可以看到我的尝试,插入的最后一行是我尝试存储计数的地方。
class TrieNode:
def __init__(self,k):
self.v = 0
self.k = k
self.children = {}
def all_words(self, prefix):
if self.end:
yield prefix
for letter, child in self.children.items():
yield from child.all_words(prefix + letter)
class Trie:
def __init__(self):
self.root = TrieNode()
def __init__(self):
self.root = TrieNode()
def insert(self, word):
curr = self.root
for letter in word:
node = curr.children.get(letter)
if not node:
node = TrieNode()
curr.children[letter] = node
curr.v += 1
def insert_many(self, words):
for word in words:
self.insert(word)
def all_words_beginning_with_prefix(self, prefix):
cur = self.root
for c in prefix:
cur = cur.children.get(c)
if cur is None:
return # No words with given prefix
yield from cur.all_words(prefix)
我想存储计数以便在我使用时使用
print(list(trie.all_words_beginning_with_prefix('prefix')))
我会得到这样的结果:
[(word, count), (word, count)]
我将向 trie 节点添加一个名为 is_word 的字段,其中 is_word 仅对单词中的最后一个字母为真。就像你有单词 AND 一样,is_word 对于包含字母 D 的 trie 节点是正确的。我会更新只有 is_word 为真的节点的频率,而不是单词中的每个字母。
所以当你从一个字母开始迭代时,检查它是否是一个单词,如果是,停止迭代,return 计数和单词。我假设在您的迭代中您会跟踪这些字母,并不断将它们添加到前缀中。
你的 trie 是 multi-way trie。
虽然正在插入,但在看到任何节点时,这意味着将在该路径中添加一个新单词。因此,增加该节点的 word_count。
class TrieNode:
def __init__(self, char):
self.char = char
self.word_count = 0
self.children = {}
def all_words(self, prefix, path):
if len(self.children) == 0:
yield prefix + path
for letter, child in self.children.items():
yield from child.all_words(prefix, path + letter)
class Trie:
def __init__(self):
self.root = TrieNode('')
def insert(self, word):
curr = self.root
for letter in word:
node = curr.children.get(letter)
if node is None:
node = TrieNode(letter)
curr.children[letter] = node
curr.word_count += 1 # increment it everytime the node is seen at particular level.
curr = node
def insert_many(self, words):
for word in words:
self.insert(word)
def all_words_beginning_with_prefix(self, prefix):
cur = self.root
for c in prefix:
cur = cur.children.get(c)
if cur is None:
return # No words with given prefix
yield from cur.all_words(prefix, path="")
def word_count(self, prefix):
cur = self.root
for c in prefix:
cur = cur.children.get(c)
if cur is None:
return 0
return cur.word_count
trie = Trie()
trie.insert_many(["hello", "hi", "random", "heap"])
prefix = "he"
words = [w for w in trie.all_words_beginning_with_prefix(prefix)]
print("Lazy method:\n Prefix: %s, Words: %s, Count: %d" % (prefix, words, len(words)))
print("Proactive method:\n Word count for '%s': %d" % (prefix, trie.word_count(prefix)))
输出:
Lazy method:
Prefix: he, Words: ['hello', 'heap'], Count: 2
Proactive method:
Word count for 'he': 2
我拿了一个单词列表并将其放入一个 trie 中。我还想在里面存储字数以供进一步分析。最好的方法是什么?这是 class 我认为频率将被收集和存储的地方,但我不确定如何去做。你可以看到我的尝试,插入的最后一行是我尝试存储计数的地方。
class TrieNode:
def __init__(self,k):
self.v = 0
self.k = k
self.children = {}
def all_words(self, prefix):
if self.end:
yield prefix
for letter, child in self.children.items():
yield from child.all_words(prefix + letter)
class Trie:
def __init__(self):
self.root = TrieNode()
def __init__(self):
self.root = TrieNode()
def insert(self, word):
curr = self.root
for letter in word:
node = curr.children.get(letter)
if not node:
node = TrieNode()
curr.children[letter] = node
curr.v += 1
def insert_many(self, words):
for word in words:
self.insert(word)
def all_words_beginning_with_prefix(self, prefix):
cur = self.root
for c in prefix:
cur = cur.children.get(c)
if cur is None:
return # No words with given prefix
yield from cur.all_words(prefix)
我想存储计数以便在我使用时使用
print(list(trie.all_words_beginning_with_prefix('prefix')))
我会得到这样的结果:
[(word, count), (word, count)]
我将向 trie 节点添加一个名为 is_word 的字段,其中 is_word 仅对单词中的最后一个字母为真。就像你有单词 AND 一样,is_word 对于包含字母 D 的 trie 节点是正确的。我会更新只有 is_word 为真的节点的频率,而不是单词中的每个字母。
所以当你从一个字母开始迭代时,检查它是否是一个单词,如果是,停止迭代,return 计数和单词。我假设在您的迭代中您会跟踪这些字母,并不断将它们添加到前缀中。
你的 trie 是 multi-way trie。
虽然正在插入,但在看到任何节点时,这意味着将在该路径中添加一个新单词。因此,增加该节点的 word_count。
class TrieNode:
def __init__(self, char):
self.char = char
self.word_count = 0
self.children = {}
def all_words(self, prefix, path):
if len(self.children) == 0:
yield prefix + path
for letter, child in self.children.items():
yield from child.all_words(prefix, path + letter)
class Trie:
def __init__(self):
self.root = TrieNode('')
def insert(self, word):
curr = self.root
for letter in word:
node = curr.children.get(letter)
if node is None:
node = TrieNode(letter)
curr.children[letter] = node
curr.word_count += 1 # increment it everytime the node is seen at particular level.
curr = node
def insert_many(self, words):
for word in words:
self.insert(word)
def all_words_beginning_with_prefix(self, prefix):
cur = self.root
for c in prefix:
cur = cur.children.get(c)
if cur is None:
return # No words with given prefix
yield from cur.all_words(prefix, path="")
def word_count(self, prefix):
cur = self.root
for c in prefix:
cur = cur.children.get(c)
if cur is None:
return 0
return cur.word_count
trie = Trie()
trie.insert_many(["hello", "hi", "random", "heap"])
prefix = "he"
words = [w for w in trie.all_words_beginning_with_prefix(prefix)]
print("Lazy method:\n Prefix: %s, Words: %s, Count: %d" % (prefix, words, len(words)))
print("Proactive method:\n Word count for '%s': %d" % (prefix, trie.word_count(prefix)))
输出:
Lazy method:
Prefix: he, Words: ['hello', 'heap'], Count: 2
Proactive method:
Word count for 'he': 2