霍夫曼递归解码
Huffman Decoding with recursion
我正在创建一个可以对文本进行编码和解码的 huffman class,但是我的解码方法有问题。我的编码方法工作正常,我的解码方法适用于少量文本。但是,当我尝试解码大量文本时,出现最大递归深度错误,并且不确定如何修复它。 class 接受一个包含字符及其频率的字典,然后将它们变成节点并构建树。构建树后,它将字符及其位码放入另一个字典中以用于编码和解码。
class Node:
def __init__(self,value,data,left=None,right=None):
#Holds frequency
self.value = value
#Holds character
self.data = data
self.left = left
self.right = right
self.code = ""
def __lt__(self, other):
return self.value < other.value
def __le__(self, other):
return self.value <= other.value
def __gt__(self, other):
return self.value > other.value
def __ge__(self, other):
return self.value >= other.value
class MyHuffman():
def __init__(self):
self.chars = {}
self.tree = None
self.decodePosition = 0
def build(self, weights):
huffNodes = []
heapq.heapify(huffNodes)
for x, y in weights.items():
heapq.heappush(huffNodes,Node(y,x))
while len(huffNodes) > 1:
left = heapq.heappop(huffNodes)
right = heapq.heappop(huffNodes)
left.code = 1
right.code = 0
heapq.heappush(huffNodes, Node(left.value+right.value, left.data + right.data, left, right))
self.tree = huffNodes[0]
self.makeLookupTable(self.tree)
def makeLookupTable(self, node, bitCode=""):
codes = bitCode + str(node.code)
if node.left == None and node.right == None:
self.chars[node.data] = codes
else:
self.makeLookupTable(node.left, codes)
self.makeLookupTable(node.right, codes)
return self.chars
def encode(self, word):
bitString = ""
for x in range (0, len(word)):
if word[x] in self.chars:
for y, z in self.chars.items():
if word[x] == y:
bitString = bitString + z
return bitString
def decode(self, bitstring):
for x in bitstring:
if x != "0" and x != "1":
return None
dec = self.recursiveTraverseTree(self.tree,bitstring)
self.decodePosition=0
return dec
def recursiveTraverseTree(self, node, bitString, words=""):
if node.left == None and node.right == None:
words= words + str(node.data)
node = self.tree
if self.decodePosition < len(bitString):
if bitString[self.decodePosition] == "0":
self.decodePosition+=1
return self.recursiveTraverseTree(node.right, bitString, words)
elif bitString[self.decodePosition] == "1":
self.decodePosition+=1
return self.recursiveTraverseTree(node.left, bitString, words)
return words
下面的一些测试运行
第一个测试工作正常,但第二个测试给出了 maxRecursion 深度错误
huff = MyHuffman()
freqs = {'z': 50, 'j': 1, "'": 11, 'a': 42, 'd': 3, 'l': 1, 'f': 14, ' ': 31, 'i': 1, 'w': 8, 's': 41, 'r': 2, 'k': 49, 'b': 28, 'q': 28, 'p': 32, 'g': 33, 'v': 51, 'c': 6, 'h': 6, 'm': 5, 'y': 8, 'o': 36, 't': 28, 'u': 23, 'n': 15}
b = huff.build(freqs)
word = "damn son"
bitstring = huff.encode(word)
decrypted = huff.decode(bitstring)
print(f"[{word}] -> [{bitstring}] -> [{decrypted}]")
#Outputs[damn son] -> [1000011001110000101000110010100010110001] -> [damn son]
word1 ="bgzqbbvkqpaawaoasczz nfq szbumq'vzmbvbknsvvqouapcpbzkvbgtbsggcohto p tzhytz kkutanbv ubsbaogasznr tzz pzzsgqfqpsnfugsktpuzztq s vkfzavokoogmvzgpk tagkz zaoz'vaqpqkvbuvqtsbzgf zk kuzasovhauoqwtvvvovko fvbwhzpvpkskouaupspstbvgsbszipqvvuvmuaspzna stvvk gu saaokggnmsvtspkvqvsahvozsawszfpws skgg bqkqu salg fovuknaknpupovafbovqssagpgbfkcuz gf ofgyrokasgc guabqzbtkosqzbzvspzwgnsyfhvoz akgo'htsovzpakabayffpkpkvqrpzzqogsfvvatsvqbaapozavuyovzpbzsz ppuzrqbq'jaq zuqhhpptvkguktcbkazsszsgvocppzptzzhtzgt b mkznz qqk avkmztzsbzkgrovkqsssb pvtvssoutssazasunaavgybffppqgqagbsa gqscwpno'pb qsknzagtu pztqfbmbztmtuzktvza gaovapkvav govpv oazg'qgrpyszvqqzvgvztbavsy pswurtauztkztavv zcqvzs gbt zgzosfvtpk'gyggbokt ktgukzgskfzf ntavpq bzazvtphvcfba knp'zg'vsyqtvuopz tvzks fn'boaakyvskgvgqggqz tknqbbbvskavkgqpskkkvapca rvzpkksvw'p spvhbzfgggzz'fopfsngoujykutkapbvtqzkkaanpogpnozohvqgfwkdpkvgdpouku v zpkkonuzks oznspqz'aszvnt v ytkcptstkftcknfksbkqszqht z wmpafzwf vkofvadvaqagqzpnzavvuzqkau nqobvggzp qv gup fokkbsoaqkoz svu uvzqzzpyfwuq ozszkzspkavsvta tgovt zpyqvpkzpbnvbsakkgvaktkqwukozp zskzr a aobzopg qtmkakk g nz vgagaktwv tptw bfqmsogzhhkpzwaza tokcbta kzzutvzk zvkunqoowako zabvvkqu'zb kp kvakvthgytvsvstpbvngvskgaqnfkokwozbztgszh'pgbopsgdnvzvozzgvsvgpvuzbuvkzat"
bitstring1 = huff.encode(word1)
decrypted1 = huff.decode(bitstring1)
print(f"[{word1}] -> [{bitstring1}] -> [{decrypted1}]")
#Gives maximum recursion depth error
word1
的长度为 1260。霍夫曼编码每个字母至少使用 1 位。结果 bitstring1
至少有 1260 位长。 decode
每解码一位就递归一次,或者至少递归 1260 次。这超过了 Python 的默认限制 1000,因此出现错误。
这是一个草图(未经测试),它在循环中重写 decode
并在递归中保持树搜索。
def decode(self, bitstring):
for x in bitstring:
if x != "0" and x != "1":
return None
pos = 0
size = len(bitstring)
dec = ""
while pos < size:
pos, word = self.decode_one(self.tree, bitstring, pos)
dec += word
self.decodePosition = 0
return dec
def decode_one(self, node, bitstring, pos):
"""Return a tuple of the smallest unused position and the word"""
if node.left == None and node.right == None:
return pos, node.data
elif bitstring[pos] == "0":
return self.decode_one(node.right, bitstring, pos + 1)
elif bitstring[pos] == "1":
return self.decode_one(node.left, bitstring, pos + 1)
我正在创建一个可以对文本进行编码和解码的 huffman class,但是我的解码方法有问题。我的编码方法工作正常,我的解码方法适用于少量文本。但是,当我尝试解码大量文本时,出现最大递归深度错误,并且不确定如何修复它。 class 接受一个包含字符及其频率的字典,然后将它们变成节点并构建树。构建树后,它将字符及其位码放入另一个字典中以用于编码和解码。
class Node:
def __init__(self,value,data,left=None,right=None):
#Holds frequency
self.value = value
#Holds character
self.data = data
self.left = left
self.right = right
self.code = ""
def __lt__(self, other):
return self.value < other.value
def __le__(self, other):
return self.value <= other.value
def __gt__(self, other):
return self.value > other.value
def __ge__(self, other):
return self.value >= other.value
class MyHuffman():
def __init__(self):
self.chars = {}
self.tree = None
self.decodePosition = 0
def build(self, weights):
huffNodes = []
heapq.heapify(huffNodes)
for x, y in weights.items():
heapq.heappush(huffNodes,Node(y,x))
while len(huffNodes) > 1:
left = heapq.heappop(huffNodes)
right = heapq.heappop(huffNodes)
left.code = 1
right.code = 0
heapq.heappush(huffNodes, Node(left.value+right.value, left.data + right.data, left, right))
self.tree = huffNodes[0]
self.makeLookupTable(self.tree)
def makeLookupTable(self, node, bitCode=""):
codes = bitCode + str(node.code)
if node.left == None and node.right == None:
self.chars[node.data] = codes
else:
self.makeLookupTable(node.left, codes)
self.makeLookupTable(node.right, codes)
return self.chars
def encode(self, word):
bitString = ""
for x in range (0, len(word)):
if word[x] in self.chars:
for y, z in self.chars.items():
if word[x] == y:
bitString = bitString + z
return bitString
def decode(self, bitstring):
for x in bitstring:
if x != "0" and x != "1":
return None
dec = self.recursiveTraverseTree(self.tree,bitstring)
self.decodePosition=0
return dec
def recursiveTraverseTree(self, node, bitString, words=""):
if node.left == None and node.right == None:
words= words + str(node.data)
node = self.tree
if self.decodePosition < len(bitString):
if bitString[self.decodePosition] == "0":
self.decodePosition+=1
return self.recursiveTraverseTree(node.right, bitString, words)
elif bitString[self.decodePosition] == "1":
self.decodePosition+=1
return self.recursiveTraverseTree(node.left, bitString, words)
return words
下面的一些测试运行 第一个测试工作正常,但第二个测试给出了 maxRecursion 深度错误
huff = MyHuffman()
freqs = {'z': 50, 'j': 1, "'": 11, 'a': 42, 'd': 3, 'l': 1, 'f': 14, ' ': 31, 'i': 1, 'w': 8, 's': 41, 'r': 2, 'k': 49, 'b': 28, 'q': 28, 'p': 32, 'g': 33, 'v': 51, 'c': 6, 'h': 6, 'm': 5, 'y': 8, 'o': 36, 't': 28, 'u': 23, 'n': 15}
b = huff.build(freqs)
word = "damn son"
bitstring = huff.encode(word)
decrypted = huff.decode(bitstring)
print(f"[{word}] -> [{bitstring}] -> [{decrypted}]")
#Outputs[damn son] -> [1000011001110000101000110010100010110001] -> [damn son]
word1 ="bgzqbbvkqpaawaoasczz nfq szbumq'vzmbvbknsvvqouapcpbzkvbgtbsggcohto p tzhytz kkutanbv ubsbaogasznr tzz pzzsgqfqpsnfugsktpuzztq s vkfzavokoogmvzgpk tagkz zaoz'vaqpqkvbuvqtsbzgf zk kuzasovhauoqwtvvvovko fvbwhzpvpkskouaupspstbvgsbszipqvvuvmuaspzna stvvk gu saaokggnmsvtspkvqvsahvozsawszfpws skgg bqkqu salg fovuknaknpupovafbovqssagpgbfkcuz gf ofgyrokasgc guabqzbtkosqzbzvspzwgnsyfhvoz akgo'htsovzpakabayffpkpkvqrpzzqogsfvvatsvqbaapozavuyovzpbzsz ppuzrqbq'jaq zuqhhpptvkguktcbkazsszsgvocppzptzzhtzgt b mkznz qqk avkmztzsbzkgrovkqsssb pvtvssoutssazasunaavgybffppqgqagbsa gqscwpno'pb qsknzagtu pztqfbmbztmtuzktvza gaovapkvav govpv oazg'qgrpyszvqqzvgvztbavsy pswurtauztkztavv zcqvzs gbt zgzosfvtpk'gyggbokt ktgukzgskfzf ntavpq bzazvtphvcfba knp'zg'vsyqtvuopz tvzks fn'boaakyvskgvgqggqz tknqbbbvskavkgqpskkkvapca rvzpkksvw'p spvhbzfgggzz'fopfsngoujykutkapbvtqzkkaanpogpnozohvqgfwkdpkvgdpouku v zpkkonuzks oznspqz'aszvnt v ytkcptstkftcknfksbkqszqht z wmpafzwf vkofvadvaqagqzpnzavvuzqkau nqobvggzp qv gup fokkbsoaqkoz svu uvzqzzpyfwuq ozszkzspkavsvta tgovt zpyqvpkzpbnvbsakkgvaktkqwukozp zskzr a aobzopg qtmkakk g nz vgagaktwv tptw bfqmsogzhhkpzwaza tokcbta kzzutvzk zvkunqoowako zabvvkqu'zb kp kvakvthgytvsvstpbvngvskgaqnfkokwozbztgszh'pgbopsgdnvzvozzgvsvgpvuzbuvkzat"
bitstring1 = huff.encode(word1)
decrypted1 = huff.decode(bitstring1)
print(f"[{word1}] -> [{bitstring1}] -> [{decrypted1}]")
#Gives maximum recursion depth error
word1
的长度为 1260。霍夫曼编码每个字母至少使用 1 位。结果 bitstring1
至少有 1260 位长。 decode
每解码一位就递归一次,或者至少递归 1260 次。这超过了 Python 的默认限制 1000,因此出现错误。
这是一个草图(未经测试),它在循环中重写 decode
并在递归中保持树搜索。
def decode(self, bitstring):
for x in bitstring:
if x != "0" and x != "1":
return None
pos = 0
size = len(bitstring)
dec = ""
while pos < size:
pos, word = self.decode_one(self.tree, bitstring, pos)
dec += word
self.decodePosition = 0
return dec
def decode_one(self, node, bitstring, pos):
"""Return a tuple of the smallest unused position and the word"""
if node.left == None and node.right == None:
return pos, node.data
elif bitstring[pos] == "0":
return self.decode_one(node.right, bitstring, pos + 1)
elif bitstring[pos] == "1":
return self.decode_one(node.left, bitstring, pos + 1)