大多数时候 C++ Huffman 代码实现编码正确吗?
C++ Huffman code implementation encoding right most of the time?
晚上都在研究霍夫曼编码的 C++ 实现,这就是我所拥有的。奇怪的是大部分时间都在工作。但是对于某些输入,我得到了不正确的 coding/output。如果你看到我在这里遗漏了什么,请告诉我......我会继续寻找......
谢谢你的帮助 !更新了代码以解决下面提到的问题...仍然遇到同样的问题。
#include <iostream>
#include <queue>
#include <list>
#include <iterator>
#include <vector>
#include <algorithm>
class HuffmanCodes
{
struct Node
{
int data;
size_t freq;
Node* left;
Node* right;
Node()
{
data = '[=10=]';
freq = 0;
}
Node(int data, size_t freq) : data(data),
freq(freq),
left(nullptr),
right(nullptr)
{}
~Node()
{
delete left;
delete right;
}
};
struct compare
{
bool operator()(Node* l, Node* r)
{
return (l->freq > r->freq);
}
};
Node* top;
void printCode(Node* root, std::string str, std::vector<int>& data, int i)
{
if(root == nullptr)
return;
if(root->data != '$' && data[i] == root->data )
{
std::cout << root->data +1 << " : " << str << "\n";
}
printCode(root->left, str + "0" ,data, i);
printCode(root->right, str + "1", data, i);
}
public:
HuffmanCodes() {}
~HuffmanCodes()
{
delete top;
}
void GenerateCode( std::vector<int>& data, std::vector<size_t>& freq)
{
Node* left;
Node* right;
std::priority_queue<Node*, std::vector<Node*>, compare > minHeap;
for(size_t i = 0; i < data.size(); ++i)
{
minHeap.push(new Node(data[i], freq[i]));
}
while(minHeap.size() != 1)
{
std::sort (data.begin(), data.end());
left = minHeap.top();
minHeap.pop();
right = minHeap.top();
minHeap.pop();
top = new Node('$', left->freq + right->freq);
top->left = left;
top->right = right;
minHeap.push(top);
}
for( int j = 0; j < data.size(); j++ )
printCode(minHeap.top(), "", data, j);
}
};
int main()
{
int n;
std::cin >> n;
std::vector<int> data;
std::vector<size_t> freq;
for(int i = 0; i < n; i++){
int input;
std::cin >> input;
freq.push_back(input);
}
for(int i = 0; i < n; i++){
data.push_back(i);
}
HuffmanCodes set1;
size_t size = n;
set1.GenerateCode(data, freq);
return 0;
}
输入:
20
84
87
78
16
94
36
87
93
50
22
63
28
91
60
64
27
41
27
73
37
输出:
1010
1100
1001
100010
000
01101
1011
1111
11101
100011
0100
01100
1101
0011
0101
00100
11100
00101
0111
10000
正确输出:
1010
1011
1001
100010
000
01011
1100
1111
11101
100011
0100
01010
1101
0011
0110
00100
11100
00101
0111
10000
Valgrind 立即指向错误代码:
g++ -std=c++2a -fPIC -g -Wall -Wextra -Wwrite-strings -Wno-parentheses -Wpedantic -Warray-bounds -Weffc++ 53367469.cpp -o 53367469
53367469.cpp: In constructor ‘MinHeapNode::MinHeapNode(int, unsigned int)’:
53367469.cpp:20:1: warning: ‘MinHeapNode::data’ should be initialized in the member initialization list [-Weffc++]
MinHeapNode(int data, unsigned freq) {
^~~~~~~~~~~
53367469.cpp:20:1: warning: ‘MinHeapNode::freq’ should be initialized in the member initialization list [-Weffc++]
53367469.cpp:20:1: warning: ‘MinHeapNode::left’ should be initialized in the member initialization list [-Weffc++]
53367469.cpp:20:1: warning: ‘MinHeapNode::right’ should be initialized in the member initialization list [-Weffc++]
53367469.cpp: In function ‘int main()’:
53367469.cpp:104:8: warning: ISO C++ forbids variable length array ‘p’ [-Wvla]
int p[n];
^
53367469.cpp:105:8: warning: ISO C++ forbids variable length array ‘a’ [-Wvla]
int a[n];
^
53367469.cpp: In function ‘void merge(int*, int, int, int)’:
53367469.cpp:145:14: warning: ISO C++ forbids variable length array ‘left’ [-Wvla]
int left[n1];
^
53367469.cpp:146:15: warning: ISO C++ forbids variable length array ‘right’ [-Wvla]
int right[n2];
^
valgrind -q --leak-check=full ./53367469 <<<"$INPUT"
==8553== Invalid read of size 8
==8553== at 0x10A41D: HuffmanCodes(int*, int*, int) (53367469.cpp:72)
==8553== by 0x10A704: main (53367469.cpp:115)
==8553== Address 0x0 is not stack'd, malloc'd or (recently) free'd
==8553==
==8553==
==8553== Process terminating with default action of signal 11 (SIGSEGV)
==8553== Access not within mapped region at address 0x0
==8553== at 0x10A41D: HuffmanCodes(int*, int*, int) (53367469.cpp:72)
==8553== by 0x10A704: main (53367469.cpp:115)
==8553== If you believe this happened as a result of a stack
==8553== overflow in your program's main thread (unlikely but
==8553== possible), you can try to increase the size of the
==8553== main thread stack using the --main-stacksize= flag.
==8553== The main thread stack size used in this run was 8720384.
一个空队列没有有效的top()
。
顺便说一句,使用 <bits/stdc++.h>
是低效且不可移植的; using namespace std
也是不明智的。
原来问题在于代码如何处理重复项,如果有人好奇的话。代码工作只是必须对结构中的比较进行一些更改以获得正确的输出。感谢大家花时间查看和/或回复我的 post 我一如既往地感谢这些建议,但它们与问题无关。我不得不通过手工做 huff 树来追踪这个。但现在解决了,我 post 代码有点以防万一有人感兴趣。
晚上都在研究霍夫曼编码的 C++ 实现,这就是我所拥有的。奇怪的是大部分时间都在工作。但是对于某些输入,我得到了不正确的 coding/output。如果你看到我在这里遗漏了什么,请告诉我......我会继续寻找...... 谢谢你的帮助 !更新了代码以解决下面提到的问题...仍然遇到同样的问题。
#include <iostream>
#include <queue>
#include <list>
#include <iterator>
#include <vector>
#include <algorithm>
class HuffmanCodes
{
struct Node
{
int data;
size_t freq;
Node* left;
Node* right;
Node()
{
data = '[=10=]';
freq = 0;
}
Node(int data, size_t freq) : data(data),
freq(freq),
left(nullptr),
right(nullptr)
{}
~Node()
{
delete left;
delete right;
}
};
struct compare
{
bool operator()(Node* l, Node* r)
{
return (l->freq > r->freq);
}
};
Node* top;
void printCode(Node* root, std::string str, std::vector<int>& data, int i)
{
if(root == nullptr)
return;
if(root->data != '$' && data[i] == root->data )
{
std::cout << root->data +1 << " : " << str << "\n";
}
printCode(root->left, str + "0" ,data, i);
printCode(root->right, str + "1", data, i);
}
public:
HuffmanCodes() {}
~HuffmanCodes()
{
delete top;
}
void GenerateCode( std::vector<int>& data, std::vector<size_t>& freq)
{
Node* left;
Node* right;
std::priority_queue<Node*, std::vector<Node*>, compare > minHeap;
for(size_t i = 0; i < data.size(); ++i)
{
minHeap.push(new Node(data[i], freq[i]));
}
while(minHeap.size() != 1)
{
std::sort (data.begin(), data.end());
left = minHeap.top();
minHeap.pop();
right = minHeap.top();
minHeap.pop();
top = new Node('$', left->freq + right->freq);
top->left = left;
top->right = right;
minHeap.push(top);
}
for( int j = 0; j < data.size(); j++ )
printCode(minHeap.top(), "", data, j);
}
};
int main()
{
int n;
std::cin >> n;
std::vector<int> data;
std::vector<size_t> freq;
for(int i = 0; i < n; i++){
int input;
std::cin >> input;
freq.push_back(input);
}
for(int i = 0; i < n; i++){
data.push_back(i);
}
HuffmanCodes set1;
size_t size = n;
set1.GenerateCode(data, freq);
return 0;
}
输入: 20 84 87 78 16 94 36 87 93 50 22 63 28 91 60 64 27 41 27 73 37
输出:
1010
1100
1001
100010
000
01101
1011
1111
11101
100011
0100
01100
1101
0011
0101
00100
11100
00101
0111
10000
正确输出: 1010
1011
1001
100010
000
01011
1100
1111
11101
100011
0100
01010
1101
0011
0110
00100
11100
00101
0111
10000
Valgrind 立即指向错误代码:
g++ -std=c++2a -fPIC -g -Wall -Wextra -Wwrite-strings -Wno-parentheses -Wpedantic -Warray-bounds -Weffc++ 53367469.cpp -o 53367469
53367469.cpp: In constructor ‘MinHeapNode::MinHeapNode(int, unsigned int)’:
53367469.cpp:20:1: warning: ‘MinHeapNode::data’ should be initialized in the member initialization list [-Weffc++]
MinHeapNode(int data, unsigned freq) {
^~~~~~~~~~~
53367469.cpp:20:1: warning: ‘MinHeapNode::freq’ should be initialized in the member initialization list [-Weffc++]
53367469.cpp:20:1: warning: ‘MinHeapNode::left’ should be initialized in the member initialization list [-Weffc++]
53367469.cpp:20:1: warning: ‘MinHeapNode::right’ should be initialized in the member initialization list [-Weffc++]
53367469.cpp: In function ‘int main()’:
53367469.cpp:104:8: warning: ISO C++ forbids variable length array ‘p’ [-Wvla]
int p[n];
^
53367469.cpp:105:8: warning: ISO C++ forbids variable length array ‘a’ [-Wvla]
int a[n];
^
53367469.cpp: In function ‘void merge(int*, int, int, int)’:
53367469.cpp:145:14: warning: ISO C++ forbids variable length array ‘left’ [-Wvla]
int left[n1];
^
53367469.cpp:146:15: warning: ISO C++ forbids variable length array ‘right’ [-Wvla]
int right[n2];
^
valgrind -q --leak-check=full ./53367469 <<<"$INPUT"
==8553== Invalid read of size 8
==8553== at 0x10A41D: HuffmanCodes(int*, int*, int) (53367469.cpp:72)
==8553== by 0x10A704: main (53367469.cpp:115)
==8553== Address 0x0 is not stack'd, malloc'd or (recently) free'd
==8553==
==8553==
==8553== Process terminating with default action of signal 11 (SIGSEGV)
==8553== Access not within mapped region at address 0x0
==8553== at 0x10A41D: HuffmanCodes(int*, int*, int) (53367469.cpp:72)
==8553== by 0x10A704: main (53367469.cpp:115)
==8553== If you believe this happened as a result of a stack
==8553== overflow in your program's main thread (unlikely but
==8553== possible), you can try to increase the size of the
==8553== main thread stack using the --main-stacksize= flag.
==8553== The main thread stack size used in this run was 8720384.
一个空队列没有有效的top()
。
顺便说一句,使用 <bits/stdc++.h>
是低效且不可移植的; using namespace std
也是不明智的。
原来问题在于代码如何处理重复项,如果有人好奇的话。代码工作只是必须对结构中的比较进行一些更改以获得正确的输出。感谢大家花时间查看和/或回复我的 post 我一如既往地感谢这些建议,但它们与问题无关。我不得不通过手工做 huff 树来追踪这个。但现在解决了,我 post 代码有点以防万一有人感兴趣。