使用二叉堆构建哈夫曼树
Building a Huffman Tree using a Binary Heap
我目前正在尝试编写一个读取文本文件并通过创建 HuffmanTree 对其进行编码的程序。我在优先级队列的二进制堆中使用并行数组来跟踪我的哈夫曼树。
我知道从堆中删除两分钟,合并它们,然后将它们重新插入堆中直到剩下一分钟的原则,但我无法将 logic/algorithm 转换为代码。
这是我的 HuffmanEncode class:
public class HuffmanEncode {
public HuffmanEncode(String in, String out) {
// Implements the Huffman encoding algorithm
// Add private methods and instance variables as needed
int[] freqs = new int[128]; // character counts
char[] chars = new char[128]; //characters
freqs = countFrequencies(in);
HuffmanTree[] trees = new HuffmanTree[128]; //single node trees
for(int i= 0; i < freqs.length; i++) {
chars[i] = (char)i;
trees[i] = new HuffmanTree(chars[i]);
}
BinaryHeap heap = new BinaryHeap(128); // create a binary heap
for(int i = 0; i < 128; i++) {
heap.insert(freqs[i], trees[i]);
}
// STUCK HERE
buildTree(heap);
HuffmanTree root = new HuffmanTree();
// STUCK HERE
}
private void buildTree(BinaryHeap h) {
// grab two smallest
while (h.getSize() > 1) { //repeat until there is only one left
int temp1, temp2;
HuffmanTree t1, t2;
temp1 = h.getMinPriority();
temp2 = h.getMinPriority();
// add their frequency to create new single node tree with char 128
int sum = temp1 + temp2;
HuffmanTree node = new HuffmanTree((char)128);
// insert it back into the heap
h.insert(sum, node);
}
}
// count the frequencies of all characters in ascii 0-127 and store them in an array
private int[] countFrequencies(String input) {
File f1 = new File(input);
int[] count = new int[128];
try {
BufferedReader in = new BufferedReader (new FileReader (f1));
int nextChar;
char ch;
while((nextChar = in.read()) != -1) { // stop when end of file is reached
ch = ((char) nextChar);
if(ch <= 127)
count[ch]++;
}
in.close();
} catch (FileNotFoundException e) {
System.out.println("file not found");
} catch (IOException e) {
System.out.println("Buffered Reader error");
}
return count;
}
这是我的二进制堆 class:
public class BinaryHeap {
// implements a binary heap where the heap rule is the value in the parent
// node is less than or equal to the values in the child nodes
// implementation uses parallel arrays to store the priorities and the trees
// must use this implementation
int priority[];
HuffmanTree trees[];
int size;
public BinaryHeap(int s) {
priority = new int[s+1];
trees = new HuffmanTree[s+1];
size = 0;
}
public void removeMin() {
// PRE: size != 0;
// removes the priority and the tree at the root of the heap
int parent;
int child;
int x = priority[size];
HuffmanTree z = trees[size];
size--;
child = 2;
while(child <= size) {
parent = child / 2;
if(child < size && priority[child+1] < priority[child])
child++;
if(x < priority[child]) break;
priority[parent] = priority[child];
trees[parent] = trees[child];
child = 2 * child;
}
priority[child/2] = x;
trees[child/2] = z;
}
public int getMinPriority() {
// PRE: size != 0
// return the priority in the root of the heap
int min = priority[1];
removeMin();
return min;
}
public boolean full() {
// return true if the heap is full otherwise return false
return size == priority.length-1;
}
public void insert(int p, HuffmanTree t) {
// insert the priority p and the associated tree t into the heap
// PRE: !full()
int parent;
int child;
size++;
child = size;
parent = child/2;
priority[0] = p;
trees[0] = t;
while (priority[parent] > p) {
priority[child] = priority[parent];
trees[child] = trees[parent];
child = parent;
parent = child/2;
}
priority[child] = p;
trees[child] = t;
}
public int getSize() {
// return the number of values (priority, tree) pairs in the heap
return size;
}
}
这是 HuffmanTree 对象的 class:
import java.util.*;
public class HuffmanTree {
private class Node{
private Node left;
private char data;
private Node right;
private Node parent;
private Node(Node L, char d, Node R, Node P) {
left = L;
data = d;
right = R;
parent = P;
}
}
private Node root;
private Node current; // value is changed by move methods
public HuffmanTree() {
root = null;
current = null;
}
public HuffmanTree(char d) {
// single node tree
root = new Node(null, d, null, null);
current = null;
}
public HuffmanTree(String t, char nonLeaf) {
// Assumes t represents a post order representation of the tree
// nonLeaf is the char value of the data in the non-leaf nodes
// use (char) 128 for the non-leaf value
}
public HuffmanTree(HuffmanTree b1, HuffmanTree b2, char d) {
// makes a new tree where b1 is the left subtree and b2 is the right subtree and d is the data in root
root = new Node(b1.root, d, b2.root, null);
current = null;
}
// use the move methods to traverse the tree
// the move methods change the value of current
// use these in the decoding process
public void moveToRoot() {
// change current to reference the root of the tree
current = root;
}
public void moveToLeft() {
// PRE: the current node is not a leaf
current = current.left;
}
public void moveToRight() {
// PRE: the current node is not a leaf
current = current.right;
}
public void moveToParent() {
// PRE: the current node is not the root
current = current.parent;
}
public boolean atRoot() {
// returns true if the current node is the root otherwise returns false
if(current.equals(root)) {
return true;
}
return false;
}
public boolean atLeaf() {
// returns true if the current references a leaf otherwise return false
if(current.left == null && current.right == null && current.parent != null) {
return true;
}
return false;
}
public char current() {
// returns the data value in the node referenced by current
return current.data;
}
public Iterator<String> iterator(){
//return a new path iterator object
return new PathIterator();
}
public String toString() {
// returns a string representation of the tree using postorder format
return toString(root);
}
private String toString(Node r) {
if(r == null)
return "";
toString(r.left);
toString(r.right);
return r.data + "";
}
public class PathIterator implements Iterator<String>{
// the iterator returns the path (a series of 0s and 1s) to each leaf
// DO NOT compute all paths in the constructor
// only compute them as needed (similar to what you did in homework 2)
// add private methods and variables as needed
public PathIterator() {
}
public boolean hasNext() {
return true;
}
public String next() {
// the format of the string should be leaf value, a space, a sequence of 0s and 1s
// the 0s and 1s indicate the path from the root the node containing the leaf value
String result = "";
return result;
}
public void remove() {
// optional method not implemented
}
}
}
我了解并非所有代码都是完整的,它还在进行中。目前我正在尝试使用 HuffmanEncode class 构建树。
我的问题是如何使用二叉堆的并行数组构造二叉树?我尝试从数组中拉出两个元素,添加它们的频率以创建一个新节点,然后将它们重新插入到树中(如代码所示),但我不知道如何与使用 HuffmanTree 保持平行将两棵树合并在一起的构造函数。我怎样才能保证这一切顺利进行?
当你让新节点插回堆时,需要先将它的左右分支连接到你从堆中拉出的两个节点上。
我已经使用最小堆实现了霍夫曼编码。你可能会从中得到一些想法。
internal class HuffmanTree
{
internal HuffmanTreeNode rootNode;
private Dictionary<char, int> dictFrequencies;
HuffmanPriorityQueue huffmanPriorityQueue;
internal void Build(string input)
{
dictFrequencies = input.GroupBy(x => x).ToDictionary(k => k.Key, v => v.Count());
huffmanPriorityQueue = new HuffmanPriorityQueue(dictFrequencies.Count);
foreach (var item in dictFrequencies)
{
huffmanPriorityQueue.Push(new HuffmanTreeNode { Symbol = item.Key, Frequency = item.Value, Left = null, Right = null });
}
while(huffmanPriorityQueue.Count() >= 2)
{
HuffmanTreeNode leftNode = huffmanPriorityQueue.Pop();
HuffmanTreeNode rightNode = huffmanPriorityQueue.Pop();
HuffmanTreeNode parentNode = new HuffmanTreeNode
{
Frequency = leftNode.Frequency + rightNode.Frequency,
Symbol = '*',
Left = leftNode,
Right = rightNode
};
this.rootNode = parentNode;
huffmanPriorityQueue.Push(parentNode);
}
}
internal class HuffmanPriorityQueue
{
HuffmanTreeNode[] items;
int top = 0;
public HuffmanPriorityQueue(int size)
{
items = new HuffmanTreeNode[size + 1];
}
internal void Push(HuffmanTreeNode node)
{
int i = ++top;
while (i > 1 && items[i / 2].Frequency > node.Frequency)
{
items[i] = items[i / 2];
i = i / 2;
}
items[i] = node;
}
internal HuffmanTreeNode Pop()
{
HuffmanTreeNode data = items[1];
items[1] = items[top];
items[top--] = null;
int i = 1;
int j = i * 2;
while (j <= top)
{
if ((j + 1) <= top && items[j + 1].Frequency < items[j].Frequency)
{
j += 1;
}
if (items[j].Frequency < items[i].Frequency)
{
HuffmanTreeNode temp = items[j];
items[j] = items[i];
items[i] = temp;
i = j;
j = i * 2;
}
else
break;
}
return data;
}
internal int Count()
{
return top;
}
}
internal class HuffmanTreeNode
{
internal char Symbol { get; set; }
internal int Frequency { get; set; }
internal HuffmanTreeNode Right { get; set; }
internal HuffmanTreeNode Left { get; set; }
}
我目前正在尝试编写一个读取文本文件并通过创建 HuffmanTree 对其进行编码的程序。我在优先级队列的二进制堆中使用并行数组来跟踪我的哈夫曼树。
我知道从堆中删除两分钟,合并它们,然后将它们重新插入堆中直到剩下一分钟的原则,但我无法将 logic/algorithm 转换为代码。
这是我的 HuffmanEncode class:
public class HuffmanEncode {
public HuffmanEncode(String in, String out) {
// Implements the Huffman encoding algorithm
// Add private methods and instance variables as needed
int[] freqs = new int[128]; // character counts
char[] chars = new char[128]; //characters
freqs = countFrequencies(in);
HuffmanTree[] trees = new HuffmanTree[128]; //single node trees
for(int i= 0; i < freqs.length; i++) {
chars[i] = (char)i;
trees[i] = new HuffmanTree(chars[i]);
}
BinaryHeap heap = new BinaryHeap(128); // create a binary heap
for(int i = 0; i < 128; i++) {
heap.insert(freqs[i], trees[i]);
}
// STUCK HERE
buildTree(heap);
HuffmanTree root = new HuffmanTree();
// STUCK HERE
}
private void buildTree(BinaryHeap h) {
// grab two smallest
while (h.getSize() > 1) { //repeat until there is only one left
int temp1, temp2;
HuffmanTree t1, t2;
temp1 = h.getMinPriority();
temp2 = h.getMinPriority();
// add their frequency to create new single node tree with char 128
int sum = temp1 + temp2;
HuffmanTree node = new HuffmanTree((char)128);
// insert it back into the heap
h.insert(sum, node);
}
}
// count the frequencies of all characters in ascii 0-127 and store them in an array
private int[] countFrequencies(String input) {
File f1 = new File(input);
int[] count = new int[128];
try {
BufferedReader in = new BufferedReader (new FileReader (f1));
int nextChar;
char ch;
while((nextChar = in.read()) != -1) { // stop when end of file is reached
ch = ((char) nextChar);
if(ch <= 127)
count[ch]++;
}
in.close();
} catch (FileNotFoundException e) {
System.out.println("file not found");
} catch (IOException e) {
System.out.println("Buffered Reader error");
}
return count;
}
这是我的二进制堆 class:
public class BinaryHeap {
// implements a binary heap where the heap rule is the value in the parent
// node is less than or equal to the values in the child nodes
// implementation uses parallel arrays to store the priorities and the trees
// must use this implementation
int priority[];
HuffmanTree trees[];
int size;
public BinaryHeap(int s) {
priority = new int[s+1];
trees = new HuffmanTree[s+1];
size = 0;
}
public void removeMin() {
// PRE: size != 0;
// removes the priority and the tree at the root of the heap
int parent;
int child;
int x = priority[size];
HuffmanTree z = trees[size];
size--;
child = 2;
while(child <= size) {
parent = child / 2;
if(child < size && priority[child+1] < priority[child])
child++;
if(x < priority[child]) break;
priority[parent] = priority[child];
trees[parent] = trees[child];
child = 2 * child;
}
priority[child/2] = x;
trees[child/2] = z;
}
public int getMinPriority() {
// PRE: size != 0
// return the priority in the root of the heap
int min = priority[1];
removeMin();
return min;
}
public boolean full() {
// return true if the heap is full otherwise return false
return size == priority.length-1;
}
public void insert(int p, HuffmanTree t) {
// insert the priority p and the associated tree t into the heap
// PRE: !full()
int parent;
int child;
size++;
child = size;
parent = child/2;
priority[0] = p;
trees[0] = t;
while (priority[parent] > p) {
priority[child] = priority[parent];
trees[child] = trees[parent];
child = parent;
parent = child/2;
}
priority[child] = p;
trees[child] = t;
}
public int getSize() {
// return the number of values (priority, tree) pairs in the heap
return size;
}
}
这是 HuffmanTree 对象的 class:
import java.util.*;
public class HuffmanTree {
private class Node{
private Node left;
private char data;
private Node right;
private Node parent;
private Node(Node L, char d, Node R, Node P) {
left = L;
data = d;
right = R;
parent = P;
}
}
private Node root;
private Node current; // value is changed by move methods
public HuffmanTree() {
root = null;
current = null;
}
public HuffmanTree(char d) {
// single node tree
root = new Node(null, d, null, null);
current = null;
}
public HuffmanTree(String t, char nonLeaf) {
// Assumes t represents a post order representation of the tree
// nonLeaf is the char value of the data in the non-leaf nodes
// use (char) 128 for the non-leaf value
}
public HuffmanTree(HuffmanTree b1, HuffmanTree b2, char d) {
// makes a new tree where b1 is the left subtree and b2 is the right subtree and d is the data in root
root = new Node(b1.root, d, b2.root, null);
current = null;
}
// use the move methods to traverse the tree
// the move methods change the value of current
// use these in the decoding process
public void moveToRoot() {
// change current to reference the root of the tree
current = root;
}
public void moveToLeft() {
// PRE: the current node is not a leaf
current = current.left;
}
public void moveToRight() {
// PRE: the current node is not a leaf
current = current.right;
}
public void moveToParent() {
// PRE: the current node is not the root
current = current.parent;
}
public boolean atRoot() {
// returns true if the current node is the root otherwise returns false
if(current.equals(root)) {
return true;
}
return false;
}
public boolean atLeaf() {
// returns true if the current references a leaf otherwise return false
if(current.left == null && current.right == null && current.parent != null) {
return true;
}
return false;
}
public char current() {
// returns the data value in the node referenced by current
return current.data;
}
public Iterator<String> iterator(){
//return a new path iterator object
return new PathIterator();
}
public String toString() {
// returns a string representation of the tree using postorder format
return toString(root);
}
private String toString(Node r) {
if(r == null)
return "";
toString(r.left);
toString(r.right);
return r.data + "";
}
public class PathIterator implements Iterator<String>{
// the iterator returns the path (a series of 0s and 1s) to each leaf
// DO NOT compute all paths in the constructor
// only compute them as needed (similar to what you did in homework 2)
// add private methods and variables as needed
public PathIterator() {
}
public boolean hasNext() {
return true;
}
public String next() {
// the format of the string should be leaf value, a space, a sequence of 0s and 1s
// the 0s and 1s indicate the path from the root the node containing the leaf value
String result = "";
return result;
}
public void remove() {
// optional method not implemented
}
}
}
我了解并非所有代码都是完整的,它还在进行中。目前我正在尝试使用 HuffmanEncode class 构建树。
我的问题是如何使用二叉堆的并行数组构造二叉树?我尝试从数组中拉出两个元素,添加它们的频率以创建一个新节点,然后将它们重新插入到树中(如代码所示),但我不知道如何与使用 HuffmanTree 保持平行将两棵树合并在一起的构造函数。我怎样才能保证这一切顺利进行?
当你让新节点插回堆时,需要先将它的左右分支连接到你从堆中拉出的两个节点上。
我已经使用最小堆实现了霍夫曼编码。你可能会从中得到一些想法。
internal class HuffmanTree
{
internal HuffmanTreeNode rootNode;
private Dictionary<char, int> dictFrequencies;
HuffmanPriorityQueue huffmanPriorityQueue;
internal void Build(string input)
{
dictFrequencies = input.GroupBy(x => x).ToDictionary(k => k.Key, v => v.Count());
huffmanPriorityQueue = new HuffmanPriorityQueue(dictFrequencies.Count);
foreach (var item in dictFrequencies)
{
huffmanPriorityQueue.Push(new HuffmanTreeNode { Symbol = item.Key, Frequency = item.Value, Left = null, Right = null });
}
while(huffmanPriorityQueue.Count() >= 2)
{
HuffmanTreeNode leftNode = huffmanPriorityQueue.Pop();
HuffmanTreeNode rightNode = huffmanPriorityQueue.Pop();
HuffmanTreeNode parentNode = new HuffmanTreeNode
{
Frequency = leftNode.Frequency + rightNode.Frequency,
Symbol = '*',
Left = leftNode,
Right = rightNode
};
this.rootNode = parentNode;
huffmanPriorityQueue.Push(parentNode);
}
}
internal class HuffmanPriorityQueue
{
HuffmanTreeNode[] items;
int top = 0;
public HuffmanPriorityQueue(int size)
{
items = new HuffmanTreeNode[size + 1];
}
internal void Push(HuffmanTreeNode node)
{
int i = ++top;
while (i > 1 && items[i / 2].Frequency > node.Frequency)
{
items[i] = items[i / 2];
i = i / 2;
}
items[i] = node;
}
internal HuffmanTreeNode Pop()
{
HuffmanTreeNode data = items[1];
items[1] = items[top];
items[top--] = null;
int i = 1;
int j = i * 2;
while (j <= top)
{
if ((j + 1) <= top && items[j + 1].Frequency < items[j].Frequency)
{
j += 1;
}
if (items[j].Frequency < items[i].Frequency)
{
HuffmanTreeNode temp = items[j];
items[j] = items[i];
items[i] = temp;
i = j;
j = i * 2;
}
else
break;
}
return data;
}
internal int Count()
{
return top;
}
}
internal class HuffmanTreeNode
{
internal char Symbol { get; set; }
internal int Frequency { get; set; }
internal HuffmanTreeNode Right { get; set; }
internal HuffmanTreeNode Left { get; set; }
}