无法编写用于构建哈夫曼树的函数

Can't write a function for building Huffman tree

我有一个struct element,其中包含有关树元素的信息。

struct element
{
    char ch;   //Letter
    int left;  //Number in array
    int right; //Number in arry
    int count; //Count letter in word
};

此外,我有一个函数 MakeAlphabet,它为 std::string:

创建了一个字母表
std::vector<element> MakeAlphabet(std::string str)  //РАБОТАЕТ!
{
    std::vector<element> alphabet;
    ranges::sort(str);

    for(auto ch : str | ranges::views::unique)
    {
        alphabet.push_back(element{ch, -1, -1, static_cast<int>(ranges::count(str, ch))});
    }
    return alphabet;
};

最后,我得到了一个函数 MakeBinaryTree:

std::vector<element> MakeBinaryTree(std::string str)
{
    std::vector<element> result;
    std::vector<element> alphabet = MakeAlphabet(str);
    std::sort(alphabet.begin(), alphabet.end());
    result = alphabet;
   
    int size = alphabet.size();
    result = alphabet;

    for(int i = 0; i < size; i+=2)  //I think problem is here!
    {   
        alphabet.push_back(element{-1, i, i+1, (alphabet[i].count + alphabet[i+1].count)});
        alphabet.erase(alphabet.begin(), alphabet.begin() + 1);
        result.push_back(*(alphabet.end()-1));
    }
    return result;
};

检查结果树的根(它必须匹配单词中的字母数)时,结果几乎总是不正确的。

更新: 我有 std::sort(alphabet.begin(), alphabet.end());.

的重载运算符
bool operator<(const element &first, const element &second)
{
    return (first.count < second.count);
}

bool operator==(const element &first, const element &second)
{
    return (first.count == second.count);
}

数组必须排序,取出现次数最少的两个节点组成一个新节点放回数组中。数组再次排序等等...... 因此它只有一个数组,即 alphabet

使用指向元素 (element*) 的指针而不是仅指向元素,因为很容易将它们从数组切换到树。

创建节点class

class node{
};

class inner_node: public node
{
    node *left, *right;
    int count; //Count letter in word
};

class element: public node
{
public:
    char ch;   //Letter
    int count; //Count letter in word
};

std::vector<node*> MakeAlphabet(std::string str)  //РАБОТАЕТ!
{
    std::vector<node*> alphabet;
    ranges::sort(str);

    for(auto ch : str | ranges::views::unique)
    {
        element *e = new element();
        e->ch = ch;
        e->static_cast<int>(ranges::count(str, ch));
        alphabet.push_back(e);
    }
    return alphabet;
}

std::vector<node*> MakeBinaryTree(std::string str)
{
    std::vector<node*> alphabet = MakeAlphabet(str);

    // keep going until there is only the huffman tree root left in the vector
    while(alphabet.size() > 1)
    {   
        std::sort(alphabet.begin(), alphabet.end());

        // Takes last two elements and remove them
        inner_node *first  = (inner_node*) alphabet.back();
        alphabet.pop_back();
        inner_node *second = (inner_node*) alphabet.back();
        alphabet.pop_back();

        // Creates tree node and put in the vector
        inner_node *n = new node();
        n->left = first;
        n->right = second;
        n->count = first->count + second->count;
        alphabet.push_back(n);
    }
    return alphabet;
}

使用 priority_queue 而不是向量。

如果需要,请完成一次霍夫曼编码。

我已经使用 priority_queue.

重新实现了 MakeBinaryTree 函数

其他一切都保持不变。

示例案例:-

character   Frequency
    a            5
    b           9
    c           12
    d           13
    e           16
    f           45

预期结果:-

std::vector<element> 中的最终输出:

Index       Character   Frequency
   0          .      100
   1          f       45
   2          .       55
   3          .       25
   4          .       30
   5          c       12
   6          d       13
   7          .       14
   8          e       16
   9          a        5
  10          b        9

.表示内部节点。

工作代码:-

#include <iostream>
#include <vector>
#include <utility>
#include <string>
#include <queue>
#include <map>
#include <stack>
#include <string_view>
#include <deque>
#include <array>
#include <algorithm>
#include <range/v3/algorithm/count.hpp>
#include <range/v3/action/sort.hpp>
#include <range/v3/view/unique.hpp>
#include <exception>
#include <gtest/gtest.h>

struct element //Элемент бинарного дерева.
{
    char ch;
    struct element* left;
    struct element* right;
    int count;
};

bool operator<(const element &first, const element &second)
{
    return (first.count > second.count); // Reversed to form min heap instead of default max heap
}

bool operator==(const element &first, const element &second)
{
    return (first.count == second.count);
}

std::vector<element> MakeAlphabet(std::string str)  //РАБОТАЕТ!
{
    std::vector<element> alphabet;
    ranges::sort(str);

    for(auto ch : str | ranges::views::unique)
    {
        alphabet.push_back(element{ch, NULL, NULL, static_cast<int>(ranges::count(str, ch))});
    }
    return alphabet;
};


std::vector<element> MakeBinaryTree(std::string str)    //НЕ работает.
{
    std::vector<element> result;
    std::vector<element> alphabet = MakeAlphabet(str);
    std::priority_queue<element> min_heap;
    //Initialize Min Heap
    for(auto x:alphabet)
        min_heap.push(x);

    // Form Huffman Encoding Tree
    while(min_heap.size()>1){
        element *lc = (element*)malloc(sizeof(element));*lc=min_heap.top();min_heap.pop();
        element *rc = (element*)malloc(sizeof(element));*rc=min_heap.top();min_heap.pop();
        min_heap.push(element{'.',lc,rc,lc->count + rc->count});
    }
    std::queue<element> prefix_traversal;
    prefix_traversal.push(min_heap.top());

    // Convert Tree to Vector using BFS
    while(prefix_traversal.size()>0){
        element top = prefix_traversal.front();
        prefix_traversal.pop();
        result.push_back(top);
        if(top.left != NULL)
            prefix_traversal.push(*top.left);
        if(top.right != NULL)
            prefix_traversal.push(*top.right);

    }
    return result;
};



int main(){
    std::string s;
    for(int i=0;i<13;i+=1)s+="d";
    for(int i=0;i<9;i+=1)s+="b";
    for(int i=0;i<5;i+=1)s+="a";
    for(int i=0;i<12;i+=1)s+="c";   
    for(int i=0;i<16;i+=1)s+="e";
    for(int i=0;i<45;i+=1)s+="f";
    std::vector<element> result = MakeBinaryTree(s);
    printf("Index   \tCharacter\tFrequency\n");
    for(int i=0;i<result.size(); i+=1){
        // std::cout<<"Index "<<i<<" "<<result[i].ch<<" "<<result[i].count<<std::endl;
        printf("%4d\t\t%3c\t\t%4d\n",i,result[i].ch,result[i].count);
    }
    return 1;
}