首页 > 代码库 > 使用优先队列构建赫夫曼树

使用优先队列构建赫夫曼树

关于赫夫曼编码和赫夫曼树的相关知识可参考之前两篇文章(由二叉树构造赫夫曼树、赫夫曼编码)。本文介绍另一种构建赫夫曼树的方式,采用优先队列.

步骤:
1.首先我们需要统计不同字符出现的次数。一个字符出现的次数越多,说明其优先级越高,其赫夫曼编码应该越短;
2.将待编码的字符(即带权节点)存入优先级队列,优先级即字符出现的次数;
3.不断迭代队列,直到队列中剩下一个元素(即根节点)。每次从队列中取出两个优先级最小的元素(优先级队列中的元素是按照节点优先级顺序排列的),然后生成一个新的赫夫曼树节点,节点左右孩子分别指向这两个元素,节点权值为这两个元素权值之和,将新节点插入队列;
4.根据赫夫曼树生成赫夫曼编码表。递归遍历赫夫曼树,对每个叶子节点进行编码(左0右1);
5.编码过程即对每个字符查编码表的过程;
6.解码过程即根据编码串访问赫夫曼树的过程.

实现:


/*****************************************************
赫夫曼编码优先队列版
by Rowandjj
2014/6/17
*****************************************************/
#include<iostream>
using namespace std;


typedef struct _HTNODE_//赫夫曼树节点的存储结构
{
    char symbol;//符号(ASC码值)
    struct _HTNODE_ *left;//左孩子
    struct _HTNODE_ *right;//右孩子
}HtNode;

typedef struct _HTREE_//赫夫曼树
{
    HtNode *root;
}HTree;

typedef struct _HLNODE_//赫夫曼码表节点
{
    char symbol;//ASC码值
    char *code;//对应的编码
    struct _HLNODE_ *next;
}HlNode;

typedef struct _HLTABLE_//赫夫曼码表(以单链表形式存储不同ASC码及与之对应的编码表)
{
    HlNode *first;
    HlNode *last;
}HlTable;
//--------------------------------------------------------
#define MAX_SZ 256//队列最大长度
#define TYPE HtNode*//优先队列的节点定义
typedef struct _PQUEUENODE_//队列节点定义
{
    TYPE val;//节点值
    unsigned int priority;//优先级
    struct _PQUEUENODE_ *next;
}pQueueNode;

typedef struct _PQUEUE_//优先队列定义
{
    unsigned int size;//队列长度
    pQueueNode *first;//首节点
}pQueue;
//-----------优先队列操作定义------------------------------
void InitQueue(pQueue **queue);//队列初始化
void AddQueue(pQueue **queue,TYPE val,unsigned int priority);//将一个带优先级的节点插入队列
TYPE GetQueue(pQueue **queue);//队列节点按照优先级顺序出队
void TraverseQueue(pQueue *queue);//遍历队列

//-----------赫夫曼编码操作定义----------------------------
HTree* BuildTree(char *inputString);//创建赫夫曼树
HlTable* BuildTable(HTree *huffmanTree);//构建赫夫曼码表(出现频率最高的ASC码将使用最短的编码)
void encode(HlTable *table,char *stringToEncode);//编码
void decode(HTree *tree,char *stringToDecode);//解码
void TraverseTable(HlTable *table);//遍历赫夫曼编码表
//---------------------------------------------------------

void InitQueue(pQueue **queue)
{
    *queue = (pQueue *)malloc(sizeof(pQueue));
    (*queue)->first = NULL;
    (*queue)->size = 0;
}

void AddQueue(pQueue **queue,TYPE val,unsigned int priority)
{
    if((*queue)->size == MAX_SZ)
    {
        cout<<"queue is full!";
        return;
    }
    pQueueNode *aux = (pQueueNode*)malloc(sizeof(pQueueNode));
    aux->priority = priority;
    aux->val = val;
    aux->next = NULL;

    if((*queue)->first == NULL)//生成队列首节点
    {
        (*queue)->first = aux;
        (*queue)->size++;
    }else
    {
        if(priority <= (*queue)->first->priority)//插到首节点位置
        {
            aux->next = (*queue)->first;
            (*queue)->first = aux;
            (*queue)->size++;
        }else//遍历队列插到合适的位置
        {
            pQueueNode *iterator = (*queue)->first;
            

            while(iterator->next != NULL && priority > iterator->next->priority)
            {
                iterator = iterator->next;
            }

            aux->next = iterator->next;
            iterator->next = aux;
            (*queue)->size++;
        }
    }
}

TYPE GetQueue(pQueue **queue)
{
    TYPE returnVal = NULL;
    if ((*queue)->size > 0)
    {
        pQueueNode *nodeToDel = (*queue)->first;
        (*queue)->first = (*queue)->first->next;
        returnVal = nodeToDel->val;
        free(nodeToDel);
        (*queue)->size--;
    }else
    {
        cout<<"queue is empty"<<endl;
    }
    return returnVal;
}
//-------------------------------------------------------------
HTree* BuildTree(char *inputString)
{
    //统计字符出现次数
    int *probability = (int*)malloc(sizeof(int)*256);
    int i;
    for(i = 0; i < 256;i++)
    {
        probability[i] = 0;
    }
    for(i = 0; inputString[i] != '\0';i++)
    {
        probability[(unsigned char)inputString[i]]++;
    }
    
/*    for(i = 0; i < 256; i++)//打印字符出现的次数(也即频率)
    {
        if(probability[i] != 0)
        cout<<(char)i<<":"<<probability[i]<<endl;
    }
*/
    //根据每个字符出现的频率生成赫夫曼树
    pQueue* huffmanQueue;
    InitQueue(&huffmanQueue);
    for(i = 0; i < 256; i++)
    {
        //1 现将带权节点加到队列中
        if(probability[i] != 0)//只考虑带权节点,也就是说只对带权节点编码
        {
            HtNode *aux = (HtNode*)malloc(sizeof(HtNode));
            aux->symbol = (char)i;
            aux->left = NULL;
            aux->right = NULL;
            
            AddQueue(&huffmanQueue,aux,probability[i]);
        }
    }
    
    cout<<"字符及出现频次:"<<endl;
    TraverseQueue(huffmanQueue);
    free(probability);
        //2 再构建赫夫曼树
    while(huffmanQueue->size != 1)
    {
        int priority = huffmanQueue->first->priority;
        priority += huffmanQueue->first->next->priority;
        //从队列中获取两个优先级最低的节点
        HtNode *left = GetQueue(&huffmanQueue);
        HtNode *right = GetQueue(&huffmanQueue);
        //生成其父节点,父节点的权值为两个子节点的权值之和
        HtNode *newNode = (HtNode*)malloc(sizeof(HtNode));
        newNode->left = left;
        newNode->right = right;
        //将生成的父节点插入到优先队列的合适位置
        AddQueue(&huffmanQueue,newNode,priority);
    }
    HTree *tree = (HTree*)malloc(sizeof(HTree));
    tree->root = GetQueue(&huffmanQueue);
    return tree;
}
void travelseTree(HtNode *treeNode,HlTable **table,int k,char code[256])
{
    if(treeNode->left == NULL && treeNode->right == NULL)//走到叶子节点
    {
        code[k] = '\0';
        HlNode *aux = (HlNode*)malloc(sizeof(HlNode));
        aux->symbol = treeNode->symbol;
        aux->code = (char *)malloc(sizeof(char)*(strlen(code)+1));
        strcpy(aux->code,code);
        aux->next = NULL;
        if((*table)->first == NULL)//连接
        {
            (*table)->first = (*table)->last = aux;
        }else
        {
            (*table)->last->next = aux;
            (*table)->last = aux;
        }
    }
    if(treeNode->left != NULL)
    {
        code[k] = '0';
        travelseTree(treeNode->left,table,k+1,code);
    }
    if(treeNode->right != NULL)
    {
        code[k] = '1';
        travelseTree(treeNode->right,table,k+1,code);
    }
}
HlTable* BuildTable(HTree *huffmanTree)//由赫夫曼树构建赫夫曼编码表
{
    HlTable *table = (HlTable*)malloc(sizeof(HlTable));
    table->first = NULL;
    table->last = NULL;
    char code[256];
    int k = 0;
    
    travelseTree(huffmanTree->root,&table,k,code);
    return table;
}

void encode(HlTable *table,char *stringToEncode)
{
    HlNode *travel;

    for(int i = 0; stringToEncode[i] != '\0'; i++)
    {
        travel = table->first;
        while(travel->symbol != stringToEncode[i])
        {
            travel = travel->next;
        }
        cout<<travel->code<<"  ";
    }
}
void decode(HTree *tree,char *stringToDecode)
{
    HtNode *travel = tree->root;
    for(int i = 0;stringToDecode[i] != '\0'; i++)
    {
        if(travel->left == NULL && travel->right == NULL)
        {
            cout<<travel->symbol;
            travel = tree->root;
        }
        if(stringToDecode[i] == '0')
        {
            travel = travel->left;
        }
        if(stringToDecode[i] == '1')
        {
            travel = travel->right;
        }
        if(stringToDecode[i] != '0' && stringToDecode[i] != '1')
        {
            cout<<"\ndecode error..."<<endl;
        }
    }
    if(travel->left == NULL && travel->right == NULL)
    {
        cout<<travel->symbol;
        travel = tree->root;
    }

}

void TraverseQueue(pQueue *queue)
{
    pQueueNode *iterator = queue->first;
    while(iterator != NULL)
    {
        cout<<iterator->val->symbol<<":"<<iterator->priority<<endl;
        iterator = iterator->next;
    }
}
void TraverseTable(HlTable *table)
{
    HlNode *iterator = table->first;
    cout<<"字符"<<":"<<"编码"<<endl;
    while(iterator != NULL)
    {
        cout<<iterator->symbol<<":"<<iterator->code<<endl;
        iterator = iterator->next;
    }
    cout<<endl;
}
int main()
{
    HTree *codeTree = BuildTree("abbccccddddddd");
    HlTable *codeTable = BuildTable(codeTree);
    
    TraverseTable(codeTable);
    cout<<"编码开始:"<<endl;
    encode(codeTable,"abcd");
    cout<<"\n解码开始:"<<endl;
    decode(codeTree,"10100100000101");
    cout<<endl;
    return 0;
}

测试结果:


优先队列变化过程:

生成的赫夫曼树:


注:这里的构建赫夫曼树的约定和之前的两篇不太一样,这里约定一个节点的左节点的权值小于右节点的权值.