首页 > 代码库 > 优先级队列优化的霍夫曼编码(带中文压缩)

优先级队列优化的霍夫曼编码(带中文压缩)

利用STL中的优先级队列进行优化

我将压缩和解压分为两部分,其实其中有一些代码还是一样的

压缩的时候通过bitset将每8位01串压缩成一个字节;如果最后一组不足8位,则用0补齐,同时要记录最后一个数据字节实际有多少个有效位,并将这个个数存入文件的最后一个字节。解压的时候先将文件定位到最后一个字节,取出有效位的个数:压缩文件中真正的数据到倒数第二个字节为止,倒数第一个字节只是记录倒数第二个字节中有几位是有效的。解压的时候根据密码本(记录每个字节的权值)建立哈夫曼树,然后根据哈夫曼树解压文件

压缩代码部分:

#include <iostream>
#include <string>
#include<functional>
#include <map>
#include <vector>
#include <queue>
#include <bitset>
#include <fstream>
#include <cassert>>
#include <algorithm>
using namespace std;
//-------------------- Huffman Node ------------------------------//
// One node of the Huffman tree.
struct Node
{
    char  ch;          // symbol; assigned for the leaf nodes built from the frequency table
    int   freq;        // weight of the node (for merged nodes: sum of both children)
    Node* leftchild;   // left subtree, NULL on a leaf
    Node* rightchild;  // right subtree, NULL on a leaf
    Node* parent;      // link towards the root
};

// Pointer alias used by the queue and the tree-building helpers.
typedef Node* pNode;

// Comparator handed to std::priority_queue for Huffman nodes.
struct cmp1
{
    bool operator()(const pNode a, const pNode b)
    {
        // Gotcha: std::priority_queue keeps the element that compares
        // "largest" on top, so the test is reversed here to make the node
        // with the LOWEST frequency the one returned by top().
        return b->freq < a->freq;
    }
};

//--------------------get <char,int>  dic-------------------------
// Reads the file at `addr` one char at a time and adds one to dic[c] for
// every char read. Existing entries in `dic` are kept and incremented.
// Asserts that the file could be opened.
void getdic2(map<char,int>& dic,const char* addr)
{
    ifstream fin(addr);
    assert(fin.is_open());
    for(char current; fin.get(current); )
    {
        ++dic[current];
    }
    fin.close();
}
// Orders (symbol, count) pairs by ascending count; the symbol is ignored.
bool cmp(const pair<char, int> &a, const pair<char, int> &b)
{
	return b.second > a.second;
}

//--------------------get lower vector----------------------------
// Appends every (symbol, count) entry of `dic` to `vec`, then sorts the whole
// of `vec` with `cmp` (ascending count).
void getvec2(map<char,int>& dic,vector<pair<char,int> >& vec)
{
    for(map<char,int>::iterator entry = dic.begin(); entry != dic.end(); ++entry)
    {
        vec.push_back(pair<char,int>(entry->first,entry->second));
    }
    sort(vec.begin(),vec.end(),cmp);
}

//--------------------get Node queue------------------------------
void getnopriority_queue( vector<pair<char,int> >& vec, priority_queue<pNode ,vector<pNode>,cmp1 > & deq)
{
    vector<pair<char,int> >::iterator q = vec.begin();
    while(q != vec.end())
    {
        pNode p_Node = new Node;
        p_Node->ch = q->first;
        p_Node->freq = q->second;
       // cout<< p_Node->ch <<" "<< p_Node->freq<<endl;
        p_Node->leftchild = NULL;
        p_Node->rightchild = NULL;
        p_Node->parent = NULL;
        deq.push(p_Node);
        q++;
    }
    //cout<<endl;
}

//----------------------------------------------------------------
// Creates a new internal node whose children are `a` (left) and `b` (right)
// and whose frequency is the sum of theirs.
//
// Both children get their `parent` pointer set to the new node (previously it
// was reset to NULL, so the parent links were never usable). The new node's
// own `ch` and `parent` are initialised so no field is left indeterminate.
//
// Returns the newly allocated node; it is created with `new` and not freed here.
pNode findParentNode(pNode &a, pNode &b)
{
    pNode parent = new Node;
    parent->ch = '\0';              // internal nodes carry no symbol
    parent->freq = a->freq + b->freq;
    parent->leftchild = a;
    parent->rightchild = b;
    parent->parent = NULL;          // linked later if this node gets merged again
    a->parent = parent;
    b->parent = parent;
    return parent;
}

//--------------------make Huffman tree---------------------------
// Builds the Huffman tree from the queue of nodes and returns its root.
//
// `dep` is taken by value, so the caller's queue is left untouched. While at
// least two nodes remain, the two on top of the queue are removed, merged by
// findParentNode (first popped = left child, second popped = right child) and
// the merged node is pushed back.
//
// Returns NULL when the queue is empty (e.g. an empty input file); calling
// top() on an empty priority_queue would be undefined behaviour.
pNode makhfmtr( priority_queue<pNode ,vector<pNode>,cmp1 >  dep)
{
    if(dep.empty())
    {
        return NULL;
    }
    while(dep.size() >= 2)
    {
        pNode x,y;
        x = dep.top();dep.pop();
        y = dep.top();dep.pop();
        dep.push(findParentNode(x,y));
    }
    pNode root = dep.top();
    root->parent = NULL;    // the root has no parent
    return root;
}

//--------------------利用DFS求哈夫曼编码---------------------------
// Code table filled by gethfmList2: symbol -> code written as '0'/'1' characters.
map <char,string>  hfmList2;
// Scratch path used by gethfmList2: the edge labels from the root to the node
// currently being visited.
vector<char> V;

// Depth-first walk over the tree rooted at `root` that records, for every
// leaf, the path leading to it ('0' for a left edge, '1' for a right edge)
// in hfmList2.
//
// Each edge label is pushed before descending and popped right after the
// recursive call returns, so V is left exactly as it was on entry. The
// previous version popped inside the callee, which called pop_back() on an
// empty vector (undefined behaviour) when the root itself was a leaf.
// A NULL root is accepted and ignored.
//
// NOTE(review): for a tree consisting of a single leaf the recorded code is
// the empty string, as before, so a one-symbol input produces no data bits.
// Fixing that needs a matching change in the decoder.
void gethfmList2(pNode root)
{
   if(root == NULL)
   {
       return;
   }

   if(root->leftchild == NULL && root->rightchild == NULL)
   {
       hfmList2[root->ch] = string(V.begin(), V.end());
       return;
   }

   if(root->leftchild)
   {
       V.push_back('0');
       gethfmList2(root->leftchild);
       V.pop_back();
   }
   if(root->rightchild)
   {
       V.push_back('1');
       gethfmList2(root->rightchild);
       V.pop_back();
   }
}


// Encodes the file at `addrYuan` with the global code table hfmList2 and
// writes the packed result to `addrMudi` (opened in binary mode).
//
// Output layout:
//   * one byte for every 8 code bits, first bit in the most significant position;
//   * one final data byte holding the remaining 0..7 bits, padded with '0'
//     bits on the low side;
//   * one trailing byte holding how many bits of that final data byte are valid.
//
// Asserts that the input file could be opened.
void smallerToFile(const char* addrYuan,const char* addrMudi)
{
    ifstream source;
    source.open(addrYuan);
    assert(source.is_open());
    ofstream target;
    target.open(addrMudi,ios_base::binary);

    string pending;     // code bits not yet written out
    char symbol;
    while(source.get(symbol))
    {
        pending += hfmList2[symbol];
        // Flush every complete group of 8 bits.
        while(pending.size() >= 8)
        {
            bitset<8> octet(pending.substr(0,8));
            target << static_cast<char>(octet.to_ulong());
            pending.erase(0,8);
        }
    }

    // Fewer than 8 bits are left here: remember how many, then pad to a byte.
    const char youxiao = pending.size();
    pending.resize(8,'0');

    bitset<8> tail(pending);
    target << static_cast<char>(tail.to_ulong());
    target << youxiao;  // store the number of valid bits of the last data byte
    source.close();
    target.close();
}

// Writes the frequency table to the code-book file, one entry per line in the
// form "<symbol> <count>".
//
// `dic` is now taken by const reference instead of by value, so the map is no
// longer copied on every call; lines end with '\n' rather than endl, which
// produces the same bytes without flushing the stream after every entry.
//
// NOTE(review): `addr` is ignored - the output always goes to "密码本.txt".
// It is left that way on purpose: the visible caller passes "密码本" (no
// extension) while the decompressor reads "密码本.txt", so honouring `addr`
// would require changing the caller as well.
void dicToFile(const char* addr,const  map <char,int>& dic)
{
    (void)addr;
    ofstream fout("密码本.txt");
    for(map<char,int>::const_iterator pos = dic.begin(); pos != dic.end(); ++pos)
    {
        fout << pos->first << " "<< pos->second << '\n';
    }
    fout.close();
}


// Compressor entry point: counts the symbols of "原文.txt", builds the
// Huffman tree and code table, saves the frequency table, then writes the
// packed output to "压缩后.txt".
int main()
{
    // Symbol frequencies of the input file.
    map <char,int> dic;
    getdic2(dic,"原文.txt");

    // Same data as a vector sorted by count.
    vector<pair<char,int> > vec;
    getvec2(dic,vec);

    // One leaf per symbol, then merge them into the tree.
    priority_queue<pNode ,vector<pNode>,cmp1 > deq;
    getnopriority_queue(vec,deq);
    pNode root = makhfmtr(deq);

    // Fill the global code table from the tree.
    gethfmList2(root);

    dicToFile("密码本",dic);
    smallerToFile("原文.txt","压缩后.txt");
    return 0;
}



解压代码部分:

#include <iostream>
#include <string>
#include <map>
#include <vector>
#include <queue>
#include <utility> //pair
#include <bitset>	 //位运算
#include <fstream>
#include <cassert>>//assert
#include <algorithm>//sort
using namespace std;
//-------------------- Huffman node ------------------------------//
// One node of the Huffman tree used by the decompressor.
struct node
{
    char  ch;          // symbol; assigned for the leaf nodes built from the frequency table
    int   freq;        // weight of the node (for merged nodes: sum of both children)
    node* leftchild;   // left subtree, NULL on a leaf
    node* rightchild;  // right subtree, NULL on a leaf
    node* parent;      // link towards the root
};

// Pointer alias used by the queue and the tree-building helpers.
typedef node* pnode;

// Comparator handed to std::priority_queue for Huffman nodes. The comparison
// is reversed (greater-than) because the queue keeps the "largest" element on
// top; this way top() yields the node with the lowest frequency.
struct cmp2
{
    bool operator()(const pnode a, const pnode b)
    {
        return b->freq < a->freq;
    }
};

//--------------------get <char,int>  dic-------------------------
// Reads the file at `addr` one char at a time and adds one to dic[c] for
// every char read. Existing entries in `dic` are kept and incremented.
// Asserts that the file could be opened.
void getdic(map<char,int>& dic,const char* addr)
{
    ifstream fin(addr);
    assert(fin.is_open());
    for(char current; fin.get(current); )
    {
        ++dic[current];
    }
    fin.close();
}

//----------------------------------------------------------------
// Orders (symbol, count) pairs by ascending count; the symbol is ignored.
bool cmp(const pair<char,int> &a,const pair<char,int> &b)
{
    return b.second > a.second;
}
//--------------------get lower vector----------------------------
// Appends every (symbol, count) entry of `dic` to `vec`, then sorts the whole
// of `vec` with `cmp` (ascending count).
void getvec(map<char,int>& dic,vector<pair<char,int> >& vec)
{
    for(map<char,int>::iterator entry = dic.begin(); entry != dic.end(); ++entry)
    {
        vec.push_back(pair<char,int>(entry->first,entry->second));
    }
    sort(vec.begin(),vec.end(),cmp);
}

//--------------------get node queue------------------------------
void getnopriority_queue(const vector<pair<char,int> >& vec, priority_queue<pnode ,vector<pnode>,cmp2 > & deq)
{
    vector<pair<char,int> >::const_iterator q = vec.begin();//pair用const_iterator
    while(q != vec.end())
    {
        pnode p_node = new node;
        p_node->ch = q->first;
        p_node->freq = q->second;
        p_node->leftchild = NULL;
        p_node->rightchild = NULL;
        p_node->parent = NULL;
        deq.push(p_node);
        q++;
    }
}

//----------------------------------------------------------------
// Creates a new internal node with `a` as left child and `b` as right child,
// gives it the sum of their frequencies and points both children back at it.
// The new node's own `ch` and `parent` are not assigned here.
// Returns the newly allocated node; it is created with `new` and not freed here.
pnode findParentNode(pnode &a, pnode &b)
{
    pnode merged = new node;

    merged->leftchild = a;
    a->parent = merged;

    merged->rightchild = b;
    b->parent = merged;

    merged->freq = a->freq + b->freq;
    return merged;
}

//--------------------make Huffman tree---------------------------
// Builds the Huffman tree from the queue of nodes and returns its root.
//
// `dep` is taken by value, so the caller's queue is left untouched. While at
// least two nodes remain, the two on top of the queue are removed, merged by
// findParentNode (first popped = left child, second popped = right child) and
// the merged node is pushed back.
//
// Returns NULL when the queue is empty (e.g. an empty code book); calling
// top() on an empty priority_queue would be undefined behaviour.
pnode makhfmtr( priority_queue<pnode ,vector<pnode>,cmp2 >  dep)
{
    if(dep.empty())
    {
        return NULL;
    }
    while(dep.size() >= 2)
    {
        pnode x,y;
        x = dep.top();dep.pop();
        y = dep.top();dep.pop();
        dep.push(findParentNode(x,y));
    }
    pnode root = dep.top();
    root->parent = NULL;    // the root has no parent
    return root;
}

//-------------------------解压缩---------------------------------------
void jiemiToFile(const char* addrYuan,const char* addrMudi,node* hfmhead)
{
	using namespace std;
	ifstream fin;

	ofstream fout;
	fout.open(addrMudi);

	char ch;
	char quan;
	char youxiao;
	node* ptr = hfmhead;

	fin.open(addrYuan,ios_base::binary);
	assert(fin.is_open());
	fin.seekg(-1,ios_base::end);
	ifstream::pos_type pos;
	fin.get(youxiao);
    fin.seekg(-2,ios_base::end);

	pos = fin.tellg();
	fin.seekg(0,ios_base::beg);

	int i = 0;
	while(1){
		if(fin.tellg() == pos) break;
		fin.get(ch);
		bitset<8> aaa(ch);
		for(i = 7;i >= 0;i--){
			if(0 == aaa[i]){
				ptr = ptr->leftchild;
				if(NULL == ptr->leftchild){
					fout << ptr->ch;
					ptr = hfmhead;
				}
			}else if(1 == aaa[i]){
				ptr = ptr->rightchild;
				if(NULL == ptr->leftchild){
					fout << ptr->ch;
					ptr = hfmhead;
				}
			}
		}
	}
	fin.get(ch);
	bitset<8> last(ch);
	for(i = 7;i >= (8-youxiao);i--){
		if(0 == last[i]){
			ptr = ptr->leftchild;
			if(NULL == ptr->leftchild){
				fout << ptr->ch;
				ptr = hfmhead;
			}
		}else if(1 == last[i]){
			ptr = ptr->rightchild;
			if(NULL == ptr->leftchild){
				fout << ptr->ch;
				ptr = hfmhead;
			}
		}
	}
	fin.close();
	fout.close();
}


//--------------------------------------------------------------------------
// Loads the frequency table from the code-book file at `addr` into `dic`.
//
// Each record is: one symbol char (read unformatted, so it may itself be a
// space or a newline), a count parsed with operator>>, and one terminating
// char that is skipped.
//
// Compared with the previous version:
//   * the file named by `addr` is opened; before, the parameter was ignored
//     and "密码本.txt" was hard-coded (the visible caller passes that very name);
//   * a record whose count cannot be parsed ends the loop instead of storing
//     an unset `num` into the map.
// If the file cannot be opened `dic` is left unchanged.
void dicFromFile(const char* addr,map<char,int> &dic)
{
	ifstream fin(addr);
	char ch;
	int num;
	while(fin.get(ch))
	{
		if(!(fin >> num))
			break;
		dic[ch] = num;
		fin.get(ch);	// skip the line terminator after the count
	}
	fin.close();
}

// Decompressor entry point: reloads the frequency table from "密码本.txt",
// rebuilds the Huffman tree from it and decodes "压缩后.txt" into "解压后.txt".
int main()
{
    // Frequency table saved by the compressor.
    map <char,int> dic;
    dicFromFile("密码本.txt",dic);

    // Same data as a vector sorted by count.
    vector<pair<char,int> > vec;
    getvec(dic,vec);

    // One leaf per symbol, then merge them into the tree.
    priority_queue<pnode ,vector<pnode>,cmp2 > deq;
    getnopriority_queue(vec,deq);
    node* head = makhfmtr(deq);

    jiemiToFile("压缩后.txt","解压后.txt",head);
    return 0;
}



优先级队列优化的霍夫曼编码(带中文压缩)