首页 > 代码库 > 优先级队列优化的霍夫曼编码(带中文压缩)
优先级队列优化的霍夫曼编码(带中文压缩)
利用STL中的优先级队列进行优化
我将压缩和解压分为两个程序,其中有一部分代码其实是一样的
压缩的时候通过bitset将01串每8位压缩成一个字节;如果最后一组不满8位,用0补齐,但是要记录这个字节中实际有多少个有效位,并将该数值存入文件的最后一个字节。也就是说,压缩数据实际到倒数第二个字节为止,倒数第一个字节只是记录倒数第二个字节中有几位是有效的。解压的时候先将文件定位到最后一个字节,取出有效位的个数,再根据密码本(记录每个字节的权值)建立哈夫曼树,然后根据哈夫曼树解压文件
压缩代码部分:
#include <iostream>
#include <string>
#include <functional>
#include <map>
#include <vector>
#include <queue>
#include <bitset>
#include <fstream>
#include <cassert>
#include <algorithm>
using namespace std;

//-------------------- Huffman node ------------------------------//
// One node of the Huffman tree. `ch` is only meaningful for leaves;
// internal nodes carry the summed frequency of their two children.
typedef struct Node
{
    char ch;
    int freq;
    Node* leftchild;
    Node* rightchild;
    Node* parent;
} Node, *pNode;

// Comparator for the priority queue. std::priority_queue keeps the element
// that compares "largest" on top, so the comparison is inverted ('>') to get
// the node with the SMALLEST frequency on top.
struct cmp1
{
    bool operator()(const pNode a, const pNode b) const
    {
        return a->freq > b->freq;
    }
};

//-------------------- build <char,int> frequency table ----------//
// Counts how often every byte occurs in the file `addr` and accumulates the
// counts into `dic`. Aborts via assert if the file cannot be opened.
void getdic2(map<char, int>& dic, const char* addr)
{
    char ch;
    ifstream fin;
    fin.open(addr);
    assert(fin.is_open());
    while (fin.get(ch))
    {
        dic[ch]++;
    }
    fin.close();
}

// Orders (symbol, frequency) pairs by ascending frequency.
bool cmp(const pair<char, int>& a, const pair<char, int>& b)
{
    return a.second < b.second;
}

//-------------------- frequency table -> sorted vector ----------//
// Appends every entry of `dic` to `vec` and sorts `vec` by ascending
// frequency.
// NOTE: the decompressor rebuilds the tree with the same steps, so the
// sorting call and the resulting insertion order must stay exactly as they
// are, otherwise ties could be broken differently and the trees would differ.
void getvec2(map<char, int>& dic, vector<pair<char, int> >& vec)
{
    map<char, int>::iterator p = dic.begin();
    while (p != dic.end())
    {
        vec.push_back(pair<char, int>(p->first, p->second));
        ++p;
    }
    sort(vec.begin(), vec.end(), cmp);
}

//-------------------- sorted vector -> node queue ---------------//
// Allocates one leaf node per entry of `vec` and pushes it into `deq`.
// The nodes are heap-allocated; the tree built from them owns them.
void getnopriority_queue(vector<pair<char, int> >& vec,
                         priority_queue<pNode, vector<pNode>, cmp1>& deq)
{
    vector<pair<char, int> >::iterator q = vec.begin();
    while (q != vec.end())
    {
        pNode p_Node = new Node;
        p_Node->ch = q->first;
        p_Node->freq = q->second;
        p_Node->leftchild = NULL;
        p_Node->rightchild = NULL;
        p_Node->parent = NULL;
        deq.push(p_Node);
        ++q;
    }
}

//----------------------------------------------------------------//
// Creates the internal node that joins `a` (left) and `b` (right) and
// returns it. The children's parent links are pointed at the new node.
pNode findParentNode(pNode& a, pNode& b)
{
    pNode parent = new Node;
    parent->ch = '\0'; // internal nodes carry no symbol; avoid an indeterminate value
    parent->freq = a->freq + b->freq;
    parent->leftchild = a;
    parent->rightchild = b;
    parent->parent = NULL;
    a->parent = parent;
    b->parent = parent;
    return parent;
}

//-------------------- build the Huffman tree --------------------//
// Repeatedly merges the two lowest-frequency nodes until one root remains.
// Works on a copy of the queue. Returns NULL when the queue is empty
// (i.e. the input file had no bytes).
pNode makhfmtr(priority_queue<pNode, vector<pNode>, cmp1> dep)
{
    if (dep.empty())
    {
        return NULL;
    }
    while (dep.size() >= 2)
    {
        pNode x, y;
        x = dep.top();
        dep.pop();
        y = dep.top();
        dep.pop();
        dep.push(findParentNode(x, y));
    }
    dep.top()->parent = NULL;
    return dep.top();
}

//-------------------- derive the codes with a DFS ---------------//
map<char, string> hfmList2; // symbol -> code as a string of '0'/'1'
vector<char> V;             // path from the root to the node being visited

// Fills `hfmList2` with the code of every leaf below `root`.
// '0' is appended for a left edge and '1' for a right edge. `V` is left
// exactly as it was found when the call returns.
void gethfmList2(pNode root)
{
    if (root == NULL)
    {
        return;
    }
    if (root->leftchild == NULL && root->rightchild == NULL)
    {
        // A tree consisting of a single leaf has no edges, which would give
        // the only symbol an empty code; use the one-bit code "0" instead.
        hfmList2[root->ch] = V.empty() ? string("0") : string(V.begin(), V.end());
        return;
    }
    if (root->leftchild)
    {
        V.push_back('0');
        gethfmList2(root->leftchild);
        V.pop_back();
    }
    if (root->rightchild)
    {
        V.push_back('1');
        gethfmList2(root->rightchild);
        V.pop_back();
    }
}

//-------------------- write the compressed file -----------------//
// Encodes the file `addrYuan` with the codes in `hfmList2` and writes the
// packed bits to `addrMudi`.
//
// Output layout: the code bits packed 8 per byte (first bit = most
// significant bit), then one byte holding the remaining bits padded with
// zeros, then one byte holding how many bits of that padded byte are valid
// (0..7). The last two bytes are always written, even for empty input.
//
// The source is opened in the default (text) mode because the decompressor
// writes its output in text mode as well.
void smallerToFile(const char* addrYuan, const char* addrMudi)
{
    ifstream fin;
    fin.open(addrYuan);
    assert(fin.is_open());

    ofstream fout;
    fout.open(addrMudi, ios_base::binary);
    if (!fout.is_open())
    {
        cerr << "cannot open output file: " << addrMudi << endl;
        return;
    }

    string pending; // code bits not yet written out
    char chFromFile;
    while (fin.get(chFromFile))
    {
        // find() instead of operator[] so that an unknown byte does not
        // silently add an empty entry to the code table.
        map<char, string>::const_iterator code = hfmList2.find(chFromFile);
        if (code == hfmList2.end())
        {
            continue;
        }
        pending += code->second;
        while (pending.size() >= 8)
        {
            bitset<8> packed(pending, 0, 8);
            fout.put(static_cast<char>(packed.to_ulong()));
            pending.erase(0, 8);
        }
    }

    // Fewer than 8 bits are left: remember how many, then pad with zeros.
    const char youxiao = static_cast<char>(pending.size());
    pending.append(8 - pending.size(), '0');
    bitset<8> last(pending);
    fout.put(static_cast<char>(last.to_ulong()));
    fout.put(youxiao); // number of valid bits in the previous byte

    fin.close();
    fout.close();
}

//-------------------- write the code book -----------------------//
// Writes the frequency table to the file `addr`, one "<byte> <count>" entry
// per line, so that the decompressor can rebuild the same tree.
void dicToFile(const char* addr, const map<char, int>& dic)
{
    ofstream fout(addr);
    if (!fout.is_open())
    {
        cerr << "cannot open code book file: " << addr << endl;
        return;
    }
    map<char, int>::const_iterator pos = dic.begin();
    while (pos != dic.end())
    {
        fout << pos->first << " " << pos->second << '\n';
        ++pos;
    }
    fout.close();
}

// Releases every node of the tree rooted at `root` (NULL is allowed).
static void freehfmtr(pNode root)
{
    if (root == NULL)
    {
        return;
    }
    freehfmtr(root->leftchild);
    freehfmtr(root->rightchild);
    delete root;
}

int main()
{
    map<char, int> dic;
    vector<pair<char, int> > vec;
    priority_queue<pNode, vector<pNode>, cmp1> deq;

    getdic2(dic, "原文.txt");
    getvec2(dic, vec);
    getnopriority_queue(vec, deq);
    pNode root = makhfmtr(deq);
    gethfmList2(root);
    // The decompressor reads "密码本.txt", so the full file name is passed here.
    dicToFile("密码本.txt", dic);
    smallerToFile("原文.txt", "压缩后.txt");

    freehfmtr(root);
    return 0;
}
#include <iostream>
#include <string>
#include <map>
#include <vector>
#include <queue>
#include <utility>   // pair
#include <bitset>    // bit access
#include <fstream>
#include <cassert>   // assert
#include <algorithm> // sort
using namespace std;

//-------------------- Huffman node ------------------------------//
// One node of the Huffman tree. `ch` is only meaningful for leaves;
// internal nodes carry the summed frequency of their two children.
typedef struct node
{
    char ch;
    int freq;
    node* leftchild;
    node* rightchild;
    node* parent;
} node, *pnode;

// Comparator for the priority queue; inverted ('>') so that the node with
// the smallest frequency ends up on top.
struct cmp2
{
    bool operator()(const pnode a, const pnode b) const
    {
        return a->freq > b->freq;
    }
};

//-------------------- build <char,int> frequency table ----------//
// Counts how often every byte occurs in the file `addr` and accumulates the
// counts into `dic`. Aborts via assert if the file cannot be opened.
void getdic(map<char, int>& dic, const char* addr)
{
    char ch;
    ifstream fin;
    fin.open(addr);
    assert(fin.is_open());
    while (fin.get(ch))
    {
        dic[ch]++;
    }
    fin.close();
}

//----------------------------------------------------------------//
// Orders (symbol, frequency) pairs by ascending frequency.
bool cmp(const pair<char, int>& a, const pair<char, int>& b)
{
    return a.second < b.second;
}

//-------------------- frequency table -> sorted vector ----------//
// Appends every entry of `dic` to `vec` and sorts `vec` by ascending
// frequency.
// NOTE: the compressor builds its tree with the same steps, so the sorting
// call and the resulting insertion order must stay exactly as they are,
// otherwise ties could be broken differently and the trees would differ.
void getvec(map<char, int>& dic, vector<pair<char, int> >& vec)
{
    map<char, int>::iterator p = dic.begin();
    while (p != dic.end())
    {
        vec.push_back(pair<char, int>(p->first, p->second));
        ++p;
    }
    sort(vec.begin(), vec.end(), cmp);
}

//-------------------- sorted vector -> node queue ---------------//
// Allocates one leaf node per entry of `vec` and pushes it into `deq`.
// The nodes are heap-allocated; the tree built from them owns them.
void getnopriority_queue(const vector<pair<char, int> >& vec,
                         priority_queue<pnode, vector<pnode>, cmp2>& deq)
{
    vector<pair<char, int> >::const_iterator q = vec.begin();
    while (q != vec.end())
    {
        pnode p_node = new node;
        p_node->ch = q->first;
        p_node->freq = q->second;
        p_node->leftchild = NULL;
        p_node->rightchild = NULL;
        p_node->parent = NULL;
        deq.push(p_node);
        ++q;
    }
}

//----------------------------------------------------------------//
// Creates the internal node that joins `a` (left) and `b` (right) and
// returns it. The children's parent links are pointed at the new node.
pnode findParentNode(pnode& a, pnode& b)
{
    pnode parent = new node;
    parent->ch = '\0'; // internal nodes carry no symbol; avoid an indeterminate value
    parent->freq = a->freq + b->freq;
    parent->leftchild = a;
    parent->rightchild = b;
    parent->parent = NULL;
    a->parent = parent;
    b->parent = parent;
    return parent;
}

//-------------------- build the Huffman tree --------------------//
// Repeatedly merges the two lowest-frequency nodes until one root remains.
// Works on a copy of the queue. Returns NULL when the queue is empty
// (i.e. the code book had no entries).
pnode makhfmtr(priority_queue<pnode, vector<pnode>, cmp2> dep)
{
    if (dep.empty())
    {
        return NULL;
    }
    while (dep.size() >= 2)
    {
        pnode x, y;
        x = dep.top();
        dep.pop();
        y = dep.top();
        dep.pop();
        dep.push(findParentNode(x, y));
    }
    dep.top()->parent = NULL;
    return dep.top();
}

// Walks the tree for the `validBits` most significant bits of `bits`
// (bit 7 first): 0 follows the left child, 1 the right child. Every time a
// leaf is reached its symbol is written to `out` and `cursor` returns to
// `root`. `cursor` keeps the position between calls because a code may span
// a byte boundary.
// Returns false if a bit leads to a missing child, i.e. the data does not
// match the tree.
static bool decodeBits(const bitset<8>& bits, int validBits,
                       node* root, node*& cursor, ostream& out)
{
    const bool rootIsLeaf = (root->leftchild == NULL && root->rightchild == NULL);
    for (int i = 7; i >= 8 - validBits; --i)
    {
        if (rootIsLeaf)
        {
            // Single-symbol tree: there is nothing to walk, every bit stands
            // for one occurrence of the only symbol.
            out.put(root->ch);
            continue;
        }
        node* next = bits[i] ? cursor->rightchild : cursor->leftchild;
        if (next == NULL)
        {
            return false;
        }
        cursor = next;
        if (cursor->leftchild == NULL && cursor->rightchild == NULL)
        {
            out.put(cursor->ch);
            cursor = root;
        }
    }
    return true;
}

//------------------------- decompress ---------------------------//
// Decodes the compressed file `addrYuan` with the tree `hfmhead` and writes
// the result to `addrMudi`.
//
// Expected input layout: packed code bits, then one zero-padded byte, then
// one byte telling how many bits of that padded byte are valid. The output
// file is created even when nothing can be decoded.
void jiemiToFile(const char* addrYuan, const char* addrMudi, node* hfmhead)
{
    ofstream fout;
    fout.open(addrMudi);

    ifstream fin;
    fin.open(addrYuan, ios_base::binary);
    assert(fin.is_open());

    if (hfmhead == NULL)
    {
        return; // empty code book: there is nothing to decode
    }

    // The padded byte and the count byte must both be present.
    fin.seekg(0, ios_base::end);
    const streamoff fileSize = static_cast<streamoff>(fin.tellg());
    if (fileSize < 2)
    {
        cerr << "compressed file is too short: " << addrYuan << endl;
        return;
    }

    // Last byte: number of valid bits in the byte before it.
    fin.seekg(-1, ios_base::end);
    char countByte = 0;
    fin.get(countByte);
    const int youxiao = static_cast<unsigned char>(countByte);
    if (youxiao > 8)
    {
        // A larger value would index outside the 8-bit set below.
        cerr << "compressed file has an invalid bit count: " << youxiao << endl;
        return;
    }

    fin.seekg(0, ios_base::beg);
    node* ptr = hfmhead;
    char ch = 0;

    // Every byte except the final two is fully used.
    for (streamoff done = 0; done < fileSize - 2; ++done)
    {
        if (!fin.get(ch))
        {
            return;
        }
        if (!decodeBits(bitset<8>(static_cast<unsigned char>(ch)), 8, hfmhead, ptr, fout))
        {
            cerr << "compressed data does not match the code book" << endl;
            return;
        }
    }

    // Second to last byte: only the leading `youxiao` bits carry data.
    if (fin.get(ch))
    {
        if (!decodeBits(bitset<8>(static_cast<unsigned char>(ch)), youxiao, hfmhead, ptr, fout))
        {
            cerr << "compressed data does not match the code book" << endl;
        }
    }

    fin.close();
    fout.close();
}

//-------------------- read the code book ------------------------//
// Loads the frequency table from the file `addr` into `dic`. Each entry is
// "<byte> <count>" followed by a newline. Reading stops at the first entry
// that cannot be parsed.
void dicFromFile(const char* addr, map<char, int>& dic)
{
    ifstream fin(addr);
    if (!fin.is_open())
    {
        cerr << "cannot open code book file: " << addr << endl;
        return;
    }
    char ch;
    int num = 0;
    while (fin.get(ch)) // the symbol itself, read unformatted so whitespace symbols work
    {
        if (!(fin >> num))
        {
            break; // truncated or damaged entry: do not store a bogus count
        }
        dic[ch] = num;
        fin.get(ch); // skip the newline that ends the entry
    }
    fin.close();
}

// Releases every node of the tree rooted at `root` (NULL is allowed).
static void freehfmtr(pnode root)
{
    if (root == NULL)
    {
        return;
    }
    freehfmtr(root->leftchild);
    freehfmtr(root->rightchild);
    delete root;
}

int main()
{
    map<char, int> dic;
    vector<pair<char, int> > vec;
    priority_queue<pnode, vector<pnode>, cmp2> deq;

    dicFromFile("密码本.txt", dic);
    getvec(dic, vec);
    getnopriority_queue(vec, deq);
    node* head = makhfmtr(deq);
    jiemiToFile("压缩后.txt", "解压后.txt", head);

    freehfmtr(head);
    return 0;
}
优先级队列优化的霍夫曼编码(带中文压缩)
声明:以上内容来自用户投稿及互联网公开渠道收集整理发布,本网站不拥有所有权,未作人工编辑处理,也不承担相关法律责任,若内容有误或涉及侵权可进行投诉: 投诉/举报 工作人员会在5个工作日内联系你,一经查实,本站将立刻删除涉嫌侵权内容。