首页 > 代码库 > 文件压缩

文件压缩

 Heap.h:
#pragma once
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

#include <utility>
#include <vector>

//仿函数
// Comparison functor for a min-heap: returns true when `l` orders before `r`
// according to T's operator<.
template<class T>
struct Less
{
    // Declared const so the functor can be invoked through const objects
    // and const references.
    bool operator()(const T& l, const T& r) const
    {
        return l < r;
    }
};

// Comparison functor for a max-heap: returns true when `l` orders after `r`
// according to T's operator>.
template<class T>
struct Greater
{
    // Declared const so the functor can be invoked through const objects
    // and const references.
    bool operator()(const T& l, const T& r) const
    {
        return l > r;
    }
};


// Forward declaration of the default comparator. Repeating the declaration
// is harmless when the definition is already visible, and it keeps this
// class template parseable on its own.
template<class T>
struct Less;

// Binary heap stored in a std::vector.
//
// Compare decides the ordering: the element for which Compare(x, other) is
// true against every other element ends up at the top. With the default
// Less<T> this is a min-heap.
template<class T, class Compare = Less<T>>
class Heap
{
public:
    // Creates an empty heap.
    Heap()
    {}

    // Builds a heap from the `size` elements starting at `a`.
    // `a` may be null only when `size` is 0.
    Heap(const T* a, size_t size)
    {
        assert(a != nullptr || size == 0);

        // reserve (not resize): resize would insert `size` default-constructed
        // elements in front of the real data.
        _a.reserve(size);
        for (size_t i = 0; i < size; ++i)
        {
            _a.push_back(a[i]);
        }

        // Heapify: sift down every internal node, from the last parent back
        // to the root. Counting i from size/2 down to 1 and using i - 1 as
        // the index avoids unsigned underflow for sizes 0 and 1.
        for (size_t i = _a.size() / 2; i > 0; --i)
        {
            _AdjustDown(i - 1, _a.size());
        }
    }

    // Inserts a copy of `x`.
    void Push(const T& x)
    {
        _a.push_back(x);
        _AdjustUp(static_cast<int>(_a.size() - 1));
    }

    // Removes the top element. The heap must not be empty.
    void Pop()
    {
        assert(!_a.empty());
        using std::swap;
        // Move the top to the back, drop it, then restore the heap property
        // from the root.
        swap(_a[0], _a[_a.size() - 1]);
        _a.pop_back();
        _AdjustDown(0, _a.size());
    }

    // Returns a reference to the top element. The heap must not be empty.
    T& Top()
    {
        assert(!_a.empty());
        return _a[0];
    }

    // Number of stored elements.
    int Size() const
    {
        return static_cast<int>(_a.size());
    }

    // True when the heap holds no elements.
    bool Empty() const
    {
        return _a.empty();
    }

    // Sifts the element at index `parent` down until neither child orders
    // before it. Only indices below `size` are considered part of the heap.
    void _AdjustDown(size_t parent, size_t size)
    {
        assert(size <= _a.size());
        using std::swap;
        Compare com;

        size_t child = 2 * parent + 1; // left child
        while (child < size)
        {
            // Pick whichever child orders first under Compare.
            if (child + 1 < size && com(_a[child + 1], _a[child]))
            {
                ++child;
            }
            // Already in heap order: stop.
            if (!com(_a[child], _a[parent]))
            {
                break;
            }
            swap(_a[child], _a[parent]);
            parent = child;
            child = 2 * parent + 1;
        }
    }

    // Sifts the element at index `child` up while it orders before its parent.
    void _AdjustUp(int child)
    {
        assert(child >= 0 && static_cast<size_t>(child) < _a.size());
        using std::swap;
        Compare com;

        size_t cur = static_cast<size_t>(child);
        while (cur > 0)
        {
            const size_t parent = (cur - 1) / 2;
            // Already in heap order: stop.
            if (!com(_a[cur], _a[parent]))
            {
                break;
            }
            swap(_a[cur], _a[parent]);
            cur = parent;
        }
    }

    std::vector<T> _a; // heap storage; index 0 is the top
};
#####################################################################################
Huffman.h:
#pragma once
#include<assert.h>
#include"Heap.h"

// A single node of a Huffman tree: two child links plus the weight carried
// by the node. A freshly constructed node has no children.
template<class T>
struct HuffmanNode
{
    HuffmanNode<T>* _left = nullptr;   // left child, null when absent
    HuffmanNode<T>* _right = nullptr;  // right child, null when absent
    T _weight;                         // payload / ordering key

    // Creates a childless node holding a copy of `w`.
    HuffmanNode(const T& w)
        : _weight(w)
    {}
};

template<class T>
class HuffmanTree
{
           typedef HuffmanNode <T> Node;
public:
          HuffmanTree()
                   :_root( NULL)
          {}

          HuffmanTree( const T * a, size_t size,const T& invalid)
          {
                   _root = CreateTree( a, size , invalid);
          }


           Node* CreateTree(const T* a, size_t size,const T& invalid)
          {
                    //仿函数
                    struct Compare
                   {
                              bool operator()(const Node* l,const Node* r)
                             {
                                       return l ->_weight < r->_weight;
                             }
                   };

                    assert(a);
                    //小堆
                    Heap<Node *, Compare>  minHeap;
                    for (size_t i = 0; i < size; ++i)
                   {
                              if (a [i] != invalid)
                             {
                                      minHeap.Push( new Node (a[i]));
                             }
                   }
                    //生成HuffmanTree
                    while (minHeap.Size()>1)
                   {
                              //选出当前最小的两个节点
                              Node* left = minHeap.Top();
                             minHeap.Pop();
                              Node* right = minHeap.Top();
                             minHeap.Pop();

                              //权值相加,生成父节点
                              Node* parent = new Node(left->_weight + right->_weight);
                             parent->_left = left;
                             parent->_right = right;
                             minHeap.Push(parent);
                   }
                    return minHeap.Top();
          }

           //获得根节点
           Node* GetRootNode()
          {
                    return _root;
          }
protected:
           Node* _root;
};
#########################################################################################
FileCompress.h:
#include<string>
#include"Huffman.h"
#include<assert.h>

// Counter type wide enough for the byte counts of large files.
typedef unsigned long long LongType;

// Per-byte statistics used as the Huffman tree weight.
// Only _count takes part in arithmetic and comparisons.
struct CharInfo
{
    unsigned char _ch;   // byte value
    LongType _count;     // number of occurrences
    std::string _code;   // Huffman code as a string of '0'/'1' characters

    // Creates an entry for byte 0 with the given count and an empty code.
    CharInfo(const LongType count = 0)
        : _ch(0)
        , _count(count)
    {}

    // Sum of the two counts; the result carries no byte value or code.
    CharInfo operator+(const CharInfo& fi) const
    {
        return CharInfo(_count + fi._count);
    }

    // Entries differ when their counts differ. Taken by const reference to
    // avoid copying the code string on every comparison.
    bool operator!=(const CharInfo& fi) const
    {
        return _count != fi._count;
    }

    // Orders entries by ascending count.
    bool operator<(const CharInfo& fi) const
    {
        return _count < fi._count;
    }
};


template<class T>
class FileCompress
{
public:
          FileCompress()
          {
                    for (int i = 0; i < 256; i++)
                   {
                             _infos[i]._ch = i;
                             _infos[i]._count = 0;
                   }
          }
public:
           //压缩
           void Compress(const char* filename)
          {
                    //统计字符个数
                    assert(filename);
                    FILE* fOut = fopen(filename , "rb");
                    assert(fOut);//判断是否读取成功

                    char ch = fgetc(fOut);//unsigned char ch=fgetc(fOut);不可行
                    while (!feof(fOut))//EOF值为-1,与无符号char比较会造成死循环
                   {
                             _infos[( unsigned char )ch]._count++;
                             ch = fgetc(fOut);
                   }

                    //构建HuffmanTree
                    CharInfo invalid(0);
                    HuffmanTree<CharInfo > tree(_infos, 256, invalid);

                    //生成Huffman code
                    string code;//编码
                   GenerateHuffmanCode(tree.GetRootNode(), code);

                    //读取源文件,压缩
                    string compress = filename ;
                   compress += ".compress";//加上压缩文件后缀
                    FILE* fIn = fopen(compress.c_str(), "wb" );
                    assert(fIn);

                   fseek(fOut, 0, SEEK_SET);//文件指针
                    char value = 0;
                    int pos = 0;
                   ch = fgetc(fOut); //读取字符
                    while (!feof(fOut))
                   {
                              //取出Huffman code
                              string& code = _infos[(unsigned char)ch]._code;
                              for (size_t i = 0; i < code.size(); ++i)
                             {
                                      value <<= 1;
                                       //将字符串转化成对应的码,存入fIn压缩文件中
                                       if (code[i] == ‘1‘ )
                                      {
                                                value |= 1;
                                      }
                                       //8个字节
                                       if (++pos == 8)
                                      {
                                                fputc(value, fIn);
                                                value = 0;
                                                pos = 0;
                                      }
                             }
                             ch = fgetc(fOut);
                   }
                    if (pos)//不足8为,后补0
                   {
                             value <<= (8 - pos);
                             fputc(value, fIn);
                   }

                    //配置文件
                    string config = filename ;
                   config += ".config";//配置文件后缀
                    FILE* fConfig = fopen(config.c_str(), "wb");
                    assert(fConfig);

                    char countStr[128];//存储转化后的编码
                    string str;
                    for (size_t i = 0; i < 256; ++i)
                   {
                              //记录字符出现次数
                              if (_infos[i]._count>0)
                             {
                                      str += _infos[i]._ch;
                                      str += ‘,‘;
                                       //将字符记录转换为10进制,存在countStr中
                                      _itoa(_infos[i]._count, countStr, 10);
                                      str += countStr;
                                      str += ‘\n‘;
                             }
                              //将字符串写入配置文件
                             fputs(str.c_str(), fConfig);
                             str.clear();
                   }
                    //关闭文件
                   fclose(fOut);
                   fclose(fIn);
                   fclose(fConfig);
          }

           //解压缩
           void UnCompress(const char* filename)
          {
                    //配置文件
                    string config = filename ;
                   config += ".config";
                    FILE* fConfig = fopen(config.c_str(), "rb");
                    assert(fConfig);

                    string tmp;
                    while (ReadLine(fConfig, tmp))
                   {
                              if (!tmp.empty())
                             {
                                       //收集字符
                                      _infos[( unsigned char )tmp[0]]._count = atoi(tmp.substr(2).c_str());
                                      tmp.clear();
                             }
                              else
                             {
                                      tmp += ‘\n‘;
                             }
                   }

                    //重建Huffman树
                    CharInfo invalid(0);
                    HuffmanTree<CharInfo >ht(_infos, 256, invalid);

                    //读压缩文件
                    string compress = filename ;
                   compress += ".compress";
                    FILE* fOut = fopen(compress.c_str(), "rb");
                    assert(fOut);

                    //生成解压文件
                    string UnCompress = filename ;
                   UnCompress += ".uncompress";
                    FILE* fIn = fopen(UnCompress.c_str(), "wb");
                    assert(fIn);

                    unsigned char ch = fgetc(fOut);
                    HuffmanNode<CharInfo >* root = ht.GetRootNode();
                    HuffmanNode<CharInfo >* cur = root;
                    int pos = 8;

                    LongType charCount = root->_weight._count;
                    while (!feof(fOut))
                   {
                              if (ch & 1 << (pos-1))
                             {
                                      cur = cur->_right;
                             }
                              else
                             {
                                      cur = cur->_left;
                             }
                              if (cur->_left == NULL && cur->_right == NULL)
                             {
                                      fputc(cur->_weight._ch, fIn);
                                      cur = root;
                                       if (--charCount == 0)
                                      {
                                                 break;
                                      }
                             }
                             --pos;
                              if ( pos == 0)
                             {
                                      pos = 8;
                                      ch = fgetc(fOut);
                             }
                   }
                   fclose(fConfig);
                   fclose(fIn);
                   fclose(fOut);
          }

           //生成Huffman编码
           void  GenerateHuffmanCode(HuffmanNode <CharInfo>* root,string code)
          {
                    if (root == NULL)
                   {
                              return;
                   }
                    //递归左右节点,生成Huffman Code
                    if (root ->_left)
                   {
                             GenerateHuffmanCode( root->_left, code + ‘0‘);
                   }
                    if (root ->_right)
                   {
                             GenerateHuffmanCode( root->_right, code + ‘1‘);
                   }
                    if (root ->_left == NULL&& root->_right == NULL )
                   {
                              //将得到的叶结点编码存入数组中节点位置的Huffman code中
                             _infos[ root->_weight._ch]._code = code ;
                   }
                    //cout << _infos[root->_weight._ch]._ch << code << endl;
          }

           //按行读
           bool ReadLine(FILE * fConfig, string& tmp )
          {
                    assert(fConfig);
                    char ch = fgetc(fConfig );
                    if (feof(fConfig ))
                   {
                              return false ;
                   }
                    while (ch != ‘\n‘ )
                   {
                              tmp += ch;
                             ch = fgetc( fConfig);
                   }
                    return true ;
          }
          
protected:
           CharInfo _infos[256];
};
  
######################################################################################
test.cpp:
#include<iostream>
using namespace std;

#include"FileCompress.h"
#include"Heap.h"

void test()
{
           /*FileCompress<CharInfo> fc;
          fc.Compress("input.txt");
          cout << "压缩成功" << endl;

          fc.UnCompress("input.txt");
          cout << "解压成功" << endl;
*/
           FileCompress<CharInfo > fcb;
          fcb.Compress( "Input.BIG");
          cout << "压缩成功" << endl;

          fcb.UnCompress( "Input.BIG");
          cout << "解压成功" << endl;
}

int main()
{
          test();
           //Heap<int> hp;
           //hp.Push(20);
           //hp.Push(30);
           //hp.Push(10);
           //hp.Push(50);
          system( "pause");
           return 0;
}

 

文件压缩