//数据类型:
MAX_FILE_SIZE 0x3f3f3f // 最大文件长度
MAX_NODE_SIZE 300 //最大节点数
priority_queue<int , vector<int>, Hufode> N //堆排序
char node[MAX_NODE_SIZE + 1]; //节点元素
char nodeNum[MAX_NODE_SIZE + 1]; //节点元素值
Int tbuf[MAX_NODE_SIZE + 1]; //树缓冲区
char code[MAX_NODE_SIZE][]; //节点路径
char filename[30]; //文件名
struct HufNode{ //节点信息
int weight, parents, lchild, rchild;
HufNode(){}; //自定义节点操作,用于优先队列
}HUFTREE[MAX_NODE_SIZE];
//功能描述:
COMPRESS:
Creat_Node(fbuf, flen); //得到节点权值
Creat_Tree(count); //构建树
Code_Node(count); //构建码表
Compress(count); //创建压缩文件
EXTRACT:
Restore_Tree(char *t, char v, int &pos) //还原树
Extract(int zero, int len, char *str) //解压文件
这次课程设计我选择的题目是哈弗曼编/译器,本来我觉得哈弗曼进行压缩解码并不是很难,只要你理解了哈夫曼算法,但在实际编写中,我发现并不是这样的。
首先,对任意文件进行压缩,要理解到任何类型的文件实质上都是可以进行压缩的。一开始为考虑这个钻了牛角尖,想着去统一用wchar_t保存或是转为Unicode等等什么的。但其实不必那么复杂,因为汉字(不仅仅汉字, 任何字符都是这样的)都是以字节为单位的,由多个字节组成的,将其分开对待,因为最终解压时恢复原串还是按照原有顺序组装,所以和纯英文文件的实现没有什 么区别);那么,实质上,所构建的哈夫曼树最只有256个叶子节点.。这样程序应该可以针对图片,各种内容的文本文件等进行压缩。还原后应该与源文件是相同的。
其次,在进行树的构造的时候,因为要附加开辟一个大于源文件3,4倍的空间。我是在堆中分配空间,对程序中资源分配不是很好,这样就对压缩的目标文件有一定的要求,文件不能太大,否则会造成内存溢出。对于小文件来说,压缩有些得不偿失,因为在压缩时必须将构建的树的码表同时存储进去,文件太小但树可能是文件的好几倍大。不同内容的文件压缩率也不同,相对来说字符类型压缩比率高一些,汉子类型压缩比率比较低。
压缩中,对于一个文件,必须要遍历一次利用统排得到每个节点的权值。相对于比较耗费资源的是构建树的时候,要每次选择其中最小的两个操作,后放回新数据再进行排序。这是十分耗费的,感觉相对来说利用利用优先队列构建大根堆是比较好的选择,堆排的时间复杂度是而二维阶,对数据要求没有那么高。因为是所得到的压缩码都是01字符串,尽可能的减少存储空间,将所得到的01码进行或运算,使得其八位构成一个字节,不够八位的01码在其后补0,并记录个数,将这些写入压缩文件中,码表应放在压缩文件首。
解压相对来说比较简单,用文件中有的码表还原树还原文件。感觉代码还可以优化很多,而且运行时不太稳定,通过这次课程设计收获了很多。
参考资料
http://blog.csdn.net/rushkid02/article/details/7687125
http://blog.csdn.net/tsdl2009/article/details/6514057
http://blog.csdn.net/ns_code/article/details/20227303
http://blog.csdn.net/to_be_better/article/details/50431352
但是因为我是用数组开的,所以不压缩大文件,图片也不可以。��
压缩
#include <iostream>
#include <cstdio>
#include <algorithm>
#include <queue>
#include <fstream>
#include <cstring>
using namespace std;
#define MAX_FILE_SIZE 1000000
#define MAX_NODE_SIZE 300
unsigned char *node = new unsigned char[MAX_NODE_SIZE + 1];
int *nodeNum = new int[MAX_FILE_SIZE + 1];
int *tbuf = new int[MAX_NODE_SIZE + 1];
char *code[MAX_NODE_SIZE];
char filename[30];
struct HufNode
{
int weight, parents, lchild, rchild;
HufNode(){};
}HUFTREE[MAX_NODE_SIZE];
struct NodeGreat{
bool operator()(int a, int b){
return HUFTREE[a].weight > HUFTREE[b].weight;
}
};
//compress, return boolean, leaves count
bool Compress(int count, long flen, unsigned char *fbuf){
ofstream fout(filename, ios::out);
if(!fout.good())
throw "open temp file filed";
//write node count
fout << count << endl;
//write code list in file.temp, node and code
for (int i = 1; i < count; ++i)
{
fout << node[i] << endl;
fout << code[i] << endl;
}
//set temp file pos
long tempPos = 0;
//set a temp file buffer
char *tempBuf = new char[MAX_FILE_SIZE];
// %
int bit = 7;
for(long i = 0; i < flen; i++)
{
int k = 0;
//fbuf[i]:file content; tbuf[fbuf[i]]:node; code[tbuf[fbuf[i]]]:node code;
while(code[nodeNum[fbuf[i]]][k] != '\0')
{
//turn char to a bit
tempBuf[tempPos] |= (code[nodeNum[fbuf[i]]][k]-'0') << bit--;
//every eight bit for a Byte,
if(bit < 0)
{
bit = 7;
tempPos++;
}
++k;
}
}
//note the number of zero
int zero = 0;
if(bit != 7)
zero = bit % 7 + 1, tempPos++;
//write 0 and space in temp file;
fout << zero << " " << tempPos << endl;
//write all the tempbuf in the temp file
//cout << tempBuf << endl;
fout.write(tempBuf, sizeof(char)*tempPos);
// cout << zero <<tempBuf << endl;
delete[] tempBuf;
fout.close();
}
//get the Node code with 0 and 1, leaves count
void Code_Node(int count){
//temp path
char *path = new char[count];
path[count-1] = '\0';
//get leaves path in Code
for (int i = 1; i < count; ++i)
{
int child = i;
int parent = HUFTREE[child].parents;
int num = count-1;
//from leaf to root
while(parent){
//path wont long as leaves count;
num--;
if(HUFTREE[parent].rchild == child){
path[num] = '0';
}
else if(HUFTREE[parent].lchild == child){
path[num] = '1';
}
else{
throw "code tree error";
}
child = parent;
parent = HUFTREE[parent].parents;
}
//copy to Code
code[i] = new char[count - num];
strcpy(code[i], &path[num]);
// << code[i] << endl;
}
}
//build tree
void Creat_Tree(int count){
printf("%ld\n", count);
//priority queues save the index
priority_queue<int, vector<int>, NodeGreat> N;
for (int i = 1; i < count; ++i)
{
N.push(i);
}
printf("he;;op");
int len = count;
for (int i = 0; i < count - 2; i++)
{
//get the minimum two;
int lc, rc;
lc = N.top();
N.pop();
rc = N.top();
N.pop();
//make node
HUFTREE[len].weight = HUFTREE[lc].weight + HUFTREE[rc].weight;
HUFTREE[len].lchild = lc;
HUFTREE[len].rchild = rc;
HUFTREE[lc].parents = HUFTREE[rc].parents = len;
N.push(len++);
}
//set the root parents;
printf("he;;op");
int root;
root = N.top();
HUFTREE[root].parents = 0;
}
//get tree leaves, return leaves count;
int Creat_Node(unsigned char *fbuf, long flen){
//get tree node value;
for (int i = 0; i < flen; ++i)
{
tbuf[fbuf[i]]++;
}
//tree leaves
//
int count = 1;
for (int i = 0; i < flen; ++i)
{
if(tbuf[fbuf[i]]){
HUFTREE[count].weight = tbuf[fbuf[i]];
HUFTREE[count].lchild = 0;
HUFTREE[count].rchild = 0;
HUFTREE[count].parents = 0;
tbuf[fbuf[i]] = 0;
//note the node number
if(!nodeNum[fbuf[i]]){
nodeNum[fbuf[i]] = count;
}
// note the node
node[count] = fbuf[i];
++count;
//cout << fbuf[i] << endl;
}
}
//cout << count << endl;
//make tbuf values;
for (int i = 0; i < flen; ++i)
{
tbuf[fbuf[i]]++;
}
return count;
}
int main(int argc, char* argv[]){
//copy filename
sprintf(filename, "%s.temp", argv[1]);
ifstream fin;
// file check, put pointer on the file end
fin.open(argv[1], ios::ate | ios::in);
if(!fin.good())
throw "open file filed";
//get file length, put pointer on the file begin
long flen = fin.tellg();
if(flen > MAX_FILE_SIZE)
throw "file too big";
fin.seekg(0, ios::beg);
//copy file to file buffer
unsigned char *fbuf = new unsigned char[flen + 1];
fin.read((char*)fbuf, flen);
fin.close();
//file operater done;
cout << "FILE: " << argv[1] << " LENHTH: " << flen << "Btye" << endl;
//creaat huffuman Node, get node count
int count = 0;
count = Creat_Node(fbuf, flen);
// build tree
Creat_Tree(count);
// get the node code
printf("not here\n");
Code_Node(count);
printf("not here\n");
//compress the file
Compress(count, flen, fbuf);
printf("not here\n");
delete[] fbuf;
return 0;
}
解压
#include <iostream>
#include <cstring>
#include <cstdio>
#include <algorithm>
#include <queue>
#include <fstream>
#include <cstring>
using namespace std;
#define MAX_FILE_SIZE 0x3f3f3f
#define MAX_NODE_SIZE 300
char *fbuf = new char[MAX_FILE_SIZE + 1];
char *ext = new char[MAX_FILE_SIZE + 1];
char filename[30];
struct HufNode
{
int weight, parents, lchild, rchild;
char elem;
HufNode(){};
}HUFTREE[MAX_NODE_SIZE];
struct NodeGreat{
bool operator()(int a, int b){
return HUFTREE[a].weight > HUFTREE[b].weight;
}
};
//creat the tree with code list,
void Restore_Tree(char node, char* code, int &nodePos){
//the tree root;
int NODE = 0;
for (int i = 0; code[i] != '\0' ; ++i)
{
if(code[i] == '1'){
if(HUFTREE[NODE].rchild == 0){
HUFTREE[NODE].rchild = nodePos++;
}
NODE = HUFTREE[NODE].rchild;
}
else{
if(HUFTREE[NODE].lchild == 0){
HUFTREE[NODE].lchild = nodePos++;
}
NODE = HUFTREE[NODE].lchild;
}
// num++;
}
//cout << "ss" <<NODE << node << endl;
HUFTREE[NODE].elem = node;
}
void Extract(int zero, long flen, char *fbuf){
//set file pos, NODE, end 0 count
long long filePos = 0;
int NODE = 0;
int end = 0;
for(int i = 0; i < flen; i++)
{
//temp for temp file element
char temp = fbuf[i];
//in case 0
if(i == flen - 1)
end = zero;
//analyse file element
for(int j = 7; j >= end; j--)
{
//& operator, return 1 or 0
if((1 << j) & temp)
//if 1
NODE = HUFTREE[NODE].rchild;
else
// if 0
NODE = HUFTREE[NODE].lchild;
// when it is the leaf
if(HUFTREE[NODE].rchild == 0 && HUFTREE[NODE].lchild == 0)
{
//cout << HUFTREE[NODE].elem << endl;
ext[filePos++] = HUFTREE[NODE].elem;
NODE = 0;
}
}
}
// cout<< "len::" << filePos <<endl;
ofstream out(filename, ios::out);
// cout << ext << endl;
out.write(ext, sizeof(char)*(filePos));
out.close();
}
int main(int argc, char *argv[]){
//copy filename
int tl = strlen(argv[1]);
memcpy(filename, argv[1], tl-1-4);
sprintf(filename, "%s.out", filename);
//open compress file
ifstream fin(argv[1], ios::in);
if(!fin.good())
throw "open file error";
//get node count, node and code;
int count;
char *code = new char[count + 1];
char *node = new char[5];
//get count
fin >> count;
fin.getline(node, count);
//node pos
int nodePos = 1;
//each loop get a node and a code
for (int i = 1; i < count; ++i)
{
fin.getline(node, count);
//if node is blank, get code again
if(node[0] == '\0'){
fin.getline(node, count);
node[0] = '\n';
}
fin.getline(code, count);
//creat a tree with list code
Restore_Tree(node[0], code, nodePos);
//reset code ????
node[0] = 0;
}
//the zero count, temp file length;
int zero = 0;
long long flen;
char* tbuf = new char[flen + 1];
//get zero count, get temp file length
fin>>zero;
fin>>flen;
//cout << flen << endl;
//get temp file content
fin.getline(tbuf, count);
//copy temp file content to file buffer
fin.read(fbuf,sizeof(char)*flen);
//translate the temp file
Extract(zero, flen, fbuf);
return 0;
}