哈夫曼树:给定n个权值作为n个叶子结点,构造一棵二叉树,若带权路径长度达到最小,称这样的二叉树为最优二叉树,也称为哈夫曼树(Huffman Tree)。哈夫曼树是带权路径长度最短的树,权值较大的结点离根较近。
哈夫曼编码:在数据通信中,需要将传送的文字转换成二进制的字符串,用0,1码的不同排列来表示字符。例如,需传送的报文为“AFTER DATA EAR ARE ART AREA”,这里用到的字符集为“A,E,R,T,F,D”,各字母出现的次数为{8,4,5,3,1,1}。现要求为这些字母设计编码。要区别6个字母,最简单的二进制编码方式是等长编码,固定采用3位二进制,可分别用000、001、010、011、100、101对“A,E,R,T,F,D”进行编码发送,当对方接收报文时再按照三位一分进行译码。显然编码的长度取决于报文中不同字符的个数。若报文中可能出现26个不同字符,则固定编码长度为5。然而,传送报文时总是希望总长度尽可能短。在实际应用中,各个字符的出现频度或使用次数是不相同的,如A、B、C的使用频率远远高于X、Y、Z,自然会想到设计编码时,让使用频率高的用短码,使用频率低的用长码,以优化整个报文编码。
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
/* Upper bound on the number of leaves; it determines the tree array size via M. */
#define N 30
/*
 * Number of slots in a huffmanTree array. A tree with n leaves uses the
 * 1-based indices 1..2n-1, so 2*N slots are enough for N leaves.
 * The expansion is parenthesized so that M behaves as a single value inside
 * larger expressions (e.g. `x / M`), which the bare `2*N` did not.
 */
#define M (2*N)
/* Large initial value used by select1 as "heavier than any weight seen so far". */
#define MAX 65535
/*
 * One node of the Huffman tree. Nodes live in a flat array and refer to each
 * other by array index; the code in this file starts all indices at 1 and
 * uses 0 to mean "no such node".
 */
typedef struct {
    int weight;   /* weight of the leaf, or sum of both children's weights */
    int parent;   /* index of the parent node, 0 while the node has none */
    int lchild;   /* index of the left child, 0 for a leaf */
    int rchild;   /* index of the right child, 0 for a leaf */
} Node;

/* Storage for a whole tree: a fixed array of M nodes. */
typedef Node huffmanTree[M];
void crthuffmanTree(huffmanTree,int [],char [],int);
void select1(huffmanTree,int,int *,int *);
char *getcode(huffmanTree,char,int,int [],char []);
/*
 * Build the Huffman code of character x from a finished tree.
 *
 * ht : tree array; leaves occupy indices 1..n and every node's
 *      parent/lchild/rchild fields hold array indices (0 = none).
 * x  : character to encode.
 * n  : number of leaves (symbols).
 * w  : leaf weights. Kept only so the signature stays unchanged; the weights
 *      are not consulted because two different nodes may share a weight.
 * s  : symbols, where s[i] is the symbol of leaf i (1-based).
 *
 * Returns a NUL-terminated string of '0'/'1' characters allocated with
 * malloc(): '0' means "left child", '1' means "right child", listed from the
 * root down to the leaf. The caller owns the string and may free() it.
 * An empty string is returned when x does not occur in s[1..n] or when its
 * leaf has no parent (a tree with a single leaf). NULL is returned only if
 * the allocation fails.
 */
char *getcode(huffmanTree ht,char x,int n,int w[],char s[])
{
    int leaf = 0;   /* index of the leaf holding x, 0 = not found */
    int depth = 0;  /* number of edges between that leaf and the root */
    int node;
    int pos;
    char *code;

    (void)w;

    /* Leaf i is built from s[i]/w[i], so the position of x in s[] is the
     * node index itself; no search by weight is needed. */
    for (node = 1; node <= n; node++) {
        if (s[node] == x) {
            leaf = node;
            break;
        }
    }

    /* First pass: measure the path so the buffer can be sized exactly. */
    if (leaf != 0) {
        for (node = leaf; ht[node].parent != 0; node = ht[node].parent) {
            depth++;
        }
    }

    code = malloc((size_t)depth + 1);
    if (code == NULL) {
        return NULL;
    }
    code[depth] = '\0';

    /* Second pass: walking leaf -> root yields the bits last-to-first, so
     * write them from the end of the buffer toward the front. The branch is
     * decided by comparing node indices, which stays correct when siblings
     * carry equal weights. */
    node = leaf;
    for (pos = depth - 1; pos >= 0; pos--) {
        int up = ht[node].parent;
        code[pos] = (ht[up].lchild == node) ? '0' : '1';
        node = up;
    }
    return code;
}
/*
 * Find the two lightest nodes that do not have a parent yet.
 *
 * ht : tree array (1-based).
 * n  : only ht[1..n] are examined.
 * s1 : receives the index of the second-lightest parentless node.
 * s2 : receives the index of the lightest parentless node.
 *
 * When fewer than two parentless nodes exist, the missing result is set to 0
 * (the "no node" index). Among equal weights the node with the lower index
 * is preferred.
 */
void select1(huffmanTree ht,int n,int *s1,int *s2)
{
    int lightest = 0;  /* index of the lightest candidate so far, 0 = none */
    int runnerup = 0;  /* index of the second-lightest candidate, 0 = none */
    int i;

    for (i = 1; i <= n; i++) {
        if (ht[i].parent != 0) {
            continue;  /* already merged into a subtree */
        }
        if (lightest == 0 || ht[i].weight < ht[lightest].weight) {
            /* New minimum: the previous minimum becomes the runner-up. */
            runnerup = lightest;
            lightest = i;
        } else if (runnerup == 0 || ht[i].weight < ht[runnerup].weight) {
            /* Not lighter than the minimum (possibly equal to it), but it
             * still beats the current runner-up. Handling the "equal" case
             * here is what makes duplicate weights work. */
            runnerup = i;
        }
    }

    /* Tracking indices instead of sentinel weights means the outputs are
     * always written and no upper limit on the weights is needed. */
    *s1 = runnerup;
    *s2 = lightest;
}
/*
 * Build a Huffman tree, print the code of every symbol, then read one line
 * from stdin and print its encoding.
 *
 * ht : tree array to fill; nodes 1..n become the leaves and nodes n+1..2n-1
 *      the internal nodes, the last one being the root.
 * w  : weights, w[1..n] (1-based).
 * s  : symbols, s[1..n] (1-based); s[i] has weight w[i].
 * n  : number of symbols; must be between 1 and N, otherwise an error is
 *      printed to stderr and nothing else happens.
 */
void crthuffmanTree(huffmanTree ht,int w[],char s[],int n)
{
    int ch;              /* int rather than char so EOF can be told apart */
    int s1 = 0,s2 = 0;   /* indices chosen by select1 */
    int m,i;
    char *code[N + 1];   /* 1-based like s[] and w[], hence the extra slot */

    if (n < 1 || n > N) {
        fprintf(stderr,"编码个数必须在1到%d之间\n",N);
        return;
    }
    m = 2*n - 1;

    /* Leaves take the given weights; internal nodes start empty. */
    for (i = 1;i <= m;i++) {
        ht[i].weight = (i <= n) ? w[i] : 0;
        ht[i].parent = 0;
        ht[i].lchild = 0;
        ht[i].rchild = 0;
    }

    /* Repeatedly join two parentless nodes picked by select1 under a new
     * node i; the node reported in s2 becomes the left child and the one
     * in s1 the right child. */
    for (i = n+1;i <= m;i++) {
        select1(ht,i-1,&s1,&s2);
        ht[i].weight = ht[s1].weight+ht[s2].weight;
        ht[i].lchild = s2;
        ht[i].rchild = s1;
        ht[s1].parent = i;
        ht[s2].parent = i;
    }

    printf("根据建立的哈夫曼树得到字符对应编码如下:\n");
    for (i = 1;i <= n;i++) {
        /* NOTE(review): ownership of the string returned by getcode is not
         * established at this call site, so it is stored but never freed. */
        code[i] = getcode(ht,s[i],n,w,s);
        printf("%c:%s\n",s[i],code[i] ? code[i] : "");
    }

    /* Drop whatever is left of the current input line (typically the newline
     * that follows the last number typed). Calling setbuf() on stdin after
     * it has already been read from is not permitted, so the characters are
     * consumed explicitly instead. */
    while ((ch = getchar()) != '\n' && ch != EOF) {
        /* discard */
    }

    printf("请输入要编码的字符串:");
    while ((ch = getchar()) != '\n' && ch != EOF) {
        for (i = 1;i <= n;i++) {
            /* Compare as char so bytes above 127 match the stored symbols. */
            if ((char)ch == s[i]) {
                printf("%s",code[i] ? code[i] : "");
            }
        }
    }
    printf("\n");
}
int main(int argc,char *argv[])
{
int i,n;
huffmanTree ht;
int w[20];
char s[20];
printf("请输入编码个数:");
scanf("%d",&n);
printf("请依次输入字符和相应的权值:");
for(i = 1;i <= n;i++) {
scanf(" %c",&s[i]);
scanf("%d",&w[i]);
}
crthuffmanTree(ht,w,s,n);
/*for(i = 1;i <= 7;i++) {
printf("%-2d %-2d %-2d %-2d\n",ht[i].weight,ht[i].parent,ht[i].lchild,ht[i].rchild);
}*/
}