Huffman编码压缩解压缩文件

时间:2022-06-11 10:06:40

采用了哈弗曼编码和优先队列(最小堆)实现

头文件

#ifndef HUFFMAN_H_INCLUDED
#define HUFFMAN_H_INCLUDED
#include "stdio.h"
#include "stdlib.h"
#include "string.h"
#define INIT_SIZE 1000
#define FALSE 0
#define TRUE 1
#define WEIGHT_SIZE 256
#define CODE_SIZE 40

typedef int BOOL;

typedef struct huff_node
{
unsigned char value;
char *code;
struct huff_node *left_child;
struct huff_node *right_child;
struct huff_node *parent;
int weight;

} huff_node,*phuff_node;
typedef struct
{
phuff_node root;
phuff_node *code_table;
// int leaves_size;
}huff_tree,*phuff_tree;

typedef huff_node heap_node,*pheap_node;
typedef struct heap
{
pheap_node *value;
int capacity;
int length;
} heap,*pheap;

void error(char *msg);
BOOL is_emp(pheap heap);
BOOL is_full(pheap heap);
pheap init_heap();
void insert_heap(pheap heap,pheap_node heap_node);
pheap_node delete_heap(pheap heap);
#endif // HUFFMAN_H_INCLUDED
堆定义

#include "huffman.h"

void error(char *msg)
{
perror(msg);
exit("-1");
}
BOOL is_emp(pheap heap)
{
return heap->length == 0 ? TRUE : FALSE;
}
BOOL is_full(pheap heap)
{
return heap->length == heap->capacity ? TRUE : FALSE;
}

pheap init_heap()
{
pheap heap = (pheap)malloc(sizeof(heap));
if(heap == NULL)
error("heap malloc error\n");
heap->value = (pheap_node*)malloc((INIT_SIZE + 1) * sizeof(pheap_node));
if(heap->value == NULL)
error("heap->value malloc error\n");
heap->capacity = INIT_SIZE;
heap->length = 0;

return heap;
}

void insert_heap(pheap heap,pheap_node node)
{
if(is_full(heap))
error("heap is full\n");
if(node->value != NULL)
{
node->left_child = NULL;
node->right_child = NULL;
}
if(is_emp(heap))
{
heap->value[++heap->length] = node;
return ;
}
int i = ++heap->length;
// printf("insert heap length is %d\n",heap->length);
for(; (i > 1) && (heap->value[i / 2]->weight > node->weight); i /= 2)
{
heap->value[i] = heap->value[i / 2];
//printf("log:%d %d\n",i,heap->length);
}
// printf("out:%d %d\n",i,heap->length);
heap->value[i] = node;
}

void print_node(pheap_node node)
{
printf("value:%c weight:%d\n",node->value,node->weight);
}
void print_heap(pheap heap)
{
int i = 1;
for(; i <= heap->length; i++)
print_node(heap->value[i]);
printf("\n");
}
pheap_node delete_heap(pheap heap)
{
if(is_emp(heap))
error("heap is empty\n");
int i,child;
pheap_node result = heap->value[1];
pheap_node current = heap->value[heap->length];
//printf("current is %c\n",current->value);
heap->length--;
for(i = 1; 2 * i <= heap->length; i = child)
{
child = 2 * i;
if((child != heap->length) && (heap->value[child]->weight > heap->value[child + 1]->weight))
child ++;
if(current->weight < heap->value[child]->weight)
break;
heap->value[i] = heap->value[child];
}
heap->value[i] = current;
return result;
}

void reset_heap(pheap heap)
{
int i = heap->length / 2,j,child;
pheap_node temp_node;
for(; i > 0; i--)
{
child = 2 * i;
if((child != heap->length) && (heap->value[child]->weight > heap->value[child + 1]->weight))
child ++;
if(heap->value[i]->weight > heap->value[child]->weight)
{
temp_node = heap->value[i];
heap->value[i] = heap->value[child];
heap->value[child] = temp_node;
//子树修改了,要判断是否需要进一步的调整
for(j = child; 2 * j <= heap->length; j = child)
{
child = 2 * j;
if((child != heap->length) && (heap->value[child]->weight > heap->value[child + 1]->weight))
child ++;
if(temp_node->weight < heap->value[child]->weight)
break;
heap->value[j] = heap->value[child];
}
heap->value[j] = temp_node;
}
}

}

Huffman树定义及操作

#include "huffman.h"

void free_huff_node(phuff_node node)
{
if(node != NULL)
{
free_huff_node(node->left_child);
free_huff_node(node->right_child);
if(node->code !=NULL)
free(node->code);
free(node);
}
}
void free_huff_tree(phuff_tree huff_tree)
{
if(huff_tree != NULL)
free_huff_node(huff_tree->root);
int i =0;
for(i; i < WEIGHT_SIZE; i++)
{
if(huff_tree->code_table[i] != NULL)
{
free(huff_tree->code_table[i]);
}

}
free(huff_tree);
}
phuff_tree build_huff_tree(int *weight)
{
pheap_node node;
pheap heap = init_heap();
pheap_node *code_table = (pheap_node*)malloc(WEIGHT_SIZE * sizeof(pheap_node));
int i,j;
for(i = 0; i < WEIGHT_SIZE; i++)
{
if(weight[i] != 0)
{
node =(pheap_node)malloc(sizeof(heap_node));
node->weight = weight[i];
node->value = (unsigned char)i;
code_table[i] = node;//存储所有的叶子节点
insert_heap(heap,node);
}
else code_table[i] = NULL;
}

phuff_tree huff_tree = (phuff_tree)malloc(sizeof(huff_tree));

if(huff_tree == NULL) error("huff_tree malloc error\n");

huff_tree->code_table = code_table;

pheap_node heap_node_1,heap_node_2,merge_node;
while(heap->length > 1)
{
heap_node_1 = delete_heap(heap);
heap_node_2 = delete_heap(heap);

merge_node = (pheap_node)malloc(sizeof(heap_node));
if(merge_node == NULL)
error("merge_node malloc error\n");
merge_node->value = NULL;
merge_node->weight = heap_node_1->weight + heap_node_2->weight;
merge_node->left_child = heap_node_1;
merge_node->right_child = heap_node_2;
heap_node_1->parent = merge_node;
heap_node_2->parent = merge_node;
insert_heap(heap,merge_node);
}
merge_node->parent = NULL;
huff_tree->root = merge_node;
build_code_table(huff_tree);
return huff_tree;
}


void build_code_table(phuff_tree huff_tree)
{
char *code_buff = (char*)malloc(CODE_SIZE * sizeof(char));
memset(code_buff,'\0',CODE_SIZE);
build_code(huff_tree->root,code_buff,0);
}

void build_code(phuff_node cur_pos,char* code_buff,int len)
{
// printf("current code_buff is %s\n",code_buff);
if((cur_pos->left_child == NULL) && (cur_pos->right_child == NULL))
{
cur_pos->code = (char*)malloc((len + 1)* sizeof(char));
memset(cur_pos->code,'\0',len + 1);
memcpy(cur_pos->code,code_buff,len * sizeof(char));
}
if(cur_pos->left_child != NULL)
{
code_buff[len] = '0';
build_code(cur_pos->left_child,code_buff,len + 1);
}
if(cur_pos->right_child != NULL)
{
code_buff[len] = '1';
build_code(cur_pos->right_child,code_buff,len + 1);
}
}
压缩

#include "huffman.h"

/*统计各个字符的权重*/
int* get_weight(char *file_name)
{
FILE *file = fopen(file_name,"r");
if(file == NULL)
error("file open error\n");

int *weight = (int*)malloc(WEIGHT_SIZE * sizeof(int));
int i;
for(i = 0; i < WEIGHT_SIZE; i++)
weight[i] = 0;

unsigned char temp_char;
while(!feof(file))
{
fread(&temp_char,1,1,file);
weight[temp_char]++;
}

fclose(file);

return weight;
}

/*压缩*/
void compress(char* ori_file_name,char* com_file_name)
{
int *weight = get_weight(ori_file_name);
phuff_tree huff_tree = build_huff_tree(weight);

long cur_pos = 0,file_length = 0;
unsigned char temp_read = 0,temp_write = 0;
int code_len = 0,i = 0,j = 0,left_size = 8;
char *code;

FILE *ori_file = fopen(ori_file_name,"r");
FILE *com_file = fopen(com_file_name,"wb");

//写权重信息
for(i; i < WEIGHT_SIZE; i++)
{
if(weight[i] != 0)
{
fprintf(com_file,"%d",i);
fputc(' ',com_file);
fprintf(com_file,"%d",weight[i]);
fputc(' ',com_file);
file_length += weight[i];
}
}
fputc('\n',com_file);
fprintf(com_file,"%ld",file_length - 1);//多读一个EOF
fputc('\n',com_file);
while(!feof(ori_file))
{
if(j >= code_len)
{
fread(&temp_read,sizeof(char),1,ori_file);
//printf(" %d ",temp_read);
code = huff_tree->code_table[temp_read]->code;
code_len = strlen(code);
j = 0;
}
for(i = 0; (i < left_size) && (j < code_len); i++,j++)
{
if(code[j]=='0')
temp_write = temp_write << 1;
else temp_write = (temp_write << 1)| 1;
}
if(i < left_size)//本字节未填满
{
left_size -= i;
}
else //填满
{
left_size = 8;//表示当前字节剩余空间
fwrite(&temp_write, sizeof(char),1,com_file);
//printf(" %d ",temp_write);
temp_write = 0;
}
}
/*将后面的补为0*/
temp_write = temp_write << left_size;
fwrite(&temp_write,sizeof(char),1,com_file);

fclose(ori_file);
fclose(com_file);
free(huff_tree);

}
解压缩

#include "huffman.h"

int* read_weight(FILE *com_file)
{
int* weight = (int*)malloc(WEIGHT_SIZE * sizeof(int));
int i = 0;
for(; i < WEIGHT_SIZE; i++)
weight[i] = 0;
char *temp_read = (char*)malloc(WEIGHT_SIZE * 10 * sizeof(char));
if(fgets(temp_read,WEIGHT_SIZE * 10,com_file) == NULL)
error("read com_file for weight erroe\n");
char *pre_pos = temp_read,*cur_pos = strstr(temp_read," ");
int num;
char str[10];
while(cur_pos !=NULL)
{
memset(str,'\0',10);
strncpy(str,pre_pos,cur_pos - pre_pos + 1);
num = atoi(str);

pre_pos = cur_pos;
cur_pos = strstr(cur_pos + 1," ");
memset(str,'\0',10);
strncpy(str,pre_pos,cur_pos - pre_pos + 1);
weight[num] = atoi(str);
pre_pos = cur_pos;
cur_pos = strstr(cur_pos + 1," ");
}

return weight;
}

void decompress(char* com_file_name,char* ori_file_name)
{
FILE *com_file = fopen(com_file_name,"rb+");
FILE *ori_file = fopen(ori_file_name,"w+");
int *weight = read_weight(com_file);
long file_length = 0;
//fread(&file_length,sizeof(long),1,com_file);
char *buf = (char*)malloc(sizeof(long) * 8 * sizeof(char));
fgets(buf,sizeof(long) * 8,com_file);
file_length = atol(buf);
free(buf);

if(file_length == 0) error("file length is 0!\n");
phuff_tree huff_tree = build_huff_tree(weight);
unsigned char temp_read = 0;
pheap_node huff_pos = huff_tree->root;

int i = 0,j = 0;
while(!feof(com_file))
{

fread(&temp_read,sizeof(char),1,com_file);

for(i = 0; i < 8; i++)
{
if((temp_read & 0x80) >> 1 == 64)

huff_pos = huff_pos->right_child;
else
huff_pos = huff_pos->left_child;

if(huff_pos->left_child == NULL && huff_pos->right_child == NULL)
{
fwrite(&(huff_pos->value),sizeof(unsigned char),1,ori_file);
huff_pos = huff_tree->root;
j++;
if(j >= file_length) break;
}
temp_read=temp_read << 1;
}
}
fclose(ori_file);
fclose(com_file);
free(huff_tree);
}


测试

int main()
{
char ori_file_name[20],com_file_name[20];

printf("请输入要压缩前的文件名(长度小于20):");
scanf("%s",ori_file_name);
printf("请输入要压缩后的文件名(长度小于20):");
scanf("%s",com_file_name);
printf("正在压缩...\n");
compress(ori_file_name,com_file_name);
printf("正在解压,解压后的文件名为decompress...\n");
decompress(com_file_name,"decompress");

return 0;
}