听说赫夫曼胜过了他的导师,被认为“青出于蓝而胜于蓝”,这句话也是我比较欣赏的,嘻嘻。
一 概念
了解“赫夫曼树”之前,有几个必须要知道的专业名词,可要熟练记住啊。
1: 结点的权
“权”就相当于“重要度”,我们形象的用一个具体的数字来表示,然后通过数字的大小来决定谁重要,谁不重要。
2: 路径
树中从“一个结点”到“另一个结点”之间的分支。
3: 路径长度
一个路径上的分支数量。
4: 树的路径长度
从树的根节点到每个节点的路径长度之和。
5: 节点的带权路径长度
其实也就是该节点到根结点的路径长度*该节点的权。
6: 树的带权路径长度
树中各个叶节点的带权路径长度(该叶节点到根结点的路径长度*该叶节点的权)之和,常用WPL(Weighted Path Length)表示。
二: 构建赫夫曼树
上面说了那么多,肯定是为下面做铺垫,这里说赫夫曼树,肯定是要说赫夫曼树咋好咋好,赫夫曼树是一种最优二叉树,
因为他的wpl是最短的,何以见得?我们可以上图说话。
现在我们做一个wpl的对比:
图a: wpl= 5*2 + 7*2 +2*2+13*2=54
图b:wpl=5*3+2*3+7*2+13*1=48
我们对比一下,图b的wpl更小,而且已经不可能再有比“图b”的wpl更小的二叉树了,所以,“图b”就是一棵赫夫曼树,那么大家肯定
要问,如何构建一棵赫夫曼树,还是上图说话。
第一步: 我们将所有的节点都作为独根结点。
第二步: 我们将最小的c和a组建为一个新的二叉树,权值为左右结点之和。
第三步: 将上一步组建的新节点加入到剩下的节点中,排除上一步组建过的左右子树,我们选中b组建新的二叉树,然后取权值。
第四步: 同上。
三: 赫夫曼编码
大家都知道,字符,汉字,数字在计算机中都是以0,1来表示的,相应的存储都是有一套编码方案来支撑的,比如ASCII码。
这样才能在“编码”和“解码”的过程中不会成为乱码,但是ASCII码不理想的地方就是等长的,其实我们都想用较少的空间来存储
更多的东西,那么我们就要采用“不等长”的编码方案来存储,那么“何为不等长呢”?其实也就是出现次数比较多的字符我们采用短编码,
出现次数较少的字符我们采用长编码,恰好,“赫夫曼编码”就是不等长的编码。
这里大家只要掌握赫夫曼树的编码规则:左子树为0,右子树为1,对应的编码后的规则是:从根节点到叶子节点所经过的各个分支上的0,1依次连接起来,就是该叶子节点对应字符的编码,例如:
a: 111
b: 10
c: 110
d: 0
四: 实现
不知道大家懂了没有,不懂的话多看几遍,下面说下赫夫曼的具体实现。
第一步:构建赫夫曼树。
第二步:对赫夫曼树进行编码。
第三步:压缩操作。
第四步:解压操作。
1:首先看下赫夫曼树的结构,这里字段的含义就不解释了。
#region Huffman tree node
/// <summary>
/// One node of an array-backed Huffman tree. Nodes refer to each other by
/// their index in the node array rather than by object reference.
/// </summary>
public class huffmantree
{
    /// <summary>Weight of this node; for a merged node, the sum of its two children.</summary>
    public int weight { get; set; }

    /// <summary>Array index of the parent node. Stays 0 until the node is merged under a parent.</summary>
    public int parent { get; set; }

    /// <summary>Array index of the left child. Stays 0 on nodes that were never given children.</summary>
    public int left { get; set; }

    /// <summary>Array index of the right child. Stays 0 on nodes that were never given children.</summary>
    public int right { get; set; }
}
#endregion
2: 创建赫夫曼树,原理在上面已经解释过了,就是一步一步的向上搭建,这里要注意的二个性质定理:
当叶子节点为n个,则需要n-1步就能搭建赫夫曼树。
当叶子节点为n个,则赫夫曼树的节点总数为:(2*n)-1个。
#region Huffman tree construction
/// <summary>
/// Links the nodes of <paramref name="huffman"/> into a Huffman tree by repeatedly
/// joining the two lightest nodes that do not have a parent yet.
/// </summary>
/// <param name="huffman">Pre-allocated node array; slots 0..leafnum-1 are the leaves, the following slots receive the merged nodes.</param>
/// <param name="leafnum">Number of leaves.</param>
/// <param name="weight">Leaf weights; weight[i] is stored on huffman[i].</param>
/// <returns>The same <paramref name="huffman"/> array after linking.</returns>
public huffmantree[] createtree(huffmantree[] huffman, int leafnum, int[] weight)
{
    // A tree with n leaves has 2n - 1 nodes in total.
    int totalnodes = 2 * leafnum - 1;

    // Copy the weights onto the leaf slots.
    for (int leaf = 0; leaf < leafnum && leaf < totalnodes; leaf++)
    {
        huffman[leaf].weight = weight[leaf];
    }

    // Every pass fills one merged node, so n leaves need n - 1 passes.
    for (int merged = leafnum; merged < totalnodes; merged++)
    {
        int lightest;
        int nextlightest;
        selectnode(huffman, merged, out lightest, out nextlightest);

        // Hang the two selected nodes under the new one.
        huffman[lightest].parent = merged;
        huffman[nextlightest].parent = merged;
        huffman[merged].left = lightest;
        huffman[merged].right = nextlightest;
        huffman[merged].weight = huffman[lightest].weight + huffman[nextlightest].weight;
    }

    return huffman;
}
#endregion
#region Select the two lightest parentless nodes
/// <summary>
/// Scans the first <paramref name="searchnodes"/> entries of <paramref name="huffman"/>
/// and reports the indices of the two lightest nodes whose parent is still 0.
/// </summary>
/// <param name="huffman">Node array to scan.</param>
/// <param name="searchnodes">Number of leading entries to examine.</param>
/// <param name="minindex1">Index of the lightest candidate; 0 when no candidate was found.</param>
/// <param name="minindex2">Index of the second lightest candidate; 0 when fewer than two candidates were found.</param>
public void selectnode(huffmantree[] huffman, int searchnodes, out int minindex1, out int minindex2)
{
    // -1 means "nothing found yet"; it is mapped to the default index 0 on exit.
    int best = -1;
    int runnerup = -1;

    for (int i = 0; i < searchnodes; i++)
    {
        // Only nodes that have not been attached to a parent may be selected.
        if (huffman[i].parent != 0)
        {
            continue;
        }

        int candidate = huffman[i].weight;
        if (best < 0)
        {
            // First candidate seen.
            best = i;
        }
        else if (candidate <= huffman[best].weight)
        {
            // New minimum (ties included): the previous minimum becomes the runner-up.
            runnerup = best;
            best = i;
        }
        else if (runnerup < 0 || candidate < huffman[runnerup].weight)
        {
            runnerup = i;
        }
    }

    minindex1 = best < 0 ? 0 : best;
    minindex2 = runnerup < 0 ? 0 : runnerup;
}
#endregion
3:对赫夫曼树进行编码操作,形成一套“模板”,效果跟ASCII码表一样,不过一个是不等长,一个是等长。
#region Huffman code table
/// <summary>
/// Produces the bit string of every leaf by walking from the leaf up to the root.
/// A step taken from a left child contributes "0", any other step contributes "1".
/// </summary>
/// <param name="huffman">Linked node array; leaves occupy slots 0..leafnum-1.</param>
/// <param name="leafnum">Number of leaves to generate codes for.</param>
/// <returns>Array of length <paramref name="leafnum"/>; element i is the code of leaf i, written root-first.</returns>
public string[] huffmancoding(huffmantree[] huffman, int leafnum)
{
    string[] huffmancode = new string[leafnum];

    for (int leaf = 0; leaf < leafnum; leaf++)
    {
        string code = string.Empty;
        int current = leaf;
        int parent = huffman[current].parent;

        // A parent index of 0 ends the walk (the topmost node was never given a parent).
        while (parent != 0)
        {
            string bit = current == huffman[parent].left ? "0" : "1";

            // The walk runs leaf-to-root, so prepend to end up with root-to-leaf order.
            code = bit + code;

            current = parent;
            parent = huffman[parent].parent;
        }

        huffmancode[leaf] = code;
    }

    return huffmancode;
}
#endregion
4:模板生成好了,我们就要对指定的测试数据进行压缩处理
#region Encode text
/// <summary>
/// Replaces every character of <paramref name="test"/> with the code of the matching
/// alphabet entry and concatenates the results.
/// </summary>
/// <param name="huffmancode">Code table; huffmancode[j] is the code of alphabet[j].</param>
/// <param name="alphabet">Symbols, each compared against a single character of the input.</param>
/// <param name="test">Text to encode.</param>
/// <returns>The concatenated codes. Characters with no matching alphabet entry contribute nothing.</returns>
public string encode(string[] huffmancode, string[] alphabet, string test)
{
    // StringBuilder avoids re-copying the whole result on every append.
    StringBuilder encoded = new StringBuilder();

    for (int i = 0; i < test.Length; i++)
    {
        // Convert once per character instead of once per alphabet entry.
        string symbol = test[i].ToString();

        for (int j = 0; j < alphabet.Length; j++)
        {
            if (symbol == alphabet[j])
            {
                encoded.Append(huffmancode[j]);
            }
        }
    }

    return encoded.ToString();
}
#endregion
5: 最后也就是对压缩的数据进行还原操作。
#region Decode bit string
/// <summary>
/// Turns a string of '0'/'1' characters back into text by walking the tree from the
/// root: '0' follows the left link, '1' follows the right link, and reaching a node
/// with no children emits the alphabet entry at that node's index.
/// </summary>
/// <param name="huffman">Linked node array.</param>
/// <param name="huffmannodes">Total number of nodes; the root is at index huffmannodes - 1.</param>
/// <param name="alphabet">Symbols; alphabet[i] belongs to leaf i.</param>
/// <param name="test">Bit string to decode. Characters other than '0' and '1' are skipped.</param>
/// <returns>The decoded text; an empty string for empty input.</returns>
/// <exception cref="ArgumentException">
/// The input ends in the middle of a code, or the tree consists of a single node and
/// therefore cannot consume any input.
/// </exception>
public string decode(huffmantree[] huffman, int huffmannodes, string[] alphabet, string test)
{
    if (test.Length == 0)
    {
        return string.Empty;
    }

    int root = huffmannodes - 1;

    // A root without children never consumes a bit, so decoding could not make progress.
    if (huffman[root].left == 0 && huffman[root].right == 0)
    {
        throw new ArgumentException("The tree has a single node and cannot decode a non-empty bit string.", "huffman");
    }

    StringBuilder decoded = new StringBuilder();
    int position = 0;

    while (position < test.Length)
    {
        int node = root;

        // A node is a leaf when both child links are 0.
        while (huffman[node].left != 0 || huffman[node].right != 0)
        {
            if (position >= test.Length)
            {
                throw new ArgumentException("The bit string ends in the middle of a code.", "test");
            }

            char bit = test[position];
            position++;

            if (bit == '0')
            {
                node = huffman[node].left;
            }
            else if (bit == '1')
            {
                node = huffman[node].right;
            }
        }

        decoded.Append(alphabet[node]);
    }

    return decoded.ToString();
}
#endregion
最后上一下总的运行代码
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace huffmantree
{
/// <summary>
/// Console demo: builds a Huffman tree for four weighted symbols, prints the code of
/// each symbol, then encodes a sample string and decodes it again.
/// </summary>
class program
{
    /// <summary>Program entry point.</summary>
    /// <param name="args">Command-line arguments (not used).</param>
    static void Main(string[] args)
    {
        // weight[i] is the weight of alphabet[i].
        int[] weight = { 5, 7, 2, 13 };
        string[] alphabet = { "a", "b", "c", "d" };
        string testcode = "dbdbdabdcdadbdadbdadacdbdbd";

        // One leaf per symbol; a tree with n leaves has 2n - 1 nodes.
        int leafnum = weight.Length;
        int huffmannodes = 2 * leafnum - 1;

        // The tree lives in an array; every slot needs its own node object up front.
        huffmantree[] huffman = new huffmantree[huffmannodes];
        for (int i = 0; i < huffman.Length; i++)
        {
            huffman[i] = new huffmantree();
        }

        huffmantreemanager manager = new huffmantreemanager();
        manager.createtree(huffman, leafnum, weight);

        string[] huffmancode = manager.huffmancoding(huffman, leafnum);
        for (int i = 0; i < leafnum; i++)
        {
            Console.WriteLine("字符:{0},权重:{1},编码为:{2}", alphabet[i], huffman[i].weight, huffmancode[i]);
        }

        Console.WriteLine("原始的字符串为:" + testcode);

        string encoded = manager.encode(huffmancode, alphabet, testcode);
        Console.WriteLine("被编码的字符串为:" + encoded);

        string decoded = manager.decode(huffman, huffmannodes, alphabet, encoded);
        Console.WriteLine("解码后的字符串为:" + decoded);
    }
}
#region Huffman tree node
/// <summary>
/// One node of an array-backed Huffman tree. Nodes refer to each other by
/// their index in the node array rather than by object reference.
/// </summary>
public class huffmantree
{
    /// <summary>Weight of this node; for a merged node, the sum of its two children.</summary>
    public int weight { get; set; }

    /// <summary>Array index of the parent node. Stays 0 until the node is merged under a parent.</summary>
    public int parent { get; set; }

    /// <summary>Array index of the left child. Stays 0 on nodes that were never given children.</summary>
    public int left { get; set; }

    /// <summary>Array index of the right child. Stays 0 on nodes that were never given children.</summary>
    public int right { get; set; }
}
#endregion
/// <summary>
/// Operations on an array-backed Huffman tree: building the tree, deriving the code
/// table, encoding text and decoding a bit string.
/// </summary>
/// <remarks>
/// Nodes are linked by array index, and the value 0 is used as "no link": a node whose
/// parent is 0 has not been merged yet, and a node whose left and right are both 0 is
/// treated as a leaf.
/// </remarks>
public class huffmantreemanager
{
    #region Huffman tree construction
    /// <summary>
    /// Links the nodes of <paramref name="huffman"/> into a Huffman tree by repeatedly
    /// joining the two lightest nodes that do not have a parent yet.
    /// </summary>
    /// <param name="huffman">Pre-allocated node array; slots 0..leafnum-1 are the leaves, the following slots receive the merged nodes.</param>
    /// <param name="leafnum">Number of leaves.</param>
    /// <param name="weight">Leaf weights; weight[i] is stored on huffman[i].</param>
    /// <returns>The same <paramref name="huffman"/> array after linking.</returns>
    public huffmantree[] createtree(huffmantree[] huffman, int leafnum, int[] weight)
    {
        // A tree with n leaves has 2n - 1 nodes in total.
        int totalnodes = 2 * leafnum - 1;

        // Copy the weights onto the leaf slots.
        for (int leaf = 0; leaf < leafnum && leaf < totalnodes; leaf++)
        {
            huffman[leaf].weight = weight[leaf];
        }

        // Every pass fills one merged node, so n leaves need n - 1 passes.
        for (int merged = leafnum; merged < totalnodes; merged++)
        {
            int lightest;
            int nextlightest;
            selectnode(huffman, merged, out lightest, out nextlightest);

            // Hang the two selected nodes under the new one.
            huffman[lightest].parent = merged;
            huffman[nextlightest].parent = merged;
            huffman[merged].left = lightest;
            huffman[merged].right = nextlightest;
            huffman[merged].weight = huffman[lightest].weight + huffman[nextlightest].weight;
        }

        return huffman;
    }
    #endregion

    #region Select the two lightest parentless nodes
    /// <summary>
    /// Scans the first <paramref name="searchnodes"/> entries of <paramref name="huffman"/>
    /// and reports the indices of the two lightest nodes whose parent is still 0.
    /// </summary>
    /// <param name="huffman">Node array to scan.</param>
    /// <param name="searchnodes">Number of leading entries to examine.</param>
    /// <param name="minindex1">Index of the lightest candidate; 0 when no candidate was found.</param>
    /// <param name="minindex2">Index of the second lightest candidate; 0 when fewer than two candidates were found.</param>
    public void selectnode(huffmantree[] huffman, int searchnodes, out int minindex1, out int minindex2)
    {
        // -1 means "nothing found yet"; it is mapped to the default index 0 on exit.
        int best = -1;
        int runnerup = -1;

        for (int i = 0; i < searchnodes; i++)
        {
            // Only nodes that have not been attached to a parent may be selected.
            if (huffman[i].parent != 0)
            {
                continue;
            }

            int candidate = huffman[i].weight;
            if (best < 0)
            {
                // First candidate seen.
                best = i;
            }
            else if (candidate <= huffman[best].weight)
            {
                // New minimum (ties included): the previous minimum becomes the runner-up.
                runnerup = best;
                best = i;
            }
            else if (runnerup < 0 || candidate < huffman[runnerup].weight)
            {
                runnerup = i;
            }
        }

        minindex1 = best < 0 ? 0 : best;
        minindex2 = runnerup < 0 ? 0 : runnerup;
    }
    #endregion

    #region Huffman code table
    /// <summary>
    /// Produces the bit string of every leaf by walking from the leaf up to the root.
    /// A step taken from a left child contributes "0", any other step contributes "1".
    /// </summary>
    /// <param name="huffman">Linked node array; leaves occupy slots 0..leafnum-1.</param>
    /// <param name="leafnum">Number of leaves to generate codes for.</param>
    /// <returns>Array of length <paramref name="leafnum"/>; element i is the code of leaf i, written root-first.</returns>
    public string[] huffmancoding(huffmantree[] huffman, int leafnum)
    {
        string[] huffmancode = new string[leafnum];

        for (int leaf = 0; leaf < leafnum; leaf++)
        {
            string code = string.Empty;
            int current = leaf;
            int parent = huffman[current].parent;

            // A parent index of 0 ends the walk (the topmost node was never given a parent).
            while (parent != 0)
            {
                string bit = current == huffman[parent].left ? "0" : "1";

                // The walk runs leaf-to-root, so prepend to end up with root-to-leaf order.
                code = bit + code;

                current = parent;
                parent = huffman[parent].parent;
            }

            huffmancode[leaf] = code;
        }

        return huffmancode;
    }
    #endregion

    #region Encode text
    /// <summary>
    /// Replaces every character of <paramref name="test"/> with the code of the matching
    /// alphabet entry and concatenates the results.
    /// </summary>
    /// <param name="huffmancode">Code table; huffmancode[j] is the code of alphabet[j].</param>
    /// <param name="alphabet">Symbols, each compared against a single character of the input.</param>
    /// <param name="test">Text to encode.</param>
    /// <returns>The concatenated codes. Characters with no matching alphabet entry contribute nothing.</returns>
    public string encode(string[] huffmancode, string[] alphabet, string test)
    {
        // StringBuilder avoids re-copying the whole result on every append.
        StringBuilder encoded = new StringBuilder();

        for (int i = 0; i < test.Length; i++)
        {
            // Convert once per character instead of once per alphabet entry.
            string symbol = test[i].ToString();

            for (int j = 0; j < alphabet.Length; j++)
            {
                if (symbol == alphabet[j])
                {
                    encoded.Append(huffmancode[j]);
                }
            }
        }

        return encoded.ToString();
    }
    #endregion

    #region Decode bit string
    /// <summary>
    /// Turns a string of '0'/'1' characters back into text by walking the tree from the
    /// root: '0' follows the left link, '1' follows the right link, and reaching a node
    /// with no children emits the alphabet entry at that node's index.
    /// </summary>
    /// <param name="huffman">Linked node array.</param>
    /// <param name="huffmannodes">Total number of nodes; the root is at index huffmannodes - 1.</param>
    /// <param name="alphabet">Symbols; alphabet[i] belongs to leaf i.</param>
    /// <param name="test">Bit string to decode. Characters other than '0' and '1' are skipped.</param>
    /// <returns>The decoded text; an empty string for empty input.</returns>
    /// <exception cref="ArgumentException">
    /// The input ends in the middle of a code, or the tree consists of a single node and
    /// therefore cannot consume any input.
    /// </exception>
    public string decode(huffmantree[] huffman, int huffmannodes, string[] alphabet, string test)
    {
        if (test.Length == 0)
        {
            return string.Empty;
        }

        int root = huffmannodes - 1;

        // A root without children never consumes a bit, so decoding could not make progress.
        if (huffman[root].left == 0 && huffman[root].right == 0)
        {
            throw new ArgumentException("The tree has a single node and cannot decode a non-empty bit string.", "huffman");
        }

        StringBuilder decoded = new StringBuilder();
        int position = 0;

        while (position < test.Length)
        {
            int node = root;

            // A node is a leaf when both child links are 0.
            while (huffman[node].left != 0 || huffman[node].right != 0)
            {
                if (position >= test.Length)
                {
                    throw new ArgumentException("The bit string ends in the middle of a code.", "test");
                }

                char bit = test[position];
                position++;

                if (bit == '0')
                {
                    node = huffman[node].left;
                }
                else if (bit == '1')
                {
                    node = huffman[node].right;
                }
            }

            decoded.Append(alphabet[node]);
        }

        return decoded.ToString();
    }
    #endregion
}
}