目录
总述
01-10天,基本语法
11-20天,线性数据结构
21-30天,树与二叉树
31-40天,图
41-50天,查找与排序
51-60天,kNN 与 NB
61-70天,决策树与集成学习
71-80天,BP 神经网络
81-90天,CNN 卷积神经网络
本贴针对树型结构。这些任务的来源可参阅《数据结构》一书中《树与二叉树》一章. 强烈建议拿出纸和笔, 画图来辅助程序的分析与设计.
第 21-26 天为二叉树的一个完整程序 (BinaryCharTree), 附带一个改写后的栈 (ObjectStack) 和队列 (CircleObjectQueue).
第 27 天为 Hanoi 塔问题.
第 28-30 天为 Huffman 编码.
第 21 天: 二叉树的深度遍历的递归实现
21.1 二叉树的遍历比存储、建立要简单. 所以先“手动”建立一个二叉树来玩.
21.2 递归算法写起来就是舒服. 前、中、后序的代码差别只有输出语句的位置.
21.3 不需要额外的节点类, 每棵二叉树 (树) 都可以从自己的根结点找到其它所有节点. 这个需要自悟.
21.4 获取二叉树的层次、总节点数, 也需要递归. 以后可能要用, 这里就一并写了.
package ;
import ;
/**
 * Binary tree with char type elements. Every object is a node and, at the same
 * time, the root of the subtree hanging below it, so no separate node class is
 * needed.
 *
 * @author Fan Min
 */
public class BinaryCharTree {

    /**
     * The value in char.
     */
    char value;

    /**
     * The left child, or null if there is none.
     */
    BinaryCharTree leftChild;

    /**
     * The right child, or null if there is none.
     */
    BinaryCharTree rightChild;

    /**
     *********************
     * The first constructor. Builds a tree consisting of a single node.
     *
     * @param paraName
     *            The value.
     *********************
     */
    public BinaryCharTree(char paraName) {
        value = paraName;
        leftChild = null;
        rightChild = null;
    }// Of the constructor

    /**
     *********************
     * Manually construct a tree. Only for testing.
     *
     * The tree has seven nodes. In breadth-first positions (root = 0, children
     * of p at 2p + 1 and 2p + 2) they are a:0, b:1, c:2, d:4, e:5, f:9, g:10.
     *
     * @return The root of the constructed tree.
     *********************
     */
    public static BinaryCharTree manualConstructTree() {
        // Step 1. Construct a tree with only one node. It is the root.
        BinaryCharTree resultTree = new BinaryCharTree('a');

        // Step 2. Construct all other nodes.
        BinaryCharTree tempTreeB = new BinaryCharTree('b');
        BinaryCharTree tempTreeC = new BinaryCharTree('c');
        BinaryCharTree tempTreeD = new BinaryCharTree('d');
        BinaryCharTree tempTreeE = new BinaryCharTree('e');
        BinaryCharTree tempTreeF = new BinaryCharTree('f');
        BinaryCharTree tempTreeG = new BinaryCharTree('g');

        // Step 3. Link all nodes.
        resultTree.leftChild = tempTreeB;
        resultTree.rightChild = tempTreeC;
        tempTreeB.rightChild = tempTreeD;
        tempTreeC.leftChild = tempTreeE;
        tempTreeD.leftChild = tempTreeF;
        tempTreeD.rightChild = tempTreeG;

        return resultTree;
    }// Of manualConstructTree

    /**
     *********************
     * Pre-order visit: this node, then the left subtree, then the right one.
     * Values are printed to standard output, each followed by a space.
     *********************
     */
    public void preOrderVisit() {
        System.out.print("" + value + " ");

        if (leftChild != null) {
            leftChild.preOrderVisit();
        } // Of if

        if (rightChild != null) {
            rightChild.preOrderVisit();
        } // Of if
    }// Of preOrderVisit

    /**
     *********************
     * In-order visit: the left subtree, then this node, then the right subtree.
     * Values are printed to standard output, each followed by a space.
     *********************
     */
    public void inOrderVisit() {
        if (leftChild != null) {
            leftChild.inOrderVisit();
        } // Of if

        System.out.print("" + value + " ");

        if (rightChild != null) {
            rightChild.inOrderVisit();
        } // Of if
    }// Of inOrderVisit

    /**
     *********************
     * Post-order visit: the left subtree, then the right one, then this node.
     * Values are printed to standard output, each followed by a space.
     *********************
     */
    public void postOrderVisit() {
        if (leftChild != null) {
            leftChild.postOrderVisit();
        } // Of if

        if (rightChild != null) {
            rightChild.postOrderVisit();
        } // Of if

        System.out.print("" + value + " ");
    }// Of postOrderVisit

    /**
     *********************
     * Get the depth of the binary tree.
     *
     * @return The depth. It is 1 if there is only one node, i.e., the root.
     *********************
     */
    public int getDepth() {
        // A missing subtree contributes depth 0, so a leaf yields 1.
        int tempLeftDepth = (leftChild == null) ? 0 : leftChild.getDepth();
        int tempRightDepth = (rightChild == null) ? 0 : rightChild.getDepth();

        // The depth should increment by 1 for this node.
        return Math.max(tempLeftDepth, tempRightDepth) + 1;
    }// Of getDepth

    /**
     *********************
     * Get the number of nodes.
     *
     * @return The number of nodes, including this one.
     *********************
     */
    public int getNumNodes() {
        // A missing subtree contributes 0 nodes, so a leaf yields 1.
        int tempLeftNodes = (leftChild == null) ? 0 : leftChild.getNumNodes();
        int tempRightNodes = (rightChild == null) ? 0 : rightChild.getNumNodes();

        // The total number of nodes.
        return tempLeftNodes + tempRightNodes + 1;
    }// Of getNumNodes

    /**
     *********************
     * The entrance of the program.
     *
     * @param args
     *            Not used now.
     *********************
     */
    public static void main(String args[]) {
        BinaryCharTree tempTree = manualConstructTree();
        System.out.println("\r\nPreorder visit:");
        tempTree.preOrderVisit();
        System.out.println("\r\nIn-order visit:");
        tempTree.inOrderVisit();
        System.out.println("\r\nPost-order visit:");
        tempTree.postOrderVisit();

        System.out.println("\r\n\r\nThe depth is: " + tempTree.getDepth());
        System.out.println("The number of nodes is: " + tempTree.getNumNodes());
    }// Of main
}// Of BinaryCharTree
第 22 天: 二叉树的存储
二叉树的存储并非一个简单的问题. 引用 (指针) 是无法存储到文件里面的.
我们可以从完全二叉树广度优先 (层次) 遍历的角度来考虑这个问题: 每个节点都有一个 name 及其在二叉树中的位置. 令根节点的位置为 0; 则第 2 层节点的位置依次为 1 至 2; 第 3 层节点的位置依次为 3 至 6. 以此类推.
把昨天那个例子所对应的二叉树画出来, 我们有两种方法:
- 空使用 0 来表示, 可以用一个向量来存储:
[a, b, c, 0, d, e, 0, 0, 0, f, g]
优点: 仅需要一个向量, 简单直接.
缺点: 对于实际的二叉树, 很多子树为空, 导致大量的 0 值.
- 刘知鑫指出: 应使用压缩存储方式, 即将节点的位置和值均存储. 可表示为两个向量:
[0, 1, 2, 4, 5, 9, 10]
[a, b, c, d, e, f, g]
由于递归程序涉及变量的作用域问题 (额, 这个真的有点头疼), 我们需要使用层次遍历的方式, 获得以上的几个向量. 为此, 需要用到队列. 我们先灌点水, 把队列的代码再写一遍, 但这次装的是对象 (的引用).
package ;
/**
 * Circle Object queue. It stores references to arbitrary objects in a
 * fixed-size circular buffer; callers cast dequeued values back to their own
 * type.
 *
 * @author Fan Min
 */
public class CircleObjectQueue {

    /**
     * The total space. One space can never be used.
     */
    public static final int TOTAL_SPACE = 10;

    /**
     * The data.
     */
    Object[] data;

    /**
     * The index of the head. It only grows; the buffer position is
     * head % TOTAL_SPACE.
     */
    int head;

    /**
     * The index of the tail. It only grows; the buffer position is
     * tail % TOTAL_SPACE.
     */
    int tail;

    /**
     *******************
     * The constructor
     *******************
     */
    public CircleObjectQueue() {
        data = new Object[TOTAL_SPACE];
        head = 0;
        tail = 0;
    }// Of the first constructor

    /**
     *********************
     * Enqueue. If the queue already holds TOTAL_SPACE - 1 elements, "Queue
     * full." is printed and the value is discarded.
     *
     * @param paraValue
     *            The value of the new node.
     *********************
     */
    public void enqueue(Object paraValue) {
        // head and tail are never wrapped, so fullness must be decided from
        // the element count. Comparing (tail + 1) % TOTAL_SPACE with the raw
        // head stops working once head reaches TOTAL_SPACE, and live elements
        // get overwritten.
        if (tail - head >= TOTAL_SPACE - 1) {
            System.out.println("Queue full.");
            return;
        } // Of if

        data[tail % TOTAL_SPACE] = paraValue;
        tail++;
    }// Of enqueue

    /**
     *********************
     * Dequeue.
     *
     * @return The value at the head, or null if the queue is empty.
     *********************
     */
    public Object dequeue() {
        if (head == tail) {
            return null;
        } // Of if

        int tempPosition = head % TOTAL_SPACE;
        Object resultValue = data[tempPosition];
        // Drop the obsolete reference so the object can be garbage collected.
        data[tempPosition] = null;
        head++;

        return resultValue;
    }// Of dequeue

    /**
     *********************
     * Overrides the method claimed in Object, the superclass of any class.
     *
     * @return "empty" for an empty queue; otherwise the elements from head to
     *         tail, each followed by ", ".
     *********************
     */
    @Override
    public String toString() {
        if (head == tail) {
            return "empty";
        } // Of if

        StringBuilder resultBuilder = new StringBuilder();
        for (int i = head; i < tail; i++) {
            resultBuilder.append(data[i % TOTAL_SPACE]).append(", ");
        } // Of for i

        return resultBuilder.toString();
    }// Of toString

    /**
     *********************
     * The entrance of the program.
     *
     * @param args
     *            Not used now.
     *********************
     */
    public static void main(String args[]) {
        CircleObjectQueue tempQueue = new CircleObjectQueue();
    }// Of main
}// Of CircleObjectQueue
- 数据转换的代码很短, 但要理解还是比较花时间. 反正我写的时候比较辛苦.
- 用了两个队列.
- 实际上涵盖了广度优先 (或称层次) 遍历.
- 节点左儿子的编号是其编号 2 倍再加 1; 右儿子则应加 2.
- main 前面的保留, 只是为了读者方便与前面的程序对标.
- 需要 import .*;
/**
* The values of nodes according to breadth first traversal.
*/
char[] valuesArray;
/**
* The indices in the complete binary tree.
*/
int[] indicesArray;
/**
********************
* Convert the tree to data arrays, including a char array and an int array.
* The results are stored in two member variables.
*
* @see #valuesArray
* @see #indicesArray
*********************
*/
public void toDataArrays() {
//Initialize arrays.
int tempLength = getNumNodes();
valuesArray = new char[tempLength];
indicesArray = new int[tempLength];
int i = 0;
//Traverse and convert at the same time.
CircleObjectQueue tempQueue = new CircleObjectQueue();
(this);
CircleIntQueue tempIntQueue = new CircleIntQueue();
(0);
BinaryCharTree tempTree = (BinaryCharTree) ();
int tempIndex = ();
while (tempTree != null) {
valuesArray[i] = ;
indicesArray[i] = tempIndex;
i++;
if ( != null) {
();
(tempIndex * 2 + 1);
} // Of if
if ( != null) {
();
(tempIndex * 2 + 2);
} // Of if
tempTree = (BinaryCharTree) ();
tempIndex = ();
} // Of while
}// Of toDataArrays
/**
*********************
* The entrance of the program.
*
* @param args
* Not used now.
*********************
*/
public static void main(String args[]) {
BinaryCharTree tempTree = manualConstructTree();
("\r\nPreorder visit:");
();
("\r\nIn-order visit:");
();
("\r\nPost-order visit:");
();
("\r\n\r\nThe depth is: " + ());
("The number of nodes is: " + ());
();
("The values are: " + ());
("The indices are: " + ());
}// Of main
第 23 天: 使用具有通用性的队列
昨天使用的队列有两种: 存储二叉树节点的队列; 存储整数的队列. 这样的话, 难道我们要为每种类型单独写一个队列? 这样显然没有充分利用代码的复用性. 实际上, 我们只需要一个存储对象的队列就够啦!
- Java 里面, 所有的类均为 Object 类的 (直接或间接) 子类. 如果不写就默认为直接子类. 例如
public class CircleObjectQueue;
等价于
public class CircleObjectQueue extends Object;
- 存储对象的队列, 实际上是存储对象的地址 (引用、指针). 因此, 可以存储任何类的对象 (的引用).
- 可以通过强制类型转换将对象转成其本身的类别. 例如前面程序
tempTree = (BinaryCharTree) tempQueue.dequeue();
括号中的类型即表示强制类型转换.
- Java 本身将 int, double, char 分别封装到 Integer, Double, Character 类.
- 今天的代码量非常少, 但涉及面向对象的思想不少.
让我们见识一下它的威力吧.
/**
********************
* Convert the tree to data arrays, including a char array and an int array.
* The results are stored in two member variables.
*
* @see #valuesArray
* @see #indicesArray
*********************
*/
public void toDataArraysObjectQueue() {
// Initialize arrays.
int tempLength = getNumNodes();
valuesArray = new char[tempLength];
indicesArray = new int[tempLength];
int i = 0;
// Traverse and convert at the same time.
CircleObjectQueue tempQueue = new CircleObjectQueue();
(this);
CircleObjectQueue tempIntQueue = new CircleObjectQueue();
Integer tempIndexInteger = (0);
(tempIndexInteger);
BinaryCharTree tempTree = (BinaryCharTree) ();
int tempIndex = ((Integer) ()).intValue();
("tempIndex = " + tempIndex);
while (tempTree != null) {
valuesArray[i] = ;
indicesArray[i] = tempIndex;
i++;
if ( != null) {
();
((tempIndex * 2 + 1));
} // Of if
if ( != null) {
();
((tempIndex * 2 + 2));
} // Of if
tempTree = (BinaryCharTree) ();
if (tempTree == null) {
break;
}//Of if
tempIndex = ((Integer) ()).intValue();
} // Of while
}// Of toDataArraysObjectQueue
/**
*********************
* The entrance of the program.
*
* @param args
* Not used now.
*********************
*/
public static void main(String args[]) {
BinaryCharTree tempTree = manualConstructTree();
("\r\nPreorder visit:");
();
("\r\nIn-order visit:");
();
("\r\nPost-order visit:");
();
("\r\n\r\nThe depth is: " + ());
("The number of nodes is: " + ());
();
("The values are: " + ());
("The indices are: " + ());
();
("Only object queue.");
("The values are: " + ());
("The indices are: " + ());
}// Of main
思考: 既然队列具有通用性, 即可以处理任何类型的基础数据, 二叉树是否也可以呢? 答案是肯定的. 这需要我们把
char value;
改成
Object value;
并在适当的地方使用强制类型转换即可.
第 24 天: 二叉树的建立
- 只增加了一个构造方法, 相当于第 22 天的逆过程.
- 保留了调试语句, 因为下标很容易出错.
- 使用一个线性表先分配所有节点的空间, 再将节点链接起来.
- 最后并没有返回, 而是把第 0 个节点的相应值拷贝给自己.
- 整个的设计理念, 需要花时间理解. 又到了进阶的时候, 秒懂 vs. 怎么也不懂. 如果是后者, 建议不要再继续.
/**
 *********************
 * The second constructor. The parameters must be correct since no validity
 * check is undertaken.
 *
 * It is the inverse of the conversion to data arrays: node i has the value
 * paraDataArray[i] and sits at position paraIndicesArray[i] of the complete
 * binary tree (root 0, children of p at 2p + 1 and 2p + 2). The first entry
 * must be the root, and each parent must appear before its children.
 *
 * @param paraDataArray    The array for data.
 * @param paraIndicesArray The array for indices.
 *********************
 */
public BinaryCharTree(char[] paraDataArray, int[] paraIndicesArray) {
    // Step 1. Use a sequential list to store all nodes.
    int tempNumNodes = paraDataArray.length;
    BinaryCharTree[] tempAllNodes = new BinaryCharTree[tempNumNodes];
    for (int i = 0; i < tempNumNodes; i++) {
        tempAllNodes[i] = new BinaryCharTree(paraDataArray[i]);
    } // Of for i

    // Step 2. Link these nodes. For every node but the root, look among the
    // earlier nodes for the one whose position makes it the parent.
    for (int i = 1; i < tempNumNodes; i++) {
        for (int j = 0; j < i; j++) {
            System.out.println("indices " + paraIndicesArray[j] + " vs. " + paraIndicesArray[i]);
            if (paraIndicesArray[i] == paraIndicesArray[j] * 2 + 1) {
                tempAllNodes[j].leftChild = tempAllNodes[i];
                System.out.println("Linking " + j + " with " + i);
                break;
            } else if (paraIndicesArray[i] == paraIndicesArray[j] * 2 + 2) {
                tempAllNodes[j].rightChild = tempAllNodes[i];
                System.out.println("Linking " + j + " with " + i);
                break;
            } // Of if
        } // Of for j
    } // Of for i

    // Step 3. The root is the first node. A constructor cannot return another
    // object, so the root's content is copied into this one.
    value = tempAllNodes[0].value;
    leftChild = tempAllNodes[0].leftChild;
    rightChild = tempAllNodes[0].rightChild;
}// Of the second constructor
/**
*********************
* The entrance of the program.
*
* @param args Not used now.
*********************
*/
public static void main(String args[]) {
BinaryCharTree tempTree = manualConstructTree();
("\r\nPreorder visit:");
();
("\r\nIn-order visit:");
();
("\r\nPost-order visit:");
();
("\r\n\r\nThe depth is: " + ());
("The number of nodes is: " + ());
();
("The values are: " + ());
("The indices are: " + ());
();
("Only object queue.");
("The values are: " + ());
("The indices are: " + ());
char[] tempCharArray = {'A', 'B', 'C', 'D', 'E', 'F'};
int[] tempIndicesArray = {0, 1, 2, 4, 5, 12};
BinaryCharTree tempTree2 = new BinaryCharTree(tempCharArray, tempIndicesArray);
("\r\nPreorder visit:");
();
("\r\nIn-order visit:");
();
("\r\nPost-order visit:");
();
}// Of main
第 25 天: 二叉树深度遍历的栈实现 (中序)
今天先来做中序. 分成两个子任务.
25.1 具有通用性的对象栈
- 改写栈程序, 里面存放对象.
- 该程序应该放在 datastructure.stack 包内.
- 还是依靠强制类型转换, 支持不同的数据类型.
- 增加了 isEmpty() 方法.
package ;
/**
 * Object stack. A fixed-capacity sequential stack that stores references to
 * arbitrary objects; callers cast popped values back to their own type.
 *
 * @author Fan Min
 */
public class ObjectStack {

    /**
     * The maximal depth, i.e., the capacity.
     */
    public static final int MAX_DEPTH = 10;

    /**
     * The actual depth.
     */
    int depth;

    /**
     * The data
     */
    Object[] data;

    /**
     *********************
     * Construct an empty stack.
     *********************
     */
    public ObjectStack() {
        depth = 0;
        data = new Object[MAX_DEPTH];
    }// Of the first constructor

    /**
     *********************
     * Overrides the method claimed in Object, the superclass of any class.
     *
     * @return The elements from bottom to top, concatenated without separator.
     *********************
     */
    @Override
    public String toString() {
        StringBuilder resultBuilder = new StringBuilder();
        for (int i = 0; i < depth; i++) {
            resultBuilder.append(data[i]);
        } // Of for i

        return resultBuilder.toString();
    }// Of toString

    /**
     *********************
     * Push an element. If the stack is full, "Stack full." is printed and the
     * element is discarded.
     *
     * @param paraObject
     *            The given object.
     * @return Success or not.
     *********************
     */
    public boolean push(Object paraObject) {
        if (depth == MAX_DEPTH) {
            System.out.println("Stack full.");
            return false;
        } // Of if

        data[depth] = paraObject;
        depth++;

        return true;
    }// Of push

    /**
     *********************
     * Pop an element. If the stack is empty, "Nothing to pop." is printed and
     * a Character holding '\0' is returned as a sentinel; check isEmpty()
     * first when the stack holds other types.
     *
     * @return The object at the top of the stack.
     *********************
     */
    public Object pop() {
        if (depth == 0) {
            System.out.println("Nothing to pop.");
            return '\0';
        } // of if

        depth--;
        Object resultObject = data[depth];
        // Drop the obsolete reference so the object can be garbage collected.
        data[depth] = null;

        return resultObject;
    }// Of pop

    /**
     *********************
     * Is the stack empty?
     *
     * @return True if empty.
     *********************
     */
    public boolean isEmpty() {
        return depth == 0;
    }// Of isEmpty

    /**
     *********************
     * The entrance of the program. Pushes twelve characters (two more than the
     * capacity) and pops twelve times to exercise both boundary messages.
     *
     * @param args
     *            Not used now.
     *********************
     */
    public static void main(String args[]) {
        ObjectStack tempStack = new ObjectStack();

        for (char ch = 'a'; ch < 'm'; ch++) {
            tempStack.push(Character.valueOf(ch));
            System.out.println("The current stack is: " + tempStack);
        } // Of for ch

        char tempChar;
        for (int i = 0; i < 12; i++) {
            tempChar = ((Character) tempStack.pop()).charValue();
            System.out.println("Poped: " + tempChar);
            System.out.println("The current stack is: " + tempStack);
        } // Of for i
    }// Of main
}// Of class ObjectStack
25.2 中序遍历
- 代码依然短.
- 中序是几种遍历中最简单的.
- 具体设计思路自己琢磨, 用几个例子来运行就懂了.
/**
*********************
* In-order visit with stack.
*********************
*/
public void inOrderVisitWithStack() {
ObjectStack tempStack = new ObjectStack();
BinaryCharTree tempNode = this;
while (!() || tempNode != null) {
if (tempNode != null) {
(tempNode);
tempNode = ;
} else {
tempNode = (BinaryCharTree) ();
("" + + " ");
tempNode = ;
} // Of if
} // Of while
}// Of inOrderVisit
/**
*********************
* The entrance of the program.
*
* @param args Not used now.
*********************
*/
public static void main(String args[]) {
BinaryCharTree tempTree = manualConstructTree();
("\r\nPreorder visit:");
();
("\r\nIn-order visit:");
();
("\r\nPost-order visit:");
();
("\r\n\r\nThe depth is: " + ());
("The number of nodes is: " + ());
();
("The values are: " + ());
("The indices are: " + ());
();
("Only object queue.");
("The values are: " + ());
("The indices are: " + ());
char[] tempCharArray = { 'A', 'B', 'C', 'D', 'E', 'F' };
int[] tempIndicesArray = { 0, 1, 2, 4, 5, 12 };
BinaryCharTree tempTree2 = new BinaryCharTree(tempCharArray, tempIndicesArray);
("\r\nPre-order visit:");
();
("\r\nIn-order visit:");
();
("\r\nPost-order visit:");
();
("\r\nIn-order visit with stack:");
();
}// Of main
第 26 天: 二叉树深度遍历的栈实现 (前序和后序)
以为前序和后序要比中序难很多, 其实并没有.
- 前序与中序的区别, 仅仅在于输出语句的位置不同.
- 二叉树的遍历, 总共有 6 种排列: 1) 左中右 (中序); 2) 左右中 (后序); 3) 中左右 (前序); 4) 中右左; 5) 右左中; 6) 右中左. 我们平常关心的是前三种, 是因为我们习惯于先左后右. 如果要先右后左, 就相当于左右子树互换, 这个是很容易做到的.
- 如果将前序的左右子树互换, 就可得到 4) 中右左; 再进行逆序, 可以得到 2) 左右中. 因此, 要把前序的代码改为后序, 需要首先将 leftChild 和 rightChild 互换, 然后用一个栈来存储需要输出的字符, 最终反向输出即可. 这种将一个问题转换成另一个等价问题的方式, 无论在数学还是计算机领域, 都极度重要. 参见 https://blog.csdn.net/minfanphd/article/details/117318844 中莫峦奇的版本.
- 如果不按上述方式, 直接写后序遍历, 就会复杂得多, 有双重的 while 循环. 参见 https://blog.csdn.net/minfanphd/article/details/117318844 中潘佳豪的版本.
/**
*********************
* Pre-order visit with stack.
*********************
*/
public void preOrderVisitWithStack() {
ObjectStack tempStack = new ObjectStack();
BinaryCharTree tempNode = this;
while (!() || tempNode != null) {
if (tempNode != null) {
("" + + " ");
(tempNode);
tempNode = ;
} else {
tempNode = (BinaryCharTree) ();
tempNode = ;
} // Of if
} // Of while
}// Of preOrderVisitWithStack
/**
*********************
* Post-order visit with stack.
*********************
*/
public void postOrderVisitWithStack() {
ObjectStack tempStack = new ObjectStack();
BinaryCharTree tempNode = this;
ObjectStack tempOutputStack = new ObjectStack();
while (!() || tempNode != null) {
if (tempNode != null) {
//Store for output.
(new Character());
(tempNode);
tempNode = ;
} else {
tempNode = (BinaryCharTree) ();
tempNode = ;
} // Of if
} // Of while
//Now reverse output.
while (!()) {
("" + () + " ");
}//Of while
}// Of postOrderVisitWithStack
/**
*********************
* The entrance of the program.
*
* @param args
* Not used now.
*********************
*/
public static void main(String args[]) {
BinaryCharTree tempTree = manualConstructTree();
("\r\nPreorder visit:");
();
("\r\nIn-order visit:");
();
("\r\nPost-order visit:");
();
("\r\n\r\nThe depth is: " + ());
("The number of nodes is: " + ());
();
("The values are: " + ());
("The indices are: " + ());
();
("Only object queue.");
("The values are: " + ());
("The indices are: " + ());
char[] tempCharArray = { 'A', 'B', 'C', 'D', 'E', 'F' };
int[] tempIndicesArray = { 0, 1, 2, 4, 5, 12 };
BinaryCharTree tempTree2 = new BinaryCharTree(tempCharArray, tempIndicesArray);
("\r\nPre-order visit:");
();
("\r\nIn-order visit:");
();
("\r\nPost-order visit:");
();
("\r\nIn-order visit with stack:");
();
("\r\nPre-order visit with stack:");
();
("\r\nPost-order visit with stack:");
();
}// Of main
第 27 天: Hanoi 塔问题
这是一个经典问题, 如果不知道请自行百度, 这里就不描述了.
- 在数据结构中, 一般把 Hanoi 塔放到栈这一章. 但我觉得, 将其展开更像是二叉树, 也有点三叉的味道.
- 虽然代码很短, 但参数复杂. 必须用有意义的变量名, 才不容易出错. 话说现实生活中一般不会遇到这样复杂的参数. 当然, 你也可以说:“才 4 个参数, 一点都不复杂”. 好吧, 过两天你如果能默写出来, 就赢了.
package ;
/**
 * Hanoi tower.
 *
 * @author Fan Min
 */
public class Hanoi {

    /**
     *********************
     * Move a number of plates. Every single move is printed on its own line in
     * the form "source->destination ".
     *
     * To move n plates, the upper n - 1 are first parked on the intermediary
     * pole, the largest one is moved to the destination, and the n - 1 parked
     * plates follow. Nothing is printed if paraNumber is not positive.
     *
     * @param paraSource
     *            The source pole.
     * @param paraIntermediary
     *            The intermediary pole.
     * @param paraDestination
     *            The destination pole.
     * @param paraNumber
     *            The number of plates.
     *********************
     */
    public static void hanoi(char paraSource, char paraIntermediary, char paraDestination,
            int paraNumber) {
        // Nothing to move. This is also the base case of the recursion; without
        // the guard a non-positive argument would recurse without end.
        if (paraNumber <= 0) {
            return;
        } // Of if

        hanoi(paraSource, paraDestination, paraIntermediary, paraNumber - 1);
        System.out.println(paraSource + "->" + paraDestination + " ");
        hanoi(paraIntermediary, paraSource, paraDestination, paraNumber - 1);
    }// Of hanoi

    /**
     *********************
     * The entrance of the program.
     *
     * @param args
     *            Not used now.
     *********************
     */
    public static void main(String args[]) {
        hanoi('a', 'b', 'c', 3);
    }// Of main
}// Of class Hanoi
第 28 天: Huffman 编码 (节点定义与文件读取)
不要被代码量吓到了! 今天只需要写到 148 行, 以及 main 里面相应的代码. 三天的代码一起贴上来是为了保证完整性.
- 定义了一个内嵌类. 如果是实际项目, 我就为其单独写一个文件了, 这里仅仅是为了方便.
- 每个节点的内容包括: 字符 (仅对叶节点有效)、权重 (用的整数, 该字符的个数)、指向子节点和父节点的引用. 这里指向父节点的引用是必须的.
- NUM_CHARS 是指 ASCII 字符集的字符个数. 为方便起见, 仅支持 ASCII.
- inputText 的引入只是想把程序尽可能细分成独立的模块, 这样便于学习和调试.
- alphabet 仅存 inputText 出现过的字符.
- alphabetLength 完全可以用 alphabet.length 代替, 但我就喜欢写成独立的变量.
- charCounts 要为所有的节点负责, 其元素对应于 HuffmanNode 里面的 weight. 为了节约, 可以把其中一个省掉.
- charMapping 是为了从 ASCII 里面的顺序映射到 alphabet 里面的顺序. 这也是我只采用 ASCII 字符集 (仅 256 字符) 的原因.
- huffmanCodes 将各个字符映射为一个字符串, 其实应该是二进制串. 我这里不是想偷懒么.
- nodes 要先把所有的节点存储在一个数组里面, 然后再链接它们. 这是常用招数.
- 构造方法仅初始化了 charMapping, 读入了文件.
- readText 采用了最简单粗暴的方式. 还可以有其它的逐行读入的方式.
- 要自己弄个文本文件, 里面存放一个字符串 abcdedgsgs 之类, 或者几行英文文本.
package ;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.stream.Collectors;
/**
 * Huffman tree, encoding, and decoding. For simplicity, only characters with
 * a code below NUM_CHARS are supported.
 *
 * Typical use: construct with a text file, then call constructAlphabet(),
 * constructTree() and generateCodes() in this order before coding() or
 * decoding().
 *
 * @author Fan Min
 */
public class Huffman {

    /**
     * An inner class for Huffman nodes.
     */
    class HuffmanNode {
        /**
         * The char. Only valid for leaf nodes.
         */
        char character;

        /**
         * Weight. It can also be double.
         */
        int weight;

        /**
         * The left child.
         */
        HuffmanNode leftChild;

        /**
         * The right child.
         */
        HuffmanNode rightChild;

        /**
         * The parent. It helps constructing the Huffman code of each character.
         */
        HuffmanNode parent;

        /**
         *******************
         * The first constructor
         *
         * @param paraCharacter
         *            The char, only meaningful for leaves.
         * @param paraWeight
         *            The weight.
         * @param paraLeftChild
         *            The left child, or null.
         * @param paraRightChild
         *            The right child, or null.
         * @param paraParent
         *            The parent, or null.
         *******************
         */
        public HuffmanNode(char paraCharacter, int paraWeight, HuffmanNode paraLeftChild,
                HuffmanNode paraRightChild, HuffmanNode paraParent) {
            character = paraCharacter;
            weight = paraWeight;
            leftChild = paraLeftChild;
            rightChild = paraRightChild;
            parent = paraParent;
        }// Of HuffmanNode

        /**
         *******************
         * To string.
         *
         * @return The node in the form "(character, weight)".
         *******************
         */
        @Override
        public String toString() {
            String resultString = "(" + character + ", " + weight + ")";

            return resultString;
        }// Of toString
    }// Of class HuffmanNode

    /**
     * The number of characters. 256 for ASCII.
     */
    public static final int NUM_CHARS = 256;

    /**
     * The input text. It is stored in a string for simplicity.
     */
    String inputText;

    /**
     * The length of the alphabet, also the number of leaves.
     */
    int alphabetLength;

    /**
     * The alphabet.
     */
    char[] alphabet;

    /**
     * The count of chars. The length is 2 * alphabetLength - 1 to include
     * non-leaf nodes.
     */
    int[] charCounts;

    /**
     * The mapping of chars to the indices in the alphabet.
     */
    int[] charMapping;

    /**
     * Codes for each char in the alphabet. It should have the same length as
     * alphabet.
     */
    String[] huffmanCodes;

    /**
     * All nodes. The last node is the root.
     */
    HuffmanNode[] nodes;

    /**
     *********************
     * The first constructor. It only allocates charMapping and reads the file.
     *
     * @param paraFilename
     *            The text filename.
     *********************
     */
    public Huffman(String paraFilename) {
        charMapping = new int[NUM_CHARS];

        readText(paraFilename);
    }// Of the first constructor

    /**
     *********************
     * Read text. The file is read as UTF-8; its lines are joined with "\n" and
     * stored in inputText, which is also printed.
     *
     * @param paraFilename
     *            The text filename.
     * @throws UncheckedIOException
     *             If the file cannot be read. The failure is reported to the
     *             caller instead of ending the JVM with exit status 0.
     *********************
     */
    public void readText(String paraFilename) {
        // try-with-resources closes the reader even if reading fails.
        try (BufferedReader tempReader = Files.newBufferedReader(Paths.get(paraFilename),
                StandardCharsets.UTF_8)) {
            inputText = tempReader.lines().collect(Collectors.joining("\n"));
        } catch (IOException ee) {
            throw new UncheckedIOException("Cannot read text file: " + paraFilename, ee);
        } // Of try

        System.out.println("The text is:\r\n" + inputText);
    }// Of readText

    /**
     *********************
     * Construct the alphabet. The results are stored in the member variables
     * charMapping and alphabet; alphabetLength and the leaf part of charCounts
     * are filled as well.
     *
     * @throws IllegalArgumentException
     *             If the text contains a character whose code is not below
     *             NUM_CHARS.
     * @throws IllegalStateException
     *             If the text is empty.
     *********************
     */
    public void constructAlphabet() {
        // Initialize. -1 marks characters that do not occur in the text.
        Arrays.fill(charMapping, -1);

        // The count for each char. At most NUM_CHARS chars.
        int[] tempCharCounts = new int[NUM_CHARS];

        // Step 1. Scan the string to obtain the counts.
        for (int i = 0; i < inputText.length(); i++) {
            char tempChar = inputText.charAt(i);
            // The index of the char in the charset.
            int tempCharIndex = (int) tempChar;
            if (tempCharIndex >= NUM_CHARS) {
                throw new IllegalArgumentException("Unsupported character '" + tempChar
                        + "' (code " + tempCharIndex + ") at position " + i
                        + "; only codes below " + NUM_CHARS + " are supported.");
            } // Of if

            System.out.print("" + tempCharIndex + " ");

            tempCharCounts[tempCharIndex]++;
        } // Of for i

        // Step 2. Scan to determine the size of the alphabet. All NUM_CHARS
        // slots must be inspected; stopping at 255 would miss the last one and
        // make Step 3 overflow the alphabet array.
        alphabetLength = 0;
        for (int i = 0; i < NUM_CHARS; i++) {
            if (tempCharCounts[i] > 0) {
                alphabetLength++;
            } // Of if
        } // Of for i

        if (alphabetLength == 0) {
            throw new IllegalStateException("The input text is empty; no alphabet can be built.");
        } // Of if

        // Step 3. Compress to the alphabet
        alphabet = new char[alphabetLength];
        charCounts = new int[2 * alphabetLength - 1];

        int tempCounter = 0;
        for (int i = 0; i < NUM_CHARS; i++) {
            if (tempCharCounts[i] > 0) {
                alphabet[tempCounter] = (char) i;
                charCounts[tempCounter] = tempCharCounts[i];
                charMapping[i] = tempCounter;
                tempCounter++;
            } // Of if
        } // Of for i

        System.out.println("The alphabet is: " + Arrays.toString(alphabet));
        System.out.println("Their counts are: " + Arrays.toString(charCounts));
        System.out.println("The char mappings are: " + Arrays.toString(charMapping));
    }// Of constructAlphabet

    /**
     *********************
     * Construct the tree bottom-up. In each round the two unprocessed nodes of
     * minimal weight become the left and right child of a new node. The node
     * created last is the root. constructAlphabet() must be called first.
     *********************
     */
    public void constructTree() {
        // Step 1. Allocate space.
        nodes = new HuffmanNode[alphabetLength * 2 - 1];
        boolean[] tempProcessed = new boolean[alphabetLength * 2 - 1];

        // Step 2. Initialize leaves.
        for (int i = 0; i < alphabetLength; i++) {
            nodes[i] = new HuffmanNode(alphabet[i], charCounts[i], null, null, null);
        } // Of for i

        // Step 3. Construct the tree.
        for (int i = alphabetLength; i < 2 * alphabetLength - 1; i++) {
            // Step 3.1 Select the first minimal as the left child.
            int tempLeft = selectMinimalUnprocessed(tempProcessed, i);

            // Step 3.2 Select the second minimal as the right child.
            int tempRight = selectMinimalUnprocessed(tempProcessed, i);
            System.out.println("Selecting " + tempLeft + " and " + tempRight);

            // Step 3.3 Construct the new node.
            charCounts[i] = charCounts[tempLeft] + charCounts[tempRight];
            nodes[i] = new HuffmanNode('*', charCounts[i], nodes[tempLeft], nodes[tempRight], null);

            // Step 3.4 Link with children.
            nodes[tempLeft].parent = nodes[i];
            nodes[tempRight].parent = nodes[i];
            System.out.println("The children of " + i + " are " + tempLeft + " and " + tempRight);
        } // Of for i
    }// Of constructTree

    /**
     *********************
     * Select the unprocessed node of minimal weight among the first paraLimit
     * nodes and mark it as processed. On ties the node with the smallest index
     * wins.
     *
     * @param paraProcessed
     *            The processed flags; the selected entry is set to true.
     * @param paraLimit
     *            Only indices below this value are considered.
     * @return The index of the selected node.
     *********************
     */
    private int selectMinimalUnprocessed(boolean[] paraProcessed, int paraLimit) {
        int resultIndex = -1;
        int tempMinimal = Integer.MAX_VALUE;
        for (int j = 0; j < paraLimit; j++) {
            if (paraProcessed[j]) {
                continue;
            } // Of if

            if (tempMinimal > charCounts[j]) {
                tempMinimal = charCounts[j];
                resultIndex = j;
            } // Of if
        } // Of for j

        paraProcessed[resultIndex] = true;
        return resultIndex;
    }// Of selectMinimalUnprocessed

    /**
     *********************
     * Get the root of the binary tree.
     *
     * @return The root.
     *********************
     */
    public HuffmanNode getRoot() {
        return nodes[nodes.length - 1];
    }// Of getRoot

    /**
     *********************
     * Pre-order visit. Prints every node as "(character, weight) ". Only used
     * for debugging.
     *
     * @param paraNode
     *            The root of the subtree to visit.
     *********************
     */
    public void preOrderVisit(HuffmanNode paraNode) {
        System.out.print("(" + paraNode.character + ", " + paraNode.weight + ") ");

        if (paraNode.leftChild != null) {
            preOrderVisit(paraNode.leftChild);
        } // Of if

        if (paraNode.rightChild != null) {
            preOrderVisit(paraNode.rightChild);
        } // Of if
    }// Of preOrderVisit

    /**
     *********************
     * Generate codes for each character in the alphabet. Each code is obtained
     * by walking from the leaf up to the root, prepending "0" for a left branch
     * and "1" for a right branch. constructTree() must be called first.
     *
     * If the alphabet has a single character, the leaf is the root and its
     * code is the empty string.
     *********************
     */
    public void generateCodes() {
        huffmanCodes = new String[alphabetLength];
        HuffmanNode tempNode;
        for (int i = 0; i < alphabetLength; i++) {
            tempNode = nodes[i];
            // Use tempCharCode instead of tempCode such that it is unlike
            // tempNode.
            // This is an advantage of long names.
            String tempCharCode = "";
            while (tempNode.parent != null) {
                if (tempNode == tempNode.parent.leftChild) {
                    tempCharCode = "0" + tempCharCode;
                } else {
                    tempCharCode = "1" + tempCharCode;
                } // Of if

                tempNode = tempNode.parent;
            } // Of while

            huffmanCodes[i] = tempCharCode;
            System.out.println("The code of " + alphabet[i] + " is " + tempCharCode);
        } // Of for i
    }// Of generateCodes

    /**
     *********************
     * Encode the given string. generateCodes() must be called first.
     *
     * @param paraString
     *            The given string.
     * @return The concatenated codes as a string of '0' and '1'.
     * @throws IllegalArgumentException
     *             If a character does not occur in the alphabet.
     *********************
     */
    public String coding(String paraString) {
        StringBuilder resultCodeBuilder = new StringBuilder();

        for (int i = 0; i < paraString.length(); i++) {
            char tempChar = paraString.charAt(i);

            // From the original char to the location in the alphabet.
            int tempIndex = (tempChar < NUM_CHARS) ? charMapping[tempChar] : -1;
            if (tempIndex < 0) {
                throw new IllegalArgumentException(
                        "Character '" + tempChar + "' does not occur in the alphabet.");
            } // Of if

            // From the location in the alphabet to the code.
            resultCodeBuilder.append(huffmanCodes[tempIndex]);
        } // Of for i
        return resultCodeBuilder.toString();
    }// Of coding

    /**
     *********************
     * Decode the given string by walking from the root: '0' goes to the left
     * child, any other character to the right child. On reaching a leaf its
     * character is emitted and the walk restarts at the root. The visited
     * nodes are printed for debugging.
     *
     * @param paraString
     *            The given string.
     * @return The decoded text.
     *********************
     */
    public String decoding(String paraString) {
        StringBuilder resultBuilder = new StringBuilder();

        HuffmanNode tempNode = getRoot();

        for (int i = 0; i < paraString.length(); i++) {
            if (paraString.charAt(i) == '0') {
                tempNode = tempNode.leftChild;
                System.out.println(tempNode);
            } else {
                tempNode = tempNode.rightChild;
                System.out.println(tempNode);
            } // Of if

            // Internal nodes always have two children, so a missing left child
            // identifies a leaf.
            if (tempNode.leftChild == null) {
                System.out.println("Decode one:" + tempNode);
                // Decode one char.
                resultBuilder.append(tempNode.character);

                // Return to the root.
                tempNode = getRoot();
            } // Of if
        } // Of for i

        return resultBuilder.toString();
    }// Of decoding

    /**
     *********************
     * The entrance of the program.
     *
     * @param args
     *            Optionally args[0] is the text filename; otherwise the
     *            built-in default is used.
     *********************
     */
    public static void main(String args[]) {
        // NOTE(review): the default below is the literal found in the original
        // listing; it looks like a directory with the file name missing -
        // confirm the intended file.
        String tempFilename = (args.length > 0) ? args[0] : "D:/minfan/temp/";
        Huffman tempHuffman = new Huffman(tempFilename);
        tempHuffman.constructAlphabet();

        tempHuffman.constructTree();

        HuffmanNode tempRoot = tempHuffman.getRoot();
        System.out.println("The root is: " + tempRoot);
        System.out.println("Preorder visit:");
        tempHuffman.preOrderVisit(tempHuffman.getRoot());

        tempHuffman.generateCodes();

        String tempCoded = tempHuffman.coding("abcdb");
        System.out.println("Coded: " + tempCoded);
        String tempDecoded = tempHuffman.decoding(tempCoded);
        System.out.println("Decoded: " + tempDecoded);
    }// Of main
}// Of class Huffman
第 29 天: Huffman 编码 (建树)
今天写到 274 行.
- Arrays.fill(charMapping, -1); 这种初始化工作非常重要. 搞不好就会调试很久才找到 bug.
- 171 行将 char 强制转换为 int, 即其在 ASCII 字符集中的位置. 这是底层代码的特权. 学渣没资格用哈哈.
- 变量多的时候你才能体会到用 temp, para 这些前缀来区分不同作用域变量的重要性. 没有特殊前缀的就是成员变量.
- 建树就是一个自底向上, 贪心选择的过程. 确定子节点、父节点的代码是核心.
- 最后生成的节点就是根节点.
- 手绘相应的 Huffman 树对照, 才能真正理解.
第 30 天: Huffman 编码 (编码与解码)
- 前序遍历代码的作用仅仅是调试.
- 双重循环有一点点难度. 好像也并没有比第 26 天的难.
- 306 每次重新初始化很重要.
- 336 行使用了 charMapping 和强制类型转换.
- 编码是从叶节点到根节点, 解码就是反过来.
- 解码获得原先的字符串, 就验证正确性了.
附录
根据陈樊同学的建议进行了代码修改.
- 22 天 new Integer(int) 被弃用, 新的方法是使用 Integer.valueOf(), 已改代码.
- 23 天的强制类型转换不安全, 所以我重新学习了泛型, 并使用这个来替换 CircleObjectQueue. 由于泛型数组不被支持, 在新类的内部会涉及强制类型转换, 但在外部使用它时不再需要.
package ;
/**
 * Circle queue using general type. Elements are kept in a fixed-size circular
 * buffer; unlike CircleObjectQueue no cast is needed on the caller's side.
 *
 * @param <T>
 *            The type of the elements.
 * @author Fan Min
 */
public class CircleGenericQueue<T> {

    /**
     * The total space. One space can never be used.
     */
    public static final int TOTAL_SPACE = 10;

    /**
     * The data. Generic arrays are unsupported.
     * We still need an object array.
     */
    Object[] data;

    /**
     * The index of the head. It only grows; the buffer position is
     * head % TOTAL_SPACE.
     */
    int head;

    /**
     * The index of the tail. It only grows; the buffer position is
     * tail % TOTAL_SPACE.
     */
    int tail;

    /**
     *******************
     * The constructor
     *******************
     */
    public CircleGenericQueue() {
        data = new Object[TOTAL_SPACE];
        head = 0;
        tail = 0;
    }// Of the first constructor

    /**
     *********************
     * Enqueue. If the queue already holds TOTAL_SPACE - 1 elements, "Queue
     * full." is printed and the value is discarded.
     *
     * @param paraValue
     *            The value of the new node.
     *********************
     */
    public void enqueue(T paraValue) {
        // head and tail are never wrapped, so fullness must be decided from
        // the element count. Comparing (tail + 1) % TOTAL_SPACE with the raw
        // head stops working once head reaches TOTAL_SPACE.
        if (tail - head >= TOTAL_SPACE - 1) {
            System.out.println("Queue full.");
            return;
        } // Of if

        data[tail % TOTAL_SPACE] = paraValue;
        tail++;
    }// Of enqueue

    /**
     *********************
     * Dequeue.
     *
     * @return The value at the head, or null if the queue is empty.
     *********************
     */
    public T dequeue() {
        if (head == tail) {
            return null;
        } // Of if

        // The position must be wrapped; indexing with the raw head runs out of
        // the array after TOTAL_SPACE dequeues.
        int tempPosition = head % TOTAL_SPACE;
        // Safe: only enqueue(T) stores into data.
        @SuppressWarnings("unchecked")
        T resultValue = (T) data[tempPosition];
        // Drop the obsolete reference so the object can be garbage collected.
        data[tempPosition] = null;
        head++;

        return resultValue;
    }// Of dequeue

    /**
     *********************
     * Overrides the method claimed in Object, the superclass of any class.
     *
     * @return "empty" for an empty queue; otherwise the elements from head to
     *         tail, each followed by ", ".
     *********************
     */
    @Override
    public String toString() {
        if (head == tail) {
            return "empty";
        } // Of if

        StringBuilder resultBuilder = new StringBuilder();
        for (int i = head; i < tail; i++) {
            resultBuilder.append(data[i % TOTAL_SPACE]).append(", ");
        } // Of for i

        return resultBuilder.toString();
    }// Of toString

    /**
     *********************
     * The entrance of the program.
     *
     * @param args
     *            Not used now.
     *********************
     */
    public static void main(String args[]) {
        CircleGenericQueue<String> tempQueue = new CircleGenericQueue<>();
    }// Of main
}// Of CircleGenericQueue
第 23 天的代码也作相应修改. 为了完整性, 就全部拷贝过来了.
package ;
import ;
import ;
import ;
import ;
import ;
/**
* Binary tree with char type elements.
*
* @author Fan Min minfanphd@.
*/
public class BinaryCharTree {
/**
* The value in char.
*/
char value;
/**
* The left child.
*/
BinaryCharTree leftChild;
/**
* The right child.
*/
BinaryCharTree rightChild;
/**
*********************
* The first constructor.
*
* @param paraName
* The value.
*********************
*/
public BinaryCharTree(char paraName) {
value = paraName;
leftChild = null;
rightChild = null;
}// Of the constructor
/**
*********************
* The first constructor.
*
* @param paraName
* The value.
*********************
*/
public static BinaryCharTree manualConstructTree() {
// Step 1. Construct a tree with only one node.
BinaryCharTree resultTree = new BinaryCharTree('a');
// Step 2. Construct all nodes. The first node is the root.
// BinaryCharTreeNode tempTreeA = ;
BinaryCharTree tempTreeB = new BinaryCharTree('b');
BinaryCharTree tempTreeC = new BinaryCharTree('c');
BinaryCharTree tempTreeD = new BinaryCharTree('d');
BinaryCharTree tempTreeE = new BinaryCharTree('e');
BinaryCharTree tempTreeF = new BinaryCharTree('f');
BinaryCharTree tempTreeG = new BinaryCharTree('g');
// Step 3. Link all nodes.
= tempTreeB;
= tempTreeC;
= tempTreeD;
= tempTreeE;
= tempTreeF;
= tempTreeG;
return resultTree;
}// Of manualConstructTree
/**
*********************
* Pre-order visit.
*********************
*/
public void preOrderVisit() {
("" + value + " ");
if (leftChild != null) {
();
} // Of if
if (rightChild != null) {
();
} // Of if
}// Of preOrderVisit
/**
*********************
* In-order visit.
*********************
*/
public void inOrderVisit() {
if (leftChild != null) {
();
} // Of if
("" + value + " ");
if (rightChild != null) {
();
} // Of if
}// Of inOrderVisit
/**
*********************
* Post-order visit.
*********************
*/
public void postOrderVisit() {
if (leftChild != null) {
();
} // Of if
if (rightChild != null) {
();
} // Of if
("" + value + " ");
}// Of postOrderVisit
/**
*********************
* Get the depth of the binary tree.
*
* @return The depth. It is 1 if there is only one node, ., the root.
*********************
*/
public int getDepth() {
// It is a leaf.
if ((leftChild == null) && (rightChild == null)) {
return 1;
} // Of if
// The depth of the left child.
int tempLeftDepth = 0;
if (leftChild != null) {
tempLeftDepth = ();
} // Of if
// The depth of the right child.
int tempRightDepth = 0;
if (rightChild != null) {
tempRightDepth = ();
} // Of if
// The depth should increment by 1.
if (tempLeftDepth >= tempRightDepth) {
return tempLeftDepth + 1;
} else {
return tempRightDepth + 1;
} // Of if
}// Of getDepth
/**
*********************
* Get the number of nodes.
*
* @return The number of nodes.
*********************
*/
public int getNumNodes() {
// It is a leaf.
if ((leftChild == null) && (rightChild == null)) {
return 1;
} // Of if
// The number of nodes of the left child.
int tempLeftNodes = 0;
if (leftChild != null) {
tempLeftNodes = ();
} // Of if
// The number of nodes of the right child.
int tempRightNodes = 0;
if (rightChild != null) {
tempRightNodes = ();
} // Of if
// The total number of nodes.
return tempLeftNodes + tempRightNodes + 1;
}// Of getNumNodes
/**
* The values of nodes according to breadth first traversal.
*/
char[] valuesArray;
/**
* The indices in the complete binary tree.
*/
int[] indicesArray;
/**
********************
* Convert the tree to data arrays, including a char array and an int array.
* The results are stored in two member variables.
*
* @see #valuesArray
* @see #indicesArray
*********************
*/
public void toDataArrays() {
// Initialize arrays.
int tempLength = getNumNodes();
valuesArray = new char[tempLength];
indicesArray = new int[tempLength];
int i = 0;
// Traverse and convert at the same time.
CircleObjectQueue tempQueue = new CircleObjectQueue();
(this);
CircleIntQueue tempIntQueue = new CircleIntQueue();
(0);
BinaryCharTree tempTree = (BinaryCharTree) ();
int tempIndex = ();
while (tempTree != null) {
valuesArray[i] = ;
indicesArray[i] = tempIndex;
i++;
if ( != null) {
();
(tempIndex * 2 + 1);
} // Of if
if ( != null) {
();
(tempIndex * 2 + 2);
} // Of if
tempTree = (BinaryCharTree) ();
tempIndex = ();
} // Of while
}// Of toDataArrays
/**
********************
* Convert the tree to data arrays, including a char array and an int array.
* The results are stored in two member variables.
*
* @see #valuesArray
* @see #indicesArray
*********************
*/
public void toDataArraysObjectQueue() {
// Initialize arrays.
int tempLength = getNumNodes();
valuesArray = new char[tempLength];
indicesArray = new int[tempLength];
int i = 0;
// Traverse and convert at the same time.
CircleObjectQueue tempQueue = new CircleObjectQueue();
(this);
CircleObjectQueue tempIntQueue = new CircleObjectQueue();
Integer tempIndexInteger = (0);
(tempIndexInteger);
BinaryCharTree tempTree = (BinaryCharTree) ();
int tempIndex = ((Integer) ()).intValue();
("tempIndex = " + tempIndex);
while (tempTree != null) {
valuesArray[i] = ;
indicesArray[i] = tempIndex;
i++;
if ( != null) {
();
((tempIndex * 2 + 1));
} // Of if
if ( != null) {
();
((tempIndex * 2 + 2));
} // Of if
tempTree = (BinaryCharTree) ();
if (tempTree == null) {
break;
} // Of if
tempIndex = ((Integer) ()).intValue();
} // Of while
}// Of toDataArraysObjectQueue
/**
********************
* Convert the tree to data arrays, including a char array and an int array.
* The results are stored in two member variables.
*
* @see #valuesArray
* @see #indicesArray
*********************
*/
public void toDataArraysGenericQueue() {
// Initialize arrays.
int tempLength = getNumNodes();
valuesArray = new char[tempLength];
indicesArray = new int[tempLength];
int i = 0;
// Traverse and convert at the same time.
CircleGenericQueue<BinaryCharTree> tempQueue = new CircleGenericQueue<BinaryCharTree>();
(this);
CircleGenericQueue<Integer> tempIntQueue = new CircleGenericQueue<Integer>();
Integer tempIndexInteger = (0);
(tempIndexInteger);
BinaryCharTree tempTree = ();
int tempIndex = (()).intValue();
("tempIndex = " + tempIndex);
while (tempTree != null) {
valuesArray[i] = ;
indicesArray[i] = tempIndex;
i++;
if ( != null) {
();
((tempIndex * 2 + 1));
} // Of if
if ( != null) {
();
((tempIndex * 2 + 2));
} // Of if
tempTree = ();
if (tempTree == null) {
break;
} // Of if
tempIndex = (()).intValue();
} // Of while
}// Of toDataArraysGenericQueue
/**
*********************
* The second constructor. The parameters must be correct since no validity
* check is undertaken.
*
* @param paraDataArray
* The array for data.
* @param paraIndicesArray
* The array for indices.
*********************
*/
public BinaryCharTree(char[] paraDataArray, int[] paraIndicesArray) {
// Step 1. Use a sequential list to store all nodes.
int tempNumNodes = ;
BinaryCharTree[] tempAllNodes = new BinaryCharTree[tempNumNodes];
for (int i = 0; i < tempNumNodes; i++) {
tempAllNodes[i] = new BinaryCharTree(paraDataArray[i]);
} // Of for i
// Step 2. Link these nodes.
for (int i = 1; i < tempNumNodes; i++) {
for (int j = 0; j < i; j++) {
.println("indices " + paraIndicesArray[j] + " vs. " + paraIndicesArray[i]);
if (paraIndicesArray[i] == paraIndicesArray[j] * 2 + 1) {
tempAllNodes[j].leftChild = tempAllNodes[i];
("Linking " + j + " with " + i);
break;
} else if (paraIndicesArray[i] == paraIndicesArray[j] * 2 + 2) {
tempAllNodes[j].rightChild = tempAllNodes[i];
("Linking " + j + " with " + i);
break;
} // Of if
} // Of for j
} // Of for i
// Step 3. The root is the first node.
value = tempAllNodes[0].value;
leftChild = tempAllNodes[0].leftChild;
rightChild = tempAllNodes[0].rightChild;
}// Of the the second constructor
/**
*********************
* In-order visit with stack.
*********************
*/
public void inOrderVisitWithStack() {
ObjectStack tempStack = new ObjectStack();
BinaryCharTree tempNode = this;
while (!() || tempNode != null) {
if (tempNode != null) {
(tempNode);
tempNode = ;
} else {
tempNode = (BinaryCharTree) ();
("" + + " ");
tempNode = ;
} // Of if
} // Of while
}// Of inOrderVisitWithStack
/**
*********************
* Pre-order visit with stack.
*********************
*/
public void preOrderVisitWithStack() {
ObjectStack tempStack = new ObjectStack();
BinaryCharTree tempNode = this;
while (!() || tempNode != null) {
if (tempNode != null) {
("" + + " ");
(tempNode);
tempNode = ;
} else {
tempNode = (BinaryCharTree) ();
tempNode = ;
} // Of if
} // Of while
}// Of preOrderVisitWithStack
/**
*********************
* Post-order visit with stack.
*********************
*/
public void postOrderVisitWithStack() {
ObjectStack tempStack = new ObjectStack();
BinaryCharTree tempNode = this;
ObjectStack tempOutputStack = new ObjectStack();
while (!() || tempNode != null) {
if (tempNode != null) {
//Store for output.
(new Character());
(tempNode);
tempNode = ;
} else {
tempNode = (BinaryCharTree) ();
tempNode = ;
} // Of if
} // Of while
//Now reverse output.
while (!()) {
("" + () + " ");
}//Of while
}// Of postOrderVisitWithStack
/**
*********************
* The entrance of the program.
*
* @param args
* Not used now.
*********************
*/
public static void main(String args[]) {
BinaryCharTree tempTree = manualConstructTree();
("\r\nPreorder visit:");
();
("\r\nIn-order visit:");
();
("\r\nPost-order visit:");
();
("\r\n\r\nThe depth is: " + ());
("The number of nodes is: " + ());
();
("The values are: " + ());
("The indices are: " + ());
//();
();
("Only object queue.");
("The values are: " + ());
("The indices are: " + ());
char[] tempCharArray = { 'A', 'B', 'C', 'D', 'E', 'F' };
int[] tempIndicesArray = { 0, 1, 2, 4, 5, 12 };
BinaryCharTree tempTree2 = new BinaryCharTree(tempCharArray, tempIndicesArray);
("\r\nPre-order visit:");
();
("\r\nIn-order visit:");
();
("\r\nPost-order visit:");
();
("\r\nIn-order visit with stack:");
();
("\r\nPre-order visit with stack:");
();
("\r\nPost-order visit with stack:");
();
}// Of main
}// Of BinaryCharTree
欢迎拍砖!