压缩算法实现之LZ78

时间:2023-03-08 19:51:59
压缩算法实现之LZ78

LZ78编码

LZ78算法,建立词典的算法。

LZ78的编码思想:

不断地从字符流中提取新的缀-符串(String),通俗地理解为新"词条",然后用"代号"也就是码字(Code word)表示这个"词条"。

对字符流的编码就变成了用码字(Code word)去替换字符流(Charstream),生成码字流(Codestream),从而达到压缩数据的目的。

几个约定:

  1. 字符流(Charstream):要被编码的数据序列。
  2. 字符(Character):字符流中的基本数据单元。
  3. 前缀(Prefix): 在一个字符之前的字符序列。
  4. 缀-符串(String):前缀+字符。
  5. 码字(Code word):编码以后在码字流中的基本数据单元,代表词典中的一串字符
  6. 码字流(Codestream): 码字和字符组成的序列,是编码器的输出
  7. 词典(Dictionary): 缀-符串表。按照词典中的索引号对每条缀-符串(String)指定一个码字(Code word)
  8. 当前前缀(Current prefix):在编码算法中使用,指当前正在处理的前缀,用符号P表示
  9. 当前字符(Current character):在编码算法中使用,指当前前缀之后的字符,用符号Char表示。
  10. 当前码字(Current code word): 在译码算法中使用,指当前处理的码字,用W表示当前码字,String.W表示当前码字的缀-符串。

编码算法步骤:

步骤1: 在开始时,词典和当前前缀P 都是空的。

步骤2: 当前字符Char :=字符流中的下一个字符。

步骤3: 判断P+Char是否在词典中:

(1) 如果"是":用Char扩展P,让P := P+Char ;

(2) 如果"否":① 输出与当前前缀P相对应的码字和当前字符Char;

② 把字符串P+Char 添加到词典中。③ 令P :=空值。

(3) 判断字符流中是否还有字符需要编码

① 如果"是":返回到步骤2。

② 如果"否":若当前前缀P不空,输出相应于当前前缀P的码字,结束编码。

解码算法步骤:

步骤1:在开始时词典为空;

步骤2:当前码字W:= 码字流中的下一个码字

步骤3:当前字符Char:=紧随码字之后的字符

步骤4:把当前码字的缀-符串(string.W)输出到字符流,然后输出字符Char

步骤5:把string.W + Char添加到词典中

步骤6:判断码字流中是否还有码字要译码,

(1)如果有,返回步骤2 (2)如果没有,则结束

代码实现(C#):

  1. /// <summary>
  2.     /// LZ78编码所需词典
  3.     /// </summary>
  4.     public struct Dictionary
  5.     {
  6.         public int id;
  7.         public string context;
  8.         public Dictionary(int id, string str)
  9.         {
  10.             this.id = id;
  11.             this.context = str;
  12.         }
  13.     }
  1. /// <summary>
  2.     /// 编码器类
  3.     /// </summary>
  4.     public static class Encoder
  5.     {
  6.         /// <summary>
  7.         /// 词典
  8.         /// </summary>
  9.         static List<Dictionary> D = new List<Dictionary>();
  10.         /// <summary>
  11.         /// 在词典中查找相应串
  12.         /// </summary>
  13.         /// <param name="item"></param>
  14.         /// <param name="D"></param>
  15.         /// <returns></returns>
  16.         static bool Find(string item, List<Dictionary> D)
  17.         {
  18.             foreach (Dictionary d in D)
  19.                 if (d.context == item)
  20.                     return true;
  21.             return false;
  22.         }
  23.         /// <summary>
  24.         /// 根据词典条目内容查找相应编号
  25.         /// </summary>
  26.         /// <param name="item"></param>
  27.         /// <param name="D"></param>
  28.         /// <returns></returns>
  29.         static int GetDicID(string item, List<Dictionary> D)
  30.         {
  31.             foreach (Dictionary d in D)
  32.                 if (d.context == item)
  33.                     return d.id;
  34.             return 0;
  35.         }
  36.         /// <summary>
  37.         /// 将一个条目加入词典
  38.         /// </summary>
  39.         /// <param name="item"></param>
  40.         /// <param name="D"></param>
  41.         static void AddToDic(string item, List<Dictionary> D)
  42.         {
  43.             int maxID;
  44.             if (D.Count == 0)
  45.                 maxID = 0;
  46.             else
  47.                 maxID = D.Last().id;
  48.             D.Add(new Dictionary(maxID + 1, item));
  49.         }
  50.         /// <summary>
  51.         /// 显示词典
  52.         /// </summary>
  53.         public static void ShowDictionary()
  54.         {
  55.             Console.WriteLine("Dictionary:");
  56.             foreach (Dictionary d in D)
  57.             {
  58.                 Console.WriteLine("<{0},{1}>", d.id, d.context);
  59.             }
  60.         }
  61.         /// <summary>
  62.         /// 执行LZ78编码算法
  63.         /// </summary>
  64.         /// <param name="str"></param>
  65.         public static void Execute(string str)
  66.         {
  67.             StringBuilder P = new StringBuilder();
  68.             char CHAR;
  69.             P.Clear();
  70.             foreach (char c in str)
  71.             {
  72.                 CHAR = c;
  73.                 if (Find((P.ToString() + CHAR.ToString()), D))
  74.                     P.Append(CHAR);
  75.                 else
  76.                 {
  77.                     Console.Write("({0},{1})", GetDicID(P.ToString(), D), c);
  78.                     AddToDic(P.ToString() + c.ToString(), D);
  79.                     P.Clear();
  80.                 }
  81.             }
  82.             if (P.ToString() != "")
  83.                 Console.Write("({0},{1})", GetDicID(P.ToString(), D), "/");
  84.             Console.WriteLine();
  85.         }
  86.     }
  1. /// <summary>
  2.     /// 解码器类
  3.     /// </summary>
  4.     public static class Decoder
  5.     {
  6.         /// <summary>
  7.         /// 内部类:将码字字符串转换为编码数组
  8.         /// </summary>
  9.         struct Codes
  10.         {
  11.             public int id;
  12.             public char code;
  13.             public Codes(int id, char code)
  14.             {
  15.                 this.id = id;
  16.                 this.code = code;
  17.             }
  18.         }
  19.         /// <summary>
  20.         /// 词典
  21.         /// </summary>
  22.         static List<Dictionary> D = new List<Dictionary>();
  23.         /// <summary>
  24.         /// 码字流,从字符串中获取
  25.         /// </summary>
  26.         static List<Codes> CodeStream = new List<Codes>();
  27.         /// <summary>
  28.         /// 将码字串变为码字流
  29.         /// </summary>
  30.         /// <param name="str"></param>
  31.         static void BuildCodes(string str)
  32.         {
  33.             /******************
  34.              * stauts 定义:
  35.              * 0: 开始/结束状态
  36.              * 1: 逗号之前
  37.              * 2: 逗号之后
  38.              ******************/
  39.             int status = 0;
  40.             int id = 0;
  41.             char code = (char)0;
  42.             string number = "";
  43.             foreach (char c in str)
  44.             {
  45.                 if (c == '(')
  46.                     status = 1;
  47.                 else if (status == 1 && c != ',')
  48.                     number += c;
  49.                 else if (c == ',')
  50.                 {
  51.                     status = 2;
  52.                     id = Convert.ToInt32(number);
  53.                     number = "";
  54.                 }
  55.                 else if (status == 2)
  56.                 {
  57.                     code = c;
  58.                     status = 0;
  59.                 }
  60.                 else if (c == ')')
  61.                     CodeStream.Add(new Codes(id, code));
  62.             }
  63.         }
  64.         /// <summary>
  65.         /// 将一个条目加入词典
  66.         /// </summary>
  67.         /// <param name="item"></param>
  68.         /// <param name="D"></param>
  69.         static void AddToDic(string item, List<Dictionary> D)
  70.         {
  71.             int maxID;
  72.             if (D.Count == 0)
  73.                 maxID = 0;
  74.             else
  75.                 maxID = D.Last().id;
  76.             D.Add(new Dictionary(maxID + 1, item));
  77.         }
  78.         /// <summary>
  79.         /// 根据词典序号找出词典内容
  80.         /// </summary>
  81.         /// <param name="id"></param>
  82.         /// <param name="D"></param>
  83.         /// <returns></returns>
  84.         static string GetContext(int id, List<Dictionary> D)
  85.         {
  86.             foreach (Dictionary d in D)
  87.             {
  88.                 if (d.id == id)
  89.                     return d.context;
  90.             }
  91.             return string.Empty;
  92.         }
  93.         /// <summary>
  94.         /// 执行LZ78译码算法
  95.         /// </summary>
  96.         /// <param name="str"></param>
  97.         public static void Execute(string str)
  98.         {
  99.             int W;
  100.             char CHAR;
  101.             string original;
  102.             BuildCodes(str);
  103.             foreach (Codes c in CodeStream)
  104.             {
  105.                 W = c.id;
  106.                 if (c.code != '/')
  107.                     CHAR = c.code;
  108.                 else CHAR = (char)0;
  109.                 if (W == 0)
  110.                 {
  111.                     Console.Write(CHAR);
  112.                     AddToDic(CHAR.ToString(), D);
  113.                 }
  114.                 else
  115.                 {
  116.                     original = GetContext(W, D);
  117.                     Console.Write(original + CHAR.ToString());
  118.                     AddToDic(original + CHAR.ToString(), D);
  119.                 }
  120.             }
  121.             Console.WriteLine();
  122.         }
  123.     }

执行效果(主界面程序代码省略):

压缩算法实现之LZ78

可见算法执行的结果是完全正确的。

源码下载:http://files.cnblogs.com/ryuasuka/LZ78.rar