javascript脚本压缩工具JSEncoder实现
1、算法原理:
从javascript脚本文件中提取所有单词(匹配[\w$]+的片段),按首次出现的顺序存入字典表;字典表是一个用|分隔的字符串。然后把每个单词在字典中的序号(用仿base64的编码表示)写回代码中该单词原来的位置,形成压缩后的js代码;运行时由一小段解码函数根据字典还原并eval执行。
2、压缩效果:
jquery-1.2.3.js原始文件大小95kb->[其他工具处理,去掉回车、注释等]jquery-1.2.3.min.js,大小53kb
=>本文工具压缩后:32kb
3、可选其他工具:
JSA 2.0 pre-alpha:http://sourceforge.net/project/showfiles.php?group_id=175776
packer:http://dean.edwards.name/packer/
4、下载(包含源代码在jar文件中)
2008.4 Ver:0.5 下载
5、源代码
java代码如下,写完代码之后才发现这是JSA(http://sourceforge.net/project/showfiles.php?group_id=175776)的压缩算法的再实现,不过好像作者没有开源,
本文算作是一种技术上的研究了。
通过对jquery-1.2.3.min.js http://code.google.com/p/jqueryjs/downloads/detail?name=jquery-1.2.3.min.js
进行压缩测试通过,压缩率>40%.
1package com.cngd.jstool;
2
3import java.io.FileReader;
4import java.io.BufferedReader;
5import java.io.FileOutputStream;
6import java.io.IOException;
7import java.util.regex.Matcher;
8import java.util.regex.Pattern;
9import java.util.Vector;
10import java.text.DecimalFormat;
11
/**
 * JSEncoder: a dictionary-based JavaScript compressor.
 * <p/>
 * Every run of word characters ({@code [\w$]+}) in the input script is replaced by a short
 * base64-like token. The distinct words are emitted as a {@code |}-separated dictionary next to
 * a small JavaScript decoder stub that rebuilds the original text and passes it to {@code eval}.
 * <p/>
 * After writing this code the author found that it is a re-implementation of the compression
 * algorithm of JSA (http://sourceforge.net/project/showfiles.php?group_id=175776).
 * Results for jquery-1.2.3.min.js:
 * -------------------------------------------------------------------
 * original | JSEncoder | JSA-20071021(2.0 pre-alpha) | jquery packer
 * -------------------------------------------------------------------
 * 53kb | 32kb | 29kb | 29kb
 * -------------------------------------------------------------------
 * JSA additionally shortens local variable names, which is why its output is smaller.
 * <p/>
 * Note: line breaks in the input are replaced by single spaces before encoding, so the input
 * should be a script that still works when joined onto one line (e.g. already minified).
 * <p/>
 * User: (http://www.cnblogs.com/midea0978)
 * Date: 2008-4-18
 * Version:0.5
 */
public class JSEncoder {
    /** Token alphabet; the character at position {@code i} is the digit with value {@code i}. */
    public static final String ENCODE_BASE64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_$";

    /** When {@code true}, the dictionary and intermediate results are printed to standard output. */
    public boolean isDebug = false;

    /** Matches one word to be replaced by a token; compiled once instead of on every call. */
    private static final Pattern WORD = Pattern.compile("([\\w\\$]+)");

    /** JavaScript decoder stub, up to and including the opening parenthesis of its argument list. */
    private static final String DECODER_PREFIX = "/* Compressed by JSEncoder */\neval(function(E,I,A,D,J,K,L,H){function C(A){return A<62?String.fromCharCode(A+=A<26?65:A<52?71:-4):A<63?\'_\':A<64?\'$\':C(A>>6)+C(A&63)}while(A>0)K[C(D--)]=I[--A];function N(A){return K[A]==L[A]?A:K[A]}if(\'\'.replace(/^/,String)){var M=E.match(J),B=M[0],F=E.split(J),G=0;if(E.indexOf(F[0]))F=[\'\'].concat(F);do{H[A++]=F[G++];H[A++]=N(B)}while(B=M[G]);H[A++]=F[G]||\'\';return H.join(\'\')}return E.replace(J,N)}(";

    /**
     * Compresses the given JavaScript file and prints size statistics to standard output.
     *
     * @param filename path of the JavaScript file to compress
     * @param offset   value added to every dictionary index before it is turned into a token;
     *                 intended to be {@code >= 0}. Different offsets shift the token table, and
     *                 smaller offsets give shorter tokens and therefore better compression.
     * @return the self-decoding compressed script; if the input contains no word at all there is
     *         nothing to encode and the file content is returned unchanged
     * @throws Exception if the file cannot be read or a token number becomes negative
     */
    public String encode(String filename, int offset) throws Exception {
        String source = readFileData(filename);
        int size = source.length();

        // The script is embedded in a single-quoted JavaScript string literal: put it on one
        // line, then escape backslashes first and single quotes second.
        String jscript = source.replace('\n', ' ')
                .replace("\\", "\\\\")
                .replace("'", "\\'");

        // Insertion-ordered map word -> dictionary index; gives constant-time lookups while
        // keeping the first-seen order that the decoder relies on.
        java.util.Map<String, Integer> dict = new java.util.LinkedHashMap<String, Integer>();
        StringBuffer encscript = new StringBuffer();
        Matcher m = WORD.matcher(jscript);

        debugInfo("=====编码字典对应表=====");
        while (m.find()) {
            String element = m.group(1);
            Integer index = dict.get(element);
            if (index == null) {
                index = Integer.valueOf(dict.size());
                dict.put(element, index);
            }
            debugInfo(index + "==>" + element);
            String token = Base64Encode(offset + index.intValue() + 1);
            // Tokens may contain '$', which is special in replacement strings.
            m.appendReplacement(encscript, Matcher.quoteReplacement(token));
        }
        m.appendTail(encscript);
        debugInfo("===== 编码字典结束 =====");
        debugInfo("Offset=" + offset + ",字典大小=" + dict.size());

        if (dict.isEmpty()) {
            // No tokens: the decoder stub dereferences the first match of its token regex,
            // so it cannot be used here. Hand the script back as it was read.
            printStatistics(size, size);
            return source;
        }

        debugInfo("压缩后的代码:\n" + encscript.toString());
        StringBuilder dicttab = new StringBuilder();
        for (String word : dict.keySet()) {
            if (dicttab.length() > 0) {
                dicttab.append('|');
            }
            dicttab.append(word);
        }
        String dictstr = dicttab.toString();
        debugInfo("字典字符串:\n" + dictstr);

        String res = formatCode(encscript.toString(), dictstr, dict.size(), offset);
        printStatistics(size, res.length());
        return res;
    }

    /**
     * Prints the original size, the packed size and the compression ratio.
     *
     * @param size     number of characters read from the input file
     * @param packsize number of characters in the result
     */
    private void printStatistics(int size, int packsize) {
        DecimalFormat df = new DecimalFormat("######.0");
        // Avoid 0/0 (NaN) for an empty input file.
        double ratio = size == 0 ? 0.0 : (size - packsize) * 100.0 / size;
        System.out.println("\n原始文件大小:" + size + "\n压缩后文件大小:" + packsize);
        System.out.println("=================\n压缩比率:" + df.format(ratio) + "%");
    }

    /**
     * Reads a text file using the platform default charset.
     *
     * @param filename path of the file to read
     * @return the file content with every line terminated by a single {@code '\n'}
     * @throws IOException if the file cannot be opened or read
     */
    private String readFileData(String filename) throws IOException {
        BufferedReader in = new BufferedReader(new FileReader(filename));
        try {
            StringBuilder sb = new StringBuilder();
            String line;
            // readLine() returning null is the end-of-file signal; ready() is not.
            while ((line = in.readLine()) != null) {
                sb.append(line).append('\n');
            }
            return sb.toString();
        } finally {
            in.close();
        }
    }

    /** Prints {@code txt} to standard output when {@link #isDebug} is set. */
    private void debugInfo(String txt) {
        if (isDebug) {
            System.out.println(txt);
        }
    }

    /**
     * Command-line entry point: {@code jsfile outputfile [offset]}.
     *
     * @param args input file, output file and an optional integer offset (default 0)
     */
    public static void main(String[] args) {
        System.out.println("JSEncoder 0.5 by midea0978 2008.4");
        System.out.println("=====================================");
        System.out.println("http://www.cnblogs.com/midea0978\n");
        if (args.length < 2) {
            System.out.println("Usage:java JSEncoder.jar jsfile outputfile [offset].");
            System.exit(0);
        }
        try {
            System.out.println("输入文件: " + args[0]);
            System.out.println("输出文件: " + args[1]);
            JSEncoder util = new JSEncoder();
            int offset = args.length >= 3 ? Integer.parseInt(args[2]) : 0;
            String code = util.encode(args[0], offset);
            FileOutputStream fs = new FileOutputStream(args[1]);
            try {
                fs.write(code.getBytes());
            } finally {
                // Release the file handle even if the write fails.
                fs.close();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Base64-like encoding of a non-negative number: the number is written in base 64, most
     * significant digit first, using {@link #ENCODE_BASE64} as the digit alphabet.
     *
     * @param c number to encode
     * @return the token for {@code c}
     * @throws Exception if {@code c} is negative
     */
    private String Base64Encode(int c) throws Exception {
        if (c < 0) {
            throw new Exception("Error:Offset必须>=0.");
        }
        StringBuilder digits = new StringBuilder();
        int rest = c;
        do {
            digits.insert(0, ENCODE_BASE64.charAt(rest & 63));
            rest >>= 6;
        } while (rest > 0);
        return digits.toString();
    }

    /**
     * Wraps the encoded script and its dictionary in the JavaScript decoder stub.
     *
     * @param enc    encoded script, already escaped for a single-quoted JavaScript string
     * @param dict   dictionary words joined with {@code |}
     * @param size   number of dictionary words
     * @param offset offset that was added to the dictionary indices
     * @return the complete self-decoding script
     */
    private String formatCode(String enc, String dict, int size, int offset) {
        StringBuffer str = new StringBuffer();
        str.append(DECODER_PREFIX);
        str.append("\'" + enc + "\',");
        str.append("\'" + dict + "\'.split(\'|\'),");
        // size + offset is the token number of the last dictionary word; the decoder counts down from it.
        str.append(size + "," + (size + offset) + ",/[\\w\\$]+/g, {}, {}, []))");
        return str.toString();
    }

}
147
2
3import java.io.FileReader;
4import java.io.BufferedReader;
5import java.io.FileOutputStream;
6import java.io.IOException;
7import java.util.regex.Matcher;
8import java.util.regex.Pattern;
9import java.util.Vector;
10import java.text.DecimalFormat;
11
/**
 * JSEncoder: a dictionary-based JavaScript compressor.
 * <p/>
 * Every run of word characters ({@code [\w$]+}) in the input script is replaced by a short
 * base64-like token. The distinct words are emitted as a {@code |}-separated dictionary next to
 * a small JavaScript decoder stub that rebuilds the original text and passes it to {@code eval}.
 * <p/>
 * After writing this code the author found that it is a re-implementation of the compression
 * algorithm of JSA (http://sourceforge.net/project/showfiles.php?group_id=175776).
 * Results for jquery-1.2.3.min.js:
 * -------------------------------------------------------------------
 * original | JSEncoder | JSA-20071021(2.0 pre-alpha) | jquery packer
 * -------------------------------------------------------------------
 * 53kb | 32kb | 29kb | 29kb
 * -------------------------------------------------------------------
 * JSA additionally shortens local variable names, which is why its output is smaller.
 * <p/>
 * Note: line breaks in the input are replaced by single spaces before encoding, so the input
 * should be a script that still works when joined onto one line (e.g. already minified).
 * <p/>
 * User: (http://www.cnblogs.com/midea0978)
 * Date: 2008-4-18
 * Version:0.5
 */
public class JSEncoder {
    /** Token alphabet; the character at position {@code i} is the digit with value {@code i}. */
    public static final String ENCODE_BASE64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_$";

    /** When {@code true}, the dictionary and intermediate results are printed to standard output. */
    public boolean isDebug = false;

    /** Matches one word to be replaced by a token; compiled once instead of on every call. */
    private static final Pattern WORD = Pattern.compile("([\\w\\$]+)");

    /** JavaScript decoder stub, up to and including the opening parenthesis of its argument list. */
    private static final String DECODER_PREFIX = "/* Compressed by JSEncoder */\neval(function(E,I,A,D,J,K,L,H){function C(A){return A<62?String.fromCharCode(A+=A<26?65:A<52?71:-4):A<63?\'_\':A<64?\'$\':C(A>>6)+C(A&63)}while(A>0)K[C(D--)]=I[--A];function N(A){return K[A]==L[A]?A:K[A]}if(\'\'.replace(/^/,String)){var M=E.match(J),B=M[0],F=E.split(J),G=0;if(E.indexOf(F[0]))F=[\'\'].concat(F);do{H[A++]=F[G++];H[A++]=N(B)}while(B=M[G]);H[A++]=F[G]||\'\';return H.join(\'\')}return E.replace(J,N)}(";

    /**
     * Compresses the given JavaScript file and prints size statistics to standard output.
     *
     * @param filename path of the JavaScript file to compress
     * @param offset   value added to every dictionary index before it is turned into a token;
     *                 intended to be {@code >= 0}. Different offsets shift the token table, and
     *                 smaller offsets give shorter tokens and therefore better compression.
     * @return the self-decoding compressed script; if the input contains no word at all there is
     *         nothing to encode and the file content is returned unchanged
     * @throws Exception if the file cannot be read or a token number becomes negative
     */
    public String encode(String filename, int offset) throws Exception {
        String source = readFileData(filename);
        int size = source.length();

        // The script is embedded in a single-quoted JavaScript string literal: put it on one
        // line, then escape backslashes first and single quotes second.
        String jscript = source.replace('\n', ' ')
                .replace("\\", "\\\\")
                .replace("'", "\\'");

        // Insertion-ordered map word -> dictionary index; gives constant-time lookups while
        // keeping the first-seen order that the decoder relies on.
        java.util.Map<String, Integer> dict = new java.util.LinkedHashMap<String, Integer>();
        StringBuffer encscript = new StringBuffer();
        Matcher m = WORD.matcher(jscript);

        debugInfo("=====编码字典对应表=====");
        while (m.find()) {
            String element = m.group(1);
            Integer index = dict.get(element);
            if (index == null) {
                index = Integer.valueOf(dict.size());
                dict.put(element, index);
            }
            debugInfo(index + "==>" + element);
            String token = Base64Encode(offset + index.intValue() + 1);
            // Tokens may contain '$', which is special in replacement strings.
            m.appendReplacement(encscript, Matcher.quoteReplacement(token));
        }
        m.appendTail(encscript);
        debugInfo("===== 编码字典结束 =====");
        debugInfo("Offset=" + offset + ",字典大小=" + dict.size());

        if (dict.isEmpty()) {
            // No tokens: the decoder stub dereferences the first match of its token regex,
            // so it cannot be used here. Hand the script back as it was read.
            printStatistics(size, size);
            return source;
        }

        debugInfo("压缩后的代码:\n" + encscript.toString());
        StringBuilder dicttab = new StringBuilder();
        for (String word : dict.keySet()) {
            if (dicttab.length() > 0) {
                dicttab.append('|');
            }
            dicttab.append(word);
        }
        String dictstr = dicttab.toString();
        debugInfo("字典字符串:\n" + dictstr);

        String res = formatCode(encscript.toString(), dictstr, dict.size(), offset);
        printStatistics(size, res.length());
        return res;
    }

    /**
     * Prints the original size, the packed size and the compression ratio.
     *
     * @param size     number of characters read from the input file
     * @param packsize number of characters in the result
     */
    private void printStatistics(int size, int packsize) {
        DecimalFormat df = new DecimalFormat("######.0");
        // Avoid 0/0 (NaN) for an empty input file.
        double ratio = size == 0 ? 0.0 : (size - packsize) * 100.0 / size;
        System.out.println("\n原始文件大小:" + size + "\n压缩后文件大小:" + packsize);
        System.out.println("=================\n压缩比率:" + df.format(ratio) + "%");
    }

    /**
     * Reads a text file using the platform default charset.
     *
     * @param filename path of the file to read
     * @return the file content with every line terminated by a single {@code '\n'}
     * @throws IOException if the file cannot be opened or read
     */
    private String readFileData(String filename) throws IOException {
        BufferedReader in = new BufferedReader(new FileReader(filename));
        try {
            StringBuilder sb = new StringBuilder();
            String line;
            // readLine() returning null is the end-of-file signal; ready() is not.
            while ((line = in.readLine()) != null) {
                sb.append(line).append('\n');
            }
            return sb.toString();
        } finally {
            in.close();
        }
    }

    /** Prints {@code txt} to standard output when {@link #isDebug} is set. */
    private void debugInfo(String txt) {
        if (isDebug) {
            System.out.println(txt);
        }
    }

    /**
     * Command-line entry point: {@code jsfile outputfile [offset]}.
     *
     * @param args input file, output file and an optional integer offset (default 0)
     */
    public static void main(String[] args) {
        System.out.println("JSEncoder 0.5 by midea0978 2008.4");
        System.out.println("=====================================");
        System.out.println("http://www.cnblogs.com/midea0978\n");
        if (args.length < 2) {
            System.out.println("Usage:java JSEncoder.jar jsfile outputfile [offset].");
            System.exit(0);
        }
        try {
            System.out.println("输入文件: " + args[0]);
            System.out.println("输出文件: " + args[1]);
            JSEncoder util = new JSEncoder();
            int offset = args.length >= 3 ? Integer.parseInt(args[2]) : 0;
            String code = util.encode(args[0], offset);
            FileOutputStream fs = new FileOutputStream(args[1]);
            try {
                fs.write(code.getBytes());
            } finally {
                // Release the file handle even if the write fails.
                fs.close();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Base64-like encoding of a non-negative number: the number is written in base 64, most
     * significant digit first, using {@link #ENCODE_BASE64} as the digit alphabet.
     *
     * @param c number to encode
     * @return the token for {@code c}
     * @throws Exception if {@code c} is negative
     */
    private String Base64Encode(int c) throws Exception {
        if (c < 0) {
            throw new Exception("Error:Offset必须>=0.");
        }
        StringBuilder digits = new StringBuilder();
        int rest = c;
        do {
            digits.insert(0, ENCODE_BASE64.charAt(rest & 63));
            rest >>= 6;
        } while (rest > 0);
        return digits.toString();
    }

    /**
     * Wraps the encoded script and its dictionary in the JavaScript decoder stub.
     *
     * @param enc    encoded script, already escaped for a single-quoted JavaScript string
     * @param dict   dictionary words joined with {@code |}
     * @param size   number of dictionary words
     * @param offset offset that was added to the dictionary indices
     * @return the complete self-decoding script
     */
    private String formatCode(String enc, String dict, int size, int offset) {
        StringBuffer str = new StringBuffer();
        str.append(DECODER_PREFIX);
        str.append("\'" + enc + "\',");
        str.append("\'" + dict + "\'.split(\'|\'),");
        // size + offset is the token number of the last dictionary word; the decoder counts down from it.
        str.append(size + "," + (size + offset) + ",/[\\w\\$]+/g, {}, {}, []))");
        return str.toString();
    }

}
147