常见编码对中英文字符的支持及占用字节数
使用 Java 验证
import java.io.UnsupportedEncodingException;
import org.junit.Test;
/**
 * Prints how many bytes one English, one Simplified Chinese and one Traditional
 * Chinese character occupy when encoded with several charsets.
 *
 * <p>Each section consists of a header line naming the charset, one line per
 * sample character, and a trailing blank line.
 */
public class EncodeTest {

    /** Suffix appended to the charset name to form each section header. */
    private static final String HEADER_SUFFIX = "编码字节数:";

    /**
     * Runs every charset section in a fixed order, printing byte counts to stdout.
     *
     * <p>Note: the JDK's UTF-16 encoder writes a byte-order mark, so the counts
     * printed for that charset include it.
     *
     * @throws UnsupportedEncodingException if a named charset is not supported
     */
    @Test
    public void test() throws UnsupportedEncodingException {
        // Each sample pairs the printed label with the character being encoded.
        final String[] english = {"英文字符:", "A"};
        final String[] simplified = {"中文简体:", "简"};
        final String[] traditional = {"中文繁体:", "簡"};

        printSection("ASCII", english);
        printSection("Unicode", english, simplified, traditional);
        printSection("UTF-8", english, simplified, traditional);
        printSection("UTF-16", english, simplified, traditional);
        // GB2312 is only exercised with the Simplified sample.
        printSection("GB2312", english, simplified);
        // BIG5 is only exercised with the Traditional sample.
        printSection("BIG5", english, traditional);
        printSection("GBK", english, simplified, traditional);
        printSection("GB18030", english, simplified, traditional);
        printSection("ISO-8859-1", english);
    }

    /**
     * Prints one section: the header, one byte-count line per sample, then a blank line.
     *
     * @param charsetName charset name used both in the header and for encoding
     * @param samples     {label, text} pairs, printed in the order given
     * @throws UnsupportedEncodingException if {@code charsetName} is not supported
     */
    private static void printSection(String charsetName, String[]... samples)
            throws UnsupportedEncodingException {
        System.out.println(charsetName + HEADER_SUFFIX);
        for (String[] sample : samples) {
            final String label = sample[0];
            final int byteCount = sample[1].getBytes(charsetName).length;
            System.out.println(label + byteCount);
        }
        System.out.println();
    }
}
验证结果:
ASCII编码字节数:
英文字符:1
Unicode编码字节数:
英文字符:4
中文简体:4
中文繁体:4
(注:Java 中 "Unicode" 是 UTF-16 的别名,getBytes 的结果开头包含 2 字节的 BOM,字符本身只占 2 字节。)
UTF-8编码字节数:
英文字符:1
中文简体:3
中文繁体:3
UTF-16编码字节数:
英文字符:4
中文简体:4
中文繁体:4
(注:Java 的 "UTF-16" 编码会在输出开头写入 2 字节的 BOM(字节序标记),因此这里的 4 字节 = 2 字节 BOM + 2 字节字符本身。)
GB2312编码字节数:
英文字符:1
中文简体:2
BIG5编码字节数:
英文字符:1
中文繁体:2
GBK编码字节数:
英文字符:1
中文简体:2
中文繁体:2
GB18030编码字节数:
英文字符:1
中文简体:2
中文繁体:2
ISO-8859-1编码字节数:
英文字符:1