一个汉字在UTF-8编码中占用几个字节?
占用3个字节的范围
合计: 52156 个
占用4个字节的范围
合计: 64029 个
所以,我认为答案应该是:3个或4个字节。
//字符编码数值对应的存储长度:
//UCS-2编码(16进制) UTF-8 字节流(二进制)
//0000 - 007F 0xxxxxxx (1字节)
//0080 - 07FF 110xxxxx 10xxxxxx (2字节)
//0800 - FFFF 1110xxxx 10xxxxxx 10xxxxxx (3字节)
String.prototype.getBytesLength = function() {
var totalLength = 0;
var charCode;
for (var i = 0; i < this.length; i++) {
charCode = this.charCodeAt(i);
if (charCode < 0x007f) {
totalLength++;
} else if ((0x0080 <= charCode) && (charCode <= 0x07ff)) {
totalLength += 2;
} else if ((0x0800 <= charCode) && (charCode <= 0xffff)) {
totalLength += 3;
} else{
totalLength += 4;
}
}
return totalLength;
}
var str=",";
alert("字符数"+str.length+" ,字节数"+str.getBytesLength());
//Java中的字节数==="字符串".getBytes("UTF-8").length
//-->