为了一个gb2312下post中文参数到utf-8页面的程序,随闷的难受,查了一下午资料,大部分是讲Gb2312传到Gb2312页面的UrlEncode,没有提供到Utf-8页面的UrlEncode,后来找到Unicode转Utf-8码的资料,参考之下,终于写出了Utf-8的UrlEncode,这里整理下各种UrlEncode方法,供有需要的朋友参考。
详细Utf-8编码规则请百度一下。
Unicode 与 Utf-8码间的内码规则模板为:
原始码(16进制) UTF-8编码(二进制)
--------------------------------------------
0000 - 007F 0xxxxxxx
0080 - 07FF 110xxxxx 10xxxxxx
0800 - FFFF 1110xxxx 10xxxxxx 10xxxxxx (中文字在此区间)
……
--------------------------------------------
例如:
百度中查询“中国人”,会将中文URL参数转为Gb2312码的16进制表示,一个中文字用2个字节
http://www.baidu.com/s?wd=%D6%D0%B9%FA%C8%CB
Google中查询“中国人”,会将中文URL参数转为Utf-8编码的16进制表示,一个中文字用3个字节
http://www.google.cn/search?client=opera&rls=en&q=%E4%B8%AD%E5%9B%BD%E4%BA%BA&sourceid=opera&ie=utf-8&oe=utf-8
data:image/s3,"s3://crabby-images/42674/42674817e8fa3a8802ce8a04a989f75e48711c39" alt="Gb2312及Gb2312转Utf-8编码的UrlEncode编码解码(全) Gb2312及Gb2312转Utf-8编码的UrlEncode编码解码(全)"
data:image/s3,"s3://crabby-images/42674/42674817e8fa3a8802ce8a04a989f75e48711c39" alt="Gb2312及Gb2312转Utf-8编码的UrlEncode编码解码(全) Gb2312及Gb2312转Utf-8编码的UrlEncode编码解码(全)"
data:image/s3,"s3://crabby-images/42674/42674817e8fa3a8802ce8a04a989f75e48711c39" alt="Gb2312及Gb2312转Utf-8编码的UrlEncode编码解码(全) Gb2312及Gb2312转Utf-8编码的UrlEncode编码解码(全)"
data:image/s3,"s3://crabby-images/42674/42674817e8fa3a8802ce8a04a989f75e48711c39" alt="Gb2312及Gb2312转Utf-8编码的UrlEncode编码解码(全) Gb2312及Gb2312转Utf-8编码的UrlEncode编码解码(全)"
data:image/s3,"s3://crabby-images/42674/42674817e8fa3a8802ce8a04a989f75e48711c39" alt="Gb2312及Gb2312转Utf-8编码的UrlEncode编码解码(全) Gb2312及Gb2312转Utf-8编码的UrlEncode编码解码(全)"
'
Url编码,Gb2312页面之间传递参数
Function
URLEncode_Gb(ByVal str)
Dim
i,s
Dim
B,bCode,gb,Hight8b,Low8b
s
=
""
For
i
=
1
To
Len
(str)
B
=
Mid
(str,i,
1
)
bCode
=
Abs
(
Asc
(B))
If
(bCode
>=
48
And
bCode
<=
57
)
Or
(bCode
>=
65
And
bCode
<=
90
)
Or
(bCode
>=
97
And
bCode
<=
122
)
Or
bCode
=
42
Or
bCode
=
45
Or
bCode
=
46
Or
bCode
=
64
Or
bCode
=
95
Then
'
48 to 57代表0~9;65 to 90代表A~Z;97 to 122代表a~z
'
42代表*;46代表.;64代表@;45代表-;95代表_
s
=
s
&
B
ElseIf
bCode
=
32
Then
'
空格转成+
s
=
s
&
"
+
"
ElseIf
bCode
<
128
Then
'
低于128的Ascii转成1个字节
s
=
s
&
"
%
"
&
Right
(
"
00
"
&
Hex
(bCode),
2
)
Else
gb
=
Asc
(B)
If
gb
<
0
Then
gb
=
gb
+
&
H10000
'
gb编码为负数,要加上65536
End
If
Hight8b
=
(gb
And
&
HFF00)
/
&
H100
'
二进制高8位
Low8b
=
gb
And
&
HFF
'
二进制低8位
s
=
s
&
"
%
"
&
Hex
(Hight8b)
&
"
%
"
&
Hex
(Low8b)
End
If
Next
URLEncode_Gb
=
s
End Function
data:image/s3,"s3://crabby-images/42674/42674817e8fa3a8802ce8a04a989f75e48711c39" alt="Gb2312及Gb2312转Utf-8编码的UrlEncode编码解码(全) Gb2312及Gb2312转Utf-8编码的UrlEncode编码解码(全)"
'
Url解码,Gb2312页面之间传递参数
Function
URLDecode_Gb(ByVal str)
Dim
i,s
Dim
B,bCode,gb,Hight8b,Low8b
s
=
""
For
i
=
1
To
Len
(str)
B
=
Mid
(str,i,
1
)
Select
Case
B
Case
"
+
"
s
=
s
&
"
"
Case
"
%
"
gb
=
Mid
(str,i
+
1
,
2
)
bCode
=
CInt
(
"
&H
"
&
gb)
If
bCode
<
128
Then
i
=
i
+
2
Else
bCode
=
CInt
(
"
&H
"
&
gb
&
Mid
(str,i
+
4
,
2
))
i
=
i
+
5
End
If
s
=
s
&
Chr
(bCode)
Case
Else
s
=
s
&
B
End
Select
Next
URLDecode_Gb
=
s
End Function
data:image/s3,"s3://crabby-images/42674/42674817e8fa3a8802ce8a04a989f75e48711c39" alt="Gb2312及Gb2312转Utf-8编码的UrlEncode编码解码(全) Gb2312及Gb2312转Utf-8编码的UrlEncode编码解码(全)"
'
URL编码,Gb2312页面提交到Utf-8页面
Function
UrlEncode_GBToUtf8(ByVal str)
Dim
B
'
单个字符
Dim
ub
'
中文字的Unicode码(2字节)
Dim
High8b, Low8b
'
Unicode码的高低位字节
Dim
UtfB1, UtfB2, UtfB3
'
Utf-8码的三个字节
Dim
i, s
For
i
=
1
To
Len
(str)
B
=
Mid
(str, i,
1
)
ub
=
AscW(B)
If
(ub
>=
48
And
ub
<=
57
)
Or
(ub
>=
65
And
ub
<=
90
)
Or
(ub
>=
97
And
ub
<=
122
)
Or
ub
=
42
Or
ub
=
45
Or
ub
=
46
Or
ub
=
64
Or
ub
=
95
Then
'
48 to 57代表0~9;65 to 90代表A~Z;97 to 122代表a~z
'
42代表*;46代表.;64代表@;45代表-;95代表_
s
=
s
&
B
ElseIf
ub
=
32
Then
'
空格转成+
s
=
s
&
"
+
"
ElseIf
ub
<
128
Then
'
低于128的Ascii转成1个字节
s
=
s
&
"
%
"
&
Right
(
"
00
"
&
Hex
(ub),
2
)
Else
High8b
=
(ub
And
&
HFF00)
/
&
H100
'
Unicode码高位
Low8b
=
ub
And
&
HFF
'
Unicode码低位
UtfB1
=
(High8b
And
&
HF0)
/
&
H10
Or
&
HE0
'
取Unicode高位字节的二进制的前4位 + 11100000
UtfB2
=
((High8b
And
&
HF)
*
&
H4
+
(Low8b
And
&
HC0)
/
&
H40)
Or
&
H80
'
取Unicode高位字节的后4位及低位字节的前2位 +10000000
UtfB3
=
(Low8b
And
&
H3F)
Or
&
H80
'
取Unicode低位字节的二进制后6位 + 10000000
s
=
s
&
"
%
"
&
Hex
(UtfB1)
&
"
%
"
&
Hex
(UtfB2)
&
"
%
"
&
Hex
(UtfB3)
End
If
Next
UrlEncode_GBToUtf8
=
s
End Function
data:image/s3,"s3://crabby-images/42674/42674817e8fa3a8802ce8a04a989f75e48711c39" alt="Gb2312及Gb2312转Utf-8编码的UrlEncode编码解码(全) Gb2312及Gb2312转Utf-8编码的UrlEncode编码解码(全)"
data:image/s3,"s3://crabby-images/42674/42674817e8fa3a8802ce8a04a989f75e48711c39" alt="Gb2312及Gb2312转Utf-8编码的UrlEncode编码解码(全) Gb2312及Gb2312转Utf-8编码的UrlEncode编码解码(全)"
data:image/s3,"s3://crabby-images/42674/42674817e8fa3a8802ce8a04a989f75e48711c39" alt="Gb2312及Gb2312转Utf-8编码的UrlEncode编码解码(全) Gb2312及Gb2312转Utf-8编码的UrlEncode编码解码(全)"
'
“汉”-AscW("汉")=27721(十进制) 01101100 01001001(二进制) 6C49(十六进制)
'
将Gb2312码转成Utf-8码(十六进制表示)的方法为,先用AscW将Gb2312转为Unicode码(2字节),再'将Unicode码的二进制中的位按utf-8(3字节)模板规则填充 x 位:
'
URL解码,Gb2312页面提交到Utf-8页面
Function
UrlDecode_GBToUtf8(ByVal str)
Dim
B,ub
'
中文字的Unicode码(2字节)
Dim
UtfB
'
Utf-8单个字节
Dim
UtfB1, UtfB2, UtfB3
'
Utf-8码的三个字节
Dim
i, n, s
n
=
0
ub
=
0
For
i
=
1
To
Len
(str)
B
=
Mid
(str, i,
1
)
Select
Case
B
Case
"
+
"
s
=
s
&
"
"
Case
"
%
"
ub
=
Mid
(str, i
+
1
,
2
)
UtfB
=
CInt
(
"
&H
"
&
ub)
If
UtfB
<
128
Then
i
=
i
+
2
s
=
s
&
ChrW(UtfB)
Else
UtfB1
=
(UtfB
And
&
H0F)
*
&
H1000
'
取第1个Utf-8字节的二进制后4位
UtfB2
=
(
CInt
(
"
&H
"
&
Mid
(str, i
+
4
,
2
))
And
&
H3F)
*
&
H40
'
取第2个Utf-8字节的二进制后6位
UtfB3
=
CInt
(
"
&H
"
&
Mid
(str, i
+
7
,
2
))
And
&
H3F
'
取第3个Utf-8字节的二进制后6位
s
=
s
&
ChrW(UtfB1
Or
UtfB2
Or
UtfB3)
i
=
i
+
8
End
If
Case
Else
'
Ascii码
s
=
s
&
B
End
Select
Next
UrlDecode_GBToUtf8
=
s
End Function
data:image/s3,"s3://crabby-images/42674/42674817e8fa3a8802ce8a04a989f75e48711c39" alt="Gb2312及Gb2312转Utf-8编码的UrlEncode编码解码(全) Gb2312及Gb2312转Utf-8编码的UrlEncode编码解码(全)"
'
URL编码,Gb2312页面提交到Utf-8页面,另一种位计算方法
Private
Function
UrlEncode_GBToUtf8_V2(szInput)
Dim
wch, uch, szRet
Dim
x
Dim
nAsc, nAsc2, nAsc3
If
szInput
=
""
Then
UrlEncode_GBToUtf8_V2
=
szInput
Exit
Function
End
If
For
x
=
1
To
Len
(szInput)
wch
=
Mid
(szInput, x,
1
)
nAsc
=
AscW(wch)
If
nAsc
<
0
Then
nAsc
=
nAsc
+
65536
If
(nAsc
And
&
HFF80)
=
0
Then
szRet
=
szRet
&
wch
Else
If
(nAsc
And
&
HF000)
=
0
Then
uch
=
"
%
"
&
Hex
(((nAsc
2
^
6
))
Or
&
HC0)
&
Hex
(nAsc
And
&
H3F
Or
&
H80)
szRet
=
szRet
&
uch
Else
uch
=
"
%
"
&
Hex
((nAsc
2
^
12
)
Or
&
HE0)
&
"
%
"
&
_
Hex
((nAsc
2
^
6
)
And
&
H3F
Or
&
H80)
&
"
%
"
&
_
Hex
(nAsc
And
&
H3F
Or
&
H80)
szRet
=
szRet
&
uch
End
If
End
If
Next
UrlEncode_GBToUtf8_V2
=
szRet
End Function
data:image/s3,"s3://crabby-images/42674/42674817e8fa3a8802ce8a04a989f75e48711c39" alt="Gb2312及Gb2312转Utf-8编码的UrlEncode编码解码(全) Gb2312及Gb2312转Utf-8编码的UrlEncode编码解码(全)"
data:image/s3,"s3://crabby-images/42674/42674817e8fa3a8802ce8a04a989f75e48711c39" alt="Gb2312及Gb2312转Utf-8编码的UrlEncode编码解码(全) Gb2312及Gb2312转Utf-8编码的UrlEncode编码解码(全)"
data:image/s3,"s3://crabby-images/42674/42674817e8fa3a8802ce8a04a989f75e48711c39" alt="Gb2312及Gb2312转Utf-8编码的UrlEncode编码解码(全) Gb2312及Gb2312转Utf-8编码的UrlEncode编码解码(全)"
'
VB下用API方法的Unicode转Utf-8方法:
Private
Declare
Function
WideCharToMultiByte Lib
"
kernel32
"
(ByVal CodePage
As
Long
, ByVal dwFlags
As
Long
, ByVal lpWideCharStr
As
Long
, ByVal cchWideChar
As
Long
, ByRef lpMultiByteStr
As
Any, ByVal cchMultiByte
As
Long
, ByVal lpDefaultChar
As
String
, ByVal lpUsedDefaultChar
As
Long
)
As
Long
Private
Declare
Function
MultiByteToWideChar Lib
"
kernel32
"
(ByVal CodePage
As
Long
, ByVal dwFlags
As
Long
, ByVal lpMultiByteStr
As
Long
, ByVal cchMultiByte
As
Long
, ByVal lpWideCharStr
As
Long
, ByVal cchWideChar
As
Long
)
As
Long
Private
Const
CP_UTF8
=
65001
data:image/s3,"s3://crabby-images/42674/42674817e8fa3a8802ce8a04a989f75e48711c39" alt="Gb2312及Gb2312转Utf-8编码的UrlEncode编码解码(全) Gb2312及Gb2312转Utf-8编码的UrlEncode编码解码(全)"
data:image/s3,"s3://crabby-images/42674/42674817e8fa3a8802ce8a04a989f75e48711c39" alt="Gb2312及Gb2312转Utf-8编码的UrlEncode编码解码(全) Gb2312及Gb2312转Utf-8编码的UrlEncode编码解码(全)"
Function
Utf8ToUnicode(ByRef Utf()
As
Byte
)
As
String
Dim
lRet
As
Long
Dim
lLength
As
Long
Dim
lBufferSize
As
Long
lLength
=
UBound
(Utf)
-
LBound
(Utf)
+
1
If
lLength
<=
0
Then
Exit
Function
lBufferSize
=
lLength
*
2
Utf8ToUnicode
=
String
$(lBufferSize,
Chr
(
0
))
lRet
=
MultiByteToWideChar(CP_UTF8,
0
, VarPtr(Utf(
0
)), lLength, StrPtr(Utf8ToUnicode), lBufferSize)
If
lRet
<>
0
Then
Utf8ToUnicode
=
Left
(Utf8ToUnicode, lRet)
End
If
End Function
data:image/s3,"s3://crabby-images/42674/42674817e8fa3a8802ce8a04a989f75e48711c39" alt="Gb2312及Gb2312转Utf-8编码的UrlEncode编码解码(全) Gb2312及Gb2312转Utf-8编码的UrlEncode编码解码(全)"
Function
UnicodeToUtf8(ByVal UCS
As
String
)
As
Byte
()
Dim
lLength
As
Long
Dim
lBufferSize
As
Long
Dim
lResult
As
Long
Dim
abUTF8()
As
Byte
lLength
=
Len
(UCS)
If
lLength
=
0
Then
Exit
Function
lBufferSize
=
lLength
*
3
+
1
ReDim
abUTF8(lBufferSize
-
1
)
lResult
=
WideCharToMultiByte(CP_UTF8,
0
, StrPtr(UCS), lLength, abUTF8(
0
), lBufferSize, vbNullString,
0
)
If
lResult
<>
0
Then
lResult
=
lResult
-
1
ReDim
Preserve abUTF8(lResult)
UnicodeToUtf8
=
abUTF8
End
If
End Function