为了一个gb2312下post中文参数到utf-8页面的程序,随闷的难受,查了一下午资料,大部分是讲Gb2312传到Gb2312页面的UrlEncode,没有提供到Utf-8页面的UrlEncode,后来找到Unicode转Utf-8码的资料,参考之下,终于写出了Utf-8的UrlEncode,这里整理下各种UrlEncode方法,供有需要的朋友参考。
详细Utf-8编码规则请百度一下。
Unicode 与 Utf-8码间的内码规则模板为:
原始码(16进制) UTF-8编码(二进制)
--------------------------------------------
0000 - 007F 0xxxxxxx
0080 - 07FF 110xxxxx 10xxxxxx
0800 - FFFF 1110xxxx 10xxxxxx 10xxxxxx (中文字在此区间)
……
--------------------------------------------
例如:
百度中查询“中国人”,会将中文URL参数转为Gb2312码的16进制表示,一个中文字用2个字节
http://www.baidu.com/s?wd=%D6%D0%B9%FA%C8%CB
Google中查询“中国人”,会将中文URL参数转为Utf-8编码的16进制表示,一个中文字用3个字节
http://www.google.cn/search?client=opera&rls=en&q=%E4%B8%AD%E5%9B%BD%E4%BA%BA&sourceid=opera&ie=utf-8&oe=utf-8
' Url编码,Gb2312页面之间传递参数
Function URLEncode_Gb(ByVal str)
Dim i,s
Dim B,bCode,gb,Hight8b,Low8b
s = ""
For i = 1 To Len (str)
B = Mid (str,i, 1 )
bCode = Abs ( Asc (B))
If (bCode >= 48 And bCode <= 57 ) Or (bCode >= 65 And bCode <= 90 ) Or (bCode >= 97 And bCode <= 122 ) Or bCode = 42 Or bCode = 45 Or bCode = 46 Or bCode = 64 Or bCode = 95 Then
' 48 to 57代表0~9;65 to 90代表A~Z;97 to 122代表a~z
' 42代表*;46代表.;64代表@;45代表-;95代表_
s = s & B
ElseIf bCode = 32 Then ' 空格转成+
s = s & " + "
ElseIf bCode < 128 Then ' 低于128的Ascii转成1个字节
s = s & " % " & Right ( " 00 " & Hex (bCode), 2 )
Else
gb = Asc (B)
If gb < 0 Then
gb = gb + & H10000 ' gb编码为负数,要加上65536
End If
Hight8b = (gb And & HFF00) / & H100 ' 二进制高8位
Low8b = gb And & HFF ' 二进制低8位
s = s & " % " & Hex (Hight8b) & " % " & Hex (Low8b)
End If
Next
URLEncode_Gb = s
End Function
' Url解码,Gb2312页面之间传递参数
Function URLDecode_Gb(ByVal str)
Dim i,s
Dim B,bCode,gb,Hight8b,Low8b
s = ""
For i = 1 To Len (str)
B = Mid (str,i, 1 )
Select Case B
Case " + "
s = s & " "
Case " % "
gb = Mid (str,i + 1 , 2 )
bCode = CInt ( " &H " & gb)
If bCode < 128 Then
i = i + 2
Else
bCode = CInt ( " &H " & gb & Mid (str,i + 4 , 2 ))
i = i + 5
End If
s = s & Chr (bCode)
Case Else
s = s & B
End Select
Next
URLDecode_Gb = s
End Function
' URL编码,Gb2312页面提交到Utf-8页面
Function UrlEncode_GBToUtf8(ByVal str)
Dim B ' 单个字符
Dim ub ' 中文字的Unicode码(2字节)
Dim High8b, Low8b ' Unicode码的高低位字节
Dim UtfB1, UtfB2, UtfB3 ' Utf-8码的三个字节
Dim i, s
For i = 1 To Len (str)
B = Mid (str, i, 1 )
ub = AscW(B)
If (ub >= 48 And ub <= 57 ) Or (ub >= 65 And ub <= 90 ) Or (ub >= 97 And ub <= 122 ) Or ub = 42 Or ub = 45 Or ub = 46 Or ub = 64 Or ub = 95 Then
' 48 to 57代表0~9;65 to 90代表A~Z;97 to 122代表a~z
' 42代表*;46代表.;64代表@;45代表-;95代表_
s = s & B
ElseIf ub = 32 Then ' 空格转成+
s = s & " + "
ElseIf ub < 128 Then ' 低于128的Ascii转成1个字节
s = s & " % " & Right ( " 00 " & Hex (ub), 2 )
Else
High8b = (ub And & HFF00) / & H100 ' Unicode码高位
Low8b = ub And & HFF ' Unicode码低位
UtfB1 = (High8b And & HF0) / & H10 Or & HE0 ' 取Unicode高位字节的二进制的前4位 + 11100000
UtfB2 = ((High8b And & HF) * & H4 + (Low8b And & HC0) / & H40) Or & H80 ' 取Unicode高位字节的后4位及低位字节的前2位 +10000000
UtfB3 = (Low8b And & H3F) Or & H80 ' 取Unicode低位字节的二进制后6位 + 10000000
s = s & " % " & Hex (UtfB1) & " % " & Hex (UtfB2) & " % " & Hex (UtfB3)
End If
Next
UrlEncode_GBToUtf8 = s
End Function
' “汉”-AscW("汉")=27721(十进制) 01101100 01001001(二进制) 6C49(十六进制)
' 将Gb2312码转成Utf-8码(十六进制表示)的方法为,先用AscW将Gb2312转为Unicode码(2字节),再'将Unicode码的二进制中的位按utf-8(3字节)模板规则填充 x 位:
' URL解码,Gb2312页面提交到Utf-8页面
Function UrlDecode_GBToUtf8(ByVal str)
Dim B,ub ' 中文字的Unicode码(2字节)
Dim UtfB ' Utf-8单个字节
Dim UtfB1, UtfB2, UtfB3 ' Utf-8码的三个字节
Dim i, n, s
n = 0
ub = 0
For i = 1 To Len (str)
B = Mid (str, i, 1 )
Select Case B
Case " + "
s = s & " "
Case " % "
ub = Mid (str, i + 1 , 2 )
UtfB = CInt ( " &H " & ub)
If UtfB < 128 Then
i = i + 2
s = s & ChrW(UtfB)
Else
UtfB1 = (UtfB And & H0F) * & H1000 ' 取第1个Utf-8字节的二进制后4位
UtfB2 = ( CInt ( " &H " & Mid (str, i + 4 , 2 )) And & H3F) * & H40 ' 取第2个Utf-8字节的二进制后6位
UtfB3 = CInt ( " &H " & Mid (str, i + 7 , 2 )) And & H3F ' 取第3个Utf-8字节的二进制后6位
s = s & ChrW(UtfB1 Or UtfB2 Or UtfB3)
i = i + 8
End If
Case Else ' Ascii码
s = s & B
End Select
Next
UrlDecode_GBToUtf8 = s
End Function
' URL编码,Gb2312页面提交到Utf-8页面,另一种位计算方法
Private Function UrlEncode_GBToUtf8_V2(szInput)
Dim wch, uch, szRet
Dim x
Dim nAsc, nAsc2, nAsc3
If szInput = "" Then
UrlEncode_GBToUtf8_V2 = szInput
Exit Function
End If
For x = 1 To Len (szInput)
wch = Mid (szInput, x, 1 )
nAsc = AscW(wch)
If nAsc < 0 Then nAsc = nAsc + 65536
If (nAsc And & HFF80) = 0 Then
szRet = szRet & wch
Else
If (nAsc And & HF000) = 0 Then
uch = " % " & Hex (((nAsc 2 ^ 6 )) Or & HC0) & Hex (nAsc And & H3F Or & H80)
szRet = szRet & uch
Else
uch = " % " & Hex ((nAsc 2 ^ 12 ) Or & HE0) & " % " & _
Hex ((nAsc 2 ^ 6 ) And & H3F Or & H80) & " % " & _
Hex (nAsc And & H3F Or & H80)
szRet = szRet & uch
End If
End If
Next
UrlEncode_GBToUtf8_V2 = szRet
End Function
' VB下用API方法的Unicode转Utf-8方法:
Private Declare Function WideCharToMultiByte Lib " kernel32 " (ByVal CodePage As Long , ByVal dwFlags As Long , ByVal lpWideCharStr As Long , ByVal cchWideChar As Long , ByRef lpMultiByteStr As Any, ByVal cchMultiByte As Long , ByVal lpDefaultChar As String , ByVal lpUsedDefaultChar As Long ) As Long
Private Declare Function MultiByteToWideChar Lib " kernel32 " (ByVal CodePage As Long , ByVal dwFlags As Long , ByVal lpMultiByteStr As Long , ByVal cchMultiByte As Long , ByVal lpWideCharStr As Long , ByVal cchWideChar As Long ) As Long
Private Const CP_UTF8 = 65001
Function Utf8ToUnicode(ByRef Utf() As Byte ) As String
Dim lRet As Long
Dim lLength As Long
Dim lBufferSize As Long
lLength = UBound (Utf) - LBound (Utf) + 1
If lLength <= 0 Then Exit Function
lBufferSize = lLength * 2
Utf8ToUnicode = String $(lBufferSize, Chr ( 0 ))
lRet = MultiByteToWideChar(CP_UTF8, 0 , VarPtr(Utf( 0 )), lLength, StrPtr(Utf8ToUnicode), lBufferSize)
If lRet <> 0 Then
Utf8ToUnicode = Left (Utf8ToUnicode, lRet)
End If
End Function
Function UnicodeToUtf8(ByVal UCS As String ) As Byte ()
Dim lLength As Long
Dim lBufferSize As Long
Dim lResult As Long
Dim abUTF8() As Byte
lLength = Len (UCS)
If lLength = 0 Then Exit Function
lBufferSize = lLength * 3 + 1
ReDim abUTF8(lBufferSize - 1 )
lResult = WideCharToMultiByte(CP_UTF8, 0 , StrPtr(UCS), lLength, abUTF8( 0 ), lBufferSize, vbNullString, 0 )
If lResult <> 0 Then
lResult = lResult - 1
ReDim Preserve abUTF8(lResult)
UnicodeToUtf8 = abUTF8
End If
End Function