现在有这么一个问题,比如说 “中文1234” 这各格式的结果就是“中?234” ,也就是说中文字和后面的一个半角的数字或标点符合就会合成一个新的乱码,有什么100%无乱码的转换代码吗?
Public Function DownToStr(ByVal sUrl As String) As String
On Error GoTo ToExit '打开错误陷阱
'------------------------------------------------
Dim hOpen As Long, hFile As Long, RetLen As Long, Buffer() As Byte, szBuffer As String
hOpen = InternetOpen(vbNullString, INTERNET_OPEN_TYPE_DIRECT, vbNullString, vbNullString, 0)
hFile = InternetOpenUrl(hOpen, sUrl, vbNullString, ByVal 0&, INTERNET_FLAG_RELOAD, ByVal 0&)
If hFile <> 0 Then
Do
ReDim Buffer(OnceLen - 1)
InternetReadFile hFile, ByVal VarPtr(Buffer(0)), OnceLen, RetLen
DoEvents
If RetLen = 0 Then Exit Do
If RetLen < OnceLen Then ReDim Preserve Buffer(RetLen - 1)
szBuffer = szBuffer & CStr(Buffer)
Loop
InternetCloseHandle hFile
End If
InternetCloseHandle hOpen
DownToStr = StrConv(szBuffer, vbUnicode)
'------------------------------------------------
Exit Function
'----------------
ToExit:
DownToStr = ""
MsgBox "远程连接出错:" & Err.Number & " " & Err.Description
End Function
Private Declare Function MultiByteToWideChar Lib "kernel32" (ByVal CodePage As Long, ByVal dwFlags As Long, ByRef lpMultiByteStr As Any, ByVal cchMultiByte As Long, ByVal lpWideCharStr As Long, ByVal cchWideChar As Long) As Long
Private Const CP_UTF8 = 65001
' Utf8 to Unicode
Public Function Utf8ToUnicode(ByVal sUTF8 As String) As String
Dim lngUtf8Size As Long
Dim strBuffer As String
Dim lngBufferSize As Long
Dim lngResult As Long
Dim bytUtf8() As Byte
Dim N As Long
If LenB(sUTF8) = 0 Then Exit Function
bytUtf8 = StrConv(sUTF8, vbFromUnicode)
lngUtf8Size = UBound(bytUtf8) + 1
On Error GoTo 0
lngBufferSize = lngUtf8Size * 2
strBuffer = String$(lngBufferSize, vbNullChar)
'Translate using code page 65001(UTF-8)
lngResult = MultiByteToWideChar(CP_UTF8, 0, bytUtf8(0), lngUtf8Size, StrPtr(strBuffer), lngBufferSize)
'Trim result to actual length
If lngResult Then
Utf8ToUnicode = Left$(strBuffer, lngResult)
End If
End Function
6 个解决方案
#1
Private Declare Function MultiByteToWideChar Lib "kernel32" (ByVal CodePage As Long, ByVal dwFlags As Long, ByRef lpMultiByteStr As Any, ByVal cchMultiByte As Long, ByVal lpWideCharStr As Long, ByVal cchWideChar As Long) As Long
Private Declare Function WideCharToMultiByte Lib "kernel32" (ByVal CodePage As Long, ByVal dwFlags As Long, ByVal lpWideCharStr As Long, ByVal cchWideChar As Long, ByRef lpMultiByteStr As Any, ByVal cchMultiByte As Long, ByVal lpDefaultChar As Long, ByVal lpUsedDefaultChar As Long) As Long
'常用的代码页:
const cpUTF8 =65001
const cpGB2312 = 936
const cpGB18030=54936
const cpUTF7 =65000
Function MultiByteToUTF16(UTF8() As Byte, CodePage As Long) As String
Dim bufSize As Long
bufSize = MultiByteToWideChar(CodePage, 0&, UTF8(0), UBound(UTF8) + 1, 0, 0)
MultiByteToUTF16 = Space(bufSize)
MultiByteToWideChar CodePage, 0&, UTF8(0), UBound(UTF8) + 1, StrPtr(MultiByteToUTF16), bufSize
End Function
Function UTF16ToMultiByte(UTF16 As String, CodePage As Long) As Byte()
Dim bufSize As Long
Dim arr() As Byte
bufSize = WideCharToMultiByte(CodePage, 0&, StrPtr(UTF16), Len(UTF16), 0, 0, 0, 0)
ReDim arr(bufSize - 1)
WideCharToMultiByte CodePage, 0&, StrPtr(UTF16), Len(UTF16), arr(0), bufSize, 0, 0
UTF16ToMultiByte = arr
End Function
Private Sub Command1_Click()
MsgBox MultiByteToUTF16(UTF16ToMultiByte("ab中,c", cpUTF8), cpUTF8)
End Sub
#2
+1
#3
你这个到底要干嘛用的?解释一下?
#4
你这个MsgBox MultiByteToUTF16(UTF16ToMultiByte("ab中,c", cpUTF8), cpUTF8)测试到是正常的,但你用我哪个DownToStr抓一个UTF8的网页代码,转换DownToStr中的就会还是更多乱码
#5
确定是UTF8 65001 吗?我用过一个类似函数解出来也是部分乱码.
后来经不懈努力才发现,原来那网页是936的,不是UTF8. 65001 改成936就OK了.
试试咯,解码函数网络有很多,一般都比较成熟不会出错咯~~~
后来经不懈努力才发现,原来那网页是936的,不是UTF8. 65001 改成936就OK了.
试试咯,解码函数网络有很多,一般都比较成熟不会出错咯~~~
#6
要转换不出错,其实代码是没有问题的。关键是你转换的源!!抓取到的数据直接byte数组这样不会出现乱码的,如果是string的话,因为已经转换过一次了,所以会出现乱码的。呵呵
#1
Private Declare Function MultiByteToWideChar Lib "kernel32" (ByVal CodePage As Long, ByVal dwFlags As Long, ByRef lpMultiByteStr As Any, ByVal cchMultiByte As Long, ByVal lpWideCharStr As Long, ByVal cchWideChar As Long) As Long
Private Declare Function WideCharToMultiByte Lib "kernel32" (ByVal CodePage As Long, ByVal dwFlags As Long, ByVal lpWideCharStr As Long, ByVal cchWideChar As Long, ByRef lpMultiByteStr As Any, ByVal cchMultiByte As Long, ByVal lpDefaultChar As Long, ByVal lpUsedDefaultChar As Long) As Long
'常用的代码页:
const cpUTF8 =65001
const cpGB2312 = 936
const cpGB18030=54936
const cpUTF7 =65000
Function MultiByteToUTF16(UTF8() As Byte, CodePage As Long) As String
Dim bufSize As Long
bufSize = MultiByteToWideChar(CodePage, 0&, UTF8(0), UBound(UTF8) + 1, 0, 0)
MultiByteToUTF16 = Space(bufSize)
MultiByteToWideChar CodePage, 0&, UTF8(0), UBound(UTF8) + 1, StrPtr(MultiByteToUTF16), bufSize
End Function
Function UTF16ToMultiByte(UTF16 As String, CodePage As Long) As Byte()
Dim bufSize As Long
Dim arr() As Byte
bufSize = WideCharToMultiByte(CodePage, 0&, StrPtr(UTF16), Len(UTF16), 0, 0, 0, 0)
ReDim arr(bufSize - 1)
WideCharToMultiByte CodePage, 0&, StrPtr(UTF16), Len(UTF16), arr(0), bufSize, 0, 0
UTF16ToMultiByte = arr
End Function
Private Sub Command1_Click()
MsgBox MultiByteToUTF16(UTF16ToMultiByte("ab中,c", cpUTF8), cpUTF8)
End Sub
#2
+1
#3
你这个到底要干嘛用的?解释一下?
#4
你这个MsgBox MultiByteToUTF16(UTF16ToMultiByte("ab中,c", cpUTF8), cpUTF8)测试到是正常的,但你用我哪个DownToStr抓一个UTF8的网页代码,转换DownToStr中的就会还是更多乱码
#5
确定是UTF8 65001 吗?我用过一个类似函数解出来也是部分乱码.
后来经不懈努力才发现,原来那网页是936的,不是UTF8. 65001 改成936就OK了.
试试咯,解码函数网络有很多,一般都比较成熟不会出错咯~~~
后来经不懈努力才发现,原来那网页是936的,不是UTF8. 65001 改成936就OK了.
试试咯,解码函数网络有很多,一般都比较成熟不会出错咯~~~
#6
要转换不出错,其实代码是没有问题的。关键是你转换的源!!抓取到的数据直接byte数组这样不会出现乱码的,如果是string的话,因为已经转换过一次了,所以会出现乱码的。呵呵