比如1-4位代表日期,6-10位代表姓,12-18位代表名,20-30代表公司名...
如:
1024 张 三 A公司
1023 李 四 B公司
我上网查了一下,好象有方法说是先把这几种字符转成unicode,VB里面本来就有这个函数,然后再把unicode转成uft-8.
现在的问题是TXT文件的每一行有20几个字段(日期,姓,名,公司名....还有一些其他属性),我每个字段都调用函数转成unicode,然后再调用函数转成utf-8再调用函数用来格式化字段(比如姓字段有4位,姓不足4位的时候补空格),这样的话感觉效率很低吧.请问这里的高手们有什么其他好的办法吗?
11 个解决方案
#2
VB里有函数直接转?
#3
这个需要自己编写程序转换
#4
摘自网上的一段代码,你看看
Option Explicit
Public Declare Sub CopyMemory Lib "kernel32" Alias "RtlMoveMemory" (pDest As Any, pSrc As Any, ByVal ByteLen As Long)
'如果可选的sytle参数为"hex", 输出utf8的hex字符串(用于调试和专门目的),否则默认输出的是utf的字符串
Public Function Str_UTF_8(ByVal Str_GB As String, Optional ByVal Sytle As String = "string") As String
Dim Source() As Byte
Dim UTF_16 As Long
Dim Str_Bin As String
Dim My_utf_Bin As String
Dim Str_chr As String
Dim UTF_VAL As Long
Dim Str_hex As String
Dim Str_utf_hex As String
Dim i As Integer
Dim j As Integer
Dim nLength As Long
For j = 1 To Len(Str_GB)
CopyMemory UTF_VAL, ByVal StrPtr(Mid(Str_GB, j, 1)), 2 '得到unicode码
Str_hex = Hex(UTF_VAL) '转为16进制字符串
Str_Bin = H_To_B(Str_hex, 16) '转为2进制字符串
If UTF_VAL < &H80 Then ' 1 UTF-8 byte
My_utf_Bin = Mid(Str_Bin, 9, 8)
ElseIf UTF_VAL < &H800 Then ' 2 UTF-8 bytes
My_utf_Bin = "110" + Mid(Str_Bin, 5, 5) + "10" + Mid(Str_Bin, 11, 6)
Else ' 3 UTF-8 bytes
My_utf_Bin = "1110" + Mid(Str_Bin, 1, 4) + "10" + Mid(Str_Bin, 5, 6) + "10" + Mid(Str_Bin, 11, 6)
End If
Str_utf_hex = Str_utf_hex + B_To_H(My_utf_Bin) '转为utf8的16进制字符串
Next j
nLength = Len(Str_utf_hex) / 2
ReDim Source(Len(Str_utf_hex) / 2)
For i = 1 To Len(Str_utf_hex) Step 2
CopyMemory Source((i + 1) / 2), ByVal StrPtr(ChrB("&h" + Mid(Str_utf_hex, i, 2))), 1
Str_chr = Str_chr & ChrB(Source((i + 1) / 2))
Next i
If Sytle = "hex" Or Sytle = "Hex" Or Sytle = "HEX" Then '判断是不是要输出机器码
Str_UTF_8 = Str_utf_hex
Else
Str_UTF_8 = Str_chr
End If
End Function
'二进制转16进制函数
Public Function B_To_H(ByVal Bininary_in As String) As String
Dim i As Long
Dim H As String
If Len(Bininary_in) Mod 4 <> 0 Then
Bininary_in = String(4 - Len(Bininary_in) Mod 4, "0") & Bininary_in
End If
For i = 1 To Len(Bininary_in) Step 4
Select Case Mid(Bininary_in, i, 4)
Case "0000": H = H & "0"
Case "0001": H = H & "1"
Case "0010": H = H & "2"
Case "0011": H = H & "3"
Case "0100": H = H & "4"
Case "0101": H = H & "5"
Case "0110": H = H & "6"
Case "0111": H = H & "7"
Case "1000": H = H & "8"
Case "1001": H = H & "9"
Case "1010": H = H & "A"
Case "1011": H = H & "B"
Case "1100": H = H & "C"
Case "1101": H = H & "D"
Case "1110": H = H & "E"
Case "1111": H = H & "F"
End Select
Next i
B_To_H = H
End Function
'16进制转二进制函数
Public Function H_To_B(ByVal hex_str As String, MinimumDigits As Integer) As String
Dim i As Long
Dim B As String
Dim ExtraDigitsNeeded As Integer
hex_str = UCase(hex_str)
For i = 1 To Len(hex_str)
Select Case Mid(hex_str, i, 1)
Case "0": B = B & "0000"
Case "1": B = B & "0001"
Case "2": B = B & "0010"
Case "3": B = B & "0011"
Case "4": B = B & "0100"
Case "5": B = B & "0101"
Case "6": B = B & "0110"
Case "7": B = B & "0111"
Case "8": B = B & "1000"
Case "9": B = B & "1001"
Case "A": B = B & "1010"
Case "B": B = B & "1011"
Case "C": B = B & "1100"
Case "D": B = B & "1101"
Case "E": B = B & "1110"
Case "F": B = B & "1111"
End Select
Next i
ExtraDigitsNeeded = MinimumDigits - Len(B)
If ExtraDigitsNeeded > 0 Then
B = String(ExtraDigitsNeeded, "0") & B
End If
H_To_B = B
End Function
#5
谢谢Veron_04~!我仔细看了你下面说的这个帖子,用了里面的函数把几种语言都成功转成了utf-8码. 可我这个文件需要上传到unix服务器,生成的文件是PC格式的,请问有什么办法转成unix格式吗?
#6
晕,上个回复引用错误.该引用下面这段才对:)
#7
unicode 文件会有 big-endian 和 little-endian 区别,而 utf-8 文件是不存在这个问题的。
不信你用记事本保存一个 utf-8 文本,上传到 unix 上打开试试。
不信你用记事本保存一个 utf-8 文本,上传到 unix 上打开试试。
#8
'测试
Private Sub Command1_Click()
Dim byt() As Byte
Dim str As String
str = str & ChkStr(CNull(Text1.Text), 6)
str = str & ChkStr(CNull(Text2.Text), 6)
str = str & ChkStr(CNull(Text3.Text), 6)
byt = UnicodeToUtf8(str)
Dim sMast As String
sMast = App.Path + "\PA\" + "test.txt"
Open sMast For Binary As #1
Put #1, , byt()
Close #1
End Sub
'网上查的转成UTF-8的函数
Public Function UnicodeToUtf8(ByVal UCS As String) As Byte()
Dim lLength As Long
Dim lBufferSize As Long
Dim lResult As Long
Dim abUTF8() As Byte
lLength = Len(UCS)
If lLength = 0 Then Exit Function
lBufferSize = lLength * 3 + 1
ReDim abUTF8(lBufferSize - 1)
lResult = WideCharToMultiByte(CP_UTF8, 0, StrPtr(UCS), lLength, abUTF8(0), lBufferSize, vbNullString, 0)
If lResult <> 0 Then
lResult = lResult - 1
ReDim Preserve abUTF8(lResult)
UnicodeToUtf8 = abUTF8
End If
End Function
'格式化输出,比如姓占6位,名占6位,公司名占6位....
Public Function ChkStr(strX As String, iLen As Integer) As String
If InStr(strX, vbCrLf) > 0 Or InStr(strX, vbCr) > 0 Or InStr(strX, vbLf) > 0 Then
strX = Replace(strX, vbCrLf, "")
End If
Dim iLenStrX As Integer
iLenStrX = LenB(StrConv(strX, vbFromUnicode, LocaleID))
If iLenStrX > iLen Then
strX = Space(iLen)
End If
If iLenStrX < iLen Then
strX = strX & Space(iLen - iLenStrX)
End If
ChkStr = strX
End Function
Private Sub Command1_Click()
Dim byt() As Byte
Dim str As String
str = str & ChkStr(CNull(Text1.Text), 6)
str = str & ChkStr(CNull(Text2.Text), 6)
str = str & ChkStr(CNull(Text3.Text), 6)
byt = UnicodeToUtf8(str)
Dim sMast As String
sMast = App.Path + "\PA\" + "test.txt"
Open sMast For Binary As #1
Put #1, , byt()
Close #1
End Sub
'网上查的转成UTF-8的函数
Public Function UnicodeToUtf8(ByVal UCS As String) As Byte()
Dim lLength As Long
Dim lBufferSize As Long
Dim lResult As Long
Dim abUTF8() As Byte
lLength = Len(UCS)
If lLength = 0 Then Exit Function
lBufferSize = lLength * 3 + 1
ReDim abUTF8(lBufferSize - 1)
lResult = WideCharToMultiByte(CP_UTF8, 0, StrPtr(UCS), lLength, abUTF8(0), lBufferSize, vbNullString, 0)
If lResult <> 0 Then
lResult = lResult - 1
ReDim Preserve abUTF8(lResult)
UnicodeToUtf8 = abUTF8
End If
End Function
'格式化输出,比如姓占6位,名占6位,公司名占6位....
Public Function ChkStr(strX As String, iLen As Integer) As String
If InStr(strX, vbCrLf) > 0 Or InStr(strX, vbCr) > 0 Or InStr(strX, vbLf) > 0 Then
strX = Replace(strX, vbCrLf, "")
End If
Dim iLenStrX As Integer
iLenStrX = LenB(StrConv(strX, vbFromUnicode, LocaleID))
If iLenStrX > iLen Then
strX = Space(iLen)
End If
If iLenStrX < iLen Then
strX = strX & Space(iLen - iLenStrX)
End If
ChkStr = strX
End Function
#9
上传到unix打开是乱码. 用Editplus打开很正常,可看下面状态提示是:PC UTF-8格式.
我上传不了图.
中文用Editplus打开如下:
张三 xyz 某公司
繁体如下:
桃園市abc 工程師
英文如下:
Text1 Text2 Text3
Editplus 状态栏显示 PC ANSI(不是UTF-8...)
韩文如下:
부천시 abc 상만
我上传不了图.
中文用Editplus打开如下:
张三 xyz 某公司
繁体如下:
桃園市abc 工程師
英文如下:
Text1 Text2 Text3
Editplus 状态栏显示 PC ANSI(不是UTF-8...)
韩文如下:
부천시 abc 상만
#10
上传到unix,打开如下
寮| 涓~I ab 鎫_~P鍉E~O?
寮| 涓~I ab 鎫_~P鍉E~O?
#1
#2
VB里有函数直接转?
#3
这个需要自己编写程序转换
#4
摘自网上的一段代码,你看看
Option Explicit
Public Declare Sub CopyMemory Lib "kernel32" Alias "RtlMoveMemory" (pDest As Any, pSrc As Any, ByVal ByteLen As Long)
'如果可选的sytle参数为"hex", 输出utf8的hex字符串(用于调试和专门目的),否则默认输出的是utf的字符串
Public Function Str_UTF_8(ByVal Str_GB As String, Optional ByVal Sytle As String = "string") As String
Dim Source() As Byte
Dim UTF_16 As Long
Dim Str_Bin As String
Dim My_utf_Bin As String
Dim Str_chr As String
Dim UTF_VAL As Long
Dim Str_hex As String
Dim Str_utf_hex As String
Dim i As Integer
Dim j As Integer
Dim nLength As Long
For j = 1 To Len(Str_GB)
CopyMemory UTF_VAL, ByVal StrPtr(Mid(Str_GB, j, 1)), 2 '得到unicode码
Str_hex = Hex(UTF_VAL) '转为16进制字符串
Str_Bin = H_To_B(Str_hex, 16) '转为2进制字符串
If UTF_VAL < &H80 Then ' 1 UTF-8 byte
My_utf_Bin = Mid(Str_Bin, 9, 8)
ElseIf UTF_VAL < &H800 Then ' 2 UTF-8 bytes
My_utf_Bin = "110" + Mid(Str_Bin, 5, 5) + "10" + Mid(Str_Bin, 11, 6)
Else ' 3 UTF-8 bytes
My_utf_Bin = "1110" + Mid(Str_Bin, 1, 4) + "10" + Mid(Str_Bin, 5, 6) + "10" + Mid(Str_Bin, 11, 6)
End If
Str_utf_hex = Str_utf_hex + B_To_H(My_utf_Bin) '转为utf8的16进制字符串
Next j
nLength = Len(Str_utf_hex) / 2
ReDim Source(Len(Str_utf_hex) / 2)
For i = 1 To Len(Str_utf_hex) Step 2
CopyMemory Source((i + 1) / 2), ByVal StrPtr(ChrB("&h" + Mid(Str_utf_hex, i, 2))), 1
Str_chr = Str_chr & ChrB(Source((i + 1) / 2))
Next i
If Sytle = "hex" Or Sytle = "Hex" Or Sytle = "HEX" Then '判断是不是要输出机器码
Str_UTF_8 = Str_utf_hex
Else
Str_UTF_8 = Str_chr
End If
End Function
'二进制转16进制函数
Public Function B_To_H(ByVal Bininary_in As String) As String
Dim i As Long
Dim H As String
If Len(Bininary_in) Mod 4 <> 0 Then
Bininary_in = String(4 - Len(Bininary_in) Mod 4, "0") & Bininary_in
End If
For i = 1 To Len(Bininary_in) Step 4
Select Case Mid(Bininary_in, i, 4)
Case "0000": H = H & "0"
Case "0001": H = H & "1"
Case "0010": H = H & "2"
Case "0011": H = H & "3"
Case "0100": H = H & "4"
Case "0101": H = H & "5"
Case "0110": H = H & "6"
Case "0111": H = H & "7"
Case "1000": H = H & "8"
Case "1001": H = H & "9"
Case "1010": H = H & "A"
Case "1011": H = H & "B"
Case "1100": H = H & "C"
Case "1101": H = H & "D"
Case "1110": H = H & "E"
Case "1111": H = H & "F"
End Select
Next i
B_To_H = H
End Function
'16进制转二进制函数
Public Function H_To_B(ByVal hex_str As String, MinimumDigits As Integer) As String
Dim i As Long
Dim B As String
Dim ExtraDigitsNeeded As Integer
hex_str = UCase(hex_str)
For i = 1 To Len(hex_str)
Select Case Mid(hex_str, i, 1)
Case "0": B = B & "0000"
Case "1": B = B & "0001"
Case "2": B = B & "0010"
Case "3": B = B & "0011"
Case "4": B = B & "0100"
Case "5": B = B & "0101"
Case "6": B = B & "0110"
Case "7": B = B & "0111"
Case "8": B = B & "1000"
Case "9": B = B & "1001"
Case "A": B = B & "1010"
Case "B": B = B & "1011"
Case "C": B = B & "1100"
Case "D": B = B & "1101"
Case "E": B = B & "1110"
Case "F": B = B & "1111"
End Select
Next i
ExtraDigitsNeeded = MinimumDigits - Len(B)
If ExtraDigitsNeeded > 0 Then
B = String(ExtraDigitsNeeded, "0") & B
End If
H_To_B = B
End Function
#5
谢谢Veron_04~!我仔细看了你下面说的这个帖子,用了里面的函数把几种语言都成功转成了utf-8码. 可我这个文件需要上传到unix服务器,生成的文件是PC格式的,请问有什么办法转成unix格式吗?
#6
晕,上个回复引用错误.该引用下面这段才对:)
#7
unicode 文件会有 big-endian 和 little-endian 区别,而 utf-8 文件是不存在这个问题的。
不信你用记事本保存一个 utf-8 文本,上传到 unix 上打开试试。
不信你用记事本保存一个 utf-8 文本,上传到 unix 上打开试试。
#8
'测试
Private Sub Command1_Click()
Dim byt() As Byte
Dim str As String
str = str & ChkStr(CNull(Text1.Text), 6)
str = str & ChkStr(CNull(Text2.Text), 6)
str = str & ChkStr(CNull(Text3.Text), 6)
byt = UnicodeToUtf8(str)
Dim sMast As String
sMast = App.Path + "\PA\" + "test.txt"
Open sMast For Binary As #1
Put #1, , byt()
Close #1
End Sub
'网上查的转成UTF-8的函数
Public Function UnicodeToUtf8(ByVal UCS As String) As Byte()
Dim lLength As Long
Dim lBufferSize As Long
Dim lResult As Long
Dim abUTF8() As Byte
lLength = Len(UCS)
If lLength = 0 Then Exit Function
lBufferSize = lLength * 3 + 1
ReDim abUTF8(lBufferSize - 1)
lResult = WideCharToMultiByte(CP_UTF8, 0, StrPtr(UCS), lLength, abUTF8(0), lBufferSize, vbNullString, 0)
If lResult <> 0 Then
lResult = lResult - 1
ReDim Preserve abUTF8(lResult)
UnicodeToUtf8 = abUTF8
End If
End Function
'格式化输出,比如姓占6位,名占6位,公司名占6位....
Public Function ChkStr(strX As String, iLen As Integer) As String
If InStr(strX, vbCrLf) > 0 Or InStr(strX, vbCr) > 0 Or InStr(strX, vbLf) > 0 Then
strX = Replace(strX, vbCrLf, "")
End If
Dim iLenStrX As Integer
iLenStrX = LenB(StrConv(strX, vbFromUnicode, LocaleID))
If iLenStrX > iLen Then
strX = Space(iLen)
End If
If iLenStrX < iLen Then
strX = strX & Space(iLen - iLenStrX)
End If
ChkStr = strX
End Function
Private Sub Command1_Click()
Dim byt() As Byte
Dim str As String
str = str & ChkStr(CNull(Text1.Text), 6)
str = str & ChkStr(CNull(Text2.Text), 6)
str = str & ChkStr(CNull(Text3.Text), 6)
byt = UnicodeToUtf8(str)
Dim sMast As String
sMast = App.Path + "\PA\" + "test.txt"
Open sMast For Binary As #1
Put #1, , byt()
Close #1
End Sub
'网上查的转成UTF-8的函数
Public Function UnicodeToUtf8(ByVal UCS As String) As Byte()
Dim lLength As Long
Dim lBufferSize As Long
Dim lResult As Long
Dim abUTF8() As Byte
lLength = Len(UCS)
If lLength = 0 Then Exit Function
lBufferSize = lLength * 3 + 1
ReDim abUTF8(lBufferSize - 1)
lResult = WideCharToMultiByte(CP_UTF8, 0, StrPtr(UCS), lLength, abUTF8(0), lBufferSize, vbNullString, 0)
If lResult <> 0 Then
lResult = lResult - 1
ReDim Preserve abUTF8(lResult)
UnicodeToUtf8 = abUTF8
End If
End Function
'格式化输出,比如姓占6位,名占6位,公司名占6位....
Public Function ChkStr(strX As String, iLen As Integer) As String
If InStr(strX, vbCrLf) > 0 Or InStr(strX, vbCr) > 0 Or InStr(strX, vbLf) > 0 Then
strX = Replace(strX, vbCrLf, "")
End If
Dim iLenStrX As Integer
iLenStrX = LenB(StrConv(strX, vbFromUnicode, LocaleID))
If iLenStrX > iLen Then
strX = Space(iLen)
End If
If iLenStrX < iLen Then
strX = strX & Space(iLen - iLenStrX)
End If
ChkStr = strX
End Function
#9
上传到unix打开是乱码. 用Editplus打开很正常,可看下面状态提示是:PC UTF-8格式.
我上传不了图.
中文用Editplus打开如下:
张三 xyz 某公司
繁体如下:
桃園市abc 工程師
英文如下:
Text1 Text2 Text3
Editplus 状态栏显示 PC ANSI(不是UTF-8...)
韩文如下:
부천시 abc 상만
我上传不了图.
中文用Editplus打开如下:
张三 xyz 某公司
繁体如下:
桃園市abc 工程師
英文如下:
Text1 Text2 Text3
Editplus 状态栏显示 PC ANSI(不是UTF-8...)
韩文如下:
부천시 abc 상만
#10
上传到unix,打开如下
寮| 涓~I ab 鎫_~P鍉E~O?
寮| 涓~I ab 鎫_~P鍉E~O?
#11
见《
利用ADO STREAM实现GB2312和UTF8编码转换
》,其它编码的也可以参照实现。
》,其它编码的也可以参照实现。