主要是用于URL的编码。
9 个解决方案
#1
有没有转化函数?
#2
api
WideCharToMultiByte
MultiByteToWideChar
先把ansi字串用MultiByteToWideChar转为unicode
再把这个unicode用WideCharToMultiByte转成gbk
WideCharToMultiByte
MultiByteToWideChar
先把ansi字串用MultiByteToWideChar转为unicode
再把这个unicode用WideCharToMultiByte转成gbk
#3
码农何苦为难码农,你是delphi论坛的高手啊。
我试试。
#4
呵呵,支持转化为GBK或UTF-8,转化为UTF-8比较方便,
那就转化为UTF-8了。
2010-08-19 14:12 将字符串转换成 UTF8 编码的函数 delphi7:
{ Converts str to UTF-8 with AnsiToUTF8 and returns the UTF-8 bytes as a
  string of hexadecimal digits (two digits per byte, as written by BinToHex,
  with no separator or '%' prefix between bytes). }
function ToUTF8Encode(str: string): string;
var
  Utf8Text: UTF8String;
begin
  Utf8Text := AnsiToUTF8(str);
  // Each source byte expands to exactly two hex digits.
  SetLength(Result, 2 * Length(Utf8Text));
  BinToHex(PChar(Utf8Text), PChar(Result), Length(Utf8Text));
end;
delphi2010:
{ Percent-encodes every byte of the UTF-8 representation of str.
  Each byte becomes '%' followed by two hexadecimal digits, e.g. the
  two-character string '万一' yields '%E4%B8%87%E4%B8%80'.
  Returns an empty string for empty input. }
function ToUTF8Encode(str: string): string;
var
  b: Byte;
begin
  // A string function result is not guaranteed to be empty on entry;
  // without this, stale contents could be prefixed to the output.
  Result := '';
  for b in BytesOf(UTF8Encode(str)) do
    Result := Result + '%' + IntToHex(b, 2);
end;
// Test: encode a two-character Chinese string and display the encoded result.
var
str: string;
begin
str := '万一';
str := ToUTF8Encode(str);
ShowMessage(str); // expected output: %E4%B8%87%E4%B8%80
end;
{ Reverse of ToUTF8Encode: turns a string of '%XX' escapes back into text.
  The input is split on '%'; every piece is parsed as a hexadecimal number
  and stored as one byte, and the resulting byte sequence is decoded as UTF-8.
  Raises EConvertError (from StrToInt) when a piece is not valid hexadecimal.
  NOTE(review): text that is not written as %XX escapes is not passed
  through unchanged - confirm callers only supply fully escaped input. }
function ToUTF8Decode(const str: string): string;
var
  List: TStrings;
  tmpStr: AnsiString;
  i: Integer;
begin
  List := TStringList.Create;
  try
    ExtractStrings(['%'], ['%'], PChar(str), List);
    SetLength(tmpStr, List.Count);
    for i := 0 to List.Count - 1 do
      Byte(tmpStr[i + 1]) := StrToInt('$' + List[i]);
  finally
    // Release the list even when StrToInt raises on malformed input.
    List.Free;
  end;
  Result := UTF8Decode(tmpStr);
end;
{ Usage test: decodes a percent-encoded sample with ToUTF8Decode and shows
  the decoded text in a message box. }
procedure TForm1.FormCreate(Sender: TObject);
var
  Encoded: AnsiString;
  Decoded: WideString;
begin
  Encoded := '%E4%B8%87%E4%B8%80';
  Decoded := ToUTF8Decode(Encoded);
  ShowMessage(Decoded); { expected: 万一 }
end;
那就转化为UTF-8了。
2010-08-19 14:12 将字符串转换成 UTF8 编码的函数 delphi7:
{ Converts str to UTF-8 with AnsiToUTF8 and returns the UTF-8 bytes as a
  string of hexadecimal digits (two digits per byte, as written by BinToHex,
  with no separator or '%' prefix between bytes). }
function ToUTF8Encode(str: string): string;
var
  Utf8Text: UTF8String;
begin
  Utf8Text := AnsiToUTF8(str);
  // Each source byte expands to exactly two hex digits.
  SetLength(Result, 2 * Length(Utf8Text));
  BinToHex(PChar(Utf8Text), PChar(Result), Length(Utf8Text));
end;
delphi2010:
{ Percent-encodes every byte of the UTF-8 representation of str.
  Each byte becomes '%' followed by two hexadecimal digits, e.g. the
  two-character string '万一' yields '%E4%B8%87%E4%B8%80'.
  Returns an empty string for empty input. }
function ToUTF8Encode(str: string): string;
var
  b: Byte;
begin
  // A string function result is not guaranteed to be empty on entry;
  // without this, stale contents could be prefixed to the output.
  Result := '';
  for b in BytesOf(UTF8Encode(str)) do
    Result := Result + '%' + IntToHex(b, 2);
end;
// Test: encode a two-character Chinese string and display the encoded result.
var
str: string;
begin
str := '万一';
str := ToUTF8Encode(str);
ShowMessage(str); // expected output: %E4%B8%87%E4%B8%80
end;
{ Reverse of ToUTF8Encode: turns a string of '%XX' escapes back into text.
  The input is split on '%'; every piece is parsed as a hexadecimal number
  and stored as one byte, and the resulting byte sequence is decoded as UTF-8.
  Raises EConvertError (from StrToInt) when a piece is not valid hexadecimal.
  NOTE(review): text that is not written as %XX escapes is not passed
  through unchanged - confirm callers only supply fully escaped input. }
function ToUTF8Decode(const str: string): string;
var
  List: TStrings;
  tmpStr: AnsiString;
  i: Integer;
begin
  List := TStringList.Create;
  try
    ExtractStrings(['%'], ['%'], PChar(str), List);
    SetLength(tmpStr, List.Count);
    for i := 0 to List.Count - 1 do
      Byte(tmpStr[i + 1]) := StrToInt('$' + List[i]);
  finally
    // Release the list even when StrToInt raises on malformed input.
    List.Free;
  end;
  Result := UTF8Decode(tmpStr);
end;
{ Usage test: decodes a percent-encoded sample with ToUTF8Decode and shows
  the decoded text in a message box. }
procedure TForm1.FormCreate(Sender: TObject);
var
  Encoded: AnsiString;
  Decoded: WideString;
begin
  Encoded := '%E4%B8%87%E4%B8%80';
  Decoded := ToUTF8Decode(Encoded);
  ShowMessage(Decoded); { expected: 万一 }
end;
#5
上面这个不完全符合要求,需要实现类似 Java 中 URLEncoder.encode 函数的功能:
java.net.URLEncoder
Utility class for HTML form encoding.
This class contains static methods for converting a String
to the application/x-www-form-urlencoded MIME format.
For more information about HTML form encoding, consult the HTML specification.
When encoding a String, the following rules apply:
The alphanumeric characters "a" through "z", "A" through "Z" and "0" through "9" remain the same.
The special characters ".", "-", "*", and "_" remain the same.
The space character " " is converted into a plus sign "+".
All other characters are unsafe and
are first converted into one or more bytes using some encoding scheme.
Then each byte is represented by the 3-character string "%xy",
where xy is the two-digit hexadecimal representation of the byte.
The recommended encoding scheme to use is UTF-8. However,
for compatibility reasons, if an encoding is not specified,
then the default encoding of the platform is used.
For example using UTF-8 as the encoding scheme the string "The string ü@foo-bar"
would get converted to "The+string+%C3%BC%40foo-bar"
because in UTF-8 the character ü is encoded as two bytes C3 (hex) and BC (hex),
and the character @ is encoded as one byte 40 (hex).
Since:
JDK1.0
java.net.URLEncoder
Utility class for HTML form encoding.
This class contains static methods for converting a String
to the application/x-www-form-urlencoded MIME format.
For more information about HTML form encoding, consult the HTML specification.
When encoding a String, the following rules apply:
The alphanumeric characters "a" through "z", "A" through "Z" and "0" through "9" remain the same.
The special characters ".", "-", "*", and "_" remain the same.
The space character " " is converted into a plus sign "+".
All other characters are unsafe and
are first converted into one or more bytes using some encoding scheme.
Then each byte is represented by the 3-character string "%xy",
where xy is the two-digit hexadecimal representation of the byte.
The recommended encoding scheme to use is UTF-8. However,
for compatibility reasons, if an encoding is not specified,
then the default encoding of the platform is used.
For example using UTF-8 as the encoding scheme the string "The string ü@foo-bar"
would get converted to "The+string+%C3%BC%40foo-bar"
because in UTF-8 the character ü is encoded as two bytes C3 (hex) and BC (hex),
and the character @ is encoded as one byte 40 (hex).
Since:
JDK1.0
#6
我在100年前的程序里找出来的。嘿嘿
{ URL-encodes Code character by character.
  - A space becomes '+'.
  - Letters, digits and the characters * @ . _ - $ ! ' ( ) are copied as is.
  - Every other character becomes '%' followed by the hexadecimal value of
    its ordinal (Ord), at least two digits.
  The escape is built from the ordinal of each string element, so the
  caller decides the byte encoding of the output.
  NOTE(review): presumably Code should already hold the bytes of the target
  encoding (e.g. UTF-8) - confirm against callers.
  Returns an empty string for empty input. }
function EnCode(Code: string): string;
var
  I: Integer;
  Hex: string;
begin
  // A string function result is not guaranteed to be empty on entry, and
  // the loop below only ever appends to it.
  Result := '';
  for I := 1 to Length(Code) do
    case Code[I] of
      ' ':
        Result := Result + '+';
      'A'..'Z', 'a'..'z', '*', '@', '.', '_', '-',
      '0'..'9', '$', '!', '''', '(', ')':
        Result := Result + Code[I];
    else
      begin
        Hex := IntToHex(Ord(Code[I]), 2);
        if Length(Hex) = 2 then
          Result := Result + '%' + Hex
        else
          Result := Result + '%0' + Hex;
      end;
    end;
end;
#7
// Example call: the search keyword is passed through EnCode before it is
// appended to the query string. The line below is the resulting URL as
// reported by the poster.
URL := 'http://so.360.cn/s?src=hao_hot&q=' + EnCode('武汉公交车自燃');
//http://so.360.cn/s?src=hao_hot&q=%E6%AD%A6%E6%B1%89%E5%85%AC%E4%BA%A4%E8%BD%A6%E8%87%AA%E7%87%83
#8
// Produces the URL-encoded value of a string in its current (GBK-style)
// encoding. (Original poster's note: Taobao simplifies this step; Paipai
// handles both UTF-8 and GBK.)
{ HTTPEncode replaces in-program separator characters such as '/' and '\'
  in URL requests with generic escape sequences, so that separators inside
  request values cannot be confused with the real separators and break
  parsing.
  URLEncode keeps letters and digits unchanged and escapes everything else;
  the escaped values differ depending on the encoding of the input
  (GBK, UTF-8, ...).

  URLEncode_gbk rules:
  - letters, digits, '-', '_' and '.' are copied unchanged;
  - a space becomes '%20';
  - every other element becomes '%' plus the hexadecimal value of its
    ordinal (at least two digits). }
function URLEncode_gbk(S: string): string;
var
  Idx: Integer;
  Piece: string;
begin
  Result := '';
  for Idx := 1 to Length(S) do
  begin
    case S[Idx] of
      ' ':
        // A space ends up as '%20' in the final output.
        Piece := '%20';
      'A'..'Z', 'a'..'z', '0'..'9', '-', '_', '.':
        // Common letters, digits and '-_.' are not separators; keep them.
        Piece := S[Idx];
    else
      // Any other symbol (including '+' and '*') is escaped from its ordinal.
      Piece := '%' + SysUtils.IntToHex(Ord(S[Idx]), 2);
    end;
    Result := Result + Piece;
  end;
end;
{ HTTPEncode replaces in-program separator characters such as '/' and '\'
  in URL requests with generic escape sequences, so that separators inside
  request values cannot be confused with the real separators and break
  parsing.
  URLEncode keeps letters and digits unchanged and escapes everything else;
  the escaped values differ depending on the encoding of the input
  (GBK, UTF-8, ...).

  URLEncode_gbk rules:
  - letters, digits, '-', '_' and '.' are copied unchanged;
  - a space becomes '%20';
  - every other element becomes '%' plus the hexadecimal value of its
    ordinal (at least two digits). }
function URLEncode_gbk(S: string): string;
var
  Idx: Integer;
  Piece: string;
begin
  Result := '';
  for Idx := 1 to Length(S) do
  begin
    case S[Idx] of
      ' ':
        // A space ends up as '%20' in the final output.
        Piece := '%20';
      'A'..'Z', 'a'..'z', '0'..'9', '-', '_', '.':
        // Common letters, digits and '-_.' are not separators; keep them.
        Piece := S[Idx];
    else
      // Any other symbol (including '+' and '*') is escaped from its ordinal.
      Piece := '%' + SysUtils.IntToHex(Ord(S[Idx]), 2);
    end;
    Result := Result + Piece;
  end;
end;
#9
上面的函数 URLEncode_gbk 是按 GBK 编码转换的;如果要转换为 UTF-8
(也就是每个中文字符编码成 3 个字节),应该怎么改写?
就是遇到中文的是变成3个字节,怎么改写。
#1
有没有转化函数?
#2
api
WideCharToMultiByte
MultiByteToWideChar
先把ansi字串用MultiByteToWideChar转为unicode
再把这个unicode用WideCharToMultiByte转成gbk
WideCharToMultiByte
MultiByteToWideChar
先把ansi字串用MultiByteToWideChar转为unicode
再把这个unicode用WideCharToMultiByte转成gbk
#3
码农何苦为难码农,你是delphi论坛的高手啊。
我试试。
#4
呵呵,支持转化为GBK或UTF-8,转化为UTF-8比较方便,
那就转化为UTF-8了。
2010-08-19 14:12 将字符串转换成 UTF8 编码的函数 delphi7:
{ Converts str to UTF-8 with AnsiToUTF8 and returns the UTF-8 bytes as a
  string of hexadecimal digits (two digits per byte, as written by BinToHex,
  with no separator or '%' prefix between bytes). }
function ToUTF8Encode(str: string): string;
var
  Utf8Text: UTF8String;
begin
  Utf8Text := AnsiToUTF8(str);
  // Each source byte expands to exactly two hex digits.
  SetLength(Result, 2 * Length(Utf8Text));
  BinToHex(PChar(Utf8Text), PChar(Result), Length(Utf8Text));
end;
delphi2010:
{ Percent-encodes every byte of the UTF-8 representation of str.
  Each byte becomes '%' followed by two hexadecimal digits, e.g. the
  two-character string '万一' yields '%E4%B8%87%E4%B8%80'.
  Returns an empty string for empty input. }
function ToUTF8Encode(str: string): string;
var
  b: Byte;
begin
  // A string function result is not guaranteed to be empty on entry;
  // without this, stale contents could be prefixed to the output.
  Result := '';
  for b in BytesOf(UTF8Encode(str)) do
    Result := Result + '%' + IntToHex(b, 2);
end;
// Test: encode a two-character Chinese string and display the encoded result.
var
str: string;
begin
str := '万一';
str := ToUTF8Encode(str);
ShowMessage(str); // expected output: %E4%B8%87%E4%B8%80
end;
{ Reverse of ToUTF8Encode: turns a string of '%XX' escapes back into text.
  The input is split on '%'; every piece is parsed as a hexadecimal number
  and stored as one byte, and the resulting byte sequence is decoded as UTF-8.
  Raises EConvertError (from StrToInt) when a piece is not valid hexadecimal.
  NOTE(review): text that is not written as %XX escapes is not passed
  through unchanged - confirm callers only supply fully escaped input. }
function ToUTF8Decode(const str: string): string;
var
  List: TStrings;
  tmpStr: AnsiString;
  i: Integer;
begin
  List := TStringList.Create;
  try
    ExtractStrings(['%'], ['%'], PChar(str), List);
    SetLength(tmpStr, List.Count);
    for i := 0 to List.Count - 1 do
      Byte(tmpStr[i + 1]) := StrToInt('$' + List[i]);
  finally
    // Release the list even when StrToInt raises on malformed input.
    List.Free;
  end;
  Result := UTF8Decode(tmpStr);
end;
{ Usage test: decodes a percent-encoded sample with ToUTF8Decode and shows
  the decoded text in a message box. }
procedure TForm1.FormCreate(Sender: TObject);
var
  Encoded: AnsiString;
  Decoded: WideString;
begin
  Encoded := '%E4%B8%87%E4%B8%80';
  Decoded := ToUTF8Decode(Encoded);
  ShowMessage(Decoded); { expected: 万一 }
end;
那就转化为UTF-8了。
2010-08-19 14:12 将字符串转换成 UTF8 编码的函数 delphi7:
{ Converts str to UTF-8 with AnsiToUTF8 and returns the UTF-8 bytes as a
  string of hexadecimal digits (two digits per byte, as written by BinToHex,
  with no separator or '%' prefix between bytes). }
function ToUTF8Encode(str: string): string;
var
  Utf8Text: UTF8String;
begin
  Utf8Text := AnsiToUTF8(str);
  // Each source byte expands to exactly two hex digits.
  SetLength(Result, 2 * Length(Utf8Text));
  BinToHex(PChar(Utf8Text), PChar(Result), Length(Utf8Text));
end;
delphi2010:
{ Percent-encodes every byte of the UTF-8 representation of str.
  Each byte becomes '%' followed by two hexadecimal digits, e.g. the
  two-character string '万一' yields '%E4%B8%87%E4%B8%80'.
  Returns an empty string for empty input. }
function ToUTF8Encode(str: string): string;
var
  b: Byte;
begin
  // A string function result is not guaranteed to be empty on entry;
  // without this, stale contents could be prefixed to the output.
  Result := '';
  for b in BytesOf(UTF8Encode(str)) do
    Result := Result + '%' + IntToHex(b, 2);
end;
// Test: encode a two-character Chinese string and display the encoded result.
var
str: string;
begin
str := '万一';
str := ToUTF8Encode(str);
ShowMessage(str); // expected output: %E4%B8%87%E4%B8%80
end;
{ Reverse of ToUTF8Encode: turns a string of '%XX' escapes back into text.
  The input is split on '%'; every piece is parsed as a hexadecimal number
  and stored as one byte, and the resulting byte sequence is decoded as UTF-8.
  Raises EConvertError (from StrToInt) when a piece is not valid hexadecimal.
  NOTE(review): text that is not written as %XX escapes is not passed
  through unchanged - confirm callers only supply fully escaped input. }
function ToUTF8Decode(const str: string): string;
var
  List: TStrings;
  tmpStr: AnsiString;
  i: Integer;
begin
  List := TStringList.Create;
  try
    ExtractStrings(['%'], ['%'], PChar(str), List);
    SetLength(tmpStr, List.Count);
    for i := 0 to List.Count - 1 do
      Byte(tmpStr[i + 1]) := StrToInt('$' + List[i]);
  finally
    // Release the list even when StrToInt raises on malformed input.
    List.Free;
  end;
  Result := UTF8Decode(tmpStr);
end;
{ Usage test: decodes a percent-encoded sample with ToUTF8Decode and shows
  the decoded text in a message box. }
procedure TForm1.FormCreate(Sender: TObject);
var
  Encoded: AnsiString;
  Decoded: WideString;
begin
  Encoded := '%E4%B8%87%E4%B8%80';
  Decoded := ToUTF8Decode(Encoded);
  ShowMessage(Decoded); { expected: 万一 }
end;
#5
上面这个不完全符合要求,需要实现类似 Java 中 URLEncoder.encode 函数的功能:
java.net.URLEncoder
Utility class for HTML form encoding.
This class contains static methods for converting a String
to the application/x-www-form-urlencoded MIME format.
For more information about HTML form encoding, consult the HTML specification.
When encoding a String, the following rules apply:
The alphanumeric characters "a" through "z", "A" through "Z" and "0" through "9" remain the same.
The special characters ".", "-", "*", and "_" remain the same.
The space character " " is converted into a plus sign "+".
All other characters are unsafe and
are first converted into one or more bytes using some encoding scheme.
Then each byte is represented by the 3-character string "%xy",
where xy is the two-digit hexadecimal representation of the byte.
The recommended encoding scheme to use is UTF-8. However,
for compatibility reasons, if an encoding is not specified,
then the default encoding of the platform is used.
For example using UTF-8 as the encoding scheme the string "The string ü@foo-bar"
would get converted to "The+string+%C3%BC%40foo-bar"
because in UTF-8 the character ü is encoded as two bytes C3 (hex) and BC (hex),
and the character @ is encoded as one byte 40 (hex).
Since:
JDK1.0
java.net.URLEncoder
Utility class for HTML form encoding.
This class contains static methods for converting a String
to the application/x-www-form-urlencoded MIME format.
For more information about HTML form encoding, consult the HTML specification.
When encoding a String, the following rules apply:
The alphanumeric characters "a" through "z", "A" through "Z" and "0" through "9" remain the same.
The special characters ".", "-", "*", and "_" remain the same.
The space character " " is converted into a plus sign "+".
All other characters are unsafe and
are first converted into one or more bytes using some encoding scheme.
Then each byte is represented by the 3-character string "%xy",
where xy is the two-digit hexadecimal representation of the byte.
The recommended encoding scheme to use is UTF-8. However,
for compatibility reasons, if an encoding is not specified,
then the default encoding of the platform is used.
For example using UTF-8 as the encoding scheme the string "The string ü@foo-bar"
would get converted to "The+string+%C3%BC%40foo-bar"
because in UTF-8 the character ü is encoded as two bytes C3 (hex) and BC (hex),
and the character @ is encoded as one byte 40 (hex).
Since:
JDK1.0
#6
我在100年前的程序里找出来的。嘿嘿
{ URL-encodes Code character by character.
  - A space becomes '+'.
  - Letters, digits and the characters * @ . _ - $ ! ' ( ) are copied as is.
  - Every other character becomes '%' followed by the hexadecimal value of
    its ordinal (Ord), at least two digits.
  The escape is built from the ordinal of each string element, so the
  caller decides the byte encoding of the output.
  NOTE(review): presumably Code should already hold the bytes of the target
  encoding (e.g. UTF-8) - confirm against callers.
  Returns an empty string for empty input. }
function EnCode(Code: string): string;
var
  I: Integer;
  Hex: string;
begin
  // A string function result is not guaranteed to be empty on entry, and
  // the loop below only ever appends to it.
  Result := '';
  for I := 1 to Length(Code) do
    case Code[I] of
      ' ':
        Result := Result + '+';
      'A'..'Z', 'a'..'z', '*', '@', '.', '_', '-',
      '0'..'9', '$', '!', '''', '(', ')':
        Result := Result + Code[I];
    else
      begin
        Hex := IntToHex(Ord(Code[I]), 2);
        if Length(Hex) = 2 then
          Result := Result + '%' + Hex
        else
          Result := Result + '%0' + Hex;
      end;
    end;
end;
#7
// Example call: the search keyword is passed through EnCode before it is
// appended to the query string. The line below is the resulting URL as
// reported by the poster.
URL := 'http://so.360.cn/s?src=hao_hot&q=' + EnCode('武汉公交车自燃');
//http://so.360.cn/s?src=hao_hot&q=%E6%AD%A6%E6%B1%89%E5%85%AC%E4%BA%A4%E8%BD%A6%E8%87%AA%E7%87%83
#8
// Produces the URL-encoded value of a string in its current (GBK-style)
// encoding. (Original poster's note: Taobao simplifies this step; Paipai
// handles both UTF-8 and GBK.)
{ HTTPEncode replaces in-program separator characters such as '/' and '\'
  in URL requests with generic escape sequences, so that separators inside
  request values cannot be confused with the real separators and break
  parsing.
  URLEncode keeps letters and digits unchanged and escapes everything else;
  the escaped values differ depending on the encoding of the input
  (GBK, UTF-8, ...).

  URLEncode_gbk rules:
  - letters, digits, '-', '_' and '.' are copied unchanged;
  - a space becomes '%20';
  - every other element becomes '%' plus the hexadecimal value of its
    ordinal (at least two digits). }
function URLEncode_gbk(S: string): string;
var
  Idx: Integer;
  Piece: string;
begin
  Result := '';
  for Idx := 1 to Length(S) do
  begin
    case S[Idx] of
      ' ':
        // A space ends up as '%20' in the final output.
        Piece := '%20';
      'A'..'Z', 'a'..'z', '0'..'9', '-', '_', '.':
        // Common letters, digits and '-_.' are not separators; keep them.
        Piece := S[Idx];
    else
      // Any other symbol (including '+' and '*') is escaped from its ordinal.
      Piece := '%' + SysUtils.IntToHex(Ord(S[Idx]), 2);
    end;
    Result := Result + Piece;
  end;
end;
{ HTTPEncode replaces in-program separator characters such as '/' and '\'
  in URL requests with generic escape sequences, so that separators inside
  request values cannot be confused with the real separators and break
  parsing.
  URLEncode keeps letters and digits unchanged and escapes everything else;
  the escaped values differ depending on the encoding of the input
  (GBK, UTF-8, ...).

  URLEncode_gbk rules:
  - letters, digits, '-', '_' and '.' are copied unchanged;
  - a space becomes '%20';
  - every other element becomes '%' plus the hexadecimal value of its
    ordinal (at least two digits). }
function URLEncode_gbk(S: string): string;
var
  Idx: Integer;
  Piece: string;
begin
  Result := '';
  for Idx := 1 to Length(S) do
  begin
    case S[Idx] of
      ' ':
        // A space ends up as '%20' in the final output.
        Piece := '%20';
      'A'..'Z', 'a'..'z', '0'..'9', '-', '_', '.':
        // Common letters, digits and '-_.' are not separators; keep them.
        Piece := S[Idx];
    else
      // Any other symbol (including '+' and '*') is escaped from its ordinal.
      Piece := '%' + SysUtils.IntToHex(Ord(S[Idx]), 2);
    end;
    Result := Result + Piece;
  end;
end;
#9
上面的函数 URLEncode_gbk 是按 GBK 编码转换的;如果要转换为 UTF-8
(也就是每个中文字符编码成 3 个字节),应该怎么改写?
就是遇到中文的是变成3个字节,怎么改写。