#if (defined _WIN32 || defined _WIN64)
# include <windows.h>
# include <stdio.h>
# include <ctype.h>
#elif defined(__linux__)
# include <iconv.h>
# include <wctype.h>
# include <wchar.h>
# include <errno.h>
#endif

#include <cstring>
#include <new>
#include <string>

using namespace std;

// Code pages (Windows numbering). <windows.h> may already define CP_UTF8.
#ifndef CP_GBK
# define CP_GBK 936
#endif
#ifndef CP_UTF8
# define CP_UTF8 65001
#endif

std::wstring s2ws(const std::string str, int code_page);
std::string ws2s(const std::wstring wstr, int code_page);

// Size in bytes of the temporary output buffer used for each iconv pass.
// Testing showed that OUT_LEN = 10 converts 3 Chinese characters per pass.
const int OUT_LEN = 200;

#if defined(__linux__)
namespace
{
/** @fn const char* iconv_charset_name(int code_page)
 *  @brief Maps a supported code page number to its iconv encoding name.
 *  @param code_page CP_GBK or CP_UTF8
 *  @return the iconv encoding name, or NULL when the code page is not supported
 */
const char* iconv_charset_name(int code_page)
{
    switch (code_page)
    {
    case CP_GBK:
        return "GBK";
    case CP_UTF8:
        return "UTF-8";
    default:
        // not supported
        return NULL;
    }
}

/** @fn bool iconv_convert(const char*, const char*, const char*, size_t, std::string&)
 *  @brief Converts a byte sequence between two iconv encodings.
 *  @param to_code   iconv name of the target encoding
 *  @param from_code iconv name of the source encoding
 *  @param src       first source byte
 *  @param src_bytes number of source bytes
 *  @param out       receives the converted bytes; left empty on failure
 *  @return true on success, false when iconv_open or iconv fails
 */
bool iconv_convert(const char* to_code, const char* from_code,
                   const char* src, size_t src_bytes, std::string& out)
{
    out.clear();
    iconv_t handle = iconv_open(to_code, from_code);
    if (handle == (iconv_t)-1)
    {
        return false;
    }

    char buffer[OUT_LEN];
    // iconv() takes a non-const input pointer but does not modify the input bytes.
    char* in = const_cast<char*>(src);
    bool ok = true;
    // The output buffer is small, so convert in several passes until the input is consumed.
    while (src_bytes > 0)
    {
        char* dst = buffer;
        size_t dst_left = sizeof(buffer);
        const size_t rc = iconv(handle, &in, &src_bytes, &dst, &dst_left);
        const size_t produced = sizeof(buffer) - dst_left;
        // E2BIG only means "output buffer full": flush what was produced and continue.
        // Any other error, or E2BIG without progress (which would loop forever), is fatal.
        if (rc == (size_t)-1 && (errno != E2BIG || produced == 0))
        {
            ok = false;
            break;
        }
        out.append(buffer, produced);
    }
    iconv_close(handle);
    if (!ok)
    {
        out.clear();
    }
    return ok;
}
} // namespace
#endif

/** @fn wstring s2ws(const string str, int code_page)
 *  @brief Converts a multi-byte string to a wide string.
 *  @param str source string
 *  @param code_page code page of the source string (CP_GBK or CP_UTF8 on Linux)
 *  @return the wide string on success, an empty string on failure
 */
wstring s2ws(const string str, int code_page)
{
    wstring wstr_dest;
    if (str.size() == 0)
    {
        return wstr_dest;
    }
#if (defined _WIN32 || defined _WIN64)
    // First call computes the required length (terminating null included).
    int size = MultiByteToWideChar(code_page, 0, str.c_str(), -1, NULL, 0);
    if (size <= 0)
    {
        return wstr_dest;
    }
    wchar_t* wcs = new(std::nothrow) wchar_t[size];
    if (wcs == NULL)
    {
        return wstr_dest;
    }
    if (MultiByteToWideChar(code_page, 0, str.c_str(), -1, wcs, size) != 0)
    {
        wstr_dest += wcs;
    }
    delete[] wcs;
#elif defined(__linux__)
    const char* from_code = iconv_charset_name(code_page);
    if (from_code == NULL)
    {
        return wstr_dest;
    }
    // "WCHAR_T" yields native-endian wchar_t units. "UCS-4" is big-endian in
    // glibc and produced byte-swapped characters on little-endian hosts.
    std::string raw;
    if (iconv_convert("WCHAR_T", from_code, str.data(), str.size(), raw) && !raw.empty())
    {
        wstr_dest.resize(raw.size() / sizeof(wchar_t));
        std::memcpy(&wstr_dest[0], raw.data(), wstr_dest.size() * sizeof(wchar_t));
    }
#else
    (void)code_page;
#endif
    return wstr_dest;
}

/** @fn string ws2s(const wstring wstr, int code_page)
 *  @brief Converts a wide string to a multi-byte string.
 *  @param wstr source string
 *  @param code_page code page of the result (CP_GBK or CP_UTF8 on Linux)
 *  @return the multi-byte string on success, an empty string on failure
 */
string ws2s(const wstring wstr, int code_page)
{
    string str_dest;
    if (wstr.size() == 0)
    {
        return str_dest;
    }
#if (defined _WIN32 || defined _WIN64)
    // First call computes the required length (terminating null included).
    int size = WideCharToMultiByte(code_page, 0, wstr.c_str(), -1, NULL, 0, NULL, NULL);
    if (size <= 0)
    {
        return str_dest;
    }
    char* mbs = new(std::nothrow) char[size];
    if (NULL == mbs)
    {
        return str_dest;
    }
    if (0 != WideCharToMultiByte(code_page, 0, wstr.c_str(), -1, mbs, size, NULL, NULL))
    {
        str_dest += mbs;
    }
    delete[] mbs;
#elif defined(__linux__)
    const char* to_code = iconv_charset_name(code_page);
    if (to_code == NULL)
    {
        return str_dest;
    }
    // The source is native-endian wchar_t data, hence "WCHAR_T" rather than "UCS-4".
    iconv_convert(to_code, "WCHAR_T",
                  reinterpret_cast<const char*>(wstr.data()),
                  wstr.size() * sizeof(wchar_t), str_dest);
#else
    (void)code_page;
#endif
    return str_dest;
}

/** @fn string utf82gbk(const string str_utf8)
 *  @brief Converts a UTF-8 string to a GBK string.
 *  @param str_utf8 source string
 *  @return the GBK string on success, an empty string on failure
 */
string utf82gbk(const string str_utf8)
{
    string str_gbk;
#if (defined _WIN32 || defined _WIN64)
    // Windows has no direct conversion; go through UTF-16.
    wstring wstr = s2ws(str_utf8, CP_UTF8);
    str_gbk = ws2s(wstr, CP_GBK);
#elif defined(__linux__)
    iconv_convert("GBK", "UTF-8", str_utf8.data(), str_utf8.size(), str_gbk);
#endif
    return str_gbk;
}

/** @fn string gbk2utf8(const string str_gbk)
 *  @brief Converts a GBK string to a UTF-8 string.
 *  @param str_gbk source string
 *  @return the UTF-8 string on success, an empty string on failure
 */
string gbk2utf8(const string str_gbk)
{
    string str_utf8;
#if (defined _WIN32 || defined _WIN64)
    // Windows has no direct conversion; go through UTF-16.
    wstring wstr = s2ws(str_gbk, CP_GBK);
    str_utf8 = ws2s(wstr, CP_UTF8);
#elif defined(__linux__)
    iconv_convert("UTF-8", "GBK", str_gbk.data(), str_gbk.size(), str_utf8);
#endif
    return str_utf8;
}


/** @fn int Wchar2Utf8Convert(const wchar_t* a_szSrc, char* a_szDest, int a_nDestSize)
 *  @brief Converts a null-terminated wide string to UTF-8.
 *  @param a_szSrc null-terminated source string
 *  @param a_szDest destination buffer
 *  @param a_nDestSize size of the destination buffer in bytes
 *  @return number of bytes written including the terminating null, 0 on failure.
 *          On Linux a NULL pointer or a non-positive size is a failure; the
 *          Windows "query required size" mode (a_nDestSize == 0) is not emulated.
 */
int Wchar2Utf8Convert( const wchar_t* a_szSrc, char* a_szDest, int a_nDestSize )
{
#if (defined _WIN32 || defined _WIN64)
    return WideCharToMultiByte( CP_UTF8, 0, a_szSrc, -1, a_szDest, a_nDestSize, NULL, NULL );
#elif defined(__linux__)
    if (a_szSrc == NULL || a_szDest == NULL || a_nDestSize <= 0)
    {
        return 0;
    }
    iconv_t env = iconv_open("UTF-8", "WCHAR_T");
    if (env == (iconv_t)-1)
    {
        return 0;
    }
    // +1 so that the terminating null is converted as well.
    size_t src_bytes = (wcslen(a_szSrc) + 1) * sizeof(wchar_t);
    char* in = reinterpret_cast<char*>(const_cast<wchar_t*>(a_szSrc));
    char* out = a_szDest;
    size_t dst_left = static_cast<size_t>(a_nDestSize);
    const size_t rc = iconv(env, &in, &src_bytes, &out, &dst_left);
    // Close the descriptor on every path; the error path used to leak it.
    iconv_close(env);
    if (rc == (size_t)-1)
    {
        return 0;
    }
    // iconv() returns the count of irreversible conversions (normally 0), so
    // report the bytes written instead, as the Windows branch does.
    return static_cast<int>(static_cast<size_t>(a_nDestSize) - dst_left);
#else
    (void)a_szSrc;
    (void)a_szDest;
    (void)a_nDestSize;
    return 0;
#endif
}

/** @fn int Utf82WcharConvert(const char* a_szSrc, wchar_t* a_szDest, int a_nDestSize)
 *  @brief Converts a null-terminated UTF-8 string to a wide string.
 *  @param a_szSrc null-terminated source string
 *  @param a_szDest destination buffer
 *  @param a_nDestSize size of the destination buffer in wchar_t units
 *  @return number of wchar_t units written including the terminating null, 0 on
 *          failure. On Linux a NULL pointer or a non-positive size is a failure;
 *          the Windows "query required size" mode (a_nDestSize == 0) is not emulated.
 */
int Utf82WcharConvert( const char* a_szSrc, wchar_t* a_szDest, int a_nDestSize )
{
#if (defined _WIN32 || defined _WIN64)
    return MultiByteToWideChar( CP_UTF8, 0, a_szSrc, -1, a_szDest, a_nDestSize );
#elif defined(__linux__)
    if (a_szSrc == NULL || a_szDest == NULL || a_nDestSize <= 0)
    {
        return 0;
    }
    iconv_t env = iconv_open("WCHAR_T", "UTF-8");
    if (env == (iconv_t)-1)
    {
        return 0;
    }
    // +1 so that the terminating null is converted as well.
    size_t src_bytes = strlen(a_szSrc) + 1;
    char* in = const_cast<char*>(a_szSrc);
    char* out = reinterpret_cast<char*>(a_szDest);
    const size_t dst_total = static_cast<size_t>(a_nDestSize) * sizeof(wchar_t);
    size_t dst_left = dst_total;
    const size_t rc = iconv(env, &in, &src_bytes, &out, &dst_left);
    // Close the descriptor on every path; the error path used to leak it.
    iconv_close(env);
    if (rc == (size_t)-1)
    {
        return 0;
    }
    // iconv() returns the count of irreversible conversions (normally 0), so
    // report the wchar_t units written instead, as the Windows branch does.
    return static_cast<int>((dst_total - dst_left) / sizeof(wchar_t));
#else
    (void)a_szSrc;
    (void)a_szDest;
    (void)a_nDestSize;
    return 0;
#endif
}