跨平台字符编码转换GBK、UTF8

时间:2022-05-14 03:56:17
#if (defined _WIN32 || defined _WIN64)
#   include <windows.h>
#   include <stdio.h>
#   include <ctype.h>
#elif defined(__linux__)
#   include <iconv.h>
#   include <wctype.h>
#   include <wchar.h>
#   include <errno.h>
#endif

#include <string.h>

#include <new>
#include <string>
 11 
 12 using namespace std;
 13 
 14 //代码页
 15 #define CP_GBK 936
 16 #define CP_UTF8 65001
 17 
 18 std::wstring  s2ws(const std::string str, int code_page);
 19 std::string   ws2s(const std::wstring wstr, int code_page);
 20 
 21 //默认的输出字符串字节长度
 22 //经测试发现OUT_LEN = 10 每次可转3个汉字
 23 const int OUT_LEN = 200;
 24 
 25 /**    @fn    wstring  s2ws(const string str, int code_page)
 26  *    @brief    从多字节字符串转为宽字符串
 27  *    @param str 源字符串
 28  *    @param code_page 要使用的代码页
 29  *    @return    成功返回宽字符串,失败返回空字符串
 30  */
 31 wstring  s2ws(const string str, int code_page)
 32 {
 33     wstring wstr_dest;
 34     if (str.size() == 0)
 35     {
 36         return wstr_dest;
 37     }
 38     wchar_t* wcs = NULL;
 39 #ifdef _MSC_VER
 40     //要转换的多字节字符串
 41     int size = MultiByteToWideChar(code_page, 0, str.c_str(), -1, NULL, 0);
 42     wcs = new(std::nothrow)wchar_t[size];
 43     if (wcs == NULL)
 44     {
 45         return wstr_dest;
 46     }
 47     if (MultiByteToWideChar(code_page, 0, str.c_str(), -1, wcs, size) == 0)
 48     {
 49         wstr_dest.clear();
 50     }
 51     else
 52     {
 53         wstr_dest += wcs;
 54     }
 55     delete[] wcs;
 56 
 57 #elif defined __linux
 58     //申请临时缓冲区,用于保存转换后的字符串
 59     wcs = new(std::nothrow)wchar_t[OUT_LEN];
 60     if (wcs == NULL)
 61     {
 62         return wstr_dest;
 63     }
 64     iconv_t handle = (void*)-1;
 65     switch (code_page)
 66     {
 67     case CP_GBK:
 68         handle = iconv_open("UCS-4", "GBK");
 69         break;
 70     case CP_UTF8:
 71         handle = iconv_open("UCS-4", "UTF-8");
 72         break;
 73     default:
 74         //不支持
 75         break;
 76     }
 77     if (handle == (void*)-1)
 78     {
 79         delete[] wcs;
 80         return wstr_dest;
 81     }
 82 
 83     size_t nsrc = str.size()*sizeof(char);
 84     char* src = (char*)str.c_str();
 85     wchar_t* tmp = wcs;
 86     size_t ndst = OUT_LEN * sizeof(wchar_t);
 87     //需多次转换,直到转换完毕
 88     while (nsrc>0)
 89     {
 90         memset(wcs, 0, OUT_LEN*sizeof(wchar_t));
 91         tmp = wcs;
 92         ndst = OUT_LEN * sizeof(wchar_t);
 93         if (iconv(handle, (char**)&src, &nsrc, (char**)&tmp, &ndst) ==(size_t)-1 && errno != E2BIG)
 94         {
 95             wstr_dest.clear();
 96             break;
 97         }
 98         wstr_dest += wstring(wcs, OUT_LEN - ndst/sizeof(wchar_t)); 
 99     }
100     iconv_close(handle);
101     //释放临时缓冲区
102     delete[] wcs;
103     
104 #endif
105         return wstr_dest;
106 }
107 
108 /**    @fn    string  ws2s(const wstring wstr, int code_page)
109  *    @brief    从宽字符串转为多字节字符串
110  *    @param wstr 源字符串
111  *    @param code_page 要使用的代码页
112  *    @return    成功返回多字节字符串,失败返回空字符串
113  */
114 string  ws2s(const wstring wstr, int code_page)
115 {
116     string str_dest;
117     if (wstr.size() == 0)
118     {
119         return str_dest;
120     }
121     char *mbs = NULL;
122 #ifdef _MSC_VER
123     int size = WideCharToMultiByte(code_page, 0, wstr.c_str(), -1, NULL, 0, NULL, NULL);
124     mbs = new(std::nothrow) char[size];
125     if (NULL == mbs)
126     {
127         return str_dest;
128     }
129     if (0 == WideCharToMultiByte(code_page, 0, wstr.c_str(), -1, mbs, size, NULL, NULL))
130     {   
131         str_dest.clear();
132     }
133     else
134     {
135         str_dest += mbs;
136     }
137     delete[] mbs;
138 #elif defined __linux
139     //申请临时缓冲区,用于保存转换后的字符串
140     mbs = new(std::nothrow)char[OUT_LEN];
141     if (NULL == mbs)
142     {
143         return str_dest;
144     }
145     iconv_t handle = (void*)-1;
146     switch (code_page)
147     {
148     case CP_GBK:
149         handle = iconv_open("GBK", "UCS-4");
150         break;
151     case CP_UTF8:
152         handle = iconv_open("UTF-8", "UCS-4");
153         break;
154     default:
155         //不支持
156         break;
157     }
158     if (handle == (void*)-1)
159     {
160         delete[] mbs;
161         return str_dest;
162     }
163 
164     size_t nsrc = wstr.size() * sizeof(wchar_t);
165     wchar_t* src = (wchar_t*)wstr.c_str();
166     char* tmp = NULL;
167     size_t ndst = OUT_LEN;
168     //需多次转换,直到转换完毕
169     while (nsrc>0)
170     {
171         memset(mbs, 0, OUT_LEN);
172         tmp = mbs;
173         ndst = OUT_LEN;
174         if (iconv(handle, (char**)&src, &nsrc, (char**)&tmp, &ndst) ==(size_t)-1 && errno != E2BIG)
175         {
176             str_dest.clear();
177             break;
178         }
179         str_dest += string(mbs, OUT_LEN - ndst); 
180     }
181     iconv_close(handle);
182     //释放临时缓冲区
183     delete[] mbs;
184 
185 #endif
186     return str_dest;
187 }
188 
/**    @fn    string utf82gbk(const string str_utf8)
 *    @brief    Convert a UTF-8 encoded string to GBK.
 *    @param str_utf8 source string, UTF-8 encoded
 *    @return    the GBK string on success, an empty string on failure
 */
std::string utf82gbk(const std::string str_utf8)
{
    std::string str_gbk;
    if (str_utf8.empty())
    {
        return str_gbk;
    }
#if (defined _WIN32 || defined _WIN64)
    // No direct UTF-8 -> GBK call here: go through a wide string.
    str_gbk = ws2s(s2ws(str_utf8, CP_UTF8), CP_GBK);
#elif defined(__linux__)
    iconv_t handle = iconv_open("GBK", "UTF-8");
    if (handle == (iconv_t)-1)
    {
        return str_gbk;
    }
    // Closes the descriptor on every exit path, including an exception
    // thrown while the result string grows.
    struct IconvCloser
    {
        iconv_t cd;
        explicit IconvCloser(iconv_t c) : cd(c) {}
        ~IconvCloser() { iconv_close(cd); }
    } closer(handle);

    // iconv() wants a non-const char**, hence the cast.
    char* src = const_cast<char*>(str_utf8.data());
    size_t nsrc = str_utf8.size();
    // Scratch space for one iconv() step; its size only affects how many
    // steps are needed, not the result.
    char chunk[256];
    // Convert chunk by chunk until the whole input has been consumed.
    while (nsrc > 0)
    {
        char* dst = chunk;
        size_t ndst = sizeof(chunk);
        const bool failed = (iconv(handle, &src, &nsrc, &dst, &ndst) == (size_t)-1);
        const int err = failed ? errno : 0;
        const size_t produced = sizeof(chunk) - ndst;
        // E2BIG merely means the chunk is full; anything else, or E2BIG
        // without progress (which would loop forever), is a real failure.
        if (failed && (err != E2BIG || produced == 0))
        {
            str_gbk.clear();
            break;
        }
        str_gbk.append(chunk, produced);
    }
#endif
    return str_gbk;
}
236 
/**    @fn    string gbk2utf8(const string str_gbk)
 *    @brief    Convert a GBK encoded string to UTF-8.
 *    @param str_gbk source string, GBK encoded
 *    @return    the UTF-8 string on success, an empty string on failure
 */
std::string gbk2utf8(const std::string str_gbk)
{
    std::string str_utf8;
    if (str_gbk.empty())
    {
        return str_utf8;
    }
#if (defined _WIN32 || defined _WIN64)
    // No direct GBK -> UTF-8 call here: go through a wide string.
    str_utf8 = ws2s(s2ws(str_gbk, CP_GBK), CP_UTF8);
#elif defined(__linux__)
    iconv_t handle = iconv_open("UTF-8", "GBK");
    if (handle == (iconv_t)-1)
    {
        return str_utf8;
    }
    // Closes the descriptor on every exit path, including an exception
    // thrown while the result string grows.
    struct IconvCloser
    {
        iconv_t cd;
        explicit IconvCloser(iconv_t c) : cd(c) {}
        ~IconvCloser() { iconv_close(cd); }
    } closer(handle);

    // iconv() wants a non-const char**, hence the cast.
    char* src = const_cast<char*>(str_gbk.data());
    size_t nsrc = str_gbk.size();
    // Scratch space for one iconv() step; its size only affects how many
    // steps are needed, not the result.
    char chunk[256];
    // Convert chunk by chunk until the whole input has been consumed.
    while (nsrc > 0)
    {
        char* dst = chunk;
        size_t ndst = sizeof(chunk);
        const bool failed = (iconv(handle, &src, &nsrc, &dst, &ndst) == (size_t)-1);
        const int err = failed ? errno : 0;
        const size_t produced = sizeof(chunk) - ndst;
        // E2BIG merely means the chunk is full; anything else, or E2BIG
        // without progress (which would loop forever), is a real failure.
        if (failed && (err != E2BIG || produced == 0))
        {
            str_utf8.clear();
            break;
        }
        str_utf8.append(chunk, produced);
    }
#endif
    return str_utf8;
}
284 
285 
// Convert a NUL-terminated wchar_t string to UTF-8.
//   a_szSrc     - source wide string (NUL-terminated)
//   a_szDest    - destination buffer for the UTF-8 bytes
//   a_nDestSize - capacity of a_szDest in bytes
// Returns the number of bytes written to a_szDest, including the
// terminating NUL, or 0 on failure (bad arguments, invalid input,
// destination too small, unsupported platform).
int Wchar2Utf8Convert( const wchar_t* a_szSrc, char* a_szDest, int a_nDestSize )
{
#if (defined _WIN32 || defined _WIN64)
    return WideCharToMultiByte( CP_UTF8, 0, a_szSrc, -1, a_szDest, a_nDestSize, NULL, NULL );
#elif defined(__linux__)
    if (a_szSrc == NULL || a_szDest == NULL || a_nDestSize <= 0)
    {
        return 0;
    }
    iconv_t env = iconv_open("UTF-8", "WCHAR_T");
    if (env == (iconv_t)-1)
    {
        return 0;
    }
    // iconv() works on raw bytes and wants non-const char**, hence the casts.
    char* src = reinterpret_cast<char*>(const_cast<wchar_t*>(a_szSrc));
    // +1 so the terminator is converted too and the output is NUL-terminated.
    size_t srcSize = (wcslen(a_szSrc) + 1) * sizeof(wchar_t);
    char* dst = a_szDest;
    const size_t capacity = static_cast<size_t>(a_nDestSize);
    size_t buf_count = capacity;
    const size_t result = iconv(env, &src, &srcSize, &dst, &buf_count);
    // Release the descriptor before looking at the result so that the
    // failure path does not leak it.
    iconv_close(env);
    if (result == (size_t)-1)
    {
        return 0;
    }
    // iconv()'s own return value counts irreversible conversions, which is
    // 0 for a normal success; report the bytes produced instead, like the
    // Windows branch does.
    return static_cast<int>(capacity - buf_count);
#else
    // No conversion backend on this platform.
    (void)a_szSrc;
    (void)a_szDest;
    (void)a_nDestSize;
    return 0;
#endif
}
312 
// Convert a NUL-terminated UTF-8 string to wchar_t.
//   a_szSrc     - source UTF-8 string (NUL-terminated)
//   a_szDest    - destination buffer for the wide characters
//   a_nDestSize - capacity of a_szDest in wchar_t elements
// Returns the number of wide characters written to a_szDest, including the
// terminating NUL, or 0 on failure (bad arguments, invalid input,
// destination too small, unsupported platform).
int Utf82WcharConvert( const char* a_szSrc, wchar_t* a_szDest, int a_nDestSize )
{
#if (defined _WIN32 || defined _WIN64)
    return MultiByteToWideChar( CP_UTF8, 0, a_szSrc, -1, a_szDest, a_nDestSize );
#elif defined(__linux__)
    if (a_szSrc == NULL || a_szDest == NULL || a_nDestSize <= 0)
    {
        return 0;
    }
    iconv_t env = iconv_open("WCHAR_T", "UTF-8");
    if (env == (iconv_t)-1)
    {
        return 0;
    }
    // iconv() works on raw bytes and wants non-const char**, hence the casts.
    char* src = const_cast<char*>(a_szSrc);
    // +1 so the terminator is converted too and the output is NUL-terminated.
    size_t size = strlen(a_szSrc) + 1;
    char* dst = reinterpret_cast<char*>(a_szDest);
    const size_t capacity = static_cast<size_t>(a_nDestSize) * sizeof(wchar_t);
    size_t buf_count = capacity;
    const size_t result = iconv(env, &src, &size, &dst, &buf_count);
    // Release the descriptor before looking at the result so that the
    // failure path does not leak it.
    iconv_close(env);
    if (result == (size_t)-1)
    {
        return 0;
    }
    // iconv()'s own return value counts irreversible conversions, which is
    // 0 for a normal success; report the wide characters produced instead,
    // like the Windows branch does.
    return static_cast<int>((capacity - buf_count) / sizeof(wchar_t));
#else
    // No conversion backend on this platform.
    (void)a_szSrc;
    (void)a_szDest;
    (void)a_nDestSize;
    return 0;
#endif
}