笔者在Windows下用C++编写程序时,接收到的UTF-8字符串中的中文无法在控制台(console)上正常显示,本文记录如何让UTF-8字符串在VC++程序中正常显示。
1、问题重现,UTF-8编码下的字符串“中文”在Windows下的显示如下图,其中上行为UTF8字符串,下行为GBK字符串:
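2、方法:将UTF-8编码的字符串转换为GBK编码(需要包含Windows.h头文件)。转换分两步:先用MultiByteToWideChar把UTF-8转为宽字符(UTF-16),再用WideCharToMultiByte把宽字符转为系统ANSI代码页(简体中文Windows上即GBK)。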
1 string UTF8ToGBK(string &strUtf8) 2 { 3 string strOutGBK = ""; 4 int len = MultiByteToWideChar(CP_UTF8, 0, strUtf8.c_str(), -1, NULL, 0); 5 WCHAR *wszGBK = new WCHAR[len + 1]; 6 memset(wszGBK, 0, len * 2 + 2); 7 MultiByteToWideChar(CP_UTF8, 0, strUtf8.c_str(), -1, wszGBK, len); 8 len = WideCharToMultiByte(CP_ACP, 0, wszGBK, -1, NULL, 0, NULL, NULL); 9 char *pszGBK = new char[len + 1]; 10 memset(pszGBK, 0, len + 1); 11 WideCharToMultiByte(CP_ACP, 0, wszGBK, -1, pszGBK, len, NULL, NULL); 12 strOutGBK = pszGBK; 13 delete[]pszGBK; 14 delete[]wszGBK; 15 return strOutGBK; 16 }
调用例子如下:
1 string strGBK = UTF8ToGBK(strUTF8);//strUTF8为UTF8字符串 2 cout << "字符串(UTF-8):"<< strUTF8 << endl; 3 printf( "字符串( GBK ):%s\n", strGBK.c_str());
结果如下:
附上将GBK编码的字符串转换为UTF-8编码的函数:
1 string GBKToUTF8(const std::string& strGBK) 2 { 3 string strOutUTF8 = ""; 4 WCHAR * str1; 5 int n = MultiByteToWideChar(CP_ACP, 0, strGBK.c_str(), -1, NULL, 0); 6 str1 = new WCHAR[n]; 7 MultiByteToWideChar(CP_ACP, 0, strGBK.c_str(), -1, str1, n); 8 n = WideCharToMultiByte(CP_UTF8, 0, str1, -1, NULL, 0, NULL, NULL); 9 char * str2 = new char[n]; 10 WideCharToMultiByte(CP_UTF8, 0, str1, -1, str2, n, NULL, NULL); 11 strOutUTF8 = str2; 12 delete[]str1; 13 str1 = NULL; 14 delete[]str2; 15 str2 = NULL; 16 return strOutUTF8; 17 }