C++11 使用 u8 前缀来表示 UTF-8 字符串字面量,同时提供了一整套相关的类型和转换工具:std::u16string、char16_t、std::codecvt_utf8、std::codecvt_utf16、std::codecvt_utf8_utf16 以及 std::wstring_convert。
此外还有 UTF-8 字符串常量(字面量),例如:
// UTF-8 string literal (u8 prefix) stored in a std::string.
// NOTE(review): since C++20 a u8 literal has type const char8_t[], so this
// initialization presumably only compiles as C++11/14/17 - confirm the target standard.
std::string str2 = u8"你好啊";
GB2312 和 UTF-8 之间的相互转换:
#include <locale>
#include <vector>
#include <string>
#include <codecvt>
/// Converts a GB2312/GBK/GB18030-encoded byte string to UTF-8.
///
/// The input is first decoded to wide characters with the codecvt facet of a
/// Chinese locale ("zh-CN" on MSVC, "zh_CN.GB18030" elsewhere) and the wide
/// string is then re-encoded as UTF-8.
///
/// @param strGb2312  Bytes in the locale's multibyte encoding.
/// @return The UTF-8 encoded text, or an empty string when the input is empty
///         or cannot be decoded completely (invalid or truncated sequence).
/// @throws std::runtime_error if the required locale is not installed.
/// @throws std::range_error   if the decoded text cannot be encoded as UTF-8.
std::string gb2312_to_utf8(std::string const &strGb2312)
{
    // Nothing to convert: return early so that empty input neither requires the
    // locale to be installed nor hands null buffer pointers to the facet.
    if (strGb2312.empty())
    {
        return std::string();
    }

#ifdef _MSC_VER
    const std::locale loc("zh-CN");
#else
    const std::locale loc("zh_CN.GB18030");
#endif
    using Facet = std::codecvt<wchar_t, char, std::mbstate_t>;
    const Facet &facet = std::use_facet<Facet>(loc);

    // Every decoded wide character consumes at least one input byte, so one
    // wchar_t per input byte is always enough room.
    std::vector<wchar_t> buff(strGb2312.size());

    const char *const pszBegin = strGb2312.data();
    const char *pszNext = nullptr;
    wchar_t *pwszNext = nullptr;
    std::mbstate_t state{};
    const std::codecvt_base::result res = facet.in(
        state,
        pszBegin, pszBegin + strGb2312.size(), pszNext,
        buff.data(), buff.data() + buff.size(), pwszNext);
    if (res != std::codecvt_base::ok)
    {
        return std::string();
    }

    // Re-encode exactly the wide characters that were produced as UTF-8.
    std::wstring_convert<std::codecvt_utf8<wchar_t>> cutf8;
    return cutf8.to_bytes(buff.data(), pwszNext);
}
/// Converts a UTF-8 string to the multibyte encoding of a Chinese locale
/// ("zh-CN" on MSVC, "zh_CN.GB18030" elsewhere).
///
/// The input is first decoded from UTF-8 to wide characters and the wide
/// string is then encoded with the locale's codecvt facet.
///
/// @param strUtf8  UTF-8 encoded text.
/// @return The text in the locale's encoding, or an empty string when the
///         input is empty or contains a character the locale cannot encode.
/// @throws std::range_error   if strUtf8 is not valid UTF-8.
/// @throws std::runtime_error if the required locale is not installed.
std::string utf8_to_gb2312(std::string const &strUtf8)
{
    // Nothing to convert: return early so that empty input neither requires the
    // locale to be installed nor hands null buffer pointers to the facet.
    if (strUtf8.empty())
    {
        return std::string();
    }

    std::wstring_convert<std::codecvt_utf8<wchar_t>> cutf8;
    const std::wstring wTemp = cutf8.from_bytes(strUtf8);

#ifdef _MSC_VER
    const std::locale loc("zh-CN");
#else
    const std::locale loc("zh_CN.GB18030");
#endif
    using Facet = std::codecvt<wchar_t, char, std::mbstate_t>;
    const Facet &facet = std::use_facet<Facet>(loc);

    // Size the output from the facet instead of assuming two bytes per
    // character: GB18030 needs up to four bytes for a single character, and a
    // too-small buffer makes out() report `partial`, losing the whole result.
    const int maxLen = facet.max_length();
    const std::size_t bytesPerChar = maxLen > 0 ? static_cast<std::size_t>(maxLen) : 4u;
    std::vector<char> buff(wTemp.size() * bytesPerChar);

    const wchar_t *const pwszBegin = wTemp.data();
    const wchar_t *pwszNext = nullptr;
    char *pszNext = nullptr;
    std::mbstate_t state{};
    const std::codecvt_base::result res = facet.out(
        state,
        pwszBegin, pwszBegin + wTemp.size(), pwszNext,
        buff.data(), buff.data() + buff.size(), pszNext);
    if (res != std::codecvt_base::ok)
    {
        return std::string();
    }
    return std::string(buff.data(), pszNext);
}
注意:Linux 系统必须支持 zh_CN.GB18030 这一 locale,上述代码才能正常运行。在 Ubuntu 下可以使用 cat /var/lib/locales/supported.d/local 来查看系统是否支持 zh_CN.GB18030。