// VC: converting Chinese text between the GB2312, UTF-8 and Unicode encodings
//
// Date: 2022-03-16 20:19:42
#pragma once

#include <windows.h>

#include <string>
  
 

namespace Util{ //start namespace

// Custom random number generator; `ran` is used as the modulus of the result.
int SEU_Rand(int ran);

/************************************************************************/
/*
	Conversions between three encodings

	     UTF-8 <===> GB2312
	       ^            ^
	       |            |
	       v            v
	          Unicode

	The length of UTF-8 and GB2312 strings can be computed with strlen.

	For Unicode strings the byte length is wcslen(p)*2, because in Unicode
	(wide-character) form an ANSI character occupies 2 bytes and a Chinese
	character also occupies 2 bytes.
*/
/************************************************************************/
// UTF-8 -> GB2312. The returned buffer is heap-allocated (new[]) and must be
// released by the caller.
char* UTF8_To_GB2312(const char* utf8);
// std::string version
std::string UTF8_To_GB2312( const std::string &utf8 );


// GB2312 -> UTF-8. The returned buffer is heap-allocated (new[]) and must be
// released by the caller.
char* GB2312_To_UTF8(const char* gb2312);
// std::string version
std::string GB2312_To_UTF8( const std::string &gb2312 );


// UTF-8 -> Unicode. The second parameter receives the length in bytes of the
// Unicode string. The same value can be obtained by calling wcslen on the
// returned pointer (which really points to wide characters) and multiplying
// the character count by 2.
char* UTF8_To_Unicode( IN const char* utf8, OUT int &   len);
// std::string version
std::wstring UTF8_To_Unicode( const std::string &utf8 );


// Unicode -> UTF-8. `unicode` is a wide-character string passed as char*.
char* Unicode_To_UTF8(const char* unicode);
// std::string version
std::string Unicode_To_UTF8(const std::wstring &unicode );


// GB2312 -> Unicode. Equivalent to MByteToWChar (calls it internally);
// `len` receives the length in bytes of the result.
char* GB2312_To_Unicode(const char* gb2312,int & len);
// std::string version
std::wstring GB2312_To_Unicode( const std::string &gb2312 );


// Unicode -> GB2312. Equivalent to WCharToMByte (calls it internally).
char* Unicode_To_GB2312(const char* unicode);
// std::string version
std::string Unicode_To_GB2312(const std::wstring &unicode );


/*
 Wide-character string to multi-byte string.
   Note: the returned string must be deleted once it is no longer needed.
 */
CHAR* WCharToMByte(WCHAR* lpcwszStr); // in effect: Unicode -> ANSI/GB2312



/*
  Multi-byte string to wide-character string.
	 Note: the returned string must be deleted once it is no longer needed.
 */
WCHAR* MByteToWChar(CHAR* lpcstr);

 


// NOTE(review): the two convWith overloads are only declared here; their
// definitions in the implementation part of this file are commented out.
static inline std::string convWith( const std::wstring &src );

static inline std::wstring convWith( const std::string &src );


}; //end namespace

#include "stdafx.h"
#include "util.h"



 

namespace Util{ //start namespace

/************************************************************************/
/*
	Windows code page identifiers:

	936    GB2312 / GBK
	       ANSI/OEM Simplified Chinese (PRC, Singapore)

	950    Big5 (note: this is NOT GBK)
	       ANSI/OEM Traditional Chinese (Taiwan; Hong Kong SAR, PRC)

	65001  UTF-8

                                                                     */
/************************************************************************/


// Windows code page identifier (936) passed to MultiByteToWideChar /
// WideCharToMultiByte in this file for GB2312-encoded text.
#define CODE_PAGE_GB2312 936


// Custom pseudo-random number generator.
//
// Multiplies (rand() + 3) by the current tick count (unsigned arithmetic,
// wrap-around on overflow is intended) and reduces the product modulo `ran`.
//
// ran     : exclusive upper bound of the result; must be positive to be
//           meaningful.
// returns : a value in [0, ran). Returns 0 when ran <= 0; the original code
//           divided by zero for ran == 0 and used a huge unsigned modulus
//           for negative values.
int SEU_Rand(int ran)
{
	if (ran <= 0)
	{
		return 0;
	}

	const unsigned long ticks = GetTickCount();
	const unsigned long mixed = ((unsigned long)rand() + 3UL) * ticks;

	// The remainder is strictly smaller than ran, so it always fits in an int.
	return (int)(mixed % (unsigned long)ran);
}
 
// Functions whose names start with "__" are internal helpers.
//
// Converts the NUL-terminated wide string lpcwszStr into the caller-supplied
// buffer lpszStr using the system OEM code page (CP_OEMCP).
//
// lpcwszStr : source wide string (NUL-terminated).
// lpszStr   : destination buffer.
// dwSize    : capacity of lpszStr in bytes.
// returns   : TRUE when the converted string (including its terminating NUL)
//             was written to lpszStr; FALSE when an argument is NULL, the
//             buffer is too small, or the conversion itself fails.
BOOL __WChar2MByte(LPCWSTR lpcwszStr, LPSTR lpszStr, DWORD dwSize)
{
	if (lpcwszStr == NULL || lpszStr == NULL)
	{
		return FALSE;
	}

	// First call only computes the required size (bytes, terminator included).
	// A return value of 0 signals failure and must not be treated as "fits".
	const int nRequired = WideCharToMultiByte(CP_OEMCP, 0, lpcwszStr, -1, NULL, 0, NULL, NULL);
	if (nRequired <= 0 || dwSize < (DWORD)nRequired)
	{
		return FALSE;
	}

	// The buffer is known to hold at least nRequired bytes, which is exactly
	// what the conversion writes.
	const int nWritten = WideCharToMultiByte(CP_OEMCP, 0, lpcwszStr, -1, lpszStr, nRequired, NULL, NULL);
	return (nWritten > 0) ? TRUE : FALSE;
}

// Converts a NUL-terminated wide (Unicode) string to a multi-byte string in
// the GB2312 code page (CODE_PAGE_GB2312), matching MByteToWChar and the
// GB2312_* helpers that consume this function's output.
//
// lpcwszStr : source wide string; may be NULL.
// returns   : a buffer allocated with new[] that the caller must release with
//             delete[]. It is always NUL-terminated; for a NULL input or a
//             failed conversion it holds an empty string.
char* WCharToMByte(WCHAR* lpcwszStr)
{
	// Required size in bytes, including the terminating NUL (0 on failure).
	int nBytes = 0;
	if (lpcwszStr != NULL)
	{
		nBytes = WideCharToMultiByte(CODE_PAGE_GB2312, 0, lpcwszStr, -1, NULL, 0, NULL, NULL);
	}
	if (nBytes < 0)
	{
		nBytes = 0;
	}

	// Zero-initialised, with one spare byte so the result is terminated even
	// when nothing gets written.
	char* pszOut = new char[nBytes + 1]();
	if (nBytes > 0)
	{
		WideCharToMultiByte(CODE_PAGE_GB2312, 0, lpcwszStr, -1, pszOut, nBytes, NULL, NULL);
	}

	return pszOut;
}



// Converts a NUL-terminated GB2312 (code page 936) string to a wide string.
//
// lpcstr  : source multi-byte string; may be NULL.
// returns : NULL when lpcstr is NULL; otherwise a zero-filled buffer allocated
//           with new[] (release with delete[]) that holds the converted text
//           followed by two NUL wide characters.
WCHAR*  MByteToWChar (CHAR* lpcstr)
{
	if (lpcstr == NULL)
	{
		return NULL;
	}

	const int nInputStrLen = (int)strlen(lpcstr);

	// Number of wide characters needed for the text itself (no terminator,
	// because an explicit input length is passed). Stays 0 for an empty
	// input, for which the API would report an error.
	int nWideLen = 0;
	if (nInputStrLen > 0)
	{
		nWideLen = MultiByteToWideChar(CODE_PAGE_GB2312, 0, lpcstr, nInputStrLen, NULL, 0);
		if (nWideLen < 0)
		{
			nWideLen = 0;
		}
	}

	// Double NUL termination; value-initialisation zero-fills the buffer.
	const int nOutputStrLen = nWideLen + 2;
	WCHAR* pszOut = new WCHAR[nOutputStrLen]();

	if (nWideLen > 0)
	{
		// Pass the real output capacity in wide characters. The original
		// passed the input byte count, which can exceed the allocation.
		MultiByteToWideChar(CODE_PAGE_GB2312, 0, lpcstr, nInputStrLen, pszOut, nWideLen);
	}

	return pszOut;
}


// UTF-8 -> GB2312 conversion (via an intermediate wide string).
//
// utf8    : NUL-terminated UTF-8 string; may be NULL.
// returns : a NUL-terminated buffer allocated with new[] that the caller must
//           release with delete[]. For a NULL input or a failed conversion
//           the buffer holds an empty string.
char* UTF8_To_GB2312(const char* utf8)
{
	// Step 1: UTF-8 -> wide. std::wstring owns the temporary so it is freed
	// on every path, and it is always properly terminated.
	std::wstring wide;
	if (utf8 != NULL)
	{
		const int wideLen = MultiByteToWideChar(CP_UTF8, 0, utf8, -1, NULL, 0);
		if (wideLen > 0)
		{
			wide.resize(wideLen);	// wideLen already counts the terminator
			MultiByteToWideChar(CP_UTF8, 0, utf8, -1, &wide[0], wideLen);
		}
	}

	// Step 2: wide -> GB2312. c_str() stops at the first NUL, so the extra
	// terminator stored inside `wide` is harmless.
	int byteLen = WideCharToMultiByte(CODE_PAGE_GB2312, 0, wide.c_str(), -1, NULL, 0, NULL, NULL);
	if (byteLen < 0)
	{
		byteLen = 0;
	}

	char* str = new char[byteLen + 1]();	// zero-filled
	if (byteLen > 0)
	{
		WideCharToMultiByte(CODE_PAGE_GB2312, 0, wide.c_str(), -1, str, byteLen, NULL, NULL);
	}
	return str;
}

// GB2312 -> UTF-8 conversion (via an intermediate wide string).
//
// gb2312  : NUL-terminated GB2312 (code page 936) string; may be NULL.
// returns : a NUL-terminated buffer allocated with new[] that the caller must
//           release with delete[]. For a NULL input or a failed conversion
//           the buffer holds an empty string.
char* GB2312_To_UTF8(const char* gb2312)
{
	// Step 1: GB2312 -> wide. std::wstring owns the temporary so it is freed
	// on every path, and it is always properly terminated.
	std::wstring wide;
	if (gb2312 != NULL)
	{
		const int wideLen = MultiByteToWideChar(CODE_PAGE_GB2312, 0, gb2312, -1, NULL, 0);
		if (wideLen > 0)
		{
			wide.resize(wideLen);	// wideLen already counts the terminator
			MultiByteToWideChar(CODE_PAGE_GB2312, 0, gb2312, -1, &wide[0], wideLen);
		}
	}

	// Step 2: wide -> UTF-8. c_str() stops at the first NUL, so the extra
	// terminator stored inside `wide` is harmless.
	int byteLen = WideCharToMultiByte(CP_UTF8, 0, wide.c_str(), -1, NULL, 0, NULL, NULL);
	if (byteLen < 0)
	{
		byteLen = 0;
	}

	char* str = new char[byteLen + 1]();	// zero-filled
	if (byteLen > 0)
	{
		WideCharToMultiByte(CP_UTF8, 0, wide.c_str(), -1, str, byteLen, NULL, NULL);
	}
	return str;
}

// std::string version of GB2312 -> UTF-8.
//
// gb2312  : GB2312-encoded text; converted up to its first NUL character.
// returns : the UTF-8 encoded text (empty if the conversion yields nothing).
std::string GB2312_To_UTF8( const std::string &gb2312 )
{
	char* utf8 = GB2312_To_UTF8(gb2312.c_str());
	if (utf8 == NULL)
	{
		return std::string();
	}

	std::string result(utf8);
	delete[] utf8;	// the buffer comes from new[], so delete[] is required
	return result;
}

// std::string version of UTF-8 -> GB2312.
//
// utf8    : UTF-8 encoded text; converted up to its first NUL character.
// returns : the GB2312 encoded text (empty if the conversion yields nothing).
std::string UTF8_To_GB2312( const std::string &utf8 )
{
	char* gb2312 = UTF8_To_GB2312(utf8.c_str());
	if (gb2312 == NULL)
	{
		return std::string();
	}

	std::string result(gb2312);
	delete[] gb2312;	// the buffer comes from new[], so delete[] is required
	return result;
}

char* GB2312_To_Unicode(const char* gb2312,int & len)
{  
	//gb2312转unicode
	WCHAR* w=MByteToWChar((char*)gb2312);
 
	len=wcslen(w)*2;

	return (char*)w;
}	 
 
// std::string version of GB2312 -> Unicode.
//
// gb2312  : GB2312-encoded text; converted up to its first NUL character.
// returns : the text as a wide string (empty if the conversion yields nothing).
std::wstring GB2312_To_Unicode( const std::string &gb2312 )
{
	int len = 0;
	// The char* overload hands back a heap buffer that really holds wide
	// characters; take ownership so it can be freed (the original leaked it).
	wchar_t* wide = reinterpret_cast<wchar_t*>(GB2312_To_Unicode(gb2312.c_str(), len));
	if (wide == NULL)
	{
		return std::wstring();
	}

	std::wstring result(wide);
	delete[] wide;
	return result;
}

char* UTF8_To_Unicode(const char* utf8,int & len)
{
	//先utf8 转 gb2312
	char* gb2312=UTF8_To_GB2312(utf8);
	
	//gb2312转unicode
	WCHAR* w=MByteToWChar(gb2312);
	
	delete gb2312;

	len=wcslen(w)*2;

	return (char*)w;
}	

// std::string version of UTF-8 -> Unicode.
//
// utf8    : UTF-8 encoded text; converted up to its first NUL character.
// returns : the text as a wide string (empty if the conversion yields nothing).
std::wstring UTF8_To_Unicode( const std::string &utf8 )
{
	int len = 0;
	// The char* overload hands back a heap buffer that really holds wide
	// characters; take ownership so it can be freed (the original leaked it).
	wchar_t* wide = reinterpret_cast<wchar_t*>(UTF8_To_Unicode(utf8.c_str(), len));
	if (wide == NULL)
	{
		return std::wstring();
	}

	std::wstring result(wide);
	delete[] wide;
	return result;
}

char* Unicode_To_UTF8(const char* unicode )
{
	//先unicode 转 gb2312
	char* gb2312=WCharToMByte((WCHAR*)unicode);

	//gb2312 转utf8
	char* utf8=GB2312_To_UTF8(gb2312);
	
	delete gb2312;

	return utf8;
}

// std::wstring version of Unicode -> UTF-8.
//
// unicode : wide-character text; converted up to its first NUL character.
// returns : the UTF-8 encoded text (empty if the conversion yields nothing).
std::string Unicode_To_UTF8( const  std::wstring &unicode )
{
	// The char* overload returns a heap buffer; take ownership so it can be
	// freed after copying (the original leaked it on every call).
	char* utf8 = Unicode_To_UTF8(reinterpret_cast<const char*>(unicode.c_str()));
	if (utf8 == NULL)
	{
		return std::string();
	}

	std::string result(utf8);
	delete[] utf8;
	return result;
}

char* Unicode_To_GB2312(const char* unicode)
{
	char*gb2312=WCharToMByte((WCHAR*)unicode);

	return gb2312;
}

// std::wstring version of Unicode -> GB2312.
//
// unicode : wide-character text; converted up to its first NUL character.
// returns : the converted multi-byte text (empty if the conversion yields
//           nothing).
std::string Unicode_To_GB2312(const std::wstring &unicode )
{
	// The char* overload returns a heap buffer; take ownership so it can be
	// freed after copying (the original leaked it on every call).
	char* gb2312 = Unicode_To_GB2312(reinterpret_cast<const char*>(unicode.c_str()));
	if (gb2312 == NULL)
	{
		return std::string();
	}

	std::string result(gb2312);
	delete[] gb2312;
	return result;
}


/*

static inline std::string convWith( const std::wstring &src )
{
	u32 destSize = 2 * src.size() + 1;
	c8 *destData = new c8[destSize];

	wcstombs_s( 0, destData, destSize, src.c_str(), destSize );

	std::string temp( destData );
	delete [] destData;

	return temp;
}

static inline std::wstring convWith( const std::string &src )
{
	u32 destSize = src.size() * 2;
	wchar_t *destData = new wchar_t[destSize];

	mbstowcs_s( 0, destData, destSize, src.c_str(), destSize );

	std::wstring temp( destData );
	delete [] destData;

	return temp;
}
*/

}; //end namespace