GBK和UTF8之间的转换 C语言

来自：http://www.diybl.com/course/3_program/c++/cppsl/2008320/105871.html

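GBK和UTF8之间的转换可以使用MultiByteToWideChar和WideCharToMultiByte两个Windows API来完成：先用MultiByteToWideChar把源字符串转换为中间编码Unicode（即UTF-16的宽字符串），再用WideCharToMultiByte把它转换为目标编码即可。注意下面的代码用CP_ACP（系统当前的ANSI代码页）代表GBK，只有在简体中文Windows上它才等于GBK（代码页936）。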

#include <stdio.h>
#include <windows.h>

#include <vector>

// Converts a NUL-terminated GBK string to UTF-8 by going through UTF-16.
//
// NOTE: the source encoding is taken to be CP_ACP, the system's active ANSI
// code page. That is GBK (code page 936) only on Simplified-Chinese Windows;
// on other locales the input is interpreted in that locale's code page.
//
// Parameters:
//   lpGBKStr     NUL-terminated input string. NULL is rejected.
//   lpUTF8Str    Output buffer. When NULL, nothing is written and the
//                required buffer size is returned instead.
//   nUTF8StrLen  Capacity of lpUTF8Str in bytes.
//
// Returns:
//   - the required size in bytes (terminating NUL included) when lpUTF8Str
//     is NULL;
//   - otherwise the number of bytes written (terminating NUL included);
//   - 0 on any failure: NULL input, conversion error, or output buffer
//     smaller than required.
int GBKToUTF8(unsigned char * lpGBKStr,unsigned char * lpUTF8Str, int nUTF8StrLen)
{
    if (!lpGBKStr)
        return 0;

    const char * pszSource = reinterpret_cast<const char *>(lpGBKStr);

    // Sizing pass: number of UTF-16 code units needed, terminating NUL included.
    const int nWideLen = ::MultiByteToWideChar(CP_ACP, 0, pszSource, -1, NULL, 0);
    if (nWideLen <= 0)
        return 0;

    // The vector owns the intermediate buffer, so every return path below
    // releases it automatically (the early error return used to leak it).
    std::vector<wchar_t> wideBuf(nWideLen);
    if (!::MultiByteToWideChar(CP_ACP, 0, pszSource, -1, &wideBuf[0], nWideLen))
        return 0;

    // Sizing pass for the UTF-8 output, terminating NUL included.
    const int nRequired = ::WideCharToMultiByte(CP_UTF8, 0, &wideBuf[0], -1, NULL, 0, NULL, NULL);

    // Query mode: the caller only wants to know how much space to allocate.
    if (!lpUTF8Str)
        return nRequired;

    if (nRequired <= 0 || nUTF8StrLen < nRequired)
        return 0;

    return ::WideCharToMultiByte(CP_UTF8, 0, &wideBuf[0], -1,
                                 reinterpret_cast<char *>(lpUTF8Str), nUTF8StrLen, NULL, NULL);
}

// Converts a NUL-terminated UTF-8 string to GBK by going through UTF-16.
//
// NOTE: the target encoding is CP_ACP, the system's active ANSI code page.
// That is GBK (code page 936) only on Simplified-Chinese Windows; on other
// locales the output is produced in that locale's code page.
//
// Parameters:
//   lpUTF8Str   NUL-terminated input string. NULL is rejected.
//   lpGBKStr    Output buffer. When NULL, nothing is written and the
//               required buffer size is returned instead.
//   nGBKStrLen  Capacity of lpGBKStr in bytes.
//
// Returns:
//   - the required size in bytes (terminating NUL included) when lpGBKStr
//     is NULL;
//   - otherwise the number of bytes written (terminating NUL included);
//   - 0 on any failure: NULL input, conversion error, or output buffer
//     smaller than required.
int UTF8ToGBK(unsigned char * lpUTF8Str,unsigned char * lpGBKStr, int nGBKStrLen)
{
    if (!lpUTF8Str)
        return 0;

    const char * pszSource = reinterpret_cast<const char *>(lpUTF8Str);

    // Sizing pass: number of UTF-16 code units needed, terminating NUL included.
    const int nWideLen = ::MultiByteToWideChar(CP_UTF8, 0, pszSource, -1, NULL, 0);
    if (nWideLen <= 0)
        return 0;

    // The vector owns the intermediate buffer, so every return path below
    // releases it automatically (the early error return used to leak it).
    std::vector<wchar_t> wideBuf(nWideLen);
    if (!::MultiByteToWideChar(CP_UTF8, 0, pszSource, -1, &wideBuf[0], nWideLen))
        return 0;

    // Sizing pass for the ANSI/GBK output, terminating NUL included.
    const int nRequired = ::WideCharToMultiByte(CP_ACP, 0, &wideBuf[0], -1, NULL, 0, NULL, NULL);

    // Query mode: the caller only wants to know how much space to allocate.
    if (!lpGBKStr)
        return nRequired;

    if (nRequired <= 0 || nGBKStrLen < nRequired)
        return 0;

    return ::WideCharToMultiByte(CP_ACP, 0, &wideBuf[0], -1,
                                 reinterpret_cast<char *>(lpGBKStr), nGBKStrLen, NULL, NULL);
}

// Writes `len` bytes starting at `data` to the file at `path`, replacing any
// existing content (binary mode, so no newline translation).
// Returns true only if the file was opened, fully written and closed cleanly.
static bool SaveBytes(const char * path, const char * data, size_t len)
{
    FILE * fp = fopen(path, "wb");
    if (!fp)
        return false;

    const bool bWritten = (len == 0) || (fwrite(data, 1, len, fp) == len);
    const bool bClosed = (fclose(fp) == 0);
    return bWritten && bClosed;
}

// Example: round-trips a GBK string through UTF-8 and back, saving each
// result to a text file so the encoding can be inspected in Notepad.
int main()
{
    char cGBKStr[] = " 我是中国人! " ;
    const char * pszUTF8File = "C:\\GBK转UTF8.txt";   // backslash must be escaped in a literal
    const char * pszGBKFile = "C:\\UTF8转GBK.txt";    // an unescaped "\U" starts a universal-character-name

    // --- GBK -> UTF-8 ---
    // First call with a NULL buffer only asks for the required size.
    int nRetLen = GBKToUTF8(reinterpret_cast<unsigned char *>(cGBKStr), NULL, 0);
    printf( " 转换后的字符串需要的空间长度为：%d " ,nRetLen);

    std::vector<char> utf8Buf(nRetLen + 1);
    nRetLen = GBKToUTF8(reinterpret_cast<unsigned char *>(cGBKStr),
                        reinterpret_cast<unsigned char *>(&utf8Buf[0]), nRetLen);
    if (!nRetLen)
    {
        printf( " GBKToUTF8转换失败！ " );
        return 0;
    }
    printf( " GBKToUTF8转换成功！ " );

    // The returned length counts the terminating NUL; do not write it into
    // the text file.
    if (!SaveBytes(pszUTF8File, &utf8Buf[0], static_cast<size_t>(nRetLen - 1)))
    {
        printf("无法写入文件：%s\n", pszUTF8File);
        return 0;
    }

    // Pause so the file can be opened in Notepad: "File" -> "Save As" should
    // now show "UTF-8" in the encoding box.
    getchar();

    // --- UTF-8 -> GBK (convert back) ---
    nRetLen = UTF8ToGBK(reinterpret_cast<unsigned char *>(&utf8Buf[0]), NULL, 0);
    printf( " 转换后的字符串需要的空间长度为：%d " ,nRetLen);

    std::vector<char> gbkBuf(nRetLen + 1);
    nRetLen = UTF8ToGBK(reinterpret_cast<unsigned char *>(&utf8Buf[0]),
                        reinterpret_cast<unsigned char *>(&gbkBuf[0]), nRetLen);
    if (!nRetLen)
    {
        printf( " UTF8ToGBK转换失败！ " );
        return 0;
    }
    printf( " UTF8ToGBK转换成功！ " );

    if (!SaveBytes(pszGBKFile, &gbkBuf[0], static_cast<size_t>(nRetLen - 1)))
    {
        printf("无法写入文件：%s\n", pszGBKFile);
        return 0;
    }

    // Pause again: Notepad's encoding box should now show "ANSI".
    getchar();

    return 0;
}

在网上看到的一些文章说，UTF8转换为GBK的时候会有问题，特别是当UTF8字符串中的汉字数为奇数时。关于这个问题我没有去验证过，而且我对UTF8和GB2312的编码还不是很熟悉，呵呵，等以后有空的时候再去了解一下编码吧。

秒客网

GBK和UTF8之间的转换 C语言

相关文章