字符串:多字节与宽字节转换

时间:2022-03-23 20:17:40

C 标准

#include <stdlib.h>
mbstowcs 多字节字符串转换为宽字符串
wcstombs 宽字符串转换为多字节字符串

使用注意:
对于 mbstowcs 函数来说,它需要知道多字节字符串的编码类型才能正确地将其转换成宽字符(Unicode)。很可惜,这个编码类型并没有体现在函数的参数列表里,而是隐含依赖全局 locale(LC_CTYPE 类别)。更加不幸的是,全局 locale 默认没有使用系统当前语言,而是设置为没什么用处的 “C” locale。wcstombs 同理。

在调用 mbstowcs 之前,先用 setlocale(LC_ALL, "") 将全局 locale 设为当前系统默认 locale。
如果是在非中文系统上转 GBK 编码,就需要显式指定中文 locale("chs" 是 Windows CRT 的 locale 名称,其他平台的名称不同,例如 glibc 下的 "zh_CN.GBK"):

#include <locale.h>
setlocale(LC_ALL, "chs");

wcstombs例子:

#include <locale.h>
#include <stdio.h>
#include <stdlib.h>

#define BUFFER_SIZE 100

/*
 * wcstombs() example: convert a wide string to a multibyte string in the
 * encoding of the selected locale and print it.
 *
 * Returns EXIT_SUCCESS on success, EXIT_FAILURE if the locale cannot be
 * selected, the string cannot be converted, or memory cannot be allocated.
 */
int main( void )
{
    const wchar_t *pWCBuffer = L"Hello, world.你好,世界。";
    char          *pMBBuffer;
    size_t         count;

    /* wcstombs() encodes according to the global locale, so select it first. */
    if (setlocale(LC_ALL, "chs") == NULL) {
        fprintf(stderr, "setlocale failed\n");
        return EXIT_FAILURE;
    }

    /*
     * With a NULL destination wcstombs() only measures: it returns the number
     * of bytes needed (terminator excluded), or (size_t)-1 when a character
     * cannot be represented in the current locale.
     */
    count = wcstombs(NULL, pWCBuffer, 0);
    if (count == (size_t)-1 || count >= BUFFER_SIZE) {
        fprintf(stderr, "wide string cannot be converted into %d bytes\n", BUFFER_SIZE);
        return EXIT_FAILURE;
    }

    pMBBuffer = malloc(BUFFER_SIZE);
    if (pMBBuffer == NULL) {
        fprintf(stderr, "out of memory\n");
        return EXIT_FAILURE;
    }

    /* The measured length is below BUFFER_SIZE, so the terminator is written too. */
    count = wcstombs(pMBBuffer, pWCBuffer, BUFFER_SIZE);
    printf(" Characters converted: %zu\n", count); /* %zu matches size_t */
    printf(" Multibyte character: %s\n\n", pMBBuffer);
    free(pMBBuffer);
    return EXIT_SUCCESS;
}

输出:

 Characters converted: 25
 Multibyte character: Hello, world.你好,世界。 

mbstowcs例子

#include <locale.h>
#include <stdio.h>
#include <stdlib.h>

#define BUFFER_SIZE 100

/*
 * mbstowcs() example: convert a multibyte string (interpreted in the encoding
 * of the selected locale) to a wide string and print it.
 *
 * Returns EXIT_SUCCESS on success, EXIT_FAILURE if the locale cannot be
 * selected, the string cannot be converted, or memory cannot be allocated.
 */
int main(void)
{
    const char *pMBBuffer = "Hello, world.你好,世界。";
    wchar_t    *pWCBuffer;
    size_t      count;

    /* mbstowcs() decodes according to the global locale, so select it first. */
    if (setlocale(LC_ALL, "chs") == NULL) {
        fprintf(stderr, "setlocale failed\n");
        return EXIT_FAILURE;
    }

    /*
     * With a NULL destination mbstowcs() only measures: it returns the number
     * of wide characters needed (terminator excluded), or (size_t)-1 when the
     * input is not valid in the current locale.
     */
    count = mbstowcs(NULL, pMBBuffer, 0);
    if (count == (size_t)-1 || count >= BUFFER_SIZE) {
        fprintf(stderr, "multibyte string cannot be converted into %d wide characters\n", BUFFER_SIZE);
        return EXIT_FAILURE;
    }

    /* The capacity is counted in wchar_t elements, not in bytes. */
    pWCBuffer = malloc(BUFFER_SIZE * sizeof *pWCBuffer);
    if (pWCBuffer == NULL) {
        fprintf(stderr, "out of memory\n");
        return EXIT_FAILURE;
    }

    /* The third argument is the destination capacity in wide characters. */
    count = mbstowcs(pWCBuffer, pMBBuffer, BUFFER_SIZE);
    printf(" Characters converted: %zu\n", count); /* %zu matches size_t */
    /*
     * Print through printf with %ls: stdout is already byte-oriented after the
     * printf above, and ISO C does not allow wide output functions on a
     * byte-oriented stream.
     */
    printf(" Widebyte character: %ls\n\n", pWCBuffer);
    free(pWCBuffer);
    return EXIT_SUCCESS;
}

输出:

 Characters converted: 19
 Widebyte character: Hello, world.你好,世界。 

Windows API

#include <Windows.h>
MultiByteToWideChar 多字节转宽字节(UTF-16),源编码由 CodePage 参数指定(如 CP_ACP、CP_UTF8)
WideCharToMultiByte 宽字节(UTF-16)转多字节,目标编码由 CodePage 参数指定;两个函数配合、以 UTF-16 为中介,即可实现字符编码相互转换(gbk–utf8)

WideCharToMultiByte例子:

#include <stdio.h>
#include <stdlib.h>
#include <Windows.h>

#define BUFFER_SIZE 100

/*
 * WideCharToMultiByte() example: convert a wide string to a multibyte string
 * and print it.
 *
 * Returns EXIT_SUCCESS on success, EXIT_FAILURE if memory cannot be allocated
 * or the conversion fails.
 */
int main( void )
{
    const wchar_t *pWCBuffer = L"Hello, world.你好,世界。";
    char          *pMBBuffer = malloc(BUFFER_SIZE);
    int            count; /* WideCharToMultiByte returns int, 0 on failure */

    if (pMBBuffer == NULL) {
        fprintf(stderr, "out of memory\n");
        return EXIT_FAILURE;
    }

    /*
     * Code page 0 is CP_ACP (the system ANSI code page). A source length of -1
     * means the input is null-terminated; the terminator is converted as well
     * and is included in the returned byte count.
     */
    count = WideCharToMultiByte(0, 0, pWCBuffer, -1, pMBBuffer, BUFFER_SIZE, NULL, NULL);
    if (count == 0) {
        fprintf(stderr, "WideCharToMultiByte failed\n");
        free(pMBBuffer);
        return EXIT_FAILURE;
    }

    printf(" Characters converted: %d\n", count);
    printf(" Multibyte character: %s\n\n", pMBBuffer);
    free(pMBBuffer);
    return EXIT_SUCCESS;
}

结果:

 Characters converted: 26
 Multibyte character: Hello, world.你好,世界。 

MultiByteToWideChar例子:

#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <Windows.h>

#define BUFFER_SIZE 100

/*
 * MultiByteToWideChar() example: convert a multibyte string to a wide string
 * and print it.
 *
 * Returns EXIT_SUCCESS on success, EXIT_FAILURE if memory cannot be allocated
 * or the conversion fails.
 */
int main(void)
{
    const char *pMBBuffer = "Hello, world.你好,世界。";
    /* The capacity is counted in wchar_t elements, not in bytes. */
    wchar_t    *pWCBuffer = malloc(BUFFER_SIZE * sizeof *pWCBuffer);
    int         count; /* MultiByteToWideChar returns int, 0 on failure */

    if (pWCBuffer == NULL) {
        fprintf(stderr, "out of memory\n");
        return EXIT_FAILURE;
    }

    /*
     * Code page 0 is CP_ACP (the system ANSI code page). A source length of -1
     * means the input is null-terminated; the terminator is converted as well
     * and is included in the returned character count. The last argument is
     * the destination capacity in wide characters.
     */
    count = MultiByteToWideChar(0, 0, pMBBuffer, -1, pWCBuffer, BUFFER_SIZE);
    if (count == 0) {
        fprintf(stderr, "MultiByteToWideChar failed\n");
        free(pWCBuffer);
        return EXIT_FAILURE;
    }

    /* %ls converts the wide string through the current locale when printing. */
    setlocale(LC_ALL, "chs");
    printf(" Characters converted: %d\n", count);
    /*
     * Print through printf with %ls: stdout is already byte-oriented after the
     * printf above, and ISO C does not allow wide output functions on a
     * byte-oriented stream.
     */
    printf(" Widebyte character: %ls\n\n", pWCBuffer);
    free(pWCBuffer);
    return EXIT_SUCCESS;
}

结果:

 Characters converted: 20
 Widebyte character: Hello, world.你好,世界。