C 程序实现 Unicode 码点转 UTF-8 编码

时间:2022-05-27 19:02:25

下面是一个将 Unicode 码点转换为 UTF-8 字节序列的 C 程序实现：

/*
* =====================================================================================
*
* Filename: unicodetoutf8.c
*
* Description: Encode a Unicode code point as a UTF-8 byte sequence.
*
* Version: 1.0
* Created: 08/06/2015 10:53:31 AM
* Revision: none
* Compiler: gcc
*
* Author: YOUR NAME (), 
* Organization: 
*
* =====================================================================================
*/


#include <stdlib.h>
#include <stdio.h>
#include <string.h>

/*
 * Encode a single Unicode code point as UTF-8.
 *
 * codepoint: the value to encode. Must be a Unicode scalar value, i.e.
 *            in 0..0x10FFFF and outside the UTF-16 surrogate range
 *            0xD800..0xDFFF.
 * str:       destination buffer. Receives 1 to 4 bytes depending on the
 *            magnitude of codepoint. No terminating NUL is written, so a
 *            caller that wants a C string must zero the buffer beforehand
 *            or terminate it afterwards.
 *
 * Returns 0 on success. Returns -1, leaving str untouched, when str is
 * NULL or codepoint is not a valid Unicode scalar value.
 */
int unicode_to_utf8 (unsigned int codepoint, char *str)
{
  /* Work on unsigned bytes: lead/continuation bytes are all >= 0x80 and
     would not fit a signed char without an implementation-defined
     conversion. */
  unsigned char *out = (unsigned char *) str;

  if (str == NULL)
  {
    return -1;
  }

  /* Surrogates are reserved for UTF-16 and must never appear in UTF-8;
     anything above U+10FFFF is outside the Unicode code space. */
  if (codepoint > 0x10FFFF || (codepoint >= 0xD800 && codepoint <= 0xDFFF))
  {
    return -1;
  }

  if (codepoint < 0x80)
  {
    /* 0xxxxxxx */
    out[0] = (unsigned char) codepoint;
  }
  else if (codepoint < 0x800)
  {
    /* 110xxxxx 10xxxxxx */
    out[0] = (unsigned char) (0xC0 | (codepoint >> 6));
    out[1] = (unsigned char) (0x80 | (codepoint & 0x3F));
  }
  else if (codepoint < 0x10000)
  {
    /* 1110xxxx 10xxxxxx 10xxxxxx */
    out[0] = (unsigned char) (0xE0 | (codepoint >> 12));
    out[1] = (unsigned char) (0x80 | ((codepoint >> 6) & 0x3F));
    out[2] = (unsigned char) (0x80 | (codepoint & 0x3F));
  }
  else
  {
    /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
    out[0] = (unsigned char) (0xF0 | (codepoint >> 18));
    out[1] = (unsigned char) (0x80 | ((codepoint >> 12) & 0x3F));
    out[2] = (unsigned char) (0x80 | ((codepoint >> 6) & 0x3F));
    out[3] = (unsigned char) (0x80 | (codepoint & 0x3F));
  }

  return 0;
}

/*
 * Demo entry point: encodes one sample code point with unicode_to_utf8()
 * and prints the resulting bytes as a string on stdout.
 *
 * Returns EXIT_SUCCESS after printing, or EXIT_FAILURE when the encoder
 * returns a non-zero status.
 */
int main ( int argc, char *argv[] )
{
  /* Command-line arguments are not used by this demo. */
  (void) argc;
  (void) argv;

  unsigned int codepoint = 0xFFFF;
  /* Zero-filled up front: the encoder writes only the encoded bytes and
     no terminator, so the remaining zeros terminate the string for %s. */
  char str[16] = {0};

  if (unicode_to_utf8 (codepoint, str) != 0)
  {
    fprintf (stderr, "cannot encode code point 0x%X\n", codepoint);
    return EXIT_FAILURE;
  }

  printf ("str:%s\n", str);
  return EXIT_SUCCESS;
}     /* ---------- end of function main ---------- */