RFC3986编码 C 语言实现(支持大部分中文)

　　前些时间编写 xauth 认证程序时,发现网上找到的 RFC3986 编码实现不支持对中文进行编码,所以查找了一些资料,自己写了一个,代码如下。

#include <stdlib.h>

#include <string.h>

#include <stdio.h>

#include <android/log.h>

#include "RFC3986Encoder.h"

bool isReverseChar(char c);

char* charToHexString(char c);

bool isChinese(char c);

//#define snprintf _snprintf

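/**
 * Documentation for oauth_url_escape(), which is defined further below
 * (after the allocation helpers):
 *
 * Escape 'string' according to RFC3986 and
 * http://oauth.net/core/1.0/#encoding_parameters.
 *
 * @param string The data to be encoded
 * @return the encoded string (an empty string when 'string' is NULL)
 *
 * The caller must free the returned string.
 */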

/**
 * Shared failure handler for the allocation wrappers in this file.
 *
 * A failed zero-byte request is not treated as an error: NULL is handed
 * back to the caller. For any other size a message is written to stderr
 * and the process is terminated, so the function does not return.
 *
 * @param size size of the allocation request that failed
 * @return NULL when size is 0; otherwise does not return
 */
static void *xmalloc_fatal(size_t size) {
  if (size == 0) return NULL;
  fprintf(stderr, "Out of memory.");
  exit(EXIT_FAILURE);
}

/**
 * realloc() wrapper that routes failure through xmalloc_fatal().
 *
 * @param ptr  block to resize (passed straight to realloc)
 * @param size requested new size in bytes
 * @return the pointer returned by realloc(); if that is NULL, whatever
 *         xmalloc_fatal(size) returns
 */
void *xrealloc (void *ptr, size_t size) {
  void *resized = realloc(ptr, size);
  return (resized != NULL) ? resized : xmalloc_fatal(size);
}

/**
 * malloc() wrapper that routes failure through xmalloc_fatal().
 *
 * @param size number of bytes to allocate
 * @return the pointer returned by malloc(); if that is NULL, whatever
 *         xmalloc_fatal(size) returns
 */
void *xmalloc (size_t size) {
  void *block = malloc(size);
  return (block != NULL) ? block : xmalloc_fatal(size);
}

/**
 * Duplicate a NUL-terminated string into memory obtained from xmalloc().
 *
 * @param s string to copy; must not be NULL (it is passed to strlen)
 * @return a newly allocated copy of s, including its terminator.
 *         The caller must free the returned string.
 */
char *xstrdup (const char *s) {
  /* +1 so the terminating NUL is allocated and copied as well. */
  size_t size = strlen(s) + 1;
  char *copy = (char *) xmalloc(size);
  memcpy(copy, s, size);
  return copy;
}

/* Nonzero when c may appear unescaped: ASCII letters, digits, '-', '.',
 * '_' and '~' (the RFC 3986 "unreserved" set). */
static int oauth_is_unreserved(unsigned char c) {
  return (c >= '0' && c <= '9')
      || (c >= 'a' && c <= 'z')
      || (c >= 'A' && c <= 'Z')
      || c == '-' || c == '.' || c == '_' || c == '~';
}

/**
 * Escape 'string' according to RFC3986 and
 * http://oauth.net/core/1.0/#encoding_parameters.
 *
 * Unreserved characters are copied unchanged; every other byte is written
 * as '%' followed by its value as two upper-case hex digits (58 -> "%3A").
 * Bytes are handled as unsigned values, so each byte of a multi-byte
 * sequence is escaped individually.
 *
 * @param string The data to be encoded; NULL yields an empty string
 * @return the encoded, NUL-terminated string.
 *         The caller must free the returned string.
 */
char *oauth_url_escape(const char *string) {
  static const char hex_digits[] = "0123456789ABCDEF";
  size_t length;
  size_t escapes = 0;
  size_t strindex = 0;
  size_t i;
  char *ns;

  if (!string) return xstrdup("");

  length = strlen(string);

  /* First pass: each escaped byte grows from 1 to 3 output characters. */
  for (i = 0; i < length; i++) {
    if (!oauth_is_unreserved((unsigned char) string[i])) escapes++;
  }

  /* Exact size: one char per input byte, two extra per escape, plus NUL. */
  ns = (char *) xmalloc(length + 2 * escapes + 1);

  for (i = 0; i < length; i++) {
    unsigned char in = (unsigned char) string[i];

    if (oauth_is_unreserved(in)) {
      ns[strindex++] = (char) in;
    } else {
      /* Same text as the "%%%02X" format, written without snprintf. */
      ns[strindex++] = '%';
      ns[strindex++] = hex_digits[in >> 4];
      ns[strindex++] = hex_digits[in & 0x0F];
    }
  }

  ns[strindex] = '\0';
  return ns;
}

/*
 * Hex-digit test built on isxdigit(). The argument is converted to
 * unsigned char first so that a negative plain char is never passed to
 * isxdigit(). The macro argument is parenthesized so that an expression
 * argument is converted as a whole rather than only its first operand.
 *
 * NOTE(review): isxdigit() is declared in <ctype.h>, which is not among the
 * includes visible in this file -- confirm it is available where the macro
 * is expanded.
 */
#ifndef ISXDIGIT
# define ISXDIGIT(x) (isxdigit((int) ((unsigned char)(x))))
#endif

/**
 * Author's own encoder: percent-escapes 'input', with special handling for
 * bytes that isChinese() accepts.
 *
 * - Characters for which isReverseChar() is false are copied unchanged.
 * - A byte accepted by isChinese() is taken to be the first byte of a
 *   three-byte UTF-8 sequence. The whole sequence is replaced by ONE "%XX"
 *   token holding the low 8 bits of the decoded code point
 *   (two low bits of the second byte, six low bits of the third byte).
 * - Every other byte becomes "%XX" with its own value.
 *
 * NOTE(review): the three-byte rule keeps only 8 bits of the code point, so
 * the output cannot be decoded back and differs from escaping each UTF-8
 * byte separately (which is what oauth_url_escape() in this file does).
 * The behaviour is kept because it is the documented intent of this
 * function; confirm it is really what the consuming service expects.
 *
 * @param input NUL-terminated string to encode; NULL yields an empty string
 * @return a newly allocated, NUL-terminated string obtained with new[];
 *         release it with delete[].
 */
char* rfc3986Encoder(const char* input)
{
    static const char hexDigits[] = "0123456789ABCDEF";

    if (!input) {
        /* Allocate with new[] like the main path, so callers have a single
           way of releasing the result. */
        char* empty = new char[1];
        empty[0] = '\0';
        return empty;
    }

    const size_t length = strlen(input);

    /* Worst case: every input byte expands to three characters. */
    char* sb = new char[length * 3 + 1];
    size_t out = 0;

    for (size_t i = 0; i < length; i++)
    {
        const char c = input[i];

        if (!isReverseChar(c)) {
            sb[out++] = c;
            continue;
        }

        unsigned int value = c & 0xFF;

        /* Only decode when both following bytes are really there; a
           truncated sequence falls back to escaping the byte itself. */
        if (isChinese(c) && length - i >= 3) {
            const int high = (input[i + 1] & 0x03) << 6;
            const int low = input[i + 2] & 0x3f;
            value = (unsigned int)((high + low) & 0xff);
            i += 2;  /* skip the two bytes consumed above */
        }

        sb[out++] = '%';
        sb[out++] = hexDigits[value >> 4];
        sb[out++] = hexDigits[value & 0x0F];
    }

    sb[out] = '\0';
    return sb;
}

/**
 * Tell whether a character has to be percent-escaped.
 *
 * @param c character to classify
 * @return false for ASCII letters, ASCII digits, '-', '_', '.' and '~';
 *         true for every other value
 */
bool isReverseChar(char c)
{
    const bool unreserved = (c >= 'a' && c <= 'z')
                         || (c >= 'A' && c <= 'Z')
                         || (c >= '0' && c <= '9')
                         || c == '-' || c == '_' || c == '.' || c == '~';
    return !unreserved;
}

/**
 * Format one byte as a percent-escape token: '%' followed by two upper-case
 * hex digits.
 *
 * @param src byte to format; only its low 8 bits are used
 * @return a newly allocated 4-byte buffer ("%XX" plus terminator) obtained
 *         with new[]; release it with delete[].
 */
char* charToHexString(char src){
   /* '%' + two hex digits + terminating NUL */
   enum { HEX_TOKEN_SIZE = 4 };

   /* Mask to 0..255 so a negative char does not sign-extend into the output. */
   const unsigned int v = (unsigned int)(src & 0xFF);

   char* hv = new char[HEX_TOKEN_SIZE];
   snprintf(hv, HEX_TOKEN_SIZE, "%%%02X", v);
   return hv;
}

/**
 * Tell whether a byte has its three highest bits set (111xxxxx).
 *
 * UTF-8 uses the 1110xxxx pattern for the first byte of a three-byte
 * sequence, which is what this check is meant to detect. The mask covers
 * only the top three bits, so bytes of the form 1111xxxx are accepted too.
 *
 * @param c byte to test
 * @return true when (c & 0xE0) equals 0xE0, false otherwise
 */
bool isChinese(char c)
{
    return (c & 0xE0) == 0xE0;
}

如程序中所体现的,oauth_url_escape() 这个方法不支持对中文的编码,所以我另外写了一个方法 rfc3986Encoder()。如程序中所说,这里假定 UTF-8 汉字占 3 个字节,因此已经覆盖了大部分汉字。具体的参考资料当时没有做笔记,大概就是关于 UTF-8 汉字编码的问题。程序的思路也比较好懂:研读 Java 的 RFC3986 编码实现时,发现其获取的 UTF-8 汉字编码实际上是 Unicode 编码的后 8 位,所以这里在把 UTF-8 转换为 Unicode 编码时只取了后 8 位,再转换成 hex 的形式。代码我做了部分测试,目前还没有发现什么问题。