网上典型的代码是这一段:
char* ConvertEnc( char *encFrom, char *encTo, const char * in)这段代码里面有3个问题:
{
static char bufin[1024], bufout[1024], *sin, *sout;
int lenin, lenout, ret;
iconv_t c_pt;
if ((c_pt = iconv_open(encTo, encFrom)) == (iconv_t)-1)
{
printf("iconv_open false: %s ==> %s", encFrom, encTo);
return NULL;
}
iconv(c_pt, NULL, NULL, NULL, NULL);
lenin = strlen(in) + 1;
lenout = 1024;
sin = (char *)in;
sout = bufout;
ret = iconv(c_pt, (const char**)&sin, (size_t *)&lenin, &sout, (size_t *)&lenout);
if (ret == -1)
{
return NULL;
}
iconv_close(c_pt);
return bufout;
}
1.没有重复使用初始化后的iconv_t。
2.lenin = strlen(in) + 1这行代码在某些情况下有问题:当输入是UTF-16、UTF-32这类本身包含0字节的编码时,strlen会在第一个0字节处提前结束,得到的长度是错的。
3.转换缓冲区是一个固定值。
我根据iconv官网上的文档重写了相关代码,官网地址如下:
http://www.gnu.org/savannah-checkouts/gnu/libiconv
代码如下
#ifndef _ICONV_PAIR_HXX_
#define _ICONV_PAIR_HXX_
#include <string>
#include <iconv/include/iconv.h>
/**
 * Owns one iconv conversion descriptor for a (toCode, fromCode) pair together
 * with a growable output buffer, so the descriptor can be reused for many
 * conversions.
 */
class IconvPair
{
    enum
    {
        INIT_BUFFER = 4096 // initial size of the output buffer, in bytes
    };
public:
    /**
     * @param toCode    name of the target encoding.
     * @param fromCode  name of the source encoding.
     */
    IconvPair(const std::string &toCode, const std::string &fromCode);
    ~IconvPair();
private:
    // Copying is disabled: the object owns a raw buffer and a descriptor.
    IconvPair(const IconvPair&);
    IconvPair& operator=(const IconvPair&);
public:
    /**
     * Orders pairs by target encoding first, then by source encoding.
     *
     * Both comparisons are strict, so `x < x` is false; ordered containers
     * and sorting algorithms require that of a comparison.
     */
    friend bool operator<(const IconvPair &lhs, const IconvPair &rhs)
    {
        if (lhs.mToCode != rhs.mToCode)
        {
            return lhs.mToCode < rhs.mToCode;
        }
        return lhs.mFromCode < rhs.mFromCode;
    }

    /** Start of the output buffer holding the result of the last convert(). */
    const char* buffer() const { return mBuffer; }

    /** Current capacity of the output buffer, in bytes. */
    size_t bufferLen() const { return mBufferLen; }

    /** Number of bytes produced by the last convert(). */
    size_t contentLen() const { return mContentLen; }

    /**
     * Converts the bytes at *inBuffer into the internal buffer.
     *
     * @param inBuffer     in/out: address of the input pointer, handed to
     *                     iconv().
     * @param inBytesLeft  in/out: address of the remaining input byte count,
     *                     handed to iconv().
     * @return the value returned by the last iconv() call.
     */
    size_t convert(const char **inBuffer, size_t *inBytesLeft);
private:
    /** Enlarges the output buffer. */
    void incBuffer();
private:
    std::string mToCode;    // target encoding name
    std::string mFromCode;  // source encoding name
    iconv_t mIconv;         // conversion descriptor
    char *mBuffer;          // output buffer
    size_t mBufferLen;      // capacity of mBuffer in bytes
    size_t mContentLen;     // bytes written by the last convert()
};
#endif
#include "statistics/IconvPair.hxx"
#include <cstdlib>
#include <cerrno>
#include <cassert>
#include <climits>
#include <exception>
#include <new>
#include <stdexcept>

/**
 * Allocates the initial output buffer and opens the conversion descriptor.
 *
 * @param toCode    name of the target encoding.
 * @param fromCode  name of the source encoding.
 * @throws std::invalid_argument  if either name is empty.
 * @throws std::bad_alloc         if the output buffer cannot be allocated.
 * @throws std::runtime_error     if iconv_open() rejects the pair.
 *
 * All three exception types derive from std::exception.
 */
IconvPair::IconvPair(const std::string &toCode, const std::string &fromCode)
    : mToCode(toCode)
    , mFromCode(fromCode)
    , mIconv(reinterpret_cast<iconv_t>(-1))
    , mBuffer(NULL)
    , mBufferLen(0)
    , mContentLen(0)
{
    if (mToCode.empty() || mFromCode.empty())
    {
        throw std::invalid_argument("IconvPair: encoding names must not be empty");
    }

    mBuffer = static_cast<char*>(malloc(INIT_BUFFER));
    if (NULL == mBuffer)
    {
        throw std::bad_alloc();
    }
    mBufferLen = INIT_BUFFER;

    mIconv = iconv_open(mToCode.c_str(), mFromCode.c_str());
    if (reinterpret_cast<iconv_t>(-1) == mIconv)
    {
        // The destructor does not run for an object whose constructor
        // throws, so the buffer has to be released here.
        free(mBuffer);
        mBuffer = NULL;
        mBufferLen = 0;
        throw std::runtime_error("IconvPair: iconv_open failed for "
                                 + mFromCode + " -> " + mToCode);
    }
}

/** Closes the descriptor (if it was opened) and frees the output buffer. */
IconvPair::~IconvPair()
{
    if (reinterpret_cast<iconv_t>(-1) != mIconv)
    {
        iconv_close(mIconv);
    }
    free(mBuffer);
}

/**
 * Converts the bytes at *inBuffer into the internal buffer, growing the
 * buffer whenever iconv() reports that the output space ran out.
 *
 * After the call contentLen() is the number of bytes produced, and a single
 * zero byte is stored right behind them.
 *
 * @param inBuffer     in/out: address of the input pointer, handed to iconv().
 * @param inBytesLeft  in/out: address of the remaining input byte count,
 *                     handed to iconv().
 * @return the value returned by the last iconv() call; (size_t)-1 means the
 *         conversion stopped on an error other than lack of output space.
 * @throws std::bad_alloc  if the buffer cannot be enlarged.
 */
size_t IconvPair::convert(const char **inBuffer, size_t *inBytesLeft)
{
    assert((NULL != mBuffer) && (reinterpret_cast<iconv_t>(-1) != mIconv));
    assert((NULL != inBuffer) && (NULL != *inBuffer) && (NULL != inBytesLeft));

    // iconv() signals failure with (size_t)-1. That equals UINT_MAX only
    // where size_t and unsigned int have the same width.
    const size_t failed = static_cast<size_t>(-1);

    // Reset the descriptor to its initial state before each conversion.
    iconv(mIconv, NULL, NULL, NULL, NULL);

    char *outBuffer = mBuffer;
    size_t outBytesLeft = mBufferLen;
    size_t ret = iconv(mIconv, inBuffer, inBytesLeft, &outBuffer, &outBytesLeft);
    while ((failed == ret) && (E2BIG == errno))
    {
        // E2BIG can be raised while a few bytes are still free (the next
        // character simply does not fit), so resume after what was actually
        // written, not at the old end of the buffer. realloc may move the
        // block, so work with an offset instead of the old pointer.
        const size_t written = mBufferLen - outBytesLeft;
        incBuffer();
        outBuffer = mBuffer + written;
        outBytesLeft = mBufferLen - written;
        ret = iconv(mIconv, inBuffer, inBytesLeft, &outBuffer, &outBytesLeft);
    }

    mContentLen = mBufferLen - outBytesLeft;
    if (0 == outBytesLeft)
    {
        // Make room for the terminating zero byte.
        incBuffer();
    }
    mBuffer[mContentLen] = 0;
    return ret;
}

/**
 * Doubles the capacity of the output buffer, keeping its contents.
 *
 * The members are only updated once realloc() has succeeded, so on failure
 * the object still owns its old, valid buffer.
 *
 * @throws std::bad_alloc  if doubling would overflow size_t or realloc() fails.
 */
void IconvPair::incBuffer()
{
    if (mBufferLen > static_cast<size_t>(-1) / 2)
    {
        throw std::bad_alloc();
    }
    const size_t newLen = mBufferLen * 2;
    char *grown = static_cast<char*>(realloc(mBuffer, newLen));
    if (NULL == grown)
    {
        throw std::bad_alloc();
    }
    mBuffer = grown;
    mBufferLen = newLen;
}
测试代码块如下:
/**
 * Exercises IconvPair in two steps:
 *   1. converts exactly one buffer's worth of input and checks that the
 *      output buffer has grown to twice its initial size;
 *   2. converts the file league2012b.htm from gb2312 to utf-8 and writes the
 *      result to league2012b.txt.
 */
void UnitTest::testIconv()
{
    const size_t pageSize = 4096;
    const size_t maxFileLen = 1024 * 1024;
    IconvPair g2u("utf-8", "gb2312");

    // 'a' followed by zero bytes: pageSize input bytes in total.
    char array[pageSize] = {'a'};
    const char *inArray = array;
    size_t inArrayLen = pageSize;
    size_t ret = g2u.convert(&inArray, &inArrayLen);
    CPPUNIT_ASSERT(0 == ret);
    CPPUNIT_ASSERT((pageSize * 2) == g2u.bufferLen());

    // Binary mode: the bytes handed to iconv must be exactly the bytes in
    // the file, without any newline translation by the C runtime.
    FILE *f = fopen("league2012b.htm", "rb");
    CPPUNIT_ASSERT(NULL != f);

    char *buffer = reinterpret_cast<char*>(malloc(maxFileLen * sizeof(char)));
    if (NULL == buffer)
    {
        fclose(f);
    }
    CPPUNIT_ASSERT(NULL != buffer);

    // Read page by page. Stop on a short read of zero bytes (end of file or
    // read error) and never read past the end of the buffer.
    size_t fileLen = 0;
    while (fileLen < maxFileLen)
    {
        size_t want = maxFileLen - fileLen;
        if (want > pageSize)
        {
            want = pageSize;
        }
        const size_t got = fread(buffer + fileLen, 1, want, f);
        if (0 == got)
        {
            break;
        }
        fileLen += got;
    }
    fclose(f);

    const char *bufferIn = buffer;
    ret = g2u.convert(&bufferIn, &fileLen);
    // The converted text is held by g2u, so the input can be released before
    // asserting; a failing assertion then does not leak it.
    free(buffer);
    CPPUNIT_ASSERT(0 == ret);

    f = fopen("league2012b.txt", "wb");
    CPPUNIT_ASSERT(NULL != f);
    const size_t written = fwrite(g2u.buffer(), 1, g2u.contentLen(), f);
    fclose(f);
    CPPUNIT_ASSERT(g2u.contentLen() == written);
}
由于直接将测试代码块中的league2012b.htm复制到CSDN的代码块中会有问题,所以只好请大家直接用快盘共享出来的链接自己下载了。链接地址如下
http://www.kuaipan.cn/file/id_19631556775674797.htm