正确的iconv使用方法

时间:2021-08-19 23:22:01

网上典型的代码是这一段:

// Converts the NUL-terminated string `in` from encoding `encFrom` to encoding
// `encTo` with iconv.
//
// Returns a pointer to a function-static 1024-byte output buffer (overwritten
// by the next call), or NULL when iconv_open() or iconv() reports failure.
//
// NOTE(review): the surrounding article presents this function as an example
// of problematic iconv usage; the code is kept verbatim on purpose.
char* ConvertEnc( char *encFrom, char *encTo, const char * in)
{
// bufin is declared but never used in this function.
static char bufin[1024], bufout[1024], *sin, *sout;
int lenin, lenout, ret;
iconv_t c_pt;

// A new conversion descriptor is opened on every call.
if ((c_pt = iconv_open(encTo, encFrom)) == (iconv_t)-1)
{
printf("iconv_open false: %s ==> %s", encFrom, encTo);
return NULL;
}
// Reset the descriptor to its initial conversion state.
iconv(c_pt, NULL, NULL, NULL, NULL);

// The +1 includes the terminating NUL in the input, so it is converted too.
lenin = strlen(in) + 1;
// Output capacity is hard-coded to the size of bufout.
lenout = 1024;
sin = (char *)in;
sout = bufout;
// NOTE(review): lenin/lenout are int but are passed as size_t*; where size_t
// is wider than int, iconv reads and writes past these variables.
ret = iconv(c_pt, (const char**)&sin, (size_t *)&lenin, &sout, (size_t *)&lenout);

if (ret == -1)
{
// NOTE(review): this path returns without calling iconv_close(c_pt).
return NULL;
}
iconv_close(c_pt);

return bufout;
}
这段代码里面有3个问题:
1.没有重复使用初始化后的iconv_t。
2.lenin = strlen(in) + 1 这行代码在某些情况下有问题:例如输入是 UTF-16 这类数据中含有 0 字节的编码时,strlen 会在第一个 0 字节处提前结束,得到的输入长度不正确。
3.转换缓冲区的大小是一个固定值(1024 字节),转换结果超过该长度时无法完成转换。


我根据iconv官网上的文档重写了相关代码,官网地址如下:

http://www.gnu.org/savannah-checkouts/gnu/libiconv


代码如下

#ifndef _ICONV_PAIR_HXX_
#define _ICONV_PAIR_HXX_

#include <string>
#include <iconv/include/iconv.h>

/**
 * Reusable iconv converter for one (toCode, fromCode) encoding pair.
 *
 * The object owns an iconv descriptor and an output buffer. convert() writes
 * its result into that buffer, which is then readable through buffer(),
 * bufferLen() and contentLen(). Copy construction and copy assignment are
 * declared private and left undefined, so instances cannot be copied.
 */
class IconvPair
{
	enum
	{
		/// Size in bytes of the output buffer allocated by the constructor.
		INIT_BUFFER = 4096
	};

public:
	/**
	 * Opens a converter from encoding @p fromCode to encoding @p toCode.
	 *
	 * @param toCode   name of the target encoding; must not be empty.
	 * @param fromCode name of the source encoding; must not be empty.
	 * @throws std::exception (or a type derived from it) when a name is
	 *         empty, the converter cannot be opened, or allocation fails.
	 */
	IconvPair(const std::string &toCode, const std::string &fromCode);
	~IconvPair();

private:
	// Non-copyable: declared but intentionally not defined.
	IconvPair(const IconvPair&);
	IconvPair& operator=(const IconvPair&);

public:
	/**
	 * Orders pairs lexicographically by (toCode, fromCode).
	 *
	 * This is a strict ordering: two pairs with identical codes compare
	 * as neither less than the other, which is what ordered containers and
	 * sorting algorithms require of a comparison.
	 */
	friend bool operator<(const IconvPair &lhs, const IconvPair &rhs)
	{
		if (lhs.mToCode < rhs.mToCode)
		{
			return true;
		}
		if (rhs.mToCode < lhs.mToCode)
		{
			return false;
		}

		// Target codes are equal; the source code decides.
		return lhs.mFromCode < rhs.mFromCode;
	}

	/// Start of the output buffer holding the result of the last convert().
	const char* buffer() const {return mBuffer;}
	/// Currently allocated size of the output buffer, in bytes.
	size_t bufferLen() const {return mBufferLen;}
	/// Number of bytes the last convert() stored in the output buffer.
	size_t contentLen() const {return mContentLen;}

	/**
	 * Converts the bytes at *inBuffer into the internal output buffer.
	 *
	 * @param inBuffer    in/out: address of the input pointer handed to iconv().
	 * @param inBytesLeft in/out: address of the remaining input byte count
	 *                    handed to iconv().
	 * @return the value returned by the last iconv() call.
	 */
	size_t convert(const char **inBuffer, size_t *inBytesLeft);

private:
	/// Enlarges the output buffer.
	void incBuffer();

private:
	std::string mToCode;    ///< target encoding name
	std::string mFromCode;  ///< source encoding name
	iconv_t mIconv;         ///< conversion descriptor
	char *mBuffer;          ///< output buffer
	size_t mBufferLen;      ///< allocated size of mBuffer in bytes
	size_t mContentLen;     ///< bytes of converted output held in mBuffer
};

#endif

#include "statistics/IconvPair.hxx"

#include <cassert>
#include <cerrno>
#include <climits>
#include <cstdlib>
#include <exception>
#include <new>
#include <stdexcept>
#include <string>

namespace
{
	/// Value iconv() returns to signal failure.
	const size_t ICONV_FAILED = static_cast<size_t>(-1);

	/**
	 * Adapts the input-pointer argument to the local iconv() prototype.
	 *
	 * Depending on the platform's <iconv.h>, the second parameter of iconv()
	 * is declared either `char **` or `const char **`. This object converts
	 * implicitly to whichever of the two the prototype asks for.
	 */
	class IconvInput
	{
	public:
		explicit IconvInput(const char **in) : mIn(in) {}
		operator char**() const { return const_cast<char**>(mIn); }
		operator const char**() const { return mIn; }

	private:
		const char **mIn;
	};
}

/**
 * Opens the conversion descriptor and allocates the initial output buffer.
 *
 * @throws std::invalid_argument when either encoding name is empty.
 * @throws std::runtime_error    when iconv_open() rejects the pair.
 * @throws std::bad_alloc        when the output buffer cannot be allocated.
 */
IconvPair::IconvPair(const std::string &toCode, const std::string &fromCode)
	: mToCode(toCode)
	, mFromCode(fromCode)
	, mIconv(reinterpret_cast<iconv_t>(-1))
	, mBuffer(NULL)
	, mBufferLen(0)
	, mContentLen(0)
{
	if (mToCode.empty() || mFromCode.empty())
	{
		throw std::invalid_argument("IconvPair: empty encoding name");
	}

	mIconv = iconv_open(mToCode.c_str(), mFromCode.c_str());
	if (reinterpret_cast<iconv_t>(-1) == mIconv)
	{
		throw std::runtime_error(
			"IconvPair: iconv_open failed for " + mFromCode + " -> " + mToCode);
	}

	mBuffer = static_cast<char*>(malloc(INIT_BUFFER));
	if (NULL == mBuffer)
	{
		// The destructor does not run when the constructor throws, so the
		// descriptor has to be released here.
		iconv_close(mIconv);
		throw std::bad_alloc();
	}
	mBufferLen = INIT_BUFFER;
}

/// Closes the conversion descriptor and frees the output buffer.
IconvPair::~IconvPair()
{
	if (reinterpret_cast<iconv_t>(-1) != mIconv)
	{
		iconv_close(mIconv);
	}

	free(mBuffer);
}

/**
 * Converts the input into the internal buffer, growing it as needed.
 *
 * The output buffer is enlarged and the conversion resumed for as long as
 * iconv() fails with E2BIG. The converted bytes are followed by one 0 byte
 * that is not counted in contentLen().
 *
 * @param inBuffer    in/out: address of the input pointer; advanced by iconv().
 * @param inBytesLeft in/out: remaining input bytes; decremented by iconv().
 * @return the value of the last iconv() call; (size_t)-1 on a conversion
 *         error, with errno as set by iconv().
 * @throws std::bad_alloc when the output buffer cannot be enlarged.
 */
size_t IconvPair::convert(const char **inBuffer, size_t *inBytesLeft)
{
	assert((NULL != mBuffer) && (reinterpret_cast<iconv_t>(-1) != mIconv));
	assert((NULL != inBuffer) && (NULL != *inBuffer) && (NULL != inBytesLeft));

	// Put the descriptor back into its initial conversion state.
	iconv(mIconv, NULL, NULL, NULL, NULL);

	char *outBuffer = mBuffer;
	size_t outBytesLeft = mBufferLen;
	size_t ret;
	for (;;)
	{
		ret = iconv(mIconv, IconvInput(inBuffer), inBytesLeft, &outBuffer,
			&outBytesLeft);
		if ((ICONV_FAILED != ret) || (E2BIG != errno))
		{
			break;
		}

		// Out of output space. A few bytes may still be free when the next
		// character needs more than what is left, so resume from the bytes
		// actually written rather than from the old buffer length.
		const size_t used = static_cast<size_t>(outBuffer - mBuffer);
		incBuffer();	// may move mBuffer
		outBuffer = mBuffer + used;
		outBytesLeft = mBufferLen - used;
	}

	mContentLen = static_cast<size_t>(outBuffer - mBuffer);
	if (mContentLen == mBufferLen)
	{
		// Make room for the terminator without disturbing the errno that
		// the caller may want to inspect after a failed conversion.
		const int savedErrno = errno;
		incBuffer();
		errno = savedErrno;
	}
	mBuffer[mContentLen] = 0;

	return ret;
}

/**
 * Doubles the size of the output buffer, preserving its contents.
 *
 * The members are updated only after the reallocation succeeded, so the
 * object keeps its old, valid buffer when this function throws.
 *
 * @throws std::bad_alloc when the size would overflow or realloc() fails.
 */
void IconvPair::incBuffer()
{
	if (mBufferLen > static_cast<size_t>(-1) / 2)
	{
		throw std::bad_alloc();
	}

	const size_t grownLen = mBufferLen * 2;
	char *grown = static_cast<char*>(realloc(mBuffer, grownLen));
	if (NULL == grown)
	{
		throw std::bad_alloc();
	}

	mBuffer = grown;
	mBufferLen = grownLen;
}



测试代码块如下:

/**
 * Exercises IconvPair with a gb2312 -> utf-8 converter.
 *
 * First converts one page of in-memory data and checks the buffer size
 * afterwards, then converts the file "league2012b.htm" and writes the result
 * to "league2012b.txt".
 */
void UnitTest::testIconv()
{
	const size_t pageSize = 4096;
	const size_t bufferCap = 1024 * 1024;
	IconvPair g2u("utf-8", "gb2312");

	// One 'a' followed by zero bytes: a full page of input. The test expects
	// the converter's buffer to have doubled once afterwards.
	char array[pageSize] = {'a'};
	const char *inArray = array;
	size_t inArrayLen = pageSize;
	size_t ret = g2u.convert(&inArray, &inArrayLen);
	CPPUNIT_ASSERT((0 == ret) && ((pageSize * 2) == g2u.bufferLen()));

	char *buffer = static_cast<char*>(malloc(bufferCap));
	CPPUNIT_ASSERT(NULL != buffer);

	// Binary mode: the file holds raw encoded bytes that must not be altered
	// by text-mode translation.
	FILE *f = fopen("league2012b.htm", "rb");
	if (NULL == f)
	{
		free(buffer);
	}
	CPPUNIT_ASSERT(NULL != f);

	// Read until EOF, a read error, or a full buffer. Looping on the fread()
	// result ends on errors too and never writes past the buffer.
	size_t fileLen = 0;
	size_t got = 0;
	while ((fileLen < bufferCap) &&
		((got = fread(buffer + fileLen, 1, bufferCap - fileLen, f)) > 0))
	{
		fileLen += got;
	}
	const bool readFailed = (0 != ferror(f));
	fclose(f);
	if (readFailed)
	{
		free(buffer);
	}
	CPPUNIT_ASSERT(!readFailed);

	const char *bufferIn = buffer;
	ret = g2u.convert(&bufferIn, &fileLen);
	// The converted text lives inside g2u; release the input before asserting
	// so that a failing assertion does not leak it.
	free(buffer);
	CPPUNIT_ASSERT(0 == ret);

	f = fopen("league2012b.txt", "wb");
	CPPUNIT_ASSERT(NULL != f);
	const size_t written = fwrite(g2u.buffer(), 1, g2u.contentLen(), f);
	fclose(f);
	CPPUNIT_ASSERT(g2u.contentLen() == written);
}

由于直接将测试代码块中的league2012b.htm复制到CSDN的代码块中会有问题,所以只好请大家直接用快盘共享出来的链接自己下载了。链接地址如下

http://www.kuaipan.cn/file/id_19631556775674797.htm