ANSI, UNICODE与UTF8相互转换模板类

时间:2021-10-31 20:19:41

在WIN32开发过程中经常需要在不同的字符集之间进行转换,这可以通过WIN32 API函数WideCharToMultiByte和MultiByteToWideChar来完成,但转换过程涉及空间的分配与回收问题,直接调用API稍显麻烦,故在此将字符集转换功能封装到一个模板类中,以简化转换过程。该类代码如下:

 

 /**************************************************************************
*   Copyright (C) 2009 by Linmei,Jiang                                    *
*   clough@hqu.edu.cn                                                     *
*                                                                         *
*   This program is free software; you can redistribute it and/or modify  *
*   it under the terms of the GNU General Public License as published by  *
*   the Free Software Foundation; either version 2 of the License, or     *
*   (at your option) any later version.                                   *
*                                                                         *
*   This program is distributed in the hope that it will be useful,       *
*   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
*   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
*   GNU General Public License for more details.                          *
*                                                                         *
*   You should have received a copy of the GNU General Public License     *
*   along with this program; if not, write to the                         *
*   Free Software Foundation, Inc.,                                       *
*   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
***************************************************************************/

#ifndef _C_TRANS_CODE_
#define _C_TRANS_CODE_

template <typename T = LPCTSTR>
class CTransCode
{
public:
    enum ECode {EN_NULL, EN_ANSI, EN_UNICODE, EN_UTF8};
public:
    CTransCode(T pSrcStr, ECode enSrcCode = EN_NULL)
        : m_pSrcStr(pSrcStr), m_enSrcCode(enSrcCode),
        m_pAnsi(NULL), m_pUtf8(NULL), m_pMulti(NULL)
    {
#ifdef _UNICODE
        if (m_enSrcCode == EN_NULL)
        {
            m_enSrcCode = EN_UNICODE;
        }
#else
        if (m_enSrcCode == EN_NULL)
        {
            m_enSrcCode = EN_ANSI;
        }
#endif
    }
    ~CTransCode(void)
    {
        Release();
    }
public:
    const wchar_t* ToUnicode();
    const char* ToAnsi();
    const char* ToUtf8();
    // You can manually release the buffer or wait for the destructor to do it.
    void Release()
    {
        if (m_pAnsi)
        {
            delete[] m_pAnsi;
            m_pAnsi = NULL;
        }
        if (m_pUtf8)
        {
            delete[] m_pUtf8;
            m_pUtf8 = NULL;
        }
        if (m_pMulti)
        {
            delete[] m_pMulti;
            m_pMulti = NULL;
        }
    }
private:
    T           m_pSrcStr;
    ECode       m_enSrcCode;
    char*       m_pAnsi;
    char*       m_pUtf8;
    wchar_t*    m_pMulti;
};

//////////////////////////////////////////////////////////////////////////
/**
 * Returns the string as wide characters.
 *
 * - Source already EN_UNICODE: the source pointer itself is returned.
 * - Otherwise the source is treated as a narrow string (UTF-8 when the source
 *   encoding is EN_UTF8, the ANSI code page in every other case), converted
 *   once, and the result is cached in m_pMulti for later calls.
 */
template <typename T>
const wchar_t* CTransCode<T>::ToUnicode()
{
    if (m_enSrcCode == EN_UNICODE)
    {
        return (const wchar_t*)m_pSrcStr;
    }
    if (m_pMulti)
    {
        // A previous call already produced the wide copy.
        return m_pMulti;
    }

    UINT uSrcPage = CP_ACP;
    if (m_enSrcCode == EN_UTF8)
    {
        uSrcPage = CP_UTF8;
    }

    // First call only asks for the required length (no output buffer).
    int cchWide = MultiByteToWideChar(uSrcPage, 0, (LPCSTR)m_pSrcStr, -1, NULL, 0);

    // One spare, zeroed element keeps the buffer terminated whatever the
    // second call writes.
    wchar_t* pWideBuf = new wchar_t[cchWide + 1];
    memset(pWideBuf, 0, (cchWide + 1) * sizeof(wchar_t));
    m_pMulti = pWideBuf;

    MultiByteToWideChar(uSrcPage, 0, (LPCSTR)m_pSrcStr, -1, pWideBuf, cchWide);
    return m_pMulti;
}

/**
 * Returns the string in the ANSI code page (CP_ACP).
 *
 * - Source already EN_ANSI: the source pointer itself is returned.
 * - Source EN_UTF8: it is first converted to wide characters via ToUnicode(),
 *   then from wide characters to ANSI.
 * - Any other source encoding: the source is used directly as a wide string.
 *
 * The converted copy is cached in m_pAnsi and reused by later calls.
 */
template <typename T>
const char* CTransCode<T>::ToAnsi()
{
    if (m_enSrcCode == EN_ANSI)
    {
        return (const char*)m_pSrcStr;
    }
    if (m_pAnsi)
    {
        return m_pAnsi;
    }

    // Pick the wide-character input for the conversion below.
    const wchar_t* pWideSrc;
    if (m_enSrcCode == EN_UTF8)
    {
        // UTF-8 has to go through the wide form; ToUnicode() fills m_pMulti.
        ToUnicode();
        pWideSrc = m_pMulti;
    }
    else
    {
        pWideSrc = (const wchar_t*)m_pSrcStr;
    }

    // Length query first, then the real conversion into a zeroed buffer that
    // has one spare byte so it always ends in a terminator.
    int cbAnsi = WideCharToMultiByte(CP_ACP, 0, (LPCWSTR)pWideSrc, -1, NULL, 0, NULL, NULL);
    char* pAnsiBuf = new char[cbAnsi + 1];
    memset(pAnsiBuf, 0, (cbAnsi + 1) * sizeof(char));
    m_pAnsi = pAnsiBuf;

    WideCharToMultiByte(CP_ACP, 0, (LPCWSTR)pWideSrc, -1, pAnsiBuf, cbAnsi, NULL, NULL);
    return m_pAnsi;
}

/**
 * Returns the string encoded as UTF-8.
 *
 * - Source already EN_UTF8: the source pointer itself is returned.
 * - Source EN_ANSI: it is first converted to wide characters via ToUnicode(),
 *   then from wide characters to UTF-8.
 * - Any other source encoding: the source is used directly as a wide string.
 *
 * The converted copy is cached in m_pUtf8 and reused by later calls.
 */
template <typename T>
const char* CTransCode<T>::ToUtf8()
{
    if (m_enSrcCode == EN_UTF8)
    {
        return (const char*)m_pSrcStr;
    }
    if (m_pUtf8)
    {
        return m_pUtf8;
    }

    // Pick the wide-character input for the conversion below.
    const wchar_t* pWideSrc;
    if (m_enSrcCode == EN_ANSI)
    {
        // ANSI has to go through the wide form; ToUnicode() fills m_pMulti.
        ToUnicode();
        pWideSrc = m_pMulti;
    }
    else
    {
        pWideSrc = (const wchar_t*)m_pSrcStr;
    }

    // Length query first, then the real conversion into a zeroed buffer that
    // has one spare byte so it always ends in a terminator.
    int cbUtf8 = WideCharToMultiByte(CP_UTF8, 0, (LPCWSTR)pWideSrc, -1, NULL, 0, NULL, NULL);
    char* pUtf8Buf = new char[cbUtf8 + 1];
    memset(pUtf8Buf, 0, (cbUtf8 + 1) * sizeof(char));
    m_pUtf8 = pUtf8Buf;

    WideCharToMultiByte(CP_UTF8, 0, (LPCWSTR)pWideSrc, -1, pUtf8Buf, cbUtf8, NULL, NULL);
    return m_pUtf8;
}

#endif

 

 

使用方式如下:

 

 

// Default form: T is LPCTSTR and the source encoding follows the build
// (EN_UNICODE when _UNICODE is defined, EN_ANSI otherwise).
CTransCode<> tc(_T("字符集转换"));

// Alternatively, state the source string type and encoding explicitly, e.g.:

// CTransCode<LPCSTR> tc("字符集转换", CTransCode<LPCSTR>::EN_ANSI);

// The conversion functions return pointers to const, so the receiving
// variables must be const-qualified as well.
const char* pAnsi = tc.ToAnsi(); // the string in the ANSI code page

const wchar_t* pUnicode = tc.ToUnicode(); // the string as wide characters

const char* pUtf8 = tc.ToUtf8(); // the string as UTF-8

// When finished, the converted buffers can be freed explicitly with Release().
// Pointers into those buffers must not be used afterwards.

tc.Release();

// Calling Release() is optional: the destructor calls it automatically.