首页 > 代码库 > 几个多字节和UNICODE及UTF-8之间相互转化的函数

几个多字节和UNICODE及UTF-8之间相互转化的函数

 做VC++开发免不了要在多字节、UNICODE、UTF-8之间转来转去,下面贴出我写的几个字符转化函数。

 1、TCHAR和CHAR转化:TCHAR在UNICODE和多字节编译环境下分别对应WCHAR和CHAR,函数如下:

  需要注意:*dest指向的内存是用new[]分配的,使用完后需要用delete[]释放。

//char 字符串转化为tchar字符串void C2T(TCHAR** dest, const char* src){#ifdef _UNICODE    if (src =http://www.mamicode.com/= NULL)    {        return ;    }    size_t alen = strlen(src) + 1;    size_t  ulen = (size_t)MultiByteToWideChar(CP_ACP, 0, src,alen,NULL, 0 )+1;    *dest = new WCHAR[ulen];    ::MultiByteToWideChar(CP_ACP, 0, src, alen, *dest, ulen);#else     //多字节TCHAR就是 char     int len = strlen(src)+1;    *dest = new char[len];    strcpy(*dest, src);#endif}

2、TCHAR转化为多字节,同样要注意*dest指针释放

void T2C(char** dest, const TCHAR* src){    if(src =http://www.mamicode.com/= NULL)        return ;#ifdef _UNICODE    size_t len = WideCharToMultiByte(CP_ACP, 0, src, -1, NULL, 0 , NULL, NULL);    if (len == 0)    {        return;    }    *dest = new char[len];    WideCharToMultiByte( CP_ACP, 0, src, -1, *dest, len, NULL, NULL );#else    int len = _tcslen(src) + 1;    *dest = new TCHAR[len];    strcpy(*dest, src);#endif}

3、下面两个函数跟上面的有点重复但是后面会用到,也贴出来

//多字节转化为宽字节void C2W(WCHAR** dest, const char* src){    if (src =http://www.mamicode.com/= NULL)    {        return ;    }    size_t alen = strlen(src) + 1;    size_t  ulen = (size_t)MultiByteToWideChar(CP_ACP, 0, src,alen,NULL, 0 )+1;    *dest = new WCHAR[ulen];    ::MultiByteToWideChar(CP_ACP, 0, src, alen, *dest, ulen);}//宽字节转化为多字节void W2C(char** dest, const WCHAR *src){    if(src =http://www.mamicode.com/= NULL)        return ;    size_t len = WideCharToMultiByte(CP_ACP, 0, src, -1, NULL, 0 , NULL, NULL);    if (len == 0)    {        return;    }    *dest = new char[len];    WideCharToMultiByte( CP_ACP, 0, src, -1, *dest, len, NULL, NULL );}

4、UTF-8和多字节及宽字节之间的转化

//UNICODE可以直接转化为UTF-8 void UnicodeToUtf8(char** dest , const WCHAR* src) {      ASSERT(dest!= NULL || src != NULL);      int len = -1;      len = WideCharToMultiByte(CP_UTF8, 0, src, -1, 0, 0, 0, 0)+1;      *dest = new char[len+1];      ::WideCharToMultiByte(CP_UTF8, 0, src, -1,*dest, len, 0, 0); }//多字节要先转化为宽字节在转化为UTF-8void AnsiToUtf8(char** dest, const char* src) {    ASSERT(dest!= NULL || src != NULL);    WCHAR* pwszStr = NULL;    C2W(&pwszStr, src);    UnicodeToUtf8(dest, pwszStr);    SAFE_ARRYDELETE(pwszStr); }

UTF-8转化为多字节或者UNICODE的函数如下:

 void Utf8ToAnsi(char** dest, const char* src) {     ASSERT(dest!= NULL || src != NULL);     WCHAR* str = NULL;     Utf8ToUnicode(&str, src);     W2C(dest, str);     SAFE_ARRYDELETE(str); }void Utf8ToUnicode(WCHAR** dest,const char* src){    ASSERT(dest!= NULL || src != NULL);    int unicodeLen = ::MultiByteToWideChar( CP_UTF8, 0, src, -1, NULL, 0 ) + 1;          *dest = new WCHAR[unicodeLen];    //memset(*dest, 0x0, (unicodeLen + 1)*sizeof(WCHAR));    MultiByteToWideChar(CP_UTF8, 0, src, -1, *dest, unicodeLen);}

 

SAFE_ARRYDELETE是一个内存释放宏,定义如下:

// SAFE_ARRYDELETE(x): releases an array allocated with new[] and resets the
// pointer to NULL. SAFE_DELETE(x): same for a single object allocated with new.
// Both are wrapped in do { ... } while (0) so that each expands to exactly one
// statement and can be used safely inside if/else without braces; the argument
// is parenthesized so that expression arguments expand correctly.
// Invoke them with a trailing semicolon.
#define SAFE_ARRYDELETE(x) do { if (NULL != (x)) { delete[] (x); (x) = NULL; } } while (0)
#define SAFE_DELETE(x) do { if (NULL != (x)) { delete (x); (x) = NULL; } } while (0)

 

有了以上函数,基本上不同编码之间可以随意转换,大家也可以对以上这些函数进行扩展。需要注意的是,上面这些函数中dest指向的内存是用new[]分配的,使用完后需要用delete[]释放。



 

几个多字节和UNICODE及UTF-8之间相互转化的函数