首页 > 代码库 > Linux 下 C 语言利用 iconv 实现 UTF-8 转 Unicode
Linux 下 C 语言利用 iconv 实现 UTF-8 转 Unicode
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <iconv.h>
/*
 * Demo program: convert a fixed UTF-8 string to the "UNICODE" charset with
 * iconv and dump the converted bytes to stdout.
 *
 * Returns 0 on success, EXIT_FAILURE when the conversion descriptor cannot
 * be opened, the conversion fails, or the descriptor cannot be closed.
 */
int main(int argc, char **argv)
{
    enum { OUT_CAPACITY = 1024 };

    (void)argc;
    (void)argv;

    /* Target encoding. The "//IGNORE" suffix skips characters that cannot be
     * converted; "//TRANSLIT" would substitute a similar-looking character
     * instead (e.g. "UNICODE//TRANSLIT"). */
    const char *encTo = "UNICODE//IGNORE";
    /* Source encoding. */
    const char *encFrom = "UTF-8";

    /* Obtain the conversion descriptor; nothing below is usable without it. */
    iconv_t cd = iconv_open(encTo, encFrom);
    if (cd == (iconv_t)-1) {
        perror("iconv_open");
        return EXIT_FAILURE;
    }

    /* The string to convert. */
    char inbuf[1024] = "abcdef哈哈哈哈行";
    size_t srclen = strlen(inbuf);
    /* Print the byte length of the input string. */
    printf("srclen=%zu\n", srclen);

    /* Zero-filled destination buffer, so the %s print below stops at a NUL
     * as long as the output does not fill the whole buffer. */
    char outbuf[OUT_CAPACITY];
    size_t outlen = sizeof outbuf;
    memset(outbuf, 0, sizeof outbuf);

    /* iconv() advances the pointers it is given, so hand it copies and keep
     * inbuf/outbuf pointing at the start of the data. */
    char *srcstart = inbuf;
    char *tempoutbuf = outbuf;

    /* On return srclen holds the number of input bytes still unconverted and
     * outlen the space left in outbuf. iconv() reports failure as (size_t)-1. */
    size_t ret = iconv(cd, &srcstart, &srclen, &tempoutbuf, &outlen);
    if (ret == (size_t)-1) {
        perror("iconv");
        iconv_close(cd);
        return EXIT_FAILURE;
    }

    printf("inbuf=%s, srclen=%zu, outbuf=%s, outlen=%zu\n", inbuf, srclen, outbuf, outlen);

    /* Dump exactly the bytes that were written rather than a fixed count. */
    size_t converted = sizeof outbuf - outlen;
    printf("print outbuf: ");
    for (size_t i = 0; i < converted; i++) {
        printf("%2c", outbuf[i]);
    }
    printf("\n");

    /* Release the descriptor. */
    if (iconv_close(cd) != 0) {
        perror("iconv_close");
        return EXIT_FAILURE;
    }
    return 0;
}
下面将上述转换过程封装成一个函数 char_convert:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <iconv.h>
/* Write at most `max` bytes of `s` to stdout, stopping early at a NUL byte. */
static void put_bounded(const char *s, size_t max)
{
    size_t n = 0;
    while (n < max && s[n] != '\0') {
        n++;
    }
    fwrite(s, 1, n, stdout);
}

/*
 * Convert `inlen` bytes at `inbuf` from charset `encFrom` to charset `encTo`,
 * writing at most `outlen` bytes to `outbuf`, and print a diagnostic line
 * describing the result to stdout.
 *
 * @param encTo   name of the target encoding (passed to iconv_open)
 * @param encFrom name of the source encoding (passed to iconv_open)
 * @param inbuf   bytes to convert
 * @param inlen   number of bytes available at inbuf
 * @param outbuf  destination buffer
 * @param outlen  capacity of outbuf in bytes
 * @return number of input bytes left unconverted (0 when everything was
 *         consumed), or -1 when the conversion descriptor could not be
 *         opened. The count is narrowed to int, so it is only meaningful
 *         for inputs that fit in an int.
 *
 * Failures of iconv_open()/iconv() are reported with perror(). The number of
 * bytes written to outbuf is not returned; the caller has to derive it.
 */
int char_convert (char *encTo, char *encFrom, char *inbuf, size_t inlen, char *outbuf, size_t outlen)
{
    /* Obtain the conversion descriptor; bail out early because iconv() and
     * iconv_close() must not be handed an invalid descriptor. */
    iconv_t cd = iconv_open(encTo, encFrom);
    if (cd == (iconv_t)-1) {
        perror("iconv_open");
        return -1;
    }

    /* Remember the original sizes: iconv() decrements inlen/outlen in place
     * and they are needed to bound the diagnostic output below. */
    const size_t in_total = inlen;
    const size_t out_total = outlen;

    /* iconv() also advances the pointers it is given, so pass copies. */
    char *srcstart = inbuf;
    char *tempoutbuf = outbuf;

    /* iconv() reports failure as (size_t)-1; the partial result is still
     * described and the remaining byte count still returned. */
    if (iconv(cd, &srcstart, &inlen, &tempoutbuf, &outlen) == (size_t)-1) {
        perror("iconv");
    }

    /* Same line layout as a single "%s ... %s" printf, but each buffer is
     * limited to its known length so an unterminated buffer is never
     * read past its end. */
    printf("inbuf=");
    put_bounded(inbuf, in_total);
    printf(", inlen=%zu, outbuf=", inlen);
    put_bounded(outbuf, out_total - outlen);
    printf(", outlen=%zu\n", outlen);

    /* Release the descriptor. */
    iconv_close(cd);
    return (int)inlen;
}
#if 0
/*
 * Demo driver for char_convert(): converts a fixed UTF-8 string to the
 * "UNICODE" charset, dumps the result, converts it back to UTF-8 and dumps
 * that as well.
 *
 * Returns 0 on success, EXIT_FAILURE when either conversion is incomplete.
 */
int main(int argc, char **argv)
{
    enum { BUF_CAPACITY = 1024 };

    (void)argc;
    (void)argv;

    /* Target encoding. The "//IGNORE" suffix skips characters that cannot be
     * converted; "//TRANSLIT" would substitute a similar-looking character
     * instead (e.g. "UNICODE//TRANSLIT"). */
    char *encTo = "UNICODE//IGNORE";
    /* Source encoding. */
    char *encFrom = "UTF-8";

    /* The string to convert. */
    char inbuf[BUF_CAPACITY] = "发送打法abcdef哈哈哈哈行";
    size_t inlen = strlen(inbuf);
    /* Print the byte length of the input string. */
    printf("inlen=%zu\n", inlen);

    /* Zero-filled destination; two bytes are held back so the converted text
     * is always followed by a zero code unit that the scan below can find. */
    char outbuf[BUF_CAPACITY];
    memset(outbuf, 0, sizeof outbuf);
    if (char_convert(encTo, encFrom, inbuf, inlen, outbuf, sizeof outbuf - 2) != 0) {
        fprintf(stderr, "UTF-8 -> UNICODE conversion incomplete\n");
        return EXIT_FAILURE;
    }

    /* char_convert() does not report how many bytes it wrote, so measure the
     * result by scanning for the first all-zero 16-bit unit.
     * NOTE(review): assumes the "UNICODE" converter emits 16-bit code units
     * (as glibc's does) - confirm on other iconv implementations. */
    size_t widelen = 0;
    while (widelen + 1 < sizeof outbuf
           && (outbuf[widelen] != 0 || outbuf[widelen + 1] != 0)) {
        widelen += 2;
    }

    printf("print outbuf: ");
    for (size_t i = 0; i < widelen; i++) {
        printf("%2c", outbuf[i]);
    }
    printf("\n");

    /* Round trip: the converted bytes are now the source ... */
    encFrom = "UNICODE";
    /* ... and UTF-8 is the target. */
    encTo = "UTF-8//IGNORE";

    /* Clear inbuf so the dump shows the round-trip result, and hold back one
     * byte so the result stays NUL-terminated. The input length is the size
     * of the converted data, not the original UTF-8 byte count. */
    memset(inbuf, 0, sizeof inbuf);
    if (char_convert(encTo, encFrom, outbuf, widelen, inbuf, sizeof inbuf - 1) != 0) {
        fprintf(stderr, "UNICODE -> UTF-8 conversion incomplete\n");
        return EXIT_FAILURE;
    }

    size_t backlen = strlen(inbuf);
    printf("print outbuf2: ");
    for (size_t i = 0; i < backlen; i++) {
        printf("%2c", inbuf[i]);
    }
    printf("\n");
    return 0;
}
#endif