首页 > 代码库 > linux下c语言利用iconv实现utf-8转unicode

Linux 下 C 语言利用 iconv 实现 UTF-8 转 Unicode

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <iconv.h>

/*
 * Demo: convert a hard-coded UTF-8 string to the "UNICODE" encoding with
 * iconv and dump the converted bytes to stdout.
 *
 * Returns EXIT_SUCCESS when iconv() reported no error, EXIT_FAILURE when the
 * converter could not be opened or the conversion failed.
 */
int main(int argc, char **argv)
{
  (void)argc;
  (void)argv;

  /* Target encoding. The suffix selects what happens to characters that
   * cannot be converted: //TRANSLIT substitutes a similar character,
   * //IGNORE skips them (e.g. "UNICODE//TRANSLIT" is the alternative). */
  const char *encTo = "UNICODE//IGNORE";
  /* Source encoding. */
  const char *encFrom = "UTF-8";

  /* Open the conversion descriptor; nothing below may run without it. */
  iconv_t cd = iconv_open (encTo, encFrom);
  if (cd == (iconv_t)-1)
  {
    perror ("iconv_open");
    return EXIT_FAILURE;
  }

  /* Text to convert. */
  char inbuf[1024] = "abcdef哈哈哈哈行";
  size_t srclen = strlen (inbuf);
  /* Report the input length in bytes (size_t needs %zu, not %d). */
  printf ("srclen=%zu\n", srclen);

  /* Zero-filled destination buffer, so the %s print below stays bounded. */
  char outbuf[1024] = {0};
  size_t outlen = sizeof outbuf;

  /* iconv() advances the pointers it is given, so hand it copies and keep
   * inbuf/outbuf pointing at the start of the data. */
  char *srcstart = inbuf;
  char *tempoutbuf = outbuf;

  /* Convert. On return srclen holds the number of input bytes not yet
   * converted and outlen the space still free in outbuf. */
  int status = EXIT_SUCCESS;
  size_t ret = iconv (cd, &srcstart, &srclen, &tempoutbuf, &outlen);
  if (ret == (size_t)-1)
  {
    perror ("iconv");
    status = EXIT_FAILURE;
  }
  printf ("inbuf=%s, srclen=%zu, outbuf=%s, outlen=%zu\n", inbuf, srclen, outbuf, outlen);

  /* Dump exactly the bytes that were produced instead of a fixed count. */
  size_t produced = sizeof outbuf - outlen;
  printf ("print outbuf: ");
  for (size_t i = 0; i < produced; i++)
  {
    printf ("%2c", outbuf[i]);
  }
  printf ("\n");

  /* Release the conversion descriptor. */
  iconv_close (cd);

  return status;
}

 

 

下面把上述转换过程封装成一个函数:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <iconv.h>

/*
 * Convert a byte buffer from one character encoding to another with iconv.
 *
 * @param encTo   target encoding name, passed to iconv_open()
 * @param encFrom source encoding name, passed to iconv_open()
 * @param inbuf   bytes to convert
 * @param inlen   number of bytes in inbuf to convert
 * @param outbuf  destination buffer; this function does not append a terminator
 * @param outlen  capacity of outbuf in bytes
 * @return the number of input bytes left unconverted (0 when the whole input
 *         was consumed), or -1 if the converter could not be opened.
 *
 * Failures are reported with perror(); a diagnostic line describing the
 * buffers is printed to stdout after the conversion attempt.
 */
int char_convert (char *encTo, char *encFrom, char *inbuf, size_t inlen, char *outbuf, size_t outlen)
{
  /* Open the conversion descriptor; without it iconv() must not be called. */
  iconv_t cd = iconv_open (encTo, encFrom);
  if (cd == (iconv_t)-1)
  {
    perror ("iconv_open");
    return -1;
  }

  /* Remember the original sizes so the diagnostic print can be bounded. */
  const size_t in_total = inlen;
  const size_t out_total = outlen;

  /* iconv() advances the pointers it is given, so pass copies and keep
   * inbuf/outbuf pointing at the start of the data. */
  char *srcstart = inbuf;
  char *tempoutbuf = outbuf;

  /* Convert. On return inlen holds the number of input bytes not yet
   * converted and outlen the space still free in outbuf. */
  size_t ret = iconv (cd, &srcstart, &inlen, &tempoutbuf, &outlen);
  if (ret == (size_t)-1)
  {
    perror ("iconv");
  }

  /* Bound both %s prints by the real byte counts: neither buffer is
   * guaranteed to be NUL-terminated. size_t is printed with %zu. */
  printf ("inbuf=%.*s, inlen=%zu, outbuf=%.*s, outlen=%zu\n",
          (int)in_total, inbuf, inlen,
          (int)(out_total - outlen), outbuf, outlen);

  /* Release the conversion descriptor. */
  iconv_close (cd);

  /* The interface returns int; the narrowing from size_t is made explicit. */
  return (int)inlen;
}

#if 0
/*
 * Demo driver for char_convert(): converts a UTF-8 string to "UNICODE",
 * dumps the converted bytes, converts them back to UTF-8 and dumps the result.
 */
int main(int argc, char **argv)
{
  (void)argc;
  (void)argv;

  /* Target encoding. The suffix selects what happens to characters that
   * cannot be converted: //TRANSLIT substitutes a similar character,
   * //IGNORE skips them (e.g. "UNICODE//TRANSLIT" is the alternative). */
  char *encTo = "UNICODE//IGNORE";
  /* Source encoding. */
  char *encFrom = "UTF-8";
  /* Text to convert. */
  char inbuf[1024] = "发送打法abcdef哈哈哈哈行";
  size_t inlen = strlen (inbuf);
  /* Report the input length in bytes (size_t needs %zu, not %d). */
  printf ("inlen=%zu\n", inlen);

  /* Zero-filled destination buffer. */
  char outbuf[1024] = {0};
  size_t outlen = sizeof outbuf;

  char_convert (encTo, encFrom, inbuf, inlen, outbuf, outlen);

  /* char_convert() does not report how many bytes it wrote. outbuf started
   * zero-filled, so measure the result up to the first all-zero 2-byte unit.
   * NOTE(review): assumes the "UNICODE" output consists of 2-byte units. */
  size_t unilen = 0;
  while (unilen + 1 < sizeof outbuf
         && (outbuf[unilen] != 0 || outbuf[unilen + 1] != 0))
  {
    unilen += 2;
  }

  printf ("print outbuf: ");
  for (size_t i = 0; i < unilen; i++)
  {
    printf ("%2c", outbuf[i]);
  }
  printf ("\n");

  /* Convert back: the roles of the two encodings and buffers are swapped. */
  encFrom = "UNICODE";
  encTo = "UTF-8//IGNORE";
  /* Clear the destination so stale bytes of the original text cannot be
   * mistaken for converted output. */
  memset (inbuf, 0, sizeof inbuf);
  /* The source length is the size of the converted data, not the byte
   * length of the original UTF-8 text. */
  char_convert (encTo, encFrom, outbuf, unilen, inbuf, sizeof inbuf);

  printf ("print outbuf2: ");
  size_t backlen = strlen (inbuf);
  for (size_t i = 0; i < backlen; i++)
  {
    printf ("%2c", inbuf[i]);
  }
  printf ("\n");

  return 0;
}
#endif