ucs2_to_gb2312.c 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203
  1. /**************************************************************************
  2. * Copyright (C), AirM2M Tech. Co., Ltd.
  3. *
  4. * Name: ucs2_to_gb2312.c
  5. * Author: liweiqiang
  6. * Version: V0.1
  7. * Date: 2013/7/15
  8. *
  9. * Description:
  10. * Convert UCS-2 to GB2312
  11. **************************************************************************/
  12. #include "stdio.h"
  13. #include "errno.h"
  14. #include "luat_base.h"
  15. #define u16 uint16_t
  16. #define u8 uint8_t
  17. #include "ucs2_to_gb2312_table.h"
  18. #include "ucs2_to_gb2312_offset.h"
  /*
   * number_of_bit_1[b] is the number of 1 bits in the byte value b (0..255).
   *
   * The table is generated by nested macros instead of being spelled out:
   * each level appends two more low-order bits, and those two bits contribute
   * 0, 1, 1 or 2 set bits to the running count.
   */
  #define BITCOUNT_ROW2(n) (n), (n) + 1, (n) + 1, (n) + 2
  #define BITCOUNT_ROW4(n) BITCOUNT_ROW2(n), BITCOUNT_ROW2((n) + 1), BITCOUNT_ROW2((n) + 1), BITCOUNT_ROW2((n) + 2)
  #define BITCOUNT_ROW6(n) BITCOUNT_ROW4(n), BITCOUNT_ROW4((n) + 1), BITCOUNT_ROW4((n) + 1), BITCOUNT_ROW4((n) + 2)
  const u8 number_of_bit_1[256] =
  {
      BITCOUNT_ROW6(0), /* 0x00 - 0x3F */
      BITCOUNT_ROW6(1), /* 0x40 - 0x7F */
      BITCOUNT_ROW6(1), /* 0x80 - 0xBF */
      BITCOUNT_ROW6(2), /* 0xC0 - 0xFF */
  };
  #undef BITCOUNT_ROW6
  #undef BITCOUNT_ROW4
  #undef BITCOUNT_ROW2
  54. /* 0x4E00 <= ucs2 < 0xA000 */
  55. static u16 get_ucs2_offset(u16 ucs2)
  56. {
  57. u16 offset, page, tmp;
  58. u8 *mirror_ptr, ch;
  59. page = (ucs2>>8) - 0x4E;
  60. ucs2 &= 0xFF;
  61. tmp = ucs2>>6; /* now 0 <= tmp < 4 */
  62. offset = ucs2_index_table_4E00_9FFF[page][tmp];
  63. mirror_ptr = (u8*)&ucs2_mirror_4E00_9FFF[page][tmp<<3]; /* [0, 8, 16, 24] */
  64. tmp = ucs2&0x3F; /* mod 64 */
  65. while(tmp >= 8)
  66. {
  67. offset += number_of_bit_1[*mirror_ptr];
  68. mirror_ptr++;
  69. tmp -= 8;
  70. }
  71. ch = *mirror_ptr;
  72. if(ch&(0x1<<tmp))
  73. { /* Ok , this ucs2 can be covert to GB2312. */
  74. while(tmp)
  75. {
  76. if(ch&0x1)
  77. offset++;
  78. ch>>=1;
  79. tmp--;
  80. }
  81. return offset;
  82. }
  83. return (u16)(-1);
  84. }
  85. /*+\NEW\liweiqiang\2013.11.26\Improve gb2312<->ucs2(ucs2be) encoding conversion*/
  86. size_t iconv_ucs2_to_gb2312_endian(char **_inbuf, size_t *inbytesleft, char **_outbuf, size_t *outbytesleft, int endian)
  87. {
  88. u16 offset, gb2312 = 0xA1A1;
  89. u16 ucs2;
  90. size_t gb_length = 0;
  91. u16 *ucs2buf = (u16*)*_inbuf;
  92. char *outbuf = (char *)*_outbuf;
  93. size_t inlen = *inbytesleft/2;
  94. size_t outlen = *outbytesleft;
  95. size_t ret = 0;
  96. while(inlen > 0)
  97. {
  98. if(gb_length+2 > outlen)
  99. {
  100. errno = E2BIG;
  101. ret = -1;
  102. goto ucs2_to_gb2312_exit;
  103. }
  104. ucs2 = *ucs2buf++;
  105. if(endian == 1)
  106. ucs2 = (ucs2<<8)|(ucs2>>8);
  107. gb2312 = 0xA1A1;
  108. //End 7205
  109. if(0x80 > ucs2)
  110. {
  111. // can be convert to ASCII char
  112. *outbuf++ = (u8)ucs2;
  113. gb_length++;
  114. }
  115. else
  116. {
  117. if((0x4E00 <= ucs2) && (0xA000 > ucs2))
  118. {
  119. offset = get_ucs2_offset(ucs2);
  120. if((u16)(-1) != offset)
  121. {
  122. gb2312 = ucs2_to_gb2312_table[offset];
  123. }
  124. }
  125. else
  126. {
  127. u16 u16count = sizeof(tab_UCS2_to_GBK)/4;
  128. u16 ui = 0;
  129. for(ui=0;ui<u16count;ui++)
  130. {
  131. if(ucs2 == tab_UCS2_to_GBK[ui][0])
  132. {
  133. gb2312 = tab_UCS2_to_GBK[ui][1];
  134. }
  135. }
  136. }
  137. #if 0
  138. else
  139. {
  140. // Is chinese symbol ?
  141. // try search another table
  142. for( offset = 0; offset < 94 * 16; offset++ )
  143. {
  144. if( ucs2 == gb2312_to_ucs2_table[ offset ] )
  145. {
  146. gb2312 = offset / 94 + 0xA0;
  147. gb2312 = (gb2312 << 8) + (offset % 94 + 0xA1);
  148. break;
  149. }
  150. }
  151. }
  152. #endif
  153. *outbuf++ = (u8)(gb2312>>8);
  154. *outbuf++ = (u8)(gb2312);
  155. gb_length += 2;
  156. }
  157. inlen--;
  158. }
  159. if(inlen > 0)
  160. {
  161. errno = EINVAL;
  162. ret = -1;
  163. }
  164. ucs2_to_gb2312_exit:
  165. *inbytesleft = inlen;
  166. *outbytesleft -= gb_length;
  167. return ret;
  168. }
  /*
   * Convert UCS-2 to GB2312, taking every input unit exactly as it is read
   * from memory (no byte swap). Arguments and return value are those of
   * iconv_ucs2_to_gb2312_endian().
   */
  size_t iconv_ucs2_to_gb2312(char **_inbuf, size_t *inbytesleft, char **_outbuf, size_t *outbytesleft)
  {
      const int swap_bytes = 0;

      return iconv_ucs2_to_gb2312_endian(_inbuf, inbytesleft, _outbuf, outbytesleft, swap_bytes);
  }
  /*
   * Convert UCS-2BE to GB2312: the two bytes of every input unit are swapped
   * after being read. Arguments and return value are those of
   * iconv_ucs2_to_gb2312_endian().
   */
  size_t iconv_ucs2be_to_gb2312(char **_inbuf, size_t *inbytesleft, char **_outbuf, size_t *outbytesleft)
  {
      const int swap_bytes = 1;

      return iconv_ucs2_to_gb2312_endian(_inbuf, inbytesleft, _outbuf, outbytesleft, swap_bytes);
  }
  177. /*-\NEW\liweiqiang\2013.11.26\Improve gb2312<->ucs2(ucs2be) encoding conversion*/