ucs2_to_gb2312.c 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196
  1. /**************************************************************************
  2. * Copyright (C), AirM2M Tech. Co., Ltd.
  3. *
  4. * Name: ucs2_to_gb2312.c
  5. * Author: liweiqiang
  6. * Version: V0.1
  7. * Date: 2013/7/15
  8. *
  9. * Description:
  10. * ucs2 to gb2312 conversion
  11. **************************************************************************/
  12. #include <stdint.h>
  13. #include "stdio.h"
  14. #include "errno.h"
  15. #include "ucs2_to_gb2312_table.h"
  16. #include "ucs2_to_gb2312_offset.h"
  /*
   * number_of_bit_1[v] is the number of 1 bits in the byte value v (0..255).
   *
   * The table is generated instead of spelled out: appending two more bits to
   * a value adds 0, 1, 1 or 2 set bits, so every level expands into four
   * copies of the level below it with those increments applied.
   */
  #define BIT_COUNT_2(n) (n), (n) + 1, (n) + 1, (n) + 2
  #define BIT_COUNT_4(n) BIT_COUNT_2(n), BIT_COUNT_2((n) + 1), BIT_COUNT_2((n) + 1), BIT_COUNT_2((n) + 2)
  #define BIT_COUNT_6(n) BIT_COUNT_4(n), BIT_COUNT_4((n) + 1), BIT_COUNT_4((n) + 1), BIT_COUNT_4((n) + 2)

  const uint8_t number_of_bit_1[256] =
  {
      BIT_COUNT_6(0), /* 0x00 - 0x3F */
      BIT_COUNT_6(1), /* 0x40 - 0x7F */
      BIT_COUNT_6(1), /* 0x80 - 0xBF */
      BIT_COUNT_6(2)  /* 0xC0 - 0xFF */
  };

  #undef BIT_COUNT_6
  #undef BIT_COUNT_4
  #undef BIT_COUNT_2
  52. /* 0x4E00 <= ucs2 < 0xA000 */
  53. static uint16_t get_ucs2_offset(uint16_t ucs2)
  54. {
  55. uint16_t offset, page, tmp;
  56. uint8_t *mirror_ptr, ch;
  57. page = (ucs2>>8) - 0x4E;
  58. ucs2 &= 0xFF;
  59. tmp = ucs2>>6; /* now 0 <= tmp < 4 */
  60. offset = ucs2_index_table_4E00_9FFF[page][tmp];
  61. mirror_ptr = (uint8_t*)&ucs2_mirror_4E00_9FFF[page][tmp<<3]; /* [0, 8, 16, 24] */
  62. tmp = ucs2&0x3F; /* mod 64 */
  63. while(tmp >= 8)
  64. {
  65. offset += number_of_bit_1[*mirror_ptr];
  66. mirror_ptr++;
  67. tmp -= 8;
  68. }
  69. ch = *mirror_ptr;
  70. if(ch&(0x1<<tmp))
  71. { /* Ok , this ucs2 can be covert to GB2312. */
  72. while(tmp)
  73. {
  74. if(ch&0x1)
  75. offset++;
  76. ch>>=1;
  77. tmp--;
  78. }
  79. return offset;
  80. }
  81. return (uint16_t)(-1);
  82. }
  83. uint16_t unicode_to_gb2312(uint16_t ucs2, uint8_t marks)
  84. {
  85. uint16_t gb = 0xA1A1;
  86. if(0x80 > ucs2)
  87. {
  88. // can be convert to ASCII char
  89. gb = ucs2;
  90. }
  91. else
  92. {
  93. if((0x4E00 <= ucs2) && (0xA000 > ucs2))
  94. {
  95. uint16_t offset = get_ucs2_offset(ucs2);
  96. if((uint16_t)(-1) != offset)
  97. {
  98. gb = ucs2_to_gb2312_table[offset];
  99. }
  100. }
  101. else if (marks == 0)
  102. {
  103. uint16_t u16count = sizeof(tab_UCS2_to_GBK)/4;
  104. for(uint16_t ui=0; ui < u16count; ui++)
  105. {
  106. if(ucs2 == tab_UCS2_to_GBK[ui][0])
  107. {
  108. gb = tab_UCS2_to_GBK[ui][1];
  109. }
  110. }
  111. }
  112. }
  113. return gb;
  114. }
  115. /*+\NEW\liweiqiang\2013.11.26\Improve gb2312<->ucs2(ucs2be) encoding conversion*/
  /*
   * Convert a buffer of 16-bit UCS-2 units to GB2312 bytes.
   *
   *   _inbuf       - *_inbuf points at the UCS-2 input. The pointer itself is
   *                  not advanced by this function.
   *   inbytesleft  - in: number of input bytes; out: number of input bytes
   *                  that were NOT converted (always a multiple of 2; a
   *                  trailing odd byte is ignored and not reported).
   *   _outbuf      - *_outbuf points at the output buffer. The pointer itself
   *                  is not advanced by this function.
   *   outbytesleft - in: capacity of the output buffer in bytes; out: the
   *                  capacity minus the number of bytes written.
   *   endian       - 1: swap the two bytes of every 16-bit unit before the
   *                  lookup; any other value: use the unit as stored in
   *                  memory (host byte order).
   *
   * Each unit produces one output byte when its converted value is below 0x80
   * and two bytes (high byte first) otherwise.
   *
   * Returns 0 when every complete unit was converted. Returns (size_t)-1 with
   * errno = E2BIG when the next character does not fit into the remaining
   * output space; everything converted up to that point stays in the output
   * buffer and is accounted for in *outbytesleft.
   */
  static size_t iconv_ucs2_to_gb2312_endian(char **_inbuf, size_t *inbytesleft, char **_outbuf, size_t *outbytesleft, int endian)
  {
      const unsigned char *in = (const unsigned char *)*_inbuf;
      unsigned char *out = (unsigned char *)*_outbuf;
      const size_t out_capacity = *outbytesleft;
      size_t units_left = *inbytesleft / 2;
      size_t written = 0;
      size_t ret = 0;

      while (units_left > 0)
      {
          /*
           * Assemble the unit from single bytes: the input is a char buffer
           * and need not be aligned for a direct uint16_t load. The union
           * keeps the host byte order, exactly like a direct load would.
           */
          union
          {
              unsigned char bytes[2];
              uint16_t value;
          } raw;
          raw.bytes[0] = in[0];
          raw.bytes[1] = in[1];

          uint16_t ucs2 = raw.value;
          if (endian == 1)
          {
              ucs2 = (uint16_t)((ucs2 << 8) | (ucs2 >> 8));
          }

          const uint16_t gb2312 = unicode_to_gb2312(ucs2, 0);
          const size_t needed = (gb2312 < 0x80) ? 1 : 2;

          /* Check the exact size so a 1-byte character fits in 1 free byte. */
          if (needed > out_capacity - written)
          {
              errno = E2BIG;
              ret = (size_t)-1;
              break;
          }

          if (needed == 2)
          {
              out[written++] = (unsigned char)(gb2312 >> 8);
          }
          out[written++] = (unsigned char)gb2312;

          in += 2;
          units_left--;
      }

      /* Report the unconverted input in bytes, as the parameter name says. */
      *inbytesleft = units_left * 2;
      *outbytesleft -= written;
      return ret;
  }
  /*
   * Convert UCS-2 input to GB2312 without byte-swapping the 16-bit units
   * (they are used as stored in memory).
   *
   * All arguments are forwarded unchanged to iconv_ucs2_to_gb2312_endian(),
   * whose return value is passed back to the caller.
   */
  size_t iconv_ucs2_to_gb2312(char **_inbuf, size_t *inbytesleft, char **_outbuf, size_t *outbytesleft)
  {
      const int swap_bytes = 0;

      return iconv_ucs2_to_gb2312_endian(_inbuf, inbytesleft, _outbuf, outbytesleft, swap_bytes);
  }
  /*
   * Convert UCS-2 input to GB2312, swapping the two bytes of every 16-bit
   * unit before it is looked up.
   *
   * All arguments are forwarded unchanged to iconv_ucs2_to_gb2312_endian(),
   * whose return value is passed back to the caller.
   */
  size_t iconv_ucs2be_to_gb2312(char **_inbuf, size_t *inbytesleft, char **_outbuf, size_t *outbytesleft)
  {
      const int swap_bytes = 1;

      return iconv_ucs2_to_gb2312_endian(_inbuf, inbytesleft, _outbuf, outbytesleft, swap_bytes);
  }
  171. /*-\NEW\liweiqiang\2013.11.26\Improve gb2312<->ucs2(ucs2be) encoding conversion*/