Dozingfiretruck 2 лет назад
Родитель
Сommit
d2913ee1c6

+ 21 - 7
components/iconv/gb2312_to_ucs2.c

@@ -9,22 +9,36 @@
  * Description:
  *          gb2312 转换 ucs2
  **************************************************************************/
-
+#include <stdint.h>
 #include "stdio.h"
 #include "errno.h"
-#include "luat_base.h"
 
 #include "gb2312_to_ucs2_table.h"
 
-#define u16 uint16_t
-#define u8 uint8_t
+uint16_t gb2312_to_ucs(uint16_t gb2312) // Map one code (lead byte in bits 15..8, trail byte in bits 7..0) to a UCS-2 value through gb2312_to_ucs2_table.
+{
+    uint16_t ucs; // value handed back to the caller
+    uint8_t gb = gb2312; // implicit narrowing: keeps only the low byte of the argument
+    // NOTE(review): the test below inspects the low byte only, so the high byte is ignored on the pass-through path - confirm callers rely on that.
+    if (gb < 0x80) // low byte below 0x80: returned unchanged, no table lookup
+    {
+        ucs = gb;
+    }
+    else
+    {
+        uint16_t offset = ((gb2312 >> 8) - 0xA0) * 94 + ((gb2312 & 0x00ff) - 0xA1); // row-major index: 94 entries per lead byte, rows start at lead 0xA0, columns at trail 0xA1
+        ucs = gb2312_to_ucs2_table[offset]; // NOTE(review): offset is not range-checked; a lead byte below 0xA0 or a trail byte below 0xA1 wraps the uint16_t and indexes outside the intended rows - verify callers pass only valid codes
+    }
+
+    return ucs;
+}
 
 size_t iconv_gb2312_to_ucs2_endian(char **_inbuf, size_t *inbytesleft, char **_outbuf, size_t *outbytesleft, int endian)
 {
-    u16 offset,gb2312;
+    uint16_t offset,gb2312;
     char *gbbuf = *_inbuf;
-    u16 *ucs2buf = (u16*)*_outbuf;
-    u16 ucs2;
+    uint16_t *ucs2buf = (uint16_t*)*_outbuf;
+    uint16_t ucs2;
     size_t ucs2len = 0;
     size_t inlen = *inbytesleft;
     size_t outlen = *outbytesleft;

+ 1 - 4
components/iconv/gb2312_to_ucs2_table.h

@@ -1,7 +1,4 @@
-#include "luat_base.h"
-#define u16 uint16_t
-#define u8 uint8_t
-const u16 gb2312_to_ucs2_table[] = {
+const uint16_t gb2312_to_ucs2_table[] = {
     /* 0xA0A1   */ 0x724E,
     /* 0xA0A2   */ 0x724F,
     /* 0xA0A3   */ 0x7250,

+ 43 - 8
components/iconv/iconv.c

@@ -9,6 +9,12 @@
  * Description:
  *          字符编码转换
  **************************************************************************/
+ /*
+@module  iconv
+@summary 字符编码转换
+@version V0.1
+@date    2021年12月13日
+*/
 
 #include <string.h>
 #include "iconv.h"
@@ -37,14 +43,24 @@ static const builtin_iconv_map iconv_map[] =
 /*-\NEW\liweiqiang\2013.7.19\增加utf8<->ucs2,ucs2be编码转换*/
 };
 
-iconv_t iconv_open (const char *__tocode, const char *__fromcode)
+/*
+打开相应字符编码转换函数
+@function iconv.open(tocode, fromcode) 
+@string tocode$目标编码格式$gb2312/ucs2/ucs2be/utf8
+@string fromcode$源编码格式$gb2312/ucs2/ucs2be/utf8
+@return table$cd$编码转换函数的转换句柄$ 
+@usage
+--unicode大端编码 转化为 utf8编码
+local cd = iconv.open("utf8", "ucs2be")
+*/
+iconv_t iconv_open (const char * to_code, const char * from_code)
 {
     size_t i;
 
     for(i = 0; i < sizeof(iconv_map)/sizeof(iconv_map[0]); i++)
     {
-        if(strcmp(iconv_map[i].from, __fromcode) == 0 &&
-            strcmp(iconv_map[i].to, __tocode) == 0)
+        if(strcmp(iconv_map[i].from, from_code) == 0 &&
+            strcmp(iconv_map[i].to, to_code) == 0)
         {
             return (iconv_t)&iconv_map[i];
         }
@@ -53,10 +69,19 @@ iconv_t iconv_open (const char *__tocode, const char *__fromcode)
     return (iconv_t)-1;
 }
 
-size_t iconv (iconv_t __cd, char ** __inbuf,
-		     size_t * __inbytesleft,
-		     char ** __outbuf,
-		     size_t * __outbytesleft)
+/*
+字符编码转换
+@function cd:iconv(inbuf) 
+@string inbuf$输入字符串$例如:ucs2s 
+@return number$result$返回编码转换后的结果$0成功,-1失败
+@usage
+--unicode大端编码 转化为 utf8编码
+function ucs2beToUtf8(ucs2s)
+    local cd = iconv.open("utf8", "ucs2be")
+    return cd:iconv(ucs2s)
+end
+*/
+size_t iconv_convert (iconv_t __cd, char ** __inbuf, size_t * __inbytesleft, char ** __outbuf, size_t * __outbytesleft)
 {
     builtin_iconv_map *_map_cd = (builtin_iconv_map *)__cd;
 
@@ -70,7 +95,17 @@ size_t iconv (iconv_t __cd, char ** __inbuf,
     return _map_cd->fct(__inbuf, __inbytesleft, __outbuf, __outbytesleft);
 }
 
-int iconv_close (iconv_t __cd)
+/*
+关闭字符编码转换
+@function iconv.close(cd) 
+@string cd$iconv.open返回的句柄$ 
+@return  
+@usage
+--关闭字符编码转换
+local cd = iconv.open("utf8", "ucs2be")
+iconv.close(cd)
+*/
+int iconv_close (iconv_t cd) // Close a conversion handle; cd is never read by this implementation.
 {
     return 0; // always 0: iconv_open returns pointers into the static iconv_map table, so this function releases nothing
 }

+ 5 - 5
components/iconv/iconv.h

@@ -13,24 +13,24 @@
 #ifndef __ICONV_H__
 #define __ICONV_H__
 
-#include "stddef.h"
+// #include "stddef.h"
 
 /* Identifier for conversion method from one codeset to another.  */
 typedef void *iconv_t;
 
 /* Allocate descriptor for code conversion from codeset FROMCODE to
    codeset TOCODE.  */
-extern iconv_t iconv_open (const char *__tocode, const char *__fromcode);
+extern iconv_t iconv_open (const char *to_code, const char *from_code);
 
 /* Convert at most *INBYTESLEFT bytes from *INBUF according to the
    code conversion algorithm specified by CD and place up to
    *OUTBYTESLEFT bytes in buffer at *OUTBUF.  */
-extern size_t iconv (iconv_t __cd, char ** __inbuf,
+extern size_t iconv_convert (iconv_t __cd, char ** __inbuf,
 		     size_t * __inbytesleft,
 		     char ** __outbuf,
 		     size_t * __outbytesleft);
 
 /* Free resources allocated for descriptor CD for code conversion.  */
-extern int iconv_close (iconv_t __cd);
+extern int iconv_close (iconv_t cd);
 
-#endif/*__ICONV_H__*/
+#endif/*__ICONV_H__*/

+ 43 - 0
components/iconv/luat_iconv.c

@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2022 OpenLuat & AirM2M
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+ * the Software, and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+ * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+ * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "luat_base.h"
+
+#include "luat_iconv.h"
+#include "iconv.h"
+
+
+luat_iconv_t luat_iconv_open (const char *to_code, const char *from_code) // Thin wrapper: forwards the target and source encoding names unchanged to iconv_open().
+{
+    return iconv_open(to_code, from_code); // result passed through untouched; iconv_open yields (iconv_t)-1 when no map entry matches both names
+}
+
+
+size_t luat_iconv_convert (luat_iconv_t cd, char ** inbuf, size_t * in_bytes_left, char ** outbuf, size_t * out_bytes_left) // Thin wrapper: forwards the handle and all four buffer/length pointers to iconv_convert().
+{
+    return iconv_convert(cd, inbuf, in_bytes_left, outbuf, out_bytes_left); // return value and any pointer updates are exactly those of iconv_convert()
+}
+
+
+int luat_iconv_close (luat_iconv_t cd) // Thin wrapper: forwards the handle to iconv_close().
+{
+    return iconv_close(cd); // return value passed through unchanged
+}

+ 1 - 1
components/iconv/luat_lib_iconv.c

@@ -142,7 +142,7 @@ static int Liconv(lua_State *L) {
     outbufs = outbuf;
 
     do {
-        ret = iconv(cd, &inbuf, &ibleft, &outbuf, &obleft);
+        ret = iconv_convert(cd, &inbuf, &ibleft, &outbuf, &obleft);
         if (ret == (size_t)(-1)) {
             lua_pushlstring(L, outbufs, obsize - obleft);
             if (hasone == 1)

+ 51 - 58
components/iconv/ucs2_to_gb2312.c

@@ -10,17 +10,14 @@
  *          ucs2 转换 gb2312
  **************************************************************************/
 
+#include <stdint.h>
 #include "stdio.h"
 #include "errno.h"
-#include "luat_base.h"
-#define u16 uint16_t
-#define u8 uint8_t
 
 #include "ucs2_to_gb2312_table.h"
-
 #include "ucs2_to_gb2312_offset.h"
 
-const u8 number_of_bit_1[256] = 
+const uint8_t number_of_bit_1[256] = 
 {
     0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03,
     0x01, 0x02, 0x02, 0x03, 0x02, 0x03, 0x03, 0x04,
@@ -57,17 +54,17 @@ const u8 number_of_bit_1[256] =
 };
 
 /* 0x4E00 <= ucs2 < 0xA000 */ 
-static u16 get_ucs2_offset(u16 ucs2)
+static uint16_t get_ucs2_offset(uint16_t ucs2)
 {
-    u16   offset, page, tmp;
-    u8    *mirror_ptr, ch;
+    uint16_t   offset, page, tmp;
+    uint8_t    *mirror_ptr, ch;
 
     page = (ucs2>>8) - 0x4E;
     ucs2 &= 0xFF;
 
     tmp        = ucs2>>6; /* now 0 <= tmp < 4  */ 
     offset     = ucs2_index_table_4E00_9FFF[page][tmp];  
-    mirror_ptr = (u8*)&ucs2_mirror_4E00_9FFF[page][tmp<<3]; /* [0, 8, 16, 24] */ 
+    mirror_ptr = (uint8_t*)&ucs2_mirror_4E00_9FFF[page][tmp<<3]; /* [0, 8, 16, 24] */ 
 
     tmp = ucs2&0x3F; /* mod 64 */ 
 
@@ -91,16 +88,50 @@ static u16 get_ucs2_offset(u16 ucs2)
         return offset;
     }
 
-    return (u16)(-1);
+    return (uint16_t)(-1);
+}
+
+uint16_t unicode_to_gb2312(uint16_t ucs2, uint8_t marks) // Convert one UCS-2 code unit to a GB2312 code; 0xA1A1 is returned when no lookup below succeeds.
+{
+	uint16_t gb = 0xA1A1; // fallback result, kept unless one of the branches below overwrites it
+	if(0x80 > ucs2)
+    {
+        // values below 0x80 are returned unchanged
+        gb = ucs2;
+    }
+    else
+    {
+        if((0x4E00 <= ucs2) && (0xA000 > ucs2)) // range resolved through get_ucs2_offset() and ucs2_to_gb2312_table
+        {
+            uint16_t offset = get_ucs2_offset(ucs2);
+            if((uint16_t)(-1) != offset) // (uint16_t)(-1) is the value get_ucs2_offset() returns when it finds no entry
+            {
+                gb = ucs2_to_gb2312_table[offset];
+            }
+        }
+        else if (marks == 0) // outside 0x4E00..0x9FFF the tab_UCS2_to_GBK scan runs only when marks is 0; otherwise the fallback stays
+        {
+            uint16_t u16count = sizeof(tab_UCS2_to_GBK)/4; // row count; assumes each tab_UCS2_to_GBK row occupies 4 bytes (two 16-bit values) - TODO confirm against the table definition
+            for(uint16_t ui=0; ui < u16count; ui++)
+            {
+                if(ucs2 == tab_UCS2_to_GBK[ui][0]) // column 0 is compared with the input, column 1 supplies the result
+                {
+                    gb = tab_UCS2_to_GBK[ui][1]; // no break follows, so the whole table is always scanned and the last matching row wins
+                }
+            }
+                
+        }
+    }
+	return gb;
+}
 
 /*+\NEW\liweiqiang\2013.11.26\完善gb2312<->ucs2(ucs2be)编码转换*/
-size_t iconv_ucs2_to_gb2312_endian(char **_inbuf, size_t *inbytesleft, char **_outbuf, size_t *outbytesleft, int endian)
+static size_t iconv_ucs2_to_gb2312_endian(char **_inbuf, size_t *inbytesleft, char **_outbuf, size_t *outbytesleft, int endian)
 {
-    u16 offset, gb2312 = 0xA1A1; 
-    u16 ucs2;
+    uint16_t gb2312 = 0xA1A1; 
+    uint16_t ucs2;
     size_t gb_length = 0;
-    u16 *ucs2buf = (u16*)*_inbuf;
+    uint16_t *ucs2buf = (uint16_t*)*_inbuf;
     char *outbuf = (char *)*_outbuf;
     size_t inlen = *inbytesleft/2;
     size_t outlen = *outbytesleft;
@@ -120,57 +151,19 @@ size_t iconv_ucs2_to_gb2312_endian(char **_inbuf, size_t *inbytesleft, char **_o
         if(endian == 1)
             ucs2 = (ucs2<<8)|(ucs2>>8);
 
-        gb2312 = 0xA1A1;
+        gb2312 = unicode_to_gb2312(ucs2, 0);
         //End 7205
-  
-        if(0x80 > ucs2)
+
+        if(0x80 > gb2312)
         {
             // can be convert to ASCII char
-            *outbuf++ = (u8)ucs2;
+            *outbuf++ = (uint8_t)gb2312;
             gb_length++;
         }
         else
         {
-            if((0x4E00 <= ucs2) && (0xA000 > ucs2))
-            {
-                offset = get_ucs2_offset(ucs2);
-                if((u16)(-1) != offset)
-                {
-                    gb2312 = ucs2_to_gb2312_table[offset];
-                }
-            }
-            else
-            {
-                u16 u16count = sizeof(tab_UCS2_to_GBK)/4;
-                u16 ui = 0;
-                for(ui=0;ui<u16count;ui++)
-                {
-                    if(ucs2 == tab_UCS2_to_GBK[ui][0])
-                    {
-                        gb2312 = tab_UCS2_to_GBK[ui][1];
-                    }
-                }
-                
-            }
-#if 0
-            else
-            {
-                // Is chinese symbol ?
-                // try search another table
-                for( offset = 0; offset < 94 * 16; offset++ )
-                {
-                    if( ucs2 == gb2312_to_ucs2_table[ offset ] )
-                    {
-                        gb2312 = offset / 94 + 0xA0;             
-                        gb2312 = (gb2312 << 8) + (offset % 94 + 0xA1);
-                        break;
-                    }
-                }
-            }
-#endif
- 
-            *outbuf++ = (u8)(gb2312>>8);
-            *outbuf++ = (u8)(gb2312);
+            *outbuf++ = (uint8_t)(gb2312>>8);
+            *outbuf++ = (uint8_t)(gb2312);
             gb_length += 2;
         }
         

+ 3 - 3
components/iconv/ucs2_to_gb2312_offset.h

@@ -1,5 +1,5 @@
-const u16 number_of_ucs2_4E00_9FFF = 0x1a6b;
-const u8 ucs2_mirror_4E00_9FFF[ 0xA0 - 0x4E ][256 >> 3] = 
+const uint16_t number_of_ucs2_4E00_9FFF = 0x1a6b;
+const uint8_t ucs2_mirror_4E00_9FFF[ 0xA0 - 0x4E ][256 >> 3] = 
 {
     {
         0x8b, 0x7f, 0x7b, 0x7f, 0xb4, 0x3d, 0x55, 0xef, 
@@ -494,7 +494,7 @@ const u8 ucs2_mirror_4E00_9FFF[ 0xA0 - 0x4E ][256 >> 3] =
         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
     },
 };
-const u16 ucs2_index_table_4E00_9FFF[0xA0 - 0x4E][4] = 
+const uint16_t ucs2_index_table_4E00_9FFF[0xA0 - 0x4E][4] = 
 {
     {0x0000, 0x002c, 0x0049, 0x006d, },
     {0x0096, 0x00b6, 0x00d8, 0x00f1, },

+ 11 - 6
components/iconv/utf8_to_ucs2.c

@@ -140,12 +140,16 @@ static int enc_utf8_to_unicode_one(const char* pInput, char* pOutput, int endian
                 *pOutput = 0x00;
             }
             return 2;
-        break;
+            //break;
         case 2:
             b1 = *pInput;
             b2 = *(pInput + 1);
-            if ( (b2 & 0xE0) != 0x80 )
-            return -1;
+            /*+\BUG\wangyuan\2020.11.18\遇到一些特殊字符会返回错误*/
+			/*UTF-8二进制形式为 110xxxxx 10xxxxxx
+			例如:'·'的  UTF-8编码 11000010 10110111*/
+            if ( (b2 & 0xC0) != 0x80 )
+            	return -1;
+			/*-\BUG\wangyuan\2020.11.18\遇到一些特殊字符会返回错误*/
             if(endian)
             {
                 *pOutput++ = (b1 >> 2) & 0x07;
@@ -157,7 +161,7 @@ static int enc_utf8_to_unicode_one(const char* pInput, char* pOutput, int endian
                 *pOutput = (b1 >> 2) & 0x07;
             }
             return 2;
-        break;
+            //break;
         case 3:
             b1 = *pInput;
             b2 = *(pInput + 1);
@@ -175,7 +179,7 @@ static int enc_utf8_to_unicode_one(const char* pInput, char* pOutput, int endian
                 *pOutput = (b1 << 4) + ((b2 >> 2) & 0x0F);
             }
             return 2;
-        break;
+            //break;
 #if 0
         case 4:
             b1 = *pInput;
@@ -237,7 +241,8 @@ static size_t enc_utf8_to_unicode(char **_inbuf, size_t *inbytesleft, char **_ou
     char *src = *_inbuf;
     char *dst = *_outbuf;
     size_t iPos, oPos;
-    size_t utfbytes = 0, unicodeBytes = 0;
+    size_t utfbytes = 0;
+    size_t unicodeBytes = 0;
     int result = 0;
 
     for(iPos = 0, oPos = 0; iPos < iLen; )