Просмотр исходного кода

update: 调整luf格式的布局, 把字符串统一放在尾部,并做去重, 实现与luac差不多的文件大小

Wendal Chen 4 года назад
Родитель
Коммит
25433df93a
2 изменённых файла: 273 добавлено и 124 удалено
  1. 222 87
      components/luf/luat_luf_dump.c
  2. 51 37
      components/luf/luat_luf_undump.c

+ 222 - 87
components/luf/luat_luf_dump.c

@@ -29,7 +29,8 @@
 #define white2gray(x)	resetbits(x->marked, WHITEBITS)
 #define black2gray(x)	resetbit(x->marked, BLACKBIT)
 
-static size_t fd_offset;
+#define fslen(s) (sizeof(TString) + tsslen(s) + 1)
+
 
 #define LUF_SIGNATURE "\x1cLUF"
 
@@ -42,6 +43,109 @@ typedef struct {
 } DumpState;
 
 
+static void DumpString (const TString *s, DumpState *D);
+
+static size_t fd_offset = 0;
+static size_t str_offset = 0;
+
+typedef struct strpool // one fixed-size page of the string pool; pages are chained through `next`
+{
+  TString* ts[256]; // strings registered in this page; the first NULL slot marks the end of the used entries
+  void* ptr[256]; // value spool_add returns for ts[i]: str_offset at insertion time, stored as a pointer
+  void* next; // following page (cast to strpool_t* by users), or NULL for the last page
+}strpool_t;
+
+static strpool_t *spool = NULL;
+
+static void  spool_init(void) { // allocate the first pool page and make the file-level `spool` point at it
+  spool = luat_heap_malloc(sizeof(strpool_t)); // NOTE(review): the result is not checked before the memset below
+  memset(spool, 0, sizeof(strpool_t)); // all slots NULL and next == NULL: an empty, unchained page
+}
+/*
+** Release every page of the string pool and reset the file-level `spool`
+** pointer. Only the pages are freed; the TString objects referenced from
+** the `ts` slots are not touched.
+*/
+static void  spool_deinit(void) {
+  strpool_t *tmp = spool;
+  while (tmp != NULL) {
+    /* Fetch the link BEFORE freeing: reading tmp->next after
+    ** luat_heap_free(tmp) would be a use-after-free. */
+    strpool_t *next = (strpool_t *)tmp->next;
+    luat_heap_free(tmp);
+    tmp = next;
+  }
+  spool = NULL;  /* do not leave a dangling pointer behind */
+}
+static void spool_dump(DumpState *D) { // pass every pooled string to DumpString, page by page in slot order
+  strpool_t *tmp = spool;
+  while (tmp != NULL) {
+    for (size_t i = 0; i < 256; i++)
+    {
+      if (tmp->ts[i] == NULL) // first empty slot: stop, nothing is expected past it
+        return;
+      DumpString(tmp->ts[i], D);
+    }
+    tmp = (strpool_t *)tmp->next; // this page was completely full; continue with the chained page
+  }
+}
+
+/*
+** Register `ts` in the string pool (de-duplicating by content) and return
+** the value recorded for it: the running `str_offset` at the moment the
+** string was first added, stored as a pointer. `str_offset` is then advanced
+** by fslen(ts). A NULL `ts` is returned unchanged and is not pooled.
+** NOTE(review): the returned "pointer" is presumably the file offset where
+** spool_dump() will later place the string -- confirm against DumpString.
+*/
+static TString* spool_add(TString* ts) {
+  if (ts == NULL)
+    return ts;
+  strpool_t *tmp = spool;
+  // strpool_t *next = NULL;
+  while (tmp != NULL) {
+    for (size_t i = 0; i < 256; i++)
+    {
+      if (tmp->ts[i] == NULL) {
+        //LLOGD("add string [%s]", getstr(ts));
+        tmp->ts[i] = ts;
+        tmp->ptr[i] = (void*)(str_offset);
+        str_offset += fslen(ts);
+        return tmp->ptr[i];
+      }
+      /* Compare length + bytes rather than strcmp(): Lua strings may hold
+      ** embedded '\0', and strcmp would merge distinct strings that merely
+      ** share a prefix up to the first NUL. */
+      if (tsslen(ts) == tsslen(tmp->ts[i]) &&
+          memcmp(getstr(ts), getstr(tmp->ts[i]), tsslen(ts)) == 0) {
+        return tmp->ptr[i];
+      }
+    }
+    if (tmp->next == NULL)
+      break;
+    tmp = tmp->next;
+  }
+  /* Every existing page is full: chain a fresh page and store the string in
+  ** THAT page. Writing to the old page's slot 0 would overwrite an already
+  ** registered string and leave the new page empty. */
+  /* NOTE(review): allocation result is not checked, as elsewhere in this pool. */
+  tmp->next = luat_heap_malloc(sizeof(strpool_t));
+  memset(tmp->next, 0, sizeof(strpool_t));
+  tmp = (strpool_t *)tmp->next;
+
+  tmp->ts[0] = ts;
+  tmp->ptr[0] = (void*)(str_offset);
+  str_offset += fslen(ts);
+  return tmp->ptr[0];
+}
+
+size_t countProtoDumpSize(Proto *f) { // bytes accounted for `f` and, recursively, all of its nested protos
+  if (f == NULL)
+    return 0;
+  size_t count = 0;
+
+
+  /*
+  DumpInt(f->linedefined, D);
+  DumpInt(f->lastlinedefined, D);
+  DumpByte(f->numparams, D);
+  DumpByte(f->is_vararg, D);
+  DumpByte(f->maxstacksize, D);
+  DumpByte(f->source == NULL ? 0 : 1, D);
+  */
+  count += sizeof(int) * 2 + sizeof(lu_byte) * 4; // the two ints and four bytes listed in the comment above
+
+  count += f->sizecode * sizeof(Instruction); // raw instruction array
+  count += f->sizek * sizeof(TValue); // constant table, one TValue per constant
+  count += f->sizeupvalues * sizeof(Upvaldesc); // upvalue descriptors
+  count += f->sizelineinfo * sizeof(int); // line info
+  count += f->sizelocvars * sizeof(LocVar); // local variable records
+
+  for (size_t i = 0; i < f->sizep; i++)
+  {
+    count += countProtoDumpSize(f->p[i]); // nested functions are counted recursively
+  }
+
+  count += sizeof(int) * 6; // sizeX * 6: sizecode, sizek, sizeupvalues, sizep, sizelineinfo, sizelocvars
+
+  return count;
+}
+
+
 /*
 ** All high-level dumps go through DumpVector; you can change it to
 ** change the endianness of the result
@@ -50,8 +154,13 @@ typedef struct {
 
 #define DumpLiteral(s,D)	DumpBlock(s, sizeof(s) - sizeof(char), D)
 
-#define fslen(s) (sizeof(TString) + tsslen(s) + 1)
-
+// static TString* AddMockString(TString* ts) {
+//   if (ts == NULL)
+//     return ts;
+//   TString* t = (TString*) (fd_offset + str_offset);
+//   str_offset += (sizeof(TString) + tsslen(ts) + 1);
+//   return t;
+// }
 
 static void DumpBlock (const void *b, size_t size, DumpState *D) {
   if (D->status == 0 && size > 0) {
@@ -89,12 +198,12 @@ static void DumpInteger (lua_Integer x, DumpState *D) {
 
 
 static void DumpString (const TString *s, DumpState *D) {
-  size_t size = 0;
-  if (s == NULL) {
-    DumpByte(0, D);
-    return;
-  }
-  DumpByte(1, D);
+  // size_t size = 0;
+  // if (s == NULL) {
+  //   DumpByte(0, D);
+  //   return;
+  // }
+  // DumpByte(1, D);
   TString ts;
   memcpy(&ts, s, sizeof(TString));
   ts.next = NULL;
@@ -113,7 +222,7 @@ static void DumpString (const TString *s, DumpState *D) {
 }
 
 static void DumpCode (const Proto *f, DumpState *D) {
-  DumpInt(f->sizecode, D);
+  //DumpInt(f->sizecode, D);
   DumpVector(f->code, f->sizecode, D);
 }
 
@@ -125,10 +234,10 @@ static void DumpConstants (const Proto *f, DumpState *D) {
   int n = f->sizek;
 
   //LLOGD("DumpConstants %d %d", n, n * sizeof(TValue));
-  DumpInt(n, D);
+  //DumpInt(n, D);
 
-  size_t init_offset = fd_offset + sizeof(TValue) * n + sizeof(int);
-  size_t i_offset = init_offset;
+  //size_t init_offset = fd_offset + sizeof(TValue) * n + sizeof(int);
+  //size_t i_offset = init_offset;
   TValue tmp;
   for (i = 0; i < n; i++) {
     const TValue *o = &f->k[i];
@@ -137,9 +246,9 @@ static void DumpConstants (const Proto *f, DumpState *D) {
       case LUA_TLNGSTR:
       // {
         memcpy(&tmp, o, sizeof(TValue));
-        tmp.value_.gc = (void*)(init_offset);
+        tmp.value_.gc = spool_add(tsvalue(o));
       //   o = &tmp;
-        init_offset += fslen(tsvalue(o)) + 1;
+      //  init_offset += fslen(tsvalue(o)) + 1;
       //   //break;
       // }
       default:
@@ -148,18 +257,18 @@ static void DumpConstants (const Proto *f, DumpState *D) {
     }
   }
   //LLOGD("DumpConstants1 Strings len %d %d %d %d", init_offset, i_offset, fd_offset, init_offset - i_offset);
-  DumpInt(init_offset - i_offset, D);
-  for (i = 0; i < n; i++) {
-    const TValue *o = &f->k[i];
-    switch (ttype(o)) {
-      case LUA_TSHRSTR:
-      case LUA_TLNGSTR:
-      {
-        DumpString(tsvalue(o), D);
-        break;
-      }
-    }
-  }
+  // DumpInt(init_offset - i_offset, D);
+  // for (i = 0; i < n; i++) {
+  //   const TValue *o = &f->k[i];
+  //   switch (ttype(o)) {
+  //     case LUA_TSHRSTR:
+  //     case LUA_TLNGSTR:
+  //     {
+  //       DumpString(tsvalue(o), D);
+  //       break;
+  //     }
+  //   }
+  // }
   //LLOGD("DumpConstants2 Strings len %d %d %d %d", init_offset, i_offset, fd_offset, init_offset - i_offset);
 }
 
@@ -167,110 +276,125 @@ static void DumpConstants (const Proto *f, DumpState *D) {
 static void DumpProtos (const Proto *f, DumpState *D) {
   int i;
   int n = f->sizep;
-  DumpInt(n, D);
+  // DumpInt(n, D);
   for (i = 0; i < n; i++)
     DumpFunction(f->p[i], f->source, D);
 }
 
 
 static void DumpUpvalues (const Proto *f, DumpState *D) {
-  int i, n = f->sizeupvalues;
-  DumpInt(n, D);
+  int i, n;
+  i = 0;
+  n = f->sizeupvalues;
+  // DumpInt(n, D);
   //LLOGD("LoadUpvalues %d %d", n, sizeof(Upvaldesc) * n);
-  size_t init_offset = fd_offset + sizeof(Upvaldesc) * f->sizeupvalues + sizeof(int);
-  size_t i_offset = init_offset;
+  // size_t init_offset = fd_offset + sizeof(Upvaldesc) * f->sizeupvalues + sizeof(int);
+  // size_t i_offset = init_offset;
   Upvaldesc desc;
   for (i = 0; i < n; i++) {
-    if (f->upvalues[i].name == NULL)
-      desc.name = NULL;
-    else
-      desc.name = (TString*)(init_offset);
+    desc.name = spool_add(f->upvalues[i].name);;
     desc.idx = f->upvalues[i].idx;
     desc.instack = f->upvalues[i].instack;
 
     DumpBlock(&desc, sizeof(Upvaldesc), D);
 
-    if (f->upvalues[i].name) {
-      init_offset += fslen(f->upvalues[i].name) + 1;
-      //LLOGD("DumpUpvalues name %s %d %d", getstr(f->upvalues[i].name), i_offset, init_offset);
-    }
-    else {
-      init_offset += 1;
-    }
+    // if (f->upvalues[i].name) {
+    //   init_offset += fslen(f->upvalues[i].name) + 1;
+    //   //LLOGD("DumpUpvalues name %s %d %d", getstr(f->upvalues[i].name), i_offset, init_offset);
+    // }
+    // else {
+    //   init_offset += 1;
+    // }
   }
   
   //LLOGD("DumpUpvalues Strings len %d %d %d %d", init_offset, i_offset, fd_offset, init_offset - i_offset);
-  DumpInt(init_offset - i_offset, D);
-  for (i = 0; i < n; i++) {
-    DumpString(f->upvalues[i].name, D);
-  }
+  // DumpInt(init_offset - i_offset, D);
+  // for (i = 0; i < n; i++) {
+  //   DumpString(f->upvalues[i].name, D);
+  // }
 }
 
 
 static void DumpDebug (const Proto *f, DumpState *D) {
   int i, n;
-  n = (D->strip) ? 0 : f->sizelineinfo;
-  DumpInt(n, D);
+  n = f->sizelineinfo;
+  // DumpInt(n, D);
   DumpVector(f->lineinfo, n, D);
-  n = (D->strip) ? 0 : f->sizelocvars;
-  DumpInt(n, D);
-  size_t init_offset = fd_offset + sizeof(LocVar) * f->sizelocvars + sizeof(int);
-  size_t i_offset = init_offset;
+  n = f->sizelocvars;
+  // DumpInt(n, D);
+  // size_t init_offset = fd_offset + sizeof(LocVar) * f->sizelocvars + sizeof(int);
+  // size_t i_offset = init_offset;
+  LocVar lv;
   for (i = 0; i < n; i++) {
-    DumpInt(f->locvars[i].varname == NULL ? 0 : init_offset, D);
-    DumpInt(f->locvars[i].startpc, D);
-    DumpInt(f->locvars[i].endpc, D);
-
-    if (f->locvars[i].varname) {
-      init_offset += fslen(f->locvars[i].varname) + 1;
-    }
-    else {
-      init_offset += 1;
-    }
+    lv.varname = spool_add(f->locvars[i].varname);
+    lv.startpc = f->locvars[i].startpc;
+    lv.endpc   = f->locvars[i].endpc;
+    DumpBlock(&lv, sizeof(LocVar), D);
   }
   //LLOGD("DumpDebug Strings len %d %d %d %d", init_offset, i_offset, fd_offset, init_offset - i_offset);
-  DumpInt(init_offset - i_offset, D);
-  for (i = 0; i < n; i++) {
-    DumpString(f->locvars[i].varname, D);
-  }
+  // DumpInt(init_offset - i_offset, D);
+  // for (i = 0; i < n; i++) {
+  //   DumpString(f->locvars[i].varname, D);
+  // }
 }
 
 
 static void DumpFunction (const Proto *f, TString *psource, DumpState *D) {
-  DumpString(f->source, D);
+  //DumpString(f->source, D);
+  LLOGD("<<<<<<<<< DumpFunction");
 
   DumpInt(f->linedefined, D);
   DumpInt(f->lastlinedefined, D);
   DumpByte(f->numparams, D);
   DumpByte(f->is_vararg, D);
   DumpByte(f->maxstacksize, D);
+  DumpByte(f->source == NULL ? 0 : 1, D);
 
-  // LLOGD("linedefined %d", f->linedefined);
-  // LLOGD("lastlinedefined %d", f->lastlinedefined);
-  // LLOGD("numparams %d", f->numparams);
-  // LLOGD("is_vararg %d", f->is_vararg);
-  // LLOGD("maxstacksize %d", f->maxstacksize);
+  LLOGD("linedefined %d", f->linedefined);
+  LLOGD("lastlinedefined %d", f->lastlinedefined);
+  LLOGD("numparams %d", f->numparams);
+  LLOGD("is_vararg %d", f->is_vararg);
+  LLOGD("maxstacksize %d", f->maxstacksize);
+
+  DumpInt(f->sizecode, D);
+  DumpInt(f->sizek, D);
+  DumpInt(f->sizeupvalues, D);
+  DumpInt(f->sizep, D);
+  DumpInt(f->sizelineinfo, D);
+  DumpInt(f->sizelocvars, D);
+
+  LLOGD("sizecode %d", f->sizecode);
+  LLOGD("sizek %d", f->sizek);
+  LLOGD("sizeupvalues %d", f->sizeupvalues);
+  LLOGD("sizep %d", f->sizep);
+  LLOGD("sizelineinfo %d", f->sizelineinfo);
+  LLOGD("sizelocvars %d", f->sizelocvars);
 
   DumpCode(f, D);
   DumpConstants(f, D);
   DumpUpvalues(f, D);
   DumpProtos(f, D);
   DumpDebug(f, D);
+
+  LLOGD(">>>>>>>>>>>>> DumpFunction");
+
+  //if (f->source)
+  //  DumpString((const TString*)f->source, D);
 }
 
 
-static void DumpHeader (DumpState *D) {
-  DumpLiteral(LUF_SIGNATURE, D);
-  DumpByte(LUAC_VERSION, D);
-  DumpByte(LUAC_FORMAT + 1, D);
-  DumpLiteral(LUAC_DATA, D);
-  DumpByte(sizeof(int), D);
-  DumpByte(sizeof(size_t), D);
-  DumpByte(sizeof(Instruction), D);
-  DumpByte(sizeof(lua_Integer), D);
-  DumpByte(sizeof(lua_Number), D);
-  DumpInteger(LUAC_INT, D);
-  DumpNumber(LUAC_NUM, D);
+static void DumpHeader (DumpState *D) { // 15+12
+  DumpLiteral(LUF_SIGNATURE, D); // 4
+  DumpByte(LUAC_VERSION, D); // 1
+  DumpByte(LUAC_FORMAT + 1, D); // 1
+  DumpLiteral(LUAC_DATA, D); // 6
+  DumpByte(sizeof(int), D); // 1
+  DumpByte(sizeof(size_t), D); // 1
+  DumpByte(sizeof(Instruction), D); // 1
+  DumpByte(sizeof(lua_Integer), D); // 1
+  DumpByte(sizeof(lua_Number), D); // 1
+  DumpInteger(LUAC_INT, D); // 4
+  DumpNumber(LUAC_NUM, D);  // 4
 }
 
 
@@ -291,10 +415,21 @@ int luf_dump(lua_State *L, const Proto *f, lua_Writer w, void *data,
   // LLOGD("sizeof(Upvaldesc) %d", sizeof(Upvaldesc));
   // LLOGD("sizeof(LocVar) %d", sizeof(LocVar));
 
-  DumpHeader(&D);
-  DumpByte(f->sizeupvalues, &D);
+  DumpHeader(&D); // 27
+  DumpByte(f->sizeupvalues, &D); // 1
+
+  size_t tcount = countProtoDumpSize(f);
+  spool_init();
+  str_offset = fd_offset + tcount;
   // LLOGD("sizeupvalues %d", f->sizeupvalues);
+  DumpInt(f->source == NULL ? 0 : str_offset, &D);
+  spool_add(f->source);
   DumpFunction(f, NULL, &D);
+  LLOGD("after DumpFunction <");
+  spool_dump(&D);
+  LLOGD("spool_dump <");
+  spool_deinit();
+  LLOGD("spool_deinit <");
   return D.status;
 }
 

+ 51 - 37
components/luf/luat_luf_undump.c

@@ -116,10 +116,10 @@ static TString *LoadString (LoadState *S, Proto *p) {
 }
 
 static void LoadCode (LoadState *S, Proto *f) {
-  int n = LoadInt(S);
+  // int n = LoadInt(S);
   // LLOGD("LoadCode %d %d", n, sizeof(Instruction) * n);
-  f->sizecode = n;
-  f->code = DistBlock(S, sizeof(Instruction) * n);
+  // f->sizecode = n;
+  f->code = DistBlock(S, sizeof(Instruction) * f->sizecode);
 }
 
 
@@ -127,18 +127,18 @@ static void LoadFunction(LoadState *S, Proto *f, TString *psource);
 
 
 static void LoadConstants (LoadState *S, Proto *f) {
-  int i;
-  int n = LoadInt(S);
+  // int i;
+  // int n = LoadInt(S);
   // LLOGD("LoadConstants %d %d", n, sizeof(TValue) * n);
-  f->sizek = n;
+  // f->sizek = n;
   // 指向常数数组
-  f->k = DistBlock(S, sizeof(TValue) * n);
+  f->k = DistBlock(S, sizeof(TValue) * f->sizek);
   // 跳过字符串段
   
   // LLOGD("1>>LoadConstants %02X %02X %02X %02X", *(S->Z->p), *(S->Z->p + 1), *(S->Z->p + 2), *(S->Z->p + 3));
-  n = LoadInt(S);
+  // n = LoadInt(S);
   // LLOGD("LoadConstants skip Strings %d", n);
-  DistBlock(S, sizeof(char) * n);
+  // DistBlock(S, sizeof(char) * n);
 
   // LLOGD("2>>LoadConstants %02X %02X %02X %02X", *(S->Z->p), *(S->Z->p + 1), *(S->Z->p + 2), *(S->Z->p + 3));
 }
@@ -146,12 +146,12 @@ static void LoadConstants (LoadState *S, Proto *f) {
 
 static void LoadProtos (LoadState *S, Proto *f) {
   int i;
-  int n = LoadInt(S);
-  f->p = luaM_newvector(S->L, n, Proto *);
-  f->sizep = n;
-  for (i = 0; i < n; i++)
+  // int n = LoadInt(S);
+  f->p = luaM_newvector(S->L, f->sizep, Proto *);
+  // f->sizep = n;
+  for (i = 0; i < f->sizep; i++)
     f->p[i] = NULL;
-  for (i = 0; i < n; i++) {
+  for (i = 0; i < f->sizep; i++) {
     f->p[i] = luaF_newproto(S->L);
     luaC_objbarrier(S->L, f, f->p[i]);
     LoadFunction(S, f->p[i], f->source);
@@ -162,43 +162,41 @@ static void LoadProtos (LoadState *S, Proto *f) {
 
 static void LoadUpvalues (LoadState *S, Proto *f) {
   int i, n;
-  n = LoadInt(S);
-  f->sizeupvalues = n;
+  // n = LoadInt(S);
+  // f->sizeupvalues = n;
   // LLOGD("LoadUpvalues %d %d", n, sizeof(Upvaldesc) * n);
-  f->upvalues = DistBlock(S, sizeof(Upvaldesc) * n);
+  f->upvalues = DistBlock(S, sizeof(Upvaldesc) * f->sizeupvalues);
   // char* tmp = luaM_newvector(S->L, n, Upvaldesc);
   // memcpy(tmp, f->upvalues, sizeof(Upvaldesc) * n);
   // f->upvalues = tmp;
   // 跳过字符串段
-  n = LoadInt(S);
+  // n = LoadInt(S);
   // LLOGD("LoadUpvalues skip Strings %d", n);
-  DistBlock(S, sizeof(char) * n);
+  // DistBlock(S, sizeof(char) * n);
 }
 
 
 static void LoadDebug (LoadState *S, Proto *f) {
   int i, n;
   
-  n = LoadInt(S);
-  f->sizelineinfo = n;
+  // n = LoadInt(S);
+  // f->sizelineinfo = n;
   // LLOGD("LoadDebug sizelineinfo %d %d", n, sizeof(int) * n);
-  f->lineinfo = DistBlock(S, sizeof(int) * n);
+  f->lineinfo = DistBlock(S, sizeof(int) * f->sizelineinfo);
   
-  n = LoadInt(S);
-  f->sizelocvars = n;
+  // n = LoadInt(S);
+  // f->sizelocvars = n;
   // LLOGD("LoadDebug sizelocvars %d %d", n, sizeof(LocVar) * n);
-  f->locvars = DistBlock(S, sizeof(LocVar) * n);
+  f->locvars = DistBlock(S, sizeof(LocVar) * f->sizelocvars);
 
-  n = LoadInt(S);
-  DistBlock(S, sizeof(char) * n);
+  // n = LoadInt(S);
+  // DistBlock(S, sizeof(char) * n);
 }
 
 
 static void LoadFunction (LoadState *S, Proto *f, TString *psource) {
   //LLOGD(">> %02X %02X %02X %02X", *(S->Z->p), *(S->Z->p + 1), *(S->Z->p + 2), *(S->Z->p + 3));
-  f->source = LoadString(S, f);
-  if (f->source == NULL)  /* no source in dump? */
-    f->source = psource;  /* reuse parent's source */
+  f->source = psource;  /* reuse parent's source */
 
   // if (f->source)
   //   LLOGI("%s %d source %s", __FILE__, __LINE__, getstr(f->source));
@@ -210,12 +208,27 @@ static void LoadFunction (LoadState *S, Proto *f, TString *psource) {
   f->numparams = LoadByte(S);
   f->is_vararg = LoadByte(S);
   f->maxstacksize = LoadByte(S);
-
-  // LLOGD("linedefined %d", f->linedefined);
-  // LLOGD("lastlinedefined %d", f->lastlinedefined);
-  // LLOGD("numparams %d", f->numparams);
-  // LLOGD("is_vararg %d", f->is_vararg);
-  // LLOGD("maxstacksize %d", f->maxstacksize);
+  LoadByte(S); // f->source != NULL ?
+
+  LLOGD("linedefined %d", f->linedefined);
+  LLOGD("lastlinedefined %d", f->lastlinedefined);
+  LLOGD("numparams %d", f->numparams);
+  LLOGD("is_vararg %d", f->is_vararg);
+  LLOGD("maxstacksize %d", f->maxstacksize);
+
+  f->sizecode = LoadInt(S);
+  f->sizek = LoadInt(S);
+  f->sizeupvalues = LoadInt(S);
+  f->sizep = LoadInt(S);
+  f->sizelineinfo = LoadInt(S);
+  f->sizelocvars = LoadInt(S);
+
+  LLOGD("sizecode %d", f->sizecode);
+  LLOGD("sizek %d", f->sizek);
+  LLOGD("sizeupvalues %d", f->sizeupvalues);
+  LLOGD("sizep %d", f->sizep);
+  LLOGD("sizelineinfo %d", f->sizelineinfo);
+  LLOGD("sizelocvars %d", f->sizelocvars);
 
   LoadCode(S, f);
   LoadConstants(S, f);
@@ -279,7 +292,8 @@ LClosure *luat_luf_undump(lua_State *L, ZIO *Z, const char *name) {
   cl->p = luaF_newproto(L);
   // LLOGD("sizeupvalues %d", cl->nupvalues);
   luaC_objbarrier(L, cl, cl->p); // add by wendal, refer: https://github.com/lua/lua/commit/f5eb809d3f1da13683cd02184042e67228206205
-  LoadFunction(&S, cl->p, NULL);
+  size_t s = LoadInt(&S);
+  LoadFunction(&S, cl->p, (TString*)s);
   lua_assert(cl->nupvalues == cl->p->sizeupvalues);
   luai_verifycode(L, buff, cl->p);
   luaF_initupvals(L, cl);