4 år sedan · a2e7f1802c
--- a/Third_Party/jpeg_encode/tiny_jpeg.c
+++ b/Third_Party/jpeg_encode/tiny_jpeg.c
@@ -0,0 +1,1271 @@
 
				+#include "tiny_jpeg.h"
			
 
				+#include <inttypes.h>
			
 
				+#include <math.h>   // floorf, ceilf
			
 
				+#include <string.h> // memcpy
			
 
				+
			
 
				+#include "bsp_common.h"
			
 
				+#include "core_debug.h"
			
 
				+#define assert(x) ASSERT(x)
			
 
				// Minimum / maximum of two values.
				// Every argument and the whole expansion are parenthesized, and there is no
				// trailing semicolon, so the macros can be used inside larger expressions.
				// Each argument may be evaluated twice: do not pass expressions with side
				// effects.
				#define tjei_min(a, b) (((a) < (b)) ? (a) : (b))
				#define tjei_max(a, b) (((a) < (b)) ? (b) : (a))
			
 
				+
			
 
				+
			
 
				+#if defined(_MSC_VER)
			
 
				+#define TJEI_FORCE_INLINE __forceinline
			
 
				+// #define TJEI_FORCE_INLINE __declspec(noinline)  // For profiling
			
 
				+#else
			
 
				+#define TJEI_FORCE_INLINE static // TODO: equivalent for gcc & clang
			
 
				+#endif
			
 
				+
			
 
				+// Only use zero for debugging and/or inspection.
			
 
				+#define TJE_USE_FAST_DCT 1
			
 
				+
			
 
				+// C std lib
			
 
				+
			
 
				+
			
 
				+// ============================================================
			
 
				+// Table definitions.
			
 
				+//
			
 
				+// The spec defines reasonably good default quantization matrices and huffman
			
 
				+// specification tables.
			
 
				+//
			
 
				+//
			
 
				+// Instead of hard-coding the final huffman table, we only hard-code the table
			
 
				+// spec suggested by the specification, and then derive the full table from
			
 
				+// there.  This is only for didactic purposes but it might be useful if there
			
 
				+// ever is the case that we need to swap huffman tables from various sources.
			
 
				+// ============================================================
			
 
				+
			
 
				+
			
 
				// Luminance quantization table suggested by the JPEG spec (table K.1).
				// 64 entries, written here as eight rows of eight.
				static const uint8_t tjei_default_qt_luma_from_spec[8 * 8] = {
				    16, 11, 10, 16,  24,  40,  51,  61,
				    12, 12, 14, 19,  26,  58,  60,  55,
				    14, 13, 16, 24,  40,  57,  69,  56,
				    14, 17, 22, 29,  51,  87,  80,  62,
				    18, 22, 37, 56,  68, 109, 103,  77,
				    24, 35, 55, 64,  81, 104, 113,  92,
				    49, 64, 78, 87, 103, 121, 120, 101,
				    72, 92, 95, 98, 112, 100, 103,  99
				};
			
 
				+
			
 
				+// Unused
			
 
				+#if 0
			
 
				+static const uint8_t tjei_default_qt_chroma_from_spec[] =
			
 
				+{
			
 
				+    // K.1 - suggested chrominance QT
			
 
				+   17,18,24,47,99,99,99,99,
			
 
				+   18,21,26,66,99,99,99,99,
			
 
				+   24,26,56,99,99,99,99,99,
			
 
				+   47,66,99,99,99,99,99,99,
			
 
				+   99,99,99,99,99,99,99,99,
			
 
				+   99,99,99,99,99,99,99,99,
			
 
				+   99,99,99,99,99,99,99,99,
			
 
				+   99,99,99,99,99,99,99,99,
			
 
				+};
			
 
				+#endif
			
 
				+
			
 
				// Example quantization table taken from the JPEG paper; 64 entries, written
				// here as eight rows of eight.
				static const uint8_t tjei_default_qt_chroma_from_paper[8 * 8] = {
				     16,  12,  14, 14, 18,  24,  49,  72,
				     11,  10,  16, 24, 40,  51,  61,  12,
				     13,  17,  22, 35, 64,  92,  14,  16,
				     22,  37,  55, 78, 95,  19,  24,  29,
				     56,  64,  87, 98, 26,  40,  51,  68,
				     81, 103, 112, 58, 57,  87, 109, 104,
				    121, 100,  60, 69, 80, 103, 113, 120,
				    103,  55,  56, 62, 77,  92, 101,  99
				};
			
 
				+
			
 
				+// == Procedure to 'deflate' the huffman tree: JPEG spec, C.2
			
 
				+
			
 
				// Luminance DC table: number of Huffman codes of each length, entry i being
				// the count of codes that are (i + 1) bits long. (K.3.3.1)
				static const uint8_t tjei_default_ht_luma_dc_len[16] = {
				    0, 1, 5, 1, 1, 1, 1, 1,   // lengths  1..8
				    1, 0, 0, 0, 0, 0, 0, 0    // lengths  9..16
				};
			
 
				// Luminance DC table: symbol values, listed in order of increasing code length.
				static const uint8_t tjei_default_ht_luma_dc[12] = {
				    0, 1, 2, 3, 4, 5,
				    6, 7, 8, 9, 10, 11
				};
			
 
				+
			
 
				// Chrominance DC table: number of Huffman codes of each length, entry i being
				// the count of codes that are (i + 1) bits long. (K.3.3.1)
				static const uint8_t tjei_default_ht_chroma_dc_len[16] = {
				    0, 3, 1, 1, 1, 1, 1, 1,   // lengths  1..8
				    1, 1, 1, 0, 0, 0, 0, 0    // lengths  9..16
				};
			
 
				// Chrominance DC table: symbol values, listed in order of increasing code length.
				static const uint8_t tjei_default_ht_chroma_dc[12] = {
				    0, 1, 2, 3, 4, 5,
				    6, 7, 8, 9, 10, 11
				};
			
 
				+
			
 
				// Luminance AC table: number of Huffman codes of each length, entry i being
				// the count of codes that are (i + 1) bits long.
				static const uint8_t tjei_default_ht_luma_ac_len[16] = {
				    0, 2, 1, 3, 3, 2, 4, 3,      // lengths  1..8
				    5, 5, 4, 4, 0, 0, 1, 0x7d    // lengths  9..16
				};
			
 
				// Luminance AC table: the 162 symbol values, listed in order of increasing
				// code length.
				static const uint8_t tjei_default_ht_luma_ac[162] = {
				    0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12, 0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07,
				    0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xA1, 0x08, 0x23, 0x42, 0xB1, 0xC1, 0x15, 0x52, 0xD1, 0xF0,
				    0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0A, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x25, 0x26, 0x27, 0x28,
				    0x29, 0x2A, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
				    0x4A, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
				    0x6A, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
				    0x8A, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7,
				    0xA8, 0xA9, 0xAA, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xC2, 0xC3, 0xC4, 0xC5,
				    0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xE1, 0xE2,
				    0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8,
				    0xF9, 0xFA
				};
			
 
				+
			
 
				// Chrominance AC table: number of Huffman codes of each length, entry i being
				// the count of codes that are (i + 1) bits long.
				static const uint8_t tjei_default_ht_chroma_ac_len[16] = {
				    0, 2, 1, 2, 4, 4, 3, 4,      // lengths  1..8
				    7, 5, 4, 4, 0, 1, 2, 0x77    // lengths  9..16
				};
			
 
				// Chrominance AC table: the 162 symbol values, listed in order of increasing
				// code length.
				static const uint8_t tjei_default_ht_chroma_ac[162] = {
				    0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21, 0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71,
				    0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91, 0xA1, 0xB1, 0xC1, 0x09, 0x23, 0x33, 0x52, 0xF0,
				    0x15, 0x62, 0x72, 0xD1, 0x0A, 0x16, 0x24, 0x34, 0xE1, 0x25, 0xF1, 0x17, 0x18, 0x19, 0x1A, 0x26,
				    0x27, 0x28, 0x29, 0x2A, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
				    0x49, 0x4A, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
				    0x69, 0x6A, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
				    0x88, 0x89, 0x8A, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0xA2, 0xA3, 0xA4, 0xA5,
				    0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xC2, 0xC3,
				    0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA,
				    0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8,
				    0xF9, 0xFA
				};
			
 
				// Per-row / per-column scale factors of the AA&N scaled DCT:
				//   aan_scales[0] = 1
				//   aan_scales[k] = cos(k*PI/16) * sqrt(2)   for k = 1..7
				// The table is never written, so it is declared const; this lets the
				// toolchain place it in read-only memory.
				static const float aan_scales[] = {
				    1.0f, 1.387039845f, 1.306562965f, 1.175875602f,
				    1.0f, 0.785694958f, 0.541196100f, 0.275899379f
				};
			
 
				+
			
 
				+// ============================================================
			
 
				+// Code
			
 
				+// ============================================================
			
 
				+
			
 
				// Zig-zag order: entry i is the zig-zag position of the coefficient whose
				// index in the 8x8 block is i.
				static const uint8_t tjei_zig_zag[8 * 8] = {
				     0,  1,  5,  6, 14, 15, 27, 28,
				     2,  4,  7, 13, 16, 26, 29, 42,
				     3,  8, 12, 17, 25, 30, 41, 43,
				     9, 11, 18, 24, 31, 40, 44, 53,
				    10, 19, 23, 32, 39, 45, 52, 54,
				    20, 22, 33, 38, 46, 51, 55, 60,
				    21, 34, 37, 47, 50, 56, 59, 61,
				    35, 36, 48, 49, 57, 58, 62, 63
				};
			
 
				+#define tjei_be_word BSP_Swap16
			
 
				+
			
 
				+// ============================================================
			
 
				+// The following structs exist only for code clarity, debugability, and
			
 
				+// readability. They are used when writing to disk, but it is useful to have
			
 
				+// 1-packed-structs to document how the format works, and to inspect memory
			
 
				+// while developing.
			
 
				+// ============================================================
			
 
				+
			
 
				+static const uint8_t tjeik_jfif_id[] = "JFIF";
			
 
				+static const uint8_t tjeik_com_str[] = "Created by JPEG Encoder";
			
 
				+
			
 
				+// TODO: Get rid of packed structs!
			
 
				+#pragma pack(push)
			
 
				+#pragma pack(1)
			
 
				// Start of the output file: the SOI field followed by the JFIF header fields.
				typedef struct
				{
				    uint16_t SOI;

				    // JFIF header.
				    uint16_t APP0;
				    uint16_t jfif_len;
				    uint8_t  jfif_id[5];   // Five bytes, the size of the "JFIF" literal including its NUL.
				    uint16_t version;
				    uint8_t  units;
				    uint16_t x_density;
				    uint16_t y_density;
				    uint8_t  x_thumb;
				    uint8_t  y_thumb;
				} TJEJPEGHeader;
			
 
				+
			
 
				+typedef struct
			
 
				+{
			
 
				+    uint16_t com;
			
 
				+    uint16_t com_len;
			
 
				+    char     com_str[sizeof(tjeik_com_str) - 1];
			
 
				+} TJEJPEGComment;
			
 
				+
			
 
				+typedef struct
			
 
				+{
			
 
				+    void*           context;
			
 
				+    tje_write_func* func;
			
 
				+} TJEWriteContext;
			
 
				+
			
 
				+typedef struct
			
 
				+{
			
 
				+    // Huffman data.
			
 
				+    uint8_t         ehuffsize[4][257];
			
 
				+    uint16_t        ehuffcode[4][256];
			
 
				+    uint8_t const * ht_bits[4];
			
 
				+    uint8_t const * ht_vals[4];
			
 
				+
			
 
				+    // Cuantization tables.
			
 
				+    uint8_t         qt_luma[64];
			
 
				+    uint8_t         qt_chroma[64];
			
 
				+
			
 
				+    // fwrite by default. User-defined when using tje_encode_with_func.
			
 
				+    TJEWriteContext write_context;
			
 
				+
			
 
				+    // Buffered output. Big performance win when using the usual stdlib implementations.
			
 
				+    size_t          output_buffer_count;
			
 
				+    uint8_t         output_buffer[TJEI_BUFFER_SIZE];
			
 
				+} TJEState;
			
 
				+
			
 
				// Per-component entry of TJEFrameHeader (below).
				typedef struct
				{
				    uint8_t  component_id;
				    // Most significant 4 bits: horizontal. 4 LSB: vertical (A.1.1)
				    uint8_t  sampling_factors;
				    // Quantization table selector.
				    uint8_t  qt;
				} TJEComponentSpec;
			
 
				+
			
 
				+typedef struct
			
 
				+{
			
 
				+    uint16_t         SOF;
			
 
				+    uint16_t         len;                   // 8 + 3 * frame.num_components
			
 
				+    uint8_t          precision;             // Sample precision (bits per sample).
			
 
				+    uint16_t         height;
			
 
				+    uint16_t         width;
			
 
				+    uint8_t          num_components;        // For this implementation, will be equal to 3.
			
 
				+    TJEComponentSpec component_spec[3];
			
 
				+} TJEFrameHeader;
			
 
				+
			
 
				// Per-component entry of TJEScanHeader.
				typedef struct
				{
				    uint8_t component_id;   // Just as with TJEComponentSpec
				    uint8_t dc_ac;          // (dc|ac)
				} TJEFrameComponentSpec;
			
 
				+
			
 
				+typedef struct
			
 
				+{
			
 
				+    uint16_t              SOS;
			
 
				+    uint16_t              len;
			
 
				+    uint8_t               num_components;  // 3.
			
 
				+    TJEFrameComponentSpec component_spec[3];
			
 
				+    uint8_t               first;  // 0
			
 
				+    uint8_t               last;  // 63
			
 
				+    uint8_t               ah_al;  // o
			
 
				+} TJEScanHeader;
			
 
				+#pragma pack(pop)
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+static void tjei_write(TJEState* state, const void* data, size_t num_bytes, size_t num_elements)
			
 
				+{
			
 
				+    size_t to_write = num_bytes * num_elements;
			
 
				+
			
 
				+    // Cap to the buffer available size and copy memory.
			
 
				+    size_t capped_count = tjei_min(to_write, TJEI_BUFFER_SIZE - 1 - state->output_buffer_count);
			
 
				+
			
 
				+    memcpy(state->output_buffer + state->output_buffer_count, data, capped_count);
			
 
				+    state->output_buffer_count += capped_count;
			
 
				+
			
 
				+    assert (state->output_buffer_count <= TJEI_BUFFER_SIZE - 1);
			
 
				+    // Flush the buffer.
			
 
				+    if ( state->output_buffer_count == TJEI_BUFFER_SIZE - 1 ) {
			
 
				+        state->write_context.func(state->write_context.context, state->output_buffer, (int)state->output_buffer_count);
			
 
				+        state->output_buffer_count = 0;
			
 
				+    }
			
 
				+
			
 
				+    // Recursively calling ourselves with the rest of the buffer.
			
 
				+    if (capped_count < to_write) {
			
 
				+        tjei_write(state, (uint8_t*)data+capped_count, to_write - capped_count, 1);
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+static void tjei_write_DQT(TJEState* state, const uint8_t* matrix, uint8_t id)
			
 
				+{
			
 
				+    uint16_t DQT = tjei_be_word(0xffdb);
			
 
				+    tjei_write(state, &DQT, sizeof(uint16_t), 1);
			
 
				+    uint16_t len = tjei_be_word(0x0043); // 2(len) + 1(id) + 64(matrix) = 67 = 0x43
			
 
				+    tjei_write(state, &len, sizeof(uint16_t), 1);
			
 
				+    assert(id < 4);
			
 
				+    uint8_t precision_and_id = id;  // 0x0000 8 bits | 0x00id
			
 
				+    tjei_write(state, &precision_and_id, sizeof(uint8_t), 1);
			
 
				+    // Write matrix
			
 
				+    tjei_write(state, matrix, 64*sizeof(uint8_t), 1);
			
 
				+}
			
 
				+
			
 
				// Huffman table class.  tjei_write_DHT places this value in the upper nibble
				// of the class/id byte.
				typedef enum
				{
				    TJEI_DC = 0,   // DC coefficient table
				    TJEI_AC = 1    // AC coefficient table
				} TJEHuffmanTableClass;
			
 
				+
			
 
				+static void tjei_write_DHT(TJEState* state,
			
 
				+                           uint8_t const * matrix_len,
			
 
				+                           uint8_t const * matrix_val,
			
 
				+                           TJEHuffmanTableClass ht_class,
			
 
				+                           uint8_t id)
			
 
				+{
			
 
				+    int num_values = 0;
			
 
				+    for ( int i = 0; i < 16; ++i ) {
			
 
				+        num_values += matrix_len[i];
			
 
				+    }
			
 
				+    assert(num_values <= 0xffff);
			
 
				+
			
 
				+    uint16_t DHT = tjei_be_word(0xffc4);
			
 
				+    // 2(len) + 1(Tc|th) + 16 (num lengths) + ?? (num values)
			
 
				+    uint16_t len = tjei_be_word(2 + 1 + 16 + (uint16_t)num_values);
			
 
				+    assert(id < 4);
			
 
				+    uint8_t tc_th = (uint8_t)((((uint8_t)ht_class) << 4) | id);
			
 
				+
			
 
				+    tjei_write(state, &DHT, sizeof(uint16_t), 1);
			
 
				+    tjei_write(state, &len, sizeof(uint16_t), 1);
			
 
				+    tjei_write(state, &tc_th, sizeof(uint8_t), 1);
			
 
				+    tjei_write(state, matrix_len, sizeof(uint8_t), 16);
			
 
				+    tjei_write(state, matrix_val, sizeof(uint8_t), (size_t)num_values);
			
 
				+}
			
 
				+// ============================================================
			
 
				+//  Huffman deflation code.
			
 
				+// ============================================================
			
 
				+
			
 
				// Expands the BITS specification (JPEG C.3) into one code length per code.
				//
				// bits[i] is the number of codes that are (i + 1) bits long.  The lengths are
				// written to `huffsize` in increasing order and followed by a terminating 0,
				// so `huffsize` needs room for the sum of bits[0..15] plus one entry.
				// Returns `huffsize`.
				static uint8_t* tjei_huff_get_code_lengths(uint8_t huffsize[/*257*/], uint8_t const * bits)
				{
				    uint8_t* cursor = huffsize;
				    for ( int length = 1; length <= 16; ++length ) {
				        for ( int left = bits[length - 1]; left > 0; --left ) {
				            *cursor++ = (uint8_t)length;
				        }
				    }
				    *cursor = 0;   // terminator
				    return huffsize;
				}
			
 
				+
			
 
				// Assigns a Huffman code to every entry of `huffsize`.
				//
				// `huffsize` is a zero-terminated list of code lengths in increasing order.
				// Codes of equal length are consecutive integers; when the length grows the
				// running code is shifted left once per extra bit.  `count` is only used to
				// assert that no more than `count` codes are written.  Returns `codes`.
				static uint16_t* tjei_huff_get_codes(uint16_t codes[], uint8_t* huffsize, int64_t count)
				{
				    uint16_t next_code = 0;
				    uint8_t  cur_len   = huffsize[0];
				    int      idx       = 0;

				    while ( 1 ) {
				        // Hand out consecutive codes to the run of entries sharing cur_len.
				        do {
				            assert(idx < count);
				            codes[idx] = next_code;
				            ++idx;
				            ++next_code;
				        } while ( huffsize[idx] == cur_len );

				        // A zero length marks the end of the list.
				        if ( huffsize[idx] == 0 ) {
				            break;
				        }

				        // Lengthen the running code until it matches the next entry's length.
				        do {
				            next_code = (uint16_t)(next_code << 1);
				            ++cur_len;
				        } while ( huffsize[idx] != cur_len );
				    }
				    return codes;
				}
			
 
				+
			
 
				// Re-indexes a Huffman table by symbol value: for every k in [0, count) the
				// length huffsize[k] and code huffcode[k] are stored at index huffval[k] of
				// out_ehuffsize / out_ehuffcode.  Entry 0 is always processed, even when
				// `count` is not positive.
				static void tjei_huff_get_extended(uint8_t* out_ehuffsize,
				                                   uint16_t* out_ehuffcode,
				                                   uint8_t const * huffval,
				                                   uint8_t* huffsize,
				                                   uint16_t* huffcode, int64_t count)
				{
				    int idx = 0;
				    do {
				        const uint8_t symbol = huffval[idx];
				        out_ehuffsize[symbol] = huffsize[idx];
				        out_ehuffcode[symbol] = huffcode[idx];
				        ++idx;
				    } while ( idx < count );
				}
			
 
				+// ============================================================
			
 
				+
			
 
				+// Returns:
			
 
				+//  out[1] : number of bits
			
 
				+//  out[0] : bits
			
 
				+TJEI_FORCE_INLINE void tjei_calculate_variable_length_int(int value, uint16_t out[2])
			
 
				+{
			
 
				+    int abs_val = value;
			
 
				+    if ( value < 0 ) {
			
 
				+        abs_val = -abs_val;
			
 
				+        --value;
			
 
				+    }
			
 
				+    out[1] = 1;
			
 
				+    while( abs_val >>= 1 ) {
			
 
				+        ++out[1];
			
 
				+    }
			
 
				+    out[0] = (uint16_t)(value & ((1 << out[1]) - 1));
			
 
				+}
			
 
				+
			
 
				+// Write bits to file.
			
 
				+TJEI_FORCE_INLINE void tjei_write_bits(TJEState* state,
			
 
				+                                       uint32_t* bitbuffer, uint32_t* location,
			
 
				+                                       uint16_t num_bits, uint16_t bits)
			
 
				+{
			
 
				+    //   v-- location
			
 
				+    //  [                     ]   <-- bit buffer
			
 
				+    // 32                     0
			
 
				+    //
			
 
				+    // This call pushes to the bitbuffer and saves the location. Data is pushed
			
 
				+    // from most significant to less significant.
			
 
				+    // When we can write a full byte, we write a byte and shift.
			
 
				+
			
 
				+    // Push the stack.
			
 
				+    uint32_t nloc = *location + num_bits;
			
 
				+    *bitbuffer |= (uint32_t)(bits << (32 - nloc));
			
 
				+    *location = nloc;
			
 
				+    while ( *location >= 8 ) {
			
 
				+        // Grab the most significant byte.
			
 
				+        uint8_t c = (uint8_t)((*bitbuffer) >> 24);
			
 
				+        // Write it to file.
			
 
				+        tjei_write(state, &c, 1, 1);
			
 
				+        if ( c == 0xff )  {
			
 
				+            // Special case: tell JPEG this is not a marker.
			
 
				+            char z = 0;
			
 
				+            tjei_write(state, &z, 1, 1);
			
 
				+        }
			
 
				+        // Pop the stack.
			
 
				+        *bitbuffer <<= 8;
			
 
				+        *location -= 8;
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+// DCT implementation by Thomas G. Lane.
			
 
				+// Obtained through NVIDIA
			
 
				+//  http://developer.download.nvidia.com/SDK/9.5/Samples/vidimaging_samples.html#gpgpu_dct
			
 
				+//
			
 
				+// QUOTE:
			
 
				+//  This implementation is based on Arai, Agui, and Nakajima's algorithm for
			
 
				+//  scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in
			
 
				+//  Japanese, but the algorithm is described in the Pennebaker & Mitchell
			
 
				+//  JPEG textbook (see REFERENCES section in file README).  The following code
			
 
				+//  is based directly on figure 4-8 in P&M.
			
 
				+//
			
 
				// One 8-point pass of the scaled DCT, in place, over the samples
				// p[0], p[stride], ..., p[7 * stride].
				static void tjei_fdct_1d(float* p, int stride)
				{
				    const float c4       = ((float) 0.707106781);
				    const float c6       = ((float) 0.382683433);
				    const float c2_min_6 = ((float) 0.541196100);   /* c2-c6 */
				    const float c2_pls_6 = ((float) 1.306562965);   /* c2+c6 */

				    float* const p0 = p;
				    float* const p1 = p + 1 * stride;
				    float* const p2 = p + 2 * stride;
				    float* const p3 = p + 3 * stride;
				    float* const p4 = p + 4 * stride;
				    float* const p5 = p + 5 * stride;
				    float* const p6 = p + 6 * stride;
				    float* const p7 = p + 7 * stride;

				    /* Sums and differences of the mirrored sample pairs. */
				    const float s07 = *p0 + *p7;
				    const float d07 = *p0 - *p7;
				    const float s16 = *p1 + *p6;
				    const float d16 = *p1 - *p6;
				    const float s25 = *p2 + *p5;
				    const float d25 = *p2 - *p5;
				    const float s34 = *p3 + *p4;
				    const float d34 = *p3 - *p4;

				    /* Even part */
				    {
				        const float e10 = s07 + s34;    /* phase 2 */
				        const float e13 = s07 - s34;
				        const float e11 = s16 + s25;
				        const float e12 = s16 - s25;

				        *p0 = e10 + e11;                /* phase 3 */
				        *p4 = e10 - e11;

				        const float z1 = (e12 + e13) * c4;
				        *p2 = e13 + z1;                 /* phase 5 */
				        *p6 = e13 - z1;
				    }

				    /* Odd part */
				    {
				        const float o10 = d34 + d25;    /* phase 2 */
				        const float o11 = d25 + d16;
				        const float o12 = d16 + d07;

				        /* The rotator is modified from fig 4-8 to avoid extra negations. */
				        const float z5 = (o10 - o12) * c6;
				        const float z2 = c2_min_6 * o10 + z5;
				        const float z4 = c2_pls_6 * o12 + z5;
				        const float z3 = o11 * c4;

				        const float z11 = d07 + z3;     /* phase 5 */
				        const float z13 = d07 - z3;

				        *p5 = z13 + z2;                 /* phase 6 */
				        *p3 = z13 - z2;
				        *p1 = z11 + z4;
				        *p7 = z11 - z4;
				    }
				}

				// In-place forward DCT of the 64 floats at `data`: first over each group of
				// 8 consecutive values (rows), then over each group of 8 values spaced 8
				// apart (columns).
				static void tjei_fdct (float * data)
				{
				    /* Pass 1: process rows. */
				    for ( int row = 0; row < 8; ++row ) {
				        tjei_fdct_1d(data + 8 * row, 1);
				    }

				    /* Pass 2: process columns. */
				    for ( int col = 0; col < 8; ++col ) {
				        tjei_fdct_1d(data + col, 8);
				    }
				}
			
 
				+#if !TJE_USE_FAST_DCT
			
 
				+static float slow_fdct(int u, int v, float* data)
			
 
				+{
			
 
				+#define kPI 3.14159265f
			
 
				+    float res = 0.0f;
			
 
				+    float cu = (u == 0) ? 0.70710678118654f : 1;
			
 
				+    float cv = (v == 0) ? 0.70710678118654f : 1;
			
 
				+    for ( int y = 0; y < 8; ++y ) {
			
 
				+        for ( int x = 0; x < 8; ++x ) {
			
 
				+            res += (data[y * 8 + x]) *
			
 
				+                    cosf(((2.0f * x + 1.0f) * u * kPI) / 16.0f) *
			
 
				+                    cosf(((2.0f * y + 1.0f) * v * kPI) / 16.0f);
			
 
				+        }
			
 
				+    }
			
 
				+    res *= 0.25f * cu * cv;
			
 
				+    return res;
			
 
				+#undef kPI
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				+#define ABS(x) ((x) < 0 ? -(x) : (x))
			
 
				+
			
 
				+static void tjei_encode_and_write_MCU(TJEState* state,
			
 
				+                                      float* mcu,
			
 
				+#if TJE_USE_FAST_DCT
			
 
				+                                      float* qt,  // Pre-processed quantization matrix.
			
 
				+#else
			
 
				+                                      uint8_t* qt,
			
 
				+#endif
			
 
				+                                      uint8_t* huff_dc_len, uint16_t* huff_dc_code, // Huffman tables
			
 
				+                                      uint8_t* huff_ac_len, uint16_t* huff_ac_code,
			
 
				+                                      int* pred,  // Previous DC coefficient
			
 
				+                                      uint32_t* bitbuffer,  // Bitstack.
			
 
				+                                      uint32_t* location)
			
 
				+{
			
 
				+    int du[64];  // Data unit in zig-zag order
			
 
				+
			
 
				+    float dct_mcu[64];
			
 
				+    memcpy(dct_mcu, mcu, 64 * sizeof(float));
			
 
				+
			
 
				+#if TJE_USE_FAST_DCT
			
 
				+    tjei_fdct(dct_mcu);
			
 
				+    for ( int i = 0; i < 64; ++i ) {
			
 
				+        float fval = dct_mcu[i];
			
 
				+        fval *= qt[i];
			
 
				+#if 0
			
 
				+        fval = (fval > 0) ? floorf(fval + 0.5f) : ceilf(fval - 0.5f);
			
 
				+#else
			
 
				+        fval = floorf(fval + 1024 + 0.5f);
			
 
				+        fval -= 1024;
			
 
				+#endif
			
 
				+        int val = (int)fval;
			
 
				+        du[tjei_zig_zag[i]] = val;
			
 
				+    }
			
 
				+#else
			
 
				+    for ( int v = 0; v < 8; ++v ) {
			
 
				+        for ( int u = 0; u < 8; ++u ) {
			
 
				+            dct_mcu[v * 8 + u] = slow_fdct(u, v, mcu);
			
 
				+        }
			
 
				+    }
			
 
				+    for ( int i = 0; i < 64; ++i ) {
			
 
				+        float fval = dct_mcu[i] / (qt[i]);
			
 
				+        int val = (int)((fval > 0) ? floorf(fval + 0.5f) : ceilf(fval - 0.5f));
			
 
				+        du[tjei_zig_zag[i]] = val;
			
 
				+    }
			
 
				+#endif
			
 
				+
			
 
				+    uint16_t vli[2];
			
 
				+
			
 
				+    // Encode DC coefficient.
			
 
				+    int diff = du[0] - *pred;
			
 
				+    *pred = du[0];
			
 
				+    if ( diff != 0 ) {
			
 
				+        tjei_calculate_variable_length_int(diff, vli);
			
 
				+        // Write number of bits with Huffman coding
			
 
				+        tjei_write_bits(state, bitbuffer, location, huff_dc_len[vli[1]], huff_dc_code[vli[1]]);
			
 
				+        // Write the bits.
			
 
				+        tjei_write_bits(state, bitbuffer, location, vli[1], vli[0]);
			
 
				+    } else {
			
 
				+        tjei_write_bits(state, bitbuffer, location, huff_dc_len[0], huff_dc_code[0]);
			
 
				+    }
			
 
				+
			
 
				+    // ==== Encode AC coefficients ====
			
 
				+
			
 
				+    int last_non_zero_i = 0;
			
 
				+    // Find the last non-zero element.
			
 
				+    for ( int i = 63; i > 0; --i ) {
			
 
				+        if (du[i] != 0) {
			
 
				+            last_non_zero_i = i;
			
 
				+            break;
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    for ( int i = 1; i <= last_non_zero_i; ++i ) {
			
 
				+        // If zero, increase count. If >=15, encode (FF,00)
			
 
				+        int zero_count = 0;
			
 
				+        while ( du[i] == 0 ) {
			
 
				+            ++zero_count;
			
 
				+            ++i;
			
 
				+            if (zero_count == 16) {
			
 
				+                // encode (ff,00) == 0xf0
			
 
				+                tjei_write_bits(state, bitbuffer, location, huff_ac_len[0xf0], huff_ac_code[0xf0]);
			
 
				+                zero_count = 0;
			
 
				+            }
			
 
				+        }
			
 
				+        tjei_calculate_variable_length_int(du[i], vli);
			
 
				+
			
 
				+        assert(zero_count < 0x10);
			
 
				+        assert(vli[1] <= 10);
			
 
				+
			
 
				+        uint16_t sym1 = (uint16_t)((uint16_t)zero_count << 4) | vli[1];
			
 
				+
			
 
				+        assert(huff_ac_len[sym1] != 0);
			
 
				+
			
 
				+        // Write symbol 1  --- (RUNLENGTH, SIZE)
			
 
				+        tjei_write_bits(state, bitbuffer, location, huff_ac_len[sym1], huff_ac_code[sym1]);
			
 
				+        // Write symbol 2  --- (AMPLITUDE)
			
 
				+        tjei_write_bits(state, bitbuffer, location, vli[1], vli[0]);
			
 
				+    }
			
 
				+
			
 
				+    if (last_non_zero_i != 63) {
			
 
				+        // write EOB HUFF(00,00)
			
 
				+        tjei_write_bits(state, bitbuffer, location, huff_ac_len[0], huff_ac_code[0]);
			
 
				+    }
			
 
				+    return;
			
 
				+}
			
 
				+
			
 
				// Indices into the four-entry Huffman arrays of TJEState
				// (ht_bits, ht_vals, ehuffsize, ehuffcode).
				enum {
				    TJEI_LUMA_DC   = 0,
				    TJEI_LUMA_AC   = 1,
				    TJEI_CHROMA_DC = 2,
				    TJEI_CHROMA_AC = 3,
				};
			
 
				+
			
 
				#if TJE_USE_FAST_DCT
				// Floating-point quantization tables for the fast DCT path, one 64-entry
				// table for chroma and one for luma.
				struct TJEProcessedQT
				{
				    float chroma[64];
				    float luma[64];
				};
				#endif
			
 
				+
			
 
				+// Set up huffman tables in state.
			
 
				+static void tjei_huff_expand(TJEState* state)
			
 
				+{
			
 
				+    assert(state);
			
 
				+
			
 
				+    state->ht_bits[TJEI_LUMA_DC]   = tjei_default_ht_luma_dc_len;
			
 
				+    state->ht_bits[TJEI_LUMA_AC]   = tjei_default_ht_luma_ac_len;
			
 
				+    state->ht_bits[TJEI_CHROMA_DC] = tjei_default_ht_chroma_dc_len;
			
 
				+    state->ht_bits[TJEI_CHROMA_AC] = tjei_default_ht_chroma_ac_len;
			
 
				+
			
 
				+    state->ht_vals[TJEI_LUMA_DC]   = tjei_default_ht_luma_dc;
			
 
				+    state->ht_vals[TJEI_LUMA_AC]   = tjei_default_ht_luma_ac;
			
 
				+    state->ht_vals[TJEI_CHROMA_DC] = tjei_default_ht_chroma_dc;
			
 
				+    state->ht_vals[TJEI_CHROMA_AC] = tjei_default_ht_chroma_ac;
			
 
				+
			
 
				+    // How many codes in total for each of LUMA_(DC|AC) and CHROMA_(DC|AC)
			
 
				+    int32_t spec_tables_len[4] = { 0 };
			
 
				+
			
 
				+    for ( int i = 0; i < 4; ++i ) {
			
 
				+        for ( int k = 0; k < 16; ++k ) {
			
 
				+            spec_tables_len[i] += state->ht_bits[i][k];
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    // Fill out the extended tables..
			
 
				+    uint8_t huffsize[4][257];
			
 
				+    uint16_t huffcode[4][256];
			
 
				+    for ( int i = 0; i < 4; ++i ) {
			
 
				+        assert (256 >= spec_tables_len[i]);
			
 
				+        tjei_huff_get_code_lengths(huffsize[i], state->ht_bits[i]);
			
 
				+        tjei_huff_get_codes(huffcode[i], huffsize[i], spec_tables_len[i]);
			
 
				+    }
			
 
				+    for ( int i = 0; i < 4; ++i ) {
			
 
				+        int64_t count = spec_tables_len[i];
			
 
				+        tjei_huff_get_extended(state->ehuffsize[i],
			
 
				+                               state->ehuffcode[i],
			
 
				+                               state->ht_vals[i],
			
 
				+                               &huffsize[i][0],
			
 
				+                               &huffcode[i][0], count);
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+//static int tjei_encode_main(TJEState* state,
			
 
				+//                            const unsigned char* src_data,
			
 
				+//                            const int width,
			
 
				+//                            const int height,
			
 
				+//                            const int src_num_components)
			
 
				+//{
			
 
				+//    if (src_num_components != 3 && src_num_components != 4) {
			
 
				+//        return 0;
			
 
				+//    }
			
 
				+//
			
 
				+//    if (width > 0xffff || height > 0xffff) {
			
 
				+//        return 0;
			
 
				+//    }
			
 
				+//
			
 
				+//#if TJE_USE_FAST_DCT
			
 
				+//    struct TJEProcessedQT pqt;
			
 
				+//    // Again, taken from classic japanese implementation.
			
 
				+//    //
			
 
				+//    /* For float AA&N IDCT method, divisors are equal to quantization
			
 
				+//     * coefficients scaled by scalefactor[row]*scalefactor[col], where
			
 
				+//     *   scalefactor[0] = 1
			
 
				+//     *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
			
 
				+//     * We apply a further scale factor of 8.
			
 
				+//     * What's actually stored is 1/divisor so that the inner loop can
			
 
				+//     * use a multiplication rather than a division.
			
 
				+//     */
			
 
				+//
			
 
				+//
			
 
				+//    // build (de)quantization tables
			
 
				+//    for(int y=0; y<8; y++) {
			
 
				+//        for(int x=0; x<8; x++) {
			
 
				+//            int i = y*8 + x;
			
 
				+//            pqt.luma[y*8+x] = 1.0f / (8 * aan_scales[x] * aan_scales[y] * state->qt_luma[tjei_zig_zag[i]]);
			
 
				+//            pqt.chroma[y*8+x] = 1.0f / (8 * aan_scales[x] * aan_scales[y] * state->qt_chroma[tjei_zig_zag[i]]);
			
 
				+//        }
			
 
				+//    }
			
 
				+//#endif
			
 
				+//
			
 
				+//    { // Write header
			
 
				+//        TJEJPEGHeader header;
			
 
				+//        // JFIF header.
			
 
				+//        header.SOI = tjei_be_word(0xffd8);  // Sequential DCT
			
 
				+//        header.APP0 = tjei_be_word(0xffe0);
			
 
				+//
			
 
				+//        uint16_t jfif_len = sizeof(TJEJPEGHeader) - 4 /*SOI & APP0 markers*/;
			
 
				+//        header.jfif_len = tjei_be_word(jfif_len);
			
 
				+//        memcpy(header.jfif_id, (void*)tjeik_jfif_id, 5);
			
 
				+//        header.version = tjei_be_word(0x0102);
			
 
				+//        header.units = 0x01;  // Dots-per-inch
			
 
				+//        header.x_density = tjei_be_word(0x0060);  // 96 DPI
			
 
				+//        header.y_density = tjei_be_word(0x0060);  // 96 DPI
			
 
				+//        header.x_thumb = 0;
			
 
				+//        header.y_thumb = 0;
			
 
				+//        tjei_write(state, &header, sizeof(TJEJPEGHeader), 1);
			
 
				+//    }
			
 
				+//    {  // Write comment
			
 
				+//        TJEJPEGComment com;
			
 
				+//        uint16_t com_len = 2 + sizeof(tjeik_com_str) - 1;
			
 
				+//        // Comment
			
 
				+//        com.com = tjei_be_word(0xfffe);
			
 
				+//        com.com_len = tjei_be_word(com_len);
			
 
				+//        memcpy(com.com_str, (void*)tjeik_com_str, sizeof(tjeik_com_str)-1);
			
 
				+//        tjei_write(state, &com, sizeof(TJEJPEGComment), 1);
			
 
				+//    }
			
 
				+//
			
 
				+//    // Write quantization tables.
			
 
				+//    tjei_write_DQT(state, state->qt_luma, 0x00);
			
 
				+//    tjei_write_DQT(state, state->qt_chroma, 0x01);
			
 
				+//
			
 
				+//    {  // Write the frame marker.
			
 
				+//        TJEFrameHeader header;
			
 
				+//        header.SOF = tjei_be_word(0xffc0);
			
 
				+//        header.len = tjei_be_word(8 + 3 * 3);
			
 
				+//        header.precision = 8;
			
 
				+//        assert(width <= 0xffff);
			
 
				+//        assert(height <= 0xffff);
			
 
				+//        header.width = tjei_be_word((uint16_t)width);
			
 
				+//        header.height = tjei_be_word((uint16_t)height);
			
 
				+//        header.num_components = 3;
			
 
				+//        uint8_t tables[3] = {
			
 
				+//            0,  // Luma component gets luma table (see tjei_write_DQT call above.)
			
 
				+//            1,  // Chroma component gets chroma table
			
 
				+//            1,  // Chroma component gets chroma table
			
 
				+//        };
			
 
				+//        for (int i = 0; i < 3; ++i) {
			
 
				+//            TJEComponentSpec spec;
			
 
				+//            spec.component_id = (uint8_t)(i + 1);  // No particular reason. Just 1, 2, 3.
			
 
				+//            spec.sampling_factors = (uint8_t)0x11;
			
 
				+//            spec.qt = tables[i];
			
 
				+//
			
 
				+//            header.component_spec[i] = spec;
			
 
				+//        }
			
 
				+//        // Write to file.
			
 
				+//        tjei_write(state, &header, sizeof(TJEFrameHeader), 1);
			
 
				+//    }
			
 
				+//
			
 
				+//    tjei_write_DHT(state, state->ht_bits[TJEI_LUMA_DC],   state->ht_vals[TJEI_LUMA_DC], TJEI_DC, 0);
			
 
				+//    tjei_write_DHT(state, state->ht_bits[TJEI_LUMA_AC],   state->ht_vals[TJEI_LUMA_AC], TJEI_AC, 0);
			
 
				+//    tjei_write_DHT(state, state->ht_bits[TJEI_CHROMA_DC], state->ht_vals[TJEI_CHROMA_DC], TJEI_DC, 1);
			
 
				+//    tjei_write_DHT(state, state->ht_bits[TJEI_CHROMA_AC], state->ht_vals[TJEI_CHROMA_AC], TJEI_AC, 1);
			
 
				+//
			
 
				+//    // Write start of scan
			
 
				+//    {
			
 
				+//        TJEScanHeader header;
			
 
				+//        header.SOS = tjei_be_word(0xffda);
			
 
				+//        header.len = tjei_be_word((uint16_t)(6 + (sizeof(TJEFrameComponentSpec) * 3)));
			
 
				+//        header.num_components = 3;
			
 
				+//
			
 
				+//        uint8_t tables[3] = {
			
 
				+//            0x00,
			
 
				+//            0x11,
			
 
				+//            0x11,
			
 
				+//        };
			
 
				+//        for (int i = 0; i < 3; ++i) {
			
 
				+//            TJEFrameComponentSpec cs;
			
 
				+//            // Must be equal to component_id from frame header above.
			
 
				+//            cs.component_id = (uint8_t)(i + 1);
			
 
				+//            cs.dc_ac = (uint8_t)tables[i];
			
 
				+//
			
 
				+//            header.component_spec[i] = cs;
			
 
				+//        }
			
 
				+//        header.first = 0;
			
 
				+//        header.last  = 63;
			
 
				+//        header.ah_al = 0;
			
 
				+//        tjei_write(state, &header, sizeof(TJEScanHeader), 1);
			
 
				+//
			
 
				+//    }
			
 
				+//    // Write compressed data.
			
 
				+//
			
 
				+//    float du_y[64];
			
 
				+//    float du_b[64];
			
 
				+//    float du_r[64];
			
 
				+//
			
 
				+//    // Set diff to 0.
			
 
				+//    int pred_y = 0;
			
 
				+//    int pred_b = 0;
			
 
				+//    int pred_r = 0;
			
 
				+//
			
 
				+//    // Bit stack
			
 
				+//    uint32_t bitbuffer = 0;
			
 
				+//    uint32_t location = 0;
			
 
				+//
			
 
				+//
			
 
				+//    for ( int y = 0; y < height; y += 8 ) {
			
 
				+//        for ( int x = 0; x < width; x += 8 ) {
			
 
				+//            // Block loop: ====
			
 
				+//            for ( int off_y = 0; off_y < 8; ++off_y ) {
			
 
				+//                for ( int off_x = 0; off_x < 8; ++off_x ) {
			
 
				+//                    int block_index = (off_y * 8 + off_x);
			
 
				+//
			
 
				+//                    int src_index = (((y + off_y) * width) + (x + off_x)) * src_num_components;
			
 
				+//
			
 
				+//                    int col = x + off_x;
			
 
				+//                    int row = y + off_y;
			
 
				+//
			
 
				+//                    if(row >= height) {
			
 
				+//                        src_index -= (width * (row - height + 1)) * src_num_components;
			
 
				+//                    }
			
 
				+//                    if(col >= width) {
			
 
				+//                        src_index -= (col - width + 1) * src_num_components;
			
 
				+//                    }
			
 
				+//                    assert(src_index < width * height * src_num_components);
			
 
				+//
			
 
				+//                    uint8_t r = src_data[src_index + 0];
			
 
				+//                    uint8_t g = src_data[src_index + 1];
			
 
				+//                    uint8_t b = src_data[src_index + 2];
			
 
				+//
			
 
				+//                    float luma = 0.299f   * r + 0.587f    * g + 0.114f    * b - 128;
			
 
				+//                    float cb   = -0.1687f * r - 0.3313f   * g + 0.5f      * b;
			
 
				+//                    float cr   = 0.5f     * r - 0.4187f   * g - 0.0813f   * b;
			
 
				+//
			
 
				+//                    du_y[block_index] = luma;
			
 
				+//                    du_b[block_index] = cb;
			
 
				+//                    du_r[block_index] = cr;
			
 
				+//                }
			
 
				+//            }
			
 
				+//
			
 
				+//            tjei_encode_and_write_MCU(state, du_y,
			
 
				+//#if TJE_USE_FAST_DCT
			
 
				+//                                     pqt.luma,
			
 
				+//#else
			
 
				+//                                     state->qt_luma,
			
 
				+//#endif
			
 
				+//                                     state->ehuffsize[TJEI_LUMA_DC], state->ehuffcode[TJEI_LUMA_DC],
			
 
				+//                                     state->ehuffsize[TJEI_LUMA_AC], state->ehuffcode[TJEI_LUMA_AC],
			
 
				+//                                     &pred_y, &bitbuffer, &location);
			
 
				+//            tjei_encode_and_write_MCU(state, du_b,
			
 
				+//#if TJE_USE_FAST_DCT
			
 
				+//                                     pqt.chroma,
			
 
				+//#else
			
 
				+//                                     state->qt_chroma,
			
 
				+//#endif
			
 
				+//                                     state->ehuffsize[TJEI_CHROMA_DC], state->ehuffcode[TJEI_CHROMA_DC],
			
 
				+//                                     state->ehuffsize[TJEI_CHROMA_AC], state->ehuffcode[TJEI_CHROMA_AC],
			
 
				+//                                     &pred_b, &bitbuffer, &location);
			
 
				+//            tjei_encode_and_write_MCU(state, du_r,
			
 
				+//#if TJE_USE_FAST_DCT
			
 
				+//                                     pqt.chroma,
			
 
				+//#else
			
 
				+//                                     state->qt_chroma,
			
 
				+//#endif
			
 
				+//                                     state->ehuffsize[TJEI_CHROMA_DC], state->ehuffcode[TJEI_CHROMA_DC],
			
 
				+//                                     state->ehuffsize[TJEI_CHROMA_AC], state->ehuffcode[TJEI_CHROMA_AC],
			
 
				+//                                     &pred_r, &bitbuffer, &location);
			
 
				+//
			
 
				+//
			
 
				+//        }
			
 
				+//    }
			
 
				+//
			
 
				+//    // Finish the image.
			
 
				+//    { // Flush
			
 
				+//        if (location > 0 && location < 8) {
			
 
				+//            tjei_write_bits(state, &bitbuffer, &location, (uint16_t)(8 - location), 0);
			
 
				+//        }
			
 
				+//    }
			
 
				+//    uint16_t EOI = tjei_be_word(0xffd9);
			
 
				+//    tjei_write(state, &EOI, sizeof(uint16_t), 1);
			
 
				+//
			
 
				+//    if (state->output_buffer_count) {
			
 
				+//        state->write_context.func(state->write_context.context, state->output_buffer, (int)state->output_buffer_count);
			
 
				+//        state->output_buffer_count = 0;
			
 
				+//    }
			
 
				+//
			
 
				+//    return 1;
			
 
				+//}
			
 
				+
			
 
				+
			
 
				+//int tje_encode_with_func(tje_write_func* func,
			
 
				+//                         void* context,
			
 
				+//                         const int quality,
			
 
				+//                         const int width,
			
 
				+//                         const int height,
			
 
				+//                         const int num_components,
			
 
				+//                         const unsigned char* src_data)
			
 
				+//{
			
 
				+//    if (quality < 1 || quality > 3) {
			
 
				+//        tje_log("[ERROR] -- Valid 'quality' values are 1 (lowest), 2, or 3 (highest)");
			
 
				+//        return 0;
			
 
				+//    }
			
 
				+//
			
 
				+//    TJEState state = { 0 };
			
 
				+//
			
 
				+//
			
 
				+//
			
 
				+//    TJEWriteContext wc = { 0 };
			
 
				+//
			
 
				+//    wc.context = context;
			
 
				+//    wc.func = func;
			
 
				+//
			
 
				+//    state.write_context = wc;
			
 
				+//
			
 
				+//
			
 
				+//    tjei_huff_expand(&state);
			
 
				+//
			
 
				+//    int result = tjei_encode_main(&state, src_data, width, height, num_components);
			
 
				+//
			
 
				+//    return result;
			
 
				+//}
			
 
				+// ============================================================
			
 
				+
			
 
				+typedef struct
			
 
				+{
			
 
				+	TJEState encode_state;
			
 
				+#if TJE_USE_FAST_DCT
			
 
				+    struct TJEProcessedQT pqt;
			
 
				+#endif
			
 
				+	uint32_t width;
			
 
				+	uint32_t height;
			
 
				+	uint32_t num_components;
			
 
				+	uint32_t cur_height;
			
 
				+    // Set diff to 0.
			
 
				+    int pred_y;
			
 
				+    int pred_b;
			
 
				+    int pred_r;
			
 
				+
			
 
				+    // Bit stack
			
 
				+    uint32_t bitbuffer;
			
 
				+    uint32_t location;
			
 
				+
			
 
				+    int32_t y_diff;
			
 
				+}TJE_ContextStruct;
			
 
				+void *jpeg_encode_init(tje_write_func* func, void* context, uint8_t quality, uint32_t width, uint32_t height, uint8_t src_num_components, int32_t y_diff)
			
 
				+{
			
 
				+    if (quality < 1 || quality > 3) {
			
 
				+        tje_log("Valid 'quality' %d values are 1 (lowest), 2, or 3 (highest)", quality);
			
 
				+        return NULL;
			
 
				+    }
			
 
				+    if (src_num_components != 3 && src_num_components != 4) {
			
 
				+        return NULL;
			
 
				+    }
			
 
				+
			
 
				+    if (width > 0xffff || height > 0xffff) {
			
 
				+        return NULL;
			
 
				+    }
			
 
				+
			
 
				+	TJE_ContextStruct *ctx = zalloc(sizeof(TJE_ContextStruct));
			
 
				+    uint8_t qt_factor = 1;
			
 
				+    switch(quality) {
			
 
				+    case 3:
			
 
				+        for ( int i = 0; i < 64; ++i ) {
			
 
				+        	ctx->encode_state.qt_luma[i]   = 1;
			
 
				+        	ctx->encode_state.qt_chroma[i] = 1;
			
 
				+        }
			
 
				+        break;
			
 
				+    case 2:
			
 
				+        qt_factor = 10;
			
 
				+        // don't break. fall through.
			
 
				+    case 1:
			
 
				+        for ( int i = 0; i < 64; ++i ) {
			
 
				+        	ctx->encode_state.qt_luma[i]   = tjei_default_qt_luma_from_spec[i] / qt_factor;
			
 
				+            if (ctx->encode_state.qt_luma[i] == 0) {
			
 
				+            	ctx->encode_state.qt_luma[i] = 1;
			
 
				+            }
			
 
				+            ctx->encode_state.qt_chroma[i] = tjei_default_qt_chroma_from_paper[i] / qt_factor;
			
 
				+            if (ctx->encode_state.qt_chroma[i] == 0) {
			
 
				+            	ctx->encode_state.qt_chroma[i] = 1;
			
 
				+            }
			
 
				+        }
			
 
				+        break;
			
 
				+    default:
			
 
				+        assert(!"invalid code path");
			
 
				+        break;
			
 
				+    }
			
 
				+
			
 
				+    ctx->encode_state.write_context.func = func;
			
 
				+    ctx->encode_state.write_context.context = context;
			
 
				+    ctx->width = width;
			
 
				+    ctx->height = height;
			
 
				+    ctx->num_components = src_num_components;
			
 
				+    tjei_huff_expand(&ctx->encode_state);
			
 
				+    TJEState* state = &ctx->encode_state;
			
 
				+#if TJE_USE_FAST_DCT
			
 
				+    // Again, taken from classic japanese implementation.
			
 
				+    //
			
 
				+    /* For float AA&N IDCT method, divisors are equal to quantization
			
 
				+     * coefficients scaled by scalefactor[row]*scalefactor[col], where
			
 
				+     *   scalefactor[0] = 1
			
 
				+     *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
			
 
				+     * We apply a further scale factor of 8.
			
 
				+     * What's actually stored is 1/divisor so that the inner loop can
			
 
				+     * use a multiplication rather than a division.
			
 
				+     */
			
 
				+
			
 
				+
			
 
				+    // build (de)quantization tables
			
 
				+    for(int y=0; y<8; y++) {
			
 
				+        for(int x=0; x<8; x++) {
			
 
				+            int i = y*8 + x;
			
 
				+            ctx->pqt.luma[y*8+x] = 1.0f / (8 * aan_scales[x] * aan_scales[y] * state->qt_luma[tjei_zig_zag[i]]);
			
 
				+            ctx->pqt.chroma[y*8+x] = 1.0f / (8 * aan_scales[x] * aan_scales[y] * state->qt_chroma[tjei_zig_zag[i]]);
			
 
				+        }
			
 
				+    }
			
 
				+#endif
			
 
				+
			
 
				+    { // Write header
			
 
				+        TJEJPEGHeader header;
			
 
				+        // JFIF header.
			
 
				+        header.SOI = tjei_be_word(0xffd8);  // Sequential DCT
			
 
				+        header.APP0 = tjei_be_word(0xffe0);
			
 
				+
			
 
				+        uint16_t jfif_len = sizeof(TJEJPEGHeader) - 4 /*SOI & APP0 markers*/;
			
 
				+        header.jfif_len = tjei_be_word(jfif_len);
			
 
				+        memcpy(header.jfif_id, (void*)tjeik_jfif_id, 5);
			
 
				+        header.version = tjei_be_word(0x0102);
			
 
				+        header.units = 0x01;  // Dots-per-inch
			
 
				+        header.x_density = tjei_be_word(0x0060);  // 96 DPI
			
 
				+        header.y_density = tjei_be_word(0x0060);  // 96 DPI
			
 
				+        header.x_thumb = 0;
			
 
				+        header.y_thumb = 0;
			
 
				+        tjei_write(state, &header, sizeof(TJEJPEGHeader), 1);
			
 
				+    }
			
 
				+    {  // Write comment
			
 
				+        TJEJPEGComment com;
			
 
				+        uint16_t com_len = 2 + sizeof(tjeik_com_str) - 1;
			
 
				+        // Comment
			
 
				+        com.com = tjei_be_word(0xfffe);
			
 
				+        com.com_len = tjei_be_word(com_len);
			
 
				+        memcpy(com.com_str, (void*)tjeik_com_str, sizeof(tjeik_com_str)-1);
			
 
				+        tjei_write(state, &com, sizeof(TJEJPEGComment), 1);
			
 
				+    }
			
 
				+
			
 
				+    // Write quantization tables.
			
 
				+    tjei_write_DQT(state, state->qt_luma, 0x00);
			
 
				+    tjei_write_DQT(state, state->qt_chroma, 0x01);
			
 
				+
			
 
				+    {  // Write the frame marker.
			
 
				+        TJEFrameHeader header;
			
 
				+        header.SOF = tjei_be_word(0xffc0);
			
 
				+        header.len = tjei_be_word(8 + 3 * 3);
			
 
				+        header.precision = 8;
			
 
				+        assert(width <= 0xffff);
			
 
				+        assert(height <= 0xffff);
			
 
				+        header.width = tjei_be_word((uint16_t)width);
			
 
				+        header.height = tjei_be_word((uint16_t)height);
			
 
				+        header.num_components = 3;
			
 
				+        uint8_t tables[3] = {
			
 
				+            0,  // Luma component gets luma table (see tjei_write_DQT call above.)
			
 
				+            1,  // Chroma component gets chroma table
			
 
				+            1,  // Chroma component gets chroma table
			
 
				+        };
			
 
				+        for (int i = 0; i < 3; ++i) {
			
 
				+            TJEComponentSpec spec;
			
 
				+            spec.component_id = (uint8_t)(i + 1);  // No particular reason. Just 1, 2, 3.
			
 
				+            spec.sampling_factors = (uint8_t)0x11;
			
 
				+            spec.qt = tables[i];
			
 
				+
			
 
				+            header.component_spec[i] = spec;
			
 
				+        }
			
 
				+        // Write to file.
			
 
				+        tjei_write(state, &header, sizeof(TJEFrameHeader), 1);
			
 
				+    }
			
 
				+
			
 
				+    tjei_write_DHT(state, state->ht_bits[TJEI_LUMA_DC],   state->ht_vals[TJEI_LUMA_DC], TJEI_DC, 0);
			
 
				+    tjei_write_DHT(state, state->ht_bits[TJEI_LUMA_AC],   state->ht_vals[TJEI_LUMA_AC], TJEI_AC, 0);
			
 
				+    tjei_write_DHT(state, state->ht_bits[TJEI_CHROMA_DC], state->ht_vals[TJEI_CHROMA_DC], TJEI_DC, 1);
			
 
				+    tjei_write_DHT(state, state->ht_bits[TJEI_CHROMA_AC], state->ht_vals[TJEI_CHROMA_AC], TJEI_AC, 1);
			
 
				+
			
 
				+    // Write start of scan
			
 
				+    {
			
 
				+        TJEScanHeader header;
			
 
				+        header.SOS = tjei_be_word(0xffda);
			
 
				+        header.len = tjei_be_word((uint16_t)(6 + (sizeof(TJEFrameComponentSpec) * 3)));
			
 
				+        header.num_components = 3;
			
 
				+
			
 
				+        uint8_t tables[3] = {
			
 
				+            0x00,
			
 
				+            0x11,
			
 
				+            0x11,
			
 
				+        };
			
 
				+        for (int i = 0; i < 3; ++i) {
			
 
				+            TJEFrameComponentSpec cs;
			
 
				+            // Must be equal to component_id from frame header above.
			
 
				+            cs.component_id = (uint8_t)(i + 1);
			
 
				+            cs.dc_ac = (uint8_t)tables[i];
			
 
				+
			
 
				+            header.component_spec[i] = cs;
			
 
				+        }
			
 
				+        header.first = 0;
			
 
				+        header.last  = 63;
			
 
				+        header.ah_al = 0;
			
 
				+        tjei_write(state, &header, sizeof(TJEScanHeader), 1);
			
 
				+
			
 
				+    }
			
 
				+    ctx->y_diff = y_diff;
			
 
				+    return ctx;
			
 
				+}
			
 
				+
			
 
				+void jpeg_encode_run(void *ctx, uint8_t *src_data, uint8_t is_rgb)
			
 
				+{
			
 
				+    float du_y[64];
			
 
				+    float du_b[64];
			
 
				+    float du_r[64];
			
 
				+    TJE_ContextStruct *handle = (TJE_ContextStruct *)ctx;
			
 
				+    TJEState* state = &handle->encode_state;
			
 
				+    uint32_t width = handle->width;
			
 
				+    uint32_t height = handle->height;
			
 
				+    uint32_t src_num_components = handle->num_components;
			
 
				+    uint32_t block_index, src_index, col, row;
			
 
				+    uint8_t r,g,b;
			
 
				+	for ( uint32_t x = 0; x < width; x += 8 ) {
			
 
				+		// Block loop: ====
			
 
				+		for ( uint32_t off_y = 0; off_y < 8; ++off_y ) {
			
 
				+			for ( uint32_t off_x = 0; off_x < 8; ++off_x ) {
			
 
				+				block_index = (off_y * 8 + off_x);
			
 
				+				src_index = (((0 + off_y) * width) + (x + off_x)) * src_num_components;
			
 
				+				if (is_rgb)
			
 
				+				{
			
 
				+					r = src_data[src_index + 0];
			
 
				+					g = src_data[src_index + 1];
			
 
				+					b = src_data[src_index + 2];
			
 
				+					du_y[block_index] = 0.299f   * r + 0.587f    * g + 0.114f    * b + handle->y_diff;
			
 
				+					du_b[block_index] = -0.1687f * r - 0.3313f   * g + 0.5f      * b;
			
 
				+					du_r[block_index] = 0.5f     * r - 0.4187f   * g - 0.0813f   * b;
			
 
				+				}
			
 
				+				else
			
 
				+				{
			
 
				+					du_y[block_index] = src_data[src_index + 0];
			
 
				+					du_b[block_index] = src_data[src_index + 1];
			
 
				+					du_r[block_index] = src_data[src_index + 2];
			
 
				+					du_y[block_index] += handle->y_diff;
			
 
				+					du_b[block_index] -= 128;
			
 
				+					du_r[block_index] -= 128;
			
 
				+				}
			
 
				+			}
			
 
				+		}
			
 
				+
			
 
				+		tjei_encode_and_write_MCU(state, du_y,
			
 
				+#if TJE_USE_FAST_DCT
			
 
				+				handle->pqt.luma,
			
 
				+#else
			
 
				+								 state->qt_luma,
			
 
				+#endif
			
 
				+								 state->ehuffsize[TJEI_LUMA_DC], state->ehuffcode[TJEI_LUMA_DC],
			
 
				+								 state->ehuffsize[TJEI_LUMA_AC], state->ehuffcode[TJEI_LUMA_AC],
			
 
				+								 &handle->pred_y, &handle->bitbuffer, &handle->location);
			
 
				+		tjei_encode_and_write_MCU(state, du_b,
			
 
				+#if TJE_USE_FAST_DCT
			
 
				+				handle->pqt.chroma,
			
 
				+#else
			
 
				+								 state->qt_chroma,
			
 
				+#endif
			
 
				+								 state->ehuffsize[TJEI_CHROMA_DC], state->ehuffcode[TJEI_CHROMA_DC],
			
 
				+								 state->ehuffsize[TJEI_CHROMA_AC], state->ehuffcode[TJEI_CHROMA_AC],
			
 
				+								 &handle->pred_b, &handle->bitbuffer, &handle->location);
			
 
				+		tjei_encode_and_write_MCU(state, du_r,
			
 
				+#if TJE_USE_FAST_DCT
			
 
				+				handle->pqt.chroma,
			
 
				+#else
			
 
				+								 state->qt_chroma,
			
 
				+#endif
			
 
				+								 state->ehuffsize[TJEI_CHROMA_DC], state->ehuffcode[TJEI_CHROMA_DC],
			
 
				+								 state->ehuffsize[TJEI_CHROMA_AC], state->ehuffcode[TJEI_CHROMA_AC],
			
 
				+								 &handle->pred_r, &handle->bitbuffer, &handle->location);
			
 
				+
			
 
				+
			
 
				+	}
			
 
				+    handle->cur_height += 8;
			
 
				+}
			
 
				+
			
 
				+void jpeg_encode_end(void *ctx)
			
 
				+{
			
 
				+    uint16_t EOI = tjei_be_word(0xffd9);
			
 
				+    TJE_ContextStruct *handle = (TJE_ContextStruct *)ctx;
			
 
				+    TJEState* state = &handle->encode_state;
			
 
				+    tjei_write(state, &EOI, sizeof(uint16_t), 1);
			
 
				+
			
 
				+    if (state->output_buffer_count) {
			
 
				+        state->write_context.func(state->write_context.context, state->output_buffer, (int)state->output_buffer_count);
			
 
				+        state->output_buffer_count = 0;
			
 
				+    }
			
 
				+}
			
--- a/Third_Party/jpeg_encode/tiny_jpeg.h
+++ b/Third_Party/jpeg_encode/tiny_jpeg.h
@@ -0,0 +1,67 @@
 
				+// ============================================================
			
 
				+// Public interface:
			
 
				+// ============================================================
			
 
				+
			
 
				+#ifndef TJE_HEADER_GUARD
			
 
				+#define TJE_HEADER_GUARD
			
 
				+#include "bsp_common.h"
			
 
				+#define TJEI_BUFFER_SIZE 1024
			
 
				+
			
 
				+#ifdef _WIN32
			
 
				+
			
 
				+#include <windows.h>
			
 
				+#ifndef snprintf
			
 
				+#define snprintf sprintf_s
			
 
				+#endif
			
 
				+// Not quite the same but it works for us. If I am not mistaken, it differs
			
 
				+// only in the return value.
			
 
				+
			
 
				+#endif
			
 
				+
			
 
				+#ifndef NDEBUG
			
 
				+
			
 
				+#define tje_log DBG
			
 
				+
			
 
				+#else  // NDEBUG
			
 
				+#define tje_log(...)
			
 
				+#endif  // NDEBUG
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+extern "C"
			
 
				+{
			
 
				+#endif
			
 
				+
			
 
				+#if defined(__GNUC__) || defined(__clang__)
			
 
				+#pragma GCC diagnostic push
			
 
				+#pragma GCC diagnostic ignored "-Wmissing-field-initializers"  // We use {0}, which will zero-out the struct.
			
 
				+#pragma GCC diagnostic ignored "-Wmissing-braces"
			
 
				+#pragma GCC diagnostic ignored "-Wpadded"
			
 
				+#endif
			
 
				+
			
 
				// - Streaming encoder interface -
				//
				// Output callback. It receives the `context` pointer given to
				// jpeg_encode_init() and an array `data` of `size` bytes, which can be
				// written directly to a file. There is no need to free the data.
				typedef void tje_write_func(void* context, void* data, int size);

				// Creates an encoder context and emits the JPEG headers.
				//  quality             1 (lowest), 2, or 3 (highest)
				//  width, height       image size in pixels, each at most 0xffff
				//  src_num_components  bytes per source pixel, 3 or 4
				//  y_diff              value added to every luma sample while encoding
				// Returns NULL when an argument is out of range.
				void *jpeg_encode_init(tje_write_func* func, void* context, uint8_t quality, uint32_t width, uint32_t height, uint8_t src_num_components, int32_t y_diff);

				// Encodes one strip of 8 pixel rows starting at `src_data`. A non-zero
				// `is_rgb` means the first three bytes of each pixel are R, G, B; zero means
				// they are already Y, Cb, Cr.
				void jpeg_encode_run(void *ctx, uint8_t *src_data, uint8_t is_rgb);

				// Writes the end-of-image marker and flushes buffered output to the callback.
				void jpeg_encode_end(void *ctx);
			
 
				+// ============================================================
			
 
				+//
			
 
				+#if defined(__GNUC__) || defined(__clang__)
			
 
				+#pragma GCC diagnostic pop
			
 
				+#endif
			
 
				+
			
 
				+
			
 
				+#ifdef __cplusplus
			
 
				+}  // extern C
			
 
				+#endif
			
 
				+#endif
			
--- a/application/src/luat_camera_air105.c
+++ b/application/src/luat_camera_air105.c
@@ -38,16 +38,23 @@
 
				 
			
 
				 typedef struct
			
 
				 {
			
 
				+	Buffer_Struct FileBuffer;
			
 
				+	Buffer_Struct JPEGSavePath;
			
 
				 	uint8_t *DataCache;
			
 
				 	uint32_t TotalSize;
			
 
				 	uint32_t CurSize;
			
 
				 	uint32_t VLen;
			
 
				 	uint32_t drawVLen;
			
 
				+	int32_t YDiff;
			
 
				 	uint16_t Width;
			
 
				 	uint16_t Height;
			
 
				 	uint8_t DataBytes;
			
 
				 	uint8_t IsDecode;
			
 
				 	uint8_t BufferFull;
			
 
				+	uint8_t JPEGQuality;
			
 
				+	uint8_t CaptureMode;
			
 
				+	uint8_t CaptureWait;
			
 
				+	uint8_t JPEGEncodeDone;
			
 
				 }Camera_CtrlStruct;
			
 
				 
			
 
				 static Camera_CtrlStruct prvCamera;
			
@@ -57,6 +64,43 @@ static struct luat_camera_conf camera_conf;
 
				 static luat_lcd_conf_t* lcd_conf;
			
 
				 static uint8_t draw_lcd = 0;
			
 
				 
			
 
				// JPEG encoder output callback: appends the `Size` bytes at `pData` to the
				// buffer identified by `Cxt`.
				static void Camera_SaveJPEGData(void *Cxt, void *pData, int Size)
				{
					void *Sink = Cxt;

					OS_BufferWrite(Sink, pData, Size);
				}
			
 
				+
			
 
				+static int32_t Camera_SaveJPEGDone(void *pData, void *pParam)
			
 
				+{
			
 
				+	HANDLE fd;
			
 
				+	if (prvCamera.JPEGSavePath.Data)
			
 
				+	{
			
 
				+		fd = luat_fs_fopen(prvCamera.JPEGSavePath.Data, "w");
			
 
				+	}
			
 
				+	else
			
 
				+	{
			
 
				+		fd = luat_fs_fopen("/capture.jpg", "w");
			
 
				+	}
			
 
				+	if (fd)
			
 
				+	{
			
 
				+		LLOGD("capture data %ubyte", luat_fs_fwrite(prvCamera.FileBuffer.Data, prvCamera.FileBuffer.Pos, 1, fd));
			
 
				+		luat_fs_fclose(fd);
			
 
				+	}
			
 
				+	OS_DeInitBuffer(&prvCamera.FileBuffer);
			
 
				+	prvCamera.CaptureWait = 1;
			
 
				+	prvCamera.JPEGEncodeDone = 1;
			
 
				+	prvCamera.CaptureMode = 0;
			
 
				+	Core_EncodeJPEGSetup(NULL, &prvCamera);
			
 
				+    rtos_msg_t msg = {0};
			
 
				+    {
			
 
				+        msg.handler = l_camera_handler;
			
 
				+        msg.ptr = NULL;
			
 
				+        msg.arg1 = 0;
			
 
				+        msg.arg2 = 1;
			
 
				+        luat_msgbus_put(&msg, 1);
			
 
				+	}
			
 
				+
			
 
				+}
			
 
				+
			
 
				 void DecodeQR_CBDataFun(uint8_t *Data, uint32_t Len){
			
 
				     prvCamera.IsDecode = 0;
			
 
				     rtos_msg_t msg = {0};
			
@@ -123,15 +167,43 @@ static int32_t Camera_DrawLcd(void *DrawData, uint8_t Scan){
 
				 static int32_t prvCamera_DCMICB(void *pData, void *pParam){
			
 
				 
			
 
				     uint8_t zbar_scan = (uint8_t)pParam;
			
 
				+    Buffer_Struct *RxBuf = (Buffer_Struct *)pData;
			
 
				+    if (prvCamera.CaptureMode){
			
 
				+    	if (!pData){
			
 
				+    		if (prvCamera.CaptureWait && prvCamera.JPEGEncodeDone)
			
 
				+    		{
			
 
				+    			prvCamera.CaptureWait = 0;
			
 
				+    			prvCamera.JPEGEncodeDone = 0;
			
 
				+    			Core_EncodeJPEGStart(prvCamera.Width, prvCamera.Height, prvCamera.JPEGQuality, prvCamera.YDiff);
			
 
				+    		}
			
 
				+    		else if (!prvCamera.CaptureWait)
			
 
				+    		{
			
 
				+    			prvCamera.CaptureWait = 1;
			
 
				+    			prvCamera.JPEGEncodeDone = 0;
			
 
				+    			Core_EncodeJPEGEnd(Camera_SaveJPEGDone, 0);
			
 
				+    		}
			
 
				+    	}
			
 
				+    	else
			
 
				+    	{
			
 
				+            if (!prvCamera.CaptureWait)
			
 
				+            {
			
 
				+                uint8_t *data = malloc(RxBuf->MaxLen * 4);
			
 
				+                memcpy(data, RxBuf->Data, RxBuf->MaxLen * 4);
			
 
				+                Core_EncodeJPEGRun(data, RxBuf->MaxLen * 4, zbar_scan?COLOR_MODE_GRAY:COLOR_MODE_RGB_565);
			
 
				+            }
			
 
				+    	}
			
 
				+    	return 0;
			
 
				+    }
			
 
				     if (zbar_scan == 0){
			
 
				-        Buffer_Struct *RxBuf = (Buffer_Struct *)pData;
			
 
				+
			
 
				         if (!pData){
			
 
				             prvCamera.VLen = 0;
			
 
				             return 0;
			
 
				         }
			
 
				         if (draw_lcd)
			
 
				+        {
			
 
				             Camera_DrawLcd(RxBuf->Data, zbar_scan);
			
 
				-
			
 
				+        }
			
 
				         prvCamera.VLen += prvCamera.drawVLen;
			
 
				         return 0;
			
 
				     }else if (zbar_scan == 1){
			
@@ -217,11 +289,11 @@ int luat_camera_init(luat_camera_conf_t *conf){
 
				     memcpy(&camera_conf, conf, sizeof(luat_camera_conf_t));
			
 
				     lcd_conf = conf->lcd_conf;
			
 
				     draw_lcd = conf->draw_lcd;
			
 
				-
			
 
				+    prvCamera.Width = lcd_conf->w;
			
 
				+    prvCamera.Height = lcd_conf->h;
			
 
				     if (conf->zbar_scan == 1){
			
 
				         prvCamera.DataCache = NULL;
			
 
				-        prvCamera.Width = lcd_conf->w;
			
 
				-        prvCamera.Height = lcd_conf->h;
			
 
				+
			
 
				         prvCamera.TotalSize = prvCamera.Width * prvCamera.Height;
			
 
				         prvCamera.DataBytes = 1;
			
 
				     }
			
@@ -258,7 +330,8 @@ int luat_camera_start(int id)
 
				 	}
			
 
				     if (camera_conf.zbar_scan == 0){
			
 
				         DCMI_SetCROPConfig(1, (camera_conf.sensor_height-lcd_conf->h)/2, ((camera_conf.sensor_width-lcd_conf->w)/2)*2, lcd_conf->h - 1, 2*lcd_conf->w - 1);
			
 
				-        DCMI_CaptureSwitch(1, 0,lcd_conf->w, lcd_conf->h, 2, &prvCamera.drawVLen);
			
 
				+        DCMI_CaptureSwitch(1, 0, lcd_conf->w, lcd_conf->h, 2, &prvCamera.drawVLen);
			
 
				+        prvCamera.CaptureMode = 0;
			
 
				         prvCamera.VLen = 0;
			
 
				     }else if(camera_conf.zbar_scan == 1){
			
 
				         DCMI_SetCROPConfig(1, (camera_conf.sensor_height-prvCamera.Height)/2, ((camera_conf.sensor_width-prvCamera.Width)/2)*prvCamera.DataBytes, prvCamera.Height - 1, prvCamera.DataBytes*prvCamera.Width - 1);
			
@@ -267,6 +340,32 @@ int luat_camera_start(int id)
 
				     return 0;
			
 
				 }
			
 
				 
			
 
				+/**
				+ * Stop the running capture and restart the camera with CaptureMode set to 1
				+ * so that incoming frames are handed to the JPEG encoder.
				+ *
				+ * Steps, in order: switch the DCMI capture off, release prvCamera.DataCache,
				+ * store a copy of `path` in prvCamera.JPEGSavePath (only when `path` is
				+ * non-NULL), re-initialise prvCamera.FileBuffer with 16 KiB, pass
				+ * Camera_SaveJPEGData and &prvCamera to Core_EncodeJPEGSetup(), call
				+ * luat_camera_start(id), then record the capture parameters and flags.
				+ *
				+ * @param id      camera id, forwarded unchanged to luat_camera_start()
				+ * @param y_diff  stored in prvCamera.YDiff
				+ * @param quality stored in prvCamera.JPEGQuality
				+ * @param path    NUL-terminated save path; when NULL, prvCamera.JPEGSavePath
				+ *                is left as it was
				+ * @return always 0 (the result of luat_camera_start() is not checked)
				+ */
				+int luat_camera_capture(int id, int y_diff, uint8_t quality, const char *path)
				+{
				+	DCMI_CaptureSwitch(0, 0, 0, 0, 0, NULL);
				+	if (prvCamera.DataCache)
				+	{
				+		free(prvCamera.DataCache);
				+		prvCamera.DataCache = NULL;
				+	}
				+
				+	if (path)
				+	{
				+		// Copy the terminating NUL together with the text so the stored path
				+		// is a valid C string no matter how OS_ReInitBuffer() initialises
				+		// the buffer contents.
				+		size_t path_size = strlen(path) + 1;
				+		OS_ReInitBuffer(&prvCamera.JPEGSavePath, path_size);
				+		memcpy(prvCamera.JPEGSavePath.Data, path, path_size);
				+	}
				+	OS_ReInitBuffer(&prvCamera.FileBuffer, 16 * 1024);
				+	Core_EncodeJPEGSetup(Camera_SaveJPEGData, &prvCamera);
				+	luat_camera_start(id);
				+	// Written after luat_camera_start() on purpose: its zbar_scan == 0 path
				+	// assigns CaptureMode = 0, so CaptureMode = 1 has to come afterwards.
				+	prvCamera.YDiff = y_diff;
				+	prvCamera.JPEGQuality = quality;
				+	prvCamera.CaptureMode = 1;
				+	prvCamera.CaptureWait = 1;
				+	prvCamera.JPEGEncodeDone = 1;
				+
				+    return 0;
				+}
			
 
				+
			
 
				 int luat_camera_stop(int id)
			
 
				 {
			
 
				 	DCMI_CaptureSwitch(0, 0, 0, 0, 0, NULL);
			
@@ -275,5 +374,6 @@ int luat_camera_stop(int id)
 
				 		free(prvCamera.DataCache);
			
 
				 		prvCamera.DataCache = NULL;
			
 
				 	}
			
 
				+	OS_DeInitBuffer(&prvCamera.FileBuffer);
			
 
				     return 0;
			
 
				 }
			
--- a/bsp/air105/hal/core_dcmi.c
+++ b/bsp/air105/hal/core_dcmi.c
@@ -29,7 +29,7 @@ typedef struct
 
				 	void *pUserData;
			
 
				 	uint32_t BufLen;
			
 
				 	uint8_t RxDMASn;
			
 
				-
			
 
				+	uint8_t IsFirstVsync;
			
 
				 }DCMI_CtrlStruct;
			
 
				 
			
 
				 static DCMI_CtrlStruct prvDCMI;
			
@@ -48,6 +48,10 @@ static void prvDCMI_IrqHandler(int32_t Line, void *pData)
 
				 		prvDCMI.RxDMASn = (prvDCMI.RxDMASn + 1)%DCMI_RXBUF_BAND;
			
 
				 		DMA_ClearStreamFlag(DCMI_RX_DMA_STREAM);
			
 
				 		DMA_ForceStartStream(DCMI_RX_DMA_STREAM, prvDCMI.uBuf[prvDCMI.RxDMASn].pu32, prvDCMI.BufLen, prvDCMI_DMADone, NULL, 1);
			
 
				+		if (prvDCMI.IsFirstVsync)
			
 
				+		{
			
 
				+			prvDCMI.IsFirstVsync = 0;
			
 
				+		}
			
 
				 		prvDCMI.CB(NULL, prvDCMI.pUserData);
			
 
				 	}
			
 
				 }
			
@@ -61,7 +65,10 @@ static int32_t prvDCMI_DMADone(void *pData, void *pParam)
 
				 	DMA_ClearStreamFlag(DCMI_RX_DMA_STREAM);
			
 
				 	DMA_ForceStartStream(DCMI_RX_DMA_STREAM, prvDCMI.uBuf[prvDCMI.RxDMASn].pu32, prvDCMI.BufLen, prvDCMI_DMADone, NULL, 1);
			
 
				 	Buffer_StaticInit(&RxBuf, prvDCMI.uBuf[LastRxDMASn].pu32, prvDCMI.BufLen);
			
 
				-	prvDCMI.CB(&RxBuf, prvDCMI.pUserData);
			
 
				+	if (!prvDCMI.IsFirstVsync)
			
 
				+	{
			
 
				+		prvDCMI.CB(&RxBuf, prvDCMI.pUserData);
			
 
				+	}
			
 
				 	return 0;
			
 
				 }
			
 
				 
			
@@ -140,25 +147,27 @@ void DCMI_CaptureSwitch(uint8_t OnOff, uint32_t BufLen, uint32_t ImageW, uint32_
 
				 	{
			
 
				 		PM_SetHardwareRunFlag(PM_HW_DCMI_0, 1);
			
 
				 		if (DCMI->CR & DCMI_CR_CAPTURE) return;
			
 
				+		prvDCMI.IsFirstVsync = 1;
			
 
				 		if (!BufLen)
			
 
				 		{
			
 
				 			WDataLen = (ImageW * DataByte) >> 2;
			
 
				-			if (ImageH > 100)
			
 
				-			{
			
 
				-				if (!(ImageH % 10))
			
 
				-				{
			
 
				-					HLen = ImageH / 10;
			
 
				-				}
			
 
				-				else
			
 
				-				{
			
 
				-					HLen = ImageH >> 1;
			
 
				-				}
			
 
				-			}
			
 
				-			else
			
 
				-			{
			
 
				-				HLen = ImageH >> 1;
			
 
				-			}
			
 
				-			while( (WDataLen * HLen) > 2048)
			
 
				+//			if (ImageH > 100)
			
 
				+//			{
			
 
				+//				if (!(ImageH % 10))
			
 
				+//				{
			
 
				+//					HLen = ImageH / 10;
			
 
				+//				}
			
 
				+//				else
			
 
				+//				{
			
 
				+//					HLen = ImageH >> 1;
			
 
				+//				}
			
 
				+//			}
			
 
				+//			else
			
 
				+//			{
			
 
				+//				HLen = ImageH >> 1;
			
 
				+//			}
			
 
				+			HLen = 8;
			
 
				+			while( (WDataLen * HLen) > 4000)
			
 
				 			{
			
 
				 				HLen >>= 1;
			
 
				 			}
			
@@ -177,6 +186,7 @@ void DCMI_CaptureSwitch(uint8_t OnOff, uint32_t BufLen, uint32_t ImageW, uint32_
 
				 		DCMI->ICR = 0x1f;
			
 
				 		ISR_OnOff(DCMI_IRQn, 1);
			
 
				 		DCMI->CR |= DCMI_CR_CAPTURE|DCMI_CR_ENABLE;
			
 
				+
			
 
				 	}
			
 
				 	else
			
 
				 	{
			
--- a/bsp/common/include/bsp_common.h
+++ b/bsp/common/include/bsp_common.h
@@ -401,6 +401,9 @@ void *llist_traversal(llist_head *head, CBFuncEx_t cb, void *pData);
 
				 	container_of(ptr, type, member)
			
 
				 
			
 
				 
			
 
				+uint16_t BSP_Swap16(uint16_t n);
			
 
				+uint32_t BSP_Swap32(uint32_t n);
			
 
				+
			
 
				 uint8_t BytesGet8(const void *ptr);
			
 
				 void BytesPut8(void *ptr, uint8_t v);
			
 
				 uint16_t BytesGetBe16(const void *ptr);
			
@@ -437,4 +440,10 @@ void BytesPutDoubleToBuf(Buffer_Struct *Buf, double v);
 
				 #define realloc OS_Realloc
			
 
				 #define zalloc OS_Zalloc
			
 
				 #define calloc OS_Calloc
			
 
				+
			
 
				+#if(defined(__DEBUG__) || defined(DEBUG)) // Assertions are active only when __DEBUG__ or DEBUG is defined
				+#define ASSERT( x ) if( ( x ) == 0 ) { __disable_irq(); DBG_Trace("\r\nassert %s,%d", __FUNCTION__, __LINE__); for( ;; ); } // On failure: disable interrupts, trace the function name and line, then spin forever. NOTE(review): expands to a bare `if` statement, so it must not be placed directly before an `else`
				+#else
				+#define ASSERT( x ) // Expands to nothing: x is not evaluated in this configuration
				+#endif
			
 
				 #endif
			
--- a/bsp/common/include/core_service.h
+++ b/bsp/common/include/core_service.h
@@ -51,6 +51,11 @@ uint32_t Core_LCDDrawCacheLen(void);
 
				 void Core_USBDefaultDeviceStart(uint8_t USB_ID);
			
 
				 void Core_USBAction(uint8_t USB_ID, uint8_t Action, void *pParam);
			
 
				 void Core_ScanKeyBoard(void);
			
 
				+void Core_SetRGB565FixValue(uint8_t R, uint8_t G, uint8_t B);
			
 
				+void Core_EncodeJPEGSetup(HANDLE Fun, void *pParam);
			
 
				+void Core_EncodeJPEGStart(uint32_t Width, uint32_t Height, uint8_t Quality, int32_t YDiff);
			
 
				+void Core_EncodeJPEGRun(uint8_t *Data, uint32_t Len, uint8_t ColorMode);
			
 
				+void Core_EncodeJPEGEnd(CBFuncEx_t CB, void *pParam);
			
 
				 void Core_PrintMemInfo(void);
			
 
				 void Core_PrintServiceStack(void);
			
 
				 void Core_DebugMem(uint8_t HeapID, const char *FuncName, uint32_t Line);
			
--- a/bsp/common/include/platform_define.h
+++ b/bsp/common/include/platform_define.h
@@ -59,8 +59,10 @@ enum
 
				 
			
 
				 	COLOR_MODE_RGB_565 = 0,
			
 
				 	COLOR_MODE_GRAY,
			
 
				+	COLOR_MODE_RGB_888,
			
 
				 	COLOR_MODE_YCBCR_422_UYVY,
			
 
				 	COLOR_MODE_YCBCR_422_YUYV,
			
 
				+	COLOR_MODE_YCBCR_422_CBYCRY,
			
 
				 
			
 
				 	CORE_OTA_MODE_FULL = 0,	//param1的byte0
			
 
				 	CORE_OTA_MODE_DIFF,
			
--- a/bsp/common/src/bsp_common.c
+++ b/bsp/common/src/bsp_common.c
@@ -1567,3 +1567,21 @@ uint32_t llist_num(const llist_head *head)
 
				 	}
			
 
				 	return num;
			
 
				 }
			
 
				+
			
 
				+#define PP_HTONS(x) ((uint16_t)((((x) & (uint16_t)0x00ffU) << 8) | (((x) & (uint16_t)0xff00U) >> 8))) // Exchanges the low and high byte of a 16-bit value unconditionally; x is evaluated twice
				+#define PP_NTOHS(x) PP_HTONS(x) // Alias of PP_HTONS: the swap is its own inverse
				+#define PP_HTONL(x) ((((x) & (uint32_t)0x000000ffUL) << 24) | \
				+                     (((x) & (uint32_t)0x0000ff00UL) <<  8) | \
				+                     (((x) & (uint32_t)0x00ff0000UL) >>  8) | \
				+                     (((x) & (uint32_t)0xff000000UL) >> 24)) // Reverses the order of the four bytes of a 32-bit value unconditionally; x is evaluated four times
				+#define PP_NTOHL(x) PP_HTONL(x) // Alias of PP_HTONL: the swap is its own inverse
			
 
				+
			
 
				+uint16_t BSP_Swap16(uint16_t n) // Returns n with its two bytes exchanged, e.g. 0x1234 -> 0x3412
				+{
				+  return (uint16_t)PP_HTONS(n); // PP_HTONS swaps unconditionally, independent of host byte order
				+}
			
 
				+
			
 
				+uint32_t BSP_Swap32(uint32_t n) // Returns n with its four bytes in reverse order, e.g. 0x11223344 -> 0x44332211
				+{
				+  return (uint32_t)PP_HTONL(n); // PP_HTONL swaps unconditionally, independent of host byte order
				+}
			
--- a/bsp/common/src/core_service.c
+++ b/bsp/common/src/core_service.c
--- a/xmake.lua
+++ b/xmake.lua
@@ -176,7 +176,7 @@ target("app.elf")
 
				         if LVGL_CONF == nil then target:add("deps", "lvgl") end
			
 
				     end)
			
 
				 
			
 
				-    add_deps("tflm")
			
 
				+    -- add_deps("tflm")
			
 
				 
			
 
				     -- add deps
			
 
				     add_files("Third_Party/cm_backtrace/*.c",{public = true})
			
@@ -197,6 +197,9 @@ target("app.elf")
 
				 
			
 
				     add_files("Third_Party/heap/*.c",{public = true})
			
 
				     add_includedirs("Third_Party/heap",{public = true})
			
 
				+
			
 
				+    add_files("Third_Party/jpeg_encode/*.c",{public = true})
			
 
				+    add_includedirs("Third_Party/jpeg_encode",{public = true})
			
 
				     
			
 
				     --add_files("bsp/common/*.c",{public = true})
			
 
				 	add_files("bsp/common/src/*.c",{public = true})