Просмотр исходного кода

Merge branch 'master' of https://gitee.com/openLuat/LuatOS

alienwalker 5 месяцев назад
Родитель
Сommit
4c8a1bb7dd

+ 316 - 4
components/fft/binding/luat_lib_fft.c

@@ -1,3 +1,14 @@
+/*
+@module  fft
+@summary 快速傅里叶变换(FFT/IFFT),支持 float32 与 q15 定点内核
+@version 0.2
+@date    2025.08
+@demo    fft
+@tag     LUAT_USE_FFT
+@usage
+-- 模块通常作为内置库,无需 require,直接调用
+-- 示例见各 API 注释以及仓库 demo/test 脚本
+*/
 #include "luat_base.h"
 #include "luat_mem.h"
 
@@ -12,6 +23,8 @@
 #include "luat_conf_bsp.h"
 #include "rotable2.h"
 #include "luat_zbuff.h"
+// Q15 内核头文件(当前版本已实现)
+#include "fft_core_q15.h"
 
 // helper: read float array from lua table (1-based)
 static int read_lua_array_float(lua_State* L, int idx, float* out, int n) {
@@ -33,6 +46,15 @@ static void write_lua_array_float(lua_State* L, float* a, int n) {
     }
 }
 
+/*
+生成 float32 旋转因子
+@api fft.generate_twiddles(N)
+@int N 点数,必须为 2 的幂
+@return table Wc, table Ws 两个 Lua 数组(长度 N/2),分别为 cos 与 -sin
+@usage
+local N = 2048
+local Wc, Ws = fft.generate_twiddles(N)
+*/
 static int l_fft_generate_twiddles(lua_State* L) {
     int N = luaL_checkinteger(L, 1);
     if (N <= 1 || (N & (N-1))) return luaL_error(L, "N must be power of 2");
@@ -52,13 +74,120 @@ static int l_fft_generate_twiddles(lua_State* L) {
     return 2;
 }
 
+// 生成 Q15 旋转因子到 zbuff(长度要求:N/2 * 2 字节)
+// 调用方式:fft.generate_twiddles_q15_to_zbuff(N, Wc_zbuff, Ws_zbuff)
+/*
+生成 q15 旋转因子到 zbuff(零浮点)
+@api fft.generate_twiddles_q15_to_zbuff(N, Wc_zb, Ws_zb)
+@int N 点数,必须为 2 的幂
+@zbuff Wc_zb 输出缓冲,长度至少为 (N/2)*2 字节,存放 int16 Q15 的 cos
+@zbuff Ws_zb 输出缓冲,长度至少为 (N/2)*2 字节,存放 int16 Q15 的 -sin(前向)
+@return nil 无返回值,结果写入传入的 zbuff
+@usage
+local N = 2048
+local Wc_q15 = zbuff.create((N//2)*2)
+local Ws_q15 = zbuff.create((N//2)*2)
+fft.generate_twiddles_q15_to_zbuff(N, Wc_q15, Ws_q15)
+*/
+static int l_fft_generate_twiddles_q15_to_zbuff(lua_State* L) {
+    // 使用整型查表(度数,1度分辨率,缩放256)生成近似Q15旋转因子
+    static const int16_t SIN_TABLE256[91] = {
+        0,4,9,13,18,22,27,31,36,40,44,49,53,58,62,66,71,75,79,83,
+        88,92,96,100,104,108,112,116,120,124,128,132,136,139,143,147,150,154,158,161,
+        165,168,171,175,178,181,184,187,190,193,196,199,202,204,207,210,212,215,217,219,
+        222,224,226,228,230,232,234,236,237,239,241,242,243,245,246,247,248,249,250,251,
+        252,253,254,254,255,255,255,256,256,256,256
+    };
+    int N = luaL_checkinteger(L, 1);
+    if (N <= 1 || (N & (N-1))) return luaL_error(L, "N 必须为 2 的幂");
+    luat_zbuff_t* zbWc = (luat_zbuff_t*)luaL_testudata(L, 2, LUAT_ZBUFF_TYPE);
+    luat_zbuff_t* zbWs = (luat_zbuff_t*)luaL_testudata(L, 3, LUAT_ZBUFF_TYPE);
+    if (!zbWc || !zbWs) return luaL_error(L, "Wc/Ws 需为 zbuff");
+    int need = (N/2) * 2;
+    if ((int)zbWc->len < need || (int)zbWs->len < need) return luaL_error(L, "zbuff 太小");
+    int16_t* Wc = (int16_t*)zbWc->addr;
+    int16_t* Ws = (int16_t*)zbWs->addr;
+    for (int k = 0; k < N/2; k++) {
+        // angle = 360 * k / N (度),四舍五入
+        int deg = (int)((int64_t)k * 360 + (N/2)) / N;
+        // cos 与 -sin,放大到 Q15(256<<7=32768,钳到 32767)
+        int s256;
+        int d = deg;
+        int neg = 0;
+        if (d < 0) { d = -d + 180; }
+        d %= 360;
+        if (d >= 180) { d -= 180; neg = 1; }
+        if (d <= 90) s256 = SIN_TABLE256[d]; else s256 = SIN_TABLE256[180 - d];
+        int c256 = 0; // cos = sin(deg+90)
+        int d2 = deg + 90;
+        neg = 0; d = d2;
+        if (d < 0) { d = -d + 180; }
+        d %= 360;
+        if (d >= 180) { d -= 180; neg = 1; }
+        if (d <= 90) c256 = SIN_TABLE256[d]; else c256 = SIN_TABLE256[180 - d];
+        if (neg) c256 = -c256;
+        int32_t wc = (int32_t)c256 << 7;
+        if (wc > 32767) wc = 32767; if (wc < -32768) wc = -32768;
+        // -sin:复用 s256 并加符号
+        int sgn = 0; d = deg;
+        if (d < 0) { d = -d + 180; }
+        d %= 360; if (d >= 180) { d -= 180; sgn = 1; }
+        int32_t ws = (int32_t)(sgn ? -s256 : s256) << 7;
+        if (ws > 32767) ws = 32767; if (ws < -32768) ws = -32768;
+        Wc[k] = (int16_t)wc;
+        Ws[k] = (int16_t)ws;
+    }
+    return 0;
+}
+
+/*
+原地 FFT 计算
+@api fft.run(real, imag, N, Wc, Ws[, opts])
+@param real 实部容器:
+ - float32 路径:Lua 数组或 zbuff(float32)
+ - q15 路径:zbuff(int16)。当 opts.core="q15" 且 opts.input_format 为 "u12"/"u16"/"s16" 时生效
+@param imag 虚部容器:同 real。可为 nil(视为全 0)
+@int N 点数,2 的幂
+@param Wc 旋转因子 cos:
+ - float32 路径:Lua 数组或 zbuff(float32)
+ - q15 路径:zbuff(int16),推荐配合 fft.generate_twiddles_q15_to_zbuff 生成
+@param Ws 旋转因子 -sin:同 Wc;IFFT 建议传入共轭版本以避免内层符号乘
+@table [opts]
+ - core: "f32" | "q15"(默认 "f32")
+   * "f32": 浮点内核,精度高(32位),计算稳定,适合精密分析
+   * "q15": 定点内核,速度快(16位整数),内存省,适合实时处理但精度略低
+ - input_format: "f32" | "u12" | "u16" | "s16"(q15 时必填其一)
+   * "f32": 标准浮点输入,适用于已处理的信号数据
+   * "u12": 12位无符号整数(0~4095),常见于ADC采样,自动去直流分量
+   * "u16": 16位无符号整数(0~65535),适用于高精度ADC或预处理数据
+   * "s16": 16位有符号整数(-32768~32767),适用于已去直流的差分信号
+@return nil 就地修改 real/imag
+@usage
+-- f32 路径示例(zbuff float32)
+local N=2048
+local real=zbuff.create(N*4); local imag=zbuff.create(N*4)
+local Wc,Ws=fft.generate_twiddles(N)
+fft.run(real, imag, N, Wc, Ws)
+
+-- q15 路径示例(U12 整数输入)
+local N=2048
+local real_i16=zbuff.create(N*2); local imag_i16=zbuff.create(N*2)
+local Wc_q15=zbuff.create((N//2)*2); local Ws_q15=zbuff.create((N//2)*2)
+fft.generate_twiddles_q15_to_zbuff(N, Wc_q15, Ws_q15)
+-- 写入 U12 数据到 real_i16 后:
+fft.run(real_i16, imag_i16, N, Wc_q15, Ws_q15, {core="q15", input_format="u12"})
+*/
 static int l_fft_run(lua_State* L) {
     int N = luaL_checkinteger(L, 3);
-    if (N <= 1 || (N & (N-1))) return luaL_error(L, "N must be power of 2");
+    if (N <= 1 || (N & (N-1))) return luaL_error(L, "N 必须为 2 的幂");
 
     float *r = NULL, *im = NULL, *Wc = NULL, *Ws = NULL;
     int r_free = 0, im_free = 0, wc_free = 0, ws_free = 0;
 
+    // 可选参数解析(opts):当前版本仅支持 core/input_format(其余项作为未来优化)
+    const char* core = "f32";         // "f32" | "q15"
+    const char* input_format = "f32"; // "f32"|"u12"|"u16"|"s16"
+
     // real
     luat_zbuff_t* zb = (luat_zbuff_t*)luaL_testudata(L, 1, LUAT_ZBUFF_TYPE);
     if (zb) { r = (float*)zb->addr; }
@@ -74,8 +203,78 @@ static int l_fft_run(lua_State* L) {
     // W_imag
     zb = (luat_zbuff_t*)luaL_testudata(L, 5, LUAT_ZBUFF_TYPE);
     if (zb) { Ws = (float*)zb->addr; }
-    else { Ws = luat_heap_malloc(sizeof(float)*(N/2)); ws_free = 1; if (!Ws) { if (r_free) luat_heap_free(r); if (im_free) luat_heap_free(im); if (wc_free) luat_heap_free(Wc); return luaL_error(L, "no memory"); } if (read_lua_array_float(L, 5, Ws, N/2)) { if (r_free) luat_heap_free(r); if (im_free) luat_heap_free(im); if (wc_free) luat_heap_free(Wc); if (ws_free) luat_heap_free(Ws); return luaL_error(L, "W_imag must be number array or zbuff"); } }
+    else { Ws = luat_heap_malloc(sizeof(float)*(N/2)); ws_free = 1; if (!Ws) { if (r_free) luat_heap_free(r); if (im_free) luat_heap_free(im); if (wc_free) luat_heap_free(Wc); return luaL_error(L, "内存不足"); } if (read_lua_array_float(L, 5, Ws, N/2)) { if (r_free) luat_heap_free(r); if (im_free) luat_heap_free(im); if (wc_free) luat_heap_free(Wc); if (ws_free) luat_heap_free(Ws); return luaL_error(L, "W_imag 需为数字数组或 zbuff"); } }
+    // 读取第6个参数的 opts(若有)
+    if (lua_gettop(L) >= 6 && lua_istable(L, 6)) {
+        lua_getfield(L, 6, "core"); if (!lua_isnil(L, -1)) core = luaL_checkstring(L, -1); lua_pop(L, 1);
+        lua_getfield(L, 6, "input_format"); if (!lua_isnil(L, -1)) input_format = luaL_checkstring(L, -1); lua_pop(L, 1);
+    }
 
+    // 如果选择 q15 内核,且输入为整数 zbuff,则走 q15 路径
+    int use_q15 = (core && strcmp(core, "q15") == 0);
+    int integer_input = (strcmp(input_format, "u12") == 0 || strcmp(input_format, "u16") == 0 || strcmp(input_format, "s16") == 0);
+    if (use_q15 && integer_input) {
+        // 校验 real/imag 是否为 zbuff(当前整型快速路径仅支持 zbuff)
+        luat_zbuff_t* zb_real = (luat_zbuff_t*)luaL_testudata(L, 1, LUAT_ZBUFF_TYPE);
+        luat_zbuff_t* zb_imag = (luat_zbuff_t*)luaL_testudata(L, 2, LUAT_ZBUFF_TYPE);
+        if (!zb_real) {
+            if (r_free) luat_heap_free(r);
+            if (im_free) luat_heap_free(im);
+            if (wc_free) luat_heap_free(Wc);
+            if (ws_free) luat_heap_free(Ws);
+            return luaL_error(L, "q15 模式要求 real 为整数 zbuff");
+        }
+        if ((int)zb_real->len < N * 2) {
+            if (r_free) luat_heap_free(r);
+            if (im_free) luat_heap_free(im);
+            if (wc_free) luat_heap_free(Wc);
+            if (ws_free) luat_heap_free(Ws);
+            return luaL_error(L, "real zbuff 太小");
+        }
+        // 原地:将整数输入就地转换为带符号 Q15(覆盖 zbuff)
+        uint16_t* r16 = (uint16_t*)zb_real->addr;
+        for (int i = 0; i < N; i++) {
+            int32_t v; uint16_t u = r16[i];
+            if (strcmp(input_format, "u12") == 0) { v = (int32_t)(u & 0x0FFF) - 2048; v <<= 4; }
+            else if (strcmp(input_format, "u16") == 0) { v = (int32_t)u - 32768; }
+            else { v = (int16_t)u; }
+            if (v > 32767) v = 32767; if (v < -32768) v = -32768;
+            ((int16_t*)zb_real->addr)[i] = (int16_t)v;
+        }
+        if (zb_imag && (int)zb_imag->len >= N*2) {
+            uint16_t* i16 = (uint16_t*)zb_imag->addr;
+            for (int i = 0; i < N; i++) {
+                int32_t v; uint16_t u = i16[i];
+                if (strcmp(input_format, "u12") == 0) { v = (int32_t)(u & 0x0FFF) - 2048; v <<= 4; }
+                else if (strcmp(input_format, "u16") == 0) { v = (int32_t)u - 32768; }
+                else { v = (int16_t)u; }
+                if (v > 32767) v = 32767; if (v < -32768) v = -32768;
+                ((int16_t*)zb_imag->addr)[i] = (int16_t)v;
+            }
+        } else if (zb_imag) {
+            // 长度不足则清零
+            memset(zb_imag->addr, 0, zb_imag->len);
+        }
+
+        // 强制要求外部传入 Q15 twiddle
+        luat_zbuff_t* zbWc = (luat_zbuff_t*)luaL_testudata(L, 4, LUAT_ZBUFF_TYPE);
+        luat_zbuff_t* zbWs = (luat_zbuff_t*)luaL_testudata(L, 5, LUAT_ZBUFF_TYPE);
+        const int need_tw = (N/2) * 2;
+        if (!(zbWc && zbWs) || (int)zbWc->len < need_tw || (int)zbWs->len < need_tw) {
+            if (r_free) luat_heap_free(r); if (im_free) luat_heap_free(im); if (wc_free) luat_heap_free(Wc); if (ws_free) luat_heap_free(Ws);
+            return luaL_error(L, "q15 需传入 Wc/Ws zbuff,长度 N/2*2 字节");
+        }
+
+        int scale_exp = 0;
+        int rc = fft_inplace_q15((int16_t*)zb_real->addr, zb_imag ? (int16_t*)zb_imag->addr : NULL,
+                                  N, 0, (const int16_t*)zbWc->addr, (const int16_t*)zbWs->addr,
+                                  0, &scale_exp);
+        if (r_free) luat_heap_free(r); if (im_free) luat_heap_free(im); if (wc_free) luat_heap_free(Wc); if (ws_free) luat_heap_free(Ws);
+        if (rc != 0) return luaL_error(L, "q15 内核执行失败");
+        return 0;
+    }
+
+    // 默认:沿用 float32 路径
     fft_run_inplace(r, im, N, Wc, Ws);
 
     // if input was table, write back
@@ -89,12 +288,27 @@ static int l_fft_run(lua_State* L) {
     return 0;
 }
 
+/*
+原地 IFFT 计算
+@api fft.ifft(real, imag, N, Wc, Ws[, opts])
+@param real 实部容器,类型与 fft.run 一致
+@param imag 虚部容器,类型与 fft.run 一致
+@int N 点数,2 的幂
+@param Wc 旋转因子 cos:类型同 fft.run
+@param Ws 旋转因子 -sin:类型同 fft.run。建议为 IFFT 预共轭传入 +sin 表
+@table [opts]
+ - core: "f32" | "q15"(默认 "f32")
+ - input_format: "f32" | "u12" | "u16" | "s16"(q15 时必填其一)
+@return nil 就地修改 real/imag,并在 f32 路径下包含 1/N 归一化
+*/
 static int l_fft_ifft(lua_State* L) {
     int N = luaL_checkinteger(L, 3);
-    if (N <= 1 || (N & (N-1))) return luaL_error(L, "N must be power of 2");
+    if (N <= 1 || (N & (N-1))) return luaL_error(L, "N 必须为 2 的幂");
 
     float *r = NULL, *im = NULL, *Wc = NULL, *Ws = NULL;
     int r_free = 0, im_free = 0, wc_free = 0, ws_free = 0;
+    // 可选 opts(同 run):当前仅 core/input_format
+    const char* core = "f32"; const char* input_format = "f32";
     luat_zbuff_t* zb = NULL;
     zb = (luat_zbuff_t*)luaL_testudata(L, 1, LUAT_ZBUFF_TYPE);
     if (zb) { r = (float*)zb->addr; } else { r = luat_heap_malloc(sizeof(float)*N); r_free = 1; if (!r) return luaL_error(L, "no memory"); if (read_lua_array_float(L, 1, r, N)) { if (r_free) luat_heap_free(r); return luaL_error(L, "real must be number array or zbuff"); } }
@@ -103,7 +317,92 @@ static int l_fft_ifft(lua_State* L) {
     zb = (luat_zbuff_t*)luaL_testudata(L, 4, LUAT_ZBUFF_TYPE);
     if (zb) { Wc = (float*)zb->addr; } else { Wc = luat_heap_malloc(sizeof(float)*(N/2)); wc_free = 1; if (!Wc) { if (r_free) luat_heap_free(r); if (im_free) luat_heap_free(im); return luaL_error(L, "no memory"); } if (read_lua_array_float(L, 4, Wc, N/2)) { if (r_free) luat_heap_free(r); if (im_free) luat_heap_free(im); if (wc_free) luat_heap_free(Wc); return luaL_error(L, "W_real must be number array or zbuff"); } }
     zb = (luat_zbuff_t*)luaL_testudata(L, 5, LUAT_ZBUFF_TYPE);
-    if (zb) { Ws = (float*)zb->addr; } else { Ws = luat_heap_malloc(sizeof(float)*(N/2)); ws_free = 1; if (!Ws) { if (r_free) luat_heap_free(r); if (im_free) luat_heap_free(im); if (wc_free) luat_heap_free(Wc); return luaL_error(L, "no memory"); } if (read_lua_array_float(L, 5, Ws, N/2)) { if (r_free) luat_heap_free(r); if (im_free) luat_heap_free(im); if (wc_free) luat_heap_free(Wc); if (ws_free) luat_heap_free(Ws); return luaL_error(L, "W_imag must be number array or zbuff"); } }
+    if (zb) { Ws = (float*)zb->addr; } else { Ws = luat_heap_malloc(sizeof(float)*(N/2)); ws_free = 1; if (!Ws) { if (r_free) luat_heap_free(r); if (im_free) luat_heap_free(im); if (wc_free) luat_heap_free(Wc); return luaL_error(L, "内存不足"); } if (read_lua_array_float(L, 5, Ws, N/2)) { if (r_free) luat_heap_free(r); if (im_free) luat_heap_free(im); if (wc_free) luat_heap_free(Wc); if (ws_free) luat_heap_free(Ws); return luaL_error(L, "W_imag 需为数字数组或 zbuff"); } }
+
+    if (lua_gettop(L) >= 6 && lua_istable(L, 6)) {
+        lua_getfield(L, 6, "core"); if (!lua_isnil(L, -1)) core = luaL_checkstring(L, -1); lua_pop(L, 1);
+        lua_getfield(L, 6, "input_format"); if (!lua_isnil(L, -1)) input_format = luaL_checkstring(L, -1); lua_pop(L, 1);
+    }
+
+    int use_q15 = (core && strcmp(core, "q15") == 0);
+    int integer_input = (strcmp(input_format, "u12") == 0 || strcmp(input_format, "u16") == 0 || strcmp(input_format, "s16") == 0);
+    if (use_q15 && integer_input) {
+        luat_zbuff_t* zb_real = (luat_zbuff_t*)luaL_testudata(L, 1, LUAT_ZBUFF_TYPE);
+        luat_zbuff_t* zb_imag = (luat_zbuff_t*)luaL_testudata(L, 2, LUAT_ZBUFF_TYPE);
+        if (!zb_real) {
+            if (r_free) luat_heap_free(r);
+            if (im_free) luat_heap_free(im);
+            if (wc_free) luat_heap_free(Wc);
+            if (ws_free) luat_heap_free(Ws);
+            return luaL_error(L, "q15 模式要求 real 为整数 zbuff");
+        }
+        if ((int)zb_real->len < N * 2) {
+            if (r_free) luat_heap_free(r);
+            if (im_free) luat_heap_free(im);
+            if (wc_free) luat_heap_free(Wc);
+            if (ws_free) luat_heap_free(Ws);
+            return luaL_error(L, "real zbuff 太小");
+        }
+
+        int16_t* rq = (int16_t*)luat_heap_malloc(sizeof(int16_t) * N);
+        int16_t* iq = (int16_t*)luat_heap_malloc(sizeof(int16_t) * N);
+        if (!rq || !iq) {
+            if (rq) luat_heap_free(rq); if (iq) luat_heap_free(iq);
+            if (r_free) luat_heap_free(r); if (im_free) luat_heap_free(im);
+            if (wc_free) luat_heap_free(Wc); if (ws_free) luat_heap_free(Ws);
+            return luaL_error(L, "内存不足");
+        }
+
+        const uint16_t* r16 = (const uint16_t*)zb_real->addr;
+        for (int i = 0; i < N; i++) {
+            int32_t v; uint16_t u = r16[i];
+            if (strcmp(input_format, "u12") == 0) { v = ((int32_t)(u & 0x0FFF)) - 2048; v <<= 4; }
+            else if (strcmp(input_format, "u16") == 0) { v = (int32_t)u - 32768; }
+            else { v = (int16_t)u; }
+            if (v > 32767) v = 32767; if (v < -32768) v = -32768; rq[i] = (int16_t)v;
+        }
+        if (zb_imag && (int)zb_imag->len >= N * 2) {
+            const uint16_t* i16 = (const uint16_t*)zb_imag->addr;
+            for (int i = 0; i < N; i++) {
+                int32_t v; uint16_t u = i16[i];
+                if (strcmp(input_format, "u12") == 0) { v = ((int32_t)(u & 0x0FFF)) - 2048; v <<= 4; }
+                else if (strcmp(input_format, "u16") == 0) { v = (int32_t)u - 32768; }
+                else { v = (int16_t)u; }
+                if (v > 32767) v = 32767; if (v < -32768) v = -32768; iq[i] = (int16_t)v;
+            }
+        } else { memset(iq, 0, sizeof(int16_t) * N); }
+
+        // 使用传入的 Q15 旋转因子(zbuff)或按需生成
+        luat_zbuff_t* zbWc = (luat_zbuff_t*)luaL_testudata(L, 4, LUAT_ZBUFF_TYPE);
+        luat_zbuff_t* zbWs = (luat_zbuff_t*)luaL_testudata(L, 5, LUAT_ZBUFF_TYPE);
+        const int need_tw = (N/2) * 2;
+        const int16_t* Wcq = NULL; const int16_t* Wsq = NULL;
+        int16_t* Wcq_alloc = NULL; int16_t* Wsq_alloc = NULL;
+        if (zbWc && zbWs && (int)zbWc->len >= need_tw && (int)zbWs->len >= need_tw) {
+            Wcq = (const int16_t*)zbWc->addr;
+            Wsq = (const int16_t*)zbWs->addr;
+        } else {
+            Wcq_alloc = (int16_t*)luat_heap_malloc(sizeof(int16_t) * (N/2));
+            Wsq_alloc = (int16_t*)luat_heap_malloc(sizeof(int16_t) * (N/2));
+            if (!Wcq_alloc || !Wsq_alloc) { if (Wcq_alloc) luat_heap_free(Wcq_alloc); if (Wsq_alloc) luat_heap_free(Wsq_alloc); luat_heap_free(rq); luat_heap_free(iq);
+                if (r_free) luat_heap_free(r); if (im_free) luat_heap_free(im); if (wc_free) luat_heap_free(Wc); if (ws_free) luat_heap_free(Ws); return luaL_error(L, "内存不足"); }
+            fft_generate_twiddles_q15(Wcq_alloc, Wsq_alloc, N);
+            Wcq = Wcq_alloc; Wsq = Wsq_alloc;
+        }
+
+        int scale_exp = 0;
+        int rc = fft_inplace_q15(rq, iq, N, 1, Wcq, Wsq, 0, &scale_exp); // inverse=1
+        if (Wcq_alloc) luat_heap_free(Wcq_alloc);
+        if (Wsq_alloc) luat_heap_free(Wsq_alloc);
+        if (rc != 0) { luat_heap_free(rq); luat_heap_free(iq);
+            if (r_free) luat_heap_free(r); if (im_free) luat_heap_free(im); if (wc_free) luat_heap_free(Wc); if (ws_free) luat_heap_free(Ws); return luaL_error(L, "q15 内核执行失败"); }
+
+        for (int i = 0; i < N; i++) ((int16_t*)zb_real->addr)[i] = rq[i];
+        if (zb_imag && (int)zb_imag->len >= N * 2) { for (int i = 0; i < N; i++) ((int16_t*)zb_imag->addr)[i] = iq[i]; }
+        luat_heap_free(rq); luat_heap_free(iq);
+        if (r_free) luat_heap_free(r); if (im_free) luat_heap_free(im); if (wc_free) luat_heap_free(Wc); if (ws_free) luat_heap_free(Ws);
+        return 0;
+    }
 
     ifft_run_inplace(r, im, N, Wc, Ws);
 
@@ -117,6 +416,18 @@ static int l_fft_ifft(lua_State* L) {
     return 0;
 }
 
+/*
+频域积分(1/(jω))
+@api fft.fft_integral(real, imag, n, df)
+@param real 实部(float32,Lua 数组或 zbuff)
+@param imag 虚部(float32,Lua 数组或 zbuff)
+@int n 点数,2 的幂
+@number df 频率分辨率(fs/n)
+@return nil 原地修改 real/imag(DC 置 0)
+@usage
+-- 先完成 FFT 得到谱 (real, imag),再调用积分:
+fft.fft_integral(real, imag, N, fs/N)
+*/
 static int l_fft_integral(lua_State* L) {
     int n = luaL_checkinteger(L, 3);
     float df = (float)luaL_checknumber(L, 4);
@@ -137,6 +448,7 @@ static int l_fft_integral(lua_State* L) {
 
 static const rotable_Reg_t reg_fft[] = {
     {"generate_twiddles", ROREG_FUNC(l_fft_generate_twiddles)},
+    {"generate_twiddles_q15_to_zbuff", ROREG_FUNC(l_fft_generate_twiddles_q15_to_zbuff)},
     {"run",               ROREG_FUNC(l_fft_run)},
     {"ifft",              ROREG_FUNC(l_fft_ifft)},
     {"fft_integral",      ROREG_FUNC(l_fft_integral)},

+ 0 - 117
components/fft/fft_core.c

@@ -1,117 +0,0 @@
-#include "fft_core.h"
-#include <math.h>
-#include <string.h>
-#include <stdint.h>
-#ifndef M_PI
-#define M_PI 3.14159265358979323846
-#endif
-
-static inline int reverse_bits(int x, int bits) {
-    int y = 0;
-    for (int i = 0; i < bits; i++) {
-        y = (y << 1) | (x & 1);
-        x >>= 1;
-    }
-    return y;
-}
-
-void fft_generate_twiddles(int N, float* Wc, float* Ws) {
-    for (int k = 0; k < N/2; k++) {
-        float angle = (float)(-2.0 * M_PI * (double)k / (double)N);
-        Wc[k] = (float)cos(angle);
-        Ws[k] = (float)sin(angle);
-    }
-}
-
-static void fft_inplace_core(float* real, float* imag, int N, int inverse,
-                             const float* Wc, const float* Ws) {
-    // bit-reverse permutation
-    int bits = 0; while ((1 << bits) < N) bits++;
-    for (int i = 0; i < N; i++) {
-        int j = reverse_bits(i, bits);
-        if (j > i) {
-            float tr = real[i]; real[i] = real[j]; real[j] = tr;
-            float ti = imag[i]; imag[i] = imag[j]; imag[j] = ti;
-        }
-    }
-
-    // iterative stages
-    for (int len = 2; len <= N; len <<= 1) {
-        int half = len >> 1;
-        int step = N / len; // twiddle step
-        for (int i = 0; i < N; i += len) {
-            for (int j = 0; j < half; j++) {
-                int idx = j * step;
-                float wr, wi;
-                if (Wc && Ws) {
-                    wr = Wc[idx];
-                    wi = Ws[idx];
-                } else {
-                    float angle = (float)(-2.0 * M_PI * (double)idx / (double)N);
-                    wr = (float)cos(angle);
-                    wi = (float)sin(angle);
-                }
-                if (inverse) wi = -wi;
-                int p = i + j;
-                int q = p + half;
-                float tr = wr * real[q] - wi * imag[q];
-                float ti = wr * imag[q] + wi * real[q];
-                float ur = real[p];
-                float ui = imag[p];
-                real[p] = ur + tr;
-                imag[p] = ui + ti;
-                real[q] = ur - tr;
-                imag[q] = ui - ti;
-            }
-        }
-    }
-}
-
-void fft_run_inplace(float* real, float* imag, int N,
-                     const float* Wc, const float* Ws) {
-    fft_inplace_core(real, imag, N, 0, Wc, Ws);
-}
-
-void ifft_run_inplace(float* real, float* imag, int N,
-                      const float* Wc, const float* Ws) {
-    fft_inplace_core(real, imag, N, 1, Wc, Ws);
-    // 1/N normalization
-    float invN = (float)(1.0 / (double)N);
-    for (int i = 0; i < N; i++) {
-        real[i] *= invN;
-        imag[i] *= invN;
-    }
-}
-
-void fft_integral_inplace(float* xr, float* xi, int n, float df) {
-    const float two_pi_df = (float)(2.0 * M_PI) * df;
-    // positive freqs (exclude DC)
-    for (int i = 1; i < n/2; i++) {
-        float omega = two_pi_df * (float)i;
-        if (omega != 0.0) {
-            float xr0 = xr[i];
-            float xi0 = xi[i];
-            xr[i] =  xi0 / omega;
-            xi[i] = -xr0 / omega;
-        } else {
-            xr[i] = 0.0; xi[i] = 0.0;
-        }
-    }
-    // negative freqs
-    for (int i = n/2 + 1; i < n; i++) {
-        int k = i - n; // negative bin index
-        float omega = two_pi_df * (float)k;
-        if (omega != 0.0) {
-            float xr0 = xr[i];
-            float xi0 = xi[i];
-            xr[i] =  xi0 / omega;
-            xi[i] = -xr0 / omega;
-        } else {
-            xr[i] = 0.0; xi[i] = 0.0;
-        }
-    }
-    // DC
-    xr[0] = 0.0; xi[0] = 0.0;
-}
-
-

+ 0 - 30
components/fft/fft_core.h

@@ -1,30 +0,0 @@
-#pragma once
-
-#include <stddef.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// Generate twiddle factors cos/sin for size N (N must be power of 2)
-// Wc and Ws must have length at least N/2 (float32)
-void fft_generate_twiddles(int N, float* Wc, float* Ws);
-
-// In-place iterative radix-2 FFT
-// If Wc/Ws are NULL, twiddles are computed on the fly
-void fft_run_inplace(float* real, float* imag, int N,
-                     const float* Wc, const float* Ws);
-
-// In-place IFFT with 1/N normalization
-void ifft_run_inplace(float* real, float* imag, int N,
-                      const float* Wc, const float* Ws);
-
-// Frequency-domain integral: X(ω) -> X(ω)/(jω)
-// n is FFT size, df is frequency resolution fs/n
-void fft_integral_inplace(float* xr, float* xi, int n, float df);
-
-#ifdef __cplusplus
-}
-#endif
-
-

+ 33 - 0
components/fft/inc/fft_core_q15.h

@@ -0,0 +1,33 @@
+#ifndef LUAT_FFT_CORE_Q15_H
+#define LUAT_FFT_CORE_Q15_H
+
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// 生成 Q15 旋转因子表(长度 N/2):
+// Wc[k] = cos(2πk/N) 的 Q15 表示;
+// Ws[k] = -sin(2πk/N) 的 Q15 表示(前向FFT用负号)
+void fft_generate_twiddles_q15(int16_t* Wc, int16_t* Ws, int N);
+
+// Q15 原地FFT/IFFT
+// 参数:
+// - real/imag: 长度 N 的 int16_t 数组(Q15)
+// - inverse: 0=FFT, 1=IFFT(内部对 Ws 取反或外置 sign)
+// - Wc/Ws: 长度 N/2 的 Q15 twiddle 表
+// - block_scaling_mode: 0=每级固定右移1位;1=条件缩放(按级最大值)
+// - out_scale_exp: 输出累计右移的位数(用于幅值/单位还原)
+// 返回:0 成功,<0 表示参数错误
+int fft_inplace_q15(int16_t* real, int16_t* imag, int N, int inverse,
+                    const int16_t* Wc, const int16_t* Ws,
+                    int block_scaling_mode, int* out_scale_exp);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // LUAT_FFT_CORE_Q15_H
+
+

+ 137 - 0
components/fft/src/fft_core_q15.c

@@ -0,0 +1,137 @@
+#include "fft_core_q15.h"
+// 去除浮点依赖
+
+static inline int16_t q15_saturate(int32_t x) {
+    if (x > 32767) return 32767;
+    if (x < -32768) return -32768;
+    return (int16_t)x;
+}
+
+static inline int16_t q15_mul(int16_t a, int16_t b) {
+    int32_t t = (int32_t)a * (int32_t)b; // Q15*Q15 -> Q30
+    t += 1 << 14;                        // round
+    t >>= 15;                            // back to Q15
+    return (int16_t)t;                   // 饱和延后到写回
+}
+
+// 占位:不再提供浮点生成(使用绑定层的整型 LUT 生成)
+void fft_generate_twiddles_q15(int16_t* Wc, int16_t* Ws, int N) { (void)Wc; (void)Ws; (void)N; }
+
+int fft_inplace_q15(int16_t* real, int16_t* imag, int N, int inverse,
+                    const int16_t* Wc, const int16_t* Ws,
+                    int block_scaling_mode, int* out_scale_exp) {
+    // 参数检查:验证输入指针和FFT大小的有效性
+    if (!real || !imag || !Wc || !Ws) return -1;
+    if (N <= 1 || (N & (N - 1))) return -2;  // N必须是2的幂
+    int scale_exp = 0;  // 记录总缩放指数
+
+    // 位逆序重排(原地、迭代版本)
+    // 对所有元素进行位逆序重排,为FFT算法做准备
+    for (int i = 1, j = 0; i < N; i++) {
+        int bit = N >> 1;  // 从最高位开始
+        // 计算i的位逆序索引j
+        for (; j & bit; bit >>= 1) j &= ~bit;
+        j |= bit;
+        // 只交换一次,避免重复交换
+        if (i < j) {
+            int16_t tr = real[i]; real[i] = real[j]; real[j] = tr;
+            int16_t ti = imag[i]; imag[i] = imag[j]; imag[j] = ti;
+        }
+    }
+
+    // 确定逆变换的符号:逆变换需要对旋转因子虚部取共轭
+    // 预共轭:调用方需根据 inverse 提供已处理好的 Ws
+
+    // FFT主循环:Cooley-Tukey算法的迭代实现
+    // len从2开始,每次翻倍,直到N
+    for (int len = 2; len <= N; len <<= 1) {
+        int half = len >> 1;          // 当前阶段的半长度
+        int step = N / len;           // 旋转因子的步长
+
+        int need_scale_this_stage = 0; // 本级是否需要缩放的标志
+        int32_t stage_max_abs = 0;     // 本级的最大绝对值
+
+        // 处理每个长度为len的子序列
+        for (int base = 0; base < N; base += len) {
+            int idx = 0;  // 旋转因子索引
+            // 对当前子序列进行蝶形运算
+            for (int j = 0; j < half; j++) {
+                // 获取旋转因子W = wr + i*wi
+                int16_t wr = Wc[idx];
+                int16_t wi = Ws[idx];  // 已按方向传入
+                idx += step;
+
+                // 蝶形运算的两个数据点索引
+                int p = base + j;      // 上半部分索引
+                int q = p + half;      // 下半部分索引
+
+                // 读取输入数据
+                int16_t ur = real[p];  // X[p]的实部
+                int16_t ui = imag[p];  // X[p]的虚部
+                int16_t vr = real[q];  // X[q]的实部
+                int16_t vi = imag[q];  // X[q]的虚部
+
+                // 计算旋转后的值:W * X[q] = (wr + i*wi) * (vr + i*vi)
+                int16_t tr = (int16_t)((int32_t)q15_mul(wr, vr) - (int32_t)q15_mul(wi, vi));
+                int16_t ti = (int16_t)((int32_t)q15_mul(wr, vi) + (int32_t)q15_mul(wi, vr));
+
+                // 蝶形运算:计算输出
+                int32_t xr = (int32_t)ur + (int32_t)tr;  // X[p] = X[p] + W*X[q]
+                int32_t xi = (int32_t)ui + (int32_t)ti;
+                int32_t yr = (int32_t)ur - (int32_t)tr;  // X[q] = X[p] - W*X[q]
+                int32_t yi = (int32_t)ui - (int32_t)ti;
+
+                if (block_scaling_mode == 1) {
+                    // 条件缩放模式:跟踪本级的最大绝对值
+                    int32_t a0 = xr < 0 ? -xr : xr;
+                    int32_t a1 = xi < 0 ? -xi : xi;
+                    int32_t a2 = yr < 0 ? -yr : yr;
+                    int32_t a3 = yi < 0 ? -yi : yi;
+                    if (a0 > stage_max_abs) stage_max_abs = a0;
+                    if (a1 > stage_max_abs) stage_max_abs = a1;
+                    if (a2 > stage_max_abs) stage_max_abs = a2;
+                    if (a3 > stage_max_abs) stage_max_abs = a3;
+                    // 暂存到输出(先不缩放,避免重复计算)
+                    real[p] = q15_saturate(xr);
+                    imag[p] = q15_saturate(xi);
+                    real[q] = q15_saturate(yr);
+                    imag[q] = q15_saturate(yi);
+                } else {
+                    // 固定缩放模式:每级固定右移1位防止溢出
+                    xr >>= 1; xi >>= 1; yr >>= 1; yi >>= 1;
+                    real[p] = q15_saturate(xr);
+                    imag[p] = q15_saturate(xi);
+                    real[q] = q15_saturate(yr);
+                    imag[q] = q15_saturate(yi);
+                }
+            }
+        }
+
+        if (block_scaling_mode == 1) {
+            // 条件缩放:检查是否需要在本级进行缩放
+            // 如果最大值接近Q15范围上限,则统一右移1位
+            if (stage_max_abs > 16384) {  // 16384 = 2^14,接近32767的一半
+                need_scale_this_stage = 1;
+            }
+            if (need_scale_this_stage) {
+                // 对所有数据统一右移1位
+                for (int i = 0; i < N; i++) {
+                    int32_t r = real[i];
+                    int32_t im = imag[i];
+                    r >>= 1; im >>= 1;
+                    real[i] = q15_saturate(r);
+                    imag[i] = q15_saturate(im);
+                }
+                scale_exp += 1;  // 记录缩放次数
+            }
+        } else {
+            scale_exp += 1; // 固定缩放模式:每级缩放指数+1
+        }
+    }
+
+    // 输出总缩放指数(用于后续的幅度校正)
+    if (out_scale_exp) *out_scale_exp = scale_exp;
+    return 0;  // 成功返回
+}
+
+

+ 93 - 0
luat/demo/fft/main.lua

@@ -0,0 +1,93 @@
+-- LuaTools 需要 PROJECT 和 VERSION
+PROJECT = "fft_q15_test"
+VERSION = "1.0.0"
+
+_G.sys = require("sys")
+
+sys.taskInit(function()
+	if not fft then
+		while 1 do sys.wait(1000) log.info("bsp", "此BSP不支持fft库,请检查") end
+	end
+
+	-- FFT 参数配置
+	local N = 2048      -- FFT 点数
+	local fs = 2000     -- 采样频率 (Hz)
+	local freq = 200    -- 测试信号频率 (Hz)
+
+	log.info("fft", "q15 测试开始", "N=" .. N, "fs=" .. fs, "freq=" .. freq)
+
+	-- 分配 zbuff:
+	-- 整数缓冲(U12 以 16bit 承载,低12位有效)用于 q15 内核
+    local real_i16 = zbuff.create(N * 2)
+    local imag_i16 = zbuff.create(N * 2)
+
+	-- 生成 U12 整数正弦写入 i16(避免浮点预处理干扰)
+	for i = 0, N - 1 do
+		local t = i / fs
+		local x = math.sin(2 * math.pi * freq * t)
+		local val = math.floor(2048 + 2047 * x + 0.5)
+		if val < 0 then val = 0 end
+		if val > 4095 then val = 4095 end
+		real_i16:seek(i * 2, zbuff.SEEK_SET); real_i16:writeU16(val)
+		imag_i16:seek(i * 2, zbuff.SEEK_SET); imag_i16:writeU16(0)
+	end
+
+	-- 生成 Q15 旋转因子到 zbuff(避免任何浮点旋转因子)
+	local Wc_q15 = zbuff.create((N // 2) * 2)
+	local Ws_q15 = zbuff.create((N // 2) * 2)
+	fft.generate_twiddles_q15_to_zbuff(N, Wc_q15, Ws_q15)
+
+	-- 执行 Q15 FFT(输入为 U12),显式传入 Q15 twiddle
+	local t0 = mcu.ticks()
+	fft.run(real_i16, imag_i16, N, Wc_q15, Ws_q15, { core = "q15", input_format = "u12" })
+	local t1 = mcu.ticks()
+	log.info("fft", "q15 FFT 完成", "耗时:" .. (t1 - t0) .. "ms")
+
+	-- 扫描前半谱,寻找主峰
+	local peak_k, peak_pow = 1, -1
+	for k = 1, (N // 2) - 1 do
+		real_i16:seek(k * 2, zbuff.SEEK_SET)
+		imag_i16:seek(k * 2, zbuff.SEEK_SET)
+		local rr = real_i16:readI16()
+		local ii = imag_i16:readI16()
+		local p = rr * rr + ii * ii
+		if p > peak_pow then
+			peak_pow = p
+			peak_k = k
+		end
+	end
+	local peak_freq = (peak_k) * fs / N
+	log.info("fft", "主峰(Hz/bin)", string.format("%.2f", peak_freq), peak_k)
+
+	-- 比较:使用 f32 内核处理相同输入(验证 q15 与 f32 结果一致性)
+	-- 复制相同的 U12 输入到新的 zbuff
+	local real_f32 = zbuff.create(N * 4)  -- f32 需要 4 字节
+	local imag_f32 = zbuff.create(N * 4)
+	for i = 0, N - 1 do
+		real_i16:seek(i * 2, zbuff.SEEK_SET)
+		local val = real_i16:readU16()
+		real_f32:seek(i * 4, zbuff.SEEK_SET)
+		imag_f32:seek(i * 4, zbuff.SEEK_SET)
+		real_f32:writeF32(val)  -- 直接写入 U12 原始值
+		imag_f32:writeF32(0.0)
+	end
+
+	-- 生成 f32 旋转因子
+	local Wc, Ws = fft.generate_twiddles(N)
+
+	-- 执行 f32 FFT(输入为 U12)
+	local t2 = mcu.ticks()
+	fft.run(real_f32, imag_f32, N, Wc, Ws, { input_format = "u12" })
+	local t3 = mcu.ticks()
+	local dt_f32 = (t3 - t2)
+	log.info("fft", "f32 FFT 完成", "耗时:" .. dt_f32 .. "ms")
+
+	-- 总结耗时对比
+	log.info("fft", "对比(q15 vs f32, ms)", string.format("%d / %d", (t1 - t0), dt_f32))
+
+	-- 注意:本测试刻意避免浮点旋转因子与f32路径,确保纯Q15链路
+end)
+
+sys.run()
+
+