From fe7fecd060cc24b210f37f9513ec8fe0dd97bff9 Mon Sep 17 00:00:00 2001
From: Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Date: Fri, 16 Jul 2021 20:03:17 +0100
Subject: [PATCH] make a test for if size of FFT is less than 8, if so call a
 separate function idea is to replace that function with assembler

---
 media/fft/Makefile        |  2 ++
 media/fft/fft-real-pair.c | 61 +++++++++++++++++++++++++++++----------
 2 files changed, 48 insertions(+), 15 deletions(-)

diff --git a/media/fft/Makefile b/media/fft/Makefile
index b65b4cd1..a4252351 100644
--- a/media/fft/Makefile
+++ b/media/fft/Makefile
@@ -1,4 +1,6 @@
 fft-real: fft-real-pair.c fft-real-pair-test.c fft-real-pair.h
+	powerpc64le-linux-gnu-gcc -S -fverbose-asm -mno-vsx \
+                                 fft-real-pair.c -o fft-real-pair.s
 	gcc fft-real-pair.c fft-real-pair-test.c fft-real-pair.h -o fft-real -lm
 
 clean:
diff --git a/media/fft/fft-real-pair.c b/media/fft/fft-real-pair.c
index 0150f3c3..d2814177 100644
--- a/media/fft/fft-real-pair.c
+++ b/media/fft/fft-real-pair.c
@@ -46,6 +46,33 @@ bool Fft_inverseTransform(float real[], float imag[], size_t n) {
 }
 
 
+/* looking to replace this with assembler, so limit it to n <= 8 */
+void Fft_transformRadixL8(float real[], float imag[],
+                          float cos_table[], float sin_table[],
+                          size_t n)
+{
+    size_t size = (n / 2) * sizeof(float);
+
+    // Cooley-Tukey decimation-in-time radix-2 FFT
+    for (size_t size = 2; size <= n; size *= 2) {
+        size_t halfsize = size / 2;
+        size_t tablestep = n / size;
+        for (size_t i = 0; i < n; i += size) {
+            for (size_t j = i, k = 0; j < i + halfsize; j++, k += tablestep) {
+                size_t l = j + halfsize;
+                float tpre =  real[l] * cos_table[k] + imag[l] * sin_table[k];
+                float tpim = -real[l] * sin_table[k] + imag[l] * cos_table[k];
+                real[l] = real[j] - tpre;
+                imag[l] = imag[j] - tpim;
+                real[j] += tpre;
+                imag[j] += tpim;
+            }
+        }
+        if (size == n)  // Prevent overflow in 'size *= 2'
+            break;
+    }
+}
+
 bool Fft_transformRadix2(float real[], float imag[], size_t n) {
     // Length variables
     bool status = false;
@@ -81,23 +108,27 @@ bool Fft_transformRadix2(float real[], float imag[], size_t n) {
         }
     }
 
-    // Cooley-Tukey decimation-in-time radix-2 FFT
-    for (size_t size = 2; size <= n; size *= 2) {
-        size_t halfsize = size / 2;
-        size_t tablestep = n / size;
-        for (size_t i = 0; i < n; i += size) {
-            for (size_t j = i, k = 0; j < i + halfsize; j++, k += tablestep) {
-                size_t l = j + halfsize;
-                float tpre =  real[l] * cos_table[k] + imag[l] * sin_table[k];
-                float tpim = -real[l] * sin_table[k] + imag[l] * cos_table[k];
-                real[l] = real[j] - tpre;
-                imag[l] = imag[j] - tpim;
-                real[j] += tpre;
-                imag[j] += tpim;
+    if (n <= 8) {
+        Fft_transformRadixL8(real, imag, cos_table, sin_table, n);
+    } else {
+        // Cooley-Tukey decimation-in-time radix-2 FFT
+        for (size_t size = 2; size <= n; size *= 2) {
+            size_t halfsize = size / 2;
+            size_t tablestep = n / size;
+            for (size_t i = 0; i < n; i += size) {
+                for (size_t j = i, k = 0; j < i + halfsize; j++, k += tablestep) {
+                    size_t l = j + halfsize;
+                    float tpre =  real[l] * cos_table[k] + imag[l] * sin_table[k];
+                    float tpim = -real[l] * sin_table[k] + imag[l] * cos_table[k];
+                    real[l] = real[j] - tpre;
+                    imag[l] = imag[j] - tpim;
+                    real[j] += tpre;
+                    imag[j] += tpim;
+                }
             }
+            if (size == n)  // Prevent overflow in 'size *= 2'
+                break;
         }
-        if (size == n)  // Prevent overflow in 'size *= 2'
-            break;
     }
     status = true;
 
-- 
2.30.2