make a test for if size of FFT is less than 8, if so call a separate function
authorLuke Kenneth Casson Leighton <lkcl@lkcl.net>
Fri, 16 Jul 2021 19:03:17 +0000 (20:03 +0100)
committerLuke Kenneth Casson Leighton <lkcl@lkcl.net>
Fri, 16 Jul 2021 19:03:17 +0000 (20:03 +0100)
idea is to replace that function with assembler

media/fft/Makefile
media/fft/fft-real-pair.c

index b65b4cd1af2700b686488d6800b046930c1c629d..a4252351aed799c86628f52d0c1f5adb893d3780 100644 (file)
@@ -1,4 +1,6 @@
 fft-real: fft-real-pair.c fft-real-pair-test.c fft-real-pair.h
+       powerpc64le-linux-gnu-gcc -S -fverbose-asm -mno-vsx \
+                                 fft-real-pair.c -o fft-real-pair.s
        gcc fft-real-pair.c fft-real-pair-test.c fft-real-pair.h -o fft-real -lm
 
 clean:
index 0150f3c39b00879e749c3bed3555c7bdd107161c..d28141773c7293893e07036a165212d46cb3b379 100644 (file)
@@ -46,6 +46,33 @@ bool Fft_inverseTransform(float real[], float imag[], size_t n) {
 }
 
 
+/* looking to replace this with assembler, so limit it to n <= 8 */
+void Fft_transformRadixL8(float real[], float imag[],
+                          float cos_table[], float sin_table[],
+                          size_t n)
+{
+    size_t size = (n / 2) * sizeof(float);
+
+    // Cooley-Tukey decimation-in-time radix-2 FFT
+    for (size_t size = 2; size <= n; size *= 2) {
+        size_t halfsize = size / 2;
+        size_t tablestep = n / size;
+        for (size_t i = 0; i < n; i += size) {
+            for (size_t j = i, k = 0; j < i + halfsize; j++, k += tablestep) {
+                size_t l = j + halfsize;
+                float tpre =  real[l] * cos_table[k] + imag[l] * sin_table[k];
+                float tpim = -real[l] * sin_table[k] + imag[l] * cos_table[k];
+                real[l] = real[j] - tpre;
+                imag[l] = imag[j] - tpim;
+                real[j] += tpre;
+                imag[j] += tpim;
+            }
+        }
+        if (size == n)  // Prevent overflow in 'size *= 2'
+            break;
+    }
+}
+
 bool Fft_transformRadix2(float real[], float imag[], size_t n) {
     // Length variables
     bool status = false;
@@ -81,23 +108,27 @@ bool Fft_transformRadix2(float real[], float imag[], size_t n) {
         }
     }
 
-    // Cooley-Tukey decimation-in-time radix-2 FFT
-    for (size_t size = 2; size <= n; size *= 2) {
-        size_t halfsize = size / 2;
-        size_t tablestep = n / size;
-        for (size_t i = 0; i < n; i += size) {
-            for (size_t j = i, k = 0; j < i + halfsize; j++, k += tablestep) {
-                size_t l = j + halfsize;
-                float tpre =  real[l] * cos_table[k] + imag[l] * sin_table[k];
-                float tpim = -real[l] * sin_table[k] + imag[l] * cos_table[k];
-                real[l] = real[j] - tpre;
-                imag[l] = imag[j] - tpim;
-                real[j] += tpre;
-                imag[j] += tpim;
+    if (n <= 8) {
+        Fft_transformRadixL8(real, imag, cos_table, sin_table, n);
+    } else {
+        // Cooley-Tukey decimation-in-time radix-2 FFT
+        for (size_t size = 2; size <= n; size *= 2) {
+            size_t halfsize = size / 2;
+            size_t tablestep = n / size;
+            for (size_t i = 0; i < n; i += size) {
+                for (size_t j = i, k = 0; j < i + halfsize; j++, k += tablestep) {
+                    size_t l = j + halfsize;
+                    float tpre =  real[l] * cos_table[k] + imag[l] * sin_table[k];
+                    float tpim = -real[l] * sin_table[k] + imag[l] * cos_table[k];
+                    real[l] = real[j] - tpre;
+                    imag[l] = imag[j] - tpim;
+                    real[j] += tpre;
+                    imag[j] += tpim;
+                }
             }
+            if (size == n)  // Prevent overflow in 'size *= 2'
+                break;
         }
-        if (size == n)  // Prevent overflow in 'size *= 2'
-            break;
     }
     status = true;