From: Luke Kenneth Casson Leighton Date: Fri, 16 Jul 2021 19:03:17 +0000 (+0100) Subject: make a test for if size of FFT is less than 8, if so call a separate function X-Git-Tag: xlen-bcd~277 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=fe7fecd060cc24b210f37f9513ec8fe0dd97bff9;p=openpower-isa.git make a test for if size of FFT is less than 8, if so call a separate function idea is to replace that function with assembler --- diff --git a/media/fft/Makefile b/media/fft/Makefile index b65b4cd1..a4252351 100644 --- a/media/fft/Makefile +++ b/media/fft/Makefile @@ -1,4 +1,6 @@ fft-real: fft-real-pair.c fft-real-pair-test.c fft-real-pair.h + powerpc64le-linux-gnu-gcc -S -fverbose-asm -mno-vsx \ + fft-real-pair.c -o fft-real-pair.s gcc fft-real-pair.c fft-real-pair-test.c fft-real-pair.h -o fft-real -lm clean: diff --git a/media/fft/fft-real-pair.c b/media/fft/fft-real-pair.c index 0150f3c3..d2814177 100644 --- a/media/fft/fft-real-pair.c +++ b/media/fft/fft-real-pair.c @@ -46,6 +46,33 @@ bool Fft_inverseTransform(float real[], float imag[], size_t n) { } +/* looking to replace this with assembler, so limit it to n <= 8 */ +void Fft_transformRadixL8(float real[], float imag[], + float cos_table[], float sin_table[], + size_t n) +{ + size_t size = (n / 2) * sizeof(float); + + // Cooley-Tukey decimation-in-time radix-2 FFT + for (size_t size = 2; size <= n; size *= 2) { + size_t halfsize = size / 2; + size_t tablestep = n / size; + for (size_t i = 0; i < n; i += size) { + for (size_t j = i, k = 0; j < i + halfsize; j++, k += tablestep) { + size_t l = j + halfsize; + float tpre = real[l] * cos_table[k] + imag[l] * sin_table[k]; + float tpim = -real[l] * sin_table[k] + imag[l] * cos_table[k]; + real[l] = real[j] - tpre; + imag[l] = imag[j] - tpim; + real[j] += tpre; + imag[j] += tpim; + } + } + if (size == n) // Prevent overflow in 'size *= 2' + break; + } +} + bool Fft_transformRadix2(float real[], float imag[], size_t n) { // Length variables bool status = false; @@ -81,23 +108,27 @@ bool Fft_transformRadix2(float real[], float imag[], size_t n) { } } - // Cooley-Tukey decimation-in-time radix-2 FFT - for (size_t size = 2; size <= n; size *= 2) { - size_t halfsize = size / 2; - size_t tablestep = n / size; - for (size_t i = 0; i < n; i += size) { - for (size_t j = i, k = 0; j < i + halfsize; j++, k += tablestep) { - size_t l = j + halfsize; - float tpre = real[l] * cos_table[k] + imag[l] * sin_table[k]; - float tpim = -real[l] * sin_table[k] + imag[l] * cos_table[k]; - real[l] = real[j] - tpre; - imag[l] = imag[j] - tpim; - real[j] += tpre; - imag[j] += tpim; + if (n <= 8) { + Fft_transformRadixL8(real, imag, cos_table, sin_table, n); + } else { + // Cooley-Tukey decimation-in-time radix-2 FFT + for (size_t size = 2; size <= n; size *= 2) { + size_t halfsize = size / 2; + size_t tablestep = n / size; + for (size_t i = 0; i < n; i += size) { + for (size_t j = i, k = 0; j < i + halfsize; j++, k += tablestep) { + size_t l = j + halfsize; + float tpre = real[l] * cos_table[k] + imag[l] * sin_table[k]; + float tpim = -real[l] * sin_table[k] + imag[l] * cos_table[k]; + real[l] = real[j] - tpre; + imag[l] = imag[j] - tpim; + real[j] += tpre; + imag[j] += tpim; + } } + if (size == n) // Prevent overflow in 'size *= 2' + break; } - if (size == n) // Prevent overflow in 'size *= 2' - break; } status = true;