From: Luke Kenneth Casson Leighton Date: Fri, 16 Jul 2021 15:59:41 +0000 (+0100) Subject: add nayuki project reference code X-Git-Tag: xlen-bcd~281 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=a985b0f6c97205101e60a1115045a1274f1801a6;p=openpower-isa.git add nayuki project reference code https://www.nayuki.io/page/free-small-fft-in-multiple-languages --- diff --git a/media/fft/fft-real-pair-test.c b/media/fft/fft-real-pair-test.c new file mode 100644 index 00000000..514531d1 --- /dev/null +++ b/media/fft/fft-real-pair-test.c @@ -0,0 +1,222 @@ +/* + * FFT and convolution test (C) + * + * Copyright (c) 2020 Project Nayuki. (MIT License) + * https://www.nayuki.io/page/free-small-fft-in-multiple-languages + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * - The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * - The Software is provided "as is", without warranty of any kind, express or + * implied, including but not limited to the warranties of merchantability, + * fitness for a particular purpose and noninfringement. In no event shall the + * authors or copyright holders be liable for any claim, damages or other + * liability, whether in an action of contract, tort or otherwise, arising from, + * out of or in connection with the Software or the use or other dealings in the + * Software. + */ + +#include +#include +#include +#include +#include +#include +#include +#include "fft-real-pair.h" + + +// Private function prototypes +static void test_fft(int n); +static void test_convolution(int n); +static void naive_dft(const double *inreal, const double *inimag, + double *outreal, double *outimag, int n, bool inverse); +static void naive_convolve(const double *xreal, const double *ximag, + const double *yreal, const double *yimag, + double *outreal, double *outimag, int n); +static double log10_rms_err(const double *xreal, const double *ximag, + const double *yreal, const double *yimag, int n); +static double *random_reals(int n); +static void *memdup(const void *src, size_t n); + +static double max_log_error = -INFINITY; + + +/*---- Main and test functions ----*/ + +int main(void) { + srand(time(NULL)); + + // Test power-of-2 size FFTs + for (int i = 0; i <= 12; i++) + test_fft(1 << i); + + // Test small size FFTs + for (int i = 0; i < 30; i++) + test_fft(i); + + // Test diverse size FFTs + for (int i = 0, prev = 0; i <= 100; i++) { + int n = (int)lround(pow(1500, i / 100.0)); + if (n > prev) { + test_fft(n); + prev = n; + } + } + + // Test power-of-2 size convolutions + for (int i = 0; i <= 12; i++) + test_convolution(1 << i); + + // Test diverse size convolutions + for (int i = 0, prev = 0; i <= 100; i++) { + int n = (int)lround(pow(1500, i / 100.0)); + if (n > prev) { + test_convolution(n); + prev = n; + } + } + + printf("\n"); + printf("Max log err = %.1f\n", max_log_error); + printf("Test %s\n", max_log_error < -10 ? "passed" : "failed"); + return EXIT_SUCCESS; +} + + +static void test_fft(int n) { + double *inputreal = random_reals(n); + double *inputimag = random_reals(n); + + double *expectreal = malloc(n * sizeof(double)); + double *expectimag = malloc(n * sizeof(double)); + naive_dft(inputreal, inputimag, expectreal, expectimag, n, false); + + double *actualreal = memdup(inputreal, n * sizeof(double)); + double *actualimag = memdup(inputimag, n * sizeof(double)); + Fft_transform(actualreal, actualimag, n); + double err0 = log10_rms_err(expectreal, expectimag, actualreal, actualimag, n); + + for (int i = 0; i < n; i++) { + actualreal[i] /= n; + actualimag[i] /= n; + } + Fft_inverseTransform(actualreal, actualimag, n); + double err1 = log10_rms_err(inputreal, inputimag, actualreal, actualimag, n); + printf("fftsize=%4d logerr=%5.1f\n", n, (err0 > err1 ? err0 : err1)); + + free(inputreal); + free(inputimag); + free(expectreal); + free(expectimag); + free(actualreal); + free(actualimag); +} + + +static void test_convolution(int n) { + double *input0real = random_reals(n); + double *input0imag = random_reals(n); + double *input1real = random_reals(n); + double *input1imag = random_reals(n); + + double *expectreal = malloc(n * sizeof(double)); + double *expectimag = malloc(n * sizeof(double)); + naive_convolve(input0real, input0imag, input1real, input1imag, expectreal, expectimag, n); + + double *actualreal = malloc(n * sizeof(double)); + double *actualimag = malloc(n * sizeof(double)); + Fft_convolveComplex(input0real, input0imag, input1real, input1imag, actualreal, actualimag, n); + + printf("convsize=%4d logerr=%5.1f\n", n, + log10_rms_err(expectreal, expectimag, actualreal, actualimag, n)); + + free(input0real); + free(input0imag); + free(input1real); + free(input1imag); + free(expectreal); + free(expectimag); + free(actualreal); + free(actualimag); +} + + +/*---- Naive reference computation functions ----*/ + +static void naive_dft(const double *inreal, const double *inimag, + double *outreal, double *outimag, int n, bool inverse) { + + double coef = (inverse ? 2 : -2) * M_PI; + for (int k = 0; k < n; k++) { // For each output element + double sumreal = 0; + double sumimag = 0; + for (int t = 0; t < n; t++) { // For each input element + double angle = coef * ((long long)t * k % n) / n; + sumreal += inreal[t] * cos(angle) - inimag[t] * sin(angle); + sumimag += inreal[t] * sin(angle) + inimag[t] * cos(angle); + } + outreal[k] = sumreal; + outimag[k] = sumimag; + } +} + + +static void naive_convolve(const double *xreal, const double *ximag, + const double *yreal, const double *yimag, + double *outreal, double *outimag, int n) { + + for (int i = 0; i < n; i++) { + outreal[i] = 0; + outimag[i] = 0; + } + for (int i = 0; i < n; i++) { + for (int j = 0; j < n; j++) { + int k = (i + j) % n; + outreal[k] += xreal[i] * yreal[j] - ximag[i] * yimag[j]; + outimag[k] += xreal[i] * yimag[j] + ximag[i] * yreal[j]; + } + } +} + + +/*---- Utility functions ----*/ + +static double log10_rms_err(const double *xreal, const double *ximag, + const double *yreal, const double *yimag, int n) { + + double err = pow(10, -99 * 2); + for (int i = 0; i < n; i++) { + double real = xreal[i] - yreal[i]; + double imag = ximag[i] - yimag[i]; + err += real * real + imag * imag; + } + + err /= n > 0 ? n : 1; + err = sqrt(err); // Now this is a root mean square (RMS) error + err = log10(err); + if (err > max_log_error) + max_log_error = err; + return err; +} + + +static double *random_reals(int n) { + double *result = malloc(n * sizeof(double)); + for (int i = 0; i < n; i++) + result[i] = (rand() / (RAND_MAX + 1.0)) * 2 - 1; + return result; +} + + +static void *memdup(const void *src, size_t n) { + void *dest = malloc(n); + if (n > 0 && dest != NULL) + memcpy(dest, src, n); + return dest; +} diff --git a/media/fft/fft-real-pair.c b/media/fft/fft-real-pair.c new file mode 100644 index 00000000..e33c330e --- /dev/null +++ b/media/fft/fft-real-pair.c @@ -0,0 +1,263 @@ +/* + * Free FFT and convolution (C) + * + * Copyright (c) 2020 Project Nayuki. (MIT License) + * https://www.nayuki.io/page/free-small-fft-in-multiple-languages + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * - The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * - The Software is provided "as is", without warranty of any kind, express or + * implied, including but not limited to the warranties of merchantability, + * fitness for a particular purpose and noninfringement. In no event shall the + * authors or copyright holders be liable for any claim, damages or other + * liability, whether in an action of contract, tort or otherwise, arising from, + * out of or in connection with the Software or the use or other dealings in the + * Software. + */ + +#include +#include +#include +#include +#include "fft-real-pair.h" + + +// Private function prototypes +static size_t reverse_bits(size_t val, int width); +static void *memdup(const void *src, size_t n); + + +bool Fft_transform(double real[], double imag[], size_t n) { + if (n == 0) + return true; + else if ((n & (n - 1)) == 0) // Is power of 2 + return Fft_transformRadix2(real, imag, n); + else // More complicated algorithm for arbitrary sizes + return Fft_transformBluestein(real, imag, n); +} + + +bool Fft_inverseTransform(double real[], double imag[], size_t n) { + return Fft_transform(imag, real, n); +} + + +bool Fft_transformRadix2(double real[], double imag[], size_t n) { + // Length variables + bool status = false; + int levels = 0; // Compute levels = floor(log2(n)) + for (size_t temp = n; temp > 1U; temp >>= 1) + levels++; + if ((size_t)1U << levels != n) + return false; // n is not a power of 2 + + // Trigonometric tables + if (SIZE_MAX / sizeof(double) < n / 2) + return false; + size_t size = (n / 2) * sizeof(double); + double *cos_table = malloc(size); + double *sin_table = malloc(size); + if (cos_table == NULL || sin_table == NULL) + goto cleanup; + for (size_t i = 0; i < n / 2; i++) { + cos_table[i] = cos(2 * M_PI * i / n); + sin_table[i] = sin(2 * M_PI * i / n); + } + + // Bit-reversed addressing permutation + for (size_t i = 0; i < n; i++) { + size_t j = reverse_bits(i, levels); + if (j > i) { + double temp = real[i]; + real[i] = real[j]; + real[j] = temp; + temp = imag[i]; + imag[i] = imag[j]; + imag[j] = temp; + } + } + + // Cooley-Tukey decimation-in-time radix-2 FFT + for (size_t size = 2; size <= n; size *= 2) { + size_t halfsize = size / 2; + size_t tablestep = n / size; + for (size_t i = 0; i < n; i += size) { + for (size_t j = i, k = 0; j < i + halfsize; j++, k += tablestep) { + size_t l = j + halfsize; + double tpre = real[l] * cos_table[k] + imag[l] * sin_table[k]; + double tpim = -real[l] * sin_table[k] + imag[l] * cos_table[k]; + real[l] = real[j] - tpre; + imag[l] = imag[j] - tpim; + real[j] += tpre; + imag[j] += tpim; + } + } + if (size == n) // Prevent overflow in 'size *= 2' + break; + } + status = true; + +cleanup: + free(cos_table); + free(sin_table); + return status; +} + + +bool Fft_transformBluestein(double real[], double imag[], size_t n) { + bool status = false; + + // Find a power-of-2 convolution length m such that m >= n * 2 + 1 + size_t m = 1; + while (m / 2 <= n) { + if (m > SIZE_MAX / 2) + return false; + m *= 2; + } + + // Allocate memory + if (SIZE_MAX / sizeof(double) < n || SIZE_MAX / sizeof(double) < m) + return false; + size_t size_n = n * sizeof(double); + size_t size_m = m * sizeof(double); + double *cos_table = malloc(size_n); + double *sin_table = malloc(size_n); + double *areal = calloc(m, sizeof(double)); + double *aimag = calloc(m, sizeof(double)); + double *breal = calloc(m, sizeof(double)); + double *bimag = calloc(m, sizeof(double)); + double *creal = malloc(size_m); + double *cimag = malloc(size_m); + if (cos_table == NULL || sin_table == NULL + || areal == NULL || aimag == NULL + || breal == NULL || bimag == NULL + || creal == NULL || cimag == NULL) + goto cleanup; + + // Trigonometric tables + for (size_t i = 0; i < n; i++) { + uintmax_t temp = ((uintmax_t)i * i) % ((uintmax_t)n * 2); + double angle = M_PI * temp / n; + cos_table[i] = cos(angle); + sin_table[i] = sin(angle); + } + + // Temporary vectors and preprocessing + for (size_t i = 0; i < n; i++) { + areal[i] = real[i] * cos_table[i] + imag[i] * sin_table[i]; + aimag[i] = -real[i] * sin_table[i] + imag[i] * cos_table[i]; + } + breal[0] = cos_table[0]; + bimag[0] = sin_table[0]; + for (size_t i = 1; i < n; i++) { + breal[i] = breal[m - i] = cos_table[i]; + bimag[i] = bimag[m - i] = sin_table[i]; + } + + // Convolution + if (!Fft_convolveComplex(areal, aimag, breal, bimag, creal, cimag, m)) + goto cleanup; + + // Postprocessing + for (size_t i = 0; i < n; i++) { + real[i] = creal[i] * cos_table[i] + cimag[i] * sin_table[i]; + imag[i] = -creal[i] * sin_table[i] + cimag[i] * cos_table[i]; + } + status = true; + + // Deallocation +cleanup: + free(cos_table); + free(sin_table); + free(areal); + free(aimag); + free(breal); + free(bimag); + free(creal); + free(cimag); + return status; +} + + +bool Fft_convolveReal(const double xvec[], const double yvec[], double outvec[], size_t n) { + bool status = false; + double *ximag = calloc(n, sizeof(double)); + double *yimag = calloc(n, sizeof(double)); + double *zimag = calloc(n, sizeof(double)); + if (ximag == NULL || yimag == NULL || zimag == NULL) + goto cleanup; + + status = Fft_convolveComplex(xvec, ximag, yvec, yimag, outvec, zimag, n); +cleanup: + free(ximag); + free(yimag); + free(zimag); + return status; +} + + +bool Fft_convolveComplex( + const double xreal[], const double ximag[], + const double yreal[], const double yimag[], + double outreal[], double outimag[], size_t n) { + + bool status = false; + if (SIZE_MAX / sizeof(double) < n) + return false; + size_t size = n * sizeof(double); + + double *xr = memdup(xreal, size); + double *xi = memdup(ximag, size); + double *yr = memdup(yreal, size); + double *yi = memdup(yimag, size); + if (xr == NULL || xi == NULL || yr == NULL || yi == NULL) + goto cleanup; + + if (!Fft_transform(xr, xi, n)) + goto cleanup; + if (!Fft_transform(yr, yi, n)) + goto cleanup; + + for (size_t i = 0; i < n; i++) { + double temp = xr[i] * yr[i] - xi[i] * yi[i]; + xi[i] = xi[i] * yr[i] + xr[i] * yi[i]; + xr[i] = temp; + } + if (!Fft_inverseTransform(xr, xi, n)) + goto cleanup; + + for (size_t i = 0; i < n; i++) { // Scaling (because this FFT implementation omits it) + outreal[i] = xr[i] / n; + outimag[i] = xi[i] / n; + } + status = true; + +cleanup: + free(xr); + free(xi); + free(yr); + free(yi); + return status; +} + + +static size_t reverse_bits(size_t val, int width) { + size_t result = 0; + for (int i = 0; i < width; i++, val >>= 1) + result = (result << 1) | (val & 1U); + return result; +} + + +static void *memdup(const void *src, size_t n) { + void *dest = malloc(n); + if (n > 0 && dest != NULL) + memcpy(dest, src, n); + return dest; +} diff --git a/media/fft/fft-real-pair.h b/media/fft/fft-real-pair.h new file mode 100644 index 00000000..fe27c764 --- /dev/null +++ b/media/fft/fft-real-pair.h @@ -0,0 +1,82 @@ +/* + * Free FFT and convolution (C) + * + * Copyright (c) 2020 Project Nayuki. (MIT License) + * https://www.nayuki.io/page/free-small-fft-in-multiple-languages + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * - The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * - The Software is provided "as is", without warranty of any kind, express or + * implied, including but not limited to the warranties of merchantability, + * fitness for a particular purpose and noninfringement. In no event shall the + * authors or copyright holders be liable for any claim, damages or other + * liability, whether in an action of contract, tort or otherwise, arising from, + * out of or in connection with the Software or the use or other dealings in the + * Software. + */ + +#pragma once + +#include +#include + + +#ifdef __cplusplus +extern "C" { +#endif + + +/* + * Computes the discrete Fourier transform (DFT) of the given complex vector, storing the result back into the vector. + * The vector can have any length. This is a wrapper function. Returns true if successful, false otherwise (out of memory). + */ +bool Fft_transform(double real[], double imag[], size_t n); + + +/* + * Computes the inverse discrete Fourier transform (IDFT) of the given complex vector, storing the result back into the vector. + * The vector can have any length. This is a wrapper function. This transform does not perform scaling, so the inverse is not a true inverse. + * Returns true if successful, false otherwise (out of memory). + */ +bool Fft_inverseTransform(double real[], double imag[], size_t n); + + +/* + * Computes the discrete Fourier transform (DFT) of the given complex vector, storing the result back into the vector. + * The vector's length must be a power of 2. Uses the Cooley-Tukey decimation-in-time radix-2 algorithm. + * Returns true if successful, false otherwise (n is not a power of 2, or out of memory). + */ +bool Fft_transformRadix2(double real[], double imag[], size_t n); + + +/* + * Computes the discrete Fourier transform (DFT) of the given complex vector, storing the result back into the vector. + * The vector can have any length. This requires the convolution function, which in turn requires the radix-2 FFT function. + * Uses Bluestein's chirp z-transform algorithm. Returns true if successful, false otherwise (out of memory). + */ +bool Fft_transformBluestein(double real[], double imag[], size_t n); + + +/* + * Computes the circular convolution of the given real vectors. Each vector's length must be the same. + * Returns true if successful, false otherwise (out of memory). + */ +bool Fft_convolveReal(const double xvec[], const double yvec[], double outvec[], size_t n); + + +/* + * Computes the circular convolution of the given complex vectors. Each vector's length must be the same. + * Returns true if successful, false otherwise (out of memory). + */ +bool Fft_convolveComplex(const double xreal[], const double ximag[], const double yreal[], const double yimag[], double outreal[], double outimag[], size_t n); + + +#ifdef __cplusplus +} +#endif diff --git a/src/openpower/decoder/isa/test_caller_svp64_fft.py b/src/openpower/decoder/isa/test_caller_svp64_fft.py index 346f9e4d..b4c94280 100644 --- a/src/openpower/decoder/isa/test_caller_svp64_fft.py +++ b/src/openpower/decoder/isa/test_caller_svp64_fft.py @@ -534,10 +534,10 @@ class FFTTestCase(FHDLTestCase): "svremap 31, 1, 0, 2, 0, 1, 1", """ lst = SVP64Asm( [ - # set triple butterfly mode + # set triple butterfly mode with "REMAP" schedule "svshape 8, 1, 1, 1, 1", - # tpre "svremap 31, 1, 0, 2, 0, 1, 1", + # tpre "sv.fmuls 24, 0.v, 16.v", # mul1_r = r*cos_r "sv.fmadds 24, 8.v, 20.v, 24", # mul2_r = i*sin_i # tpre = mul1_r + mul2_r