--- /dev/null
+/*
+ * FFT and convolution test (C)
+ *
+ * Copyright (c) 2020 Project Nayuki. (MIT License)
+ * https://www.nayuki.io/page/free-small-fft-in-multiple-languages
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+ * the Software, and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ * - The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * - The Software is provided "as is", without warranty of any kind, express or
+ * implied, including but not limited to the warranties of merchantability,
+ * fitness for a particular purpose and noninfringement. In no event shall the
+ * authors or copyright holders be liable for any claim, damages or other
+ * liability, whether in an action of contract, tort or otherwise, arising from,
+ * out of or in connection with the Software or the use or other dealings in the
+ * Software.
+ */
+
+#include <math.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include "fft-real-pair.h"
+
+
+// Private function prototypes
+static void test_fft(int n);
+static void test_convolution(int n);
+static void naive_dft(const double *inreal, const double *inimag,
+ double *outreal, double *outimag, int n, bool inverse);
+static void naive_convolve(const double *xreal, const double *ximag,
+ const double *yreal, const double *yimag,
+ double *outreal, double *outimag, int n);
+static double log10_rms_err(const double *xreal, const double *ximag,
+ const double *yreal, const double *yimag, int n);
+static double *random_reals(int n);
+static void *memdup(const void *src, size_t n);
+
+static double max_log_error = -INFINITY;
+
+
+/*---- Main and test functions ----*/
+
+int main(void) {
+ srand(time(NULL));
+
+ // Test power-of-2 size FFTs
+ for (int i = 0; i <= 12; i++)
+ test_fft(1 << i);
+
+ // Test small size FFTs
+ for (int i = 0; i < 30; i++)
+ test_fft(i);
+
+ // Test diverse size FFTs
+ for (int i = 0, prev = 0; i <= 100; i++) {
+ int n = (int)lround(pow(1500, i / 100.0));
+ if (n > prev) {
+ test_fft(n);
+ prev = n;
+ }
+ }
+
+ // Test power-of-2 size convolutions
+ for (int i = 0; i <= 12; i++)
+ test_convolution(1 << i);
+
+ // Test diverse size convolutions
+ for (int i = 0, prev = 0; i <= 100; i++) {
+ int n = (int)lround(pow(1500, i / 100.0));
+ if (n > prev) {
+ test_convolution(n);
+ prev = n;
+ }
+ }
+
+ printf("\n");
+ printf("Max log err = %.1f\n", max_log_error);
+ printf("Test %s\n", max_log_error < -10 ? "passed" : "failed");
+ return EXIT_SUCCESS;
+}
+
+
+static void test_fft(int n) {
+ double *inputreal = random_reals(n);
+ double *inputimag = random_reals(n);
+
+ double *expectreal = malloc(n * sizeof(double));
+ double *expectimag = malloc(n * sizeof(double));
+ naive_dft(inputreal, inputimag, expectreal, expectimag, n, false);
+
+ double *actualreal = memdup(inputreal, n * sizeof(double));
+ double *actualimag = memdup(inputimag, n * sizeof(double));
+ Fft_transform(actualreal, actualimag, n);
+ double err0 = log10_rms_err(expectreal, expectimag, actualreal, actualimag, n);
+
+ for (int i = 0; i < n; i++) {
+ actualreal[i] /= n;
+ actualimag[i] /= n;
+ }
+ Fft_inverseTransform(actualreal, actualimag, n);
+ double err1 = log10_rms_err(inputreal, inputimag, actualreal, actualimag, n);
+ printf("fftsize=%4d logerr=%5.1f\n", n, (err0 > err1 ? err0 : err1));
+
+ free(inputreal);
+ free(inputimag);
+ free(expectreal);
+ free(expectimag);
+ free(actualreal);
+ free(actualimag);
+}
+
+
+static void test_convolution(int n) {
+ double *input0real = random_reals(n);
+ double *input0imag = random_reals(n);
+ double *input1real = random_reals(n);
+ double *input1imag = random_reals(n);
+
+ double *expectreal = malloc(n * sizeof(double));
+ double *expectimag = malloc(n * sizeof(double));
+ naive_convolve(input0real, input0imag, input1real, input1imag, expectreal, expectimag, n);
+
+ double *actualreal = malloc(n * sizeof(double));
+ double *actualimag = malloc(n * sizeof(double));
+ Fft_convolveComplex(input0real, input0imag, input1real, input1imag, actualreal, actualimag, n);
+
+ printf("convsize=%4d logerr=%5.1f\n", n,
+ log10_rms_err(expectreal, expectimag, actualreal, actualimag, n));
+
+ free(input0real);
+ free(input0imag);
+ free(input1real);
+ free(input1imag);
+ free(expectreal);
+ free(expectimag);
+ free(actualreal);
+ free(actualimag);
+}
+
+
+/*---- Naive reference computation functions ----*/
+
+static void naive_dft(const double *inreal, const double *inimag,
+ double *outreal, double *outimag, int n, bool inverse) {
+
+ double coef = (inverse ? 2 : -2) * M_PI;
+ for (int k = 0; k < n; k++) { // For each output element
+ double sumreal = 0;
+ double sumimag = 0;
+ for (int t = 0; t < n; t++) { // For each input element
+ double angle = coef * ((long long)t * k % n) / n;
+ sumreal += inreal[t] * cos(angle) - inimag[t] * sin(angle);
+ sumimag += inreal[t] * sin(angle) + inimag[t] * cos(angle);
+ }
+ outreal[k] = sumreal;
+ outimag[k] = sumimag;
+ }
+}
+
+
+static void naive_convolve(const double *xreal, const double *ximag,
+ const double *yreal, const double *yimag,
+ double *outreal, double *outimag, int n) {
+
+ for (int i = 0; i < n; i++) {
+ outreal[i] = 0;
+ outimag[i] = 0;
+ }
+ for (int i = 0; i < n; i++) {
+ for (int j = 0; j < n; j++) {
+ int k = (i + j) % n;
+ outreal[k] += xreal[i] * yreal[j] - ximag[i] * yimag[j];
+ outimag[k] += xreal[i] * yimag[j] + ximag[i] * yreal[j];
+ }
+ }
+}
+
+
+/*---- Utility functions ----*/
+
+static double log10_rms_err(const double *xreal, const double *ximag,
+ const double *yreal, const double *yimag, int n) {
+
+ double err = pow(10, -99 * 2);
+ for (int i = 0; i < n; i++) {
+ double real = xreal[i] - yreal[i];
+ double imag = ximag[i] - yimag[i];
+ err += real * real + imag * imag;
+ }
+
+ err /= n > 0 ? n : 1;
+ err = sqrt(err); // Now this is a root mean square (RMS) error
+ err = log10(err);
+ if (err > max_log_error)
+ max_log_error = err;
+ return err;
+}
+
+
+static double *random_reals(int n) {
+ double *result = malloc(n * sizeof(double));
+ for (int i = 0; i < n; i++)
+ result[i] = (rand() / (RAND_MAX + 1.0)) * 2 - 1;
+ return result;
+}
+
+
+static void *memdup(const void *src, size_t n) {
+ void *dest = malloc(n);
+ if (n > 0 && dest != NULL)
+ memcpy(dest, src, n);
+ return dest;
+}
--- /dev/null
+/*
+ * Free FFT and convolution (C)
+ *
+ * Copyright (c) 2020 Project Nayuki. (MIT License)
+ * https://www.nayuki.io/page/free-small-fft-in-multiple-languages
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+ * the Software, and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ * - The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * - The Software is provided "as is", without warranty of any kind, express or
+ * implied, including but not limited to the warranties of merchantability,
+ * fitness for a particular purpose and noninfringement. In no event shall the
+ * authors or copyright holders be liable for any claim, damages or other
+ * liability, whether in an action of contract, tort or otherwise, arising from,
+ * out of or in connection with the Software or the use or other dealings in the
+ * Software.
+ */
+
+#include <math.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include "fft-real-pair.h"
+
+
+// Private function prototypes
+static size_t reverse_bits(size_t val, int width);
+static void *memdup(const void *src, size_t n);
+
+
+bool Fft_transform(double real[], double imag[], size_t n) {
+ if (n == 0)
+ return true;
+ else if ((n & (n - 1)) == 0) // Is power of 2
+ return Fft_transformRadix2(real, imag, n);
+ else // More complicated algorithm for arbitrary sizes
+ return Fft_transformBluestein(real, imag, n);
+}
+
+
+bool Fft_inverseTransform(double real[], double imag[], size_t n) {
+ return Fft_transform(imag, real, n);
+}
+
+
+bool Fft_transformRadix2(double real[], double imag[], size_t n) {
+ // Length variables
+ bool status = false;
+ int levels = 0; // Compute levels = floor(log2(n))
+ for (size_t temp = n; temp > 1U; temp >>= 1)
+ levels++;
+ if ((size_t)1U << levels != n)
+ return false; // n is not a power of 2
+
+ // Trigonometric tables
+ if (SIZE_MAX / sizeof(double) < n / 2)
+ return false;
+ size_t size = (n / 2) * sizeof(double);
+ double *cos_table = malloc(size);
+ double *sin_table = malloc(size);
+ if (cos_table == NULL || sin_table == NULL)
+ goto cleanup;
+ for (size_t i = 0; i < n / 2; i++) {
+ cos_table[i] = cos(2 * M_PI * i / n);
+ sin_table[i] = sin(2 * M_PI * i / n);
+ }
+
+ // Bit-reversed addressing permutation
+ for (size_t i = 0; i < n; i++) {
+ size_t j = reverse_bits(i, levels);
+ if (j > i) {
+ double temp = real[i];
+ real[i] = real[j];
+ real[j] = temp;
+ temp = imag[i];
+ imag[i] = imag[j];
+ imag[j] = temp;
+ }
+ }
+
+ // Cooley-Tukey decimation-in-time radix-2 FFT
+ for (size_t size = 2; size <= n; size *= 2) {
+ size_t halfsize = size / 2;
+ size_t tablestep = n / size;
+ for (size_t i = 0; i < n; i += size) {
+ for (size_t j = i, k = 0; j < i + halfsize; j++, k += tablestep) {
+ size_t l = j + halfsize;
+ double tpre = real[l] * cos_table[k] + imag[l] * sin_table[k];
+ double tpim = -real[l] * sin_table[k] + imag[l] * cos_table[k];
+ real[l] = real[j] - tpre;
+ imag[l] = imag[j] - tpim;
+ real[j] += tpre;
+ imag[j] += tpim;
+ }
+ }
+ if (size == n) // Prevent overflow in 'size *= 2'
+ break;
+ }
+ status = true;
+
+cleanup:
+ free(cos_table);
+ free(sin_table);
+ return status;
+}
+
+
+bool Fft_transformBluestein(double real[], double imag[], size_t n) {
+ bool status = false;
+
+ // Find a power-of-2 convolution length m such that m >= n * 2 + 1
+ size_t m = 1;
+ while (m / 2 <= n) {
+ if (m > SIZE_MAX / 2)
+ return false;
+ m *= 2;
+ }
+
+ // Allocate memory
+ if (SIZE_MAX / sizeof(double) < n || SIZE_MAX / sizeof(double) < m)
+ return false;
+ size_t size_n = n * sizeof(double);
+ size_t size_m = m * sizeof(double);
+ double *cos_table = malloc(size_n);
+ double *sin_table = malloc(size_n);
+ double *areal = calloc(m, sizeof(double));
+ double *aimag = calloc(m, sizeof(double));
+ double *breal = calloc(m, sizeof(double));
+ double *bimag = calloc(m, sizeof(double));
+ double *creal = malloc(size_m);
+ double *cimag = malloc(size_m);
+ if (cos_table == NULL || sin_table == NULL
+ || areal == NULL || aimag == NULL
+ || breal == NULL || bimag == NULL
+ || creal == NULL || cimag == NULL)
+ goto cleanup;
+
+ // Trigonometric tables
+ for (size_t i = 0; i < n; i++) {
+ uintmax_t temp = ((uintmax_t)i * i) % ((uintmax_t)n * 2);
+ double angle = M_PI * temp / n;
+ cos_table[i] = cos(angle);
+ sin_table[i] = sin(angle);
+ }
+
+ // Temporary vectors and preprocessing
+ for (size_t i = 0; i < n; i++) {
+ areal[i] = real[i] * cos_table[i] + imag[i] * sin_table[i];
+ aimag[i] = -real[i] * sin_table[i] + imag[i] * cos_table[i];
+ }
+ breal[0] = cos_table[0];
+ bimag[0] = sin_table[0];
+ for (size_t i = 1; i < n; i++) {
+ breal[i] = breal[m - i] = cos_table[i];
+ bimag[i] = bimag[m - i] = sin_table[i];
+ }
+
+ // Convolution
+ if (!Fft_convolveComplex(areal, aimag, breal, bimag, creal, cimag, m))
+ goto cleanup;
+
+ // Postprocessing
+ for (size_t i = 0; i < n; i++) {
+ real[i] = creal[i] * cos_table[i] + cimag[i] * sin_table[i];
+ imag[i] = -creal[i] * sin_table[i] + cimag[i] * cos_table[i];
+ }
+ status = true;
+
+ // Deallocation
+cleanup:
+ free(cos_table);
+ free(sin_table);
+ free(areal);
+ free(aimag);
+ free(breal);
+ free(bimag);
+ free(creal);
+ free(cimag);
+ return status;
+}
+
+
+bool Fft_convolveReal(const double xvec[], const double yvec[], double outvec[], size_t n) {
+ bool status = false;
+ double *ximag = calloc(n, sizeof(double));
+ double *yimag = calloc(n, sizeof(double));
+ double *zimag = calloc(n, sizeof(double));
+ if (ximag == NULL || yimag == NULL || zimag == NULL)
+ goto cleanup;
+
+ status = Fft_convolveComplex(xvec, ximag, yvec, yimag, outvec, zimag, n);
+cleanup:
+ free(ximag);
+ free(yimag);
+ free(zimag);
+ return status;
+}
+
+
+bool Fft_convolveComplex(
+ const double xreal[], const double ximag[],
+ const double yreal[], const double yimag[],
+ double outreal[], double outimag[], size_t n) {
+
+ bool status = false;
+ if (SIZE_MAX / sizeof(double) < n)
+ return false;
+ size_t size = n * sizeof(double);
+
+ double *xr = memdup(xreal, size);
+ double *xi = memdup(ximag, size);
+ double *yr = memdup(yreal, size);
+ double *yi = memdup(yimag, size);
+ if (xr == NULL || xi == NULL || yr == NULL || yi == NULL)
+ goto cleanup;
+
+ if (!Fft_transform(xr, xi, n))
+ goto cleanup;
+ if (!Fft_transform(yr, yi, n))
+ goto cleanup;
+
+ for (size_t i = 0; i < n; i++) {
+ double temp = xr[i] * yr[i] - xi[i] * yi[i];
+ xi[i] = xi[i] * yr[i] + xr[i] * yi[i];
+ xr[i] = temp;
+ }
+ if (!Fft_inverseTransform(xr, xi, n))
+ goto cleanup;
+
+ for (size_t i = 0; i < n; i++) { // Scaling (because this FFT implementation omits it)
+ outreal[i] = xr[i] / n;
+ outimag[i] = xi[i] / n;
+ }
+ status = true;
+
+cleanup:
+ free(xr);
+ free(xi);
+ free(yr);
+ free(yi);
+ return status;
+}
+
+
+static size_t reverse_bits(size_t val, int width) {
+ size_t result = 0;
+ for (int i = 0; i < width; i++, val >>= 1)
+ result = (result << 1) | (val & 1U);
+ return result;
+}
+
+
+static void *memdup(const void *src, size_t n) {
+ void *dest = malloc(n);
+ if (n > 0 && dest != NULL)
+ memcpy(dest, src, n);
+ return dest;
+}
--- /dev/null
+/*
+ * Free FFT and convolution (C)
+ *
+ * Copyright (c) 2020 Project Nayuki. (MIT License)
+ * https://www.nayuki.io/page/free-small-fft-in-multiple-languages
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+ * the Software, and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ * - The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * - The Software is provided "as is", without warranty of any kind, express or
+ * implied, including but not limited to the warranties of merchantability,
+ * fitness for a particular purpose and noninfringement. In no event shall the
+ * authors or copyright holders be liable for any claim, damages or other
+ * liability, whether in an action of contract, tort or otherwise, arising from,
+ * out of or in connection with the Software or the use or other dealings in the
+ * Software.
+ */
+
+#pragma once
+
+#include <stdbool.h>
+#include <stddef.h>
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/*
+ * Computes the discrete Fourier transform (DFT) of the given complex vector, storing the result back into the vector.
+ * The vector can have any length. This is a wrapper function. Returns true if successful, false otherwise (out of memory).
+ */
+bool Fft_transform(double real[], double imag[], size_t n);
+
+
+/*
+ * Computes the inverse discrete Fourier transform (IDFT) of the given complex vector, storing the result back into the vector.
+ * The vector can have any length. This is a wrapper function. This transform does not perform scaling, so the inverse is not a true inverse.
+ * Returns true if successful, false otherwise (out of memory).
+ */
+bool Fft_inverseTransform(double real[], double imag[], size_t n);
+
+
+/*
+ * Computes the discrete Fourier transform (DFT) of the given complex vector, storing the result back into the vector.
+ * The vector's length must be a power of 2. Uses the Cooley-Tukey decimation-in-time radix-2 algorithm.
+ * Returns true if successful, false otherwise (n is not a power of 2, or out of memory).
+ */
+bool Fft_transformRadix2(double real[], double imag[], size_t n);
+
+
+/*
+ * Computes the discrete Fourier transform (DFT) of the given complex vector, storing the result back into the vector.
+ * The vector can have any length. This requires the convolution function, which in turn requires the radix-2 FFT function.
+ * Uses Bluestein's chirp z-transform algorithm. Returns true if successful, false otherwise (out of memory).
+ */
+bool Fft_transformBluestein(double real[], double imag[], size_t n);
+
+
+/*
+ * Computes the circular convolution of the given real vectors. Each vector's length must be the same.
+ * Returns true if successful, false otherwise (out of memory).
+ */
+bool Fft_convolveReal(const double xvec[], const double yvec[], double outvec[], size_t n);
+
+
+/*
+ * Computes the circular convolution of the given complex vectors. Each vector's length must be the same.
+ * Returns true if successful, false otherwise (out of memory).
+ */
+bool Fft_convolveComplex(const double xreal[], const double ximag[], const double yreal[], const double yimag[], double outreal[], double outimag[], size_t n);
+
+
+#ifdef __cplusplus
+}
+#endif
"svremap 31, 1, 0, 2, 0, 1, 1",
"""
lst = SVP64Asm( [
- # set triple butterfly mode
+ # set triple butterfly mode with "REMAP" schedule
"svshape 8, 1, 1, 1, 1",
- # tpre
"svremap 31, 1, 0, 2, 0, 1, 1",
+ # tpre
"sv.fmuls 24, 0.v, 16.v", # mul1_r = r*cos_r
"sv.fmadds 24, 8.v, 20.v, 24", # mul2_r = i*sin_i
# tpre = mul1_r + mul2_r