From: Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Date: Fri, 16 Jul 2021 15:59:41 +0000 (+0100)
Subject: add nayuki project reference code
X-Git-Tag: xlen-bcd~281
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=a985b0f6c97205101e60a1115045a1274f1801a6;p=openpower-isa.git

add nayuki project reference code
https://www.nayuki.io/page/free-small-fft-in-multiple-languages
---

diff --git a/media/fft/fft-real-pair-test.c b/media/fft/fft-real-pair-test.c
new file mode 100644
index 00000000..514531d1
--- /dev/null
+++ b/media/fft/fft-real-pair-test.c
@@ -0,0 +1,222 @@
+/* 
+ * FFT and convolution test (C)
+ * 
+ * Copyright (c) 2020 Project Nayuki. (MIT License)
+ * https://www.nayuki.io/page/free-small-fft-in-multiple-languages
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+ * the Software, and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ * - The above copyright notice and this permission notice shall be included in
+ *   all copies or substantial portions of the Software.
+ * - The Software is provided "as is", without warranty of any kind, express or
+ *   implied, including but not limited to the warranties of merchantability,
+ *   fitness for a particular purpose and noninfringement. In no event shall the
+ *   authors or copyright holders be liable for any claim, damages or other
+ *   liability, whether in an action of contract, tort or otherwise, arising from,
+ *   out of or in connection with the Software or the use or other dealings in the
+ *   Software.
+ */
+
+#include <math.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include "fft-real-pair.h"
+
+
+// Private function prototypes
+static void test_fft(int n);
+static void test_convolution(int n);
+static void naive_dft(const double *inreal, const double *inimag,
+	double *outreal, double *outimag, int n, bool inverse);
+static void naive_convolve(const double *xreal, const double *ximag,
+	const double *yreal, const double *yimag,
+	double *outreal, double *outimag, int n);
+static double log10_rms_err(const double *xreal, const double *ximag,
+	const double *yreal, const double *yimag, int n);
+static double *random_reals(int n);
+static void *memdup(const void *src, size_t n);
+
+static double max_log_error = -INFINITY;
+
+
+/*---- Main and test functions ----*/
+
+int main(void) {
+	srand(time(NULL));
+	
+	// Test power-of-2 size FFTs
+	for (int i = 0; i <= 12; i++)
+		test_fft(1 << i);
+	
+	// Test small size FFTs
+	for (int i = 0; i < 30; i++)
+		test_fft(i);
+	
+	// Test diverse size FFTs
+	for (int i = 0, prev = 0; i <= 100; i++) {
+		int n = (int)lround(pow(1500, i / 100.0));
+		if (n > prev) {
+			test_fft(n);
+			prev = n;
+		}
+	}
+	
+	// Test power-of-2 size convolutions
+	for (int i = 0; i <= 12; i++)
+		test_convolution(1 << i);
+	
+	// Test diverse size convolutions
+	for (int i = 0, prev = 0; i <= 100; i++) {
+		int n = (int)lround(pow(1500, i / 100.0));
+		if (n > prev) {
+			test_convolution(n);
+			prev = n;
+		}
+	}
+	
+	printf("\n");
+	printf("Max log err = %.1f\n", max_log_error);
+	printf("Test %s\n", max_log_error < -10 ? "passed" : "failed");
+	return EXIT_SUCCESS;
+}
+
+
+static void test_fft(int n) {
+	double *inputreal = random_reals(n);
+	double *inputimag = random_reals(n);
+	
+	double *expectreal = malloc(n * sizeof(double));
+	double *expectimag = malloc(n * sizeof(double));
+	naive_dft(inputreal, inputimag, expectreal, expectimag, n, false);
+	
+	double *actualreal = memdup(inputreal, n * sizeof(double));
+	double *actualimag = memdup(inputimag, n * sizeof(double));
+	Fft_transform(actualreal, actualimag, n);
+	double err0 = log10_rms_err(expectreal, expectimag, actualreal, actualimag, n);
+	
+	for (int i = 0; i < n; i++) {
+		actualreal[i] /= n;
+		actualimag[i] /= n;
+	}
+	Fft_inverseTransform(actualreal, actualimag, n);
+	double err1 = log10_rms_err(inputreal, inputimag, actualreal, actualimag, n);
+	printf("fftsize=%4d  logerr=%5.1f\n", n, (err0 > err1 ? err0 : err1));
+	
+	free(inputreal);
+	free(inputimag);
+	free(expectreal);
+	free(expectimag);
+	free(actualreal);
+	free(actualimag);
+}
+
+
+static void test_convolution(int n) {
+	double *input0real = random_reals(n);
+	double *input0imag = random_reals(n);
+	double *input1real = random_reals(n);
+	double *input1imag = random_reals(n);
+	
+	double *expectreal = malloc(n * sizeof(double));
+	double *expectimag = malloc(n * sizeof(double));
+	naive_convolve(input0real, input0imag, input1real, input1imag, expectreal, expectimag, n);
+	
+	double *actualreal = malloc(n * sizeof(double));
+	double *actualimag = malloc(n * sizeof(double));
+	Fft_convolveComplex(input0real, input0imag, input1real, input1imag, actualreal, actualimag, n);
+	
+	printf("convsize=%4d  logerr=%5.1f\n", n,
+		log10_rms_err(expectreal, expectimag, actualreal, actualimag, n));
+	
+	free(input0real);
+	free(input0imag);
+	free(input1real);
+	free(input1imag);
+	free(expectreal);
+	free(expectimag);
+	free(actualreal);
+	free(actualimag);
+}
+
+
+/*---- Naive reference computation functions ----*/
+
+static void naive_dft(const double *inreal, const double *inimag,
+		double *outreal, double *outimag, int n, bool inverse) {
+	
+	double coef = (inverse ? 2 : -2) * M_PI;
+	for (int k = 0; k < n; k++) {  // For each output element
+		double sumreal = 0;
+		double sumimag = 0;
+		for (int t = 0; t < n; t++) {  // For each input element
+			double angle = coef * ((long long)t * k % n) / n;
+			sumreal += inreal[t] * cos(angle) - inimag[t] * sin(angle);
+			sumimag += inreal[t] * sin(angle) + inimag[t] * cos(angle);
+		}
+		outreal[k] = sumreal;
+		outimag[k] = sumimag;
+	}
+}
+
+
+static void naive_convolve(const double *xreal, const double *ximag,
+		const double *yreal, const double *yimag,
+		double *outreal, double *outimag, int n) {
+	
+	for (int i = 0; i < n; i++) {
+		outreal[i] = 0;
+		outimag[i] = 0;
+	}
+	for (int i = 0; i < n; i++) {
+		for (int j = 0; j < n; j++) {
+			int k = (i + j) % n;
+			outreal[k] += xreal[i] * yreal[j] - ximag[i] * yimag[j];
+			outimag[k] += xreal[i] * yimag[j] + ximag[i] * yreal[j];
+		}
+	}
+}
+
+
+/*---- Utility functions ----*/
+
+static double log10_rms_err(const double *xreal, const double *ximag,
+		const double *yreal, const double *yimag, int n) {
+	
+	double err = pow(10, -99 * 2);
+	for (int i = 0; i < n; i++) {
+		double real = xreal[i] - yreal[i];
+		double imag = ximag[i] - yimag[i];
+		err += real * real + imag * imag;
+	}
+	
+	err /= n > 0 ? n : 1;
+	err = sqrt(err);  // Now this is a root mean square (RMS) error
+	err = log10(err);
+	if (err > max_log_error)
+		max_log_error = err;
+	return err;
+}
+
+
+static double *random_reals(int n) {
+	double *result = malloc(n * sizeof(double));
+	for (int i = 0; i < n; i++)
+		result[i] = (rand() / (RAND_MAX + 1.0)) * 2 - 1;
+	return result;
+}
+
+
+static void *memdup(const void *src, size_t n) {
+	void *dest = malloc(n);
+	if (n > 0 && dest != NULL)
+		memcpy(dest, src, n);
+	return dest;
+}
diff --git a/media/fft/fft-real-pair.c b/media/fft/fft-real-pair.c
new file mode 100644
index 00000000..e33c330e
--- /dev/null
+++ b/media/fft/fft-real-pair.c
@@ -0,0 +1,263 @@
+/* 
+ * Free FFT and convolution (C)
+ * 
+ * Copyright (c) 2020 Project Nayuki. (MIT License)
+ * https://www.nayuki.io/page/free-small-fft-in-multiple-languages
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+ * the Software, and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ * - The above copyright notice and this permission notice shall be included in
+ *   all copies or substantial portions of the Software.
+ * - The Software is provided "as is", without warranty of any kind, express or
+ *   implied, including but not limited to the warranties of merchantability,
+ *   fitness for a particular purpose and noninfringement. In no event shall the
+ *   authors or copyright holders be liable for any claim, damages or other
+ *   liability, whether in an action of contract, tort or otherwise, arising from,
+ *   out of or in connection with the Software or the use or other dealings in the
+ *   Software.
+ */
+
+#include <math.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include "fft-real-pair.h"
+
+
+// Private function prototypes
+static size_t reverse_bits(size_t val, int width);
+static void *memdup(const void *src, size_t n);
+
+
+bool Fft_transform(double real[], double imag[], size_t n) {
+	if (n == 0)
+		return true;
+	else if ((n & (n - 1)) == 0)  // Is power of 2
+		return Fft_transformRadix2(real, imag, n);
+	else  // More complicated algorithm for arbitrary sizes
+		return Fft_transformBluestein(real, imag, n);
+}
+
+
+bool Fft_inverseTransform(double real[], double imag[], size_t n) {
+	return Fft_transform(imag, real, n);
+}
+
+
+bool Fft_transformRadix2(double real[], double imag[], size_t n) {
+	// Length variables
+	bool status = false;
+	int levels = 0;  // Compute levels = floor(log2(n))
+	for (size_t temp = n; temp > 1U; temp >>= 1)
+		levels++;
+	if ((size_t)1U << levels != n)
+		return false;  // n is not a power of 2
+	
+	// Trigonometric tables
+	if (SIZE_MAX / sizeof(double) < n / 2)
+		return false;
+	size_t size = (n / 2) * sizeof(double);
+	double *cos_table = malloc(size);
+	double *sin_table = malloc(size);
+	if (cos_table == NULL || sin_table == NULL)
+		goto cleanup;
+	for (size_t i = 0; i < n / 2; i++) {
+		cos_table[i] = cos(2 * M_PI * i / n);
+		sin_table[i] = sin(2 * M_PI * i / n);
+	}
+	
+	// Bit-reversed addressing permutation
+	for (size_t i = 0; i < n; i++) {
+		size_t j = reverse_bits(i, levels);
+		if (j > i) {
+			double temp = real[i];
+			real[i] = real[j];
+			real[j] = temp;
+			temp = imag[i];
+			imag[i] = imag[j];
+			imag[j] = temp;
+		}
+	}
+	
+	// Cooley-Tukey decimation-in-time radix-2 FFT
+	for (size_t size = 2; size <= n; size *= 2) {
+		size_t halfsize = size / 2;
+		size_t tablestep = n / size;
+		for (size_t i = 0; i < n; i += size) {
+			for (size_t j = i, k = 0; j < i + halfsize; j++, k += tablestep) {
+				size_t l = j + halfsize;
+				double tpre =  real[l] * cos_table[k] + imag[l] * sin_table[k];
+				double tpim = -real[l] * sin_table[k] + imag[l] * cos_table[k];
+				real[l] = real[j] - tpre;
+				imag[l] = imag[j] - tpim;
+				real[j] += tpre;
+				imag[j] += tpim;
+			}
+		}
+		if (size == n)  // Prevent overflow in 'size *= 2'
+			break;
+	}
+	status = true;
+	
+cleanup:
+	free(cos_table);
+	free(sin_table);
+	return status;
+}
+
+
+bool Fft_transformBluestein(double real[], double imag[], size_t n) {
+	bool status = false;
+	
+	// Find a power-of-2 convolution length m such that m >= n * 2 + 1
+	size_t m = 1;
+	while (m / 2 <= n) {
+		if (m > SIZE_MAX / 2)
+			return false;
+		m *= 2;
+	}
+	
+	// Allocate memory
+	if (SIZE_MAX / sizeof(double) < n || SIZE_MAX / sizeof(double) < m)
+		return false;
+	size_t size_n = n * sizeof(double);
+	size_t size_m = m * sizeof(double);
+	double *cos_table = malloc(size_n);
+	double *sin_table = malloc(size_n);
+	double *areal = calloc(m, sizeof(double));
+	double *aimag = calloc(m, sizeof(double));
+	double *breal = calloc(m, sizeof(double));
+	double *bimag = calloc(m, sizeof(double));
+	double *creal = malloc(size_m);
+	double *cimag = malloc(size_m);
+	if (cos_table == NULL || sin_table == NULL
+			|| areal == NULL || aimag == NULL
+			|| breal == NULL || bimag == NULL
+			|| creal == NULL || cimag == NULL)
+		goto cleanup;
+	
+	// Trigonometric tables
+	for (size_t i = 0; i < n; i++) {
+		uintmax_t temp = ((uintmax_t)i * i) % ((uintmax_t)n * 2);
+		double angle = M_PI * temp / n;
+		cos_table[i] = cos(angle);
+		sin_table[i] = sin(angle);
+	}
+	
+	// Temporary vectors and preprocessing
+	for (size_t i = 0; i < n; i++) {
+		areal[i] =  real[i] * cos_table[i] + imag[i] * sin_table[i];
+		aimag[i] = -real[i] * sin_table[i] + imag[i] * cos_table[i];
+	}
+	breal[0] = cos_table[0];
+	bimag[0] = sin_table[0];
+	for (size_t i = 1; i < n; i++) {
+		breal[i] = breal[m - i] = cos_table[i];
+		bimag[i] = bimag[m - i] = sin_table[i];
+	}
+	
+	// Convolution
+	if (!Fft_convolveComplex(areal, aimag, breal, bimag, creal, cimag, m))
+		goto cleanup;
+	
+	// Postprocessing
+	for (size_t i = 0; i < n; i++) {
+		real[i] =  creal[i] * cos_table[i] + cimag[i] * sin_table[i];
+		imag[i] = -creal[i] * sin_table[i] + cimag[i] * cos_table[i];
+	}
+	status = true;
+	
+	// Deallocation
+cleanup:
+	free(cos_table);
+	free(sin_table);
+	free(areal);
+	free(aimag);
+	free(breal);
+	free(bimag);
+	free(creal);
+	free(cimag);
+	return status;
+}
+
+
+bool Fft_convolveReal(const double xvec[], const double yvec[], double outvec[], size_t n) {
+	bool status = false;
+	double *ximag = calloc(n, sizeof(double));
+	double *yimag = calloc(n, sizeof(double));
+	double *zimag = calloc(n, sizeof(double));
+	if (ximag == NULL || yimag == NULL || zimag == NULL)
+		goto cleanup;
+	
+	status = Fft_convolveComplex(xvec, ximag, yvec, yimag, outvec, zimag, n);
+cleanup:
+	free(ximag);
+	free(yimag);
+	free(zimag);
+	return status;
+}
+
+
+bool Fft_convolveComplex(
+		const double xreal[], const double ximag[],
+		const double yreal[], const double yimag[],
+		double outreal[], double outimag[], size_t n) {
+	
+	bool status = false;
+	if (SIZE_MAX / sizeof(double) < n)
+		return false;
+	size_t size = n * sizeof(double);
+	
+	double *xr = memdup(xreal, size);
+	double *xi = memdup(ximag, size);
+	double *yr = memdup(yreal, size);
+	double *yi = memdup(yimag, size);
+	if (xr == NULL || xi == NULL || yr == NULL || yi == NULL)
+		goto cleanup;
+	
+	if (!Fft_transform(xr, xi, n))
+		goto cleanup;
+	if (!Fft_transform(yr, yi, n))
+		goto cleanup;
+	
+	for (size_t i = 0; i < n; i++) {
+		double temp = xr[i] * yr[i] - xi[i] * yi[i];
+		xi[i] = xi[i] * yr[i] + xr[i] * yi[i];
+		xr[i] = temp;
+	}
+	if (!Fft_inverseTransform(xr, xi, n))
+		goto cleanup;
+	
+	for (size_t i = 0; i < n; i++) {  // Scaling (because this FFT implementation omits it)
+		outreal[i] = xr[i] / n;
+		outimag[i] = xi[i] / n;
+	}
+	status = true;
+	
+cleanup:
+	free(xr);
+	free(xi);
+	free(yr);
+	free(yi);
+	return status;
+}
+
+
+static size_t reverse_bits(size_t val, int width) {
+	size_t result = 0;
+	for (int i = 0; i < width; i++, val >>= 1)
+		result = (result << 1) | (val & 1U);
+	return result;
+}
+
+
+static void *memdup(const void *src, size_t n) {
+	void *dest = malloc(n);
+	if (n > 0 && dest != NULL)
+		memcpy(dest, src, n);
+	return dest;
+}
diff --git a/media/fft/fft-real-pair.h b/media/fft/fft-real-pair.h
new file mode 100644
index 00000000..fe27c764
--- /dev/null
+++ b/media/fft/fft-real-pair.h
@@ -0,0 +1,82 @@
+/* 
+ * Free FFT and convolution (C)
+ * 
+ * Copyright (c) 2020 Project Nayuki. (MIT License)
+ * https://www.nayuki.io/page/free-small-fft-in-multiple-languages
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+ * the Software, and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ * - The above copyright notice and this permission notice shall be included in
+ *   all copies or substantial portions of the Software.
+ * - The Software is provided "as is", without warranty of any kind, express or
+ *   implied, including but not limited to the warranties of merchantability,
+ *   fitness for a particular purpose and noninfringement. In no event shall the
+ *   authors or copyright holders be liable for any claim, damages or other
+ *   liability, whether in an action of contract, tort or otherwise, arising from,
+ *   out of or in connection with the Software or the use or other dealings in the
+ *   Software.
+ */
+
+#pragma once
+
+#include <stdbool.h>
+#include <stddef.h>
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/* 
+ * Computes the discrete Fourier transform (DFT) of the given complex vector, storing the result back into the vector.
+ * The vector can have any length. This is a wrapper function. Returns true if successful, false otherwise (out of memory).
+ */
+bool Fft_transform(double real[], double imag[], size_t n);
+
+
+/* 
+ * Computes the inverse discrete Fourier transform (IDFT) of the given complex vector, storing the result back into the vector.
+ * The vector can have any length. This is a wrapper function. This transform does not perform scaling, so the inverse is not a true inverse.
+ * Returns true if successful, false otherwise (out of memory).
+ */
+bool Fft_inverseTransform(double real[], double imag[], size_t n);
+
+
+/* 
+ * Computes the discrete Fourier transform (DFT) of the given complex vector, storing the result back into the vector.
+ * The vector's length must be a power of 2. Uses the Cooley-Tukey decimation-in-time radix-2 algorithm.
+ * Returns true if successful, false otherwise (n is not a power of 2, or out of memory).
+ */
+bool Fft_transformRadix2(double real[], double imag[], size_t n);
+
+
+/* 
+ * Computes the discrete Fourier transform (DFT) of the given complex vector, storing the result back into the vector.
+ * The vector can have any length. This requires the convolution function, which in turn requires the radix-2 FFT function.
+ * Uses Bluestein's chirp z-transform algorithm. Returns true if successful, false otherwise (out of memory).
+ */
+bool Fft_transformBluestein(double real[], double imag[], size_t n);
+
+
+/* 
+ * Computes the circular convolution of the given real vectors. Each vector's length must be the same.
+ * Returns true if successful, false otherwise (out of memory).
+ */
+bool Fft_convolveReal(const double xvec[], const double yvec[], double outvec[], size_t n);
+
+
+/* 
+ * Computes the circular convolution of the given complex vectors. Each vector's length must be the same.
+ * Returns true if successful, false otherwise (out of memory).
+ */
+bool Fft_convolveComplex(const double xreal[], const double ximag[], const double yreal[], const double yimag[], double outreal[], double outimag[], size_t n);
+
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/openpower/decoder/isa/test_caller_svp64_fft.py b/src/openpower/decoder/isa/test_caller_svp64_fft.py
index 346f9e4d..b4c94280 100644
--- a/src/openpower/decoder/isa/test_caller_svp64_fft.py
+++ b/src/openpower/decoder/isa/test_caller_svp64_fft.py
@@ -534,10 +534,10 @@ class FFTTestCase(FHDLTestCase):
                 "svremap 31, 1, 0, 2, 0, 1, 1",
         """
         lst = SVP64Asm( [
-                        # set triple butterfly mode
+                        # set triple butterfly mode with "REMAP" schedule
                         "svshape 8, 1, 1, 1, 1",
-                        # tpre
                         "svremap 31, 1, 0, 2, 0, 1, 1",
+                        # tpre
                         "sv.fmuls 24, 0.v, 16.v",    # mul1_r = r*cos_r
                         "sv.fmadds 24, 8.v, 20.v, 24", # mul2_r = i*sin_i
                                                      # tpre = mul1_r + mul2_r