openpower/sv/remapfft.py

   1 # FFT and convolution test (Python), "generators" version
   2 #
   3 # Copyright (c) 2020 Project Nayuki. (MIT License)
   4 # https://www.nayuki.io/page/free-small-fft-in-multiple-languages
   5 #
   6 # Copyright (C) 2021 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
   7 # https://libre-soc.org/openpower/sv/remap/
   8 #
   9 # Permission is hereby granted, free of charge, to any person obtaining a copy
  10 # of this software and associated documentation files (the "Software"), to deal
  11 # in the Software without restriction, including without limitation the rights
  12 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  13 # copies of the Software, and to permit persons to whom the Software is
  14 # furnished to do so, subject to the following conditions:
  15 # - The above copyright notice and this permission notice shall be included in
  16 #   all copies or substantial portions of the Software.
  17 # - The Software is provided "as is", without warranty of any kind, express or
  18 #   implied, including but not limited to the warranties of merchantability,
  19 #   fitness for a particular purpose and noninfringement. In no event shall the
  20 #   authors or copyright holders be liable for any claim, damages or other
  21 #   liability, whether in an action of contract, tort or otherwise, arising
  22 #   from, out of or in connection with the Software or the use or other
  23 #   dealings in the Software.
  24 #
  25
  26 import cmath, math, random
  27 from copy import deepcopy
  28 from remap_fft_yield import iterate_indices
  29
  30
  31 #
  32 # Computes the discrete Fourier transform (DFT) or inverse transform of the
  33 # given complex vector, returning the result as a new vector.
  34 # The vector can have any length. This is a wrapper function. The inverse
  35 # transform does not perform scaling, so it is not a true inverse.
  36 #
  37 def transform(vec, inverse, generators=False):
  38     n = len(vec)
  39     if n == 0:
  40         return []
  41     elif n & (n - 1) == 0:  # Is power of 2
  42         return transform_radix2(vec, inverse, generators)
  43     else:  # More complicated algorithm for arbitrary sizes
  44         assert False
  45
  46
  47 #
  48 # Computes the discrete Fourier transform (DFT) of the given complex vector,
  49 # returning the result as a new vector.
  50 # The vector's length must be a power of 2. Uses the Cooley-Tukey
  51 # decimation-in-time radix-2 algorithm.
  52 #
  53 def transform_radix2(vec, inverse, generators_mode):
  54     # Returns the integer whose value is the reverse of the lowest 'width'
  55     # bits of the integer 'val'.
  56     def reverse_bits(val, width):
  57         result = 0
  58         for _ in range(width):
  59             result = (result << 1) | (val & 1)
  60             val >>= 1
  61         return result
  62
  63     # Initialization
  64     n = len(vec)
  65     levels = n.bit_length() - 1
  66     if 2**levels != n:
  67         raise ValueError("Length is not a power of 2")
  68     # Now, levels = log2(n)
  69     coef = (2 if inverse else -2) * cmath.pi / n
  70     exptable = [cmath.rect(1, i * coef) for i in range(n // 2)]
  71     # Copy with bit-reversed permutation
  72     vec = [vec[reverse_bits(i, levels)] for i in range(n)]
  73
  74     #
  75     # Radix-2 decimation-in-time FFT
  76     #
  77     if generators_mode:
  78         # loop using SVP64 REMAP "generators"
  79         # set the dimension sizes here
  80
  81         # set total. err don't know how to calculate how many there are...
  82         # do it manually for now
  83         VL = 0
  84         size = 2
  85         while size <= n:
  86             halfsize = size // 2
  87             tablestep = n // size
  88             for i in range(0, n, size):
  89                 for j in range(i, i + halfsize):
  90                     VL += 1
  91             size *= 2
  92
  93         # set up an SVSHAPE
  94         class SVSHAPE:
  95             pass
  96         # j schedule
  97         SVSHAPE0 = SVSHAPE()
  98         SVSHAPE0.lims = [n, 0, 0]
  99         SVSHAPE0.order = [0,1,2]
 100         SVSHAPE0.mode = 0b01      # FFT mode
 101         SVSHAPE0.skip = 0b00
 102         SVSHAPE0.offset = 0
 103         SVSHAPE0.invxyz = [0,0,0] # inversion if desired
 104         # j+halfstep schedule
 105         SVSHAPE1 = deepcopy(SVSHAPE0)
 106         SVSHAPE1.skip = 0b01
 107         # k schedule
 108         SVSHAPE2 = deepcopy(SVSHAPE0)
 109         SVSHAPE2.skip = 0b10
 110
 111         # enumerate over the iterator function, getting 3 *different* indices
 112         for idx, (jl, jh, k) in enumerate(zip(iterate_indices(SVSHAPE0),
 113                                               iterate_indices(SVSHAPE1),
 114                                               iterate_indices(SVSHAPE2))):
 115             if idx >= VL:
 116                 break
 117             # exact same actual computation, just embedded in a single
 118             # for-loop but using triple generators to create the schedule
 119             temp1 = vec[jh] * exptable[k]
 120             temp2 = vec[jl]
 121             vec[jh] = temp2 - temp1
 122             vec[jl] = temp2 + temp1
 123     else:
 124         # loop using standard python nested for-loops
 125         size = 2
 126         while size <= n:
 127             halfsize = size // 2
 128             tablestep = n // size
 129             for i in range(0, n, size):
 130                 k = 0
 131                 for j in range(i, i + halfsize):
 132                     # exact same actual computation, just embedded in
 133                     # triple-nested for-loops
 134                     jl, jh = j, j+halfsize
 135                     temp1 = vec[jh] * exptable[k]
 136                     temp2 = vec[jl]
 137                     vec[jh] = temp2 - temp1
 138                     vec[jl] = temp2 + temp1
 139                     k += tablestep
 140             size *= 2
 141     return vec
 142
 143
 144 #
 145 # Computes the circular convolution of the given real or complex vectors,
 146 # returning the result as a new vector. Each vector's length must be the same.
 147 # realoutput=True: Extract the real part of the convolution, so that the
 148 # output is a list of floats. This is useful if both inputs are real.
 149 # realoutput=False: The output is always a list of complex numbers
 150 # (even if both inputs are real).
 151 #
 152 def convolve(xvec, yvec, realoutput=True):
 153     assert len(xvec) == len(yvec)
 154     n = len(xvec)
 155     xvec = transform(xvec, False)
 156     yvec = transform(yvec, False)
 157     for i in range(n):
 158         xvec[i] *= yvec[i]
 159     xvec = transform(xvec, True)
 160
 161     # Scaling (because this FFT implementation omits it) and postprocessing
 162     if realoutput:
 163         return [(val.real / n) for val in xvec]
 164     else:
 165         return [(val / n) for val in xvec]
 166
 167
 168 ###################################
 169 # ---- Main and test functions ----
 170 ###################################
 171
 172 def main():
 173     global _maxlogerr
 174
 175     # Test power-of-2 size FFTs
 176     for i in range(0, 12 + 1):
 177         _test_fft(1 << i)
 178
 179     # Test power-of-2 size convolutions
 180     for i in range(0, 12 + 1):
 181         _test_convolution(1 << i)
 182
 183     print()
 184     print(f"Max log err = {_maxlogerr:.1f}")
 185     print(f"Test {'passed' if _maxlogerr < -10 else 'failed'}")
 186
 187
 188 def _test_fft(size):
 189     input = _random_vector(size)
 190     expect = _naive_dft(input, False)
 191     actual = transform(input, False, False)
 192     actual_generated = transform(input, False, True)
 193     assert actual == actual_generated # check generator-version is identical
 194
 195     err_gen = _log10_rms_err(actual, actual_generated) # superfluous but hey
 196     err = _log10_rms_err(expect, actual)
 197
 198     actual = [(x / size) for x in expect]
 199     actual = transform(actual, True)
 200     err = max(_log10_rms_err(input, actual), err)
 201     print(f"fftsize={size:4d}  logerr={err:5.1f} generr={err_gen:5.1f}")
 202
 203
 204 def _test_convolution(size):
 205     input0 = _random_vector(size)
 206     input1 = _random_vector(size)
 207     expect = _naive_convolution(input0, input1)
 208     actual = convolve(input0, input1, False)
 209     print(f"convsize={size:4d}  logerr={_log10_rms_err(expect, actual):5.1f}")
 210
 211
 212 # ---- Naive reference computation functions ----
 213
 214 def _naive_dft(input, inverse):
 215     n = len(input)
 216     output = []
 217     if n == 0:
 218         return output
 219     coef = (2 if inverse else -2) * math.pi / n
 220     for k in range(n):  # For each output element
 221         s = 0
 222         for t in range(n):  # For each input element
 223             s += input[t] * cmath.rect(1, (t * k % n) * coef)
 224         output.append(s)
 225     return output
 226
 227
 228 def _naive_convolution(xvec, yvec):
 229     assert len(xvec) == len(yvec)
 230     n = len(xvec)
 231     result = [0] * n
 232     for i in range(n):
 233         for j in range(n):
 234             result[(i + j) % n] += xvec[i] * yvec[j]
 235     return result
 236
 237
 238 # ---- Utility functions ----
 239
 240 _maxlogerr = -math.inf
 241
 242 def _log10_rms_err(xvec, yvec):
 243     global _maxlogerr
 244     assert len(xvec) == len(yvec)
 245     err = 10.0**(-99 * 2)
 246     for (x, y) in zip(xvec, yvec):
 247         err += abs(x - y) ** 2
 248     err = math.sqrt(err / max(len(xvec), 1))  # a root mean square (RMS) error
 249     err = math.log10(err)
 250     _maxlogerr = max(err, _maxlogerr)
 251     return err
 252
 253
 254 def _random_vector(n):
 255     return [complex(random.uniform(-1.0, 1.0),
 256                     random.uniform(-1.0, 1.0)) for _ in range(n)]
 257
 258
 259 if __name__ == "__main__":
 260     main()