Fixed push-push to push-pull
[libreriscv.git] / openpower / sv / remapfft.py
1 # FFT and convolution test (Python), "generators" version
2 #
3 # Copyright (c) 2020 Project Nayuki. (MIT License)
4 # https://www.nayuki.io/page/free-small-fft-in-multiple-languages
5 #
6 # Copyright (C) 2021 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
7 # https://libre-soc.org/openpower/sv/remap/
8 #
9 # Permission is hereby granted, free of charge, to any person obtaining a copy
10 # of this software and associated documentation files (the "Software"), to deal
11 # in the Software without restriction, including without limitation the rights
12 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13 # copies of the Software, and to permit persons to whom the Software is
14 # furnished to do so, subject to the following conditions:
15 # - The above copyright notice and this permission notice shall be included in
16 # all copies or substantial portions of the Software.
17 # - The Software is provided "as is", without warranty of any kind, express or
18 # implied, including but not limited to the warranties of merchantability,
19 # fitness for a particular purpose and noninfringement. In no event shall the
20 # authors or copyright holders be liable for any claim, damages or other
21 # liability, whether in an action of contract, tort or otherwise, arising
22 # from, out of or in connection with the Software or the use or other
23 # dealings in the Software.
24 #
25
26 import cmath, math, random
27 from copy import deepcopy
28 from remap_fft_yield import iterate_indices
29
30
31 #
32 # Computes the discrete Fourier transform (DFT) or inverse transform of the
33 # given complex vector, returning the result as a new vector.
34 # The vector can have any length. This is a wrapper function. The inverse
35 # transform does not perform scaling, so it is not a true inverse.
36 #
37 def transform(vec, inverse, generators=False):
38 n = len(vec)
39 if n == 0:
40 return []
41 elif n & (n - 1) == 0: # Is power of 2
42 return transform_radix2(vec, inverse, generators)
43 else: # More complicated algorithm for arbitrary sizes
44 assert False
45
46
47 #
48 # Computes the discrete Fourier transform (DFT) of the given complex vector,
49 # returning the result as a new vector.
50 # The vector's length must be a power of 2. Uses the Cooley-Tukey
51 # decimation-in-time radix-2 algorithm.
52 #
53 def transform_radix2(vec, inverse, generators_mode):
54 # Returns the integer whose value is the reverse of the lowest 'width'
55 # bits of the integer 'val'.
56 def reverse_bits(val, width):
57 result = 0
58 for _ in range(width):
59 result = (result << 1) | (val & 1)
60 val >>= 1
61 return result
62
63 # Initialization
64 n = len(vec)
65 levels = n.bit_length() - 1
66 if 2**levels != n:
67 raise ValueError("Length is not a power of 2")
68 # Now, levels = log2(n)
69 coef = (2 if inverse else -2) * cmath.pi / n
70 exptable = [cmath.rect(1, i * coef) for i in range(n // 2)]
71 # Copy with bit-reversed permutation
72 vec = [vec[reverse_bits(i, levels)] for i in range(n)]
73
74 #
75 # Radix-2 decimation-in-time FFT
76 #
77 if generators_mode:
78 # loop using SVP64 REMAP "generators"
79 # set the dimension sizes here
80
81 # set total. err don't know how to calculate how many there are...
82 # do it manually for now
83 VL = 0
84 size = 2
85 while size <= n:
86 halfsize = size // 2
87 tablestep = n // size
88 for i in range(0, n, size):
89 for j in range(i, i + halfsize):
90 VL += 1
91 size *= 2
92
93 # set up an SVSHAPE
94 class SVSHAPE:
95 pass
96 # j schedule
97 SVSHAPE0 = SVSHAPE()
98 SVSHAPE0.lims = [n, 0, 0]
99 SVSHAPE0.order = [0,1,2]
100 SVSHAPE0.mode = 0b01 # FFT mode
101 SVSHAPE0.skip = 0b00
102 SVSHAPE0.offset = 0
103 SVSHAPE0.invxyz = [0,0,0] # inversion if desired
104 # j+halfstep schedule
105 SVSHAPE1 = deepcopy(SVSHAPE0)
106 SVSHAPE1.skip = 0b01
107 # k schedule
108 SVSHAPE2 = deepcopy(SVSHAPE0)
109 SVSHAPE2.skip = 0b10
110
111 # enumerate over the iterator function, getting 3 *different* indices
112 for idx, (jl, jh, k) in enumerate(zip(iterate_indices(SVSHAPE0),
113 iterate_indices(SVSHAPE1),
114 iterate_indices(SVSHAPE2))):
115 if idx >= VL:
116 break
117 # exact same actual computation, just embedded in a single
118 # for-loop but using triple generators to create the schedule
119 temp1 = vec[jh] * exptable[k]
120 temp2 = vec[jl]
121 vec[jh] = temp2 - temp1
122 vec[jl] = temp2 + temp1
123 else:
124 # loop using standard python nested for-loops
125 size = 2
126 while size <= n:
127 halfsize = size // 2
128 tablestep = n // size
129 for i in range(0, n, size):
130 k = 0
131 for j in range(i, i + halfsize):
132 # exact same actual computation, just embedded in
133 # triple-nested for-loops
134 jl, jh = j, j+halfsize
135 temp1 = vec[jh] * exptable[k]
136 temp2 = vec[jl]
137 vec[jh] = temp2 - temp1
138 vec[jl] = temp2 + temp1
139 k += tablestep
140 size *= 2
141 return vec
142
143
144 #
145 # Computes the circular convolution of the given real or complex vectors,
146 # returning the result as a new vector. Each vector's length must be the same.
147 # realoutput=True: Extract the real part of the convolution, so that the
148 # output is a list of floats. This is useful if both inputs are real.
149 # realoutput=False: The output is always a list of complex numbers
150 # (even if both inputs are real).
151 #
152 def convolve(xvec, yvec, realoutput=True):
153 assert len(xvec) == len(yvec)
154 n = len(xvec)
155 xvec = transform(xvec, False)
156 yvec = transform(yvec, False)
157 for i in range(n):
158 xvec[i] *= yvec[i]
159 xvec = transform(xvec, True)
160
161 # Scaling (because this FFT implementation omits it) and postprocessing
162 if realoutput:
163 return [(val.real / n) for val in xvec]
164 else:
165 return [(val / n) for val in xvec]
166
167
168 ###################################
169 # ---- Main and test functions ----
170 ###################################
171
172 def main():
173 global _maxlogerr
174
175 # Test power-of-2 size FFTs
176 for i in range(0, 12 + 1):
177 _test_fft(1 << i)
178
179 # Test power-of-2 size convolutions
180 for i in range(0, 12 + 1):
181 _test_convolution(1 << i)
182
183 print()
184 print(f"Max log err = {_maxlogerr:.1f}")
185 print(f"Test {'passed' if _maxlogerr < -10 else 'failed'}")
186
187
188 def _test_fft(size):
189 input = _random_vector(size)
190 expect = _naive_dft(input, False)
191 actual = transform(input, False, False)
192 actual_generated = transform(input, False, True)
193 assert actual == actual_generated # check generator-version is identical
194
195 err_gen = _log10_rms_err(actual, actual_generated) # superfluous but hey
196 err = _log10_rms_err(expect, actual)
197
198 actual = [(x / size) for x in expect]
199 actual = transform(actual, True)
200 err = max(_log10_rms_err(input, actual), err)
201 print(f"fftsize={size:4d} logerr={err:5.1f} generr={err_gen:5.1f}")
202
203
204 def _test_convolution(size):
205 input0 = _random_vector(size)
206 input1 = _random_vector(size)
207 expect = _naive_convolution(input0, input1)
208 actual = convolve(input0, input1, False)
209 print(f"convsize={size:4d} logerr={_log10_rms_err(expect, actual):5.1f}")
210
211
212 # ---- Naive reference computation functions ----
213
214 def _naive_dft(input, inverse):
215 n = len(input)
216 output = []
217 if n == 0:
218 return output
219 coef = (2 if inverse else -2) * math.pi / n
220 for k in range(n): # For each output element
221 s = 0
222 for t in range(n): # For each input element
223 s += input[t] * cmath.rect(1, (t * k % n) * coef)
224 output.append(s)
225 return output
226
227
228 def _naive_convolution(xvec, yvec):
229 assert len(xvec) == len(yvec)
230 n = len(xvec)
231 result = [0] * n
232 for i in range(n):
233 for j in range(n):
234 result[(i + j) % n] += xvec[i] * yvec[j]
235 return result
236
237
238 # ---- Utility functions ----
239
240 _maxlogerr = -math.inf
241
242 def _log10_rms_err(xvec, yvec):
243 global _maxlogerr
244 assert len(xvec) == len(yvec)
245 err = 10.0**(-99 * 2)
246 for (x, y) in zip(xvec, yvec):
247 err += abs(x - y) ** 2
248 err = math.sqrt(err / max(len(xvec), 1)) # a root mean square (RMS) error
249 err = math.log10(err)
250 _maxlogerr = max(err, _maxlogerr)
251 return err
252
253
254 def _random_vector(n):
255 return [complex(random.uniform(-1.0, 1.0),
256 random.uniform(-1.0, 1.0)) for _ in range(n)]
257
258
259 if __name__ == "__main__":
260 main()