62b8609e80ebee4ec4cf75cf149d62bd8d4f0f4d
[openpower-isa.git] / src / openpower / decoder / isa / test_caller_svp64_fft.py
1 from nmigen import Module, Signal
2 from nmigen.back.pysim import Simulator, Delay, Settle
3 from nmutil.formaltest import FHDLTestCase
4 import unittest
5 from openpower.decoder.power_decoder import (create_pdecode)
6 from openpower.simulator.program import Program
7 from openpower.decoder.isa.caller import SVP64State
8 from openpower.decoder.selectable_int import SelectableInt
9 from openpower.decoder.isa.test_caller import run_tst
10 from openpower.sv.trans.svp64 import SVP64Asm
11 from copy import deepcopy
12 from openpower.decoder.helpers import fp64toselectable
13 from openpower.decoder.isafunctions.double2single import DOUBLE2SINGLE
14
15
16 def transform_radix2(vec, exptable):
17 """
18 # FFT and convolution test (Python), based on Project Nayuki
19 #
20 # Copyright (c) 2020 Project Nayuki. (MIT License)
21 # https://www.nayuki.io/page/free-small-fft-in-multiple-languages
22
23 """
24 # bits of the integer 'val'.
25 def reverse_bits(val, width):
26 result = 0
27 for _ in range(width):
28 result = (result << 1) | (val & 1)
29 val >>= 1
30 return result
31
32 # Initialization
33 n = len(vec)
34 levels = n.bit_length() - 1
35
36 # Copy with bit-reversed permutation
37 #vec = [vec[reverse_bits(i, levels)] for i in range(n)]
38
39 size = 2
40 while size <= n:
41 halfsize = size // 2
42 tablestep = n // size
43 for i in range(0, n, size):
44 k = 0
45 for j in range(i, i + halfsize):
46 # exact same actual computation, just embedded in
47 # triple-nested for-loops
48 jl, jh = j, j+halfsize
49 vjh = vec[jh]
50 temp1 = vec[jh] * exptable[k]
51 temp2 = vec[jl]
52 vec[jh] = temp2 - temp1
53 vec[jl] = temp2 + temp1
54 print ("xform jl jh k", jl, jh, k,
55 "vj vjh ek", temp2, vjh, exptable[k],
56 "t1, t2", temp1, temp2,
57 "v[jh] v[jl]", vec[jh], vec[jl])
58 k += tablestep
59 size *= 2
60
61 return vec
62
63
64 class FFTTestCase(FHDLTestCase):
65
66 def _check_regs(self, sim, expected):
67 for i in range(32):
68 self.assertEqual(sim.gpr(i), SelectableInt(expected[i], 64))
69
70 def test_sv_remap_fpmadds_fft(self):
71 """>>> lst = ["svremap 8, 1, 1, 1",
72 "sv.ffmadds 2.v, 2.v, 2.v, 10.v"
73 ]
74 runs a full in-place O(N log2 N) butterfly schedule for
75 Discrete Fourier Transform.
76
77 this is the twin "butterfly" mul-add-sub from Cooley-Tukey
78 https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Data_reordering,_bit_reversal,_and_in-place_algorithms
79
80 there is the *option* to target a different location (non-in-place)
81 just in case.
82
83 SVP64 "REMAP" in Butterfly Mode is applied to a twin +/- FMAC
84 (3 inputs, 2 outputs)
85 """
86 lst = SVP64Asm( ["svremap 8, 1, 1, 1",
87 "sv.ffmadds 0.v, 0.v, 0.v, 8.v"
88 ])
89 lst = list(lst)
90
91 # array and coefficients to test
92 av = [7.0, -9.8, 3.0, -32.3,
93 -2.0, 5.0, -9.8, 31.3] # array 0..7
94 coe = [-0.25, 0.5, 3.1, 6.2] # coefficients
95
96 # store in regfile
97 fprs = [0] * 32
98 for i, c in enumerate(coe):
99 fprs[i+8] = fp64toselectable(c)
100 for i, a in enumerate(av):
101 fprs[i+0] = fp64toselectable(a)
102
103 # set total. err don't know how to calculate how many there are...
104 # do it manually for now
105 VL = 0
106 size = 2
107 n = len(av)
108 while size <= n:
109 halfsize = size // 2
110 tablestep = n // size
111 for i in range(0, n, size):
112 for j in range(i, i + halfsize):
113 VL += 1
114 size *= 2
115
116 # SVSTATE (calculated VL)
117 svstate = SVP64State()
118 svstate.vl[0:7] = VL # VL
119 svstate.maxvl[0:7] = VL # MAXVL
120 print ("SVSTATE", bin(svstate.spr.asint()))
121
122 with Program(lst, bigendian=False) as program:
123 sim = self.run_tst_program(program, svstate=svstate,
124 initial_fprs=fprs)
125 print ("spr svshape0", sim.spr['SVSHAPE0'])
126 print (" xdimsz", sim.spr['SVSHAPE0'].xdimsz)
127 print (" ydimsz", sim.spr['SVSHAPE0'].ydimsz)
128 print (" zdimsz", sim.spr['SVSHAPE0'].zdimsz)
129 print ("spr svshape1", sim.spr['SVSHAPE1'])
130 print ("spr svshape2", sim.spr['SVSHAPE2'])
131 print ("spr svshape3", sim.spr['SVSHAPE3'])
132
133 # work out the results with the twin mul/add-sub
134 res = transform_radix2(av, coe)
135
136 for i, expected in enumerate(res):
137 print ("i", i, float(sim.fpr(i)), "expected", expected)
138 for i, expected in enumerate(res):
139 # convert to Power single
140 expected = DOUBLE2SINGLE(fp64toselectable(expected))
141 expected = float(expected)
142 actual = float(sim.fpr(i))
143 # approximate error calculation, good enough test
144 # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
145 # and the rounding is different
146 err = abs(actual - expected) / expected
147 self.assertTrue(err < 1e-7)
148
149 def test_sv_remap_fpmadds_fft_svstep(self):
150 """>>> lst = SVP64Asm( ["setvl 0, 0, 11, 1, 1, 1",
151 "svremap 8, 1, 1, 1",
152 "sv.ffmadds 0.v, 0.v, 0.v, 8.v",
153 "setvl. 0, 0, 0, 1, 0, 0",
154 "bc 4, 2, -16"
155 ])
156 runs a full in-place O(N log2 N) butterfly schedule for
157 Discrete Fourier Transform. this version however uses
158 SVP64 "Vertical-First" Mode and so needs an explicit
159 branch, testing CR0.
160
161 SVP64 "REMAP" in Butterfly Mode is applied to a twin +/- FMAC
162 (3 inputs, 2 outputs)
163 """
164 lst = SVP64Asm( ["setvl 0, 0, 11, 1, 1, 1",
165 "svremap 8, 1, 1, 1",
166 "sv.ffmadds 0.v, 0.v, 0.v, 8.v",
167 "setvl. 0, 0, 0, 1, 0, 0",
168 "bc 4, 2, -16"
169 ])
170 lst = list(lst)
171
172 # array and coefficients to test
173 av = [7.0, -9.8, 3.0, -32.3,
174 -2.0, 5.0, -9.8, 31.3] # array 0..7
175 coe = [-0.25, 0.5, 3.1, 6.2] # coefficients
176
177 # store in regfile
178 fprs = [0] * 32
179 for i, c in enumerate(coe):
180 fprs[i+8] = fp64toselectable(c)
181 for i, a in enumerate(av):
182 fprs[i+0] = fp64toselectable(a)
183
184 # set total. err don't know how to calculate how many there are...
185 # do it manually for now
186 VL = 0
187 size = 2
188 n = len(av)
189 while size <= n:
190 halfsize = size // 2
191 tablestep = n // size
192 for i in range(0, n, size):
193 for j in range(i, i + halfsize):
194 VL += 1
195 size *= 2
196
197 # SVSTATE (calculated VL)
198 svstate = SVP64State()
199 svstate.vl[0:7] = VL # VL
200 svstate.maxvl[0:7] = VL # MAXVL
201 print ("SVSTATE", bin(svstate.spr.asint()))
202
203 with Program(lst, bigendian=False) as program:
204 sim = self.run_tst_program(program, svstate=svstate,
205 initial_fprs=fprs)
206 print ("spr svshape0", sim.spr['SVSHAPE0'])
207 print (" xdimsz", sim.spr['SVSHAPE0'].xdimsz)
208 print (" ydimsz", sim.spr['SVSHAPE0'].ydimsz)
209 print (" zdimsz", sim.spr['SVSHAPE0'].zdimsz)
210 print ("spr svshape1", sim.spr['SVSHAPE1'])
211 print ("spr svshape2", sim.spr['SVSHAPE2'])
212 print ("spr svshape3", sim.spr['SVSHAPE3'])
213
214 # work out the results with the twin mul/add-sub
215 res = transform_radix2(av, coe)
216
217 for i, expected in enumerate(res):
218 print ("i", i, float(sim.fpr(i)), "expected", expected)
219 for i, expected in enumerate(res):
220 # convert to Power single
221 expected = DOUBLE2SINGLE(fp64toselectable(expected))
222 expected = float(expected)
223 actual = float(sim.fpr(i))
224 # approximate error calculation, good enough test
225 # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
226 # and the rounding is different
227 err = abs(actual - expected) / expected
228 self.assertTrue(err < 1e-7)
229
230 def test_sv_fpmadds_fft(self):
231 """>>> lst = ["sv.ffmadds 2.v, 2.v, 2.v, 10.v"
232 ]
233 four in-place vector mul-adds, four in-place vector mul-subs
234
235 this is the twin "butterfly" mul-add-sub from Cooley-Tukey
236 https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Data_reordering,_bit_reversal,_and_in-place_algorithms
237
238 there is the *option* to target a different location (non-in-place)
239 just in case.
240
241 SVP64 "FFT" mode will *automatically* offset FRB and an implicit
242 FRS to perform the two multiplies. one add, one subtract.
243
244 sv.ffmadds FRT, FRA, FRC, FRB actually does:
245 fmadds FRT , FRA, FRC, FRA
246 fnmsubs FRT+vl, FRA, FRC, FRB+vl
247 """
248 lst = SVP64Asm(["sv.ffmadds 2.v, 2.v, 2.v, 10.v"
249 ])
250 lst = list(lst)
251
252 fprs = [0] * 32
253 av = [7.0, -9.8, 2.0, -32.3] # first half of array 0..3
254 bv = [-2.0, 2.0, -9.8, 32.3] # second half of array 4..7
255 coe = [-1.0, 4.0, 3.1, 6.2] # coefficients
256 res = []
257 # work out the results with the twin mul/add-sub
258 for i, (a, b, c) in enumerate(zip(av, bv, coe)):
259 fprs[i+2] = fp64toselectable(a)
260 fprs[i+6] = fp64toselectable(b)
261 fprs[i+10] = fp64toselectable(c)
262 mul = a * c
263 t = b + mul
264 u = b - mul
265 t = DOUBLE2SINGLE(fp64toselectable(t)) # convert to Power single
266 u = DOUBLE2SINGLE(fp64toselectable(u)) # from double
267 res.append((t, u))
268 print ("FFT", i, "in", a, b, "coeff", c, "mul", mul, "res", t, u)
269
270 # SVSTATE (in this case, VL=2)
271 svstate = SVP64State()
272 svstate.vl[0:7] = 4 # VL
273 svstate.maxvl[0:7] = 4 # MAXVL
274 print ("SVSTATE", bin(svstate.spr.asint()))
275
276 with Program(lst, bigendian=False) as program:
277 sim = self.run_tst_program(program, svstate=svstate,
278 initial_fprs=fprs)
279 # confirm that the results are as expected
280 for i, (t, u) in enumerate(res):
281 self.assertEqual(sim.fpr(i+2), t)
282 self.assertEqual(sim.fpr(i+6), u)
283
284 def run_tst_program(self, prog, initial_regs=None,
285 svstate=None,
286 initial_mem=None,
287 initial_fprs=None):
288 if initial_regs is None:
289 initial_regs = [0] * 32
290 simulator = run_tst(prog, initial_regs, mem=initial_mem,
291 initial_fprs=initial_fprs,
292 svstate=svstate)
293
294 print ("GPRs")
295 simulator.gpr.dump()
296 print ("FPRs")
297 simulator.fpr.dump()
298
299 return simulator
300
301
302 if __name__ == "__main__":
303 unittest.main()