successful complex FFT butterfly, in-place, using Vertical-First SVP64
[openpower-isa.git] / src / openpower / decoder / isa / test_caller_svp64_fft.py
1 from nmigen import Module, Signal
2 from nmigen.back.pysim import Simulator, Delay, Settle
3 from nmutil.formaltest import FHDLTestCase
4 import unittest
5 from openpower.decoder.power_decoder import (create_pdecode)
6 from openpower.simulator.program import Program
7 from openpower.decoder.isa.caller import SVP64State
8 from openpower.decoder.selectable_int import SelectableInt
9 from openpower.decoder.isa.test_caller import run_tst
10 from openpower.sv.trans.svp64 import SVP64Asm
11 from copy import deepcopy
12 from openpower.decoder.helpers import fp64toselectable
13 from openpower.decoder.isafunctions.double2single import DOUBLE2SINGLE
14
15
def transform_radix2(vec, exptable):
    """Run the radix-2 butterfly schedule over ``vec`` in place (scalar values).

    # FFT and convolution test (Python), based on Project Nayuki
    #
    # Copyright (c) 2020 Project Nayuki. (MIT License)
    # https://www.nayuki.io/page/free-small-fft-in-multiple-languages

    Unlike the Project Nayuki original, no bit-reversed permutation is
    applied to the input first: only the triple-nested butterfly loops
    are executed.  Every butterfly is also printed, as a debugging aid.

    Args:
        vec: mutable sequence of values; its length must be zero or a
            power of two.  It is overwritten with the result.
        exptable: sequence of coefficients indexed by ``k``; at least
            ``len(vec) // 2`` entries are read.

    Returns:
        ``vec`` itself (the same object), after the in-place transform.

    Raises:
        ValueError: if ``len(vec)`` is not a power of two or ``exptable``
            is too short.  Raised before ``vec`` is touched.
    """
    n = len(vec)

    # Validate up front: otherwise a bad length only fails with an
    # IndexError part-way through, leaving vec half-transformed.
    if n & (n - 1):
        raise ValueError("Length is not a power of 2")
    if len(exptable) < n // 2:
        raise ValueError("exptable needs at least len(vec) // 2 entries")

    size = 2
    while size <= n:
        halfsize = size // 2
        tablestep = n // size
        for i in range(0, n, size):
            k = 0
            for j in range(i, i + halfsize):
                # exact same actual computation, just embedded in
                # triple-nested for-loops
                jl, jh = j, j + halfsize
                vjh = vec[jh]  # original upper value, kept for the print
                temp1 = vec[jh] * exptable[k]
                temp2 = vec[jl]
                vec[jh] = temp2 - temp1
                vec[jl] = temp2 + temp1
                print("xform jl jh k", jl, jh, k,
                      "vj vjh ek", temp2, vjh, exptable[k],
                      "t1, t2", temp1, temp2,
                      "v[jh] v[jl]", vec[jh], vec[jl])
                k += tablestep
        size *= 2

    return vec
62
63
def transform_radix2_complex(vec_r, vec_i, cos_r, sin_i):
    """Run the radix-2 butterfly schedule in place on split real/imag arrays.

    # FFT and convolution test (Python), based on Project Nayuki
    #
    # Copyright (c) 2020 Project Nayuki. (MIT License)
    # https://www.nayuki.io/page/free-small-fft-in-multiple-languages

    Per butterfly (``jl`` = lower index, ``jh`` = upper index)::

        tpre =  vec_r[jh] * cos_r[k] + vec_i[jh] * sin_i[k]
        tpim = -vec_r[jh] * sin_i[k] + vec_i[jh] * cos_r[k]
        vec_r[jh], vec_r[jl] = vec_r[jl] - tpre, vec_r[jl] + tpre
        vec_i[jh], vec_i[jl] = vec_i[jl] - tpim, vec_i[jl] + tpim

    Unlike the Project Nayuki original, no bit-reversed permutation is
    applied to the input first.  Intermediate values are printed as a
    debugging aid.

    Args:
        vec_r: mutable sequence of real parts; length zero or a power of
            two.  Overwritten with the result.
        vec_i: mutable sequence of imaginary parts, same length as
            ``vec_r``.  Overwritten with the result.
        cos_r: coefficient table indexed by ``k``; at least
            ``len(vec_r) // 2`` entries are read.
        sin_i: second coefficient table, same indexing as ``cos_r``.

    Returns:
        The tuple ``(vec_r, vec_i)`` (the same objects that were passed).

    Raises:
        ValueError: on mismatched lengths, a non-power-of-two length, or
            coefficient tables that are too short.  Raised before any
            input is modified.
    """
    n = len(vec_r)

    # Validate up front: otherwise bad sizes only fail with an IndexError
    # part-way through, leaving both vectors half-transformed.
    if len(vec_i) != n:
        raise ValueError("vec_r and vec_i must have the same length")
    if n & (n - 1):
        raise ValueError("Length is not a power of 2")
    if len(cos_r) < n // 2 or len(sin_i) < n // 2:
        raise ValueError("cos_r and sin_i need at least len(vec_r) // 2 "
                         "entries")

    size = 2
    while size <= n:
        halfsize = size // 2
        tablestep = n // size
        for i in range(0, n, size):
            k = 0
            for j in range(i, i + halfsize):
                # exact same actual computation, just embedded in
                # triple-nested for-loops
                jl, jh = j, j + halfsize

                print("xform jl jh k", jl, jh, k,
                      "vr h l", vec_r[jh], vec_r[jl],
                      "vi h l", vec_i[jh], vec_i[jl])
                print(" cr k", cos_r[k], "si k", sin_i[k])

                # real part of the twiddled upper element
                mul1_r = vec_r[jh] * cos_r[k]
                mul2_r = vec_i[jh] * sin_i[k]
                tpre = mul1_r + mul2_r
                print(" vec_r[jh] * cos_r[k]", mul1_r)
                print(" vec_i[jh] * sin_i[k]", mul2_r)
                print(" tpre", tpre)

                # imaginary part of the twiddled upper element
                mul1_i = vec_r[jh] * sin_i[k]
                mul2_i = vec_i[jh] * cos_r[k]
                tpim = -mul1_i + mul2_i
                print(" vec_r[jh] * sin_i[k]", mul1_i)
                print(" vec_i[jh] * cos_r[k]", mul2_i)
                print(" tpim", tpim)

                # twin add/sub: the upper element is written first, from
                # the not-yet-updated lower element
                vec_r[jh] = vec_r[jl] - tpre
                vec_i[jh] = vec_i[jl] - tpim
                vec_r[jl] += tpre
                vec_i[jl] += tpim

                print(" xform jl jh k", jl, jh, k,
                      "\n vr h l", vec_r[jh], vec_r[jl],
                      "\n vi h l", vec_i[jh], vec_i[jl])
                k += tablestep
        size *= 2

    return vec_r, vec_i
126
127
class FFTTestCase(FHDLTestCase):
    """SVP64 FFT/DFT butterfly tests, executed through ``run_tst``.

    Each test assembles a short SVP64 program, preloads the floating-point
    register file, runs the program and compares the resulting FPRs with
    values computed in plain Python.
    """

    # ------------------------------------------------------------------
    # shared helpers
    # ------------------------------------------------------------------

    def _check_regs(self, sim, expected):
        """Assert GPRs 0..31 of ``sim`` equal ``expected`` (64-bit)."""
        for i in range(32):
            self.assertEqual(sim.gpr(i), SelectableInt(expected[i], 64))

    @staticmethod
    def _butterfly_vl(n):
        """Count the butterflies in the full schedule for ``n`` elements.

        Walks the same ``size`` / ``i`` / ``j`` structure as
        ``transform_radix2`` and counts inner-loop iterations instead of
        executing them; the result is used as VL and MAXVL.
        """
        total = 0
        size = 2
        while size <= n:
            # one group per start index i, size // 2 butterflies per group
            total += len(range(0, n, size)) * (size // 2)
            size *= 2
        return total

    @staticmethod
    def _make_svstate(vl):
        """Return an ``SVP64State`` with VL and MAXVL both set to ``vl``."""
        svstate = SVP64State()
        svstate.vl[0:7] = vl  # VL
        svstate.maxvl[0:7] = vl  # MAXVL
        print("SVSTATE", bin(svstate.spr.asint()))
        return svstate

    @staticmethod
    def _dump_svshapes(sim):
        """Print the four SVSHAPE SPRs of ``sim`` (debugging aid)."""
        print("spr svshape0", sim.spr['SVSHAPE0'])
        print(" xdimsz", sim.spr['SVSHAPE0'].xdimsz)
        print(" ydimsz", sim.spr['SVSHAPE0'].ydimsz)
        print(" zdimsz", sim.spr['SVSHAPE0'].zdimsz)
        print("spr svshape1", sim.spr['SVSHAPE1'])
        print("spr svshape2", sim.spr['SVSHAPE2'])
        print("spr svshape3", sim.spr['SVSHAPE3'])

    @staticmethod
    def _real_fft_inputs():
        """Return ``(av, coe, fprs)`` shared by the real-valued tests.

        ``av`` (8 values) is stored in FPRs 0..7 and ``coe`` (4
        coefficients) in FPRs 8..11 of the 32-entry list ``fprs``.
        """
        av = [7.0, -9.8, 3.0, -32.3,
              -2.0, 5.0, -9.8, 31.3]  # array 0..7
        coe = [-0.25, 0.5, 3.1, 6.2]  # coefficients

        # store in regfile
        fprs = [0] * 32
        for i, c in enumerate(coe):
            fprs[i+8] = fp64toselectable(c)
        for i, a in enumerate(av):
            fprs[i+0] = fp64toselectable(a)
        return av, coe, fprs

    def _assert_fpr_close(self, sim, regnum, expected, rel_tol):
        """Assert FPR ``regnum`` matches ``expected`` within ``rel_tol``.

        ``expected`` (a Python float) is first converted to Power single
        precision.  The comparison is approximate: the reference is
        computed with separate multiply and add/sub steps, so rounding
        can differ from the simulated instructions.

        The error is compared against ``rel_tol * abs(expected)``.
        Dividing by the signed ``expected`` instead would give a negative
        "error" for every negative reference value (so the check could
        never fail) and would divide by zero for ``expected == 0``.
        """
        expected = float(DOUBLE2SINGLE(fp64toselectable(expected)))
        actual = float(sim.fpr(regnum))
        err = abs(actual - expected)
        self.assertTrue(err <= rel_tol * abs(expected),
                        "fpr %d: actual %r expected %r (rel_tol %r)" %
                        (regnum, actual, expected, rel_tol))

    def _check_real_fft(self, sim, av, coe):
        """Compare FPRs 0..len(av)-1 with ``transform_radix2(av, coe)``.

        Note: ``transform_radix2`` works in place, so ``av`` is
        overwritten.  Call this only after the register file has been
        loaded and the program has been run.
        """
        # work out the results with the twin mul/add-sub
        res = transform_radix2(av, coe)

        for i, expected in enumerate(res):
            print("i", i, float(sim.fpr(i)), "expected", expected)
        for i, expected in enumerate(res):
            self._assert_fpr_close(sim, i, expected, 1e-7)

    # ------------------------------------------------------------------
    # tests
    # ------------------------------------------------------------------

    def test_sv_remap_fpmadds_fft(self):
        """Butterfly REMAP applied to a single sv.ffmadds.

            lst = ["svshape 8, 1, 1, 1, 0",
                   "svremap 31, 1, 0, 2, 0, 1",
                   "sv.ffmadds 0.v, 0.v, 0.v, 8.v"
                  ]

        runs a full in-place O(N log2 N) butterfly schedule for
        Discrete Fourier Transform.

        this is the twin "butterfly" mul-add-sub from Cooley-Tukey
        https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Data_reordering,_bit_reversal,_and_in-place_algorithms

        there is the *option* to target a different location (non-in-place)
        just in case.

        SVP64 "REMAP" in Butterfly Mode is applied to a twin +/- FMAC
        (3 inputs, 2 outputs)
        """
        lst = list(SVP64Asm(["svshape 8, 1, 1, 1, 0",
                             "svremap 31, 1, 0, 2, 0, 1",
                             "sv.ffmadds 0.v, 0.v, 0.v, 8.v"
                             ]))

        # array and coefficients to test, already stored in the regfile
        av, coe, fprs = self._real_fft_inputs()

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, initial_fprs=fprs)
            self._dump_svshapes(sim)
            self._check_real_fft(sim, av, coe)

    def test_sv_remap_fpmadds_fft_svstep(self):
        """Butterfly REMAP on sv.ffmadds, Vertical-First with svstep loop.

            lst = SVP64Asm( [
                            "svshape 8, 1, 1, 1, 1",
                            "svremap 31, 1, 0, 2, 0, 1",
                            "sv.ffmadds 0.v, 0.v, 0.v, 8.v",
                            "setvl. 0, 0, 0, 1, 0, 0",
                            "bc 4, 2, -16"
                            ])

        runs a full in-place O(N log2 N) butterfly schedule for
        Discrete Fourier Transform.  this version however uses
        SVP64 "Vertical-First" Mode and so needs an explicit
        branch, testing CR0.

        SVP64 "REMAP" in Butterfly Mode is applied to a twin +/- FMAC
        (3 inputs, 2 outputs)
        """
        lst = list(SVP64Asm([
            "svshape 8, 1, 1, 1, 1",
            "svremap 31, 1, 0, 2, 0, 1",
            "sv.ffmadds 0.v, 0.v, 0.v, 8.v",
            "setvl. 0, 0, 0, 1, 0, 0",
            "bc 4, 2, -16"
        ]))

        # array and coefficients to test, already stored in the regfile
        av, coe, fprs = self._real_fft_inputs()

        # SVSTATE: VL is the total number of butterflies, counted manually
        svstate = self._make_svstate(self._butterfly_vl(len(av)))

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, svstate=svstate,
                                       initial_fprs=fprs)
            self._dump_svshapes(sim)
            self._check_real_fft(sim, av, coe)

    def test_sv_remap_fpmadds_fft_svstep_scalar_temp(self):
        """Vertical-First butterfly using fmuls + ffadds and a scalar temp.

            lst = SVP64Asm( [
                "svshape 8, 1, 1, 1, 1",
                # RA: jh (S1) RB: n/a RC: k (S2) RT: scalar EA: n/a
                "svremap 5, 1, 0, 2, 0, 0",
                "sv.fmuls 24, 0.v, 8.v",
                # RA: scal RB: jl (S0) RC: n/a RT: jl (S0) EA: jh (S1)
                "svremap 26, 0, 0, 0, 0, 1",
                "sv.ffadds 0.v, 24, 0.v",
                "setvl. 0, 0, 0, 1, 0, 0",
                "bc 4, 2, -28"
                ])

        runs a full in-place O(N log2 N) butterfly schedule for
        Discrete Fourier Transform.  also uses "Vertical First"
        but also uses temporary scalars and ffadds rather than
        sv.ffmadds.

        this represents an incremental step towards complex FFT

        SVP64 "REMAP" in Butterfly Mode is applied to two instructions:

        * single fmuls FRT, FRA, FRC
        * twin in-place ffadds +/- ADD/SUB (2 inputs, 2 outputs)
          (FRS is implicit / hidden in ff* operations)

        multiply: # sv.fmuls FRT, FRA, FRC
            temp1 = vec[jh] * exptable[k]
            temp2 = vec[jl]
        twin-add: # sv.ffadds FRT(/FRS), FRA, FRB
            vec[jh] = temp2 - temp1
            vec[jl] = temp2 + temp1
        """
        lst = list(SVP64Asm([
            "svshape 8, 1, 1, 1, 1",
            # RA: jh (S1) RB: n/a RC: k (S2) RT: scalar EA: n/a
            "svremap 5, 1, 0, 2, 0, 0",
            "sv.fmuls 24, 0.v, 8.v",
            # RA: scal RB: jl (S0) RC: n/a RT: jl (S0) EA: jh (S1)
            "svremap 26, 0, 0, 0, 0, 1",
            "sv.ffadds 0.v, 24, 0.v",
            "setvl. 0, 0, 0, 1, 0, 0",
            "bc 4, 2, -28"
        ]))

        # array and coefficients to test, already stored in the regfile
        av, coe, fprs = self._real_fft_inputs()

        # SVSTATE: VL is the total number of butterflies, counted manually
        svstate = self._make_svstate(self._butterfly_vl(len(av)))

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, svstate=svstate,
                                       initial_fprs=fprs)
            self._dump_svshapes(sim)
            self._check_real_fft(sim, av, coe)

    def test_sv_fpmadds_fft(self):
        """Single sv.ffmadds without REMAP, VL=4.

            lst = ["sv.ffmadds 2.v, 2.v, 2.v, 10.v"
                  ]

        four in-place vector mul-adds, four in-place vector mul-subs

        this is the twin "butterfly" mul-add-sub from Cooley-Tukey
        https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Data_reordering,_bit_reversal,_and_in-place_algorithms

        there is the *option* to target a different location (non-in-place)
        just in case.

        With a = FPR[2+i], b = FPR[6+i] and c = FPR[10+i] on entry, the
        test expects, for i in 0..3 (rounded to Power single precision):

            FPR[2+i] = b + a * c
            FPR[6+i] = b - a * c
        """
        lst = list(SVP64Asm(["sv.ffmadds 2.v, 2.v, 2.v, 10.v"
                             ]))

        fprs = [0] * 32
        av = [7.0, -9.8, 2.0, -32.3]  # first half of array 0..3
        bv = [-2.0, 2.0, -9.8, 32.3]  # second half of array 4..7
        coe = [-1.0, 4.0, 3.1, 6.2]  # coefficients
        res = []
        # work out the results with the twin mul/add-sub
        for i, (a, b, c) in enumerate(zip(av, bv, coe)):
            fprs[i+2] = fp64toselectable(a)
            fprs[i+6] = fp64toselectable(b)
            fprs[i+10] = fp64toselectable(c)
            mul = a * c
            t = b + mul
            u = b - mul
            t = DOUBLE2SINGLE(fp64toselectable(t))  # convert to Power single
            u = DOUBLE2SINGLE(fp64toselectable(u))  # from double
            res.append((t, u))
            print("FFT", i, "in", a, b, "coeff", c, "mul", mul, "res", t, u)

        # SVSTATE (in this case, VL=4)
        svstate = self._make_svstate(4)

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, svstate=svstate,
                                       initial_fprs=fprs)
            # confirm that the results are as expected
            for i, (t, u) in enumerate(res):
                self.assertEqual(sim.fpr(i+2), t)
                self.assertEqual(sim.fpr(i+6), u)

    def test_sv_ffadds_fft(self):
        """Single sv.ffadds without REMAP, VL=4.

            lst = ["sv.ffadds 2.v, 2.v, 2.v"
                  ]

        four in-place vector adds, four in-place vector subs

        With a = FPR[2+i] and b = FPR[6+i] on entry, the test expects,
        for i in 0..3 (rounded to Power single precision):

            FPR[2+i] = b + a
            FPR[6+i] = b - a
        """
        lst = list(SVP64Asm(["sv.ffadds 2.v, 2.v, 2.v"
                             ]))

        fprs = [0] * 32
        av = [7.0, -9.8, 2.0, -32.3]  # first half of array 0..3
        bv = [-2.0, 2.0, -9.8, 32.3]  # second half of array 4..7
        res = []
        # work out the results with the twin add-sub
        for i, (a, b) in enumerate(zip(av, bv)):
            fprs[i+2] = fp64toselectable(a)
            fprs[i+6] = fp64toselectable(b)
            t = b + a
            u = b - a
            t = DOUBLE2SINGLE(fp64toselectable(t))  # convert to Power single
            u = DOUBLE2SINGLE(fp64toselectable(u))  # from double
            res.append((t, u))
            print("FFT", i, "in", a, b, "res", t, u)

        # SVSTATE (in this case, VL=4)
        svstate = self._make_svstate(4)

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, svstate=svstate,
                                       initial_fprs=fprs)
            # show actual vs expected before asserting
            for i, (t, u) in enumerate(res):
                a = float(sim.fpr(i+2))
                b = float(sim.fpr(i+6))
                t = float(t)
                u = float(u)
                print("FFT", i, "in", a, b, "res", t, u)
            # confirm that the results are as expected
            for i, (t, u) in enumerate(res):
                self.assertEqual(sim.fpr(i+2), t)
                self.assertEqual(sim.fpr(i+6), u)

    def test_sv_remap_fpmadds_fft_svstep_complex(self):
        """Complex in-place butterfly schedule, Vertical-First.

        runs a full in-place O(N log2 N) butterfly schedule for
        Discrete Fourier Transform.  this version however uses
        SVP64 "Vertical-First" Mode and so needs an explicit
        branch, testing CR0.

        SVP64 "REMAP" in Butterfly Mode is applied to four fmuls and
        two twin +/- ffadds; the sums of products are held in scalar
        temporaries (FPRs 24..27).

        complex calculation (FFT):

            tpre = vec_r[jh] * cos_r[k] + vec_i[jh] * sin_i[k]
            vec_r[jh] = vec_r[jl] - tpre
            vec_r[jl] += tpre

            tpim = -vec_r[jh] * sin_i[k] + vec_i[jh] * cos_r[k]
            vec_i[jh] = vec_i[jl] - tpim
            vec_i[jl] += tpim

        real-only calculation (DFT):

            temp1 = vec[jh] * exptable[k]
            temp2 = vec[jl]
            vec[jh] = temp2 - temp1
            vec[jl] = temp2 + temp1
        """
        lst = list(SVP64Asm([
            # set triple butterfly mode
            "svshape 8, 1, 1, 1, 1",
            # tpre
            "svremap 5, 1, 0, 2, 0, 0",
            "sv.fmuls 24, 0.v, 16.v",  # mul1_r = r*cos_r
            "svremap 5, 1, 0, 2, 0, 0",
            "sv.fmuls 25, 8.v, 20.v",  # mul2_r = i*sin_i
            "fadds 24, 24, 25",  # tpre = mul1_r + mul2_r
            # tpim
            "svremap 5, 1, 0, 2, 0, 0",
            "sv.fmuls 26, 0.v, 20.v",  # mul1_i = r*sin_i
            "svremap 5, 1, 0, 2, 0, 0",
            "sv.fmuls 27, 8.v, 16.v",  # mul2_i = i*cos_r
            "fsubs 26, 27, 26",  # tpim = mul2_i - mul1_i
            # vec_r jh/jl
            "svremap 26, 0, 0, 0, 0, 1",
            "sv.ffadds 0.v, 24, 0.v",  # vh/vl +/- tpre
            # vec_i jh/jl
            "svremap 26, 0, 0, 0, 0, 1",
            "sv.ffadds 8.v, 26, 8.v",  # vh/vl +- tpim

            # svstep loop
            "setvl. 0, 0, 0, 1, 0, 0",
            "bc 4, 2, -84"
        ]))

        # array and coefficients to test
        ar = [7.0, -9.8, 3.0, -32.3,
              -2.0, 5.0, -9.8, 31.3]  # array 0..7 real
        ai = [1.0, -1.8, 3.0, 19.3,
              4.0, -2.0, -0.8, 1.3]  # array 0..7 imaginary
        coer = [-0.25, 0.5, 3.1, 6.2]  # coefficients real
        coei = [0.21, -0.1, 1.1, -4.0]  # coefficients imaginary

        # store in regfile: real 0..7, imag 8..15, coeffs 16..19 / 20..23
        fprs = [0] * 64
        for i, a in enumerate(ar):
            fprs[i+0] = fp64toselectable(a)
        for i, a in enumerate(ai):
            fprs[i+8] = fp64toselectable(a)
        for i, cr in enumerate(coer):
            fprs[i+16] = fp64toselectable(cr)
        for i, ci in enumerate(coei):
            fprs[i+20] = fp64toselectable(ci)

        # SVSTATE: VL is the total number of butterflies, counted manually
        svstate = self._make_svstate(self._butterfly_vl(len(ar)))

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, svstate=svstate,
                                       initial_fprs=fprs)
            self._dump_svshapes(sim)

            # work out the results with the twin mul/add-sub, explicit
            # complex numbers (ar/ai are overwritten in place here)
            res_r, res_i = transform_radix2_complex(ar, ai, coer, coei)

            for i, (expected_r, expected_i) in enumerate(zip(res_r, res_i)):
                print("i", i, float(sim.fpr(i)), float(sim.fpr(i+8)),
                      "expected_r", expected_r,
                      "expected_i", expected_i)
            for i, (expected_r, expected_i) in enumerate(zip(res_r, res_i)):
                # real parts live in FPRs 0..7, imaginary in FPRs 8..15
                self._assert_fpr_close(sim, i, expected_r, 1e-6)
                self._assert_fpr_close(sim, i+8, expected_i, 1e-6)

    def test_sv_ffadds_fft_scalar(self):
        """sv.ffadds with a vector destination and scalar sources, VL=4.

            lst = ["sv.ffadds 2.v, 12, 13"
                  ]

        four in-place vector adds and subs, but done with a scalar
        pair (fp12, fp13)
        """
        lst = list(SVP64Asm(["sv.ffadds 2.v, 12, 13"
                             ]))

        fprs = [0] * 32
        scalar_a = 1.3
        scalar_b = -2.0
        fprs[12] = fp64toselectable(scalar_a)
        fprs[13] = fp64toselectable(scalar_b)
        res = []
        # work out the results with the twin add-sub
        for i in range(4):
            t = scalar_b + scalar_a
            u = scalar_b - scalar_a
            t = DOUBLE2SINGLE(fp64toselectable(t))  # convert to Power single
            u = DOUBLE2SINGLE(fp64toselectable(u))  # from double
            res.append((t, u))
            print("FFT", i, "res", t, u)

        # SVSTATE (in this case, VL=4)
        svstate = self._make_svstate(4)

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, svstate=svstate,
                                       initial_fprs=fprs)
            # show actual vs expected before asserting
            for i, (t, u) in enumerate(res):
                a = float(sim.fpr(i+2))
                b = float(sim.fpr(i+6))
                t = float(t)
                u = float(u)
                print("FFT", i, "in", a, b, "res", t, u)
            # confirm that the results are as expected
            for i, (t, u) in enumerate(res):
                self.assertEqual(sim.fpr(i+2), t)
                self.assertEqual(sim.fpr(i+6), u)

    def run_tst_program(self, prog, initial_regs=None,
                        svstate=None,
                        initial_mem=None,
                        initial_fprs=None):
        """Run ``prog`` through ``run_tst`` and return the simulator.

        ``initial_regs`` defaults to 32 zeroed GPRs.  The GPR and FPR
        files are dumped after the run as a debugging aid.
        """
        if initial_regs is None:
            initial_regs = [0] * 32
        simulator = run_tst(prog, initial_regs, mem=initial_mem,
                            initial_fprs=initial_fprs,
                            svstate=svstate)

        print("GPRs")
        simulator.gpr.dump()
        print("FPRs")
        simulator.fpr.dump()

        return simulator
680
681
# Run every test in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()