format code removing unused imports
[openpower-isa.git] / src / openpower / decoder / isa / test_caller_svp64_fft.py
1 import unittest
2
3 from nmutil.formaltest import FHDLTestCase
4 from openpower.decoder.helpers import SINGLE, fp64toselectable
5 from openpower.decoder.isa.caller import SVP64State
6 from openpower.decoder.isa.test_caller import run_tst
7 from openpower.decoder.isafunctions.double2single import ISACallerFnHelper
8 from openpower.decoder.selectable_int import SelectableInt
9 from openpower.simulator.program import Program
10 from openpower.sv.trans.svp64 import SVP64Asm
11
# HACK: the DOUBLE2SINGLE function is auto-generated from pseudo-code, so a
# module-level ISACallerFnHelper instance is created here purely to be able
# to call it (as fph.DOUBLE2SINGLE) from the tests below.
fph = ISACallerFnHelper(XLEN=64)
15
16
def transform_radix2(vec, exptable, reverse=False):
    """Run the radix-2 butterfly schedule over a list of real values.

    FFT and convolution test (Python), based on Project Nayuki

    Copyright (c) 2020 Project Nayuki. (MIT License)
    https://www.nayuki.io/page/free-small-fft-in-multiple-languages

    vec      -- list of values; its length is expected to be a power of
                two (any other length runs off the end of the list)
    exptable -- coefficient table, indexed by k = 0, tablestep, ...
    reverse  -- when True the butterflies run on a bit-reverse-permuted
                *copy* of vec and the caller's list is left untouched;
                when False vec itself is updated in place

    Returns the list holding the results (vec itself unless reverse is set).
    Every butterfly is also printed, as a debugging aid.
    """
    def bit_reversed(value, nbits):
        # integer whose lowest 'nbits' bits are those of 'value', reversed
        flipped = 0
        for _ in range(nbits):
            flipped = (flipped << 1) | (value & 1)
            value >>= 1
        return flipped

    count = len(vec)

    # optional copy with bit-reversed permutation
    if reverse:
        nbits = count.bit_length() - 1
        vec = [vec[bit_reversed(pos, nbits)] for pos in range(count)]

    span = 2
    while span <= count:
        half = span // 2
        stride = count // span
        for start in range(0, count, span):
            for offset in range(half):
                lo = start + offset
                hi = lo + half
                # the coefficient index advances by 'stride' per butterfly
                twiddle = offset * stride
                upper_in = vec[hi]
                product = upper_in * exptable[twiddle]
                lower_in = vec[lo]
                vec[hi] = lower_in - product
                vec[lo] = lower_in + product
                print("xform jl jh k", lo, hi, twiddle,
                      "vj vjh ek", lower_in, upper_in, exptable[twiddle],
                      "t1, t2", product, lower_in,
                      "v[jh] v[jl]", vec[hi], vec[lo])
        span *= 2

    return vec
64
65
def transform_radix2_complex(vec_r, vec_i, cos_r, sin_i, reverse=False):
    """Run the radix-2 butterfly schedule over split real/imaginary lists.

    FFT and convolution test (Python), based on Project Nayuki

    Copyright (c) 2020 Project Nayuki. (MIT License)
    https://www.nayuki.io/page/free-small-fft-in-multiple-languages

    vec_r, vec_i -- real and imaginary parts, as two lists of equal length
                    (expected to be a power of two)
    cos_r, sin_i -- coefficient tables, indexed by k = 0, tablestep, ...
    reverse      -- when True the butterflies run on bit-reverse-permuted
                    *copies* of vec_r / vec_i and the caller's lists are
                    left untouched; when False both lists are updated
                    in place

    Returns the tuple (vec_r, vec_i) holding the results.
    Every butterfly is also printed, as a debugging aid.
    """
    def reverse_bits(val, width):
        # integer whose lowest 'width' bits are those of 'val', reversed
        result = 0
        for _ in range(width):
            result = (result << 1) | (val & 1)
            val >>= 1
        return result

    # Initialization
    n = len(vec_r)
    levels = n.bit_length() - 1

    # Copy with bit-reversed permutation.  Both components must be
    # permuted identically (this previously referenced an undefined
    # name 'vec' and raised NameError).
    if reverse:
        order = [reverse_bits(i, levels) for i in range(n)]
        vec_r = [vec_r[src] for src in order]
        vec_i = [vec_i[src] for src in order]

    size = 2
    while size <= n:
        halfsize = size // 2
        tablestep = n // size
        for i in range(0, n, size):
            k = 0
            for j in range(i, i + halfsize):
                # exact same actual computation, just embedded in
                # triple-nested for-loops
                jl, jh = j, j+halfsize

                print("xform jl jh k", jl, jh, k,
                      "vr h l", vec_r[jh], vec_r[jl],
                      "vi h l", vec_i[jh], vec_i[jl])
                print(" cr k", cos_r[k], "si k", sin_i[k])
                # real part of the product
                mul1_r = vec_r[jh] * cos_r[k]
                mul2_r = vec_i[jh] * sin_i[k]
                tpre = mul1_r + mul2_r
                print(" vec_r[jh] * cos_r[k]", mul1_r)
                print(" vec_i[jh] * sin_i[k]", mul2_r)
                print(" tpre", tpre)
                # imaginary part of the product
                mul1_i = vec_r[jh] * sin_i[k]
                mul2_i = vec_i[jh] * cos_r[k]
                tpim = -mul1_i + mul2_i
                print(" vec_r[jh] * sin_i[k]", mul1_i)
                print(" vec_i[jh] * cos_r[k]", mul2_i)
                print(" tpim", tpim)
                # twin add/subtract: jh gets the difference, jl the sum
                vec_r[jh] = vec_r[jl] - tpre
                vec_i[jh] = vec_i[jl] - tpim
                vec_r[jl] += tpre
                vec_i[jl] += tpim

                print(" xform jl jh k", jl, jh, k,
                      "\n vr h l", vec_r[jh], vec_r[jl],
                      "\n vi h l", vec_i[jh], vec_i[jl])
                k += tablestep
        size *= 2

    return vec_r, vec_i
129
130
class FFTTestCase(FHDLTestCase):
    """SVP64 FFT / DFT butterfly tests, run on the ISA simulator.

    Each test assembles a short SVP64 program, pre-loads the FP register
    file (or memory), runs the program through run_tst_program() and
    compares the resulting FPRs against a pure-python reference.
    """

    # ------------------------------------------------------------------
    # private helpers shared by the tests
    # ------------------------------------------------------------------

    def _check_regs(self, sim, expected):
        """Assert GPRs 0..31 of sim equal the 64-bit values in expected."""
        for i in range(32):
            self.assertEqual(sim.gpr(i), SelectableInt(expected[i], 64))

    @staticmethod
    def _store_fprs(fprs, base, values):
        """Store python floats into fprs[base], fprs[base+1], ..."""
        for i, value in enumerate(values):
            fprs[base + i] = fp64toselectable(value)

    @staticmethod
    def _butterfly_count(n):
        """Count the butterflies the radix-2 schedule performs on n items.

        Same triple-nested schedule as transform_radix2, but only
        counting: each pass of block size 'size' performs size//2
        butterflies per block.
        """
        total = 0
        size = 2
        while size <= n:
            total += len(range(0, n, size)) * (size // 2)
            size *= 2
        return total

    @staticmethod
    def _make_svstate(vl):
        """Create an SVP64State with both VL and MAXVL set to vl."""
        svstate = SVP64State()
        svstate.vl = vl  # VL
        svstate.maxvl = vl  # MAXVL
        print("SVSTATE", bin(svstate.asint()))
        return svstate

    @staticmethod
    def _print_svshapes(sim):
        """Dump the SVSHAPE0-3 SPRs of sim (debugging aid)."""
        print("spr svshape0", sim.spr['SVSHAPE0'])
        print(" xdimsz", sim.spr['SVSHAPE0'].xdimsz)
        print(" ydimsz", sim.spr['SVSHAPE0'].ydimsz)
        print(" zdimsz", sim.spr['SVSHAPE0'].zdimsz)
        print("spr svshape1", sim.spr['SVSHAPE1'])
        print("spr svshape2", sim.spr['SVSHAPE2'])
        print("spr svshape3", sim.spr['SVSHAPE3'])

    def _assert_fp_close(self, actual, expected, tolerance):
        """Assert a simulated FPR is within a relative tolerance.

        actual    -- value read from the simulator (convertible to float)
        expected  -- python float reference; converted to Power single
                     precision before the comparison
        tolerance -- maximum permitted relative error

        approximate error calculation, good enough test.
        reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
        and the rounding is different.
        """
        # convert to Power single
        expected = float(fph.DOUBLE2SINGLE(fp64toselectable(expected)))
        actual = float(actual)
        err = abs(actual - expected)
        # divide by the *magnitude* of the reference: dividing by a
        # negative reference gives a negative "error" which passes any
        # tolerance check, and a zero reference cannot be divided by
        # at all (fall back to the absolute error in that case).
        if expected != 0.0:
            err /= abs(expected)
        self.assertLess(err, tolerance,
                        "FP mismatch: actual %r expected %r "
                        "(error %r, tolerance %r)" %
                        (actual, expected, err, tolerance))

    def _check_fprs(self, sim, expected, tolerance, base=0):
        """Print, then check, FPRs base.. of sim against expected."""
        for i, value in enumerate(expected):
            print("i", i, float(sim.fpr(base + i)), "expected", value)
        for i, value in enumerate(expected):
            self._assert_fp_close(sim.fpr(base + i), value, tolerance)

    def _run_real_butterfly(self, asm, av, coe, vertical_first=False,
                            tolerance=1e-7):
        """Run a real-valued butterfly program and check the result.

        asm            -- list of SVP64 assembler lines
        av             -- array to transform, loaded into FPRs 0..
        coe            -- coefficients, loaded into FPRs 8..
        vertical_first -- when True an SVSTATE is supplied whose VL and
                          MAXVL equal the number of butterflies
        tolerance      -- relative error permitted on each element
        """
        lst = list(SVP64Asm(asm))

        # store in regfile
        fprs = [0] * 32
        self._store_fprs(fprs, 8, coe)
        self._store_fprs(fprs, 0, av)

        svstate = None
        if vertical_first:
            # set total. err don't know how to calculate how many there
            # are... do it manually for now
            svstate = self._make_svstate(self._butterfly_count(len(av)))

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, svstate=svstate,
                                       initial_fprs=fprs)
            self._print_svshapes(sim)

            # work out the results with the twin mul/add-sub
            res = transform_radix2(av, coe)
            self._check_fprs(sim, res, tolerance)

    # ------------------------------------------------------------------
    # tests
    # ------------------------------------------------------------------

    def test_sv_remap_fpmadds_fft_4(self):
        """>>> lst = ["svshape 2, 1, 1, 1, 0",
                       "svremap 31, 1, 0, 2, 0, 1, 0",
                       "sv.ffmadds *0, *0, *0, *8"
                     ]
        smallest possible schedule: a single twin mul-add-sub on a
        two-element array with one coefficient.

        NOTE(review): an earlier description of this test spoke of
        running "sv.ffmadds." (Rc=1) with unchecked results; the
        program assembled here is the non-Rc form and the FP results
        are checked.
        """
        self._run_real_butterfly(["svshape 2, 1, 1, 1, 0",
                                  "svremap 31, 1, 0, 2, 0, 1, 0",
                                  "sv.ffmadds *0, *0, *0, *8"
                                  ],
                                 av=[7.0, -9.8],  # array 0..1
                                 coe=[3.1])  # coefficients

    def test_sv_remap_fpmadds_fft(self):
        """>>> lst = ["svshape 8, 1, 1, 1, 0",
                       "svremap 31, 1, 0, 2, 0, 1, 0",
                       "sv.ffmadds *0, *0, *0, *8"
                     ]
        runs a full in-place O(N log2 N) butterfly schedule for
        Discrete Fourier Transform.

        this is the twin "butterfly" mul-add-sub from Cooley-Tukey
        https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Data_reordering,_bit_reversal,_and_in-place_algorithms

        there is the *option* to target a different location (non-in-place)
        just in case.

        SVP64 "REMAP" in Butterfly Mode is applied to a twin +/- FMAC
        (3 inputs, 2 outputs)
        """
        self._run_real_butterfly(["svshape 8, 1, 1, 1, 0",
                                  "svremap 31, 1, 0, 2, 0, 1, 0",
                                  "sv.ffmadds *0, *0, *0, *8"
                                  ],
                                 av=[7.0, -9.8, 3.0, -32.3,
                                     -2.0, 5.0, -9.8, 31.3],  # array 0..7
                                 coe=[-0.25, 0.5, 3.1, 6.2])  # coefficients

    def test_sv_remap_fpmadds_fft_svstep(self):
        """>>> lst = SVP64Asm( [
                            "svshape 8, 1, 1, 1, 1",
                            "svremap 31, 1, 0, 2, 0, 1, 0",
                            "sv.ffmadds *0, *0, *0, *8",
                            "svstep. 27, 1, 0",
                            "bc 6, 3, -16"
                            ])
        runs a full in-place O(N log2 N) butterfly schedule for
        Discrete Fourier Transform.  this version however uses
        SVP64 "Vertical-First" Mode and so needs an explicit
        branch, testing CR0.

        SVP64 "REMAP" in Butterfly Mode is applied to a twin +/- FMAC
        (3 inputs, 2 outputs)
        """
        self._run_real_butterfly(["svshape 8, 1, 1, 1, 1",
                                  "svremap 31, 1, 0, 2, 0, 1, 0",
                                  "sv.ffmadds *0, *0, *0, *8",
                                  "svstep. 27, 1, 0",
                                  "bc 6, 3, -16"
                                  ],
                                 av=[7.0, -9.8, 3.0, -32.3,
                                     -2.0, 5.0, -9.8, 31.3],  # array 0..7
                                 coe=[-0.25, 0.5, 3.1, 6.2],  # coefficients
                                 vertical_first=True)

    def test_sv_remap_fpmadds_fft_svstep_scalar_temp(self):
        """>>> lst = SVP64Asm( [
                            "svshape 8, 1, 1, 1, 1",
                            # RA: jh (S1) RB: n/a RC: k (S2) RT: scalar EA: n/a
                            "svremap 5, 1, 0, 2, 0, 0, 0",
                            "sv.fmuls 24, *0, *8",
                            # RA: scal RB: jl (S0) RC: n/a RT: jl (S0) EA: jh (S1)
                            "svremap 26, 0, 0, 0, 0, 1, 0",
                            "sv.ffadds *0, 24, *0",
                            "svstep. 27, 1, 0",
                            "bc 6, 3, -28"
                            ])

        runs a full in-place O(N log2 N) butterfly schedule for
        Discrete Fourier Transform.  also uses "Vertical First"
        but also uses temporary scalars and ffadds rather than
        sv.ffmadds.

        this represents an incremental step towards complex FFT

        SVP64 "REMAP" in Butterfly Mode is applied to two instructions:

        * single fmuls FRT, FRA, FRC
        * twin in-place ffadds +/- ADD/SUB (2 inputs, 2 outputs)
          (FRS is implicit / hidden in ff* operations)

        multiply:      # sv.fmuls FRT, FRA, FRC
            temp1 = vec[jh] * exptable[k]
            temp2 = vec[jl]
        twin-add:      # sv.ffadds FRT(/FRS), FRA, FRB
            vec[jh] = temp2 - temp1
            vec[jl] = temp2 + temp1

        also see notes in complex fft test: here svremap is done in
        "non-persistent" mode (as a demo) whereas in the complex fft
        svremap is used in "persistent" mode, where by a complete
        coincidence the REMAP arguments all happen to line up and
        only one persistent svremap is needed.  the exact same trick
        *could* be applied here but for illustrative purposes it is not.
        """
        self._run_real_butterfly([
            "svshape 8, 1, 1, 1, 1",
            # RA: jh (S1) RB: n/a RC: k (S2) RT: scalar EA: n/a
            "svremap 5, 1, 0, 2, 0, 0, 0",
            "sv.fmuls 24, *0, *8",
            # RA: scal RB: jl (S0) RC: n/a RT: jl (S0) EA: jh (S1)
            "svremap 26, 0, 0, 0, 0, 1, 0",
            "sv.ffadds *0, 24, *0",
            "svstep. 27, 1, 0",
            "bc 6, 3, -28"
        ],
            av=[7.0, -9.8, 3.0, -32.3,
                -2.0, 5.0, -9.8, 31.3],  # array 0..7
            coe=[-0.25, 0.5, 3.1, 6.2],  # coefficients
            vertical_first=True)

    def test_sv_fpmadds_fft(self):
        """>>> lst = ["sv.ffmadds *2, *2, *2, *10"
                     ]
        four in-place vector mul-adds, four in-place vector mul-subs

        this is the twin "butterfly" mul-add-sub from Cooley-Tukey
        https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Data_reordering,_bit_reversal,_and_in-place_algorithms

        there is the *option* to target a different location (non-in-place)
        just in case.

        SVP64 "FFT" mode will *automatically* offset FRB and an implicit
        FRS to perform the two multiplies.  one add, one subtract.

        sv.ffmadds FRT, FRA, FRC, FRB  actually does:
            fmadds  FRT   , FRA, FRC, FRA
            fnmsubs FRT+vl, FRA, FRC, FRB+vl

        """
        lst = list(SVP64Asm(["sv.ffmadds *2, *2, *2, *10"
                             ]))

        fprs = [0] * 32
        av = [7.0, -9.8, 2.0, -32.3]  # first half of array 0..3
        bv = [-2.0, 2.0, -9.8, 32.3]  # second half of array 4..7
        coe = [-1.0, 4.0, 3.1, 6.2]  # coefficients
        res = []
        # work out the results with the twin mul/add-sub
        for i, (a, b, c) in enumerate(zip(av, bv, coe)):
            fprs[i+2] = fp64toselectable(a)
            fprs[i+6] = fp64toselectable(b)
            fprs[i+10] = fp64toselectable(c)
            mul = a * c
            t = b + mul
            u = b - mul
            # convert to Power single
            t = fph.DOUBLE2SINGLE(fp64toselectable(t))
            u = fph.DOUBLE2SINGLE(fp64toselectable(u))  # from double
            res.append((t, u))
            print("FFT", i, "in", a, b, "coeff", c, "mul", mul, "res", t, u)

        # SVSTATE (in this case, VL=4)
        svstate = self._make_svstate(4)

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, svstate=svstate,
                                       initial_fprs=fprs)
            # confirm that the results are as expected
            for i, (t, u) in enumerate(res):
                self.assertEqual(sim.fpr(i+2), t)
                self.assertEqual(sim.fpr(i+6), u)

    def test_sv_ffadds_fft(self):
        """>>> lst = ["sv.ffadds *2, *2, *2"
                     ]
        four in-place vector adds, four in-place vector subs

        SVP64 "FFT" mode will *automatically* offset FRB and an implicit
        FRS to perform one add and one subtract.

        sv.ffadds FRT, FRA, FRB  actually does:
            fadds  FRT   , FRB, FRA
            fsubs  FRT+vl, FRA, FRB+vl
        """
        lst = list(SVP64Asm(["sv.ffadds *2, *2, *2"
                             ]))

        fprs = [0] * 32
        av = [7.0, -9.8, 2.0, -32.3]  # first half of array 0..3
        bv = [-2.0, 2.0, -9.8, 32.3]  # second half of array 4..7
        res = []
        # work out the results with the twin add-sub
        for i, (a, b) in enumerate(zip(av, bv)):
            fprs[i+2] = fp64toselectable(a)
            fprs[i+6] = fp64toselectable(b)
            t = b + a
            u = b - a
            # convert to Power single
            t = fph.DOUBLE2SINGLE(fp64toselectable(t))
            u = fph.DOUBLE2SINGLE(fp64toselectable(u))  # from double
            res.append((t, u))
            print("FFT", i, "in", a, b, "res", t, u)

        # SVSTATE (in this case, VL=4)
        svstate = self._make_svstate(4)

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, svstate=svstate,
                                       initial_fprs=fprs)
            # confirm that the results are as expected
            for i, (t, u) in enumerate(res):
                a = float(sim.fpr(i+2))
                b = float(sim.fpr(i+6))
                print("FFT", i, "in", a, b, "res", float(t), float(u))
            for i, (t, u) in enumerate(res):
                self.assertEqual(sim.fpr(i+2), t)
                self.assertEqual(sim.fpr(i+6), u)

    def test_sv_remap_fpmadds_fft_svstep_complex(self):
        """
        runs a full in-place O(N log2 N) butterfly schedule for
        Discrete Fourier Transform.  this version however uses
        SVP64 "Vertical-First" Mode and so needs an explicit
        branch, testing CR0.

        SVP64 "REMAP" in Butterfly Mode is applied to a twin +/- FMAC
        (3 inputs, 2 outputs)

        complex calculation (FFT):

            tpre =  vec_r[jh] * cos_r[k] + vec_i[jh] * sin_i[k]
            vec_r[jh] = vec_r[jl] - tpre
            vec_r[jl] += tpre

            tpim = -vec_r[jh] * sin_i[k] + vec_i[jh] * cos_r[k]
            vec_i[jh] = vec_i[jl] - tpim
            vec_i[jl] += tpim

        real-only calculation (DFT):

            temp1 = vec[jh] * exptable[k]
            temp2 = vec[jl]
            vec[jh] = temp2 - temp1
            vec[jl] = temp2 + temp1

        note: a rather nice convenience / coincidence.  the meaning of
        these two instructions is:
            # RA: jh (S1) RB: n/a RC: k (S2) RT: scalar EA: n/a
            "svremap 5, 1, 0, 2, 0, 0, 1",
            # RA: scal RB: jl (S0) RC: n/a RT: jl (S0) EA: jh (S1)
            "svremap 26, 0, 0, 0, 0, 1, 1",

        however it turns out that they can be *merged*, and for
        the first one (sv.fmadds/sv.fmsubs) the scalar arguments (RT, RB)
        *ignore* their REMAPs (by definition, because you can't REMAP
        scalar operands), and for the second one (sv.ffadds) exactly the
        right REMAPs are also ignored!

        therefore we can merge:
            "svremap 5, 1, 0, 2, 0, 0, 1",
            "svremap 26, 0, 0, 0, 0, 1, 1",
        into:
            "svremap 31, 1, 0, 2, 0, 1, 1",
        and save one instruction.
        """
        lst = list(SVP64Asm([
            # set triple butterfly mode with persistent "REMAP"
            "svshape 8, 1, 1, 1, 1",
            "svremap 31, 1, 0, 2, 0, 1, 1",
            # tpre
            "sv.fmuls 24, *0, *16",  # mul1_r = r*cos_r
            "sv.fmadds 24, *8, *20, 24",  # mul2_r = i*sin_i
            # tpre = mul1_r + mul2_r
            # tpim
            "sv.fmuls 26, *0, *20",  # mul1_i = r*sin_i
            "sv.fmsubs 26, *8, *16, 26",  # mul2_i = i*cos_r
            # tpim = mul2_i - mul1_i
            # vec_r jh/jl
            "sv.ffadds *0, 24, *0",  # vh/vl +/- tpre
            # vec_i jh/jl
            "sv.ffadds *8, 26, *8",  # vh/vl +- tpim

            # svstep loop
            "svstep. 27, 1, 0",
            "bc 6, 3, -56"
        ]))

        # array and coefficients to test
        ar = [7.0, -9.8, 3.0, -32.3,
              -2.0, 5.0, -9.8, 31.3]  # array 0..7 real
        ai = [1.0, -1.8, 3.0, 19.3,
              4.0, -2.0, -0.8, 1.3]  # array 0..7 imaginary
        coer = [-0.25, 0.5, 3.1, 6.2]  # coefficients real
        coei = [0.21, -0.1, 1.1, -4.0]  # coefficients imaginary

        # store in regfile
        fprs = [0] * 64
        self._store_fprs(fprs, 0, ar)
        self._store_fprs(fprs, 8, ai)
        self._store_fprs(fprs, 16, coer)
        self._store_fprs(fprs, 20, coei)

        # set total. err don't know how to calculate how many there are...
        # do it manually for now
        svstate = self._make_svstate(self._butterfly_count(len(ar)))

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, svstate=svstate,
                                       initial_fprs=fprs)
            self._print_svshapes(sim)

            # work out the results with the twin mul/add-sub, explicit
            # complex numbers
            res_r, res_i = transform_radix2_complex(ar, ai, coer, coei)

            for i, (expected_r, expected_i) in enumerate(zip(res_r, res_i)):
                print("i", i, float(sim.fpr(i)), float(sim.fpr(i+8)),
                      "expected_r", expected_r,
                      "expected_i", expected_i)
            for i, (expected_r, expected_i) in enumerate(zip(res_r, res_i)):
                # real parts live in FPRs 0.., imaginary parts in FPRs 8..
                self._assert_fp_close(sim.fpr(i), expected_r, 1e-6)
                self._assert_fp_close(sim.fpr(i+8), expected_i, 1e-6)

    def test_sv_ffadds_fft_scalar(self):
        """>>> lst = ["sv.ffadds *2, 12, 13"
                     ]
        four in-place vector adds and subs, but done with a scalar
        pair (fp12, fp13)
        """
        lst = list(SVP64Asm(["sv.ffadds *2, 12, 13"
                             ]))

        fprs = [0] * 32
        scalar_a = 1.3
        scalar_b = -2.0
        fprs[12] = fp64toselectable(scalar_a)
        fprs[13] = fp64toselectable(scalar_b)
        res = []
        # work out the results with the twin add-sub
        for i in range(4):
            t = scalar_b + scalar_a
            u = scalar_b - scalar_a
            # convert to Power single
            t = fph.DOUBLE2SINGLE(fp64toselectable(t))
            u = fph.DOUBLE2SINGLE(fp64toselectable(u))  # from double
            res.append((t, u))
            print("FFT", i, "res", t, u)

        # SVSTATE (in this case, VL=4)
        svstate = self._make_svstate(4)

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, svstate=svstate,
                                       initial_fprs=fprs)
            # confirm that the results are as expected
            for i, (t, u) in enumerate(res):
                a = float(sim.fpr(i+2))
                b = float(sim.fpr(i+6))
                print("FFT", i, "in", a, b, "res", float(t), float(u))
            for i, (t, u) in enumerate(res):
                self.assertEqual(sim.fpr(i+2), t)
                self.assertEqual(sim.fpr(i+6), u)

    def test_sv_remap_fpmadds_fft_ldst(self):
        """>>> lst = ["svshape 8, 1, 1, 15, 0",
                       "svremap 1, 0, 0, 0, 0, 0, 0",
                       "sv.lfs/els *0, 4(0)",
                       "svshape 8, 1, 1, 1, 0",
                       "svremap 31, 1, 0, 2, 0, 1, 0",
                       "sv.ffmadds *0, *0, *0, *8"
                     ]

        runs a full in-place O(N log2 N) butterfly schedule for
        Discrete Fourier Transform, using bit-reversed LD/ST
        """
        lst = list(SVP64Asm(["svshape 8, 1, 1, 15, 0",
                             "svremap 1, 0, 0, 0, 0, 0, 0",
                             "sv.lfs/els *0, 4(0)",
                             "svshape 8, 1, 1, 1, 0",
                             "svremap 31, 1, 0, 2, 0, 1, 0",
                             "sv.ffmadds *0, *0, *0, *8"
                             ]))

        # array and coefficients to test
        av = [7.0, -9.8, 3.0, -32.3,
              -2.0, 5.0, -9.8, 31.3]  # array 0..7
        coe = [-0.25, 0.5, 3.1, 6.2]  # coefficients

        # store in regfile
        fprs = [0] * 32
        self._store_fprs(fprs, 8, coe)

        # store in memory: two 32-bit singles are packed into each
        # 8-byte entry, even-indexed element in the low half
        singles = [SINGLE(fp64toselectable(a)).value for a in av]
        mem = {}
        for word, (lo, hi) in enumerate(zip(singles[0::2], singles[1::2])):
            mem[word * 8] = lo | (hi << 32)

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, initial_mem=mem,
                                       initial_fprs=fprs)
            self._print_svshapes(sim)

            print("mem dump")
            print(sim.mem.dump())

            # work out the results with the twin mul/add-sub,
            # note bit-reverse mode requested
            res = transform_radix2(av, coe, reverse=True)
            self._check_fprs(sim, res, 1e-6)

    def run_tst_program(self, prog, initial_regs=None,
                        svstate=None,
                        initial_mem=None,
                        initial_fprs=None):
        """Run prog through run_tst, dump GPRs/FPRs, return the simulator.

        initial_regs defaults to 32 zeroed GPRs; svstate, initial_mem
        and initial_fprs are handed to run_tst unchanged.
        """
        if initial_regs is None:
            initial_regs = [0] * 32
        simulator = run_tst(prog, initial_regs, mem=initial_mem,
                            initial_fprs=initial_fprs,
                            svstate=svstate)

        print("GPRs")
        simulator.gpr.dump()
        print("FPRs")
        simulator.fpr.dump()

        return simulator
832
833
# Run the whole test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()