whitespace (keep below 80 chars)
[openpower-isa.git] / src / openpower / decoder / isa / test_caller_svp64_fft.py
1 from nmigen import Module, Signal
2 from nmigen.back.pysim import Simulator, Delay, Settle
3 from nmutil.formaltest import FHDLTestCase
4 import unittest
5 from openpower.decoder.power_decoder import (create_pdecode)
6 from openpower.simulator.program import Program
7 from openpower.decoder.isa.caller import SVP64State
8 from openpower.decoder.selectable_int import SelectableInt
9 from openpower.decoder.isa.test_caller import run_tst
10 from openpower.sv.trans.svp64 import SVP64Asm
11 from copy import deepcopy
12 from openpower.decoder.helpers import fp64toselectable, SINGLE
13 from openpower.decoder.isafunctions.double2single import DOUBLE2SINGLE
14
15
16 def transform_radix2(vec, exptable, reverse=False):
17 """
18 # FFT and convolution test (Python), based on Project Nayuki
19 #
20 # Copyright (c) 2020 Project Nayuki. (MIT License)
21 # https://www.nayuki.io/page/free-small-fft-in-multiple-languages
22
23 """
24 # bits of the integer 'val'.
25 def reverse_bits(val, width):
26 result = 0
27 for _ in range(width):
28 result = (result << 1) | (val & 1)
29 val >>= 1
30 return result
31
32 # Initialization
33 n = len(vec)
34 levels = n.bit_length() - 1
35
36 # Copy with bit-reversed permutation
37 if reverse:
38 vec = [vec[reverse_bits(i, levels)] for i in range(n)]
39
40 size = 2
41 while size <= n:
42 halfsize = size // 2
43 tablestep = n // size
44 for i in range(0, n, size):
45 k = 0
46 for j in range(i, i + halfsize):
47 # exact same actual computation, just embedded in
48 # triple-nested for-loops
49 jl, jh = j, j+halfsize
50 vjh = vec[jh]
51 temp1 = vec[jh] * exptable[k]
52 temp2 = vec[jl]
53 vec[jh] = temp2 - temp1
54 vec[jl] = temp2 + temp1
55 print ("xform jl jh k", jl, jh, k,
56 "vj vjh ek", temp2, vjh, exptable[k],
57 "t1, t2", temp1, temp2,
58 "v[jh] v[jl]", vec[jh], vec[jl])
59 k += tablestep
60 size *= 2
61
62 return vec
63
64
65 def transform_radix2_complex(vec_r, vec_i, cos_r, sin_i, reverse=False):
66 """
67 # FFT and convolution test (Python), based on Project Nayuki
68 #
69 # Copyright (c) 2020 Project Nayuki. (MIT License)
70 # https://www.nayuki.io/page/free-small-fft-in-multiple-languages
71
72 """
73 # bits of the integer 'val'.
74 def reverse_bits(val, width):
75 result = 0
76 for _ in range(width):
77 result = (result << 1) | (val & 1)
78 val >>= 1
79 return result
80
81 # Initialization
82 n = len(vec_r)
83 levels = n.bit_length() - 1
84
85 # Copy with bit-reversed permutation
86 if reverse:
87 vec = [vec[reverse_bits(i, levels)] for i in range(n)]
88
89 size = 2
90 while size <= n:
91 halfsize = size // 2
92 tablestep = n // size
93 for i in range(0, n, size):
94 k = 0
95 for j in range(i, i + halfsize):
96 # exact same actual computation, just embedded in
97 # triple-nested for-loops
98 jl, jh = j, j+halfsize
99
100 print ("xform jl jh k", jl, jh, k,
101 "vr h l", vec_r[jh], vec_r[jl],
102 "vi h l", vec_i[jh], vec_i[jl])
103 print (" cr k", cos_r[k], "si k", sin_i[k])
104 mul1_r = vec_r[jh] * cos_r[k]
105 mul2_r = vec_i[jh] * sin_i[k]
106 tpre = mul1_r + mul2_r
107 print (" vec_r[jh] * cos_r[k]", mul1_r)
108 print (" vec_i[jh] * sin_i[k]", mul2_r)
109 print (" tpre", tpre)
110 mul1_i = vec_r[jh] * sin_i[k]
111 mul2_i = vec_i[jh] * cos_r[k]
112 tpim = -mul1_i + mul2_i
113 print (" vec_r[jh] * sin_i[k]", mul1_i)
114 print (" vec_i[jh] * cos_r[k]", mul2_i)
115 print (" tpim", tpim)
116 vec_r[jh] = vec_r[jl] - tpre
117 vec_i[jh] = vec_i[jl] - tpim
118 vec_r[jl] += tpre
119 vec_i[jl] += tpim
120
121 print (" xform jl jh k", jl, jh, k,
122 "\n vr h l", vec_r[jh], vec_r[jl],
123 "\n vi h l", vec_i[jh], vec_i[jl])
124 k += tablestep
125 size *= 2
126
127 return vec_r, vec_i
128
129
130 class FFTTestCase(FHDLTestCase):
131
132 def _check_regs(self, sim, expected):
133 for i in range(32):
134 self.assertEqual(sim.gpr(i), SelectableInt(expected[i], 64))
135
136 def test_sv_remap_fpmadds_fft(self):
137 """>>> lst = ["svshape 8, 1, 1, 1, 0",
138 "svremap 31, 1, 0, 2, 0, 1, 0",
139 "sv.ffmadds 2.v, 2.v, 2.v, 10.v"
140 ]
141 runs a full in-place O(N log2 N) butterfly schedule for
142 Discrete Fourier Transform.
143
144 this is the twin "butterfly" mul-add-sub from Cooley-Tukey
145 https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Data_reordering,_bit_reversal,_and_in-place_algorithms
146
147 there is the *option* to target a different location (non-in-place)
148 just in case.
149
150 SVP64 "REMAP" in Butterfly Mode is applied to a twin +/- FMAC
151 (3 inputs, 2 outputs)
152 """
153 lst = SVP64Asm( ["svshape 8, 1, 1, 1, 0",
154 "svremap 31, 1, 0, 2, 0, 1, 0",
155 "sv.ffmadds 0.v, 0.v, 0.v, 8.v"
156 ])
157 lst = list(lst)
158
159 # array and coefficients to test
160 av = [7.0, -9.8, 3.0, -32.3,
161 -2.0, 5.0, -9.8, 31.3] # array 0..7
162 coe = [-0.25, 0.5, 3.1, 6.2] # coefficients
163
164 # store in regfile
165 fprs = [0] * 32
166 for i, c in enumerate(coe):
167 fprs[i+8] = fp64toselectable(c)
168 for i, a in enumerate(av):
169 fprs[i+0] = fp64toselectable(a)
170
171 with Program(lst, bigendian=False) as program:
172 sim = self.run_tst_program(program, initial_fprs=fprs)
173 print ("spr svshape0", sim.spr['SVSHAPE0'])
174 print (" xdimsz", sim.spr['SVSHAPE0'].xdimsz)
175 print (" ydimsz", sim.spr['SVSHAPE0'].ydimsz)
176 print (" zdimsz", sim.spr['SVSHAPE0'].zdimsz)
177 print ("spr svshape1", sim.spr['SVSHAPE1'])
178 print ("spr svshape2", sim.spr['SVSHAPE2'])
179 print ("spr svshape3", sim.spr['SVSHAPE3'])
180
181 # work out the results with the twin mul/add-sub
182 res = transform_radix2(av, coe)
183
184 for i, expected in enumerate(res):
185 print ("i", i, float(sim.fpr(i)), "expected", expected)
186 for i, expected in enumerate(res):
187 # convert to Power single
188 expected = DOUBLE2SINGLE(fp64toselectable(expected))
189 expected = float(expected)
190 actual = float(sim.fpr(i))
191 # approximate error calculation, good enough test
192 # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
193 # and the rounding is different
194 err = abs(actual - expected) / expected
195 self.assertTrue(err < 1e-7)
196
197 def test_sv_remap_fpmadds_fft_svstep(self):
198 """>>> lst = SVP64Asm( [
199 "svshape 8, 1, 1, 1, 1",
200 "svremap 31, 1, 0, 2, 0, 1, 0",
201 "sv.ffmadds 0.v, 0.v, 0.v, 8.v",
202 "setvl. 0, 0, 1, 1, 0, 0",
203 "bc 6, 3, -16"
204 ])
205 runs a full in-place O(N log2 N) butterfly schedule for
206 Discrete Fourier Transform. this version however uses
207 SVP64 "Vertical-First" Mode and so needs an explicit
208 branch, testing CR0.
209
210 SVP64 "REMAP" in Butterfly Mode is applied to a twin +/- FMAC
211 (3 inputs, 2 outputs)
212 """
213 lst = SVP64Asm( [
214 "svshape 8, 1, 1, 1, 1",
215 "svremap 31, 1, 0, 2, 0, 1, 0",
216 "sv.ffmadds 0.v, 0.v, 0.v, 8.v",
217 "setvl. 0, 0, 1, 1, 0, 0",
218 "bc 6, 3, -16"
219 ])
220 lst = list(lst)
221
222 # array and coefficients to test
223 av = [7.0, -9.8, 3.0, -32.3,
224 -2.0, 5.0, -9.8, 31.3] # array 0..7
225 coe = [-0.25, 0.5, 3.1, 6.2] # coefficients
226
227 # store in regfile
228 fprs = [0] * 32
229 for i, c in enumerate(coe):
230 fprs[i+8] = fp64toselectable(c)
231 for i, a in enumerate(av):
232 fprs[i+0] = fp64toselectable(a)
233
234 # set total. err don't know how to calculate how many there are...
235 # do it manually for now
236 VL = 0
237 size = 2
238 n = len(av)
239 while size <= n:
240 halfsize = size // 2
241 tablestep = n // size
242 for i in range(0, n, size):
243 for j in range(i, i + halfsize):
244 VL += 1
245 size *= 2
246
247 # SVSTATE (calculated VL)
248 svstate = SVP64State()
249 svstate.vl = VL # VL
250 svstate.maxvl = VL # MAXVL
251 print ("SVSTATE", bin(svstate.asint()))
252
253 with Program(lst, bigendian=False) as program:
254 sim = self.run_tst_program(program, svstate=svstate,
255 initial_fprs=fprs)
256 print ("spr svshape0", sim.spr['SVSHAPE0'])
257 print (" xdimsz", sim.spr['SVSHAPE0'].xdimsz)
258 print (" ydimsz", sim.spr['SVSHAPE0'].ydimsz)
259 print (" zdimsz", sim.spr['SVSHAPE0'].zdimsz)
260 print ("spr svshape1", sim.spr['SVSHAPE1'])
261 print ("spr svshape2", sim.spr['SVSHAPE2'])
262 print ("spr svshape3", sim.spr['SVSHAPE3'])
263
264 # work out the results with the twin mul/add-sub
265 res = transform_radix2(av, coe)
266
267 for i, expected in enumerate(res):
268 print ("i", i, float(sim.fpr(i)), "expected", expected)
269 for i, expected in enumerate(res):
270 # convert to Power single
271 expected = DOUBLE2SINGLE(fp64toselectable(expected))
272 expected = float(expected)
273 actual = float(sim.fpr(i))
274 # approximate error calculation, good enough test
275 # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
276 # and the rounding is different
277 err = abs(actual - expected) / expected
278 self.assertTrue(err < 1e-7)
279
280 def test_sv_remap_fpmadds_fft_svstep_scalar_temp(self):
281 """>>> lst = SVP64Asm( [
282 "svshape 8, 1, 1, 1, 1",
283 # RA: jh (S1) RB: n/a RC: k (S2) RT: scalar EA: n/a
284 "svremap 5, 1, 0, 2, 0, 0, 1",
285 "sv.fmuls 24, 0.v, 8.v",
286 # RA: scal RB: jl (S0) RC: n/a RT: jl (S0) EA: jh (S1)
287 "svremap 26, 0, 0, 0, 0, 1, 1",
288 "sv.ffadds 0.v, 24, 0.v",
289 "setvl. 0, 0, 1, 1, 0, 0",
290 "bc 6, 3, -28"
291 ])
292
293 runs a full in-place O(N log2 N) butterfly schedule for
294 Discrete Fourier Transform. also uses "Vertical First"
295 but also uses temporary scalars and ffadds rather than
296 sv.ffmadds.
297
298 this represents an incremental step towards complex FFT
299
300 SVP64 "REMAP" in Butterfly Mode is applied to two instructions:
301
302 * single fmuls FRT, FRA, FRC
303 * twin in-place ffadds +/- ADD/SUB (2 inputs, 2 outputs)
304 (FRS is implicit / hidden in ff* operations)
305
306 multiply: # sv.fmuls FRT, FRA, FRC
307 temp1 = vec[jh] * exptable[k]
308 temp2 = vec[jl]
309 twin-add: # sv.ffadds FRT(/FRS), FRA, FRB
310 vec[jh] = temp2 - temp1
311 vec[jl] = temp2 + temp1
312
313 also see notes in complex fft test: here svremap is done in
314 "non-persistent" mode (as a demo) whereas in the complex fft
315 svremap is used in "persistent" mode, where by a complete
316 coincidence the REMAP arguments all happen to line up and
317 only one persistent svremap is needed. the exact same trick
318 *could* be applied here but for illustrative purposes it is not.
319 """
320 lst = SVP64Asm( [
321 "svshape 8, 1, 1, 1, 1",
322 # RA: jh (S1) RB: n/a RC: k (S2) RT: scalar EA: n/a
323 "svremap 5, 1, 0, 2, 0, 0, 0",
324 "sv.fmuls 24, 0.v, 8.v",
325 # RA: scal RB: jl (S0) RC: n/a RT: jl (S0) EA: jh (S1)
326 "svremap 26, 0, 0, 0, 0, 1, 0",
327 "sv.ffadds 0.v, 24, 0.v",
328 "setvl. 0, 0, 1, 1, 0, 0",
329 "bc 6, 3, -28"
330 ])
331 lst = list(lst)
332
333 # array and coefficients to test
334 av = [7.0, -9.8, 3.0, -32.3,
335 -2.0, 5.0, -9.8, 31.3] # array 0..7
336 coe = [-0.25, 0.5, 3.1, 6.2] # coefficients
337
338 # store in regfile
339 fprs = [0] * 32
340 for i, c in enumerate(coe):
341 fprs[i+8] = fp64toselectable(c)
342 for i, a in enumerate(av):
343 fprs[i+0] = fp64toselectable(a)
344
345 # set total. err don't know how to calculate how many there are...
346 # do it manually for now
347 VL = 0
348 size = 2
349 n = len(av)
350 while size <= n:
351 halfsize = size // 2
352 tablestep = n // size
353 for i in range(0, n, size):
354 for j in range(i, i + halfsize):
355 VL += 1
356 size *= 2
357
358 # SVSTATE (calculated VL)
359 svstate = SVP64State()
360 svstate.vl = VL # VL
361 svstate.maxvl = VL # MAXVL
362 print ("SVSTATE", bin(svstate.asint()))
363
364 with Program(lst, bigendian=False) as program:
365 sim = self.run_tst_program(program, svstate=svstate,
366 initial_fprs=fprs)
367 print ("spr svshape0", sim.spr['SVSHAPE0'])
368 print (" xdimsz", sim.spr['SVSHAPE0'].xdimsz)
369 print (" ydimsz", sim.spr['SVSHAPE0'].ydimsz)
370 print (" zdimsz", sim.spr['SVSHAPE0'].zdimsz)
371 print ("spr svshape1", sim.spr['SVSHAPE1'])
372 print ("spr svshape2", sim.spr['SVSHAPE2'])
373 print ("spr svshape3", sim.spr['SVSHAPE3'])
374
375 # work out the results with the twin mul/add-sub
376 res = transform_radix2(av, coe)
377
378 for i, expected in enumerate(res):
379 print ("i", i, float(sim.fpr(i)), "expected", expected)
380 for i, expected in enumerate(res):
381 # convert to Power single
382 expected = DOUBLE2SINGLE(fp64toselectable(expected))
383 expected = float(expected)
384 actual = float(sim.fpr(i))
385 # approximate error calculation, good enough test
386 # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
387 # and the rounding is different
388 err = abs(actual - expected) / expected
389 self.assertTrue(err < 1e-7)
390
391 def test_sv_fpmadds_fft(self):
392 """>>> lst = ["sv.ffmadds 2.v, 2.v, 2.v, 10.v"
393 ]
394 four in-place vector mul-adds, four in-place vector mul-subs
395
396 this is the twin "butterfly" mul-add-sub from Cooley-Tukey
397 https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Data_reordering,_bit_reversal,_and_in-place_algorithms
398
399 there is the *option* to target a different location (non-in-place)
400 just in case.
401
402 SVP64 "FFT" mode will *automatically* offset FRB and an implicit
403 FRS to perform the two multiplies. one add, one subtract.
404
405 sv.ffmadds FRT, FRA, FRC, FRB actually does:
406 fmadds FRT , FRA, FRC, FRA
407 fnmsubs FRT+vl, FRA, FRC, FRB+vl
408
409 """
410 lst = SVP64Asm(["sv.ffmadds 2.v, 2.v, 2.v, 10.v"
411 ])
412 lst = list(lst)
413
414 fprs = [0] * 32
415 av = [7.0, -9.8, 2.0, -32.3] # first half of array 0..3
416 bv = [-2.0, 2.0, -9.8, 32.3] # second half of array 4..7
417 coe = [-1.0, 4.0, 3.1, 6.2] # coefficients
418 res = []
419 # work out the results with the twin mul/add-sub
420 for i, (a, b, c) in enumerate(zip(av, bv, coe)):
421 fprs[i+2] = fp64toselectable(a)
422 fprs[i+6] = fp64toselectable(b)
423 fprs[i+10] = fp64toselectable(c)
424 mul = a * c
425 t = b + mul
426 u = b - mul
427 t = DOUBLE2SINGLE(fp64toselectable(t)) # convert to Power single
428 u = DOUBLE2SINGLE(fp64toselectable(u)) # from double
429 res.append((t, u))
430 print ("FFT", i, "in", a, b, "coeff", c, "mul", mul, "res", t, u)
431
432 # SVSTATE (in this case, VL=2)
433 svstate = SVP64State()
434 svstate.vl = 4 # VL
435 svstate.maxvl = 4 # MAXVL
436 print ("SVSTATE", bin(svstate.asint()))
437
438 with Program(lst, bigendian=False) as program:
439 sim = self.run_tst_program(program, svstate=svstate,
440 initial_fprs=fprs)
441 # confirm that the results are as expected
442 for i, (t, u) in enumerate(res):
443 self.assertEqual(sim.fpr(i+2), t)
444 self.assertEqual(sim.fpr(i+6), u)
445
446 def test_sv_ffadds_fft(self):
447 """>>> lst = ["sv.ffadds 2.v, 2.v, 2.v"
448 ]
449 four in-place vector adds, four in-place vector subs
450
451 SVP64 "FFT" mode will *automatically* offset FRB and an implicit
452 FRS to perform the two multiplies. one add, one subtract.
453
454 sv.ffadds FRT, FRA, FRB actually does:
455 fadds FRT , FRB, FRA
456 fsubs FRT+vl, FRA, FRB+vl
457 """
458 lst = SVP64Asm(["sv.ffadds 2.v, 2.v, 2.v"
459 ])
460 lst = list(lst)
461
462 fprs = [0] * 32
463 av = [7.0, -9.8, 2.0, -32.3] # first half of array 0..3
464 bv = [-2.0, 2.0, -9.8, 32.3] # second half of array 4..7
465 res = []
466 # work out the results with the twin add-sub
467 for i, (a, b) in enumerate(zip(av, bv)):
468 fprs[i+2] = fp64toselectable(a)
469 fprs[i+6] = fp64toselectable(b)
470 t = b + a
471 u = b - a
472 t = DOUBLE2SINGLE(fp64toselectable(t)) # convert to Power single
473 u = DOUBLE2SINGLE(fp64toselectable(u)) # from double
474 res.append((t, u))
475 print ("FFT", i, "in", a, b, "res", t, u)
476
477 # SVSTATE (in this case, VL=2)
478 svstate = SVP64State()
479 svstate.vl = 4 # VL
480 svstate.maxvl = 4 # MAXVL
481 print ("SVSTATE", bin(svstate.asint()))
482
483 with Program(lst, bigendian=False) as program:
484 sim = self.run_tst_program(program, svstate=svstate,
485 initial_fprs=fprs)
486 # confirm that the results are as expected
487 for i, (t, u) in enumerate(res):
488 a = float(sim.fpr(i+2))
489 b = float(sim.fpr(i+6))
490 t = float(t)
491 u = float(u)
492 print ("FFT", i, "in", a, b, "res", t, u)
493 for i, (t, u) in enumerate(res):
494 self.assertEqual(sim.fpr(i+2), t)
495 self.assertEqual(sim.fpr(i+6), u)
496
497 def test_sv_remap_fpmadds_fft_svstep_complex(self):
498 """
499 runs a full in-place O(N log2 N) butterfly schedule for
500 Discrete Fourier Transform. this version however uses
501 SVP64 "Vertical-First" Mode and so needs an explicit
502 branch, testing CR0.
503
504 SVP64 "REMAP" in Butterfly Mode is applied to a twin +/- FMAC
505 (3 inputs, 2 outputs)
506
507 complex calculation (FFT):
508
509 tpre = vec_r[jh] * cos_r[k] + vec_i[jh] * sin_i[k]
510 vec_r[jh] = vec_r[jl] - tpre
511 vec_r[jl] += tpre
512
513 tpim = -vec_r[jh] * sin_i[k] + vec_i[jh] * cos_r[k]
514 vec_i[jh] = vec_i[jl] - tpim
515 vec_i[jl] += tpim
516
517 real-only calculation (DFT):
518
519 temp1 = vec[jh] * exptable[k]
520 temp2 = vec[jl]
521 vec[jh] = temp2 - temp1
522 vec[jl] = temp2 + temp1
523
524 note: a rather nice convenience / coincidence. the meaning of
525 these two instructions is:
526 # RA: jh (S1) RB: n/a RC: k (S2) RT: scalar EA: n/a
527 "svremap 5, 1, 0, 2, 0, 0, 1",
528 # RA: scal RB: jl (S0) RC: n/a RT: jl (S0) EA: jh (S1)
529 "svremap 26, 0, 0, 0, 0, 1, 1",
530
531 however it turns out that they can be *merged*, and for
532 the first one (sv.fmadds/sv.fmsubs) the scalar arguments (RT, RB)
533 *ignore* their REMAPs (by definition, because you can't REMAP
534 scalar operands), and for the second one (sv.ffads) exactly the
535 right REMAPs are also ignored!
536
537 therefore we can merge:
538 "svremap 5, 1, 0, 2, 0, 0, 1",
539 "svremap 26, 0, 0, 0, 0, 1, 1",
540 into:
541 "svremap 31, 1, 0, 2, 0, 1, 1",
542 and save one instruction.
543 """
544 lst = SVP64Asm( [
545 # set triple butterfly mode with persistent "REMAP"
546 "svshape 8, 1, 1, 1, 1",
547 "svremap 31, 1, 0, 2, 0, 1, 1",
548 # tpre
549 "sv.fmuls 24, 0.v, 16.v", # mul1_r = r*cos_r
550 "sv.fmadds 24, 8.v, 20.v, 24", # mul2_r = i*sin_i
551 # tpre = mul1_r + mul2_r
552 # tpim
553 "sv.fmuls 26, 0.v, 20.v", # mul1_i = r*sin_i
554 "sv.fmsubs 26, 8.v, 16.v, 26", # mul2_i = i*cos_r
555 # tpim = mul2_i - mul1_i
556 # vec_r jh/jl
557 "sv.ffadds 0.v, 24, 0.v", # vh/vl +/- tpre
558 # vec_i jh/jl
559 "sv.ffadds 8.v, 26, 8.v", # vh/vl +- tpim
560
561 # svstep loop
562 "setvl. 0, 0, 1, 1, 0, 0",
563 "bc 6, 3, -56"
564 ])
565 lst = list(lst)
566
567 # array and coefficients to test
568 ar = [7.0, -9.8, 3.0, -32.3,
569 -2.0, 5.0, -9.8, 31.3] # array 0..7 real
570 ai = [1.0, -1.8, 3.0, 19.3,
571 4.0, -2.0, -0.8, 1.3] # array 0..7 imaginary
572 coer = [-0.25, 0.5, 3.1, 6.2] # coefficients real
573 coei = [0.21, -0.1, 1.1, -4.0] # coefficients imaginary
574
575 # store in regfile
576 fprs = [0] * 64
577 for i, a in enumerate(ar):
578 fprs[i+0] = fp64toselectable(a)
579 for i, a in enumerate(ai):
580 fprs[i+8] = fp64toselectable(a)
581 for i, cr in enumerate(coer):
582 fprs[i+16] = fp64toselectable(cr)
583 for i, ci in enumerate(coei):
584 fprs[i+20] = fp64toselectable(ci)
585
586 # set total. err don't know how to calculate how many there are...
587 # do it manually for now
588 VL = 0
589 size = 2
590 n = len(ar)
591 while size <= n:
592 halfsize = size // 2
593 tablestep = n // size
594 for i in range(0, n, size):
595 for j in range(i, i + halfsize):
596 VL += 1
597 size *= 2
598
599 # SVSTATE (calculated VL)
600 svstate = SVP64State()
601 svstate.vl = VL # VL
602 svstate.maxvl = VL # MAXVL
603 print ("SVSTATE", bin(svstate.asint()))
604
605 with Program(lst, bigendian=False) as program:
606 sim = self.run_tst_program(program, svstate=svstate,
607 initial_fprs=fprs)
608 print ("spr svshape0", sim.spr['SVSHAPE0'])
609 print (" xdimsz", sim.spr['SVSHAPE0'].xdimsz)
610 print (" ydimsz", sim.spr['SVSHAPE0'].ydimsz)
611 print (" zdimsz", sim.spr['SVSHAPE0'].zdimsz)
612 print ("spr svshape1", sim.spr['SVSHAPE1'])
613 print ("spr svshape2", sim.spr['SVSHAPE2'])
614 print ("spr svshape3", sim.spr['SVSHAPE3'])
615
616 # work out the results with the twin mul/add-sub, explicit
617 # complex numbers
618 res_r, res_i = transform_radix2_complex(ar, ai, coer, coei)
619
620 for i, (expected_r, expected_i) in enumerate(zip(res_r, res_i)):
621 print ("i", i, float(sim.fpr(i)), float(sim.fpr(i+8)),
622 "expected_r", expected_r,
623 "expected_i", expected_i)
624 for i, (expected_r, expected_i) in enumerate(zip(res_r, res_i)):
625 # convert to Power single
626 expected_r = DOUBLE2SINGLE(fp64toselectable(expected_r ))
627 expected_r = float(expected_r)
628 actual_r = float(sim.fpr(i))
629 # approximate error calculation, good enough test
630 # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
631 # and the rounding is different
632 err = abs(actual_r - expected_r ) / expected_r
633 self.assertTrue(err < 1e-6)
634 # convert to Power single
635 expected_i = DOUBLE2SINGLE(fp64toselectable(expected_i ))
636 expected_i = float(expected_i)
637 actual_i = float(sim.fpr(i+8))
638 # approximate error calculation, good enough test
639 # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
640 # and the rounding is different
641 err = abs(actual_i - expected_i ) / expected_i
642 self.assertTrue(err < 1e-6)
643
644 def test_sv_ffadds_fft_scalar(self):
645 """>>> lst = ["sv.ffadds 2.v, 12, 13"
646 ]
647 four in-place vector adds and subs, but done with a scalar
648 pair (fp12, fp13)
649 """
650 lst = SVP64Asm(["sv.ffadds 2.v, 12, 13"
651 ])
652 lst = list(lst)
653
654 fprs = [0] * 32
655 scalar_a = 1.3
656 scalar_b = -2.0
657 fprs[12] = fp64toselectable(scalar_a)
658 fprs[13] = fp64toselectable(scalar_b)
659 res = []
660 # work out the results with the twin add-sub
661 for i in range(4):
662 t = scalar_b + scalar_a
663 u = scalar_b - scalar_a
664 t = DOUBLE2SINGLE(fp64toselectable(t)) # convert to Power single
665 u = DOUBLE2SINGLE(fp64toselectable(u)) # from double
666 res.append((t, u))
667 print ("FFT", i, "res", t, u)
668
669 # SVSTATE (in this case, VL=2)
670 svstate = SVP64State()
671 svstate.vl = 4 # VL
672 svstate.maxvl = 4 # MAXVL
673 print ("SVSTATE", bin(svstate.asint()))
674
675 with Program(lst, bigendian=False) as program:
676 sim = self.run_tst_program(program, svstate=svstate,
677 initial_fprs=fprs)
678 # confirm that the results are as expected
679 for i, (t, u) in enumerate(res):
680 a = float(sim.fpr(i+2))
681 b = float(sim.fpr(i+6))
682 t = float(t)
683 u = float(u)
684 print ("FFT", i, "in", a, b, "res", t, u)
685 for i, (t, u) in enumerate(res):
686 self.assertEqual(sim.fpr(i+2), t)
687 self.assertEqual(sim.fpr(i+6), u)
688
689 def test_sv_remap_fpmadds_fft_ldst(self):
690 """>>>lst = ["setvl 0, 0, 8, 0, 1, 1",
691 "sv.lfssh 0.v, 4(0), 20", # bit-reversed
692 "svshape 8, 1, 1, 1, 0",
693 "svremap 31, 1, 0, 2, 0, 1, 0",
694 "sv.ffmadds 0.v, 0.v, 0.v, 8.v"
695
696 runs a full in-place O(N log2 N) butterfly schedule for
697 Discrete Fourier Transform, using bit-reversed LD/ST
698 """
699 lst = SVP64Asm( ["svshape 8, 1, 1, 15, 0",
700 "svremap 1, 0, 0, 0, 0, 0, 0, 0",
701 "sv.lfssh 0.v, 4(0), 20", # shifted
702 "svshape 8, 1, 1, 1, 0",
703 "svremap 31, 1, 0, 2, 0, 1, 0",
704 "sv.ffmadds 0.v, 0.v, 0.v, 8.v"
705 ])
706 lst = list(lst)
707
708 # array and coefficients to test
709 av = [7.0, -9.8, 3.0, -32.3,
710 -2.0, 5.0, -9.8, 31.3] # array 0..7
711 coe = [-0.25, 0.5, 3.1, 6.2] # coefficients
712
713 # store in regfile
714 fprs = [0] * 32
715 for i, c in enumerate(coe):
716 fprs[i+8] = fp64toselectable(c)
717 # store in memory
718 mem = {}
719 val = 0
720 for i, a in enumerate(av):
721 a = SINGLE(fp64toselectable(a)).value
722 shift = (i % 2) == 1
723 if shift == 0:
724 val = a
725 else:
726 mem[(i//2)*8] = val | (a << 32)
727
728 with Program(lst, bigendian=False) as program:
729 sim = self.run_tst_program(program, initial_mem=mem,
730 initial_fprs=fprs)
731 print ("spr svshape0", sim.spr['SVSHAPE0'])
732 print (" xdimsz", sim.spr['SVSHAPE0'].xdimsz)
733 print (" ydimsz", sim.spr['SVSHAPE0'].ydimsz)
734 print (" zdimsz", sim.spr['SVSHAPE0'].zdimsz)
735 print ("spr svshape1", sim.spr['SVSHAPE1'])
736 print ("spr svshape2", sim.spr['SVSHAPE2'])
737 print ("spr svshape3", sim.spr['SVSHAPE3'])
738
739 print ("mem dump")
740 print (sim.mem.dump())
741
742 # work out the results with the twin mul/add-sub,
743 # note bit-reverse mode requested
744 res = transform_radix2(av, coe, reverse=True)
745
746 for i, expected in enumerate(res):
747 print ("i", i, float(sim.fpr(i)), "expected", expected)
748 for i, expected in enumerate(res):
749 # convert to Power single
750 expected = DOUBLE2SINGLE(fp64toselectable(expected))
751 expected = float(expected)
752 actual = float(sim.fpr(i))
753 # approximate error calculation, good enough test
754 # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
755 # and the rounding is different
756 err = abs(actual - expected) / expected
757 self.assertTrue(err < 1e-6)
758
759 def run_tst_program(self, prog, initial_regs=None,
760 svstate=None,
761 initial_mem=None,
762 initial_fprs=None):
763 if initial_regs is None:
764 initial_regs = [0] * 32
765 simulator = run_tst(prog, initial_regs, mem=initial_mem,
766 initial_fprs=initial_fprs,
767 svstate=svstate)
768
769 print ("GPRs")
770 simulator.gpr.dump()
771 print ("FPRs")
772 simulator.fpr.dump()
773
774 return simulator
775
776
777 if __name__ == "__main__":
778 unittest.main()