adding reduced COS table DCT test
[openpower-isa.git] / src / openpower / decoder / isa / test_caller_svp64_dct.py
1 from nmigen import Module, Signal
2 from nmigen.back.pysim import Simulator, Delay, Settle
3 from nmutil.formaltest import FHDLTestCase
4 from openpower.decoder.power_decoder import (create_pdecode)
5 from openpower.simulator.program import Program
6 from openpower.decoder.isa.caller import SVP64State
7 from openpower.decoder.selectable_int import SelectableInt
8 from openpower.decoder.isa.test_caller import run_tst
9 from openpower.sv.trans.svp64 import SVP64Asm
10 from copy import deepcopy
11 from openpower.decoder.helpers import fp64toselectable, SINGLE
12 from openpower.decoder.isafunctions.double2single import DOUBLE2SINGLE
13 from openpower.decoder.isa.remap_dct_yield import (halfrev2, reverse_bits,
14 iterate_dct_inner_butterfly_indices,
15 iterate_dct_outer_butterfly_indices,
16 transform2)
17 import unittest
18 import math
19
20
def transform_inner_radix2(vec, ctable):
    """Python reference model of the DCT *inner* butterfly pass.

    The input is re-ordered (``halfrev2`` followed by a bit-reversed
    permutation) into a freshly built list, then every index pair produced
    by the inner-butterfly REMAP iterators is combined as a twin
    add / multiply-subtract.  One entry of ``ctable`` is consumed per pair,
    in iteration order.

    :param vec: input samples (length presumably a power of two -- the
                bit-reversal width is ``len(vec).bit_length() - 1``)
    :param ctable: coefficients, indexed by butterfly step; each result is
                   multiplied by ``1.0 / ctable[step]``
    :returns: the re-ordered, transformed list
    """
    # Initialization
    size = len(vec)
    print()
    print("transform2", size)
    nbits = size.bit_length() - 1

    # reference (read/write) the in-place data in *reverse-bit-order*
    identity = list(range(size))
    bitrev = [identity[reverse_bits(pos, nbits)] for pos in range(size)]

    # and pretend we LDed data in half-swapped *and* bit-reversed order
    # as well.  TODO: merge these two
    swapped = halfrev2(vec, False)
    work = [swapped[bitrev[pos]] for pos in range(size)]

    ################
    # INNER butterfly
    ################

    # minimal attribute bag standing in for an SVSHAPE register
    class SVSHAPE:
        pass

    def schedule(skip):
        # the two schedules differ only in "skip":
        # 0b00 is the j schedule, 0b01 the j+halfstep schedule
        shape = SVSHAPE()
        shape.lims = [size, 2, 0]   # xdim, mode-dependent value, zdim
        shape.mode = 0b01
        shape.submode2 = 0b01
        shape.skip = skip
        shape.offset = 0            # experiment with different offset, here
        shape.invxyz = [1, 0, 0]    # inversion if desired
        return shape

    # walk both index streams in lock-step
    lower = iterate_dct_inner_butterfly_indices(schedule(0b00))
    upper = iterate_dct_inner_butterfly_indices(schedule(0b01))
    for step, ((lo, lo_end), (hi, hi_end)) in enumerate(zip(lower, upper)):
        t1 = work[lo]
        t2 = work[hi]
        coeff = ctable[step]
        work[lo] = t1 + t2
        work[hi] = (t1 - t2) * (1.0/coeff)
        print("coeff", "ci", step,
              "jl", lo, "jh", hi,
              "i/n", (step+0.5), 1.0/coeff,
              "t1, t2", t1, t2, "res", work[lo], work[hi],
              "end", bin(lo_end), bin(hi_end))
        if lo_end == 0b111:  # all loops end
            break

    return work
82
def transform_outer_radix2(vec):
    """Python reference model of the DCT *outer* butterfly pass.

    Walks the index pairs produced by the outer-butterfly REMAP iterators
    and accumulates ``vec[jh]`` into ``vec[jl]`` for each pair.

    :param vec: list of samples; it is modified IN PLACE
    :returns: the same (mutated) ``vec`` object
    """
    # Initialization
    size = len(vec)
    print()
    print("transform2", size)

    # minimal attribute bag standing in for an SVSHAPE register
    class SVSHAPE:
        pass

    def schedule(skip):
        # the two schedules differ only in "skip":
        # 0b00 is the j schedule, 0b01 the j+halfstep schedule
        shape = SVSHAPE()
        shape.lims = [size, 3, 0]   # xdim, mode-dependent value, zdim
        shape.mode = 0b01
        shape.submode2 = 0b100
        shape.skip = skip
        shape.offset = 0            # experiment with different offset, here
        shape.invxyz = [0, 0, 0]    # inversion if desired
        return shape

    # walk both index streams in lock-step
    lower = iterate_dct_outer_butterfly_indices(schedule(0b00))
    upper = iterate_dct_outer_butterfly_indices(schedule(0b01))
    for (lo, lo_end), (hi, hi_end) in zip(lower, upper):
        print("itersum    jr", lo, hi,
              "end", bin(lo_end), bin(hi_end))
        vec[lo] += vec[hi]
        if lo_end == 0b111:  # all loops end
            break

    print("transform2 result", vec)

    return vec
128
129
class DCTTestCase(FHDLTestCase):
    """Simulator tests for the SVP64 DCT butterfly REMAP schedules.

    Each test assembles a short SVP64 program, runs it through ``run_tst``
    with a pre-loaded floating-point register file, and compares the
    resulting FPRs against a pure-python reference computation.
    """

    # ------------------------------------------------------------------
    # shared helpers (names deliberately do not start with "test")
    # ------------------------------------------------------------------

    def _check_regs(self, sim, expected):
        """Assert that all 32 GPRs of ``sim`` equal ``expected[0..31]``."""
        for i in range(32):
            self.assertEqual(sim.gpr(i), SelectableInt(expected[i], 64))

    @staticmethod
    def _load_order(values):
        """Return ``values`` half-swapped and then bit-reverse permuted.

        This is the order in which the tests place the input samples in
        the register file before running a butterfly schedule.
        """
        n = len(values)
        levels = n.bit_length() - 1
        order = [reverse_bits(i, levels) for i in range(n)]
        swapped = halfrev2(values, False)
        return [swapped[idx] for idx in order]

    @staticmethod
    def _dct_cos_table(n, reduced=False, verbose=False):
        """Build the table of ``2*cos((ci+0.5)*pi/size)`` coefficients.

        ``size`` runs n, n/2, ... 2.  In the full table each layer's
        ``size//2`` coefficients are repeated ``n//size`` times; with
        ``reduced=True`` each layer is emitted only once.

        :param n: transform length
        :param reduced: emit each layer's coefficients once only
        :param verbose: print every coefficient as it is generated
        :returns: list of float coefficients
        """
        ctable = []
        size = n
        while size >= 2:
            halfsize = size // 2
            repeats = 1 if reduced else n // size
            for _ in range(repeats):
                for ci in range(halfsize):
                    coeff = math.cos((ci + 0.5) * math.pi / size) * 2.0
                    ctable.append(coeff)
                    if verbose:
                        print("coeff", "ci", ci, "size", size,
                              "i/n", (ci+0.5), 1.0/coeff)
            size //= 2
        return ctable

    @staticmethod
    def _initial_fprs(samples, coefficients=()):
        """Return a 32-entry FPR image.

        ``samples`` are stored from FPR 0 upwards and the *reciprocals*
        of ``coefficients`` from FPR 8 upwards.
        """
        fprs = [0] * 32
        for i, a in enumerate(samples):
            fprs[i+0] = fp64toselectable(a)
        for i, c in enumerate(coefficients):
            fprs[i+8] = fp64toselectable(1.0 / c)  # invert
        return fprs

    @staticmethod
    def _dump_svshapes(sim):
        """Print the four SVSHAPE SPRs (debug aid only)."""
        print("spr svshape0", sim.spr['SVSHAPE0'])
        print(" xdimsz", sim.spr['SVSHAPE0'].xdimsz)
        print(" ydimsz", sim.spr['SVSHAPE0'].ydimsz)
        print(" zdimsz", sim.spr['SVSHAPE0'].zdimsz)
        print("spr svshape1", sim.spr['SVSHAPE1'])
        print("spr svshape2", sim.spr['SVSHAPE2'])
        print("spr svshape3", sim.spr['SVSHAPE3'])

    def _assert_fprs_close(self, sim, reference, tolerance):
        """Compare FPRs 0..len(reference)-1 against ``reference``.

        Each reference value is first rounded through DOUBLE2SINGLE.  The
        comparison is on relative error rather than exact equality
        because (per the original author's note) an FMAC is being
        compared against FMUL-plus-FADD-or-FSUB and the rounding differs.
        An exactly-zero reference value falls back to absolute error so
        that the check cannot raise ZeroDivisionError.
        """
        for i, expected in enumerate(reference):
            print("i", i, float(sim.fpr(i)), "expected", expected)
        for i, expected in enumerate(reference):
            # convert to Power single
            expected = float(DOUBLE2SINGLE(fp64toselectable(expected)))
            actual = float(sim.fpr(i))
            err = abs(actual - expected)
            if expected != 0.0:
                err /= abs(expected)
            print("err", i, err)
            self.assertLess(err, tolerance,
                            "FPR %d: got %r expected %r" %
                            (i, actual, expected))

    def _run_cos_precompute(self, asm, reduced):
        """Run a COS-table precompute program and verify FPRs 80 onwards.

        :param asm: list of SVP64 assembly lines
        :param reduced: selects the reduced (per-layer) reference table
                        instead of the full one; the reduced variant also
                        prints its coefficients
        """
        lst = list(SVP64Asm(asm))

        gprs = [0] * 32
        fprs = [0] * 128
        # constants
        fprs[43] = fp64toselectable(0.5)       # 0.5
        fprs[41] = fp64toselectable(math.pi)   # pi
        # 2.0: not referenced by the programs in this file, kept so the
        # initial register image stays the same as before
        fprs[44] = fp64toselectable(2.0)

        ctable = self._dct_cos_table(8, reduced=reduced, verbose=reduced)

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, gprs, initial_fprs=fprs)
            print("MEM")
            sim.mem.dump()
            # dump the intermediate vectors, purely for debugging
            for title, base in (("ci FP", 24), ("size FP", 48),
                                ("temps", 0)):
                print(title)
                for i in range(len(ctable)):
                    print("i", i, float(sim.fpr(i+base)))
            for i, coeff in enumerate(ctable):
                expected = 1.0/coeff
                actual = float(sim.fpr(i+80))
                err = abs((actual - expected) / expected)
                print("i", i, actual, "1/expect", 1/expected,
                      "expected", expected,
                      "err", err)
                self.assertLess(err, 1e-6,
                                "FPR %d: got %r expected %r" %
                                (i+80, actual, expected))

    # ------------------------------------------------------------------
    # tests
    # ------------------------------------------------------------------

    def test_sv_ffadds_dct(self):
        """>>> lst = ["sv.fdmadds 0.v, 0.v, 0.v, 8.v"
                        ]
        four in-place vector adds, four in-place vector mul-subs

        SVP64 "DCT" mode will *automatically* offset FRB and an implicit
        FRS to perform the two multiplies. one add, one subtract.

        sv.fdadds FRT, FRA, FRC, FRB actually does:
            fadds  FRT   , FRB, FRA
            fsubs  FRT+vl, FRA, FRB+vl

        NOTE(review): the assertions below expect the multiplied
        difference in FPRs 0..3 and the sum in FPRs 4..7; confirm the
        operand description above against the instruction specification.
        """
        lst = list(SVP64Asm(["sv.fdmadds 0.v, 0.v, 0.v, 8.v"
                             ]))

        # cheat here with these values, they're selected so that
        # rounding errors do not occur. sigh.
        fprs = [0] * 32
        av = [7.0, -0.8, 2.0, -2.3]    # first half of array 0..3
        bv = [-2.0, 2.0, -0.8, 1.4]    # second half of array 4..7
        cv = [-1.0, 0.5, 2.5, -0.25]   # coefficients
        res = []
        # work out the results with the twin add-sub
        for i, (a, b, c) in enumerate(zip(av, bv, cv)):
            fprs[i+0] = fp64toselectable(a)
            fprs[i+4] = fp64toselectable(b)
            fprs[i+8] = fp64toselectable(c)
            # this isn't quite a perfect replication of the
            # FP32 mul-add-sub.  better really to use FPMUL32, FPADD32
            # and FPSUB32 directly to be honest.
            t = a + b
            diff = float(DOUBLE2SINGLE(fp64toselectable(a - b)))  # FP32 round
            u = diff * c
            tc = DOUBLE2SINGLE(fp64toselectable(t))  # convert to Power single
            uc = DOUBLE2SINGLE(fp64toselectable(u))  # from double
            res.append((uc, tc))
            print("DCT", i, "in", a, b, "c", c, "res", t, u)

        # SVSTATE (in this case, VL=4)
        svstate = SVP64State()
        svstate.vl = 4     # VL
        svstate.maxvl = 4  # MAXVL
        print("SVSTATE", bin(svstate.asint()))

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, svstate=svstate,
                                       initial_fprs=fprs)
            # confirm that the results are as expected
            for i, (uc, tc) in enumerate(res):
                a = float(sim.fpr(i+0))
                b = float(sim.fpr(i+4))
                print("DCT", i, "in", a, b, "res", float(uc), float(tc))
            for i, (uc, tc) in enumerate(res):
                self.assertEqual(sim.fpr(i+0), uc)
                self.assertEqual(sim.fpr(i+4), tc)

    def test_sv_remap_fpmadds_dct_inner_4(self):
        """>>> lst = ["svshape 4, 1, 1, 2, 0",
                         "svremap 27, 1, 0, 2, 0, 1, 0",
                         "sv.fdmadds 0.v, 0.v, 0.v, 8.v"
                        ]
        runs a full in-place 4-long O(N log2 N) inner butterfly schedule
        for DCT

        SVP64 "REMAP" in Butterfly Mode is applied to a twin +/- FMAC
        (3 inputs, 2 outputs)

        Note that the coefficient (FRC) is not on a "schedule", it
        is straight Vectorised (0123...) because DCT coefficients
        cannot be shared between butterfly layers (due to +0.5)
        """
        lst = list(SVP64Asm(["svshape 4, 1, 1, 2, 0",
                             "svremap 27, 1, 0, 2, 0, 1, 0",
                             "sv.fdmadds 0.v, 0.v, 0.v, 8.v"
                             ]))

        # array and coefficients to test
        avi = [7.0, -0.8, 2.0, -2.3]
        coe = [-0.25, 0.5, 3.1, 6.2]  # 4 coefficients

        # store in regfile (samples in load order, coefficients inverted)
        fprs = self._initial_fprs(self._load_order(avi), coe)

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, initial_fprs=fprs)
            self._dump_svshapes(sim)

            # work out the results with the twin mul/add-sub
            res = transform_inner_radix2(avi, coe)
            self._assert_fprs_close(sim, res, 1e-6)

    def test_sv_remap_fpmadds_dct_outer_8(self):
        """>>> lst = ["svshape 8, 1, 1, 3, 0",
                         "svremap 27, 1, 0, 2, 0, 1, 0",
                         "sv.fadds 0.v, 0.v, 0.v"
                         ]
        runs a full in-place 8-long O(N log2 N) outer butterfly schedule
        for DCT, does the iterative overlapped ADDs

        SVP64 "REMAP" in Butterfly Mode.
        """
        lst = list(SVP64Asm(["svshape 8, 1, 1, 3, 0",
                             "svremap 27, 1, 0, 2, 0, 1, 0",
                             "sv.fadds 0.v, 0.v, 0.v"
                             ]))

        # array to test
        av = [7.0, -9.8, 3.0, -32.3, 2.1, 3.6, 0.7, -0.2]

        # store in regfile
        fprs = self._initial_fprs(av)

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, initial_fprs=fprs)
            self._dump_svshapes(sim)

            # outer iterative sum (mutates av, which is no longer needed)
            res = transform_outer_radix2(av)
            self._assert_fprs_close(sim, res, 1e-6)

    def test_sv_remap_fpmadds_dct_8(self):
        """>>> lst = ["svremap 27, 1, 0, 2, 0, 1, 1",
                         "svshape 8, 1, 1, 2, 0",
                         "sv.fdmadds 0.v, 0.v, 0.v, 8.v",
                         "svshape 8, 1, 1, 3, 0",
                         "sv.fadds 0.v, 0.v, 0.v"
                         ]
        runs a full in-place 8-long O(N log2 N) DCT, both
        inner and outer butterfly "REMAP" schedules.
        """
        lst = list(SVP64Asm(["svremap 27, 1, 0, 2, 0, 1, 1",
                             "svshape 8, 1, 1, 2, 0",
                             "sv.fdmadds 0.v, 0.v, 0.v, 8.v",
                             "svshape 8, 1, 1, 3, 0",
                             "sv.fadds 0.v, 0.v, 0.v"
                             ]))

        # array and coefficients to test
        avi = [7.0, -9.8, 3.0, -32.3, 2.1, 3.6, 0.7, -0.2]
        ctable = self._dct_cos_table(len(avi))

        # store in regfile
        fprs = self._initial_fprs(self._load_order(avi), ctable)

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, initial_fprs=fprs)
            self._dump_svshapes(sim)

            # reference DCT
            res = transform2(avi)
            self._assert_fprs_close(sim, res, 1e-5)

    def test_sv_remap_dct_cos_precompute_8(self):
        """pre-computes a DCT COS table, deliberately using a lot of
        registers so as to be able to see what is going on (dumping all
        regs after the run).

        the simpler (scalar) version is in test_caller_transcendentals.py
        (test_fp_coss_cvt), this is the SVP64 variant.  TODO: really
        need the new version of fcfids which doesn't spam memory with
        LD/STs.
        """
        self._run_cos_precompute(
            ["svshape 8, 1, 1, 2, 0",
             "svremap 0, 0, 0, 2, 0, 1, 1",
             "sv.svstep 4.v, 4, 1",   # svstep get vector of ci
             "sv.svstep 16.v, 3, 1",  # svstep get vector of step
             "addi 1, 0, 0x0000",
             "setvl 0, 0, 12, 0, 1, 1",
             "sv.std 4.v, 0(1)",
             "sv.lfd 64.v, 0(1)",
             "sv.fcfids 48.v, 64.v",
             "addi 1, 0, 0x0060",
             "sv.std 16.v, 0(1)",
             "sv.lfd 12.v, 0(1)",
             "sv.fcfids 24.v, 12.v",
             "sv.fadds 0.v, 24.v, 43",    # plus 0.5
             "sv.fmuls 0.v, 0.v, 41",     # times PI
             "sv.fdivs 0.v, 0.v, 48.v",   # div size
             "sv.fcoss 80.v, 0.v",
             "sv.fdivs 80.v, 43, 80.v",   # div 0.5 / x
             ],
            reduced=False)

    def test_sv_remap_dct_cos_precompute_inner_8(self):
        """pre-computes a DCT COS table, using the shorter costable
        indices schedule.  turns out, some COS values are repeated
        in each layer of the DCT butterfly.

        the simpler (scalar) version is in test_caller_transcendentals.py
        (test_fp_coss_cvt), this is the SVP64 variant.  TODO: really
        need the new version of fcfids which doesn't spam memory with
        LD/STs.
        """
        self._run_cos_precompute(
            ["svshape 8, 1, 1, 5, 0",
             "svremap 0, 0, 0, 2, 0, 1, 1",
             "sv.svstep 4.v, 3, 1",   # svstep get vector of ci
             "sv.svstep 16.v, 2, 1",  # svstep get vector of step
             "addi 1, 0, 0x0000",
             "setvl 0, 0, 7, 0, 1, 1",
             "sv.std 4.v, 0(1)",
             "sv.lfd 64.v, 0(1)",
             "sv.fcfids 48.v, 64.v",
             "addi 1, 0, 0x0060",
             "sv.std 16.v, 0(1)",
             "sv.lfd 12.v, 0(1)",
             "sv.fcfids 24.v, 12.v",
             "sv.fadds 0.v, 24.v, 43",    # plus 0.5
             "sv.fmuls 0.v, 0.v, 41",     # times PI
             "sv.fdivs 0.v, 0.v, 48.v",   # div size
             "sv.fcoss 80.v, 0.v",
             "sv.fdivs 80.v, 43, 80.v",   # div 0.5 / x
             ],
            reduced=True)

    def test_sv_remap_fpmadds_dct_8_mode_4(self):
        """>>> lst = ["svremap 31, 1, 0, 2, 0, 1, 1",
                         "svshape 8, 1, 1, 4, 0",
                         "sv.fdmadds 0.v, 0.v, 0.v, 8.v",
                         "svshape 8, 1, 1, 3, 0",
                         "sv.fadds 0.v, 0.v, 0.v"
                         ]
        runs a full in-place 8-long O(N log2 N) DCT, both
        inner and outer butterfly "REMAP" schedules.
        uses shorter tables: FRC also needs to be on a Schedule
        """
        lst = list(SVP64Asm(["svremap 31, 1, 0, 2, 0, 1, 1",
                             "svshape 8, 1, 1, 4, 0",
                             "sv.fdmadds 0.v, 0.v, 0.v, 8.v",
                             "svshape 8, 1, 1, 3, 0",
                             "sv.fadds 0.v, 0.v, 0.v"
                             ]))

        # array and (reduced) coefficients to test
        avi = [7.0, -9.8, 3.0, -32.3, 2.1, 3.6, 0.7, -0.2]
        ctable = self._dct_cos_table(len(avi), reduced=True)

        # store in regfile
        fprs = self._initial_fprs(self._load_order(avi), ctable)

        with Program(lst, bigendian=False) as program:
            sim = self.run_tst_program(program, initial_fprs=fprs)
            self._dump_svshapes(sim)

            # reference DCT
            res = transform2(avi)
            self._assert_fprs_close(sim, res, 1e-5)

    def run_tst_program(self, prog, initial_regs=None,
                        svstate=None,
                        initial_mem=None,
                        initial_fprs=None):
        """Run ``prog`` through ``run_tst`` and return the simulator.

        :param prog: the assembled Program to execute
        :param initial_regs: initial GPR values (defaults to 32 zeros)
        :param svstate: optional initial SVSTATE
        :param initial_mem: optional initial memory contents
        :param initial_fprs: optional initial FPR values
        :returns: the simulator object returned by ``run_tst``; its GPRs
                  and FPRs are dumped to stdout before returning
        """
        if initial_regs is None:
            initial_regs = [0] * 32
        simulator = run_tst(prog, initial_regs, mem=initial_mem,
                            initial_fprs=initial_fprs,
                            svstate=svstate)

        print("GPRs")
        simulator.gpr.dump()
        print("FPRs")
        simulator.fpr.dump()

        return simulator
619
620
# allow this test module to be executed directly as a script
if __name__ == "__main__":
    unittest.main()