From 12a15c4fe7ac571ed8feb114517ecb9ed9db5e1a Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Fri, 9 Jul 2021 17:03:31 +0100 Subject: [PATCH] add svstep variant of fpmadds fft test --- .../decoder/isa/test_caller_svp64_fft.py | 81 +++++++++++++++++++ 1 file changed, 81 insertions(+) diff --git a/src/openpower/decoder/isa/test_caller_svp64_fft.py b/src/openpower/decoder/isa/test_caller_svp64_fft.py index df93c763..152b4f1b 100644 --- a/src/openpower/decoder/isa/test_caller_svp64_fft.py +++ b/src/openpower/decoder/isa/test_caller_svp64_fft.py @@ -146,6 +146,87 @@ class FFTTestCase(FHDLTestCase): err = abs(actual - expected) / expected self.assertTrue(err < 1e-7) + def test_sv_remap_fpmadds_fft_svstep(self): + """>>> lst = ["svremap 8, 1, 1, 1", + "sv.ffmadds 2.v, 2.v, 2.v, 10.v" + ] + runs a full in-place O(N log2 N) butterfly schedule for + Discrete Fourier Transform. + + this is the twin "butterfly" mul-add-sub from Cooley-Tukey + https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Data_reordering,_bit_reversal,_and_in-place_algorithms + + there is the *option* to target a different location (non-in-place) + just in case. + + SVP64 "REMAP" in Butterfly Mode is applied to a twin +/- FMAC + (3 inputs, 2 outputs) + """ + lst = SVP64Asm( ["setvl 0, 0, 11, 1, 1, 1", + "svremap 8, 1, 1, 1", + "sv.ffmadds 0.v, 0.v, 0.v, 8.v", + "setvl. 0, 0, 0, 1, 0, 0", + "bc 4, 2, -16" + ]) + lst = list(lst) + + # array and coefficients to test + av = [7.0, -9.8, 3.0, -32.3, + -2.0, 5.0, -9.8, 31.3] # array 0..7 + coe = [-0.25, 0.5, 3.1, 6.2] # coefficients + + # store in regfile + fprs = [0] * 32 + for i, c in enumerate(coe): + fprs[i+8] = fp64toselectable(c) + for i, a in enumerate(av): + fprs[i+0] = fp64toselectable(a) + + # set total. err don't know how to calculate how many there are... + # do it manually for now + VL = 0 + size = 2 + n = len(av) + while size <= n: + halfsize = size // 2 + tablestep = n // size + for i in range(0, n, size): + for j in range(i, i + halfsize): + VL += 1 + size *= 2 + + # SVSTATE (calculated VL) + svstate = SVP64State() + svstate.vl[0:7] = VL # VL + svstate.maxvl[0:7] = VL # MAXVL + print ("SVSTATE", bin(svstate.spr.asint())) + + with Program(lst, bigendian=False) as program: + sim = self.run_tst_program(program, svstate=svstate, + initial_fprs=fprs) + print ("spr svshape0", sim.spr['SVSHAPE0']) + print (" xdimsz", sim.spr['SVSHAPE0'].xdimsz) + print (" ydimsz", sim.spr['SVSHAPE0'].ydimsz) + print (" zdimsz", sim.spr['SVSHAPE0'].zdimsz) + print ("spr svshape1", sim.spr['SVSHAPE1']) + print ("spr svshape2", sim.spr['SVSHAPE2']) + print ("spr svshape3", sim.spr['SVSHAPE3']) + + # work out the results with the twin mul/add-sub + res = transform_radix2(av, coe) + + for i, expected in enumerate(res): + print ("i", i, float(sim.fpr(i)), "expected", expected) + for i, expected in enumerate(res): + # convert to Power single + expected = DOUBLE2SINGLE(fp64toselectable(expected)) + expected = float(expected) + actual = float(sim.fpr(i)) + # approximate error calculation, good enough test + # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB + # and the rounding is different + err = abs(actual - expected) / expected + self.assertTrue(err < 1e-7) def test_sv_fpmadds_fft(self): """>>> lst = ["sv.ffmadds 2.v, 2.v, 2.v, 10.v" -- 2.30.2