From 657f4bb3198fe7f1f5111e84aedfe40f6e5e3956 Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Tue, 6 Jul 2021 20:34:40 +0100 Subject: [PATCH] add FFT REMAP butterfly unit test --- openpower/isatables/fields.text | 2 +- src/openpower/decoder/isa/remap_fft_yield.py | 17 ++-- src/openpower/decoder/isa/svshape.py | 8 +- .../decoder/isa/test_caller_svp64_fft.py | 87 ++++++++++++++++++- src/openpower/sv/trans/svp64.py | 3 +- 5 files changed, 105 insertions(+), 12 deletions(-) diff --git a/openpower/isatables/fields.text b/openpower/isatables/fields.text index 490d992f..bcdcdbd1 100644 --- a/openpower/isatables/fields.text +++ b/openpower/isatables/fields.text @@ -761,7 +761,7 @@ SVi (16:23) Simple-V immediate field for setting VL or MVL Formats: SVL - SVRM (16:20) + SVRM (21:25) Simple-V "REMAP" Mode Formats: SVM SVxd (6:10) diff --git a/src/openpower/decoder/isa/remap_fft_yield.py b/src/openpower/decoder/isa/remap_fft_yield.py index 4b1f411e..f7ed5ad5 100644 --- a/src/openpower/decoder/isa/remap_fft_yield.py +++ b/src/openpower/decoder/isa/remap_fft_yield.py @@ -79,7 +79,7 @@ def iterate_butterfly_indices(SVSHAPE): def demo(): # set the dimension sizes here - xdim = 16 + xdim = 8 ydim = 0 # not needed zdim = 0 # again, not needed @@ -103,29 +103,32 @@ def demo(): SVSHAPE0 = SVSHAPE() SVSHAPE0.lims = [xdim, ydim, zdim] SVSHAPE0.order = [0,1,2] # experiment with different permutations, here - SVSHAPE0.mode = 0b00 + SVSHAPE0.mode = 0b01 + SVSHAPE0.skip = 0b00 SVSHAPE0.offset = 0 # experiment with different offset, here SVSHAPE0.invxyz = [0,0,0] # inversion if desired # j+halfstep schedule SVSHAPE1 = SVSHAPE() SVSHAPE1.lims = [xdim, ydim, zdim] SVSHAPE1.order = [0,1,2] # experiment with different permutations, here - SVSHAPE1.mode = 0b01 + SVSHAPE0.mode = 0b01 + SVSHAPE1.skip = 0b01 SVSHAPE1.offset = 0 # experiment with different offset, here SVSHAPE1.invxyz = [0,0,0] # inversion if desired # k schedule SVSHAPE2 = SVSHAPE() SVSHAPE2.lims = [xdim, ydim, zdim] SVSHAPE2.order = [0,1,2] # experiment with different permutations, here - SVSHAPE2.mode = 0b10 + SVSHAPE0.mode = 0b01 + SVSHAPE2.skip = 0b10 SVSHAPE2.offset = 0 # experiment with different offset, here SVSHAPE2.invxyz = [0,0,0] # inversion if desired # enumerate over the iterator function, getting new indices schedule = [] - for idx, (jl, jh, k) in enumerate(zip(iterate_indices(SVSHAPE0), - iterate_indices(SVSHAPE1), - iterate_indices(SVSHAPE2))): + for idx, (jl, jh, k) in enumerate(zip(iterate_butterfly_indices(SVSHAPE0), + iterate_butterfly_indices(SVSHAPE1), + iterate_butterfly_indices(SVSHAPE2))): if idx >= VL: break schedule.append((jl, jh, k)) diff --git a/src/openpower/decoder/isa/svshape.py b/src/openpower/decoder/isa/svshape.py index 03dbbb0b..e2eeaeae 100644 --- a/src/openpower/decoder/isa/svshape.py +++ b/src/openpower/decoder/isa/svshape.py @@ -5,6 +5,7 @@ from openpower.decoder.isa.remap_fft_yield import iterate_butterfly_indices from openpower.sv.svp64 import SVP64REMAP import os from copy import deepcopy +from openpower.util import log class SVSHAPE(SelectableInt): @@ -16,9 +17,12 @@ class SVSHAPE(SelectableInt): l = deepcopy(SVP64REMAP.layout) l.reverse() for field, width in l: - v = FieldSelectableInt(self, tuple(range(offs, offs+width))) + end = offs+width + fs = tuple(range(offs, end)) + v = FieldSelectableInt(self, fs) self.fsi[field] = v - offs += width + #log("SVSHAPE setup field", field, offs, end) + offs = end @property def order(self): diff --git a/src/openpower/decoder/isa/test_caller_svp64_fft.py b/src/openpower/decoder/isa/test_caller_svp64_fft.py index 6cb2b522..adbdfd43 100644 --- a/src/openpower/decoder/isa/test_caller_svp64_fft.py +++ b/src/openpower/decoder/isa/test_caller_svp64_fft.py @@ -23,7 +23,7 @@ class DecoderTestCase(FHDLTestCase): for i in range(32): self.assertEqual(sim.gpr(i), SelectableInt(expected[i], 64)) - def test_sv_fpmadds_fft(self): + def tst_sv_fpmadds_fft(self): """>>> lst = ["sv.ffmadds 2.v, 2.v, 2.v, 10.v" ] four in-place vector mul-adds, four in-place vector mul-subs @@ -77,6 +77,91 @@ class DecoderTestCase(FHDLTestCase): self.assertEqual(sim.fpr(i+2), t) self.assertEqual(sim.fpr(i+6), u) + def test_sv_remap_fpmadds_fft(self): + """>>> lst = ["svremap 8, 1, 1, 1", + "sv.ffmadds 2.v, 2.v, 2.v, 10.v" + ] + four in-place vector mul-adds, four in-place vector mul-subs + + this is the twin "butterfly" mul-add-sub from Cooley-Tukey + https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Data_reordering,_bit_reversal,_and_in-place_algorithms + + there is the *option* to target a different location (non-in-place) + just in case. + + SVP64 "FFT" mode will *automatically* offset FRB and an implicit + FRS to perform the two multiplies. one add, one subtract. + + sv.ffmadds FRT, FRA, FRC, FRB actually does: + fmadds FRT , FRA, FRC, FRA + fnmsubs FRT+vl, FRA, FRC, FRB+vl + """ + lst = SVP64Asm( ["svremap 8, 1, 1, 1", + "sv.ffmadds 2.v, 2.v, 2.v, 10.v" + ]) + lst = list(lst) + + fprs = [0] * 32 + av = [7.0, -9.8, 2.0, -32.3, + -2.0, 2.0, -9.8, 32.3] # array 0..7 + coe = [-1.0, 4.0, 3.1, 6.2] # coefficients + # store in regfile + for i, c in enumerate(coe): + fprs[i+10] = fp64toselectable(c) + for i, a in enumerate(av): + fprs[i+2] = fp64toselectable(a) + # work out the results with the twin mul/add-sub + res = [] + for i, (a, c) in enumerate(zip(av, coe)): + continue + fprs[i+2] = fp64toselectable(a) + fprs[i+10] = fp64toselectable(c) + mul = a * c + t = a + mul + u = b - mul + t = DOUBLE2SINGLE(fp64toselectable(t)) # convert to Power single + u = DOUBLE2SINGLE(fp64toselectable(u)) # from double + res.append((t, u)) + print ("FFT", i, "in", a, b, "coeff", c, "mul", mul, "res", t, u) + + # set total. err don't know how to calculate how many there are... + # do it manually for now + VL = 0 + size = 2 + n = len(av) + while size <= n: + halfsize = size // 2 + tablestep = n // size + for i in range(0, n, size): + for j in range(i, i + halfsize): + VL += 1 + size *= 2 + + # SVSTATE (calculated VL) + svstate = SVP64State() + svstate.vl[0:7] = VL # VL + svstate.maxvl[0:7] = VL # MAXVL + print ("SVSTATE", bin(svstate.spr.asint())) + + with Program(lst, bigendian=False) as program: + sim = self.run_tst_program(program, svstate=svstate, + initial_fprs=fprs) + print ("spr svshape0", sim.spr['SVSHAPE0']) + print (" xdimsz", sim.spr['SVSHAPE0'].xdimsz) + print (" ydimsz", sim.spr['SVSHAPE0'].ydimsz) + print (" zdimsz", sim.spr['SVSHAPE0'].zdimsz) + print ("spr svshape1", sim.spr['SVSHAPE1']) + print ("spr svshape2", sim.spr['SVSHAPE2']) + print ("spr svshape3", sim.spr['SVSHAPE3']) + for i in range(8): + print ("i", i, float(sim.fpr(i))) + + return + # confirm that the results are as expected + for i, (t, u) in enumerate(res): + self.assertEqual(sim.fpr(i+2), t) + self.assertEqual(sim.fpr(i+6), u) + def run_tst_program(self, prog, initial_regs=None, svstate=None, initial_mem=None, diff --git a/src/openpower/sv/trans/svp64.py b/src/openpower/sv/trans/svp64.py index 5fa76eeb..1c4c73ad 100644 --- a/src/openpower/sv/trans/svp64.py +++ b/src/openpower/sv/trans/svp64.py @@ -205,6 +205,7 @@ class SVP64Asm: insn |= (fields[2]-1) << (31-20) # SVzd , bits 16-20 insn |= (fields[3]) << (31-25) # SVRM , bits 21-25 insn |= 0b00001 << (31-30) # XO , bits 26..30 + #insn &= ((1<<32)-1) log ("svremap", bin(insn)) yield ".long 0x%x" % insn return @@ -967,10 +968,10 @@ if __name__ == '__main__': #'sv.lhzbr 5.v, 11(9.v), 15', #'sv.lwzbr 5.v, 11(9.v), 15', 'sv.ffmadds 6.v, 2.v, 4.v, 6.v', - 'svremap 2, 2, 3, 0', ] lst = [ 'sv.fmadds 0.v, 8.v, 16.v, 4.v', + 'svremap 8, 1, 1, 1', ] isa = SVP64Asm(lst, macros=macros) print ("list", list(isa)) -- 2.30.2