From: Luke Kenneth Casson Leighton Date: Sat, 17 Jul 2021 12:07:40 +0000 (+0100) Subject: add FP LOAD bit-reversed operations to ISACaller simulator X-Git-Tag: xlen-bcd~276 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=69fb5156f5fab27b9f29d404334de010150d1452;p=openpower-isa.git add FP LOAD bit-reversed operations to ISACaller simulator --- diff --git a/openpower/isa/svfpload.mdwn b/openpower/isa/svfpload.mdwn new file mode 100644 index 00000000..533ec829 --- /dev/null +++ b/openpower/isa/svfpload.mdwn @@ -0,0 +1,72 @@ + + + + +# Load Floating-Point Single + +SVD-Form + +* lfsbr FRT,SVD(RA),RC + +Pseudo-code: + + b <- (RA|0) + n <- (RC)[58:63] + EA <- b + SHL64(bitrev(srcstep, VL) * EXTS(SVD), n) + FRT <- DOUBLE(MEM(EA, 4)) + +Special Registers Altered: + + None + +# Load Floating-Point Single with Update + +SVD-Form + +* lfsubr FRT,SVD(RA),RC + +Pseudo-code: + + n <- (RC)[58:63] + EA <- (RA) + SHL64(bitrev(srcstep, VL) * EXTS(SVD), n) + FRT <- DOUBLE(MEM(EA, 4)) + RA <- EA + +Special Registers Altered: + + None + +# Load Floating-Point Double + +SVD-Form + +* lfdbr FRT,SVD(RA),RC + +Pseudo-code: + + b <- (RA|0) + n <- (RC)[58:63] + EA <- b + SHL64(bitrev(srcstep, VL) * EXTS(SVD), n) + FRT <- MEM(EA, 8) + +Special Registers Altered: + + None + +# Load Floating-Point Double with Update + +SVD-Form + +* lfdubr FRT,SVD(RA),RC + +Pseudo-code: + + n <- (RC)[58:63] + EA <- (RA) + SHL64(bitrev(srcstep, VL) * EXTS(SVD), n) + FRT <- MEM(EA, 8) + RA <- EA + +Special Registers Altered: + + None + diff --git a/src/openpower/decoder/isa/test_caller_svp64_fft.py b/src/openpower/decoder/isa/test_caller_svp64_fft.py index b4c94280..ae71a449 100644 --- a/src/openpower/decoder/isa/test_caller_svp64_fft.py +++ b/src/openpower/decoder/isa/test_caller_svp64_fft.py @@ -13,7 +13,7 @@ from openpower.decoder.helpers import fp64toselectable from openpower.decoder.isafunctions.double2single import DOUBLE2SINGLE -def transform_radix2(vec, exptable): +def transform_radix2(vec, exptable, reverse=False): """ # FFT and convolution test (Python), based on Project Nayuki # @@ -34,7 +34,8 @@ def transform_radix2(vec, exptable): levels = n.bit_length() - 1 # Copy with bit-reversed permutation - #vec = [vec[reverse_bits(i, levels)] for i in range(n)] + if reverse: + vec = [vec[reverse_bits(i, levels)] for i in range(n)] size = 2 while size <= n: @@ -61,7 +62,7 @@ def transform_radix2(vec, exptable): return vec -def transform_radix2_complex(vec_r, vec_i, cos_r, sin_i): +def transform_radix2_complex(vec_r, vec_i, cos_r, sin_i, reverse=False): """ # FFT and convolution test (Python), based on Project Nayuki # @@ -82,7 +83,8 @@ def transform_radix2_complex(vec_r, vec_i, cos_r, sin_i): levels = n.bit_length() - 1 # Copy with bit-reversed permutation - #vec = [vec[reverse_bits(i, levels)] for i in range(n)] + if reverse: + vec = [vec[reverse_bits(i, levels)] for i in range(n)] size = 2 while size <= n: @@ -534,7 +536,7 @@ class FFTTestCase(FHDLTestCase): "svremap 31, 1, 0, 2, 0, 1, 1", """ lst = SVP64Asm( [ - # set triple butterfly mode with "REMAP" schedule + # set triple butterfly mode with persistent "REMAP" "svshape 8, 1, 1, 1, 1", "svremap 31, 1, 0, 2, 0, 1, 1", # tpre @@ -678,6 +680,70 @@ class FFTTestCase(FHDLTestCase): self.assertEqual(sim.fpr(i+2), t) self.assertEqual(sim.fpr(i+6), u) + def test_sv_remap_fpmadds_fft_ldst(self): + """>>> lst = ["svshape 8, 1, 1, 1, 0", + "svremap 31, 1, 0, 2, 0, 1, 0", + "sv.ffmadds 2.v, 2.v, 2.v, 10.v" + ] + runs a full in-place O(N log2 N) butterfly schedule for + Discrete Fourier Transform, using bit-reversed LD/ST + """ + lst = SVP64Asm( ["setvl 0, 0, 8, 0, 1, 1", + "sv.lfsbr 0.v, 4(0), 20", # bit-reversed + #"svshape 8, 1, 1, 1, 0", + #"svremap 31, 1, 0, 2, 0, 1, 0", + #"sv.ffmadds 0.v, 0.v, 0.v, 8.v" + ]) + lst = list(lst) + + # array and coefficients to test + av = [7.0, -9.8, 3.0, -32.3, + -2.0, 5.0, -9.8, 31.3] # array 0..7 + coe = [-0.25, 0.5, 3.1, 6.2] # coefficients + + # store in regfile + fprs = [0] * 32 + for i, c in enumerate(coe): + fprs[i+8] = fp64toselectable(c) + # store in memory + mem = {} + for i, a in enumerate(av): + shift = (i % 2) == 1 + if shift == 0: + mem[(i//2)*8] = fp64toselectable(a).value + else: + mem[(i//2)*8] |= fp64toselectable(a).value << 32 + + with Program(lst, bigendian=False) as program: + sim = self.run_tst_program(program, initial_mem=mem, + initial_fprs=fprs) + print ("spr svshape0", sim.spr['SVSHAPE0']) + print (" xdimsz", sim.spr['SVSHAPE0'].xdimsz) + print (" ydimsz", sim.spr['SVSHAPE0'].ydimsz) + print (" zdimsz", sim.spr['SVSHAPE0'].zdimsz) + print ("spr svshape1", sim.spr['SVSHAPE1']) + print ("spr svshape2", sim.spr['SVSHAPE2']) + print ("spr svshape3", sim.spr['SVSHAPE3']) + + print ("mem dump") + print (sim.mem.dump()) + + # work out the results with the twin mul/add-sub + res = transform_radix2(av, coe) + + for i, expected in enumerate(res): + print ("i", i, float(sim.fpr(i)), "expected", expected) + for i, expected in enumerate(res): + # convert to Power single + expected = DOUBLE2SINGLE(fp64toselectable(expected)) + expected = float(expected) + actual = float(sim.fpr(i)) + # approximate error calculation, good enough test + # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB + # and the rounding is different + err = abs(actual - expected) / expected + self.assertTrue(err < 1e-7) + def run_tst_program(self, prog, initial_regs=None, svstate=None, initial_mem=None, diff --git a/src/openpower/decoder/isa/test_caller_svp64_ldst.py b/src/openpower/decoder/isa/test_caller_svp64_ldst.py index 64755d58..56ebf1bf 100644 --- a/src/openpower/decoder/isa/test_caller_svp64_ldst.py +++ b/src/openpower/decoder/isa/test_caller_svp64_ldst.py @@ -13,6 +13,7 @@ from openpower.decoder.isa.all import ISA from openpower.decoder.isa.test_caller import Register, run_tst from openpower.sv.trans.svp64 import SVP64Asm from openpower.consts import SVP64CROffs +from openpower.decoder.helpers import fp64toselectable from copy import deepcopy @@ -22,6 +23,10 @@ class DecoderTestCase(FHDLTestCase): for i in range(32): self.assertEqual(sim.gpr(i), SelectableInt(expected[i], 64)) + def _check_fpregs(self, sim, expected): + for i in range(32): + self.assertEqual(sim.fpr(i), SelectableInt(expected[i], 64)) + def test_sv_load_store_elementstride(self): """>>> lst = ["addi 1, 0, 0x0010", "addi 2, 0, 0x0008", @@ -119,7 +124,7 @@ class DecoderTestCase(FHDLTestCase): "addi 7, 0, 0x303", "addi 8, 0, 0x404", "sv.stw 5.v, 0(1)", - "sv.lwzbr 9.v, 4(1), 2"] + "sv.lwzbr 12.v, 4(1), 2"] note: bitreverse mode is... odd. it's the butterfly generator from Cooley-Tukey FFT: @@ -142,14 +147,14 @@ class DecoderTestCase(FHDLTestCase): "addi 7, 0, 0x303", "addi 8, 0, 0x404", "sv.stw 5.v, 0(1)", # scalar r1 + 0 + wordlen*offs - "sv.lwzbr 9.v, 4(1), 2"]) # bit-reversed + "sv.lwzbr 12.v, 4(1), 2"]) # bit-reversed lst = list(lst) # SVSTATE (in this case, VL=4) svstate = SVP64State() - svstate.vl[0:7] = 4 # VL - svstate.maxvl[0:7] = 4 # MAXVL - print ("SVSTATE", bin(svstate.spr.asint())) + svstate.vl = 4 # VL + svstate.maxvl = 4 # MAXVL + print ("SVSTATE", bin(svstate.asint())) with Program(lst, bigendian=False) as program: sim = self.run_tst_program(program, svstate=svstate) @@ -174,16 +179,81 @@ class DecoderTestCase(FHDLTestCase): # r10 => mem[0x18] which was stored from r6 # r11 => mem[0x18] which was stored from r7 # r12 => mem[0x1c] which was stored from r8 - self.assertEqual(sim.gpr(9), SelectableInt(0x101, 64)) - self.assertEqual(sim.gpr(10), SelectableInt(0x303, 64)) - self.assertEqual(sim.gpr(11), SelectableInt(0x202, 64)) - self.assertEqual(sim.gpr(12), SelectableInt(0x404, 64)) + self.assertEqual(sim.gpr(12), SelectableInt(0x101, 64)) + self.assertEqual(sim.gpr(13), SelectableInt(0x303, 64)) + self.assertEqual(sim.gpr(14), SelectableInt(0x202, 64)) + self.assertEqual(sim.gpr(15), SelectableInt(0x404, 64)) + + def test_sv_load_store_bitreverse(self): + """>>> lst = ["addi 1, 0, 0x0010", + "addi 2, 0, 0x0004", + "addi 3, 0, 0x0002", + "sv.stfs 4.v, 0(1)", + "sv.lfsbr 12.v, 4(1), 2"] + + note: bitreverse mode is... odd. it's the butterfly generator + from Cooley-Tukey FFT: + https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Data_reordering,_bit_reversal,_and_in-place_algorithms + + bitreverse LD is computed as: + for i in range(VL): + EA = (RA|0) + (EXTS(D) * LDSTsize * bitreverse(i, VL)) << RC + + bitreversal of 0 1 2 3 in binary 0b00 0b01 0b10 0b11 + produces 0 2 1 3 in binary 0b00 0b10 0b01 0b11 + + and thus creates the butterfly needed for one iteration of FFT. + the RC (shift) is to be able to offset the LDs by Radix-2 spans + """ + lst = SVP64Asm(["addi 1, 0, 0x0010", + "addi 2, 0, 0x0000", + "sv.stfs 4.v, 0(1)", # scalar r1 + 0 + wordlen*offs + "sv.lfsbr 12.v, 4(1), 2"]) # bit-reversed + lst = list(lst) + + # SVSTATE (in this case, VL=4) + svstate = SVP64State() + svstate.vl = 4 # VL + svstate.maxvl = 4 # MAXVL + print ("SVSTATE", bin(svstate.asint())) + + fprs = [0] * 32 + scalar_a = 1.3 + scalar_b = -2.0 + fprs[4] = fp64toselectable(1.0) + fprs[5] = fp64toselectable(2.0) + fprs[6] = fp64toselectable(3.0) + fprs[7] = fp64toselectable(4.0) + + # expected results, remember that bit-reversed load has been done + expected_fprs = deepcopy(fprs) + expected_fprs[12] = fprs[4] # 0b00 -> 0b00 + expected_fprs[13] = fprs[6] # 0b01 -> 0b10 + expected_fprs[14] = fprs[5] # 0b10 -> 0b01 + expected_fprs[15] = fprs[7] # 0b11 -> 0b11 + + with Program(lst, bigendian=False) as program: + sim = self.run_tst_program(program, svstate=svstate, + initial_fprs=fprs) + mem = sim.mem.dump(printout=False) + print ("mem dump") + print (mem) + + print ("FPRs") + sim.fpr.dump() + + #self.assertEqual(mem, [(16, 0x020200000101), + # (24, 0x040400000303)]) + self._check_fpregs(sim, expected_fprs) def run_tst_program(self, prog, initial_regs=None, - svstate=None): + svstate=None, initial_fprs=None): if initial_regs is None: initial_regs = [0] * 32 - simulator = run_tst(prog, initial_regs, svstate=svstate) + if initial_fprs is None: + initial_fprs = [0] * 32 + simulator = run_tst(prog, initial_regs, svstate=svstate, + initial_fprs=initial_fprs) simulator.gpr.dump() return simulator diff --git a/src/openpower/sv/trans/svp64.py b/src/openpower/sv/trans/svp64.py index 3b5eebfa..769daa82 100644 --- a/src/openpower/sv/trans/svp64.py +++ b/src/openpower/sv/trans/svp64.py @@ -1036,6 +1036,10 @@ if __name__ == '__main__': 'svshape 8, 1, 1, 1, 0', 'svshape 8, 1, 1, 1, 1', ] + lst = [ + 'sv.lfsbr 4.v, 11(8.v), 15', + #'sv.lwzbr 4.v, 11(8.v), 15', + ] isa = SVP64Asm(lst, macros=macros) print ("list", list(isa)) csvs = SVP64RM()