From: Luke Kenneth Casson Leighton Date: Sat, 31 Jul 2021 13:28:25 +0000 (+0100) Subject: add SVP64 i-DCT unit test for inner butterfly, coefficients pre-computed X-Git-Tag: xlen-bcd~172 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=5ace543e1b637b2c12894a16e351ce2f7fad4463;p=openpower-isa.git add SVP64 i-DCT unit test for inner butterfly, coefficients pre-computed at present --- diff --git a/openpower/isa/simplev.mdwn b/openpower/isa/simplev.mdwn index 2971ace1..c04301dc 100644 --- a/openpower/isa/simplev.mdwn +++ b/openpower/isa/simplev.mdwn @@ -166,14 +166,15 @@ Pseudo-code: SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim if (SVRM = 0b1010) | (SVRM = 0b1100) then SVSHAPE0[30:31] <- 0b11 # iDCT mode + SVSHAPE0[18:20] <- 0b011 # iDCT Inner Butterfly sub-mode else SVSHAPE0[30:31] <- 0b01 # DCT mode + SVSHAPE0[18:20] <- 0b001 # DCT Inner Butterfly sub-mode + SVSHAPE0[21:23] <- 0b001 # "inverse" on outer loop if (SVRM = 0b1100) | (SVRM = 0b0100) then SVSHAPE0[6:11] <- 0b000011 # (i)DCT Inner Butterfly mode 4 else SVSHAPE0[6:11] <- 0b000001 # (i)DCT Inner Butterfly mode 2 - SVSHAPE0[18:20] <- 0b001 # DCT Inner Butterfly sub-mode - SVSHAPE0[21:23] <- 0b001 # "inverse" on outer loop # copy SVSHAPE1[0:31] <- SVSHAPE0[0:31] SVSHAPE2[0:31] <- SVSHAPE0[0:31] diff --git a/src/openpower/decoder/isa/test_caller_svp64_dct.py b/src/openpower/decoder/isa/test_caller_svp64_dct.py index 53537ffb..2bc6dd2c 100644 --- a/src/openpower/decoder/isa/test_caller_svp64_dct.py +++ b/src/openpower/decoder/isa/test_caller_svp64_dct.py @@ -137,7 +137,7 @@ def transform_inner_radix2_idct(vec, ctable): levels = n.bit_length() - 1 # pretend we LDed data in half-swapped order - vec = halfrev2(vec, True) + vec = halfrev2(vec, False) ################ # INNER butterfly @@ -375,6 +375,68 @@ class DCTTestCase(FHDLTestCase): print ("err", i, err) self.assertTrue(err < 1e-6) + def test_sv_remap_fpmadds_dct_inner_4(self): + """>>> lst = ["svshape 4, 1, 1, 10, 0", + "svremap 27, 1, 0, 2, 0, 1, 0", + 
"sv.fdmadds 0.v, 0.v, 0.v, 8.v" + ] + runs a full in-place 4-long O(N log2 N) inner butterfly schedule + for inverse-DCT + + SVP64 "REMAP" in Butterfly Mode is applied to a twin +/- FMAC + (3 inputs, 2 outputs) + + Note that the coefficient (FRC) is not on a "schedule", it + is straight Vectorised (0123...) because DCT coefficients + cannot be shared between butterfly layers (due to +0.5) + """ + lst = SVP64Asm( ["svshape 4, 1, 1, 10, 0", + "svremap 27, 1, 0, 2, 0, 1, 0", + "sv.fdmadds 0.v, 0.v, 0.v, 8.v" + ]) + lst = list(lst) + + # array and coefficients to test + n = 4 + levels = n.bit_length() - 1 + coe = [-0.25, 0.5, 3.1, 6.2] # 4 coefficients + avi = [7.0, -0.8, 2.0, -2.3] # full array 0..3 + av = halfrev2(avi, False) + + # store in regfile + fprs = [0] * 32 + for i, c in enumerate(coe): + fprs[i+8] = fp64toselectable(1.0 / c) # invert + for i, a in enumerate(av): + fprs[i+0] = fp64toselectable(a) + + with Program(lst, bigendian=False) as program: + sim = self.run_tst_program(program, initial_fprs=fprs) + print ("spr svshape0", sim.spr['SVSHAPE0']) + print (" xdimsz", sim.spr['SVSHAPE0'].xdimsz) + print (" ydimsz", sim.spr['SVSHAPE0'].ydimsz) + print (" zdimsz", sim.spr['SVSHAPE0'].zdimsz) + print ("spr svshape1", sim.spr['SVSHAPE1']) + print ("spr svshape2", sim.spr['SVSHAPE2']) + print ("spr svshape3", sim.spr['SVSHAPE3']) + + # work out the results with the twin mul/add-sub + res = transform_inner_radix2_idct(avi, coe) + + for i, expected in enumerate(res): + print ("i", i, float(sim.fpr(i)), "expected", expected) + for i, expected in enumerate(res): + # convert to Power single + expected = DOUBLE2SINGLE(fp64toselectable(expected)) + expected = float(expected) + actual = float(sim.fpr(i)) + # approximate error calculation, good enough test + # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB + # and the rounding is different + err = abs((actual - expected) / expected) + print ("err", i, err) + self.assertTrue(err < 1e-6) + def 
test_sv_remap_fpmadds_idct_outer_8(self): """>>> lst = ["svshape 8, 1, 1, 11, 0", "svremap 27, 1, 0, 2, 0, 1, 0",