From: Luke Kenneth Casson Leighton Date: Tue, 27 Jul 2021 15:17:52 +0000 (+0100) Subject: add new DCT inner butterfly shorter COS-gen mode unit test X-Git-Tag: xlen-bcd~201 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=09a444283f40fecc9ff74a61b55a7c84fe6ceb6b;p=openpower-isa.git add new DCT inner butterfly shorter COS-gen mode unit test --- diff --git a/openpower/isa/simplev.mdwn b/openpower/isa/simplev.mdwn index 6af33bb8..fd0c242a 100644 --- a/openpower/isa/simplev.mdwn +++ b/openpower/isa/simplev.mdwn @@ -90,6 +90,7 @@ Pseudo-code: # for convenience, VL to be calculated and stored in SVSTATE vlen <- [0] * 7 + itercount[0:6] <- [0] * 7 SVSTATE[0:31] <- [0] * 32 # only overwrite REMAP if "persistence" is zero if (SVSTATE[62] = 0b0) then @@ -220,7 +221,7 @@ Pseudo-code: # set up FRB and FRS SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim SVSHAPE0[30:31] <- 0b01 # DCT/FFT mode - SVSHAPE0[6:11] <- 0b000011 # DCT Inner Butterfly COS-gen mode + SVSHAPE0[6:11] <- 0b000100 # DCT Inner Butterfly COS-gen mode SVSHAPE0[21:23] <- 0b001 # "inverse" on outer loop # copy SVSHAPE1[0:31] <- SVSHAPE0[0:31] diff --git a/src/openpower/decoder/isa/svshape.py b/src/openpower/decoder/isa/svshape.py index 75b392a0..8275e05e 100644 --- a/src/openpower/decoder/isa/svshape.py +++ b/src/openpower/decoder/isa/svshape.py @@ -125,7 +125,7 @@ class SVSHAPE(SelectableInt): iterate_fn = iterate_dct_inner_butterfly_indices elif self.ydimsz == 3: iterate_fn = iterate_dct_outer_butterfly_indices - elif self.ydimsz == 3: + elif self.ydimsz == 5: iterate_fn = iterate_dct_inner_costable_indices # create a **NEW** iterator each time this is called return iterate_fn(deepcopy(self)) diff --git a/src/openpower/decoder/isa/test_caller_svp64_dct.py b/src/openpower/decoder/isa/test_caller_svp64_dct.py index dc0551a5..4b7c62b5 100644 --- a/src/openpower/decoder/isa/test_caller_svp64_dct.py +++ b/src/openpower/decoder/isa/test_caller_svp64_dct.py @@ -455,6 +455,81 @@ class DCTTestCase(FHDLTestCase): "err", err) self.assertTrue(err < 1e-6) + def test_sv_remap_dct_cos_precompute_inner_8(self): + """pre-computes a DCT COS table, using the shorter costable + indices schedule + + the simpler (scalar) version is in test_caller_transcendentals.py + (test_fp_coss_cvt), this is the SVP64 variant. TODO: really + need the new version of fcfids which doesn't spam memory with + LD/STs. + """ + lst = SVP64Asm(["svshape 8, 1, 1, 5, 0", + "svremap 0, 0, 0, 2, 0, 1, 1", + "sv.svstep 4.v, 3, 1", # svstep get vector of ci + "sv.svstep 16.v, 2, 1", # svstep get vector of step + "addi 1, 0, 0x0000", + "setvl 0, 0, 7, 0, 1, 1", + "sv.std 4.v, 0(1)", + "sv.lfd 64.v, 0(1)", + "sv.fcfids 48.v, 64.v", + "addi 1, 0, 0x0060", + "sv.std 16.v, 0(1)", + "sv.lfd 12.v, 0(1)", + "sv.fcfids 24.v, 12.v", + "sv.fadds 0.v, 24.v, 43", # plus 0.5 + "sv.fmuls 0.v, 0.v, 41", # times PI + "sv.fdivs 0.v, 0.v, 48.v", # div size + "sv.fcoss 80.v, 0.v", + "sv.fdivs 80.v, 43, 80.v", # div 0.5 / x + ]) + lst = list(lst) + + gprs = [0] * 32 + fprs = [0] * 128 + # constants + fprs[43] = fp64toselectable(0.5) # 0.5 + fprs[41] = fp64toselectable(math.pi) # pi + fprs[44] = fp64toselectable(2.0) # 2.0 + + n = 8 + + ctable = [] + size = n + while size >= 2: + halfsize = size // 2 + for ci in range(halfsize): + coeff = math.cos((ci + 0.5) * math.pi / size) * 2.0 + ctable.append(coeff) + print ("coeff", "ci", ci, "size", size, + "i/n", (ci+0.5), 1.0/coeff) + size //= 2 + + with Program(lst, bigendian=False) as program: + sim = self.run_tst_program(program, gprs, initial_fprs=fprs) + print ("MEM") + sim.mem.dump() + print ("ci FP") + for i in range(len(ctable)): + actual = float(sim.fpr(i+24)) + print ("i", i, actual) + print ("size FP") + for i in range(len(ctable)): + actual = float(sim.fpr(i+48)) + print ("i", i, actual) + print ("temps") + for i in range(len(ctable)): + actual = float(sim.fpr(i)) + print ("i", i, actual) + for i in range(len(ctable)): + expected = 1.0/ctable[i] + actual = float(sim.fpr(i+80)) + err = abs((actual - expected) / expected) + print ("i", i, actual, "1/expect", 1/expected, + "expected", expected, + "err", err) + self.assertTrue(err < 1e-6) + def run_tst_program(self, prog, initial_regs=None, svstate=None, initial_mem=None,