From: Luke Kenneth Casson Leighton Date: Mon, 19 Jul 2021 19:45:25 +0000 (+0100) Subject: bit of a reorg, adding option to test end of inner loops of SVSTATE(s) X-Git-Tag: xlen-bcd~249 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=fb899063f59a2a28358405230f3bf059c6487470;p=openpower-isa.git bit of a reorg, adding option to test end of inner loops of SVSTATE(s) needed to pass the immediate to svstep as an option of which SVSTATE0-3 to test --- diff --git a/openpower/isa/simplev.mdwn b/openpower/isa/simplev.mdwn index b825e490..d76d823a 100644 --- a/openpower/isa/simplev.mdwn +++ b/openpower/isa/simplev.mdwn @@ -12,7 +12,7 @@ SVL-Form Pseudo-code: if (vf & (¬vs) & ¬(ms)) = 1 then - SVSTATE_NEXT + SVSTATE_NEXT(SVi) else VLimm <- SVi + 1 if vs = 1 then diff --git a/src/openpower/decoder/isa/caller.py b/src/openpower/decoder/isa/caller.py index c57434cb..7206e108 100644 --- a/src/openpower/decoder/isa/caller.py +++ b/src/openpower/decoder/isa/caller.py @@ -1113,8 +1113,10 @@ class ISACaller: return # this is for setvl "Vertical" mode: if set true, - # srcstep/dststep is explicitly advanced + # srcstep/dststep is explicitly advanced. mode says which SVSTATE to + # test for Rc=1 end condition. 3 bits of all 3 loops are put into CR0 self.allow_next_step_inc = False + self.svstate_next_mode = 0 # nop has to be supported, we could let the actual op calculate # but PowerDecoder has a pattern for nop @@ -1201,6 +1203,7 @@ class ISACaller: ] # go through all iterators in lock-step, advance to next remap_idx remap_idxs = [] + self.remap_loopends = [] for i, (shape, remap) in enumerate(remaps): # zero is "disabled" if shape.value == 0x0: @@ -1208,10 +1211,11 @@ class ISACaller: # pick src or dststep depending on reg num (0-2=in, 3-4=out) step = dststep if (i in [3, 4]) else srcstep # this is terrible. O(N^2) looking for the match. but hey. 
- for idx, remap_idx in enumerate(remap): + for idx, (remap_idx, loopends) in enumerate(remap): if idx == step: break remap_idxs.append(remap_idx) + self.remap_loopends.append(loopends) rremaps = [] # now cross-index the required SHAPE for each of 3-in 2-out regs @@ -1452,7 +1456,7 @@ class ISACaller: pre = False post = False if self.allow_next_step_inc: - log("SVSTATE_NEXT: inc requested") + log("SVSTATE_NEXT: inc requested, mode", self.svstate_next_mode) yield from self.svstate_pre_inc() pre = yield from self.update_new_svstate_steps() if pre: @@ -1479,9 +1483,17 @@ class ISACaller: if rc_en: srcstep = self.svstate.srcstep dststep = self.svstate.srcstep - endtest = 0 if (end_src or end_dst) else 1 - results = [SelectableInt(endtest, 64)] - self.handle_comparison(results) # CR0 + endtest = 1 if (end_src or end_dst) else 0 + #results = [SelectableInt(endtest, 64)] + #self.handle_comparison(results) # CR0 + + # see if svstep was requested, if so, which SVSTATE + endings = 0b111 + if self.svstate_next_mode > 0: + endings = self.remap_loopends[self.svstate_next_mode-1] + cr_field = SelectableInt((~endings)<<1 | endtest, 4) + print ("svstep Rc=1, CR0", cr_field) + self.crl[0].eq(cr_field) # CR0 if end_src or end_dst: # reset at end of loop including exit Vertical Mode log ("SVSTATE_NEXT: after increments, reset") @@ -1500,13 +1512,14 @@ class ISACaller: self.update_pc_next() - def SVSTATE_NEXT(self): + def SVSTATE_NEXT(self, mode): """explicitly moves srcstep/dststep on to next element, for "Vertical-First" mode. 
this function is called from setvl pseudo-code, as a pseudo-op "svstep" """ - log("SVSTATE_NEXT") + log("SVSTATE_NEXT mode", mode) self.allow_next_step_inc = True + self.svstate_next_mode = mode def svstate_pre_inc(self): """check if srcstep/dststep need to skip over masked-out predicate bits diff --git a/src/openpower/decoder/isa/remap_fft_yield.py b/src/openpower/decoder/isa/remap_fft_yield.py index 824bc2ab..ea9421b5 100644 --- a/src/openpower/decoder/isa/remap_fft_yield.py +++ b/src/openpower/decoder/isa/remap_fft_yield.py @@ -43,6 +43,7 @@ def iterate_butterfly_indices(SVSHAPE): skip = 0 while True: for size in x_r: # loop over 3rd order dimension (size) + x_end = size == x_r[-1] # y_r schedule depends on size halfsize = size // 2 tablestep = n // size @@ -52,6 +53,7 @@ def iterate_butterfly_indices(SVSHAPE): # invert if requested if SVSHAPE.invxyz[1]: y_r.reverse() for i in y_r: # loop over 2nd order dimension + y_end = i == y_r[-1] k_r = [] j_r = [] k = 0 @@ -63,10 +65,7 @@ def iterate_butterfly_indices(SVSHAPE): if SVSHAPE.invxyz[2]: k_r.reverse() if SVSHAPE.invxyz[2]: j_r.reverse() for j, k in zip(j_r, k_r): # loop over 1st order dimension - # skip the first entries up to offset - if skip < SVSHAPE.offset: - skip += 1 - continue + z_end = j == j_r[-1] # now depending on MODE return the index if SVSHAPE.skip == 0b00: result = j # for vec[j] @@ -75,11 +74,15 @@ def iterate_butterfly_indices(SVSHAPE): elif SVSHAPE.skip == 0b10: result = k # for exptable[k] - yield result + loopends = (z_end | + ((y_end and z_end)<<1) | + ((y_end and x_end and z_end)<<2)) + + yield result + SVSHAPE.offset, loopends def demo(): # set the dimension sizes here - xdim = 8 + xdim = 16 ydim = 0 # not needed zdim = 0 # again, not needed @@ -145,11 +148,13 @@ def demo(): prefix = "i %d\t" % i k = 0 for j in range(i, i + halfsize): - jl, jh, ks = schedule[idx] + (jl, je), (jh, he), (ks, ke) = schedule[idx] print (" %-3d\t%s j=%-2d jh=%-2d k=%-2d -> " - "j[jl=%-2d] j[jh=%-2d] 
exptable[k=%d]" % \ + "j[jl=%-2d] j[jh=%-2d] ex[k=%d]" % \ (idx, prefix, j, j+halfsize, k, - jl, jh, ks)) + jl, jh, ks, + ), + "end", bin(je)[2:], bin(he)[2:], bin(ke)[2:]) k += tablestep idx += 1 size *= 2 diff --git a/src/openpower/decoder/isa/remapyield.py b/src/openpower/decoder/isa/remapyield.py index a18cfd7e..d0cfd295 100644 --- a/src/openpower/decoder/isa/remapyield.py +++ b/src/openpower/decoder/isa/remapyield.py @@ -19,8 +19,11 @@ def iterate_indices(SVSHAPE): # start an infinite (wrapping) loop while True: for z in z_r: # loop over 1st order dimension + z_end = z == z_r[-1] for y in y_r: # loop over 2nd order dimension + y_end = y == y_r[-1] for x in x_r: # loop over 3rd order dimension + x_end = x == x_r[-1] # ok work out which order to construct things in. # start by creating a list of tuples of the dimension # and its limit @@ -61,13 +64,17 @@ def iterate_indices(SVSHAPE): result += idx # adds on this dimension mult *= lim # for the next dimension - yield result + SVSHAPE.offset + loopends = (x_end | + ((y_end and x_end)<<1) | + ((y_end and x_end and z_end)<<2)) + + yield result + SVSHAPE.offset, loopends def demo(): # set the dimension sizes here xdim = 3 ydim = 2 - zdim = 1 + zdim = 4 # set total (can repeat, e.g. 
VL=x*y*z*4) VL = xdim * ydim * zdim @@ -84,10 +91,10 @@ def demo(): SVSHAPE0.invxyz = [0,0,0] # inversion if desired # enumerate over the iterator function, getting new indices - for idx, new_idx in enumerate(iterate_indices(SVSHAPE0)): + for idx, (new_idx, end) in enumerate(iterate_indices(SVSHAPE0)): if idx >= VL: break - print ("%d->%d" % (idx, new_idx)) + print ("%d->%d" % (idx, new_idx), "end", bin(end)[2:]) # run the demo if __name__ == '__main__': diff --git a/src/openpower/decoder/isa/svstate.py b/src/openpower/decoder/isa/svstate.py index 3ad12a77..68c2a8b2 100644 --- a/src/openpower/decoder/isa/svstate.py +++ b/src/openpower/decoder/isa/svstate.py @@ -1,7 +1,6 @@ -from openpower.decoder.selectable_int import (FieldSelectableInt, SelectableInt, - selectconcat) -from openpower.decoder.isa.remapyield import iterate_indices -from openpower.decoder.isa.remap_fft_yield import iterate_butterfly_indices +from openpower.decoder.selectable_int import (FieldSelectableInt, + SelectableInt, + ) from openpower.sv.svstate import SVSTATERec import os from copy import deepcopy diff --git a/src/openpower/decoder/isa/test_caller_setvl.py b/src/openpower/decoder/isa/test_caller_setvl.py index a3f239b2..2706332c 100644 --- a/src/openpower/decoder/isa/test_caller_setvl.py +++ b/src/openpower/decoder/isa/test_caller_setvl.py @@ -85,10 +85,10 @@ class DecoderTestCase(FHDLTestCase): self.assertEqual(sim.gpr(0), SelectableInt(0, 64)) CR0 = sim.crl[0] print(" CR0", bin(CR0.get_range().value)) - self.assertEqual(CR0[CRFields.EQ], 1) + self.assertEqual(CR0[CRFields.EQ], 0) self.assertEqual(CR0[CRFields.LT], 0) self.assertEqual(CR0[CRFields.GT], 0) - self.assertEqual(CR0[CRFields.SO], 0) + self.assertEqual(CR0[CRFields.SO], 1) def test__svstep_3(self): """tests svstep when it *doesn't* reach VL @@ -125,7 +125,7 @@ class DecoderTestCase(FHDLTestCase): print(" CR0", bin(CR0.get_range().value)) self.assertEqual(CR0[CRFields.EQ], 0) self.assertEqual(CR0[CRFields.LT], 0) - 
self.assertEqual(CR0[CRFields.GT], 1) + self.assertEqual(CR0[CRFields.GT], 0) self.assertEqual(CR0[CRFields.SO], 0) @@ -242,20 +242,20 @@ class DecoderTestCase(FHDLTestCase): self.assertEqual(sim.svstate.vfirst, 0) CR0 = sim.crl[0] print(" CR0", bin(CR0.get_range().value)) - self.assertEqual(CR0[CRFields.EQ], 1) + self.assertEqual(CR0[CRFields.EQ], 0) self.assertEqual(CR0[CRFields.LT], 0) self.assertEqual(CR0[CRFields.GT], 0) - self.assertEqual(CR0[CRFields.SO], 0) + self.assertEqual(CR0[CRFields.SO], 1) # check registers as expected self._check_regs(sim, expected_regs) - def test__svstep_add_2(self): + def test_svstep_add_2(self): """tests svstep with a branch. lst = SVP64Asm(["setvl 3, 0, 2, 1, 1, 1", 'sv.add 1.v, 5.v, 9.v', "setvl. 0, 0, 1, 1, 0, 0", - "bc 4, 2, -0xc" + "bc 6, 3, -0xc" ]) sequence is as follows: * setvl sets VL=2 but also "Vertical First" mode. @@ -281,7 +281,7 @@ class DecoderTestCase(FHDLTestCase): lst = SVP64Asm(["setvl 3, 0, 2, 1, 1, 1", 'sv.add 1.v, 5.v, 9.v', "setvl. 0, 0, 1, 1, 0, 0", # svstep - this is 64-bit! - "bc 4, 2, -0xc" # branch to add (64-bit op so -0xc!) + "bc 6, 3, -0xc" # branch to add (64-bit op so -0xc!) 
]) lst = list(lst) @@ -320,10 +320,10 @@ class DecoderTestCase(FHDLTestCase): self.assertEqual(sim.svstate.vfirst, 0) CR0 = sim.crl[0] print(" CR0", bin(CR0.get_range().value)) - self.assertEqual(CR0[CRFields.EQ], 1) + self.assertEqual(CR0[CRFields.EQ], 0) self.assertEqual(CR0[CRFields.LT], 0) self.assertEqual(CR0[CRFields.GT], 0) - self.assertEqual(CR0[CRFields.SO], 0) + self.assertEqual(CR0[CRFields.SO], 1) # check registers as expected self._check_regs(sim, expected_regs) diff --git a/src/openpower/decoder/isa/test_caller_svp64_fft.py b/src/openpower/decoder/isa/test_caller_svp64_fft.py index b6a6efa6..28aca452 100644 --- a/src/openpower/decoder/isa/test_caller_svp64_fft.py +++ b/src/openpower/decoder/isa/test_caller_svp64_fft.py @@ -200,7 +200,7 @@ class FFTTestCase(FHDLTestCase): "svremap 31, 1, 0, 2, 0, 1, 0", "sv.ffmadds 0.v, 0.v, 0.v, 8.v", "setvl. 0, 0, 1, 1, 0, 0", - "bc 4, 2, -16" + "bc 6, 3, -16" ]) runs a full in-place O(N log2 N) butterfly schedule for Discrete Fourier Transform. this version however uses @@ -215,7 +215,7 @@ class FFTTestCase(FHDLTestCase): "svremap 31, 1, 0, 2, 0, 1, 0", "sv.ffmadds 0.v, 0.v, 0.v, 8.v", "setvl. 0, 0, 1, 1, 0, 0", - "bc 4, 2, -16" + "bc 6, 3, -16" ]) lst = list(lst) @@ -287,7 +287,7 @@ class FFTTestCase(FHDLTestCase): "svremap 26, 0, 0, 0, 0, 1, 1", "sv.ffadds 0.v, 24, 0.v", "setvl. 0, 0, 1, 1, 0, 0", - "bc 4, 2, -28" + "bc 6, 3, -28" ]) runs a full in-place O(N log2 N) butterfly schedule for @@ -326,7 +326,7 @@ class FFTTestCase(FHDLTestCase): "svremap 26, 0, 0, 0, 0, 1, 0", "sv.ffadds 0.v, 24, 0.v", "setvl. 0, 0, 1, 1, 0, 0", - "bc 4, 2, -28" + "bc 6, 3, -28" ]) lst = list(lst) @@ -554,7 +554,7 @@ class FFTTestCase(FHDLTestCase): # svstep loop "setvl. 
0, 0, 1, 1, 0, 0", - "bc 4, 2, -56" + "bc 6, 3, -56" ]) lst = list(lst) diff --git a/src/openpower/decoder/power_pseudo.py b/src/openpower/decoder/power_pseudo.py index 9e4bc81d..fde36b25 100644 --- a/src/openpower/decoder/power_pseudo.py +++ b/src/openpower/decoder/power_pseudo.py @@ -181,7 +181,12 @@ hextest = """ RT <- 0x0001_a000_0000_0000 """ -code = hextest +SVSTATE_next = """ +SVSTATE_NEXT(5) +""" + +code = SVSTATE_next +#code = hextest #code = lswx #code = testcond #code = testdo @@ -243,7 +248,7 @@ def test(): gsc.regfile = {} for i in range(32): gsc.regfile[i] = i - gsc.gpr = GPR(gsc.parser.sd, gsc.regfile) + gsc.gpr = GPR(None, None, None, gsc.regfile) gsc.mem = Mem() _compile = gsc.compile diff --git a/src/openpower/decoder/pseudo/parser.py b/src/openpower/decoder/pseudo/parser.py index 2d6d295e..4f11f6f1 100644 --- a/src/openpower/decoder/pseudo/parser.py +++ b/src/openpower/decoder/pseudo/parser.py @@ -201,6 +201,10 @@ def apply_trailer(atom, trailer, read_regs): name = arg.id if name in regs + fregs: read_regs.add(name) + # special-case, function named "SVSTATE_NEXT" must be made "self.xxxx" + if atom.id == 'SVSTATE_NEXT': + name = ast.Name("self", ast.Load()) + atom = ast.Attribute(name, atom, ast.Load()) return ast.Call(atom, trailer[1], []) # if p[1].id == 'print': # p[0] = ast.Printnl(ast.Tuple(p[2][1]), None, None)