From: Luke Kenneth Casson Leighton Date: Sat, 24 Jul 2021 16:43:09 +0000 (+0100) Subject: added an extra SVP64 instruction, svstep, to replace setvl X-Git-Tag: xlen-bcd~213 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=e493511061a56e231a854d7dc2031755b0169251;p=openpower-isa.git added an extra SVP64 instruction, svstep, to replace setvl "get end state" mode --- diff --git a/openpower/isa/simplev.mdwn b/openpower/isa/simplev.mdwn index eb827786..1ddc0ffe 100644 --- a/openpower/isa/simplev.mdwn +++ b/openpower/isa/simplev.mdwn @@ -1,17 +1,33 @@ +# svstep + +SVL-Form + +* svstep RT,SVi,vf +* svstep. RT,SVi,vf + +Pseudo-code: + + step <- SVSTATE_NEXT(SVi, vf) + RT <- [0]*57 || step + +Special Registers Altered: + + CR0 (if Rc=1) + # setvl SVL-Form -* setvl RT, RA, SVi, vf, vs, ms -* setvl. RT, RA, SVi, vf, vs, ms +* setvl RT,RA,SVi,vf,vs,ms +* setvl. RT,RA,SVi,vf,vs,ms Pseudo-code: if (vf & (¬vs) & ¬(ms)) = 1 then - step <- SVSTATE_NEXT(SVi) + step <- SVSTATE_NEXT(SVi, 0b0) if _RT != 0b00000 then GPR(_RT) <- [0]*57 || step else @@ -43,7 +59,7 @@ Special Registers Altered: SVRM-Form -* svremap SVme, mi0, mi1, mi2, mo0, mo1, pst +* svremap SVme,mi0,mi1,mi2,mo0,mo1,pst Pseudo-code: @@ -66,7 +82,7 @@ Special Registers Altered: SVM-Form -* svshape SVxd, SVyd, SVzd, SVRM, vf +* svshape SVxd,SVyd,SVzd,SVRM,vf Pseudo-code: @@ -147,8 +163,13 @@ Pseudo-code: SVSHAPE0[21:23] <- 0b001 # "inverse" on outer loop # copy SVSHAPE1[0:31] <- SVSHAPE0[0:31] + SVSHAPE2[0:31] <- SVSHAPE0[0:31] + SVSHAPE3[0:31] <- SVSHAPE0[0:31] # for FRA and FRT SVSHAPE0[28:29] <- 0b01 # j+halfstep schedule + # for cos coefficient + SVSHAPE2[28:29] <- 0b10 # ci schedule + SVSHAPE3[28:29] <- 0b11 # size schedule # set schedule up for DCT Outer butterfly if (SVRM = 0b0011) then # calculate O(N log2 N) number of outer butterfly overlapping adds @@ -173,12 +194,8 @@ Pseudo-code: # copy SVSHAPE1[0:31] <- SVSHAPE0[0:31] SVSHAPE2[0:31] <- SVSHAPE0[0:31] - SVSHAPE3[0:31] <- SVSHAPE0[0:31] # for FRA and FRT SVSHAPE1[28:29] <- 0b01 # j+halfstep schedule - # for cos coefficient - SVSHAPE2[28:29] <- 0b10 # ci schedule - SVSHAPE3[28:29] <- 0b11 # size schedule # set VL, MVL and Vertical-First SVSTATE[0:6] <- vlen SVSTATE[7:13] <- vlen diff --git a/openpower/isatables/RM-1P-1D.csv b/openpower/isatables/RM-1P-1D.csv new file mode 100644 index 00000000..0e570664 --- /dev/null +++ b/openpower/isatables/RM-1P-1D.csv @@ -0,0 +1,2 @@ +insn,CONDITIONS,Ptype,Etype,0,1,2,3,in1,in2,in3,out,CR in,CR out,out2 +svstep,,1P,EXTRA3,d:RT;d:CR0,0,0,0,0,0,0,RT,0,CR0,0 diff --git a/openpower/isatables/minor_22.csv b/openpower/isatables/minor_22.csv index b9899635..6d008723 100644 --- a/openpower/isatables/minor_22.csv +++ b/openpower/isatables/minor_22.csv @@ -2,3 +2,4 @@ opcode,unit,internal op,in1,in2,in3,out,CR in,CR out,inv A,inv out,cry in,cry ou 0b00000,VL,OP_SETVL,RA_OR_ZERO,NONE,NONE,RT_OR_ZERO,NONE,CR0,0,0,ZERO,0,NONE,0,0,0,0,0,0,RC,0,0,setvl,SVL, 0b00001,VL,OP_SVSHAPE,NONE,NONE,NONE,NONE,NONE,NONE,0,0,ZERO,0,NONE,0,0,0,0,0,0,RC,0,0,svshape,SVM, 0b00010,VL,OP_SVREMAP,NONE,NONE,NONE,NONE,NONE,NONE,0,0,ZERO,0,NONE,0,0,0,0,0,0,RC,0,0,svremap,SVRM, +0b00011,VL,OP_SVSTEP,NONE,NONE,NONE,RT,NONE,CR0,0,0,ZERO,0,NONE,0,0,0,0,0,0,RC,0,0,svstep,SVL, diff --git a/src/openpower/decoder/isa/caller.py b/src/openpower/decoder/isa/caller.py index b3d4d8cc..bb89d4c1 100644 --- a/src/openpower/decoder/isa/caller.py +++ b/src/openpower/decoder/isa/caller.py @@ -481,6 +481,12 @@ def get_pdecode_idx_out(dec2, name): dec2.dec.RT) if out_sel == OutSel.RT.value: return out, o_isvec + elif name == 'RT_OR_ZERO': + log ("get_pdecode_idx_out", out_sel, OutSel.RT.value, + OutSel.RT_OR_ZERO.value, out, o_isvec, + dec2.dec.RT) + if out_sel == OutSel.RT_OR_ZERO.value: + return out, o_isvec elif name == 'FRA': log ("get_pdecode_idx_out", out_sel, OutSel.FRA.value, out, o_isvec) if out_sel == OutSel.FRA.value: @@ -857,6 +863,7 @@ class ISACaller: SO = self.spr['XER'][XER_bits['SO']] log("handle_comparison SO", SO) cr_field = selectconcat(negative, positive, zero, SO) + log("handle_comparison cr_field", self.cr, cr_idx, cr_field) self.crl[cr_idx].eq(cr_field) def set_pc(self, pc_val): @@ -1112,6 +1119,11 @@ class ISACaller: illegal = False ins_name = 'setvl' + # and svstep not being supported by binutils (.long) + if asmop.startswith('svstep'): + illegal = False + ins_name = 'svstep' + # and svremap not being supported by binutils (.long) if asmop.startswith('svremap'): illegal = False @@ -1286,10 +1298,14 @@ class ISACaller: reg_val = 0 inputs.append(reg_val) # arrrrgh, awful hack, to get _RT into namespace - if ins_name == 'setvl': + if ins_name in ['setvl', 'svstep']: regname = "_RT" RT = yield self.dec2.dec.RT self.namespace[regname] = SelectableInt(RT, 5) + if RT == 0: + self.namespace["RT"] = SelectableInt(0, 5) + regnum, is_vec = yield from get_pdecode_idx_out(self.dec2, "RT") + log('hack input reg %s %s' % (name, str(regnum)), is_vec) # in SVP64 mode for LD/ST work out immediate # XXX TODO: replace_ds for DS-Form rather than D-Form. @@ -1416,7 +1432,7 @@ class ISACaller: if not self.is_svp64_mode or not pred_dst_zero: if hasattr(self.dec2.e.do, "rc"): rc_en = yield self.dec2.e.do.rc.rc - if rc_en: + if rc_en and ins_name not in ['svstep']: regnum, is_vec = yield from get_pdecode_cr_out(self.dec2, "CR0") self.handle_comparison(results, regnum) @@ -1474,7 +1490,8 @@ class ISACaller: pre = False post = False if self.allow_next_step_inc: - log("SVSTATE_NEXT: inc requested, mode", self.svstate_next_mode) + log("SVSTATE_NEXT: inc requested, mode", + self.svstate_next_mode, self.allow_next_step_inc) yield from self.svstate_pre_inc() pre = yield from self.update_new_svstate_steps() if pre: @@ -1487,17 +1504,22 @@ class ISACaller: results = [SelectableInt(0, 64)] self.handle_comparison(results) # CR0 else: - log ("SVSTATE_NEXT: post-inc") + if self.allow_next_step_inc == 2: + log ("SVSTATE_NEXT: read") + yield from self.svstate_post_inc() + else: + log ("SVSTATE_NEXT: post-inc") srcstep, dststep = self.new_srcstep, self.new_dststep remaps = self.get_remap_indices() remap_idxs = self.remap_idxs vl = self.svstate.vl end_src = srcstep == vl-1 end_dst = dststep == vl-1 - if not end_src: - self.svstate.srcstep += SelectableInt(1, 7) - if not end_dst: - self.svstate.dststep += SelectableInt(1, 7) + if self.allow_next_step_inc != 2: + if not end_src: + self.svstate.srcstep += SelectableInt(1, 7) + if not end_dst: + self.svstate.dststep += SelectableInt(1, 7) self.namespace['SVSTATE'] = self.svstate.spr # set CR0 (if Rc=1) based on end if rc_en: @@ -1533,13 +1555,13 @@ class ISACaller: self.update_pc_next() - def SVSTATE_NEXT(self, mode): + def SVSTATE_NEXT(self, mode, submode): """explicitly moves srcstep/dststep on to next element, for "Vertical-First" mode. this function is called from setvl pseudo-code, as a pseudo-op "svstep" """ - log("SVSTATE_NEXT mode", mode) - self.allow_next_step_inc = True + self.allow_next_step_inc = submode.value + 1 + log("SVSTATE_NEXT mode", mode, submode, self.allow_next_step_inc) self.svstate_next_mode = mode if self.svstate_next_mode > 0: shape_idx = self.svstate_next_mode.value-1 @@ -1556,8 +1578,9 @@ class ISACaller: sv_a_nz = yield self.dec2.sv_a_nz fft_mode = yield self.dec2.use_svp64_fft in1 = yield self.dec2.e.read_reg1.data - log ("SVP64: VL, srcstep, dststep, sv_a_nz, in1 fft", - vl, srcstep, dststep, sv_a_nz, in1, fft_mode) + log ("SVP64: VL, srcstep, dststep, sv_a_nz, in1 fft, svp64", + vl, srcstep, dststep, sv_a_nz, in1, fft_mode, + self.is_svp64_mode) # get predicate mask srcmask = dstmask = 0xffff_ffff_ffff_ffff diff --git a/src/openpower/decoder/isa/test_caller_setvl.py b/src/openpower/decoder/isa/test_caller_setvl.py index 0d710148..fe8ffd98 100644 --- a/src/openpower/decoder/isa/test_caller_setvl.py +++ b/src/openpower/decoder/isa/test_caller_setvl.py @@ -324,6 +324,57 @@ class DecoderTestCase(FHDLTestCase): self.assertEqual(CR0[CRFields.GT], 0) self.assertEqual(CR0[CRFields.SO], 0) + def test_svstep_inner_loop_8_jl(self): + """tests svstep inner loop, running 8 times (sv.setvl.), checking + jl is copied into a *Vector* result. + + fuuun... + """ + lst = SVP64Asm([ + # set DCT triple butterfly mode with persistent "REMAP" + "svshape 8, 1, 1, 2, 0", + "svremap 0, 0, 0, 2, 0, 1, 1", + "sv.svstep 2.v, 4, 1", # svstep get vector of ci + "sv.svstep 16.v, 3, 1", # svstep get vector of step + ]) + lst = list(lst) + + # SVSTATE + svstate = SVP64State() + #svstate.vl = 2 # VL + #svstate.maxvl = 2 # MAXVL + print ("SVSTATE", bin(svstate.asint())) + + with Program(lst, bigendian=False) as program: + sim = self.run_tst_program(program, svstate=svstate) + print ("SVSTATE after", bin(sim.svstate.asint())) + print (" vl", bin(sim.svstate.vl)) + print (" mvl", bin(sim.svstate.maxvl)) + print (" srcstep", bin(sim.svstate.srcstep)) + print (" dststep", bin(sim.svstate.dststep)) + print (" vfirst", bin(sim.svstate. vfirst)) + self.assertEqual(sim.svstate.vl, 12) + self.assertEqual(sim.svstate.maxvl, 12) + # svstep called four times, reset occurs, srcstep zero + self.assertEqual(sim.svstate.srcstep, 0) + self.assertEqual(sim.svstate.dststep, 0) + for i in range(4): + self.assertEqual(sim.gpr(2+i), SelectableInt(8, 64)) + self.assertEqual(sim.gpr(6+i), SelectableInt(4, 64)) + self.assertEqual(sim.gpr(10+i), SelectableInt(2, 64)) + self.assertEqual(sim.gpr(16+i), SelectableInt(i, 64)) + self.assertEqual(sim.gpr(24+i), SelectableInt(0, 64)) + for i in range(2): + self.assertEqual(sim.gpr(20+i), SelectableInt(i, 64)) + self.assertEqual(sim.gpr(22+i), SelectableInt(i, 64)) + self.assertEqual(sim.svstate.vfirst, 0) + CR0 = sim.crl[0] + print(" CR0", bin(CR0.get_range().value)) + self.assertEqual(CR0[CRFields.EQ], 0) + self.assertEqual(CR0[CRFields.LT], 0) + self.assertEqual(CR0[CRFields.GT], 0) + self.assertEqual(CR0[CRFields.SO], 0) + def test_sv_add(self): """sets VL=2 then adds: * 1 = 5 + 9 => 0x5555 = 0x4321+0x1234 diff --git a/src/openpower/decoder/power_enums.py b/src/openpower/decoder/power_enums.py index 516d8523..97549387 100644 --- a/src/openpower/decoder/power_enums.py +++ b/src/openpower/decoder/power_enums.py @@ -283,6 +283,7 @@ _insns = [ "setvl", # https://libre-soc.org/openpower/sv/setvl "svremap", # https://libre-soc.org/openpower/sv/remap - TEMPORARY "svshape", # https://libre-soc.org/openpower/sv/remap + "svstep", # https://libre-soc.org/openpower/sv/setvl "sim_cfg", "slbia", "sld", "slw", "srad", "sradi", "sraw", "srawi", "srd", "srw", @@ -395,6 +396,7 @@ class MicrOp(Enum): OP_FP_MADD = 79 OP_SVREMAP = 80 OP_SVSHAPE = 81 + OP_SVSTEP = 82 @unique diff --git a/src/openpower/sv/sv_analysis.py b/src/openpower/sv/sv_analysis.py index 4d1f7af6..d55f13d9 100644 --- a/src/openpower/sv/sv_analysis.py +++ b/src/openpower/sv/sv_analysis.py @@ -249,6 +249,7 @@ def process_csvs(): '1R-1W-imm': 'RM-2P-1S1D', '1R-CRo': 'RM-2P-1S1D', '1R-imm': 'non-SV', + '1W-CRo': 'RM-1P-1D', '1W': 'non-SV', '1W-CRi': 'RM-2P-1S1D', 'CRio': 'RM-2P-1S1D', @@ -565,6 +566,11 @@ def process_csvs(): res['2'] = 's:FRB' # FRB: Rsrc2_EXTRA2 res['3'] = 's:FRC' # FRC: Rsrc3_EXTRA2 + elif value == 'RM-1P-1D': + res['Etype'] = 'EXTRA3' # RM EXTRA3 type + if insn_name == 'svstep': + res['0'] = 'd:RT;d:CR0' # RT,CR0: Rdest1_EXTRA2 + # add to svp64 csvs #for k in ['in1', 'in2', 'in3', 'out', 'CR in', 'CR out']: # del res[k] diff --git a/src/openpower/sv/trans/svp64.py b/src/openpower/sv/trans/svp64.py index c4cbb143..273ca6b3 100644 --- a/src/openpower/sv/trans/svp64.py +++ b/src/openpower/sv/trans/svp64.py @@ -198,6 +198,21 @@ class SVP64Asm: yield ".long 0x%x" % insn return + # sigh have to do setvl here manually for now... + # note the subtract one from SVi. + if opcode in ["svstep", "svstep."]: + insn = 22 << (31-5) # opcode 22, bits 0-5 + fields = list(map(int, fields)) + insn |= fields[0] << (31-10) # RT , bits 6-10 + insn |= (fields[1]-1) << (31-22) # SVi , bits 16-22 + insn |= fields[2] << (31-25) # vf , bit 25 + insn |= 0b00011 << (31-30) # XO , bits 26..30 + if opcode == 'svstep.': + insn |= 1 << (31-31) # Rc=1 , bit 31 + log ("svstep", bin(insn)) + yield ".long 0x%x" % insn + return + # and svshape. note that the dimension fields one subtracted from each if opcode == 'svshape': insn = 22 << (31-5) # opcode 22, bits 0-5 @@ -347,8 +362,8 @@ class SVP64Asm: # okaaay now we identify the field value (opcode N,N,N) with # the pseudo-code info (opcode RT, RA, RB) assert len(fields) == len(v30b_regs), \ - "length of fields %s must match insn `%s`" % \ - (str(v30b_regs), insn) + "length of fields %s must match insn `%s` fields %s" % \ + (str(v30b_regs), insn, str(fields)) opregfields = zip(fields, v30b_regs) # err that was easy # now for each of those find its place in the EXTRA encoding @@ -887,6 +902,33 @@ class SVP64Asm: if rc: opcode |= 1 # Rc, bit 31. yield ".long 0x%x" % opcode + # sigh have to do svstep here manually for now... + elif opcode in ["svstep", "svstep."]: + insn = 22 << (31-5) # opcode 22, bits 0-5 + insn |= int(v30b_newfields[0]) << (31-10) # RT , bits 6-10 + insn |= int(v30b_newfields[1]) << (31-22) # SVi , bits 16-22 + insn |= int(v30b_newfields[2]) << (31-25) # vf , bit 25 + insn |= 0b00011 << (31-30) # XO , bits 26..30 + if opcode == 'svstep.': + insn |= 1 << (31-31) # Rc=1 , bit 31 + log ("svstep", bin(insn)) + yield ".long 0x%x" % insn + + elif v30b_op in ["setvl", "setvl."]: + insn = 22 << (31-5) # opcode 22, bits 0-5 + fields = list(map(int, fields)) + insn |= fields[0] << (31-10) # RT , bits 6-10 + insn |= fields[1] << (31-15) # RA , bits 11-15 + insn |= (fields[2]-1) << (31-22) # SVi , bits 16-22 + insn |= fields[3] << (31-25) # ms , bit 25 + insn |= fields[4] << (31-24) # vs , bit 24 + insn |= fields[5] << (31-23) # vf , bit 23 + insn |= 0b00000 << (31-30) # XO , bits 26..30 + if opcode == 'setvl.': + insn |= 1 << (31-31) # Rc=1 , bit 31 + log ("setvl", bin(insn)) + yield ".long 0x%x" % insn + else: yield "%s %s" % (v30b_op+rc, ", ".join(v30b_newfields)) log ("new v3.0B fields", v30b_op, v30b_newfields) @@ -1022,6 +1064,7 @@ if __name__ == '__main__': 'sv.stw 5.v, 4(1.v)', 'sv.ld 5.v, 4(1.v)', 'setvl. 2, 3, 4, 0, 1, 1', + 'sv.setvl. 2, 3, 4, 0, 1, 1', ] lst = [ "sv.stfsu 0.v, 16(4.v)", @@ -1050,6 +1093,7 @@ if __name__ == '__main__': lst = [ 'sv.lfsbr 4.v, 11(8.v), 15', #'sv.lwzbr 4.v, 11(8.v), 15', + 'sv.svstep. 2.v, 4, 0', ] isa = SVP64Asm(lst, macros=macros) print ("list", list(isa))