X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fopenpower%2Fdecoder%2Fisa%2Fcaller.py;h=e51d24f62a731e006e0cab22f7ce1615c80964d4;hb=e0518a4fa19ea3bba436154de8b7d66313caec28;hp=aa53abd0625ab92b3234ccc7895f6391d01da0ba;hpb=2ce36781642bf7fafa5a1c4f489b1f35dbb5a459;p=openpower-isa.git diff --git a/src/openpower/decoder/isa/caller.py b/src/openpower/decoder/isa/caller.py index aa53abd0..e51d24f6 100644 --- a/src/openpower/decoder/isa/caller.py +++ b/src/openpower/decoder/isa/caller.py @@ -1142,7 +1142,7 @@ class ISACaller(ISACallerHelper, ISAFPHelpers): in the class for later use. this to avoid problems with yield """ # go through all iterators in lock-step, advance to next remap_idx - srcstep, dststep = self.get_src_dststeps() + srcstep, dststep, ssubstep, dsubstep = self.get_src_dststeps() # get four SVSHAPEs. here we are hard-coding SVSHAPE0 = self.spr['SVSHAPE0'] SVSHAPE1 = self.spr['SVSHAPE1'] @@ -1228,105 +1228,24 @@ class ISACaller(ISACallerHelper, ISAFPHelpers): if ins_name not in ['mtcrf', 'mtocrf']: illegal = ins_name != asmop - # sigh deal with setvl not being supported by binutils (.long) - if asmop.startswith('setvl'): + # list of instructions not being supported by binutils (.long) + dotstrp = asmop[:-1] if asmop[-1] == '.' else asmop + if dotstrp in [ 'fsins', 'fcoss', + 'ffmadds', 'fdmadds', 'ffadds', + 'mins', 'maxs', 'minu', 'maxu', + 'setvl', 'svindex', 'svremap', 'svstep', 'svshape', + 'grev', 'ternlogi', 'bmask', 'cprop', + 'absdu', 'absds', 'absdacs', 'absdacu', 'avgadd', + 'fmvis', 'fishmv', + ]: illegal = False - ins_name = 'setvl' - - # and svstep not being supported by binutils (.long) - if asmop.startswith('svstep'): - illegal = False - ins_name = 'svstep' - - # and svremap not being supported by binutils (.long) - if asmop.startswith('svremap'): - illegal = False - ins_name = 'svremap' - - # and svshape not being supported by binutils (.long) - if asmop.startswith('svshape'): - illegal = False - ins_name = 'svshape' - - # and svindex - if asmop.startswith('svindex'): - illegal = False - ins_name = "svindex" - - # and fsin and fcos - if asmop == 'fsins': - illegal = False - ins_name = 'fsins' - if asmop == 'fcoss': - illegal = False - ins_name = 'fcoss' - - # sigh also deal with ffmadds not being supported by binutils (.long) - if asmop == 'ffmadds': - illegal = False - ins_name = 'ffmadds' - - # and fdmadds not being supported by binutils (.long) - if asmop == 'fdmadds': - illegal = False - ins_name = 'fdmadds' - - # and ffadds not being supported by binutils (.long) - if asmop == 'ffadds': - illegal = False - ins_name = 'ffadds' - - # and min/max/su - if asmop in ['mins', 'maxs', 'minu', 'maxu', - 'mins.', 'maxs.', 'minu.', 'maxu.']: - illegal = False - ins_name = asmop - - # and anything avgadd - if asmop.startswith('avgadd'): - illegal = False - ins_name = asmop - - # and anything absdu - if asmop.startswith('absdu'): - illegal = False - ins_name = asmop - - # and anything absds - if asmop.startswith('absds'): - illegal = False - ins_name = asmop - - # and anything absadd - if asmop.startswith('absdac'): - illegal = False - ins_name = asmop - - # and anything cprop - if asmop.startswith('cprop'): - illegal = False - ins_name = asmop - - # and anything bmask - if asmop.startswith('bmask'): - illegal = False - ins_name = asmop - - # and anything ternlog - if asmop.startswith('ternlog'): - illegal = False - ins_name = asmop - - # and anything grev - if asmop.startswith('grev'): - illegal = False - ins_name = asmop + ins_name = dotstrp # branch-conditional redirects to sv.bc if asmop.startswith('bc') and self.is_svp64_mode: ins_name = 'sv.%s' % ins_name - log(" post-processed name", ins_name, asmop) + log(" post-processed name", dotstrp, ins_name, asmop) # illegal instructions call TRAP at 0x700 if illegal: @@ -1376,10 +1295,11 @@ class ISACaller(ISACallerHelper, ISAFPHelpers): self.update_nia() self.update_pc_next() return - srcstep, dststep = self.get_src_dststeps() + srcstep, dststep, ssubstep, dsubstep = self.get_src_dststeps() pred_dst_zero = self.pred_dst_zero pred_src_zero = self.pred_src_zero vl = self.svstate.vl + subvl = yield self.dec2.rm_dec.rm_in.subvl # VL=0 in SVP64 mode means "do nothing: skip instruction" if self.is_svp64_mode and vl == 0: @@ -1399,45 +1319,7 @@ class ISACaller(ISACallerHelper, ISAFPHelpers): if persist or self.last_op_svshape: remaps = self.get_remap_indices() if self.is_svp64_mode and (persist or self.last_op_svshape): - # just some convenient debug info - for i in range(4): - sname = 'SVSHAPE%d' % i - shape = self.spr[sname] - log(sname, bin(shape.value)) - log(" lims", shape.lims) - log(" mode", shape.mode) - log(" skip", shape.skip) - - # set up the list of steps to remap - mi0 = self.svstate.mi0 - mi1 = self.svstate.mi1 - mi2 = self.svstate.mi2 - mo0 = self.svstate.mo0 - mo1 = self.svstate.mo1 - steps = [(self.dec2.in1_step, mi0), # RA - (self.dec2.in2_step, mi1), # RB - (self.dec2.in3_step, mi2), # RC - (self.dec2.o_step, mo0), # RT - (self.dec2.o2_step, mo1), # EA - ] - remap_idxs = self.remap_idxs - rremaps = [] - # now cross-index the required SHAPE for each of 3-in 2-out regs - rnames = ['RA', 'RB', 'RC', 'RT', 'EA'] - for i, (dstep, shape_idx) in enumerate(steps): - (shape, remap) = remaps[shape_idx] - remap_idx = remap_idxs[shape_idx] - # zero is "disabled" - if shape.value == 0x0: - continue - # now set the actual requested step to the current index - yield dstep.eq(remap_idx) - - # debug printout info - rremaps.append((shape.mode, i, rnames[i], shape_idx, - remap_idx)) - for x in rremaps: - log("shape remap", x) + yield from self.remap_debug(remaps) # after that, settle down (combinatorial) to let Vector reg numbers # work themselves out yield Settle() @@ -1450,30 +1332,11 @@ class ISACaller(ISACallerHelper, ISAFPHelpers): # main input registers (RT, RA ...) inputs = [] for name in input_names: - # using PowerDecoder2, first, find the decoder index. - # (mapping name RA RB RC RS to in1, in2, in3) - regnum, is_vec = yield from get_pdecode_idx_in(self.dec2, name) - if regnum is None: - # doing this is not part of svp64, it's because output - # registers, to be modified, need to be in the namespace. - regnum, is_vec = yield from get_pdecode_idx_out(self.dec2, name) - if regnum is None: - regnum, is_vec = yield from get_pdecode_idx_out2(self.dec2, - name) - - # in case getting the register number is needed, _RA, _RB - regname = "_" + name - self.namespace[regname] = regnum - if not self.is_svp64_mode or not pred_src_zero: - log('reading reg %s %s' % (name, str(regnum)), is_vec) - if name in fregs: - reg_val = SelectableInt(self.fpr(regnum)) - elif name is not None: - reg_val = SelectableInt(self.gpr(regnum)) - else: - log('zero input reg %s %s' % (name, str(regnum)), is_vec) - reg_val = 0 - inputs.append(reg_val) + print("name", name) + regval = (yield from self.get_input(name)) + print("regval", regval) + inputs.append(regval) + # arrrrgh, awful hack, to get _RT into namespace if ins_name in ['setvl', 'svstep']: regname = "_RT" @@ -1487,71 +1350,8 @@ class ISACaller(ISACallerHelper, ISAFPHelpers): # in SVP64 mode for LD/ST work out immediate # XXX TODO: replace_ds for DS-Form rather than D-Form. # use info.form to detect - replace_d = False # update / replace constant in pseudocode if self.is_svp64_mode: - ldstmode = yield self.dec2.rm_dec.ldstmode - # shift mode reads SVD (or SVDS - TODO) - # *BUT*... because this is "overloading" of LD operations, - # it gets *STORED* into D (or DS, TODO) - if ldstmode == SVP64LDSTmode.SHIFT.value: - imm = yield self.dec2.dec.fields.FormSVD.SVD[0:11] - imm = exts(imm, 11) # sign-extend to integer - log("shift SVD", imm) - replace_d = True - else: - if info.form == 'DS': - # DS-Form, multiply by 4 then knock 2 bits off after - imm = yield self.dec2.dec.fields.FormDS.DS[0:14] * 4 - else: - imm = yield self.dec2.dec.fields.FormD.D[0:16] - imm = exts(imm, 16) # sign-extend to integer - # get the right step. LD is from srcstep, ST is dststep - op = yield self.dec2.e.do.insn_type - offsmul = 0 - if op == MicrOp.OP_LOAD.value: - if remap_active: - offsmul = yield self.dec2.in1_step - log("D-field REMAP src", imm, offsmul) - else: - offsmul = srcstep - log("D-field src", imm, offsmul) - elif op == MicrOp.OP_STORE.value: - # XXX NOTE! no bit-reversed STORE! this should not ever be used - offsmul = dststep - log("D-field dst", imm, offsmul) - # bit-reverse mode, rev already done through get_src_dst_steps() - if ldstmode == SVP64LDSTmode.SHIFT.value: - # manually look up RC, sigh - RC = yield self.dec2.dec.RC[0:5] - RC = self.gpr(RC) - log("LD-SHIFT:", "VL", vl, - "RC", RC.value, "imm", imm, - "offs", bin(offsmul), - ) - imm = SelectableInt((imm * offsmul) << RC.value, 32) - # Unit-Strided LD/ST adds offset*width to immediate - elif ldstmode == SVP64LDSTmode.UNITSTRIDE.value: - ldst_len = yield self.dec2.e.do.data_len - imm = SelectableInt(imm + offsmul * ldst_len, 32) - replace_d = True - # Element-strided multiplies the immediate by element step - elif ldstmode == SVP64LDSTmode.ELSTRIDE.value: - imm = SelectableInt(imm * offsmul, 32) - replace_d = True - if replace_d: - ldst_ra_vec = yield self.dec2.rm_dec.ldst_ra_vec - ldst_imz_in = yield self.dec2.rm_dec.ldst_imz_in - log("LDSTmode", SVP64LDSTmode(ldstmode), - offsmul, imm, ldst_ra_vec, ldst_imz_in) - # new replacement D... errr.. DS - if replace_d: - if info.form == 'DS': - # TODO: assert 2 LSBs are zero? - log("DS-Form, TODO, assert 2 LSBs zero?", bin(imm.value)) - imm.value = imm.value >> 2 - self.namespace['DS'] = imm - else: - self.namespace['D'] = imm + yield from self.check_replace_d(info, remap_active) # "special" registers for special in info.special_regs: @@ -1642,52 +1442,176 @@ class ISACaller(ISACallerHelper, ISAFPHelpers): # any modified return results? if info.write_regs: for name, output in zip(output_names, results): - if name == 'overflow': # ignore, done already (above) - continue - if isinstance(output, int): - output = SelectableInt(output, 256) - if name in ['CA', 'CA32']: - if carry_en: - log("writing %s to XER" % name, output) - self.spr['XER'][XER_bits[name]] = output.value - else: - log("NOT writing %s to XER" % name, output) - elif name in info.special_regs: - log('writing special %s' % name, output, special_sprs) - if name in special_sprs: - self.spr[name] = output - else: - self.namespace[name].eq(output) - if name == 'MSR': - log('msr written', hex(self.msr.value)) + yield from self.check_write(info, name, output, carry_en) + + nia_update = (yield from self.check_step_increment(results, rc_en, + asmop, ins_name)) + if nia_update: + self.update_pc_next() + + def check_replace_d(self, info, remap_active): + replace_d = False # update / replace constant in pseudocode + ldstmode = yield self.dec2.rm_dec.ldstmode + vl = self.svstate.vl + subvl = yield self.dec2.rm_dec.rm_in.subvl + srcstep, dststep = self.new_srcstep, self.new_dststep + ssubstep, dsubstep = self.new_ssubstep, self.new_dsubstep + if info.form == 'DS': + # DS-Form, multiply by 4 then knock 2 bits off after + imm = yield self.dec2.dec.fields.FormDS.DS[0:14] * 4 + else: + imm = yield self.dec2.dec.fields.FormD.D[0:16] + imm = exts(imm, 16) # sign-extend to integer + # get the right step. LD is from srcstep, ST is dststep + op = yield self.dec2.e.do.insn_type + offsmul = 0 + if op == MicrOp.OP_LOAD.value: + if remap_active: + offsmul = yield self.dec2.in1_step + log("D-field REMAP src", imm, offsmul) + else: + offsmul = (srcstep * (subvl+1)) + ssubstep + log("D-field src", imm, offsmul) + elif op == MicrOp.OP_STORE.value: + # XXX NOTE! no bit-reversed STORE! this should not ever be used + offsmul = (dststep * (subvl+1)) + dsubstep + log("D-field dst", imm, offsmul) + # Unit-Strided LD/ST adds offset*width to immediate + if ldstmode == SVP64LDSTmode.UNITSTRIDE.value: + ldst_len = yield self.dec2.e.do.data_len + imm = SelectableInt(imm + offsmul * ldst_len, 32) + replace_d = True + # Element-strided multiplies the immediate by element step + elif ldstmode == SVP64LDSTmode.ELSTRIDE.value: + imm = SelectableInt(imm * offsmul, 32) + replace_d = True + if replace_d: + ldst_ra_vec = yield self.dec2.rm_dec.ldst_ra_vec + ldst_imz_in = yield self.dec2.rm_dec.ldst_imz_in + log("LDSTmode", SVP64LDSTmode(ldstmode), + offsmul, imm, ldst_ra_vec, ldst_imz_in) + # new replacement D... errr.. DS + if replace_d: + if info.form == 'DS': + # TODO: assert 2 LSBs are zero? + log("DS-Form, TODO, assert 2 LSBs zero?", bin(imm.value)) + imm.value = imm.value >> 2 + self.namespace['DS'] = imm + else: + self.namespace['D'] = imm + + def get_input(self, name): + # using PowerDecoder2, first, find the decoder index. + # (mapping name RA RB RC RS to in1, in2, in3) + regnum, is_vec = yield from get_pdecode_idx_in(self.dec2, name) + if regnum is None: + # doing this is not part of svp64, it's because output + # registers, to be modified, need to be in the namespace. + regnum, is_vec = yield from get_pdecode_idx_out(self.dec2, name) + if regnum is None: + regnum, is_vec = yield from get_pdecode_idx_out2(self.dec2, name) + + # in case getting the register number is needed, _RA, _RB + regname = "_" + name + self.namespace[regname] = regnum + if not self.is_svp64_mode or not self.pred_src_zero: + log('reading reg %s %s' % (name, str(regnum)), is_vec) + if name in fregs: + reg_val = SelectableInt(self.fpr(regnum)) + elif name is not None: + reg_val = SelectableInt(self.gpr(regnum)) + else: + log('zero input reg %s %s' % (name, str(regnum)), is_vec) + reg_val = 0 + return reg_val + + def remap_debug(self, remaps): + # just some convenient debug info + for i in range(4): + sname = 'SVSHAPE%d' % i + shape = self.spr[sname] + log(sname, bin(shape.value)) + log(" lims", shape.lims) + log(" mode", shape.mode) + log(" skip", shape.skip) + + # set up the list of steps to remap + mi0 = self.svstate.mi0 + mi1 = self.svstate.mi1 + mi2 = self.svstate.mi2 + mo0 = self.svstate.mo0 + mo1 = self.svstate.mo1 + steps = [(self.dec2.in1_step, mi0), # RA + (self.dec2.in2_step, mi1), # RB + (self.dec2.in3_step, mi2), # RC + (self.dec2.o_step, mo0), # RT + (self.dec2.o2_step, mo1), # EA + ] + remap_idxs = self.remap_idxs + rremaps = [] + # now cross-index the required SHAPE for each of 3-in 2-out regs + rnames = ['RA', 'RB', 'RC', 'RT', 'EA'] + for i, (dstep, shape_idx) in enumerate(steps): + (shape, remap) = remaps[shape_idx] + remap_idx = remap_idxs[shape_idx] + # zero is "disabled" + if shape.value == 0x0: + continue + # now set the actual requested step to the current index + yield dstep.eq(remap_idx) + + # debug printout info + rremaps.append((shape.mode, i, rnames[i], shape_idx, remap_idx)) + for x in rremaps: + log("shape remap", x) + + def check_write(self, info, name, output, carry_en): + if name == 'overflow': # ignore, done already (above) + return + if isinstance(output, int): + output = SelectableInt(output, 256) + if name in ['CA', 'CA32']: + if carry_en: + log("writing %s to XER" % name, output) + self.spr['XER'][XER_bits[name]] = output.value + else: + log("NOT writing %s to XER" % name, output) + elif name in info.special_regs: + log('writing special %s' % name, output, special_sprs) + if name in special_sprs: + self.spr[name] = output + else: + self.namespace[name].eq(output) + if name == 'MSR': + log('msr written', hex(self.msr.value)) + else: + regnum, is_vec = yield from get_pdecode_idx_out(self.dec2, name) + if regnum is None: + regnum, is_vec = yield from get_pdecode_idx_out2( + self.dec2, name) + if regnum is None: + # temporary hack for not having 2nd output + regnum = yield getattr(self.decoder, name) + is_vec = False + if self.is_svp64_mode and self.pred_dst_zero: + log('zeroing reg %d %s' % (regnum, str(output)), + is_vec) + output = SelectableInt(0, 256) + else: + if name in fregs: + ftype = 'fpr' else: - regnum, is_vec = yield from get_pdecode_idx_out(self.dec2, - name) - if regnum is None: - regnum, is_vec = yield from get_pdecode_idx_out2( - self.dec2, name) - if regnum is None: - # temporary hack for not having 2nd output - regnum = yield getattr(self.decoder, name) - is_vec = False - if self.is_svp64_mode and pred_dst_zero: - log('zeroing reg %d %s' % (regnum, str(output)), - is_vec) - output = SelectableInt(0, 256) - else: - if name in fregs: - ftype = 'fpr' - else: - ftype = 'gpr' - log('writing %s %s %s' % (ftype, regnum, str(output)), - is_vec) - if output.bits > 64: - output = SelectableInt(output.value, 64) - if name in fregs: - self.fpr[regnum] = output - else: - self.gpr[regnum] = output + ftype = 'gpr' + log('writing %s %s %s' % (ftype, regnum, str(output)), + is_vec) + if output.bits > 64: + output = SelectableInt(output.value, 64) + if name in fregs: + self.fpr[regnum] = output + else: + self.gpr[regnum] = output + def check_step_increment(self, results, rc_en, asmop, ins_name): # check if it is the SVSTATE.src/dest step that needs incrementing # this is our Sub-Program-Counter loop from 0 to VL-1 pre = False @@ -1704,64 +1628,62 @@ class ISACaller(ISACallerHelper, ISAFPHelpers): self.svp64_reset_loop() self.svstate.vfirst = 0 self.update_nia() - if rc_en: - results = [SelectableInt(0, 64)] - self.handle_comparison(results) # CR0 + if not rc_en: + return True + results = [SelectableInt(0, 64)] + self.handle_comparison(results) # CR0 + return True + if self.allow_next_step_inc == 2: + log("SVSTATE_NEXT: read") + nia_update = (yield from self.svstate_post_inc(ins_name)) else: - if self.allow_next_step_inc == 2: - log("SVSTATE_NEXT: read") - nia_update = (yield from self.svstate_post_inc(ins_name)) - else: - log("SVSTATE_NEXT: post-inc") - # use actual src/dst-step here to check end, do NOT - # use bit-reversed version - srcstep, dststep = self.new_srcstep, self.new_dststep - remaps = self.get_remap_indices() - remap_idxs = self.remap_idxs - vl = self.svstate.vl - end_src = srcstep == vl-1 - end_dst = dststep == vl-1 - if self.allow_next_step_inc != 2: - if not end_src: - self.svstate.srcstep += SelectableInt(1, 7) - if not end_dst: - self.svstate.dststep += SelectableInt(1, 7) - self.namespace['SVSTATE'] = self.svstate.spr - # set CR0 (if Rc=1) based on end - if rc_en: - srcstep = self.svstate.srcstep - dststep = self.svstate.srcstep - endtest = 1 if (end_src or end_dst) else 0 - #results = [SelectableInt(endtest, 64)] - # self.handle_comparison(results) # CR0 - - # see if svstep was requested, if so, which SVSTATE - endings = 0b111 - if self.svstate_next_mode > 0: - shape_idx = self.svstate_next_mode.value-1 - endings = self.remap_loopends[shape_idx] - cr_field = SelectableInt((~endings) << 1 | endtest, 4) - print("svstep Rc=1, CR0", cr_field) - self.crl[0].eq(cr_field) # CR0 - if end_src or end_dst: - # reset at end of loop including exit Vertical Mode - log("SVSTATE_NEXT: after increments, reset") - self.svp64_reset_loop() - self.svstate.vfirst = 0 - - elif self.is_svp64_mode: - nia_update = (yield from self.svstate_post_inc(ins_name)) - else: - # XXX only in non-SVP64 mode! - # record state of whether the current operation was an svshape, - # OR svindex! - # to be able to know if it should apply in the next instruction. - # also (if going to use this instruction) should disable ability - # to interrupt in between. sigh. - self.last_op_svshape = asmop in ['svremap', 'svindex'] + log("SVSTATE_NEXT: post-inc") + # use actual src/dst-step here to check end, do NOT + # use bit-reversed version + srcstep, dststep = self.new_srcstep, self.new_dststep + ssubstep, dsubstep = self.new_ssubstep, self.new_dsubstep + remaps = self.get_remap_indices() + remap_idxs = self.remap_idxs + vl = self.svstate.vl + subvl = yield self.dec2.rm_dec.rm_in.subvl + end_src = srcstep == vl-1 + end_dst = dststep == vl-1 + if self.allow_next_step_inc != 2: + yield from self.advance_svstate_steps(end_src, end_dst) + self.namespace['SVSTATE'] = self.svstate.spr + # set CR0 (if Rc=1) based on end + if rc_en: + endtest = 1 if (end_src or end_dst) else 0 + #results = [SelectableInt(endtest, 64)] + # self.handle_comparison(results) # CR0 + + # see if svstep was requested, if so, which SVSTATE + endings = 0b111 + if self.svstate_next_mode > 0: + shape_idx = self.svstate_next_mode.value-1 + endings = self.remap_loopends[shape_idx] + cr_field = SelectableInt((~endings) << 1 | endtest, 4) + print("svstep Rc=1, CR0", cr_field) + self.crl[0].eq(cr_field) # CR0 + if end_src or end_dst: + # reset at end of loop including exit Vertical Mode + log("SVSTATE_NEXT: after increments, reset") + self.svp64_reset_loop() + self.svstate.vfirst = 0 + return nia_update - if nia_update: - self.update_pc_next() + if self.is_svp64_mode: + return (yield from self.svstate_post_inc(ins_name)) + + # XXX only in non-SVP64 mode! + # record state of whether the current operation was an svshape, + # OR svindex! + # to be able to know if it should apply in the next instruction. + # also (if going to use this instruction) should disable ability + # to interrupt in between. sigh. + self.last_op_svshape = asmop in ['svremap', 'svindex'] + + return True def SVSTATE_NEXT(self, mode, submode): """explicitly moves srcstep/dststep on to next element, for @@ -1788,22 +1710,31 @@ class ISACaller(ISACallerHelper, ISAFPHelpers): def svstate_pre_inc(self): """check if srcstep/dststep need to skip over masked-out predicate bits + note that this is not supposed to do anything to substep, + it is purely for skipping masked-out bits """ # get SVSTATE VL (oh and print out some debug stuff) vl = self.svstate.vl + subvl = yield self.dec2.rm_dec.rm_in.subvl srcstep = self.svstate.srcstep dststep = self.svstate.dststep + ssubstep = self.svstate.ssubstep + dsubstep = self.svstate.dsubstep sv_a_nz = yield self.dec2.sv_a_nz fft_mode = yield self.dec2.use_svp64_fft in1 = yield self.dec2.e.read_reg1.data - log("SVP64: VL, srcstep, dststep, sv_a_nz, in1 fft, svp64", - vl, srcstep, dststep, sv_a_nz, in1, fft_mode, + log("SVP64: VL, subvl, srcstep, dststep, ssubstep, dsybstep, sv_a_nz, " + "in1 fft, svp64", + vl, subvl, srcstep, dststep, ssubstep, dsubstep, + sv_a_nz, in1, fft_mode, self.is_svp64_mode) # get predicate mask (all 64 bits) srcmask = dstmask = 0xffff_ffff_ffff_ffff pmode = yield self.dec2.rm_dec.predmode + pack = yield self.dec2.rm_dec.pack + unpack = yield self.dec2.rm_dec.unpack reverse_gear = yield self.dec2.rm_dec.reverse_gear sv_ptype = yield self.dec2.dec.op.SV_Ptype srcpred = yield self.dec2.rm_dec.srcpred @@ -1818,7 +1749,11 @@ class ISACaller(ISACallerHelper, ISAFPHelpers): srcmask = dstmask = get_predcr(self.crl, dstpred, vl) if sv_ptype == SVPtype.P2.value: srcmask = get_predcr(self.crl, srcpred, vl) + # work out if the ssubsteps are completed + ssubstart = ssubstep == 0 + dsubstart = dsubstep == 0 log(" pmode", pmode) + log(" pack/unpack", pack, unpack) log(" reverse", reverse_gear) log(" ptype", sv_ptype) log(" srcpred", bin(srcpred)) @@ -1827,19 +1762,25 @@ class ISACaller(ISACallerHelper, ISAFPHelpers): log(" dstmask", bin(dstmask)) log(" pred_sz", bin(pred_src_zero)) log(" pred_dz", bin(pred_dst_zero)) + log(" ssubstart", ssubstart) + log(" dsubstart", dsubstart) # okaaay, so here we simply advance srcstep (TODO dststep) - # until the predicate mask has a "1" bit... or we run out of VL - # let srcstep==VL be the indicator to move to next instruction - if not pred_src_zero: - while (((1 << srcstep) & srcmask) == 0) and (srcstep != vl): - log(" skip", bin(1 << srcstep)) - srcstep += 1 - # same for dststep - if not pred_dst_zero: - while (((1 << dststep) & dstmask) == 0) and (dststep != vl): - log(" skip", bin(1 << dststep)) - dststep += 1 + # this can ONLY be done at the beginning of the "for" loop + # (this is all actually a FSM so it's hell to keep track sigh) + if ssubstart: + # until the predicate mask has a "1" bit... or we run out of VL + # let srcstep==VL be the indicator to move to next instruction + if not pred_src_zero: + while (((1 << srcstep) & srcmask) == 0) and (srcstep != vl): + log(" sskip", bin(1 << srcstep)) + srcstep += 1 + if dsubstart: + # same for dststep + if not pred_dst_zero: + while (((1 << dststep) & dstmask) == 0) and (dststep != vl): + log(" dskip", bin(1 << dststep)) + dststep += 1 # now work out if the relevant mask bits require zeroing if pred_dst_zero: @@ -1848,32 +1789,45 @@ class ISACaller(ISACallerHelper, ISAFPHelpers): pred_src_zero = ((1 << srcstep) & srcmask) == 0 # store new srcstep / dststep - self.new_srcstep, self.new_dststep = srcstep, dststep - self.pred_dst_zero, self.pred_src_zero = pred_dst_zero, pred_src_zero + self.new_srcstep, self.new_dststep = (srcstep, dststep) + self.new_ssubstep, self.new_dsubstep = (ssubstep, dsubstep) + self.pred_dst_zero, self.pred_src_zero = (pred_dst_zero, pred_src_zero) log(" new srcstep", srcstep) log(" new dststep", dststep) + log(" new ssubstep", ssubstep) + log(" new dsubstep", dsubstep) def get_src_dststeps(self): - """gets srcstep and dststep + """gets srcstep, dststep, and ssubstep, dsubstep """ - return self.new_srcstep, self.new_dststep + return (self.new_srcstep, self.new_dststep, + self.new_ssubstep, self.new_dsubstep) def update_new_svstate_steps(self): # note, do not get the bit-reversed srcstep here! srcstep, dststep = self.new_srcstep, self.new_dststep + ssubstep, dsubstep = self.new_ssubstep, self.new_dsubstep # update SVSTATE with new srcstep self.svstate.srcstep = srcstep self.svstate.dststep = dststep + self.svstate.ssubstep = ssubstep + self.svstate.dsubstep = dsubstep self.namespace['SVSTATE'] = self.svstate yield self.dec2.state.svstate.eq(self.svstate.value) yield Settle() # let decoder update srcstep = self.svstate.srcstep dststep = self.svstate.dststep + ssubstep = self.svstate.ssubstep + dsubstep = self.svstate.dsubstep vl = self.svstate.vl + subvl = yield self.dec2.rm_dec.rm_in.subvl log(" srcstep", srcstep) log(" dststep", dststep) + log(" ssubstep", ssubstep) + log(" dsubstep", dsubstep) log(" vl", vl) + log(" subvl", subvl) # check if end reached (we let srcstep overrun, above) # nothing needs doing (TODO zeroing): just do next instruction @@ -1891,9 +1845,12 @@ class ISACaller(ISACallerHelper, ISAFPHelpers): # this is our Sub-Program-Counter loop from 0 to VL-1 # XXX twin predication TODO vl = self.svstate.vl + subvl = yield self.dec2.rm_dec.rm_in.subvl mvl = self.svstate.maxvl srcstep = self.svstate.srcstep dststep = self.svstate.dststep + ssubstep = self.svstate.ssubstep + dsubstep = self.svstate.dsubstep rm_mode = yield self.dec2.rm_dec.mode reverse_gear = yield self.dec2.rm_dec.reverse_gear sv_ptype = yield self.dec2.dec.op.SV_Ptype @@ -1901,8 +1858,11 @@ class ISACaller(ISACallerHelper, ISAFPHelpers): in_vec = not (yield self.dec2.no_in_vec) log(" svstate.vl", vl) log(" svstate.mvl", mvl) + log(" rm.subvl", subvl) log(" svstate.srcstep", srcstep) log(" svstate.dststep", dststep) + log(" svstate.ssubstep", ssubstep) + log(" svstate.dsubstep", dsubstep) log(" mode", rm_mode) log(" reverse", reverse_gear) log(" out_vec", out_vec) @@ -1924,24 +1884,50 @@ class ISACaller(ISACallerHelper, ISAFPHelpers): self.svp64_reset_loop() self.update_pc_next() return False - if svp64_is_vector and srcstep != vl-1 and dststep != vl-1: - self.svstate.srcstep += SelectableInt(1, 7) - self.svstate.dststep += SelectableInt(1, 7) - self.namespace['SVSTATE'] = self.svstate - # not an SVP64 branch, so fix PC (NIA==CIA) for next loop - # (by default, NIA is CIA+4 if v3.0B or CIA+8 if SVP64) - # this way we keep repeating the same instruction (with new steps) - self.pc.NIA.value = self.pc.CIA.value - self.namespace['NIA'] = self.pc.NIA - log("end of sub-pc call", self.namespace['CIA'], - self.namespace['NIA']) - return False # DO NOT allow PC update whilst Sub-PC loop running + # loops end at the first "hit" (source or dest) + loopend = ((srcstep == vl-1 and ssubstep == subvl) or + (dststep == vl-1 and dsubstep == subvl)) + if not svp64_is_vector or loopend: + # reset loop to zero and update NIA + self.svp64_reset_loop() + self.update_nia() + + return True - # reset loop to zero and update NIA - self.svp64_reset_loop() - self.update_nia() + # still looping, advance and update NIA + yield from self.advance_svstate_steps() + self.namespace['SVSTATE'] = self.svstate + # not an SVP64 branch, so fix PC (NIA==CIA) for next loop + # (by default, NIA is CIA+4 if v3.0B or CIA+8 if SVP64) + # this way we keep repeating the same instruction (with new steps) + self.pc.NIA.value = self.pc.CIA.value + self.namespace['NIA'] = self.pc.NIA + log("end of sub-pc call", self.namespace['CIA'], self.namespace['NIA']) + return False # DO NOT allow PC update whilst Sub-PC loop running - return True + def advance_svstate_steps(self, end_src=False, end_dst=False): + """ advance sub/steps. note that Pack/Unpack *INVERTS* the order. + TODO when Pack/Unpack is set, substep becomes the *outer* loop + """ + subvl = yield self.dec2.rm_dec.rm_in.subvl + # first source step + ssubstep = self.svstate.ssubstep + end_sub = ssubstep == subvl + if end_sub: + if not end_src: + self.svstate.srcstep += SelectableInt(1, 7) + self.svstate.ssubstep = SelectableInt(0, 2) # reset + else: + self.svstate.ssubstep += SelectableInt(1, 2) # advance ssubstep + # now dest step + dsubstep = self.svstate.dsubstep + end_sub = dsubstep == subvl + if end_sub: + if not end_dst: + self.svstate.dststep += SelectableInt(1, 7) + self.svstate.dsubstep = SelectableInt(0, 2) # reset + else: + self.svstate.dsubstep += SelectableInt(1, 2) # advance ssubstep def update_pc_next(self): # UPDATE program counter @@ -1954,6 +1940,8 @@ class ISACaller(ISACallerHelper, ISAFPHelpers): def svp64_reset_loop(self): self.svstate.srcstep = 0 self.svstate.dststep = 0 + self.svstate.ssubstep = 0 + self.svstate.dsubstep = 0 log(" svstate.srcstep loop end (PC to update)") self.namespace['SVSTATE'] = self.svstate