in the class for later use. this is to avoid problems with yield
"""
# go through all iterators in lock-step, advance to next remap_idx
- srcstep, dststep, substep = self.get_src_dststeps()
+ srcstep, dststep, ssubstep, dsubstep = self.get_src_dststeps()
# get four SVSHAPEs. here we are hard-coding
SVSHAPE0 = self.spr['SVSHAPE0']
SVSHAPE1 = self.spr['SVSHAPE1']
'setvl', 'svindex', 'svremap', 'svstep', 'svshape',
'grev', 'ternlogi', 'bmask', 'cprop',
'absdu', 'absds', 'absdacs', 'absdacu', 'avgadd',
+ 'fmvis', 'fishmv',
]:
illegal = False
ins_name = dotstrp
self.update_nia()
self.update_pc_next()
return
- srcstep, dststep, substep = self.get_src_dststeps()
+ srcstep, dststep, ssubstep, dsubstep = self.get_src_dststeps()
pred_dst_zero = self.pred_dst_zero
pred_src_zero = self.pred_src_zero
vl = self.svstate.vl
- subvl = self.svstate.subvl
+ subvl = yield self.dec2.rm_dec.rm_in.subvl
# VL=0 in SVP64 mode means "do nothing: skip instruction"
if self.is_svp64_mode and vl == 0:
# main input registers (RT, RA ...)
inputs = []
for name in input_names:
+ # debug output: use the file's log() helper, as the rest of this
+ # file does, instead of a bare print()
+ log("name", name)
regval = (yield from self.get_input(name))
+ log("regval", regval)
inputs.append(regval)
# arrrrgh, awful hack, to get _RT into namespace
# any modified return results?
if info.write_regs:
for name, output in zip(output_names, results):
- yield from self.check_write(info, name, output)
+ yield from self.check_write(info, name, output, carry_en)
nia_update = (yield from self.check_step_increment(results, rc_en,
asmop, ins_name))
def check_replace_d(self, info, remap_active):
replace_d = False # update / replace constant in pseudocode
ldstmode = yield self.dec2.rm_dec.ldstmode
- # shift mode reads SVD (or SVDS - TODO)
- # *BUT*... because this is "overloading" of LD operations,
- # it gets *STORED* into D (or DS, TODO)
- if ldstmode == SVP64LDSTmode.SHIFT.value:
- imm = yield self.dec2.dec.fields.FormSVD.SVD[0:11]
- imm = exts(imm, 11) # sign-extend to integer
- log("shift SVD", imm)
- replace_d = True
+ vl = self.svstate.vl
+ subvl = yield self.dec2.rm_dec.rm_in.subvl
+ srcstep, dststep = self.new_srcstep, self.new_dststep
+ ssubstep, dsubstep = self.new_ssubstep, self.new_dsubstep
+ if info.form == 'DS':
+ # DS-Form, multiply by 4 then knock 2 bits off after
+ imm = yield self.dec2.dec.fields.FormDS.DS[0:14] * 4
else:
- if info.form == 'DS':
- # DS-Form, multiply by 4 then knock 2 bits off after
- imm = yield self.dec2.dec.fields.FormDS.DS[0:14] * 4
- else:
- imm = yield self.dec2.dec.fields.FormD.D[0:16]
- imm = exts(imm, 16) # sign-extend to integer
+ imm = yield self.dec2.dec.fields.FormD.D[0:16]
+ imm = exts(imm, 16) # sign-extend to integer
# get the right step. LD is from srcstep, ST is dststep
op = yield self.dec2.e.do.insn_type
offsmul = 0
offsmul = yield self.dec2.in1_step
log("D-field REMAP src", imm, offsmul)
else:
- offsmul = (srcstep * (subvl+1)) + substep
+ offsmul = (srcstep * (subvl+1)) + ssubstep
log("D-field src", imm, offsmul)
elif op == MicrOp.OP_STORE.value:
# XXX NOTE! no bit-reversed STORE! this should not ever be used
- offsmul = (dststep * (subvl+1)) + substep
+ offsmul = (dststep * (subvl+1)) + dsubstep
log("D-field dst", imm, offsmul)
- # bit-reverse mode, rev already done through get_src_dst_steps()
- if ldstmode == SVP64LDSTmode.SHIFT.value:
- # manually look up RC, sigh
- RC = yield self.dec2.dec.RC[0:5]
- RC = self.gpr(RC)
- log("LD-SHIFT:", "VL", vl,
- "RC", RC.value, "imm", imm,
- "offs", bin(offsmul),
- )
- imm = SelectableInt((imm * offsmul) << RC.value, 32)
# Unit-Strided LD/ST adds offset*width to immediate
- elif ldstmode == SVP64LDSTmode.UNITSTRIDE.value:
+ if ldstmode == SVP64LDSTmode.UNITSTRIDE.value:
ldst_len = yield self.dec2.e.do.data_len
imm = SelectableInt(imm + offsmul * ldst_len, 32)
replace_d = True
for x in rremaps:
log("shape remap", x)
- def check_write(self, info, name, output):
+ def check_write(self, info, name, output, carry_en):
if name == 'overflow': # ignore, done already (above)
return
if isinstance(output, int):
# this is our Sub-Program-Counter loop from 0 to VL-1
pre = False
post = False
+ nia_update = True
if self.allow_next_step_inc:
log("SVSTATE_NEXT: inc requested, mode",
self.svstate_next_mode, self.allow_next_step_inc)
log("SVSTATE_NEXT: post-inc")
# use actual src/dst-step here to check end, do NOT
# use bit-reversed version
- srcstep, dststep, substep = \
- self.new_srcstep, self.new_dststep, self.new_substep
+ srcstep, dststep = self.new_srcstep, self.new_dststep
+ ssubstep, dsubstep = self.new_ssubstep, self.new_dsubstep
remaps = self.get_remap_indices()
remap_idxs = self.remap_idxs
vl = self.svstate.vl
- subvl = self.svstate.subvl
- end_sub = substep == subvl
+ subvl = yield self.dec2.rm_dec.rm_in.subvl
end_src = srcstep == vl-1
end_dst = dststep == vl-1
if self.allow_next_step_inc != 2:
- self.advance_svstate_steps(end_src, end_dst)
+ yield from self.advance_svstate_steps(end_src, end_dst)
self.namespace['SVSTATE'] = self.svstate.spr
# set CR0 (if Rc=1) based on end
if rc_en:
def svstate_pre_inc(self):
"""check if srcstep/dststep need to skip over masked-out predicate bits
+ note that this is not supposed to do anything to substep,
+ it is purely for skipping masked-out bits
+ the results are stored in self.new_srcstep / self.new_dststep,
+ self.new_ssubstep / self.new_dsubstep and in self.pred_dst_zero /
+ self.pred_src_zero (see get_src_dststeps for reading the steps back)
"""
# get SVSTATE VL (oh and print out some debug stuff)
vl = self.svstate.vl
- subvl = self.svstate.subvl
+ subvl = yield self.dec2.rm_dec.rm_in.subvl
srcstep = self.svstate.srcstep
dststep = self.svstate.dststep
- substep = self.svstate.substep
+ ssubstep = self.svstate.ssubstep
+ dsubstep = self.svstate.dsubstep
sv_a_nz = yield self.dec2.sv_a_nz
fft_mode = yield self.dec2.use_svp64_fft
in1 = yield self.dec2.e.read_reg1.data
- log("SVP64: VL, subvl, srcstep, dststep, substep, sv_a_nz, "
+ log("SVP64: VL, subvl, srcstep, dststep, ssubstep, dsubstep, sv_a_nz, "
"in1 fft, svp64",
- vl, subvl, srcstep, dststep, substep, sv_a_nz, in1, fft_mode,
+ vl, subvl, srcstep, dststep, ssubstep, dsubstep,
+ sv_a_nz, in1, fft_mode,
self.is_svp64_mode)
# get predicate mask (all 64 bits)
srcmask = dstmask = 0xffff_ffff_ffff_ffff
pmode = yield self.dec2.rm_dec.predmode
+ pack = yield self.dec2.rm_dec.pack
+ unpack = yield self.dec2.rm_dec.unpack
reverse_gear = yield self.dec2.rm_dec.reverse_gear
sv_ptype = yield self.dec2.dec.op.SV_Ptype
srcpred = yield self.dec2.rm_dec.srcpred
srcmask = dstmask = get_predcr(self.crl, dstpred, vl)
if sv_ptype == SVPtype.P2.value:
srcmask = get_predcr(self.crl, srcpred, vl)
- # work out if the substeps are completed
- end_sub = substep == subvl
+ # work out if ssubstep / dsubstep are at the start (zero)
+ ssubstart = ssubstep == 0
+ dsubstart = dsubstep == 0
log(" pmode", pmode)
+ log(" pack/unpack", pack, unpack)
log(" reverse", reverse_gear)
log(" ptype", sv_ptype)
log(" srcpred", bin(srcpred))
log(" dstmask", bin(dstmask))
log(" pred_sz", bin(pred_src_zero))
log(" pred_dz", bin(pred_dst_zero))
- log(" end_sub", end_sub)
+ log(" ssubstart", ssubstart)
+ log(" dsubstart", dsubstart)
- if end_sub:
- # okaaay, so here we simply advance srcstep (TODO dststep)
+ # okaaay, so here we simply advance srcstep (TODO dststep)
+ # this can ONLY be done at the beginning of the "for" loop
+ # (this is all actually a FSM so it's hell to keep track sigh)
+ if ssubstart:
# until the predicate mask has a "1" bit... or we run out of VL
# let srcstep==VL be the indicator to move to next instruction
if not pred_src_zero:
while (((1 << srcstep) & srcmask) == 0) and (srcstep != vl):
- log(" skip", bin(1 << srcstep))
+ log(" sskip", bin(1 << srcstep))
srcstep += 1
+ if dsubstart:
# same for dststep
if not pred_dst_zero:
while (((1 << dststep) & dstmask) == 0) and (dststep != vl):
- log(" skip", bin(1 << dststep))
+ log(" dskip", bin(1 << dststep))
dststep += 1
- # and reset substep back to zero
- substep = 0
- else:
- substep += 1 # advance substep
# now work out if the relevant mask bits require zeroing
if pred_dst_zero:
pred_src_zero = ((1 << srcstep) & srcmask) == 0
# store new srcstep / dststep
- self.new_srcstep, self.new_dststep, self.new_substep = \
- (srcstep, dststep, substep)
- self.pred_dst_zero, self.pred_src_zero = pred_dst_zero, pred_src_zero
+ self.new_srcstep, self.new_dststep = (srcstep, dststep)
+ self.new_ssubstep, self.new_dsubstep = (ssubstep, dsubstep)
+ self.pred_dst_zero, self.pred_src_zero = (pred_dst_zero, pred_src_zero)
log(" new srcstep", srcstep)
log(" new dststep", dststep)
- log(" new substep", substep)
+ log(" new ssubstep", ssubstep)
+ log(" new dsubstep", dsubstep)
def get_src_dststeps(self):
- """gets srcstep, dststep, and substep
+ """gets srcstep, dststep, ssubstep and dsubstep: the "new_*" values
+ held on self, returned as a 4-tuple in that order
"""
- return self.new_srcstep, self.new_dststep, self.new_substep
+ return (self.new_srcstep, self.new_dststep,
+ self.new_ssubstep, self.new_dsubstep)
def update_new_svstate_steps(self):
# note, do not get the bit-reversed srcstep here!
- srcstep, dststep, substep = \
- self.new_srcstep, self.new_dststep, self.new_substep
+ srcstep, dststep = self.new_srcstep, self.new_dststep
+ ssubstep, dsubstep = self.new_ssubstep, self.new_dsubstep
# update SVSTATE with new srcstep
self.svstate.srcstep = srcstep
self.svstate.dststep = dststep
- self.svstate.substep = substep
+ self.svstate.ssubstep = ssubstep
+ self.svstate.dsubstep = dsubstep
self.namespace['SVSTATE'] = self.svstate
yield self.dec2.state.svstate.eq(self.svstate.value)
yield Settle() # let decoder update
srcstep = self.svstate.srcstep
dststep = self.svstate.dststep
- substep = self.svstate.substep
+ ssubstep = self.svstate.ssubstep
+ dsubstep = self.svstate.dsubstep
vl = self.svstate.vl
- subvl = self.svstate.subvl
+ subvl = yield self.dec2.rm_dec.rm_in.subvl
log(" srcstep", srcstep)
log(" dststep", dststep)
- log(" substep", substep)
+ log(" ssubstep", ssubstep)
+ log(" dsubstep", dsubstep)
log(" vl", vl)
log(" subvl", subvl)
# this is our Sub-Program-Counter loop from 0 to VL-1
# XXX twin predication TODO
vl = self.svstate.vl
- subvl = self.svstate.subvl
+ subvl = yield self.dec2.rm_dec.rm_in.subvl
mvl = self.svstate.maxvl
srcstep = self.svstate.srcstep
dststep = self.svstate.dststep
- substep = self.svstate.substep
+ ssubstep = self.svstate.ssubstep
+ dsubstep = self.svstate.dsubstep
rm_mode = yield self.dec2.rm_dec.mode
reverse_gear = yield self.dec2.rm_dec.reverse_gear
sv_ptype = yield self.dec2.dec.op.SV_Ptype
in_vec = not (yield self.dec2.no_in_vec)
log(" svstate.vl", vl)
log(" svstate.mvl", mvl)
- log(" svstate.subvl", subvl)
+ log(" rm.subvl", subvl)
log(" svstate.srcstep", srcstep)
log(" svstate.dststep", dststep)
- log(" svstate.substep", substep)
+ log(" svstate.ssubstep", ssubstep)
+ log(" svstate.dsubstep", dsubstep)
log(" mode", rm_mode)
log(" reverse", reverse_gear)
log(" out_vec", out_vec)
self.svp64_reset_loop()
self.update_pc_next()
return False
- if svp64_is_vector and srcstep != vl-1 and dststep != vl-1:
- self.advance_svstate_steps()
- self.namespace['SVSTATE'] = self.svstate
- # not an SVP64 branch, so fix PC (NIA==CIA) for next loop
- # (by default, NIA is CIA+4 if v3.0B or CIA+8 if SVP64)
- # this way we keep repeating the same instruction (with new steps)
- self.pc.NIA.value = self.pc.CIA.value
- self.namespace['NIA'] = self.pc.NIA
- log("end of sub-pc call", self.namespace['CIA'],
- self.namespace['NIA'])
- return False # DO NOT allow PC update whilst Sub-PC loop running
+ # loops end at the first "hit" (source or dest)
+ loopend = ((srcstep == vl-1 and ssubstep == subvl) or
+ (dststep == vl-1 and dsubstep == subvl))
+ if not svp64_is_vector or loopend:
+ # reset loop to zero and update NIA
+ self.svp64_reset_loop()
+ self.update_nia()
- # reset loop to zero and update NIA
- self.svp64_reset_loop()
- self.update_nia()
+ return True
- return True
+ # still looping, advance and update NIA
+ yield from self.advance_svstate_steps()
+ self.namespace['SVSTATE'] = self.svstate
+ # not an SVP64 branch, so fix PC (NIA==CIA) for next loop
+ # (by default, NIA is CIA+4 if v3.0B or CIA+8 if SVP64)
+ # this way we keep repeating the same instruction (with new steps)
+ self.pc.NIA.value = self.pc.CIA.value
+ self.namespace['NIA'] = self.pc.NIA
+ log("end of sub-pc call", self.namespace['CIA'], self.namespace['NIA'])
+ return False # DO NOT allow PC update whilst Sub-PC loop running
def advance_svstate_steps(self, end_src=False, end_dst=False):
- subvl = self.svstate.subvl
- substep = self.svstate.substep
- end_sub = substep == subvl
+ """ advance sub/steps. note that Pack/Unpack *INVERTS* the order.
+ TODO when Pack/Unpack is set, substep becomes the *outer* loop
+
+ source and destination are advanced independently: a substep that
+ has reached subvl is reset to zero, otherwise it is incremented.
+ end_src / end_dst: when True, srcstep / dststep is not incremented.
+ this is a generator (subvl is read with a yield), so callers must
+ invoke it with "yield from".
+ """
+ subvl = yield self.dec2.rm_dec.rm_in.subvl
+ # first source step
+ ssubstep = self.svstate.ssubstep
+ end_sub = ssubstep == subvl
if end_sub:
if not end_src:
self.svstate.srcstep += SelectableInt(1, 7)
+ self.svstate.ssubstep = SelectableInt(0, 2) # reset
+ else:
+ self.svstate.ssubstep += SelectableInt(1, 2) # advance ssubstep
+ # now dest step
+ dsubstep = self.svstate.dsubstep
+ end_sub = dsubstep == subvl
+ if end_sub:
if not end_dst:
self.svstate.dststep += SelectableInt(1, 7)
- self.svstate.substep = SelectableInt(0, 2)
+ self.svstate.dsubstep = SelectableInt(0, 2) # reset
else:
- self.svstate.substep += SelectableInt(1, 2) # advance substep
+ self.svstate.dsubstep += SelectableInt(1, 2) # advance dsubstep
def update_pc_next(self):
# UPDATE program counter
def svp64_reset_loop(self):
self.svstate.srcstep = 0
self.svstate.dststep = 0
- self.svstate.substep = 0
+ self.svstate.ssubstep = 0
+ self.svstate.dsubstep = 0
log(" svstate.srcstep loop end (PC to update)")
self.namespace['SVSTATE'] = self.svstate