<!-- This defines Draft SVP64 instructions to augment PowerISA Version 3.0 -->
<!-- These are not described in book 1 -->
+# svstep
+
+SVL-Form
+
+* svstep RT,SVi,vf
+* svstep. RT,SVi,vf
+
+Pseudo-code:
+
+ step <- SVSTATE_NEXT(SVi, vf)
+ RT <- [0]*57 || step
+
+Special Registers Altered:
+
+ CR0 (if Rc=1)
+
# setvl
SVL-Form
-* setvl RT, RA, SVi, vf, vs, ms
-* setvl. RT, RA, SVi, vf, vs, ms
+* setvl RT,RA,SVi,vf,vs,ms
+* setvl. RT,RA,SVi,vf,vs,ms
Pseudo-code:
if (vf & (¬vs) & ¬(ms)) = 1 then
- step <- SVSTATE_NEXT(SVi)
+ step <- SVSTATE_NEXT(SVi, 0b0)
if _RT != 0b00000 then
GPR(_RT) <- [0]*57 || step
else
SVRM-Form
-* svremap SVme, mi0, mi1, mi2, mo0, mo1, pst
+* svremap SVme,mi0,mi1,mi2,mo0,mo1,pst
Pseudo-code:
SVM-Form
-* svshape SVxd, SVyd, SVzd, SVRM, vf
+* svshape SVxd,SVyd,SVzd,SVRM,vf
Pseudo-code:
SVSHAPE0[21:23] <- 0b001 # "inverse" on outer loop
# copy
SVSHAPE1[0:31] <- SVSHAPE0[0:31]
+ SVSHAPE2[0:31] <- SVSHAPE0[0:31]
+ SVSHAPE3[0:31] <- SVSHAPE0[0:31]
# for FRA and FRT
SVSHAPE0[28:29] <- 0b01 # j+halfstep schedule
+ # for cos coefficient
+ SVSHAPE2[28:29] <- 0b10 # ci schedule
+ SVSHAPE3[28:29] <- 0b11 # size schedule
# set schedule up for DCT Outer butterfly
if (SVRM = 0b0011) then
# calculate O(N log2 N) number of outer butterfly overlapping adds
# copy
SVSHAPE1[0:31] <- SVSHAPE0[0:31]
SVSHAPE2[0:31] <- SVSHAPE0[0:31]
- SVSHAPE3[0:31] <- SVSHAPE0[0:31]
# for FRA and FRT
SVSHAPE1[28:29] <- 0b01 # j+halfstep schedule
- # for cos coefficient
- SVSHAPE2[28:29] <- 0b10 # ci schedule
- SVSHAPE3[28:29] <- 0b11 # size schedule
# set VL, MVL and Vertical-First
SVSTATE[0:6] <- vlen
SVSTATE[7:13] <- vlen
--- /dev/null
+insn,CONDITIONS,Ptype,Etype,0,1,2,3,in1,in2,in3,out,CR in,CR out,out2
+svstep,,1P,EXTRA3,d:RT;d:CR0,0,0,0,0,0,0,RT,0,CR0,0
0b00000,VL,OP_SETVL,RA_OR_ZERO,NONE,NONE,RT_OR_ZERO,NONE,CR0,0,0,ZERO,0,NONE,0,0,0,0,0,0,RC,0,0,setvl,SVL,
0b00001,VL,OP_SVSHAPE,NONE,NONE,NONE,NONE,NONE,NONE,0,0,ZERO,0,NONE,0,0,0,0,0,0,RC,0,0,svshape,SVM,
0b00010,VL,OP_SVREMAP,NONE,NONE,NONE,NONE,NONE,NONE,0,0,ZERO,0,NONE,0,0,0,0,0,0,RC,0,0,svremap,SVRM,
+0b00011,VL,OP_SVSTEP,NONE,NONE,NONE,RT,NONE,CR0,0,0,ZERO,0,NONE,0,0,0,0,0,0,RC,0,0,svstep,SVL,
dec2.dec.RT)
if out_sel == OutSel.RT.value:
return out, o_isvec
+ elif name == 'RT_OR_ZERO':
+ log ("get_pdecode_idx_out", out_sel, OutSel.RT.value,
+ OutSel.RT_OR_ZERO.value, out, o_isvec,
+ dec2.dec.RT)
+ if out_sel == OutSel.RT_OR_ZERO.value:
+ return out, o_isvec
elif name == 'FRA':
log ("get_pdecode_idx_out", out_sel, OutSel.FRA.value, out, o_isvec)
if out_sel == OutSel.FRA.value:
SO = self.spr['XER'][XER_bits['SO']]
log("handle_comparison SO", SO)
cr_field = selectconcat(negative, positive, zero, SO)
+ log("handle_comparison cr_field", self.cr, cr_idx, cr_field)
self.crl[cr_idx].eq(cr_field)
def set_pc(self, pc_val):
illegal = False
ins_name = 'setvl'
+ # and svstep not being supported by binutils (.long)
+ if asmop.startswith('svstep'):
+ illegal = False
+ ins_name = 'svstep'
+
# and svremap not being supported by binutils (.long)
if asmop.startswith('svremap'):
illegal = False
reg_val = 0
inputs.append(reg_val)
# arrrrgh, awful hack, to get _RT into namespace
- if ins_name == 'setvl':
+ if ins_name in ['setvl', 'svstep']:
regname = "_RT"
RT = yield self.dec2.dec.RT
self.namespace[regname] = SelectableInt(RT, 5)
+ if RT == 0:
+ self.namespace["RT"] = SelectableInt(0, 5)
+ regnum, is_vec = yield from get_pdecode_idx_out(self.dec2, "RT")
+ log('hack input reg %s %s' % (name, str(regnum)), is_vec)
# in SVP64 mode for LD/ST work out immediate
# XXX TODO: replace_ds for DS-Form rather than D-Form.
if not self.is_svp64_mode or not pred_dst_zero:
if hasattr(self.dec2.e.do, "rc"):
rc_en = yield self.dec2.e.do.rc.rc
- if rc_en:
+ if rc_en and ins_name not in ['svstep']:
regnum, is_vec = yield from get_pdecode_cr_out(self.dec2, "CR0")
self.handle_comparison(results, regnum)
pre = False
post = False
if self.allow_next_step_inc:
- log("SVSTATE_NEXT: inc requested, mode", self.svstate_next_mode)
+ log("SVSTATE_NEXT: inc requested, mode",
+ self.svstate_next_mode, self.allow_next_step_inc)
yield from self.svstate_pre_inc()
pre = yield from self.update_new_svstate_steps()
if pre:
results = [SelectableInt(0, 64)]
self.handle_comparison(results) # CR0
else:
- log ("SVSTATE_NEXT: post-inc")
+ if self.allow_next_step_inc == 2:
+ log ("SVSTATE_NEXT: read")
+ yield from self.svstate_post_inc()
+ else:
+ log ("SVSTATE_NEXT: post-inc")
srcstep, dststep = self.new_srcstep, self.new_dststep
remaps = self.get_remap_indices()
remap_idxs = self.remap_idxs
vl = self.svstate.vl
end_src = srcstep == vl-1
end_dst = dststep == vl-1
- if not end_src:
- self.svstate.srcstep += SelectableInt(1, 7)
- if not end_dst:
- self.svstate.dststep += SelectableInt(1, 7)
+ if self.allow_next_step_inc != 2:
+ if not end_src:
+ self.svstate.srcstep += SelectableInt(1, 7)
+ if not end_dst:
+ self.svstate.dststep += SelectableInt(1, 7)
self.namespace['SVSTATE'] = self.svstate.spr
# set CR0 (if Rc=1) based on end
if rc_en:
self.update_pc_next()
- def SVSTATE_NEXT(self, mode):
+ def SVSTATE_NEXT(self, mode, submode):
"""explicitly moves srcstep/dststep on to next element, for
"Vertical-First" mode. this function is called from
setvl pseudo-code, as a pseudo-op "svstep"
"""
- log("SVSTATE_NEXT mode", mode)
- self.allow_next_step_inc = True
+ self.allow_next_step_inc = submode.value + 1
+ log("SVSTATE_NEXT mode", mode, submode, self.allow_next_step_inc)
self.svstate_next_mode = mode
if self.svstate_next_mode > 0:
shape_idx = self.svstate_next_mode.value-1
sv_a_nz = yield self.dec2.sv_a_nz
fft_mode = yield self.dec2.use_svp64_fft
in1 = yield self.dec2.e.read_reg1.data
- log ("SVP64: VL, srcstep, dststep, sv_a_nz, in1 fft",
- vl, srcstep, dststep, sv_a_nz, in1, fft_mode)
+ log ("SVP64: VL, srcstep, dststep, sv_a_nz, in1 fft, svp64",
+ vl, srcstep, dststep, sv_a_nz, in1, fft_mode,
+ self.is_svp64_mode)
# get predicate mask
srcmask = dstmask = 0xffff_ffff_ffff_ffff
self.assertEqual(CR0[CRFields.GT], 0)
self.assertEqual(CR0[CRFields.SO], 0)
+ def test_svstep_inner_loop_8_jl(self):
+ """tests svstep inner loop, running 8 times (sv.svstep), checking
+ jl is copied into a *Vector* result.
+
+ fuuun...
+ """
+ lst = SVP64Asm([
+ # set DCT triple butterfly mode with persistent "REMAP"
+ "svshape 8, 1, 1, 2, 0",
+ "svremap 0, 0, 0, 2, 0, 1, 1",
+ "sv.svstep 2.v, 4, 1", # svstep get vector of ci
+ "sv.svstep 16.v, 3, 1", # svstep get vector of step
+ ])
+ lst = list(lst)
+
+ # SVSTATE
+ svstate = SVP64State()
+ #svstate.vl = 2 # VL
+ #svstate.maxvl = 2 # MAXVL
+ print ("SVSTATE", bin(svstate.asint()))
+
+ with Program(lst, bigendian=False) as program:
+ sim = self.run_tst_program(program, svstate=svstate)
+ print ("SVSTATE after", bin(sim.svstate.asint()))
+ print (" vl", bin(sim.svstate.vl))
+ print (" mvl", bin(sim.svstate.maxvl))
+ print (" srcstep", bin(sim.svstate.srcstep))
+ print (" dststep", bin(sim.svstate.dststep))
+ print (" vfirst", bin(sim.svstate. vfirst))
+ self.assertEqual(sim.svstate.vl, 12)
+ self.assertEqual(sim.svstate.maxvl, 12)
+ # svstep called four times, reset occurs, srcstep zero
+ self.assertEqual(sim.svstate.srcstep, 0)
+ self.assertEqual(sim.svstate.dststep, 0)
+ for i in range(4):
+ self.assertEqual(sim.gpr(2+i), SelectableInt(8, 64))
+ self.assertEqual(sim.gpr(6+i), SelectableInt(4, 64))
+ self.assertEqual(sim.gpr(10+i), SelectableInt(2, 64))
+ self.assertEqual(sim.gpr(16+i), SelectableInt(i, 64))
+ self.assertEqual(sim.gpr(24+i), SelectableInt(0, 64))
+ for i in range(2):
+ self.assertEqual(sim.gpr(20+i), SelectableInt(i, 64))
+ self.assertEqual(sim.gpr(22+i), SelectableInt(i, 64))
+ self.assertEqual(sim.svstate.vfirst, 0)
+ CR0 = sim.crl[0]
+ print(" CR0", bin(CR0.get_range().value))
+ self.assertEqual(CR0[CRFields.EQ], 0)
+ self.assertEqual(CR0[CRFields.LT], 0)
+ self.assertEqual(CR0[CRFields.GT], 0)
+ self.assertEqual(CR0[CRFields.SO], 0)
+
def test_sv_add(self):
"""sets VL=2 then adds:
* 1 = 5 + 9 => 0x5555 = 0x4321+0x1234
"setvl", # https://libre-soc.org/openpower/sv/setvl
"svremap", # https://libre-soc.org/openpower/sv/remap - TEMPORARY
"svshape", # https://libre-soc.org/openpower/sv/remap
+ "svstep", # https://libre-soc.org/openpower/sv/setvl
"sim_cfg",
"slbia", "sld", "slw", "srad", "sradi",
"sraw", "srawi", "srd", "srw",
OP_FP_MADD = 79
OP_SVREMAP = 80
OP_SVSHAPE = 81
+ OP_SVSTEP = 82
@unique
'1R-1W-imm': 'RM-2P-1S1D',
'1R-CRo': 'RM-2P-1S1D',
'1R-imm': 'non-SV',
+ '1W-CRo': 'RM-1P-1D',
'1W': 'non-SV',
'1W-CRi': 'RM-2P-1S1D',
'CRio': 'RM-2P-1S1D',
res['2'] = 's:FRB' # FRB: Rsrc2_EXTRA2
res['3'] = 's:FRC' # FRC: Rsrc3_EXTRA2
+ elif value == 'RM-1P-1D':
+ res['Etype'] = 'EXTRA3' # RM EXTRA3 type
+ if insn_name == 'svstep':
+ res['0'] = 'd:RT;d:CR0' # RT,CR0: Rdest1_EXTRA3
+
# add to svp64 csvs
#for k in ['in1', 'in2', 'in3', 'out', 'CR in', 'CR out']:
# del res[k]
yield ".long 0x%x" % insn
return
+ # sigh have to do svstep here manually for now...
+ # note the subtract one from SVi.
+ if opcode in ["svstep", "svstep."]:
+ insn = 22 << (31-5) # opcode 22, bits 0-5
+ fields = list(map(int, fields))
+ insn |= fields[0] << (31-10) # RT , bits 6-10
+ insn |= (fields[1]-1) << (31-22) # SVi , bits 16-22
+ insn |= fields[2] << (31-25) # vf , bit 25
+ insn |= 0b00011 << (31-30) # XO , bits 26..30
+ if opcode == 'svstep.':
+ insn |= 1 << (31-31) # Rc=1 , bit 31
+ log ("svstep", bin(insn))
+ yield ".long 0x%x" % insn
+ return
+
# and svshape. note that the dimension fields each have one subtracted
if opcode == 'svshape':
insn = 22 << (31-5) # opcode 22, bits 0-5
# okaaay now we identify the field value (opcode N,N,N) with
# the pseudo-code info (opcode RT, RA, RB)
assert len(fields) == len(v30b_regs), \
- "length of fields %s must match insn `%s`" % \
- (str(v30b_regs), insn)
+ "length of fields %s must match insn `%s` fields %s" % \
+ (str(v30b_regs), insn, str(fields))
opregfields = zip(fields, v30b_regs) # err that was easy
# now for each of those find its place in the EXTRA encoding
if rc:
opcode |= 1 # Rc, bit 31.
yield ".long 0x%x" % opcode
+ # sigh have to do svstep here manually for now...
+ elif opcode in ["svstep", "svstep."]:
+ insn = 22 << (31-5) # opcode 22, bits 0-5
+ insn |= int(v30b_newfields[0]) << (31-10) # RT , bits 6-10
+ insn |= int(v30b_newfields[1]) << (31-22) # SVi , bits 16-22
+ insn |= int(v30b_newfields[2]) << (31-25) # vf , bit 25
+ insn |= 0b00011 << (31-30) # XO , bits 26..30
+ if opcode == 'svstep.':
+ insn |= 1 << (31-31) # Rc=1 , bit 31
+ log ("svstep", bin(insn))
+ yield ".long 0x%x" % insn
+
+ elif v30b_op in ["setvl", "setvl."]:
+ insn = 22 << (31-5) # opcode 22, bits 0-5
+ fields = list(map(int, fields))
+ insn |= fields[0] << (31-10) # RT , bits 6-10
+ insn |= fields[1] << (31-15) # RA , bits 11-15
+ insn |= (fields[2]-1) << (31-22) # SVi , bits 16-22
+ insn |= fields[3] << (31-25) # vf , bit 25
+ insn |= fields[4] << (31-24) # vs , bit 24
+ insn |= fields[5] << (31-23) # ms , bit 23
+ insn |= 0b00000 << (31-30) # XO , bits 26..30
+ if opcode == 'setvl.':
+ insn |= 1 << (31-31) # Rc=1 , bit 31
+ log ("setvl", bin(insn))
+ yield ".long 0x%x" % insn
+
else:
yield "%s %s" % (v30b_op+rc, ", ".join(v30b_newfields))
log ("new v3.0B fields", v30b_op, v30b_newfields)
'sv.stw 5.v, 4(1.v)',
'sv.ld 5.v, 4(1.v)',
'setvl. 2, 3, 4, 0, 1, 1',
+ 'sv.setvl. 2, 3, 4, 0, 1, 1',
]
lst = [
"sv.stfsu 0.v, 16(4.v)",
lst = [
'sv.lfsbr 4.v, 11(8.v), 15',
#'sv.lwzbr 4.v, 11(8.v), 15',
+ 'sv.svstep. 2.v, 4, 0',
]
isa = SVP64Asm(lst, macros=macros)
print ("list", list(isa))