# then "yield" fields only from op_fields rather than hard-coded
# list, here.
fields = self.decoder.sigforms[formname]
+ log("prep_namespace", formname, op_fields)
for name in op_fields:
if name == 'spr':
sig = getattr(fields, name.upper())
illegal = False
name = 'setvl'
+ # and svremap not being supported by binutils (.long)
+ if asmop.startswith('svremap'):
+ illegal = False
+ name = 'svremap'
+
# and svshape not being supported by binutils (.long)
if asmop.startswith('svshape'):
illegal = False
# preserve order of register names
input_names = create_args(list(info.read_regs) +
list(info.uninit_regs))
- log(input_names)
+ log("input names", input_names)
# get SVP64 entry for the current instruction
sv_rm = self.svp64rm.instrs.get(name)
self.namespace['NIA'])
return
- # for when SVSHAPE is active, a very bad hack here (to be replaced)
- # using pre-arranged schedule. all of this is awful but it is a
- # start. next job will be to put the proper activation in place
- yield self.dec2.remap_active.eq(1 if self.last_op_svshape else 0)
+ # get the REMAP SPR
+ SVREMAP = self.spr['SVREMAP']
+ # for when SVREMAP is active, using pre-arranged schedule.
+ # note: modifying PowerDecoder2 needs to "settle"
+ remap_en = SVREMAP.men
+ active = self.last_op_svshape and remap_en != 0
+ yield self.dec2.remap_active.eq(remap_en if active else 0)
yield Settle()
if self.is_svp64_mode and self.last_op_svshape:
# get four SVSHAPEs. here we are hard-coding
- # SVSHAPE0 to FRT, SVSHAPE1 to FRA, SVSHAPE2 to FRC and
- # SVSHAPE3 to FRB, assuming "fmadd FRT, FRA, FRC, FRB."
SVSHAPE0 = self.spr['SVSHAPE0']
SVSHAPE1 = self.spr['SVSHAPE1']
SVSHAPE2 = self.spr['SVSHAPE2']
SVSHAPE3 = self.spr['SVSHAPE3']
+ # just some convenient debug info
for i in range(4):
sname = 'SVSHAPE%d' % i
shape = self.spr[sname]
- print (sname, bin(shape.value))
- print (" lims", shape.lims)
- print (" mode", shape.mode)
- print (" skip", shape.skip)
-
+ log (sname, bin(shape.value))
+ log (" lims", shape.lims)
+ log (" mode", shape.mode)
+ log (" skip", shape.skip)
+
+ # set up the list of steps to remap
+ steps = [(self.dec2.in1_step, SVREMAP.mi0), # RA
+ (self.dec2.in2_step, SVREMAP.mi1), # RB
+ (self.dec2.in3_step, SVREMAP.mi2), # RC
+ (self.dec2.o_step, SVREMAP.mo0), # RT
+ (self.dec2.o2_step, SVREMAP.mo1), # EA
+ ]
+ # set up the iterators
remaps = [(SVSHAPE0, SVSHAPE0.get_iterator()),
(SVSHAPE1, SVSHAPE1.get_iterator()),
(SVSHAPE2, SVSHAPE2.get_iterator()),
(SVSHAPE3, SVSHAPE3.get_iterator()),
]
- rremaps = []
+ # go through all iterators in lock-step, advance to next remap_idx
+ remap_idxs = []
for i, (shape, remap) in enumerate(remaps):
# zero is "disabled"
if shape.value == 0x0:
- continue
- # XXX hardcoded! pick dststep for out (i==0) else srcstep
- if shape.mode == 0b00: # multiply mode
- step = dststep if (i == 0) else srcstep
- if shape.mode == 0b01: # FFT butterfly mode
- step = srcstep # XXX HACK - for now only use srcstep
+ remap_idxs.append(0)
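+ # pick srcstep or dststep for this SVSHAPE.
+ # NOTE(review): i enumerates the four SVSHAPEs (0-3) held in remaps,
+ # not the five register slots (0-2=in, 3-4=out) of the steps list,
+ # so only SVSHAPE3 can ever select dststep here - confirm intended.
+ # NOTE(review): a disabled (zero) shape has already appended 0 just
+ # above and, with the "continue" removed, appears to fall through to
+ # the second append below, leaving remap_idxs out of step with
+ # remaps - confirm.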
+ step = dststep if (i in [3, 4]) else srcstep
# this is terrible. O(N^2) looking for the match. but hey.
for idx, remap_idx in enumerate(remap):
if idx == step:
break
- # multiply mode
- if shape.mode == 0b00:
- if i == 0:
- yield self.dec2.o_step.eq(remap_idx) # RT
- yield self.dec2.o2_step.eq(remap_idx) # EA
- elif i == 1:
- yield self.dec2.in1_step.eq(remap_idx) # RA
- elif i == 2:
- yield self.dec2.in3_step.eq(remap_idx) # RB
- elif i == 3:
- yield self.dec2.in2_step.eq(remap_idx) # RC
- # FFT butterfly mode
- if shape.mode == 0b01:
- if i == 0:
- yield self.dec2.o_step.eq(remap_idx) # RT
- yield self.dec2.in2_step.eq(remap_idx) # RB
- elif i == 1:
- yield self.dec2.in1_step.eq(remap_idx) # RA
- yield self.dec2.o2_step.eq(remap_idx) # EA (FRS)
- elif i == 2:
- yield self.dec2.in3_step.eq(remap_idx) # RC
- elif i == 3:
- pass # no SVSHAPE3
- rremaps.append((shape.mode, i, idx, remap_idx)) # debug printing
+ remap_idxs.append(remap_idx)
+
+ rremaps = []
+ # now cross-index the required SHAPE for each of 3-in 2-out regs
+ rnames = ['RA', 'RB', 'RC', 'RT', 'EA']
+ for i, (dstep, shape_idx) in enumerate(steps):
+ (shape, remap) = remaps[shape_idx]
+ remap_idx = remap_idxs[shape_idx]
+ # zero is "disabled"
+ if shape.value == 0x0:
+ continue
+ # now set the actual requested step to the current index
+ yield dstep.eq(remap_idx)
+
+ # debug printout info
+ rremaps.append((shape.mode, i, rnames[i], step, shape_idx,
+ remap_idx))
for x in rremaps:
- print ("shape remap", x)
+ log ("shape remap", x)
# after that, settle down (combinatorial) to let Vector reg numbers
# work themselves out
yield Settle()
remap_active = yield self.dec2.remap_active
- print ("remap active", remap_active)
+ log ("remap active", bin(remap_active))
# main input registers (RT, RA ...)
inputs = []
if ldstmode == SVP64LDSTmode.BITREVERSE.value:
imm = yield self.dec2.dec.fields.FormSVD.SVD[0:11]
imm = exts(imm, 11) # sign-extend to integer
- print ("bitrev SVD", imm)
+ log ("bitrev SVD", imm)
replace_d = True
else:
imm = yield self.dec2.dec.fields.FormD.D[0:16]
# to be able to know if it should apply in the next instruction.
# also (if going to use this instruction) should disable ability
# to interrupt in between. sigh.
- self.last_op_svshape = asmop == 'svshape'
+ self.last_op_svshape = asmop == 'svremap'
self.update_pc_next()
context = args[0].namespace # variables to be injected
saved_values = func_globals.copy() # Shallow copy of dict.
+ log("globals before", context.keys())
func_globals.update(context)
result = func(*args, **kwargs)
log("globals after", func_globals['CIA'], func_globals['NIA'])
# check registers as expected
self._check_regs(sim, expected_regs)
+ def test_svremap(self):
+ """svremap: run a single svremap instruction and check that each
+ operand ends up in the matching field of the SVREMAP SPR.
+
+ The operands "11, 0, 1, 2, 3, 3" are expected to be read back as
+ men, mi0, mi1, mi2, mo0, mo1 respectively.
+ """
+ lst = SVP64Asm(["svremap 11, 0, 1, 2, 3, 3",
+ ])
+ lst = list(lst)
+
+ with Program(lst, bigendian=False) as program:
+ sim = self.run_tst_program(program)
+ # read back the SPR written by the instruction
+ svremap = sim.spr('SVREMAP')
+ # debug printout of the whole SPR and each field, in binary
+ print ("SVREMAP after", bin(svremap.value))
+ print (" men", bin(svremap.men))
+ print (" mi0", bin(svremap.mi0))
+ print (" mi1", bin(svremap.mi1))
+ print (" mi2", bin(svremap.mi2))
+ print (" mo0", bin(svremap.mo0))
+ print (" mo1", bin(svremap.mo1))
+ # each field must equal the corresponding assembler operand
+ self.assertEqual(svremap.men, 11)
+ self.assertEqual(svremap.mi0, 0)
+ self.assertEqual(svremap.mi1, 1)
+ self.assertEqual(svremap.mi2, 2)
+ self.assertEqual(svremap.mo0, 3)
+ self.assertEqual(svremap.mo1, 3)
+
def run_tst_program(self, prog, initial_regs=None,
svstate=None):
if initial_regs is None:
def test_sv_remap_fpmadds_fft(self):
""">>> lst = ["svshape 8, 1, 1, 1",
+ "svremap 31, 1, 0, 2, 0, 1",
"sv.ffmadds 2.v, 2.v, 2.v, 10.v"
]
runs a full in-place O(N log2 N) butterfly schedule for
(3 inputs, 2 outputs)
"""
lst = SVP64Asm( ["svshape 8, 1, 1, 1",
+ "svremap 31, 1, 0, 2, 0, 1",
"sv.ffmadds 0.v, 0.v, 0.v, 8.v"
])
lst = list(lst)
def test_sv_remap_fpmadds_fft_svstep(self):
""">>> lst = SVP64Asm( ["setvl 0, 0, 11, 1, 1, 1",
"svshape 8, 1, 1, 1",
+ "svremap 31, 1, 0, 2, 0, 1",
"sv.ffmadds 0.v, 0.v, 0.v, 8.v",
"setvl. 0, 0, 0, 1, 0, 0",
"bc 4, 2, -16"
"""
lst = SVP64Asm( ["setvl 0, 0, 11, 1, 1, 1",
"svshape 8, 1, 1, 1",
+ "svremap 31, 1, 0, 2, 0, 1",
"sv.ffmadds 0.v, 0.v, 0.v, 8.v",
"setvl. 0, 0, 0, 1, 0, 0",
"bc 4, 2, -16"
vec[jl] = temp2 + temp1
"""
lst = SVP64Asm( ["setvl 0, 0, 11, 1, 1, 1",
- # tpre
"svshape 8, 1, 1, 1",
+ # tpre
+ "svremap 31, 1, 0, 2, 0, 1",
"sv.fmuls 24, 0.v, 16.v",
"svshape 8, 1, 1, 1",
+ "svremap 31, 1, 0, 2, 0, 1",
"sv.fmuls 25, 8.v, 20.v",
"fadds 24, 24, 25",
# tpim
- "svshape 8, 1, 1, 1",
+ "svremap 31, 1, 0, 2, 0, 1",
"sv.fmuls 26, 0.v, 20.v",
"svshape 8, 1, 1, 1",
"sv.fmuls 26, 8.v, 16.v",
"fsubs 26, 26, 27",
# vec_r jh/jl
- "svshape 8, 1, 1, 1",
+ "svremap 31, 1, 0, 2, 0, 1",
"sv.ffadds 0.v, 24, 25",
# vec_i jh/jl
- "svshape 8, 1, 1, 1",
+ "svremap 31, 1, 0, 2, 0, 1",
"sv.ffadds 8.v, 26, 27",
# svstep loop
def test_sv_remap1(self):
""">>> lst = ["svshape 2, 2, 3, 0",
+ "svremap 31, 1, 2, 3, 0, 0",
"sv.fmadds 0.v, 8.v, 16.v, 0.v"
]
REMAP fmadds FRT, FRA, FRC, FRB
"""
lst = SVP64Asm(["svshape 2, 2, 3, 0",
+ "svremap 31, 1, 2, 3, 0, 0",
"sv.fmadds 0.v, 16.v, 32.v, 0.v"
])
lst = list(lst)
def test_sv_remap2(self):
""">>> lst = ["svshape 5, 4, 3, 0",
+ "svremap 31, 1, 2, 3, 0, 0",
"sv.fmadds 0.v, 8.v, 16.v, 0.v"
]
REMAP fmadds FRT, FRA, FRC, FRB
"""
lst = SVP64Asm(["svshape 4, 3, 3, 0",
+ "svremap 31, 1, 2, 3, 0, 0",
"sv.fmadds 0.v, 16.v, 32.v, 0.v"
])
lst = list(lst)
self.in3_step = Signal(7, name="reg_c_step")
self.o_step = Signal(7, name="reg_o_step")
self.o2_step = Signal(7, name="reg_o2_step")
- self.remap_active = Signal(1, name="remap_active")
+ self.remap_active = Signal(5, name="remap_active") # per reg
self.no_in_vec = Signal(1, name="no_in_vec") # no inputs vector
self.no_out_vec = Signal(1, name="no_out_vec") # no outputs vector
self.loop_continue = Signal(1, name="loop_continue")
# registers a, b, c and out and out2 (LD/ST EA)
sv_etype = self.op_get("SV_Etype")
- for rname, to_reg, fromreg, svdec, remapstep, out in (
+ for i, stuff in enumerate((
("RA", e.read_reg1, dec_a.reg_out, in1_svdec, in1_step, False),
("RB", e.read_reg2, dec_b.reg_out, in2_svdec, in2_step, False),
("RC", e.read_reg3, dec_c.reg_out, in3_svdec, in3_step, False),
("RT", e.write_reg, dec_o.reg_out, o_svdec, o_step, True),
- ("EA", e.write_ea, dec_o2.reg_out, o2_svdec, o2_step, True)):
+ ("EA", e.write_ea, dec_o2.reg_out, o2_svdec, o2_step, True))):
+ rname, to_reg, fromreg, svdec, remapstep, out = stuff
comb += svdec.extra.eq(extra) # EXTRA field of SVP64 RM
comb += svdec.etype.eq(sv_etype) # EXTRA2/3 for this insn
comb += svdec.reg_in.eq(fromreg.data) # 3-bit (CR0/BC/BFA)
# however when REMAP is active, the FFT REMAP
# schedule takes care of this offset.
with m.If(dec_o2.reg_out.ok & dec_o2.fp_madd_en):
- with m.If(~self.remap_active):
+ with m.If(~self.remap_active[i]):
with m.If(svdec.isvec):
comb += offs.eq(vl) # VL for Vectors
# detect if Vectorised: add srcstep/dststep if yes.
with m.If(svdec.isvec):
selectstep = dststep if out else srcstep
step = Signal(7, name="step_%s" % rname.lower())
- with m.If(self.remap_active):
+ with m.If(self.remap_active[i]):
comb += step.eq(remapstep)
with m.Else():
comb += step.eq(selectstep)
# same trick is applied to FRA, above, but it's a lot cleaner, there
with m.If(dec_o2.reg_out.ok & dec_o2.fp_madd_en):
comb += offs.eq(0)
- with m.If(~self.remap_active):
+ with m.If(~self.remap_active[4]):
with m.If(o2_svdec.isvec):
comb += offs.eq(vl) # VL for Vectors
with m.Else():
svdec = o_svdec # yes take source as o_svdec...
with m.If(svdec.isvec):
step = Signal(7, name="step_%s" % rname.lower())
- with m.If(self.remap_active):
+ with m.If(self.remap_active[4]):
comb += step.eq(o2_step)
with m.Else():
comb += step.eq(dststep)