from nmigen import Elaboratable, Module, Signal, Const
from openpower.decoder.power_enums import (SVP64RMMode, Function, SVPtype,
                                           SVP64PredMode, SVP64sat,
                                           SVP64LDSTmode,
                                           SVP64BCPredMode, SVP64BCVLSETMode,
                                           SVP64BCGate, SVP64BCCTRMode,
                                           SVP64width)
from openpower.consts import EXTRA3, SVP64MODE
from openpower.sv.svp64 import SVP64Rec
from nmutil.util import sel
('sv_pred_sz', 1), # predicate source zeroing
('sv_pred_dz', 1), # predicate dest zeroing
('sv_saturate', SVP64sat),
+ ('sv_ldstmode', SVP64LDSTmode),
('SV_Ptype', SVPtype),
#('sv_RC1', 1),
]
"""RM Mode
-there are three Mode variants, two for LD/ST and one for everything else
+there are four Mode variants, two for LD/ST, one for Branch-Conditional,
+and one for everything else
https://libre-soc.org/openpower/sv/svp64/
https://libre-soc.org/openpower/sv/ldst/
+https://libre-soc.org/openpower/sv/branches/
LD/ST immed:
-00 str sz dz normal mode
+00 0 zz els normal mode (with element-stride option)
+00 1 zz els Pack/unpack (with element-stride option)
01 inv CR-bit Rc=1: ffirst CR sel
01 inv els RC1 Rc=0: ffirst z/nonz
-10 N dz els sat mode: N=0/1 u/s
+10 N zz els sat mode: N=0/1 u/s
11 inv CR-bit Rc=1: pred-result CR sel
11 inv els RC1 Rc=0: pred-result z/nonz
LD/ST indexed:
01 inv dz RC1 Rc=0: ffirst z/nonz
10 N sz dz sat mode: N=0/1 u/s
11 inv CR-bit Rc=1: pred-result CR sel
-11 inv dz RC1 Rc=0: pred-result z/nonz
+11 inv zz RC1 Rc=0: pred-result z/nonz
Arithmetic:
-00 0 sz dz normal mode
-00 1 dz CRM reduce mode (mapreduce), SUBVL=1
-00 1 SVM CRM subvector reduce mode, SUBVL>1
-01 inv CR-bit Rc=1: ffirst CR sel
-01 inv dz RC1 Rc=0: ffirst z/nonz
-10 N sz dz sat mode: N=0/1 u/s
-11 inv CR-bit Rc=1: pred-result CR sel
-11 inv dz RC1 Rc=0: pred-result z/nonz
+| 0-1 | 2 | 3 4 | description |
+| --- | --- |---------|-------------------------- |
+| 00 | 0 | dz sz | simple mode |
+| 00 | 1 | 0 RG | scalar reduce mode (mapreduce), SUBVL=1 |
+| 00 | 1 | SVM 0 | subvector reduce mode, SUBVL>1 |
+| 00 | 1 | SVM 1 | Pack/Unpack mode, SUBVL>1 |
+| 01 | inv | CR-bit | Rc=1: ffirst CR sel |
+| 01 | inv | VLi RC1 | Rc=0: ffirst z/nonz |
+| 10 | N | dz sz | sat mode: N=0/1 u/s, SUBVL=1 |
+| 10 | N | zz 0 | sat mode: N=0/1 u/s, SUBVL>1 |
+| 10 | N | zz 1 | Pack/Unpack sat mode: N=0/1 u/s, SUBVL>1 |
+| 11 | inv | CR-bit | Rc=1: pred-result CR sel |
+| 11 | inv | zz RC1 | Rc=0: pred-result z/nonz |
+
+Branch Conditional:
+note that additional BC modes are in *other bits*, specifically
+the element-width fields: SVP64Rec.ewsrc and SVP64Rec.elwidth
+
+elwidth ewsrc mode
+4 5 6 7 19 20 21 22 23
+ALL LRu / / 0 0 / SNZ sz normal mode
+ALL LRu / VSb 0 1 VLI SNZ sz VLSET mode
+ALL LRu BRc / 1 0 / SNZ sz svstep mode
+ALL LRu BRc VSb 1 1 VLI SNZ sz svstep VLSET mode
"""
+
class SVP64RMModeDecode(Elaboratable):
def __init__(self, name=None):
+ ##### inputs #####
self.rm_in = SVP64Rec(name=name)
- self.fn_in = Signal(Function) # LD/ST is different
+ self.fn_in = Signal(Function) # LD/ST and Branch is different
+ self.svp64_vf_in = Signal() # Vertical-First Mode
self.ptype_in = Signal(SVPtype)
self.rc_in = Signal()
- self.ldst_idx = Signal()
+ self.ldst_ra_vec = Signal() # set when RA is vec, indicate Index mode
+ self.ldst_imz_in = Signal() # set when LD/ST immediate is zero
+
+ ##### outputs #####
- # main mode (normal, reduce, saturate, ffirst, pred-result)
+ # main mode (normal, reduce, saturate, ffirst, pred-result, branch)
self.mode = Signal(SVP64RMMode)
+ # Branch Conditional Modes
+ self.bc_vlset = Signal(SVP64BCVLSETMode) # Branch-Conditional VLSET
+ self.bc_ctrtest = Signal(SVP64BCCTRMode) # Branch-Conditional CTR-Test
+ self.bc_pred = Signal(SVP64BCPredMode) # BC predicate mode
+ self.bc_vsb = Signal() # BC VLSET-branch (like BO[1])
+ self.bc_gate = Signal(SVP64BCGate) # BC ALL or ANY gate
+ self.bc_lru = Signal() # BC Link Register Update
+
# predication
self.predmode = Signal(SVP64PredMode)
self.srcpred = Signal(3) # source predicate
self.pred_sz = Signal(1) # predicate source zeroing
self.pred_dz = Signal(1) # predicate dest zeroing
+ # Modes n stuff
+ self.ew_src = Signal(SVP64width) # source elwidth
+ self.ew_dst = Signal(SVP64width) # dest elwidth
+ self.pack = Signal() # pack mode
+ self.unpack = Signal() # unpack mode
self.saturate = Signal(SVP64sat)
self.RC1 = Signal()
- self.cr_sel = Signal(2)
- self.inv = Signal(1)
+ self.cr_sel = Signal(2) # bit of CR to test (index 0-3)
+ self.inv = Signal(1) # and whether it's inverted (like branch BO)
self.map_evm = Signal(1)
self.map_crm = Signal(1)
+ self.reverse_gear = Signal(1) # elements to go VL-1..0
+ self.ldstmode = Signal(SVP64LDSTmode) # LD/ST Mode (strided type)
def elaborate(self, platform):
m = Module()
# decode pieces of mode
is_ldst = Signal()
+ is_bc = Signal()
+ do_pu = Signal() # whether to decode pack/unpack
comb += is_ldst.eq(self.fn_in == Function.LDST)
+ comb += is_bc.eq(self.fn_in == Function.BRANCH)
mode2 = sel(m, mode, SVP64MODE.MOD2)
- with m.Switch(mode2):
- with m.Case(0): # needs further decoding (LDST no mapreduce)
- with m.If(is_ldst):
- comb += self.mode.eq(SVP64RMMode.NORMAL)
- with m.Elif(mode[SVP64MODE.REDUCE]):
- comb += self.mode.eq(SVP64RMMode.MAPREDUCE)
+
+ with m.If(is_bc):
+ # Branch-Conditional is completely different
+ # Counter-Test Mode.
+ with m.If(mode[SVP64MODE.BC_CTRTEST]):
+ with m.If(self.rm_in.ewsrc[0]):
+ comb += self.bc_ctrtest.eq(SVP64BCCTRMode.TEST_INV)
with m.Else():
- comb += self.mode.eq(SVP64RMMode.NORMAL)
- with m.Case(1):
- comb += self.mode.eq(SVP64RMMode.FFIRST) # fail-first
- with m.Case(2):
- comb += self.mode.eq(SVP64RMMode.SATURATE) # saturate
- with m.Case(3):
- comb += self.mode.eq(SVP64RMMode.PREDRES) # predicate result
-
- # extract zeroing
- with m.Switch(mode2):
- with m.Case(0): # needs further decoding (LDST no mapreduce)
- with m.If(is_ldst):
- comb += self.pred_sz.eq(mode[SVP64MODE.SZ])
- comb += self.pred_dz.eq(mode[SVP64MODE.DZ])
- with m.Elif(mode[SVP64MODE.REDUCE]):
- with m.If(self.rm_in.subvl == Const(0, 2)): # no SUBVL
- comb += self.pred_dz.eq(mode[SVP64MODE.DZ])
+ comb += self.bc_ctrtest.eq(SVP64BCCTRMode.TEST)
+ # VLSET mode
+ with m.If(mode[SVP64MODE.BC_VLSET]):
+ with m.If(mode[SVP64MODE.BC_VLI]):
+ comb += self.bc_vlset.eq(SVP64BCVLSETMode.VL_INCL)
with m.Else():
- comb += self.pred_sz.eq(mode[SVP64MODE.SZ])
- comb += self.pred_dz.eq(mode[SVP64MODE.DZ])
- with m.Case(1, 3):
- with m.If(is_ldst):
- with m.If(~self.ldst_idx):
+ comb += self.bc_vlset.eq(SVP64BCVLSETMode.VL_EXCL)
+ # BC Mode ALL or ANY (Great-Big-AND-gate or Great-Big-OR-gate)
+ comb += self.bc_gate.eq(self.rm_in.elwidth[0])
+ # Link-Register Update
+ comb += self.bc_lru.eq(self.rm_in.elwidth[1])
+ comb += self.bc_vsb.eq(self.rm_in.ewsrc[1])
+
+ with m.Else():
+ # combined arith / ldst decoding due to similarity
+ with m.Switch(mode2):
+ with m.Case(0): # needs further decoding (LDST no mapreduce)
+ with m.If(is_ldst):
+ comb += self.mode.eq(SVP64RMMode.NORMAL)
+ comb += do_pu.eq(mode[SVP64MODE.LDST_PACK]) # Pack mode
+ with m.Elif(mode[SVP64MODE.REDUCE]):
+ comb += self.mode.eq(SVP64RMMode.MAPREDUCE)
+ # Pack only active if SVM=1 & SUBVL>1 & Mode[4]=1
+ with m.If(self.rm_in.subvl != Const(0, 2)): # active
+ comb += do_pu.eq(mode[SVP64MODE.ARITH_PACK])
+ with m.Else():
+ comb += self.mode.eq(SVP64RMMode.NORMAL)
+ with m.Case(1):
+ comb += self.mode.eq(SVP64RMMode.FFIRST) # fail-first
+ with m.Case(2):
+ comb += self.mode.eq(SVP64RMMode.SATURATE) # saturate
+ with m.Case(3):
+ comb += self.mode.eq(SVP64RMMode.PREDRES) # pred result
+
+ # extract "reverse gear" for mapreduce mode
+ with m.If((~is_ldst) & # not for LD/ST
+ (mode2 == 0) & # first 2 bits == 0
+ mode[SVP64MODE.REDUCE] & # bit 2 == 1
+ (~mode[SVP64MODE.MOD3])): # bit 3 == 0
+ comb += self.reverse_gear.eq(mode[SVP64MODE.RG]) # finally whew
+
+ # extract zeroing
+ with m.Switch(mode2):
+ with m.Case(0): # needs further decoding (LDST no mapreduce)
+ with m.If(is_ldst):
+ # XXX TODO, work out which of these is most
+ # appropriate set both? or just the one?
+ # or one if LD, the other if ST?
+ comb += self.pred_sz.eq(mode[SVP64MODE.DZ])
comb += self.pred_dz.eq(mode[SVP64MODE.DZ])
- with m.Elif(self.rc_in):
- comb += self.pred_dz.eq(mode[SVP64MODE.DZ])
- with m.Case(2):
- with m.If(is_ldst & ~self.ldst_idx):
- comb += self.pred_dz.eq(mode[SVP64MODE.DZ])
- with m.Else():
- comb += self.pred_sz.eq(mode[SVP64MODE.SZ])
- comb += self.pred_dz.eq(mode[SVP64MODE.DZ])
+ with m.Elif(mode[SVP64MODE.REDUCE]):
+ with m.If(self.rm_in.subvl == Const(0, 2)): # no SUBVL
+ comb += self.pred_dz.eq(mode[SVP64MODE.DZ])
+ with m.Else():
+ comb += self.pred_sz.eq(mode[SVP64MODE.SZ])
+ comb += self.pred_dz.eq(mode[SVP64MODE.DZ])
+ with m.Case(1, 3):
+ with m.If(is_ldst):
+ with m.If(~self.ldst_ra_vec):
+ comb += self.pred_dz.eq(mode[SVP64MODE.DZ])
+ with m.Elif(self.rc_in):
+ comb += self.pred_dz.eq(mode[SVP64MODE.DZ])
+ with m.Case(2):
+ with m.If(is_ldst & ~self.ldst_ra_vec):
+ comb += self.pred_dz.eq(mode[SVP64MODE.DZ])
+ with m.Else():
+ comb += self.pred_sz.eq(mode[SVP64MODE.SZ])
+ comb += self.pred_dz.eq(mode[SVP64MODE.DZ])
+
+ # extract saturate
+ with m.Switch(mode2):
+ with m.Case(2):
+ with m.If(mode[SVP64MODE.N]):
+ comb += self.saturate.eq(SVP64sat.UNSIGNED)
+ with m.Else():
+ comb += self.saturate.eq(SVP64sat.SIGNED)
+ with m.Default():
+ comb += self.saturate.eq(SVP64sat.NONE)
+
+ # extract pack/unpack, actually just ELWIDTH_SRC, so
+ # do elwidth/elwidth_src at same time
+ with m.If(do_pu):
+ comb += self.pack.eq(self.rm_in.ewsrc[0])
+ comb += self.unpack.eq(self.rm_in.ewsrc[1])
+ comb += self.ew_src.eq(self.rm_in.elwidth) # make same as elwid
+ with m.Else():
+ comb += self.ew_src.eq(self.rm_in.ewsrc)
+ comb += self.ew_dst.eq(self.rm_in.elwidth)
+
+ # extract els (element strided mode bit)
+ # see https://libre-soc.org/openpower/sv/ldst/
+ els = Signal()
+ with m.If(is_ldst):
+ with m.Switch(mode2):
+ with m.Case(0):
+ comb += els.eq(mode[SVP64MODE.ELS_NORMAL])
+ with m.Case(2):
+ comb += els.eq(mode[SVP64MODE.ELS_SAT])
+ with m.Case(1, 3):
+ with m.If(self.rc_in):
+ comb += els.eq(mode[SVP64MODE.ELS_FFIRST_PRED])
+
+ # RA is vectorised
+ with m.If(self.ldst_ra_vec):
+ comb += self.ldstmode.eq(SVP64LDSTmode.INDEXED)
+ # not element-strided, therefore unit...
+ with m.Elif(~els):
+ comb += self.ldstmode.eq(SVP64LDSTmode.UNITSTRIDE)
+ # but if the LD/ST immediate is zero, allow cache-inhibited
+ # loads from same location, therefore don't do element-striding
+ with m.Elif(~self.ldst_imz_in):
+ comb += self.ldstmode.eq(SVP64LDSTmode.ELSTRIDE)
# extract src/dest predicate. use EXTRA3.MASK because EXTRA2.MASK
# is in exactly the same bits