SVP64RMModeDecode detects Post-Inc LDST-imm mode
[openpower-isa.git] / src / openpower / decoder / power_svp64_rm.py
1 # SPDX-License-Identifier: LGPLv3+
2 # Copyright (C) 2021 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
3 # Funded by NLnet http://nlnet.nl
4 """SVP64 RM (Remap) Record.
5
6 https://libre-soc.org/openpower/sv/svp64/
7
8 | Field Name | Field bits | Description |
9 |-------------|------------|----------------------------------------|
10 | MASKMODE | `0` | Execution (predication) Mask Kind |
11 | MASK | `1:3` | Execution Mask |
12 | ELWIDTH | `4:5` | Element Width |
13 | ELWIDTH_SRC | `6:7` | Element Width for Source |
14 | SUBVL | `8:9` | Sub-vector length |
15 | EXTRA | `10:18` | context-dependent extra |
16 | MODE | `19:23` | changes Vector behaviour |
17 """
18
19 from nmigen import Elaboratable, Module, Signal, Const
20 from openpower.decoder.power_enums import (SVP64RMMode, Function, SVPtype,
21 SVMode,
22 SVP64PredMode, SVP64sat, SVP64LDSTmode,
23 SVP64BCPredMode, SVP64BCVLSETMode,
24 SVP64BCGate, SVP64BCCTRMode,
25 SVP64width
26 )
27 from openpower.consts import EXTRA3, SVP64MODE
28 from openpower.sv.svp64 import SVP64Rec
29 from nmutil.util import sel
30
# Fields which need to be added to input records in order to pass on
# vital information needed by each pipeline.
# Make sure to keep these the same as SVP64RMModeDecode; in fact,
# TODO: make SVP64RMModeDecode *use* this as a Record!
sv_input_record_layout = [
    # predicate zeroing: source first, then destination
    ("sv_pred_sz", 1),
    ("sv_pred_dz", 1),
    # saturation mode and LD/ST mode
    ("sv_saturate", SVP64sat),
    ("sv_ldstmode", SVP64LDSTmode),
    # predicate type and SV mode
    ("SV_Ptype", SVPtype),
    ("SV_mode", SVMode),
    # ("sv_RC1", 1),   # currently disabled
]
44
45 """RM Mode
46 there are four Mode variants, two for LD/ST, one for Branch-Conditional,
47 and one for everything else
48 https://libre-soc.org/openpower/sv/svp64/
49 https://libre-soc.org/openpower/sv/ldst/
50 https://libre-soc.org/openpower/sv/branches/
51
52 LD/ST immed:
53 00 0 zz els normal mode (with element-stride option)
54 01 inv CR-bit Rc=1: ffirst CR sel
55 01 inv els RC1 Rc=0: ffirst z/nonz
56 10 N zz els sat mode: N=0/1 u/s
57 11 inv CR-bit Rc=1: pred-result CR sel
58 11 inv els RC1 Rc=0: pred-result z/nonz
59
60 LD/ST indexed:
61 00 0 sz dz normal mode
62 00 1 rsvd reserved
63 01 inv CR-bit Rc=1: ffirst CR sel
64 01 inv dz RC1 Rc=0: ffirst z/nonz
65 10 N sz dz sat mode: N=0/1 u/s
66 11 inv CR-bit Rc=1: pred-result CR sel
67 11 inv zz RC1 Rc=0: pred-result z/nonz
68
69 Arithmetic:
70 | 0-1 | 2 | 3 4 | description |
71 | --- | --- |---------|-------------------------- |
72 | 00 | 0 | dz sz | simple mode |
73 | 00 | 1 | 0 RG | scalar reduce mode (mapreduce), SUBVL=1 |
74 | 00 | 1 | SVM 0 | subvector reduce mode, SUBVL>1 |
75 | 00 | 1 | / 1 | reserved |
76 | 01 | inv | CR-bit | Rc=1: ffirst CR sel |
77 | 01 | inv | VLi RC1 | Rc=0: ffirst z/nonz |
78 | 10 | N | dz sz | sat mode: N=0/1 u/s, SUBVL=1 |
79 | 10 | N | zz 0 | sat mode: N=0/1 u/s, SUBVL>1 |
80 | 10 | N | / 1 | reserved |
81 | 11 | inv | CR-bit | Rc=1: pred-result CR sel |
82 | 11 | inv | zz RC1 | Rc=0: pred-result z/nonz |
83
84 Branch Conditional:
85 note that additional BC modes are in *other bits*, specifically
86 the element-width fields: SVP64Rec.ewsrc and SVP64Rec.elwidth
87
88 elwidth ewsrc mode
89 4 5 6 7 19 20 21 22 23
90 ALL LRu / / 0 0 / SNZ sz normal mode
91 ALL LRu / VSb 0 1 VLI SNZ sz VLSET mode
92 ALL LRu BRc / 1 0 / SNZ sz svstep mode
93 ALL LRu BRc VSb 1 1 VLI SNZ sz svstep VLSET mode
94 """
95
96
class SVP64RMModeDecode(Elaboratable):
    """Combinatorial decoder for the SVP64 RM record.

    Takes the raw RM record (``rm_in``) plus a handful of hints from the
    main instruction decoder (function unit, SV mode, Rc, predicate type,
    LD/ST details) and drives the decoded outputs declared in
    ``__init__``: the main mode, Branch-Conditional sub-modes, predication
    settings, element widths, saturation and the LD/ST stride mode.

    All assignments are made in the combinatorial domain.
    """

    def __init__(self, name=None):
        """Create the input and output signals.

        name: passed through to SVP64Rec as the name of ``rm_in``.
        """
        ##### inputs #####
        self.rm_in = SVP64Rec(name=name)
        self.fn_in = Signal(Function)  # LD/ST and Branch is different
        self.sv_mode = Signal(SVMode)  # BRANCH/LDST_IMM/CROP etc.
        self.svp64_vf_in = Signal()  # Vertical-First Mode
        self.ptype_in = Signal(SVPtype)
        self.rc_in = Signal()
        self.cr_5bit_in = Signal()  # if CR field was 5-bit
        self.cr_2bit_in = Signal()  # bottom 2 bits of CR field
        self.ldst_ra_vec = Signal()  # set when RA is vec, indicate Index mode
        self.ldst_imz_in = Signal()  # set when LD/ST immediate is zero

        ##### outputs #####

        # LD/ST-immediate sub-modes. These are *driven* by elaborate()
        # from the mode bits, so they are outputs of this decoder.
        self.ldst_postinc = Signal()  # set when LD/ST immediate post-inc set
        self.ldst_ffirst = Signal()  # set when LD/ST immediate fail-first set

        # main mode (normal, reduce, saturate, ffirst, pred-result, branch)
        self.mode = Signal(SVP64RMMode)

        # Branch Conditional Modes
        self.bc_vlset = Signal(SVP64BCVLSETMode)  # Branch-Conditional VLSET
        self.bc_ctrtest = Signal(SVP64BCCTRMode)  # Branch-Conditional CTR-Test
        self.bc_pred = Signal(SVP64BCPredMode)  # BC predicate mode
        self.bc_vsb = Signal()  # BC VLSET-branch (like BO[1])
        self.bc_gate = Signal(SVP64BCGate)  # BC ALL or ANY gate
        self.bc_lru = Signal()  # BC Link Register Update

        # predication
        self.predmode = Signal(SVP64PredMode)
        self.srcpred = Signal(3)  # source predicate
        self.dstpred = Signal(3)  # destination predicate
        self.pred_sz = Signal(1)  # predicate source zeroing
        self.pred_dz = Signal(1)  # predicate dest zeroing

        # Modes n stuff
        self.ew_src = Signal(SVP64width)  # source elwidth
        self.ew_dst = Signal(SVP64width)  # dest elwidth
        self.subvl = Signal(2)  # subvl
        self.saturate = Signal(SVP64sat)
        self.RC1 = Signal()
        self.vli = Signal()
        self.cr_sel = Signal(2)  # bit of CR to test (index 0-3)
        self.inv = Signal(1)  # and whether it's inverted (like branch BO)
        self.map_evm = Signal(1)
        self.map_crm = Signal(1)
        self.reverse_gear = Signal(1)  # elements to go VL-1..0
        self.ldstmode = Signal(SVP64LDSTmode)  # LD/ST Mode (strided type)

    def elaborate(self, platform):
        """Build and return the combinatorial decode Module.

        Decoding is split three ways on the instruction class:
        Branch-Conditional, CR operations, and everything else
        (arithmetic and LD/ST, which share most of their decoding).
        Predicate source/destination and predicate mode are decoded
        unconditionally at the end.
        """
        m = Module()
        comb = m.d.comb
        mode = self.rm_in.mode

        # decode pieces of mode
        is_ldst = Signal()
        is_bc = Signal()
        is_cr = Signal()
        is_ldstimm = Signal()
        comb += is_ldst.eq(self.fn_in == Function.LDST)
        comb += is_bc.eq(self.fn_in == Function.BRANCH)  # XXX TODO use SV Mode
        comb += is_cr.eq(self.sv_mode == SVMode.CROP.value)
        comb += is_ldstimm.eq(self.sv_mode == SVMode.LDST_IMM.value)
        mode2 = sel(m, mode, SVP64MODE.MOD2)
        cr = sel(m, mode, SVP64MODE.CR)

        with m.If(is_bc):
            # Branch-Conditional is completely different
            # Counter-Test Mode.
            with m.If(mode[SVP64MODE.BC_CTRTEST]):
                with m.If(self.rm_in.ewsrc[1]):
                    comb += self.bc_ctrtest.eq(SVP64BCCTRMode.TEST_INV)
                with m.Else():
                    comb += self.bc_ctrtest.eq(SVP64BCCTRMode.TEST)

            # BC Mode ALL or ANY (Great-Big-AND-gate or Great-Big-OR-gate)
            comb += self.bc_gate.eq(self.rm_in.elwidth[1])
            # Link-Register Update
            comb += self.bc_lru.eq(self.rm_in.elwidth[0])
            comb += self.bc_vsb.eq(self.rm_in.ewsrc[0])

        with m.Elif(is_cr):
            with m.Switch(mode2):
                with m.Case(0, 1):
                    # normal or mapreduce, selected by the REDUCE bit
                    with m.If(mode[SVP64MODE.REDUCE]):
                        comb += self.mode.eq(SVP64RMMode.MAPREDUCE)
                    with m.Else():
                        comb += self.mode.eq(SVP64RMMode.NORMAL)
                with m.Case(2, 3):
                    comb += self.mode.eq(SVP64RMMode.FFIRST)  # fail-first

            # extract failfirst
            with m.If(self.mode == SVP64RMMode.FFIRST):  # fail-first
                comb += self.inv.eq(mode[SVP64MODE.INV])
                comb += self.vli.eq(mode[SVP64MODE.BC_VLSET])
                with m.If(self.cr_5bit_in):
                    comb += self.cr_sel.eq(0b10)  # EQ bit index is implicit
                with m.Else():
                    comb += self.cr_sel.eq(cr)

        with m.Else():
            # combined arith / ldst decoding due to similarity
            with m.Switch(mode2):
                with m.Case(0):  # needs further decoding (LDST no mapreduce)
                    with m.If(is_ldstimm & mode[SVP64MODE.LDI_POST]):
                        # LD/ST-immediate with the LDI_POST bit set:
                        # pick up the post-increment and fail-first flags
                        comb += self.mode.eq(SVP64RMMode.NORMAL)
                        comb += self.ldst_postinc.eq(mode[SVP64MODE.LDI_PI])
                        comb += self.ldst_ffirst.eq(mode[SVP64MODE.LDI_FF])
                    with m.Elif(is_ldst):
                        comb += self.mode.eq(SVP64RMMode.NORMAL)
                    with m.Elif(mode[SVP64MODE.REDUCE]):
                        comb += self.mode.eq(SVP64RMMode.MAPREDUCE)
                    with m.Else():
                        comb += self.mode.eq(SVP64RMMode.NORMAL)
                with m.Case(1):
                    comb += self.mode.eq(SVP64RMMode.FFIRST)  # fail-first
                with m.Case(2):
                    comb += self.mode.eq(SVP64RMMode.SATURATE)  # saturate
                with m.Case(3):
                    comb += self.mode.eq(SVP64RMMode.PREDRES)  # pred result

            # extract "reverse gear" for mapreduce mode
            with m.If((~is_ldst) &                 # not for LD/ST
                      (mode2 == 0) &               # first 2 bits == 0
                      mode[SVP64MODE.REDUCE] &     # bit 2 == 1
                      (~mode[SVP64MODE.MOD3])):    # bit 3 == 0
                comb += self.reverse_gear.eq(mode[SVP64MODE.RG])  # finally whew

            # extract zeroing
            with m.Switch(mode2):
                with m.Case(0):  # needs further decoding (LDST no mapreduce)
                    with m.If(is_ldstimm &
                              (self.ldst_postinc | self.ldst_ffirst)):
                        # no predicate-zeroing in fail-first or postinc:
                        # pred_sz / pred_dz are deliberately left at zero.
                        # (presumably the LDI_PI / LDI_FF bits share
                        # positions with the zeroing bits - confirm
                        # against SVP64MODE)
                        pass
                    with m.Elif(is_ldst):
                        # XXX TODO, work out which of these is most
                        # appropriate set both? or just the one?
                        # or one if LD, the other if ST?
                        comb += self.pred_sz.eq(mode[SVP64MODE.DZ])
                        comb += self.pred_dz.eq(mode[SVP64MODE.DZ])
                    with m.Elif(mode[SVP64MODE.REDUCE]):
                        with m.If(self.rm_in.subvl == Const(0, 2)):  # no SUBVL
                            comb += self.pred_dz.eq(mode[SVP64MODE.DZ])
                    with m.Else():
                        comb += self.pred_sz.eq(mode[SVP64MODE.SZ])
                        comb += self.pred_dz.eq(mode[SVP64MODE.DZ])
                with m.Case(1, 3):
                    with m.If(is_ldst):
                        with m.If(~self.ldst_ra_vec):
                            comb += self.pred_dz.eq(mode[SVP64MODE.DZ])
                    with m.Elif(self.rc_in):
                        comb += self.pred_dz.eq(mode[SVP64MODE.DZ])
                with m.Case(2):
                    with m.If(is_ldst & ~self.ldst_ra_vec):
                        comb += self.pred_dz.eq(mode[SVP64MODE.DZ])
                    with m.Else():
                        comb += self.pred_sz.eq(mode[SVP64MODE.SZ])
                        comb += self.pred_dz.eq(mode[SVP64MODE.DZ])

            # extract failfirst
            with m.If(self.mode == SVP64RMMode.FFIRST):  # fail-first
                comb += self.inv.eq(mode[SVP64MODE.INV])
                with m.If(self.rc_in):
                    comb += self.cr_sel.eq(cr)
                with m.Else():
                    # only when Rc=0
                    comb += self.RC1.eq(mode[SVP64MODE.RC1])
                    comb += self.vli.eq(mode[SVP64MODE.VLI])
                    comb += self.cr_sel.eq(0b10)  # EQ bit index is implicit

            # extract saturate
            # NOTE(review): the mode tables in this module's documentation
            # read "N=0/1 u/s", whereas here N=1 selects UNSIGNED.
            # Confirm which polarity is intended.
            with m.Switch(mode2):
                with m.Case(2):
                    with m.If(mode[SVP64MODE.N]):
                        comb += self.saturate.eq(SVP64sat.UNSIGNED)
                    with m.Else():
                        comb += self.saturate.eq(SVP64sat.SIGNED)
                with m.Default():
                    comb += self.saturate.eq(SVP64sat.NONE)

            # do elwidth/elwidth_src extract
            comb += self.ew_src.eq(self.rm_in.ewsrc)
            comb += self.ew_dst.eq(self.rm_in.elwidth)
            comb += self.subvl.eq(self.rm_in.subvl)

            # extract els (element strided mode bit)
            # see https://libre-soc.org/openpower/sv/ldst/
            els = Signal()
            with m.If(is_ldst):
                with m.Switch(mode2):
                    with m.Case(0):
                        comb += els.eq(mode[SVP64MODE.ELS_NORMAL])
                    with m.Case(2):
                        comb += els.eq(mode[SVP64MODE.ELS_SAT])
                    with m.Case(1, 3):
                        with m.If(self.rc_in):
                            comb += els.eq(mode[SVP64MODE.ELS_FFIRST_PRED])

                # RA is vectorised
                with m.If(self.ldst_ra_vec):
                    comb += self.ldstmode.eq(SVP64LDSTmode.INDEXED)
                # not element-strided, therefore unit...
                with m.Elif(~els):
                    comb += self.ldstmode.eq(SVP64LDSTmode.UNITSTRIDE)
                # but if the LD/ST immediate is zero, allow cache-inhibited
                # loads from same location, therefore don't do element-striding
                with m.Elif(~self.ldst_imz_in):
                    comb += self.ldstmode.eq(SVP64LDSTmode.ELSTRIDE)

        # extract src/dest predicate.  use EXTRA3.MASK because EXTRA2.MASK
        # is in exactly the same bits
        srcmask = sel(m, self.rm_in.extra, EXTRA3.MASK)
        dstmask = self.rm_in.mask
        with m.If(self.ptype_in == SVPtype.P2):
            comb += self.srcpred.eq(srcmask)
        with m.Else():
            # not twin-predicated: source shares the destination mask
            comb += self.srcpred.eq(dstmask)
        comb += self.dstpred.eq(dstmask)

        # identify predicate mode
        with m.If(self.rm_in.mmode == 1):
            comb += self.predmode.eq(SVP64PredMode.CR)  # CR Predicate
        with m.Elif((self.srcpred == 0) & (self.dstpred == 0)):
            comb += self.predmode.eq(SVP64PredMode.ALWAYS)  # No predicate
        with m.Else():
            comb += self.predmode.eq(SVP64PredMode.INT)  # non-zero src: INT

        # TODO: detect zeroing mode, saturation mode, a few more.

        return m
329