add power_decode_svp64_rm.py capability for new LD/ST format
[openpower-isa.git] / src / openpower / decoder / power_svp64_rm.py
1 # SPDX-License-Identifier: LGPLv3+
2 # Copyright (C) 2021 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
3 # Funded by NLnet http://nlnet.nl
4
5 # sigh this entire module is a laborious mess. it really should be
6 # auto-generated from the power_insn.py database but a technique
7 # for doing so (similar to python HTML/XML-node-walking) is needed
8
9 """SVP64 RM (Remap) Record.
10
11 https://libre-soc.org/openpower/sv/svp64/
12
13 | Field Name | Field bits | Description |
14 |-------------|------------|----------------------------------------|
15 | MASKMODE | `0` | Execution (predication) Mask Kind |
16 | MASK | `1:3` | Execution Mask |
17 | ELWIDTH | `4:5` | Element Width |
18 | ELWIDTH_SRC | `6:7` | Element Width for Source |
19 | SUBVL | `8:9` | Sub-vector length |
20 | EXTRA | `10:18` | context-dependent extra |
21 | MODE | `19:23` | changes Vector behaviour |
22 """
23
24 from nmigen import Elaboratable, Module, Signal, Const
25 from openpower.decoder.power_enums import (SVP64RMMode, Function, SVPType,
26 SVMode,
27 SVP64PredMode, SVP64Sat, SVP64LDSTmode,
28 SVP64BCPredMode, SVP64BCVLSETMode,
29 SVP64BCGate, SVP64BCCTRMode,
30 SVP64Width
31 )
32 from openpower.consts import EXTRA3, SVP64MODE
33 from openpower.sv.svp64 import SVP64Rec
34 from nmutil.util import sel
35
# Fields that must be added to pipeline input records in order to pass
# on the vital SVP64 information needed by each pipeline.
# Keep these in step with the outputs of SVP64RMModeDecode; in fact,
# TODO: make SVP64RMModeDecode *use* this as a Record!
sv_input_record_layout = [
    ('sv_pred_sz', 1),                # predicate source zeroing
    ('sv_pred_dz', 1),                # predicate dest zeroing
    ('sv_saturate', SVP64Sat),
    ('sv_ldstmode', SVP64LDSTmode),
    ('SV_Ptype', SVPType),
    ('SV_mode', SVMode),
    #('sv_RC1', 1),
]
49
50 """RM Mode
51 there are four Mode variants, two for LD/ST, one for Branch-Conditional,
52 and one for everything else
53 https://libre-soc.org/openpower/sv/svp64/
54 https://libre-soc.org/openpower/sv/ldst/
55 https://libre-soc.org/openpower/sv/branches/
56
57 LD/ST immed:
58 | 0 | 1 | 2 | 3 4 | description |
59 |---|---| --- |---------|--------------------------- |
60 | 0 | 0 | 0 | zz els | simple mode |
61 | 0 | 0 | 1 | PI LF | post-increment and Fault-First |
62 | 1 | 0 | N | zz els | sat mode: N=0/1 u/s |
63 |VLi| 1 | inv | CR-bit | Rc=1: ffirst CR sel |
64 |VLi| 1 | inv | els RC1 | Rc=0: ffirst z/nonz |
65
66 00 0 zz els normal mode (with element-stride option)
67 01 inv CR-bit Rc=1: ffirst CR sel
68 01 inv els RC1 Rc=0: ffirst z/nonz
69 10 N zz els sat mode: N=0/1 u/s
70 11 inv CR-bit Rc=1: pred-result CR sel
71 11 inv els RC1 Rc=0: pred-result z/nonz
72
73 LD/ST indexed:
74
75 | 0 | 1 | 2 | 3 4 | description |
76 |---|---| --- |---------|--------------------------- |
77 |els| 0 | SEA | dz sz | simple mode |
78 |VLi| 1 | inv | CR-bit | Rc=1: ffirst CR sel |
79 |VLi| 1 | inv | els RC1 | Rc=0: ffirst z/nonz |
80
81 00 0 sz dz normal mode
82 00 1 rsvd reserved
83 01 inv CR-bit Rc=1: ffirst CR sel
84 01 inv dz RC1 Rc=0: ffirst z/nonz
85 10 N sz dz sat mode: N=0/1 u/s
86 11 inv CR-bit Rc=1: pred-result CR sel
87 11 inv zz RC1 Rc=0: pred-result z/nonz
88
89 Arithmetic:
90 | 0-1 | 2 | 3 4 | description |
91 | --- | --- |---------|-------------------------- |
92 | 00 | 0 | dz sz | simple mode |
93 | 00 | 1 | 0 RG | scalar reduce mode (mapreduce), SUBVL=1 |
94 | 00 | 1 | SVM 0 | subvector reduce mode, SUBVL>1 |
95 | 00 | 1 | / 1 | reserved |
96 | 01 | inv | CR-bit | Rc=1: ffirst CR sel |
97 | 01 | inv | VLi RC1 | Rc=0: ffirst z/nonz |
98 | 10 | N | dz sz | sat mode: N=0/1 u/s, SUBVL=1 |
99 | 10 | N | zz 0 | sat mode: N=0/1 u/s, SUBVL>1 |
100 | 10 | N | / 1 | reserved |
101 | 11 | inv | CR-bit | Rc=1: pred-result CR sel |
102 | 11 | inv | zz RC1 | Rc=0: pred-result z/nonz |
103
104 Branch Conditional:
105 note that additional BC modes are in *other bits*, specifically
106 the element-width fields: SVP64Rec.ewsrc and SVP64Rec.elwidth
107
108 elwidth ewsrc mode
109 4 5 6 7 19 20 21 22 23
110 ALL LRu / / 0 0 / SNZ sz normal mode
111 ALL LRu / VSb 0 1 VLI SNZ sz VLSET mode
112 ALL LRu BRc / 1 0 / SNZ sz svstep mode
113 ALL LRu BRc VSb 1 1 VLI SNZ sz svstep VLSET mode
114 """
115
116
class SVP64RMModeDecode(Elaboratable):
    """Combinatorial decoder for the MODE field of an SVP64 RM record.

    Takes the raw RM record (``rm_in``) plus a few hints from the main
    instruction decoder and breaks the context-dependent MODE bits out
    into individual signals.  Four mutually-exclusive decode paths are
    selected, in priority order: Branch-Conditional, CR ops, LD/ST
    (immediate and indexed) and finally "everything else" (arithmetic).
    Source/destination predicate and predicate-mode are decoded for
    all paths.
    """

    def __init__(self, name=None):
        """Create the input and output signals.

        name: passed through to the SVP64Rec used for ``rm_in``.
        """
        ##### inputs #####
        self.rm_in = SVP64Rec(name=name)
        self.fn_in = Signal(Function)  # LD/ST and Branch is different
        self.sv_mode = Signal(SVMode)  # BRANCH/LDST_IMM/CROP etc.
        self.svp64_vf_in = Signal()  # Vertical-First Mode
        self.ptype_in = Signal(SVPType)
        self.rc_in = Signal()
        self.cr_5bit_in = Signal()  # if CR field was 5-bit
        self.cr_2bit_in = Signal()  # bottom 2 bits of CR field
        self.ldst_ra_vec = Signal()  # set when RA is vec, indicate Index mode
        self.ldst_imz_in = Signal()  # set when LD/ST immediate is zero
        # NB: although grouped with the inputs, the next two signals are
        # *driven* by elaborate() (LD/ST-immediate, mode2 == 0 path)
        self.ldst_postinc = Signal()  # set when LD/ST immediate post-inc set
        self.ldst_ffirst = Signal()  # set when LD/ST immediate fail-first set

        ##### outputs #####

        # main mode (normal, reduce, saturate, ffirst, pred-result, branch)
        self.mode = Signal(SVP64RMMode)

        # Branch Conditional Modes
        self.bc_vlset = Signal(SVP64BCVLSETMode)  # Branch-Conditional VLSET
        self.bc_ctrtest = Signal(SVP64BCCTRMode)  # Branch-Conditional CTR-Test
        self.bc_pred = Signal(SVP64BCPredMode)  # BC predicate mode
        self.bc_vsb = Signal()  # BC VLSET-branch (like BO[1])
        self.bc_gate = Signal(SVP64BCGate)  # BC ALL or ANY gate
        self.bc_lru = Signal()  # BC Link Register Update

        # predication
        self.predmode = Signal(SVP64PredMode)
        self.srcpred = Signal(3)  # source predicate
        self.dstpred = Signal(3)  # destination predicate
        self.pred_sz = Signal(1)  # predicate source zeroing
        self.pred_dz = Signal(1)  # predicate dest zeroing

        # Modes n stuff
        self.ew_src = Signal(SVP64Width)  # source elwidth
        self.ew_dst = Signal(SVP64Width)  # dest elwidth
        self.subvl = Signal(2)  # subvl
        self.saturate = Signal(SVP64Sat)
        self.RC1 = Signal()
        self.vli = Signal()
        self.cr_sel = Signal(2)  # bit of CR to test (index 0-3)
        self.inv = Signal(1)  # and whether it's inverted (like branch BO)
        self.map_evm = Signal(1)
        self.map_crm = Signal(1)
        self.reverse_gear = Signal(1)  # elements to go VL-1..0
        self.ldstmode = Signal(SVP64LDSTmode)  # LD/ST Mode (strided type)

    def elaborate(self, platform):
        """Build and return the (purely combinatorial) decode Module."""
        m = Module()
        comb = m.d.comb
        mode = self.rm_in.mode

        # decode pieces of mode
        is_ldst = Signal()
        is_bc = Signal()
        is_cr = Signal()
        is_ldstimm = Signal()
        comb += is_ldst.eq(self.fn_in == Function.LDST)
        comb += is_bc.eq(self.fn_in == Function.BRANCH)  # XXX TODO use SV Mode
        comb += is_cr.eq(self.sv_mode == SVMode.CROP.value)
        comb += is_ldstimm.eq(self.sv_mode == SVMode.LDST_IMM.value)
        mode2 = sel(m, mode, SVP64MODE.MOD2)
        cr = sel(m, mode, SVP64MODE.CR)

        #####################
        # Branch-Conditional decoding
        #####################
        with m.If(is_bc):
            # Counter-Test Mode.
            with m.If(mode[SVP64MODE.BC_CTRTEST]):
                with m.If(self.rm_in.ewsrc[1]):
                    comb += self.bc_ctrtest.eq(SVP64BCCTRMode.TEST_INV)
                with m.Else():
                    comb += self.bc_ctrtest.eq(SVP64BCCTRMode.TEST)

            # BC Mode ALL or ANY (Great-Big-AND-gate or Great-Big-OR-gate)
            comb += self.bc_gate.eq(self.rm_in.elwidth[1])
            # Link-Register Update
            comb += self.bc_lru.eq(self.rm_in.elwidth[0])
            comb += self.bc_vsb.eq(self.rm_in.ewsrc[0])

        #####################
        # CRops decoding
        #####################
        with m.Elif(is_cr):
            with m.Switch(mode2):
                with m.Case(0, 1):  # needs further decoding
                    with m.If(mode[SVP64MODE.REDUCE]):
                        comb += self.mode.eq(SVP64RMMode.MAPREDUCE)
                    with m.Else():
                        comb += self.mode.eq(SVP64RMMode.NORMAL)
                with m.Case(2, 3):
                    comb += self.mode.eq(SVP64RMMode.FFIRST)  # fail-first

            # extract failfirst
            with m.If(self.mode == SVP64RMMode.FFIRST):  # fail-first
                comb += self.inv.eq(mode[SVP64MODE.INV])
                comb += self.vli.eq(mode[SVP64MODE.BC_VLSET])
                with m.If(self.cr_5bit_in):
                    comb += self.cr_sel.eq(0b10)  # EQ bit index is implicit
                with m.Else():
                    comb += self.cr_sel.eq(cr)

        #####################
        # LDST decoding (both Idx and Imm - should be separate)
        #####################
        with m.Elif(is_ldst):
            with m.Switch(mode2):
                with m.Case(0):  # needs further decoding (LDST no mapreduce)
                    with m.If(is_ldstimm & mode[SVP64MODE.LDI_POST]):
                        comb += self.mode.eq(SVP64RMMode.NORMAL)
                        comb += self.ldst_postinc.eq(mode[SVP64MODE.LDI_PI])
                        comb += self.ldst_ffirst.eq(mode[SVP64MODE.LDI_FF])
                    # is_ldst is already known true in this branch, so
                    # this behaves as a plain "else"
                    with m.Elif(is_ldst):
                        comb += self.mode.eq(SVP64RMMode.NORMAL)
                with m.Case(1, 3):
                    comb += self.mode.eq(SVP64RMMode.FFIRST)  # ffirst
                with m.Case(2):
                    with m.If(is_ldstimm):
                        comb += self.mode.eq(SVP64RMMode.SATURATE)  # saturate
                    with m.Else():
                        comb += self.mode.eq(SVP64RMMode.NORMAL)  # els-bit

            # extract zeroing
            with m.If(is_ldst & ~is_ldstimm):  # LDST-Indexed
                with m.Switch(mode2):
                    with m.Case(0, 2):
                        # sz/dz only when mode2[0] = 0.  source and dest
                        # zeroing are *separate* bits here (unlike the
                        # combined zz of LDST-Immediate below)
                        comb += self.pred_sz.eq(mode[SVP64MODE.SZ])
                        comb += self.pred_dz.eq(mode[SVP64MODE.DZ])

            with m.Elif(is_ldstimm):  # LDST-Immediate
                with m.Switch(mode2):
                    with m.Case(0):  # simple mode
                        with m.If(~mode[2]):  # (but not PI/LF)
                            # [MSB0-numbered] bits 0,1,2 of mode zero, use zz
                            comb += self.pred_sz.eq(mode[SVP64MODE.ZZ])
                            comb += self.pred_dz.eq(mode[SVP64MODE.ZZ])
                    with m.Case(2):  # saturated mode
                        # saturated-mode also uses zz
                        comb += self.pred_sz.eq(mode[SVP64MODE.ZZ])
                        comb += self.pred_dz.eq(mode[SVP64MODE.ZZ])

            # extract failfirst
            with m.If(self.mode == SVP64RMMode.FFIRST):  # fail-first
                comb += self.inv.eq(mode[SVP64MODE.INV])
                with m.If(is_ldst):
                    comb += self.vli.eq(mode[SVP64MODE.LDST_VLI])
                with m.If(self.rc_in):
                    comb += self.cr_sel.eq(cr)
                with m.Else():
                    # only when Rc=0
                    comb += self.RC1.eq(mode[SVP64MODE.RC1])
                    # NOTE(review): never true inside the LD/ST branch;
                    # looks like a leftover from the arithmetic copy
                    with m.If(~is_ldst):
                        comb += self.vli.eq(mode[SVP64MODE.VLI])
                    comb += self.cr_sel.eq(0b10)  # EQ bit index is implicit

            # extract saturate
            with m.Switch(mode2):
                with m.Case(2):
                    with m.If(mode[SVP64MODE.N]):
                        comb += self.saturate.eq(SVP64Sat.UNSIGNED)
                    with m.Else():
                        comb += self.saturate.eq(SVP64Sat.SIGNED)
                with m.Default():
                    comb += self.saturate.eq(SVP64Sat.NONE)

            # do elwidth/elwidth_src extract
            comb += self.ew_src.eq(self.rm_in.ewsrc)
            comb += self.ew_dst.eq(self.rm_in.elwidth)
            comb += self.subvl.eq(self.rm_in.subvl)

            # extract els (element strided mode bit)
            # see https://libre-soc.org/openpower/sv/ldst/
            # NOTE(review): the mode tables in this file list "els" on the
            # Rc=0 fail-first rows, yet it is read here when rc_in is set
            # - confirm against the specification
            els = Signal()
            with m.If(is_ldstimm):  # LD/ST-immediate
                with m.Switch(mode2):
                    with m.Case(0):
                        with m.If(~mode[2]):  # (but not PI/LF)
                            comb += els.eq(mode[SVP64MODE.ELS_NORMAL])
                    with m.Case(2):
                        comb += els.eq(mode[SVP64MODE.ELS_SAT])
                    with m.Case(1, 3):
                        with m.If(self.rc_in):
                            comb += els.eq(mode[SVP64MODE.ELS_FFIRST_PRED])
            with m.Else():  # LD/ST-Indexed
                with m.Switch(mode2):
                    with m.Case(0, 2):
                        comb += els.eq(mode[SVP64MODE.LDIDX_ELS])
                    with m.Case(1, 3):
                        with m.If(self.rc_in):
                            comb += els.eq(mode[SVP64MODE.ELS_FFIRST_PRED])

            # RA is vectorised
            with m.If(self.ldst_ra_vec):
                comb += self.ldstmode.eq(SVP64LDSTmode.INDEXED)
            # not element-strided, therefore unit...
            with m.Elif(~els):
                comb += self.ldstmode.eq(SVP64LDSTmode.UNITSTRIDE)
            # but if the LD/ST immediate is zero, allow cache-inhibited
            # loads from same location, therefore don't do element-striding
            with m.Elif(~self.ldst_imz_in):
                comb += self.ldstmode.eq(SVP64LDSTmode.ELSTRIDE)

        ######################
        # arith decoding
        ######################
        with m.Else():
            with m.Switch(mode2):
                with m.Case(0):  # needs further decoding
                    with m.If(mode[SVP64MODE.REDUCE]):
                        comb += self.mode.eq(SVP64RMMode.MAPREDUCE)
                    with m.Else():
                        comb += self.mode.eq(SVP64RMMode.NORMAL)
                with m.Case(1):
                    comb += self.mode.eq(SVP64RMMode.FFIRST)  # ffirst
                with m.Case(2):
                    comb += self.mode.eq(SVP64RMMode.SATURATE)  # saturate
                with m.Case(3):
                    # mode = 0b11: arithmetic predicate-result
                    comb += self.mode.eq(SVP64RMMode.PREDRES)  # pred result

            # extract "reverse gear" for mapreduce mode
            with m.If((~is_ldst) &                 # not for LD/ST
                      (mode2 == 0) &               # first 2 bits == 0
                      mode[SVP64MODE.REDUCE] &     # bit 2 == 1
                      (~mode[SVP64MODE.MOD3])):    # bit 3 == 0
                comb += self.reverse_gear.eq(mode[SVP64MODE.RG])  # finally whew

            # extract zeroing
            with m.Switch(mode2):
                with m.Case(0):
                    # normal-mode only active in simple mode
                    with m.If(~mode[SVP64MODE.REDUCE]):  # bit 2 is zero?
                        comb += self.pred_sz.eq(mode[SVP64MODE.SZ])
                        comb += self.pred_dz.eq(mode[SVP64MODE.DZ])
                with m.Case(2):
                    # sat-mode all good...
                    comb += self.pred_sz.eq(mode[SVP64MODE.SZ])
                    comb += self.pred_dz.eq(mode[SVP64MODE.DZ])
                with m.Case(3):
                    # predicate-result has ZZ
                    # NOTE(review): the Arithmetic table in this file
                    # lists zz on the Rc=0 row - confirm the rc_in test
                    with m.If(self.rc_in):
                        comb += self.pred_sz.eq(mode[SVP64MODE.ZZ])
                        comb += self.pred_dz.eq(mode[SVP64MODE.ZZ])

            # extract failfirst
            with m.If(self.mode == SVP64RMMode.FFIRST):  # fail-first
                comb += self.inv.eq(mode[SVP64MODE.INV])
                with m.If(self.rc_in):
                    comb += self.cr_sel.eq(cr)
                with m.Else():
                    # only when Rc=0
                    comb += self.RC1.eq(mode[SVP64MODE.RC1])
                    with m.If(~is_ldst):
                        comb += self.vli.eq(mode[SVP64MODE.VLI])
                    comb += self.cr_sel.eq(0b10)  # EQ bit index is implicit

            # extract saturate
            with m.Switch(mode2):
                with m.Case(2):
                    with m.If(mode[SVP64MODE.N]):
                        comb += self.saturate.eq(SVP64Sat.UNSIGNED)
                    with m.Else():
                        comb += self.saturate.eq(SVP64Sat.SIGNED)
                with m.Default():
                    comb += self.saturate.eq(SVP64Sat.NONE)

            # do elwidth/elwidth_src extract
            comb += self.ew_src.eq(self.rm_in.ewsrc)
            comb += self.ew_dst.eq(self.rm_in.elwidth)
            comb += self.subvl.eq(self.rm_in.subvl)

            # extract els (element strided mode bit)
            # see https://libre-soc.org/openpower/sv/ldst/
            # NOTE(review): is_ldst cannot be true in this (final Else)
            # branch, so this els decode appears to be inert here
            els = Signal()
            with m.If(is_ldst):
                with m.If(is_ldstimm):
                    with m.Switch(mode2):
                        with m.Case(0):
                            comb += els.eq(mode[SVP64MODE.ELS_NORMAL])
                        with m.Case(2):
                            comb += els.eq(mode[SVP64MODE.ELS_SAT])
                        with m.Case(1, 3):
                            with m.If(self.rc_in):
                                comb += els.eq(mode[SVP64MODE.ELS_FFIRST_PRED])
                with m.Else():
                    with m.Switch(mode2):
                        with m.Case(0, 2):
                            comb += els.eq(mode[SVP64MODE.LDIDX_ELS])
                        with m.Case(1, 3):
                            with m.If(self.rc_in):
                                comb += els.eq(mode[SVP64MODE.ELS_FFIRST_PRED])

                # RA is vectorised
                with m.If(self.ldst_ra_vec):
                    comb += self.ldstmode.eq(SVP64LDSTmode.INDEXED)
                # not element-strided, therefore unit...
                with m.Elif(~els):
                    comb += self.ldstmode.eq(SVP64LDSTmode.UNITSTRIDE)
                # but if the LD/ST immediate is zero, allow cache-inhibited
                # loads from same location, therefore don't do element-striding
                with m.Elif(~self.ldst_imz_in):
                    comb += self.ldstmode.eq(SVP64LDSTmode.ELSTRIDE)

        ######################
        # Common fields (not many, sigh)
        ######################

        # extract src/dest predicate.  use EXTRA3.MASK because EXTRA2.MASK
        # is in exactly the same bits
        srcmask = sel(m, self.rm_in.extra, EXTRA3.MASK)
        dstmask = self.rm_in.mask
        with m.If(self.ptype_in == SVPType.P2):
            comb += self.srcpred.eq(srcmask)
        with m.Else():
            # anything other than twin-predication: source == dest
            comb += self.srcpred.eq(dstmask)
        comb += self.dstpred.eq(dstmask)

        # identify predicate mode
        with m.If(self.rm_in.mmode == 1):
            comb += self.predmode.eq(SVP64PredMode.CR)  # CR Predicate
        with m.Elif((self.srcpred == 0) & (self.dstpred == 0)):
            comb += self.predmode.eq(SVP64PredMode.ALWAYS)  # No predicate
        with m.Else():
            comb += self.predmode.eq(SVP64PredMode.INT)  # non-zero src: INT

        return m
448