1 # SPDX-License-Identifier: LGPLv3+
2 # Copyright (C) 2021 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
3 # Funded by NLnet http://nlnet.nl
4 """SVP64 RM (Remap) Record.
6 https://libre-soc.org/openpower/sv/svp64/
8 | Field Name | Field bits | Description |
9 |-------------|------------|----------------------------------------|
10 | MASKMODE | `0` | Execution (predication) Mask Kind |
11 | MASK | `1:3` | Execution Mask |
12 | ELWIDTH | `4:5` | Element Width |
13 | ELWIDTH_SRC | `6:7` | Element Width for Source |
14 | SUBVL | `8:9` | Sub-vector length |
15 | EXTRA | `10:18` | context-dependent extra |
16 | MODE | `19:23` | changes Vector behaviour |
19 from nmigen
import Elaboratable
, Module
, Signal
, Const
20 from openpower
.decoder
.power_enums
import (SVP64RMMode
, Function
, SVPtype
,
22 SVP64PredMode
, SVP64sat
, SVP64LDSTmode
,
23 SVP64BCPredMode
, SVP64BCVLSETMode
,
24 SVP64BCGate
, SVP64BCCTRMode
,
27 from openpower
.consts
import EXTRA3
, SVP64MODE
28 from openpower
.sv
.svp64
import SVP64Rec
29 from nmutil
.util
import sel
# a list of fields which need to be added to input records in order
# to pass on vital information needed by each pipeline.
33 # make sure to keep these the same as SVP64RMModeDecode, in fact,
34 # TODO, make SVP64RMModeDecode *use* this as a Record!
35 sv_input_record_layout
= [
36 ('sv_pred_sz', 1), # predicate source zeroing
37 ('sv_pred_dz', 1), # predicate dest zeroing
38 ('sv_saturate', SVP64sat
),
39 ('sv_ldstmode', SVP64LDSTmode
),
40 ('SV_Ptype', SVPtype
),
46 there are four Mode variants, two for LD/ST, one for Branch-Conditional,
47 and one for everything else
48 https://libre-soc.org/openpower/sv/svp64/
49 https://libre-soc.org/openpower/sv/ldst/
50 https://libre-soc.org/openpower/sv/branches/
53 00 0 zz els normal mode (with element-stride option)
54 01 inv CR-bit Rc=1: ffirst CR sel
55 01 inv els RC1 Rc=0: ffirst z/nonz
56 10 N zz els sat mode: N=0/1 u/s
57 11 inv CR-bit Rc=1: pred-result CR sel
58 11 inv els RC1 Rc=0: pred-result z/nonz
61 00 0 sz dz normal mode
63 01 inv CR-bit Rc=1: ffirst CR sel
64 01 inv dz RC1 Rc=0: ffirst z/nonz
65 10 N sz dz sat mode: N=0/1 u/s
66 11 inv CR-bit Rc=1: pred-result CR sel
67 11 inv zz RC1 Rc=0: pred-result z/nonz
70 | 0-1 | 2 | 3 4 | description |
71 | --- | --- |---------|-------------------------- |
72 | 00 | 0 | dz sz | simple mode |
73 | 00 | 1 | 0 RG | scalar reduce mode (mapreduce), SUBVL=1 |
74 | 00 | 1 | SVM 0 | subvector reduce mode, SUBVL>1 |
75 | 00 | 1 | / 1 | reserved |
76 | 01 | inv | CR-bit | Rc=1: ffirst CR sel |
77 | 01 | inv | VLi RC1 | Rc=0: ffirst z/nonz |
78 | 10 | N | dz sz | sat mode: N=0/1 u/s, SUBVL=1 |
79 | 10 | N | zz 0 | sat mode: N=0/1 u/s, SUBVL>1 |
80 | 10 | N | / 1 | reserved |
81 | 11 | inv | CR-bit | Rc=1: pred-result CR sel |
82 | 11 | inv | zz RC1 | Rc=0: pred-result z/nonz |
85 note that additional BC modes are in *other bits*, specifically
86 the element-width fields: SVP64Rec.ewsrc and SVP64Rec.elwidth
89 4 5 6 7 19 20 21 22 23
90 ALL LRu / / 0 0 / SNZ sz normal mode
91 ALL LRu / VSb 0 1 VLI SNZ sz VLSET mode
92 ALL LRu BRc / 1 0 / SNZ sz svstep mode
93 ALL LRu BRc VSb 1 1 VLI SNZ sz svstep VLSET mode
97 class SVP64RMModeDecode(Elaboratable
):
def __init__(self, name=None):
    """Declare the signals of the SVP64 RM mode decoder.

    name: forwarded unchanged to SVP64Rec as the name of the ``rm_in``
    record.

    Only signal declarations happen here; all decoding is done in
    elaborate().  Each signal is bound directly to its attribute
    (``self.<attr> = Signal(...)``) - keep that form, since the signal
    name is taken from the assignment target when no explicit name is
    given.
    """
    # NOTE(review): elaborate() assigns self.vli and self.RC1, but neither
    # is declared among the statements visible here - confirm that they
    # are created somewhere before elaboration.

    # raw SVP64 RM record: elaborate() reads its mode, ewsrc, elwidth,
    # subvl, extra, mask and mmode fields
    self.rm_in = SVP64Rec(name=name)
    self.fn_in = Signal(Function)  # LD/ST and Branch is different
    self.sv_mode = Signal(SVMode)  # BRANCH/LDST_IMM/CROP etc.
    self.svp64_vf_in = Signal()  # Vertical-First Mode
    # compared against SVPtype.P2 in elaborate() to select the source
    # predicate
    self.ptype_in = Signal(SVPtype)
    # tested by elaborate() when decoding fail-first, predicate-zeroing
    # and element-stride bits
    self.rc_in = Signal()
    self.cr_5bit_in = Signal()  # if CR field was 5-bit
    self.cr_2bit_in = Signal()  # bottom 2 bits of CR field
    self.ldst_ra_vec = Signal()  # set when RA is vec, indicate Index mode
    self.ldst_imz_in = Signal()  # set when LD/ST immediate is zero
    # the next two are driven by elaborate() from MODE bits
    # (SVP64MODE.LDI_PI and SVP64MODE.LDI_FF respectively)
    self.ldst_postinc = Signal()  # set when LD/ST immediate post-inc set
    self.ldst_ffirst = Signal()  # set when LD/ST immediate fail-first set

    # main mode (normal, reduce, saturate, ffirst, pred-result, branch)
    self.mode = Signal(SVP64RMMode)

    # Branch Conditional Modes
    self.bc_vlset = Signal(SVP64BCVLSETMode)  # Branch-Conditional VLSET
    self.bc_ctrtest = Signal(SVP64BCCTRMode)  # Branch-Conditional CTR-Test
    self.bc_pred = Signal(SVP64BCPredMode)  # BC predicate mode
    self.bc_vsb = Signal()  # BC VLSET-branch (like BO[1])
    self.bc_gate = Signal(SVP64BCGate)  # BC ALL or ANY gate
    self.bc_lru = Signal()  # BC Link Register Update

    # predication: mode plus 3-bit source/destination predicate selectors
    # and the two zeroing flags
    self.predmode = Signal(SVP64PredMode)
    self.srcpred = Signal(3)  # source predicate
    self.dstpred = Signal(3)  # destination predicate
    self.pred_sz = Signal(1)  # predicate source zeroing
    self.pred_dz = Signal(1)  # predicate dest zeroing

    # element widths, sub-vector length and saturation; elaborate() copies
    # the first three straight from rm_in (ewsrc, elwidth, subvl)
    self.ew_src = Signal(SVP64width)  # source elwidth
    self.ew_dst = Signal(SVP64width)  # dest elwidth
    self.subvl = Signal(2)  # subvl
    self.saturate = Signal(SVP64sat)

    # CR-bit selection / inversion, mapreduce and LD/ST stride outputs
    self.cr_sel = Signal(2)  # bit of CR to test (index 0-3)
    self.inv = Signal(1)  # and whether it's inverted (like branch BO)
    # NOTE(review): map_evm and map_crm are not assigned in the part of
    # elaborate() visible here - confirm whether they are still used
    self.map_evm = Signal(1)
    self.map_crm = Signal(1)
    self.reverse_gear = Signal(1)  # elements to go VL-1..0
    self.ldstmode = Signal(SVP64LDSTmode)  # LD/ST Mode (strided type)
147 def elaborate(self
, platform
):
150 mode
= self
.rm_in
.mode
152 # decode pieces of mode
156 is_ldstimm
= Signal()
157 comb
+= is_ldst
.eq(self
.fn_in
== Function
.LDST
)
158 comb
+= is_bc
.eq(self
.fn_in
== Function
.BRANCH
) # XXX TODO use SV Mode
159 comb
+= is_cr
.eq(self
.sv_mode
== SVMode
.CROP
.value
)
160 comb
+= is_ldstimm
.eq(self
.sv_mode
== SVMode
.LDST_IMM
.value
)
161 mode2
= sel(m
, mode
, SVP64MODE
.MOD2
)
162 cr
= sel(m
, mode
, SVP64MODE
.CR
)
165 # Branch-Conditional is completely different
167 with m
.If(mode
[SVP64MODE
.BC_CTRTEST
]):
168 with m
.If(self
.rm_in
.ewsrc
[1]):
169 comb
+= self
.bc_ctrtest
.eq(SVP64BCCTRMode
.TEST_INV
)
171 comb
+= self
.bc_ctrtest
.eq(SVP64BCCTRMode
.TEST
)
173 # BC Mode ALL or ANY (Great-Big-AND-gate or Great-Big-OR-gate)
174 comb
+= self
.bc_gate
.eq(self
.rm_in
.elwidth
[1])
175 # Link-Register Update
176 comb
+= self
.bc_lru
.eq(self
.rm_in
.elwidth
[0])
177 comb
+= self
.bc_vsb
.eq(self
.rm_in
.ewsrc
[0])
180 with m
.Switch(mode2
):
181 with m
.Case(0, 1): # needs further decoding (LDST no mapreduce)
182 with m
.If(mode
[SVP64MODE
.REDUCE
]):
183 comb
+= self
.mode
.eq(SVP64RMMode
.MAPREDUCE
)
185 comb
+= self
.mode
.eq(SVP64RMMode
.NORMAL
)
187 comb
+= self
.mode
.eq(SVP64RMMode
.FFIRST
) # fail-first
190 with m
.If(self
.mode
== SVP64RMMode
.FFIRST
): # fail-first
191 comb
+= self
.inv
.eq(mode
[SVP64MODE
.INV
])
192 comb
+= self
.vli
.eq(mode
[SVP64MODE
.BC_VLSET
])
193 with m
.If(self
.cr_5bit_in
):
194 comb
+= self
.cr_sel
.eq(0b10) # EQ bit index is implicit
196 comb
+= self
.cr_sel
.eq(cr
)
199 # combined arith / ldst decoding due to similarity
200 with m
.Switch(mode2
):
201 with m
.Case(0): # needs further decoding (LDST no mapreduce)
202 with m
.If(is_ldstimm
& mode
[SVP64MODE
.LDI_POST
]):
203 comb
+= self
.mode
.eq(SVP64RMMode
.NORMAL
)
204 comb
+= self
.ldst_postinc
.eq(mode
[SVP64MODE
.LDI_PI
])
205 comb
+= self
.ldst_ffirst
.eq(mode
[SVP64MODE
.LDI_FF
])
206 with m
.Elif(is_ldst
):
207 comb
+= self
.mode
.eq(SVP64RMMode
.NORMAL
)
208 with m
.Elif(mode
[SVP64MODE
.REDUCE
]):
209 comb
+= self
.mode
.eq(SVP64RMMode
.MAPREDUCE
)
211 comb
+= self
.mode
.eq(SVP64RMMode
.NORMAL
)
213 comb
+= self
.mode
.eq(SVP64RMMode
.FFIRST
) # fail-first
215 comb
+= self
.mode
.eq(SVP64RMMode
.SATURATE
) # saturate
217 comb
+= self
.mode
.eq(SVP64RMMode
.PREDRES
) # pred result
219 # extract "reverse gear" for mapreduce mode
220 with m
.If((~is_ldst
) & # not for LD/ST
221 (mode2
== 0) & # first 2 bits == 0
222 mode
[SVP64MODE
.REDUCE
] & # bit 2 == 1
223 (~mode
[SVP64MODE
.MOD3
])): # bit 3 == 0
224 comb
+= self
.reverse_gear
.eq(mode
[SVP64MODE
.RG
]) # finally whew
227 with m
.Switch(mode2
):
228 with m
.Case(0): # needs further decoding (LDST no mapreduce)
229 with m
.If(is_ldstimm
&
230 ~
(self
.ldst_postinc | self
.ldst_ffirst
)):
231 # no predicate-zeroing in fail-first or postinc
234 # XXX TODO, work out which of these is most
235 # appropriate set both? or just the one?
236 # or one if LD, the other if ST?
237 comb
+= self
.pred_sz
.eq(mode
[SVP64MODE
.DZ
])
238 comb
+= self
.pred_dz
.eq(mode
[SVP64MODE
.DZ
])
239 with m
.Elif(mode
[SVP64MODE
.REDUCE
]):
240 with m
.If(self
.rm_in
.subvl
== Const(0, 2)): # no SUBVL
241 comb
+= self
.pred_dz
.eq(mode
[SVP64MODE
.DZ
])
243 comb
+= self
.pred_sz
.eq(mode
[SVP64MODE
.SZ
])
244 comb
+= self
.pred_dz
.eq(mode
[SVP64MODE
.DZ
])
247 with m
.If(~self
.ldst_ra_vec
):
248 comb
+= self
.pred_dz
.eq(mode
[SVP64MODE
.DZ
])
249 with m
.Elif(self
.rc_in
):
250 comb
+= self
.pred_dz
.eq(mode
[SVP64MODE
.DZ
])
252 with m
.If(is_ldst
& ~self
.ldst_ra_vec
):
253 comb
+= self
.pred_dz
.eq(mode
[SVP64MODE
.DZ
])
255 comb
+= self
.pred_sz
.eq(mode
[SVP64MODE
.SZ
])
256 comb
+= self
.pred_dz
.eq(mode
[SVP64MODE
.DZ
])
259 with m
.If(self
.mode
== SVP64RMMode
.FFIRST
): # fail-first
260 comb
+= self
.inv
.eq(mode
[SVP64MODE
.INV
])
261 with m
.If(self
.rc_in
):
262 comb
+= self
.cr_sel
.eq(cr
)
265 comb
+= self
.RC1
.eq(mode
[SVP64MODE
.RC1
])
266 comb
+= self
.vli
.eq(mode
[SVP64MODE
.VLI
])
267 comb
+= self
.cr_sel
.eq(0b10) # EQ bit index is implicit
270 with m
.Switch(mode2
):
272 with m
.If(mode
[SVP64MODE
.N
]):
273 comb
+= self
.saturate
.eq(SVP64sat
.UNSIGNED
)
275 comb
+= self
.saturate
.eq(SVP64sat
.SIGNED
)
277 comb
+= self
.saturate
.eq(SVP64sat
.NONE
)
279 # do elwidth/elwidth_src extract
280 comb
+= self
.ew_src
.eq(self
.rm_in
.ewsrc
)
281 comb
+= self
.ew_dst
.eq(self
.rm_in
.elwidth
)
282 comb
+= self
.subvl
.eq(self
.rm_in
.subvl
)
284 # extract els (element strided mode bit)
285 # see https://libre-soc.org/openpower/sv/ldst/
288 with m
.Switch(mode2
):
290 comb
+= els
.eq(mode
[SVP64MODE
.ELS_NORMAL
])
292 comb
+= els
.eq(mode
[SVP64MODE
.ELS_SAT
])
294 with m
.If(self
.rc_in
):
295 comb
+= els
.eq(mode
[SVP64MODE
.ELS_FFIRST_PRED
])
298 with m
.If(self
.ldst_ra_vec
):
299 comb
+= self
.ldstmode
.eq(SVP64LDSTmode
.INDEXED
)
300 # not element-strided, therefore unit...
302 comb
+= self
.ldstmode
.eq(SVP64LDSTmode
.UNITSTRIDE
)
303 # but if the LD/ST immediate is zero, allow cache-inhibited
304 # loads from same location, therefore don't do element-striding
305 with m
.Elif(~self
.ldst_imz_in
):
306 comb
+= self
.ldstmode
.eq(SVP64LDSTmode
.ELSTRIDE
)
308 # extract src/dest predicate. use EXTRA3.MASK because EXTRA2.MASK
309 # is in exactly the same bits
310 srcmask
= sel(m
, self
.rm_in
.extra
, EXTRA3
.MASK
)
311 dstmask
= self
.rm_in
.mask
312 with m
.If(self
.ptype_in
== SVPtype
.P2
):
313 comb
+= self
.srcpred
.eq(srcmask
)
315 comb
+= self
.srcpred
.eq(dstmask
)
316 comb
+= self
.dstpred
.eq(dstmask
)
318 # identify predicate mode
319 with m
.If(self
.rm_in
.mmode
== 1):
320 comb
+= self
.predmode
.eq(SVP64PredMode
.CR
) # CR Predicate
321 with m
.Elif((self
.srcpred
== 0) & (self
.dstpred
== 0)):
322 comb
+= self
.predmode
.eq(SVP64PredMode
.ALWAYS
) # No predicate
324 comb
+= self
.predmode
.eq(SVP64PredMode
.INT
) # non-zero src: INT
326 # TODO: detect zeroing mode, saturation mode, a few more.