Allow the formal engine to perform a same-cycle result in the ALU
[soc.git] / src / soc / decoder / power_decoder2.py
1 """Power ISA Decoder second stage
2
3 based on Anton Blanchard microwatt decode2.vhdl
4
5 Note: OP_TRAP is used for exceptions and interrupts (micro-code style) by
6 over-riding the internal opcode when an exception is needed.
7 """
8
9 from nmigen import Module, Elaboratable, Signal, Mux, Const, Cat, Repl, Record
10 from nmigen.cli import rtlil
11 from nmutil.util import sel
12
13 from soc.regfile.regfiles import XERRegs
14
15 from nmutil.picker import PriorityPicker
16 from nmutil.iocontrol import RecordObject
17 from nmutil.extend import exts
18
19 from soc.experiment.mem_types import LDSTException
20
21 from soc.decoder.power_regspec_map import regspec_decode_read
22 from soc.decoder.power_regspec_map import regspec_decode_write
23 from soc.decoder.power_decoder import create_pdecode
24 from soc.decoder.power_enums import (MicrOp, CryIn, Function,
25 CRInSel, CROutSel,
26 LdstLen, In1Sel, In2Sel, In3Sel,
27 OutSel, SPR, RC, LDSTMode,
28 SVEXTRA, SVEtype)
29 from soc.decoder.decode2execute1 import (Decode2ToExecute1Type, Data,
30 Decode2ToOperand)
31 from soc.sv.svp64 import SVP64Rec
32 from soc.consts import (MSR, SPEC, EXTRA2, EXTRA3, SVP64P, field,
33 SPEC_SIZE, SPECb, SPEC_AUG_SIZE, SVP64CROffs)
34
35 from soc.regfile.regfiles import FastRegs
36 from soc.consts import TT
37 from soc.config.state import CoreState
38 from soc.regfile.util import spr_to_fast
39
40
def decode_spr_num(spr):
    """Swap the two 5-bit halves of a 10-bit SPR field.

    The returned value is the concatenation of ``spr[5:10]`` (placed
    first) and ``spr[0:5]`` (placed second).
    """
    second_half = spr[0:5]
    first_half = spr[5:10]
    return Cat(first_half, second_half)
43
44
def instr_is_priv(m, op, insn):
    """Build combinatorial logic flagging privileged instructions.

    m    -- Module that receives the generated statements
    op   -- internal micro-op being examined
    insn -- instruction word (bit 20 is inspected for MFSPR/MTSPR)

    Returns a 1-bit Signal that is driven to 1 when the instruction is
    considered privileged (it is left at its default otherwise).
    """
    comb = m.d.comb
    is_priv_insn = Signal(reset_less=True)

    # micro-ops that are privileged regardless of their operands
    always_priv = (MicrOp.OP_ATTN, MicrOp.OP_MFMSR, MicrOp.OP_MTMSRD,
                   MicrOp.OP_MTMSR, MicrOp.OP_RFID, MicrOp.OP_TLBIE)
    # SPR moves: privilege depends on one bit of the instruction
    spr_moves = (MicrOp.OP_MFSPR, MicrOp.OP_MTSPR)

    with m.Switch(op):
        with m.Case(*always_priv):
            comb += is_priv_insn.eq(1)
        with m.Case(*spr_moves):
            # presumably the top bit of field XFX.spr -- TODO confirm
            with m.If(insn[20]):
                comb += is_priv_insn.eq(1)

    return is_priv_insn
59
60
class SPRMap(Elaboratable):
    """SPRMap: translate an SPR number into internal slow/fast identifiers.

    spr_i  -- 10-bit SPR number to look up
    spr_o  -- Data: index of the matching member within the SPR enum
    fast_o -- Data: fast-register number taken from spr_to_fast
    """

    def __init__(self):
        self.spr_i = Signal(10, reset_less=True)
        self.spr_o = Data(SPR, name="spr_o")
        self.fast_o = Data(3, name="fast_o")

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb

        with m.Switch(self.spr_i):
            # one Case per SPR enum member: output is its enumeration index
            for index, member in enumerate(SPR):
                with m.Case(member.value):
                    comb += [self.spr_o.data.eq(index),
                             self.spr_o.ok.eq(1)]
            # one Case per fast-mapped SPR.
            # NOTE(review): these Cases come after the SPR ones within the
            # same Switch; keep this ordering, the outcome for SPR numbers
            # listed in both places may depend on it -- confirm.
            for member, fast_num in spr_to_fast.items():
                with m.Case(member.value):
                    comb += [self.fast_o.data.eq(fast_num),
                             self.fast_o.ok.eq(1)]

        return m
82
83
class SVP64ExtraSpec(Elaboratable):
    """SVP64ExtraSpec - picks one EXTRA2/EXTRA3 sub-field out of "extra".

    Inputs:  extra (9 bits), etype (EXTRA2 or EXTRA3), idx (which sub-field)
    Output:  spec (3 bits), the selected register specification

    see https://libre-soc.org/openpower/sv/svp64/
    """
    def __init__(self):
        self.extra = Signal(9, reset_less=True)
        self.etype = Signal(SVEtype, reset_less=True)  # 2 or 3 bits
        self.idx = Signal(SVEXTRA, reset_less=True)  # which part of extra
        self.spec = Signal(3)  # EXTRA spec for the register

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        spec = self.spec
        extra = self.extra

        # every op was tagged (in the files generated by sv_analysis.py)
        # with an Etype, EXTRA2 or EXTRA3, and with the index of the
        # sub-field holding its register-extension bits.

        # EXTRA2: (index selector, "vector" bit, "msb" bit) per sub-field
        extra2_layout = (
            (SVEXTRA.Idx0, EXTRA2.IDX0_VEC, EXTRA2.IDX0_MSB),  # 1st 2 bits
            (SVEXTRA.Idx1, EXTRA2.IDX1_VEC, EXTRA2.IDX1_MSB),  # 2nd 2 bits
            (SVEXTRA.Idx2, EXTRA2.IDX2_VEC, EXTRA2.IDX2_MSB),  # 3rd 2 bits
            (SVEXTRA.Idx3, EXTRA2.IDX3_VEC, EXTRA2.IDX3_MSB),  # 4th 2 bits
        )

        with m.Switch(self.etype):
            # 2-bit index selection mode: only two bits of spec are driven
            with m.Case(SVEtype.EXTRA2):
                with m.Switch(self.idx):
                    for which, vec_bit, msb_bit in extra2_layout:
                        with m.Case(which):
                            comb += spec[SPEC.VEC].eq(extra[vec_bit])
                            comb += spec[SPEC.MSB].eq(extra[msb_bit])
            # 3-bit index selection mode: the whole of spec is driven
            with m.Case(SVEtype.EXTRA3):
                with m.Switch(self.idx):
                    with m.Case(SVEXTRA.Idx0):  # 1st 3 bits
                        extra3_idx0 = sel(m, extra, EXTRA3.IDX0)
                        comb += spec.eq(extra3_idx0)
                    with m.Case(SVEXTRA.Idx1):  # 2nd 3 bits
                        extra3_idx1 = sel(m, extra, EXTRA3.IDX1)
                        comb += spec.eq(extra3_idx1)
                    with m.Case(SVEXTRA.Idx2):  # 3rd 3 bits
                        extra3_idx2 = sel(m, extra, EXTRA3.IDX2)
                        comb += spec.eq(extra3_idx2)
                    # 9 bits only hold three 3-bit fields: no 4th entry

        return m
138
139
class SVP64RegExtra(SVP64ExtraSpec):
    """SVP64RegExtra - extends a 5-bit register number using EXTRA bits.

    The incoming 5-bit GPR/FP number is widened to 7 bits and flagged as
    scalar or vector according to the selected EXTRA2/3 spec.

    When the spec is zero (SV not applicable, or the EXTRA bits are zero)
    the register number passes through unchanged.

    see https://libre-soc.org/openpower/sv/svp64/
    """
    def __init__(self):
        SVP64ExtraSpec.__init__(self)
        self.reg_in = Signal(5)  # incoming reg number (5 bits, RA, RB)
        self.reg_out = Signal(7)  # extra-augmented output (7 bits)
        self.isvec = Signal(1)  # reg is marked as vector if true

    def elaborate(self, platform):
        # the parent class produces the logic that fills in self.spec
        m = super().elaborate(platform)
        comb = m.d.comb

        # an all-zero spec gives "scalar identity" behaviour below
        spec = self.spec

        # the scalar/vector marker is a single bit of the spec
        comb += self.isvec.eq(spec[SPEC.VEC])

        # remaining spec bits augment the register number
        spec_aug = Signal(SPEC_AUG_SIZE)
        comb += spec_aug.eq(field(spec, SPECb.MSB, SPECb.LSB, SPEC_SIZE))

        # placement of the augmentation bits depends on scalar/vector
        vector_form = Cat(spec_aug, self.reg_in)  # reg shifted up, aug low
        scalar_form = Cat(self.reg_in, spec_aug)  # reg unshifted, aug high
        with m.If(self.isvec):
            comb += self.reg_out.eq(vector_form)
        with m.Else():
            comb += self.reg_out.eq(scalar_form)

        return m
183
184
class SVP64CRExtra(SVP64ExtraSpec):
    """SVP64CRExtra - extends a 3-bit CR number using EXTRA bits.

    The incoming 3-bit CR number is widened to 7 bits and flagged as
    scalar or vector according to the selected EXTRA2/3 spec.

    (7 bits because there are 128 CRs, matching 128 INT and 128 FP regs.)

    When the spec is zero (SV not applicable, or the EXTRA bits are zero)
    the CR number passes through unchanged.

    see https://libre-soc.org/openpower/sv/svp64/appendix
    """
    def __init__(self):
        SVP64ExtraSpec.__init__(self)
        self.cr_in = Signal(3)  # incoming CR number (3 bits, BA[0:2], BFA)
        self.cr_out = Signal(7)  # extra-augmented CR output (7 bits)
        self.isvec = Signal(1)  # reg is marked as vector if true

    def elaborate(self, platform):
        # the parent class produces the logic that fills in self.spec
        m = super().elaborate(platform)
        comb = m.d.comb

        # an all-zero spec gives "scalar identity" behaviour below
        spec = self.spec

        # the scalar/vector marker is a single bit of the spec
        comb += self.isvec.eq(spec[SPEC.VEC])

        # remaining spec bits augment the CR number
        spec_aug = Signal(SPEC_AUG_SIZE)
        comb += spec_aug.eq(field(spec, SPECb.MSB, SPECb.LSB, SPEC_SIZE))

        # vector: two zero bits lowest, then the augmentation, then the CR
        vector_form = Cat(Const(0, 2), spec_aug, self.cr_in)
        # scalar: CR number unshifted, augmentation above it
        scalar_form = Cat(self.cr_in, spec_aug)
        with m.If(self.isvec):
            comb += self.cr_out.eq(vector_form)
        with m.Else():
            comb += self.cr_out.eq(scalar_form)

        return m
230
231
class DecodeA(Elaboratable):
    """DecodeA: first-operand register selection.

    Produces the register number for operand A (RA or RS depending on
    sel_in) plus the SPR / fast-register numbers implied by the micro-op.
    """

    def __init__(self, dec):
        self.dec = dec
        self.sel_in = Signal(In1Sel, reset_less=True)
        self.insn_in = Signal(32, reset_less=True)
        self.reg_out = Data(5, name="reg_a")
        self.spr_out = Data(SPR, "spr_a")
        self.fast_out = Data(3, "fast_a")

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        op = self.dec.op
        reg = self.reg_out
        m.submodules.sprmap = sprmap = SPRMap()

        # --- Register A field ---
        ra = Signal(5, reset_less=True)
        comb += ra.eq(self.dec.RA)
        plain_ra = self.sel_in == In1Sel.RA
        nonzero_ra = (self.sel_in == In1Sel.RA_OR_ZERO) & (ra != Const(0, 5))
        with m.If(plain_ra | nonzero_ra):
            comb += [reg.data.eq(ra),
                     reg.ok.eq(1)]

        # --- RS routed through in1_sel ---
        # (some Logic/ALU ops take RS but have no "RA")
        rs = Signal(5, reset_less=True)
        comb += rs.eq(self.dec.RS)
        with m.If(self.sel_in == In1Sel.RS):
            comb += [reg.data.eq(rs),
                     reg.ok.eq(1)]

        # --- Fast-SPR, depending on the instruction type ---
        with m.Switch(op.internal_op):

            # BC: implicit register (CTR). NOTE: see also DecodeOut
            with m.Case(MicrOp.OP_BC):
                with m.If(~self.dec.BO[2]):  # 3.0B p38 BO2=0, use CTR reg
                    comb += [self.fast_out.data.eq(FastRegs.CTR),
                             self.fast_out.ok.eq(1)]

            # BCREG: CTR is read depending on two XO bits
            with m.Case(MicrOp.OP_BCREG):
                xo9 = self.dec.FormXL.XO[9]  # 3.0B p38 top bit of XO
                xo5 = self.dec.FormXL.XO[5]  # 3.0B p38
                with m.If(xo9 & ~xo5):
                    comb += [self.fast_out.data.eq(FastRegs.CTR),
                             self.fast_out.ok.eq(1)]

            # MFSPR: look the SPR number up in the SPR map
            with m.Case(MicrOp.OP_MFSPR):
                spr = Signal(10, reset_less=True)
                comb += spr.eq(decode_spr_num(self.dec.SPR))  # from XFX
                comb += sprmap.spr_i.eq(spr)
                comb += self.spr_out.eq(sprmap.spr_o)
                comb += self.fast_out.eq(sprmap.fast_o)

        return m
296
297
class DecodeAImm(Elaboratable):
    """DecodeAImm: detects the "RA is zero" immediate case.

    immz_out is raised when sel_in is RA_OR_ZERO and the RA field of the
    instruction is zero.
    """

    def __init__(self, dec):
        self.dec = dec
        self.sel_in = Signal(In1Sel, reset_less=True)
        self.immz_out = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb

        ra = Signal(5, reset_less=True)
        comb += ra.eq(self.dec.RA)

        # zero immediate requested
        wants_ra_or_zero = self.sel_in == In1Sel.RA_OR_ZERO
        ra_is_zero = ra == Const(0, 5)
        with m.If(wants_ra_or_zero & ra_is_zero):
            comb += self.immz_out.eq(1)

        return m
321
322
class DecodeB(Elaboratable):
    """DecodeB: second-operand register selection.

    Picks RB or RS as operand B according to sel_in, and the implicit
    fast register (LR or TAR) read by BCREG.  Operand B is "lane 2" into
    the CompUnits, the lane into which decoded immediates are also muxed.
    """

    def __init__(self, dec):
        self.dec = dec
        self.sel_in = Signal(In2Sel, reset_less=True)
        self.insn_in = Signal(32, reset_less=True)
        self.reg_out = Data(7, "reg_b")
        self.reg_isvec = Signal(1, name="reg_b_isvec")  # TODO: in reg_out
        self.fast_out = Data(3, "fast_b")

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        op = self.dec.op
        reg = self.reg_out

        # --- Register B field ---
        with m.Switch(self.sel_in):
            with m.Case(In2Sel.RB):
                comb += [reg.data.eq(self.dec.RB),
                         reg.ok.eq(1)]
            with m.Case(In2Sel.RS):
                # for M-Form shiftrot
                comb += [reg.data.eq(self.dec.RS),
                         reg.ok.eq(1)]

        # --- second fast SPR ---
        # BCREG implicitly uses LR or TAR as its 2nd register.
        # (CTR is handled as the *first* fast register, not here.)
        with m.If(op.internal_op == MicrOp.OP_BCREG):
            xo9 = self.dec.FormXL.XO[9]  # 3.0B p38 top bit of XO
            xo5 = self.dec.FormXL.XO[5]  # 3.0B p38
            with m.If(~xo9):
                comb += [self.fast_out.data.eq(FastRegs.LR),
                         self.fast_out.ok.eq(1)]
            with m.Elif(xo5):
                comb += [self.fast_out.data.eq(FastRegs.TAR),
                         self.fast_out.ok.eq(1)]

        return m
370
371
class DecodeBImm(Elaboratable):
    """DecodeBImm: builds the 64-bit immediate for operand B.

    sel_in chooses which instruction field is used and how it is extended
    or shifted; imm_out.ok is set for every immediate-producing selection.
    """
    def __init__(self, dec):
        self.dec = dec
        self.sel_in = Signal(In2Sel, reset_less=True)
        self.imm_out = Data(64, "imm_b")

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        imm = self.imm_out

        with m.Switch(self.sel_in):

            # UI, used as-is (unsigned)
            with m.Case(In2Sel.CONST_UI):
                comb += [imm.data.eq(self.dec.UI),
                         imm.ok.eq(1)]

            # SI, sign-extended from 16 bits
            with m.Case(In2Sel.CONST_SI):
                si = Signal(16, reset_less=True)
                comb += si.eq(self.dec.SI)
                comb += [imm.data.eq(exts(si, 16, 64)),
                         imm.ok.eq(1)]

            # SI shifted up 16, sign-extended from 32 bits
            with m.Case(In2Sel.CONST_SI_HI):
                si_hi = Signal(32, reset_less=True)
                comb += si_hi.eq(self.dec.SI << 16)
                comb += [imm.data.eq(exts(si_hi, 32, 64)),
                         imm.ok.eq(1)]

            # UI shifted up 16 (unsigned)
            with m.Case(In2Sel.CONST_UI_HI):
                ui = Signal(16, reset_less=True)
                comb += ui.eq(self.dec.UI)
                comb += [imm.data.eq(ui << 16),
                         imm.ok.eq(1)]

            # LI shifted up 2, sign-extended from 26 bits
            with m.Case(In2Sel.CONST_LI):
                li = Signal(26, reset_less=True)
                comb += li.eq(self.dec.LI << 2)
                comb += [imm.data.eq(exts(li, 26, 64)),
                         imm.ok.eq(1)]

            # BD shifted up 2, sign-extended from 16 bits
            with m.Case(In2Sel.CONST_BD):
                bd = Signal(16, reset_less=True)
                comb += bd.eq(self.dec.BD << 2)
                comb += [imm.data.eq(exts(bd, 16, 64)),
                         imm.ok.eq(1)]

            # DS shifted up 2, sign-extended from 16 bits
            with m.Case(In2Sel.CONST_DS):
                ds = Signal(16, reset_less=True)
                comb += ds.eq(self.dec.DS << 2)
                comb += [imm.data.eq(exts(ds, 16, 64)),
                         imm.ok.eq(1)]

            # constant -1: all 64 bits set
            with m.Case(In2Sel.CONST_M1):
                comb += [imm.data.eq(~Const(0, 64)),
                         imm.ok.eq(1)]

            # shift amount (unsigned)
            with m.Case(In2Sel.CONST_SH):
                comb += [imm.data.eq(self.dec.sh),
                         imm.ok.eq(1)]

            # 32-bit shift amount (unsigned)
            with m.Case(In2Sel.CONST_SH32):
                comb += [imm.data.eq(self.dec.SH32),
                         imm.ok.eq(1)]

        return m
430
431
class DecodeC(Elaboratable):
    """DecodeC: third-operand register selection.

    Picks RB or RS as operand C according to sel_in.  This is "lane 3"
    into the (few) CompUnits that take a third operand.
    """

    def __init__(self, dec):
        self.dec = dec
        self.sel_in = Signal(In3Sel, reset_less=True)
        self.insn_in = Signal(32, reset_less=True)
        self.reg_out = Data(5, "reg_c")

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        op = self.dec.op
        reg = self.reg_out

        # --- Register C field ---
        with m.Switch(self.sel_in):
            with m.Case(In3Sel.RB):
                # for M-Form shiftrot
                comb += [reg.data.eq(self.dec.RB),
                         reg.ok.eq(1)]
            with m.Case(In3Sel.RS):
                comb += [reg.data.eq(self.dec.RS),
                         reg.ok.eq(1)]

        return m
461
462
class DecodeOut(Elaboratable):
    """DecodeOut: first destination selection.

    Produces the destination register number (RT or RA), or for MTSPR the
    mapped SPR / fast register, plus implicit fast-register destinations
    of branch and RFID micro-ops.
    """

    def __init__(self, dec):
        self.dec = dec
        self.sel_in = Signal(OutSel, reset_less=True)
        self.insn_in = Signal(32, reset_less=True)
        self.reg_out = Data(5, "reg_o")
        self.spr_out = Data(SPR, "spr_o")
        self.fast_out = Data(3, "fast_o")

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        m.submodules.sprmap = sprmap = SPRMap()
        op = self.dec.op
        reg = self.reg_out

        # --- Register out field ---
        with m.Switch(self.sel_in):
            with m.Case(OutSel.RT):
                comb += [reg.data.eq(self.dec.RT),
                         reg.ok.eq(1)]
            with m.Case(OutSel.RA):
                comb += [reg.data.eq(self.dec.RA),
                         reg.ok.eq(1)]
            with m.Case(OutSel.SPR):
                spr = Signal(10, reset_less=True)
                comb += spr.eq(decode_spr_num(self.dec.SPR))  # from XFX
                # MTSPR (move to SPR): destination needs mapping
                with m.If(op.internal_op == MicrOp.OP_MTSPR):
                    comb += sprmap.spr_i.eq(spr)
                    comb += self.spr_out.eq(sprmap.spr_o)
                    comb += self.fast_out.eq(sprmap.fast_o)

        # --- Fast register, depending on the instruction type ---
        with m.Switch(op.internal_op):

            # BC or BCREG: implicit register (CTR).
            # NOTE(review): DecodeA qualifies BCREG with XO bits rather
            # than BO[2]; confirm the two are meant to differ.
            with m.Case(MicrOp.OP_BC, MicrOp.OP_BCREG):
                with m.If(~self.dec.BO[2]):  # 3.0B p38 BO2=0, use CTR reg
                    comb += [self.fast_out.data.eq(FastRegs.CTR),
                             self.fast_out.ok.eq(1)]

            # RFID 1st spr (fast): constant SRR0
            with m.Case(MicrOp.OP_RFID):
                comb += [self.fast_out.data.eq(FastRegs.SRR0),
                         self.fast_out.ok.eq(1)]

        return m
517
518
class DecodeOut2(Elaboratable):
    """DecodeOut2: second destination selection.

    The second destination is implicit: RA for update-mode LD/ST, LR for
    branches with "link" set, SRR1 for RFID.

    TODO: SVP64 makes this more involved.  It allows one more destination
    by way of one more EXTRA field, so RA-as-source is not the same as
    RA-as-destination (though both share the same 5 bits from the v3.0B
    opcode).  Mostly relevant to src-as-dest operations such as
    LD/ST-with-update, but there are others.
    """

    def __init__(self, dec):
        self.dec = dec
        self.sel_in = Signal(OutSel, reset_less=True)
        self.lk = Signal(reset_less=True)
        self.insn_in = Signal(32, reset_less=True)
        self.reg_out = Data(5, "reg_o2")
        self.fast_out = Data(3, "fast_o2")

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        op = self.dec.op

        # the decoder may have been built without an "upd" column
        if hasattr(self.dec.op, "upd"):
            # update mode LD/ST uses read-reg A also as an output
            with m.If(self.dec.op.upd == LDSTMode.update):
                comb += [self.reg_out.data.eq(self.dec.RA),
                         self.reg_out.ok.eq(1)]

        # B, BC or BCREG: potential implicit register (LR) output.
        # these give bl, bcl, bclrl, etc.
        with m.Switch(op.internal_op):

            # BC* implicit register (LR), only in "link" mode
            with m.Case(MicrOp.OP_BC, MicrOp.OP_B, MicrOp.OP_BCREG):
                with m.If(self.lk):
                    comb += [self.fast_out.data.eq(FastRegs.LR),
                             self.fast_out.ok.eq(1)]

            # RFID 2nd spr (fast): constant SRR1
            with m.Case(MicrOp.OP_RFID):
                comb += [self.fast_out.data.eq(FastRegs.SRR1),
                         self.fast_out.ok.eq(1)]

        return m
572
573
class DecodeRC(Elaboratable):
    """DecodeRC: works out the Record (Rc) bit.

    sel_in chooses between the instruction's own Rc field, a constant
    one and a constant zero; rc_out.ok is set in all three cases.
    """

    def __init__(self, dec):
        self.dec = dec
        self.sel_in = Signal(RC, reset_less=True)
        self.insn_in = Signal(32, reset_less=True)
        self.rc_out = Data(1, "rc")

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        rc = self.rc_out

        with m.Switch(self.sel_in):
            # taken from the instruction
            with m.Case(RC.RC):
                comb += [rc.data.eq(self.dec.Rc),
                         rc.ok.eq(1)]
            # forced on
            with m.Case(RC.ONE):
                comb += [rc.data.eq(1),
                         rc.ok.eq(1)]
            # forced off
            with m.Case(RC.NONE):
                comb += [rc.data.eq(0),
                         rc.ok.eq(1)]

        return m
603
604
class DecodeOE(Elaboratable):
    """DecodeOE: works out the OE bit.

    Whether an instruction has an OE field is (for now) decided from the
    "rc" selector of the decode table, which might not be good: it is not
    entirely correct architecturally, since for mulhd and mulhdu the OE
    field is reserved.  Ops listed explicitly below never produce OE.
    """

    def __init__(self, dec):
        self.dec = dec
        self.sel_in = Signal(RC, reset_less=True)
        self.insn_in = Signal(32, reset_less=True)
        self.oe_out = Data(1, "oe")

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        op = self.dec.op

        # micro-ops for which OE is *ignored* (mulhw, mulhwu, mulhd,
        # mulhdu, rotates, ...).
        # XXX ARGH! ignoring OE causes incompatibility with microwatt
        # http://lists.libre-soc.org/pipermail/libre-soc-dev/2020-August/000302.html
        oe_ignored = (MicrOp.OP_MUL_H64, MicrOp.OP_MUL_H32,
                      MicrOp.OP_EXTS, MicrOp.OP_CNTZ,
                      MicrOp.OP_SHL, MicrOp.OP_SHR, MicrOp.OP_RLC,
                      MicrOp.OP_LOAD, MicrOp.OP_STORE,
                      MicrOp.OP_RLCL, MicrOp.OP_RLCR,
                      MicrOp.OP_EXTSWSLI)

        with m.Switch(op.internal_op):

            # deliberately empty: oe_out stays at its default
            with m.Case(*oe_ignored):
                pass

            # all other ops decode the OE field
            with m.Default():
                with m.Switch(self.sel_in):
                    with m.Case(RC.RC):
                        comb += [self.oe_out.data.eq(self.dec.OE),
                                 self.oe_out.ok.eq(1)]

        return m
651
652
class DecodeCRIn(Elaboratable):
    """DecodeCRIn: selects the CR field(s) an instruction reads.

    The outputs are CR *indices* taken from instruction fields (not the
    data held in the CR); 3 bits suffice as they refer to CR0-CR7.
    whole_reg carries an 8-bit field mask for whole-CR accesses.
    """

    def __init__(self, dec):
        self.dec = dec
        self.sel_in = Signal(CRInSel, reset_less=True)
        self.insn_in = Signal(32, reset_less=True)
        self.cr_bitfield = Data(3, "cr_bitfield")
        self.cr_bitfield_b = Data(3, "cr_bitfield_b")
        self.cr_bitfield_o = Data(3, "cr_bitfield_o")
        self.whole_reg = Data(8, "cr_fxm")
        self.sv_override = Signal(2, reset_less=True)  # do not do EXTRA spec

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        op = self.dec.op
        m.submodules.ppick = ppick = PriorityPicker(8, reverse_i=True,
                                                    reverse_o=True)

        # defaults: nothing selected (overridden by the Switch below)
        comb += [self.cr_bitfield.ok.eq(0),
                 self.cr_bitfield_b.ok.eq(0),
                 self.cr_bitfield_o.ok.eq(0),
                 self.whole_reg.ok.eq(0),
                 self.sv_override.eq(0)]

        # select the relevant CR bitfields
        with m.Switch(self.sel_in):

            with m.Case(CRInSel.NONE):
                pass  # No bitfield activated

            with m.Case(CRInSel.CR0):
                comb += [self.cr_bitfield.data.eq(0),  # CR0 (MSB0 numbering)
                         self.cr_bitfield.ok.eq(1),
                         self.sv_override.eq(1)]

            with m.Case(CRInSel.CR1):
                comb += [self.cr_bitfield.data.eq(1),  # CR1 (MSB0 numbering)
                         self.cr_bitfield.ok.eq(1),
                         self.sv_override.eq(2)]

            with m.Case(CRInSel.BI):
                comb += [self.cr_bitfield.data.eq(self.dec.BI[2:5]),
                         self.cr_bitfield.ok.eq(1)]

            with m.Case(CRInSel.BFA):
                comb += [self.cr_bitfield.data.eq(self.dec.FormX.BFA),
                         self.cr_bitfield.ok.eq(1)]

            # three fields at once: BA and BB, plus BT
            with m.Case(CRInSel.BA_BB):
                comb += [self.cr_bitfield.data.eq(self.dec.BA[2:5]),
                         self.cr_bitfield.ok.eq(1),
                         self.cr_bitfield_b.data.eq(self.dec.BB[2:5]),
                         self.cr_bitfield_b.ok.eq(1),
                         self.cr_bitfield_o.data.eq(self.dec.BT[2:5]),
                         self.cr_bitfield_o.ok.eq(1)]

            with m.Case(CRInSel.BC):
                comb += [self.cr_bitfield.data.eq(self.dec.BC[2:5]),
                         self.cr_bitfield.ok.eq(1)]

            with m.Case(CRInSel.WHOLE_REG):
                comb += self.whole_reg.ok.eq(1)
                move_one = Signal(reset_less=True)
                comb += move_one.eq(self.insn_in[20])  # MSB0 bit 11
                single_field_mfcr = (op.internal_op == MicrOp.OP_MFCR) & move_one
                with m.If(single_field_mfcr):
                    # must one-hot the FXM field
                    comb += ppick.i.eq(self.dec.FXM)
                    comb += self.whole_reg.data.eq(ppick.o)
                with m.Else():
                    # otherwise use all of it
                    comb += self.whole_reg.data.eq(0xff)

        return m
725
726
class DecodeCROut(Elaboratable):
    """DecodeCROut: selects the CR field an instruction writes.

    The outputs are CR *indices* taken from instruction fields (not the
    data held in the CR); 3 bits suffice as they refer to CR0-CR7.
    whole_reg carries an 8-bit field mask for whole-CR accesses.
    """

    def __init__(self, dec):
        self.dec = dec
        self.rc_in = Signal(reset_less=True)
        self.sel_in = Signal(CROutSel, reset_less=True)
        self.insn_in = Signal(32, reset_less=True)
        self.cr_bitfield = Data(3, "cr_bitfield")
        self.whole_reg = Data(8, "cr_fxm")
        self.sv_override = Signal(2, reset_less=True)  # do not do EXTRA spec

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        op = self.dec.op
        m.submodules.ppick = ppick = PriorityPicker(8, reverse_i=True,
                                                    reverse_o=True)

        # defaults: nothing selected (overridden by the Switch below)
        comb += [self.cr_bitfield.ok.eq(0),
                 self.whole_reg.ok.eq(0),
                 self.sv_override.eq(0)]

        # IMPORTANT: the conditions under which cr_bitfield.ok is set here
        # MUST match the write_cr0 logic in the decoder classes further
        # down this file.  The two are kept apart so that the subset
        # decoders do not each need their own copy of DecodeCROut:
        # register decoding should be a one-off in PowerDecoder2.
        # see https://bugs.libre-soc.org/show_bug.cgi?id=606

        with m.Switch(self.sel_in):

            with m.Case(CROutSel.NONE):
                pass  # No bitfield activated

            with m.Case(CROutSel.CR0):
                comb += [self.cr_bitfield.data.eq(0),  # CR0 (MSB0 numbering)
                         self.cr_bitfield.ok.eq(self.rc_in),  # only when RC=1
                         self.sv_override.eq(1)]

            with m.Case(CROutSel.CR1):
                comb += [self.cr_bitfield.data.eq(1),  # CR1 (MSB0 numbering)
                         self.cr_bitfield.ok.eq(self.rc_in),  # only when RC=1
                         self.sv_override.eq(2)]

            with m.Case(CROutSel.BF):
                comb += [self.cr_bitfield.data.eq(self.dec.FormX.BF),
                         self.cr_bitfield.ok.eq(1)]

            with m.Case(CROutSel.BT):
                comb += [self.cr_bitfield.data.eq(self.dec.FormXL.BT[2:5]),
                         self.cr_bitfield.ok.eq(1)]

            with m.Case(CROutSel.WHOLE_REG):
                comb += self.whole_reg.ok.eq(1)
                move_one = Signal(reset_less=True)
                comb += move_one.eq(self.insn_in[20])
                with m.If(op.internal_op == MicrOp.OP_MTCRF):
                    with m.If(move_one):
                        # must one-hot the FXM field
                        comb += ppick.i.eq(self.dec.FXM)
                        with m.If(ppick.en_o):
                            comb += self.whole_reg.data.eq(ppick.o)
                        with m.Else():
                            # picker found nothing: fall back to CR7
                            comb += self.whole_reg.data.eq(0b00000001)
                    with m.Else():
                        # FXM is used directly as the field mask
                        comb += self.whole_reg.data.eq(self.dec.FXM)
                with m.Else():
                    # otherwise use all of it
                    comb += self.whole_reg.data.eq(0xff)

        return m
796
# dictionary of Input Record field names that, if they exist,
# will need a corresponding CSV Decoder file column (actually, PowerOp)
# to be decoded (this includes the single bit names)
#
# NOTE: 'rc' and 'oe' used to be listed twice in this literal (first
# mapped to 'rc_sel', later to 'cr_out' / 'cr_in').  Python keeps only the
# last value of a repeated key, so the 'rc_sel' entries were dead.  The
# effective mapping is spelled out here instead.  'rc_sel' is not lost:
# PowerDecodeSubset.get_col_subset always includes it (together with
# 'cr_in' and 'cr_out') in the column subset.
record_names = {'insn_type': 'internal_op',
                'fn_unit': 'function_unit',
                'rc': 'cr_out',
                'oe': 'cr_in',
                'zero_a': 'in1_sel',
                'imm_data': 'in2_sel',
                'invert_in': 'inv_a',
                'invert_out': 'inv_out',
                'output_carry': 'cry_out',
                'input_carry': 'cry_in',
                'is_32bit': 'is_32b',
                'is_signed': 'sgn',
                'lk': 'lk',
                'data_len': 'ldst_len',
                'byte_reverse': 'br',
                'sign_extend': 'sgn_ext',
                'ldst_mode': 'upd',
                }
820
821
class PowerDecodeSubset(Elaboratable):
    """PowerDecodeSubset: dynamic subset decoder

    only fields actually requested are copied over. hence, "subset" (duh).

    Constructor arguments:

    dec     -- first-stage decoder to use; when None one is created,
               restricted to the columns/rows this subset needs
    opkls   -- class of the operand record (defaults to Decode2ToOperand)
    fn_name -- name of the function unit; used for naming and, when the
               decoder is created here, for filtering rows by 'unit'
    final   -- when True no Decode2ToExecute1Type records are created and
               fields are copied straight into self.do
    state   -- CoreState supplying msr and pc (created when None)
    """
    def __init__(self, dec, opkls=None, fn_name=None, final=False, state=None):

        self.sv_rm = SVP64Rec(name="dec_svp64")  # SVP64 RM field
        self.final = final
        # note: deliberately stores the argument as given (possibly None)
        self.opkls = opkls
        self.fn_name = fn_name
        if opkls is None:
            opkls = Decode2ToOperand
        self.do = opkls(fn_name)
        col_subset = self.get_col_subset(self.do)

        # only needed for "main" PowerDecode2
        if not self.final:
            self.e = Decode2ToExecute1Type(name=self.fn_name, do=self.do)

        # create decoder if one not already given
        if dec is None:
            dec = create_pdecode(name=fn_name, col_subset=col_subset,
                                 row_subset=self.rowsubsetfn)
        self.dec = dec

        # state information needed by the Decoder
        if state is None:
            state = CoreState("dec2")
        self.state = state

    def get_col_subset(self, do):
        """Return the set of decoder column names needed by record *do*.

        A column is included when *do* has the field that record_names
        associates with it; three columns are always included.
        """
        subset = {'cr_in', 'cr_out', 'rc_sel'}  # needed, non-optional
        for k, v in record_names.items():
            if hasattr(do, k):
                subset.add(v)
        # diagnostic output, emitted every time a subset is computed
        print("get_col_subset", self.fn_name, do.fields, subset)
        return subset

    def rowsubsetfn(self, opcode, row):
        """Row filter: keep only rows whose 'unit' is this function unit."""
        return row['unit'] == self.fn_name

    def ports(self):
        """Return the list of ports of this decoder.

        self.e only exists when the decoder is not "final" (see __init__),
        so its ports are only included in that case; accessing it
        unconditionally raised AttributeError for final decoders.
        """
        ports = self.dec.ports()
        if not self.final:
            ports = ports + self.e.ports()
        return ports + self.sv_rm.ports()

    def needs_field(self, field, op_field):
        """True if the target record has *field* and the decoder's op
        has *op_field*.
        """
        if self.final:
            do = self.do
        else:
            # e_tmp is created by elaborate()
            do = self.e_tmp.do
        return hasattr(do, field) and self.op_get(op_field) is not None

    def do_copy(self, field, val, final=False):
        """Return an assignment of *val* to *field* of the target record.

        An empty list (a no-op when added to a domain) is returned when
        the record has no such field or when *val* is None.  The target
        is self.do if either *final* or self.final is set, otherwise the
        temporary record created by elaborate().
        """
        if final or self.final:
            do = self.do
        else:
            do = self.e_tmp.do
        if hasattr(do, field) and val is not None:
            return getattr(do, field).eq(val)
        return []

    def op_get(self, op_field):
        """Return column *op_field* of the decoder's op, or None if the
        decoder was built without it.
        """
        return getattr(self.dec.op, op_field, None)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        state = self.state
        op, do = self.dec.op, self.do
        msr, cia = state.msr, state.pc
        # fill in for a normal instruction (not an exception)
        # copy over if non-exception, non-privileged etc. is detected
        if not self.final:
            if self.fn_name is None:
                name = "tmp"
            else:
                name = self.fn_name + "tmp"
            self.e_tmp = Decode2ToExecute1Type(name=name, opkls=self.opkls)

        # set up submodule decoders
        m.submodules.dec = self.dec
        m.submodules.dec_rc = self.dec_rc = dec_rc = DecodeRC(self.dec)
        m.submodules.dec_oe = dec_oe = DecodeOE(self.dec)

        # copy instruction through...
        for i in [do.insn, dec_rc.insn_in, dec_oe.insn_in, ]:
            comb += i.eq(self.dec.opcode_in)

        # ...and subdecoders' input fields
        comb += dec_rc.sel_in.eq(op.rc_sel)
        comb += dec_oe.sel_in.eq(op.rc_sel)  # XXX should be OE sel

        # copy "state" over
        comb += self.do_copy("msr", msr)
        comb += self.do_copy("cia", cia)

        # set up instruction type
        # no op: defaults to OP_ILLEGAL
        if self.fn_name == "MMU":
            # mmu is special case: needs SPR opcode as well.
            # (mmu0_spr_dec is not set in this class: it must have been
            # attached to the instance before elaboration)
            mmu0 = self.mmu0_spr_dec
            with m.If(((mmu0.dec.op.internal_op == MicrOp.OP_MTSPR) |
                       (mmu0.dec.op.internal_op == MicrOp.OP_MFSPR))):
                comb += self.do_copy("insn_type", mmu0.op_get("internal_op"))
            with m.Else():
                comb += self.do_copy("insn_type", self.op_get("internal_op"))
        else:
            comb += self.do_copy("insn_type", self.op_get("internal_op"))

        # function unit for decoded instruction: requires minor redirect
        # for SPR set/get
        fn = self.op_get("function_unit")
        spr = Signal(10, reset_less=True)
        comb += spr.eq(decode_spr_num(self.dec.SPR))  # from XFX

        # XXX BUG - don't use hardcoded magic constants.
        # also use ".value" otherwise the test fails. bit of a pain
        # https://bugs.libre-soc.org/show_bug.cgi?id=603

        SPR_PID = 48  # TODO read docs for POWER9
        # Microwatt doesn't implement the partition table
        # instead has PRTBL register (SPR) to point to process table
        SPR_PRTBL = 720  # see common.vhdl in microwatt, not in POWER9
        # MTSPR/MFSPR of these four SPRs are redirected to the MMU
        with m.If(((self.dec.op.internal_op == MicrOp.OP_MTSPR) |
                   (self.dec.op.internal_op == MicrOp.OP_MFSPR)) &
                  ((spr == SPR.DSISR) | (spr == SPR.DAR)
                   | (spr == SPR_PRTBL) | (spr == SPR_PID))):
            comb += self.do_copy("fn_unit", Function.MMU)
        with m.Else():
            comb += self.do_copy("fn_unit", fn)

        # immediates: sub-decoders are only instantiated when needed
        if self.needs_field("zero_a", "in1_sel"):
            m.submodules.dec_ai = dec_ai = DecodeAImm(self.dec)
            comb += dec_ai.sel_in.eq(op.in1_sel)
            comb += self.do_copy("zero_a", dec_ai.immz_out)  # RA==0 detected
        if self.needs_field("imm_data", "in2_sel"):
            m.submodules.dec_bi = dec_bi = DecodeBImm(self.dec)
            comb += dec_bi.sel_in.eq(op.in2_sel)
            comb += self.do_copy("imm_data", dec_bi.imm_out)  # imm in RB

        # rc and oe out
        comb += self.do_copy("rc", dec_rc.rc_out)
        comb += self.do_copy("oe", dec_oe.oe_out)

        # CR in/out - note: these MUST match with what happens in
        # DecodeCROut!
        rc_out = self.dec_rc.rc_out.data
        with m.Switch(op.cr_out):
            with m.Case(CROutSel.CR0, CROutSel.CR1):
                comb += self.do_copy("write_cr0", rc_out)  # only when RC=1
            with m.Case(CROutSel.BF, CROutSel.BT):
                comb += self.do_copy("write_cr0", 1)

        comb += self.do_copy("input_cr", self.op_get("cr_in"))   # CR in
        comb += self.do_copy("output_cr", self.op_get("cr_out"))  # CR out

        # decoded/selected instruction flags
        comb += self.do_copy("data_len", self.op_get("ldst_len"))
        comb += self.do_copy("invert_in", self.op_get("inv_a"))
        comb += self.do_copy("invert_out", self.op_get("inv_out"))
        comb += self.do_copy("input_carry", self.op_get("cry_in"))
        comb += self.do_copy("output_carry", self.op_get("cry_out"))
        comb += self.do_copy("is_32bit", self.op_get("is_32b"))
        comb += self.do_copy("is_signed", self.op_get("sgn"))
        # the instruction's LK bit is only passed on when the decode
        # table says the op has one
        lk = self.op_get("lk")
        if lk is not None:
            with m.If(lk):
                comb += self.do_copy("lk", self.dec.LK)  # XXX TODO: accessor

        comb += self.do_copy("byte_reverse", self.op_get("br"))
        comb += self.do_copy("sign_extend", self.op_get("sgn_ext"))
        comb += self.do_copy("ldst_mode", self.op_get("upd"))  # LD/ST mode

        return m
997
998
class PowerDecode2(PowerDecodeSubset):
    """PowerDecode2: the main instruction decoder.

    whilst PowerDecode is responsible for decoding the actual opcode, this
    module encapsulates further specialist, sparse information and
    expansion of fields that is inconvenient to have in the CSV files.
    for example: the encoding of the immediates, which are detected
    and expanded out to their full value from an annotated (enum)
    representation.

    implicit register usage is also set up, here.  for example: OP_BC
    requires implicitly reading CTR, OP_RFID requires implicitly writing
    to SRR1 and so on.

    in addition, PowerDecoder2 is responsible for detecting whether
    instructions are illegal (or privileged) or not, and instead of
    just leaving at that, *replacing* the instruction to execute with
    a suitable alternative (trap).

    LDSTExceptions are done the cycle _after_ they're detected (after
    they come out of LDSTCompUnit).  basically despite the instruction
    being decoded, the results of the decode are completely ignored
    and "exception.happened" used to set the "actual" instruction to
    "OP_TRAP".  the LDSTException data structure gets filled in,
    in the CompTrapOpSubset and that's what it fills in SRR.

    to make this work, TestIssuer must notice "exception.happened"
    after the (failed) LD/ST and copies the LDSTException info from
    the output, into here (PowerDecoder2).  without incrementing PC.
    """

    def __init__(self, dec, opkls=None, fn_name=None, final=False, state=None):
        """sets up the base-class decoder plus the LD/ST exception record
        and the per-operand "is vectorised" status signals.

        all arguments are passed unmodified to PowerDecodeSubset.
        """
        super().__init__(dec, opkls, fn_name, final, state)
        # LD/ST exception info: examined in elaborate() (exc.happened)
        self.exc = LDSTException("dec2_exc")

        # one flag per operand, driven from the SVP64 EXTRA decoders
        self.cr_out_isvec = Signal(1, name="cr_out_isvec")
        self.cr_in_isvec = Signal(1, name="cr_in_isvec")
        self.cr_in_b_isvec = Signal(1, name="cr_in_b_isvec")
        self.cr_in_o_isvec = Signal(1, name="cr_in_o_isvec")
        self.in1_isvec = Signal(1, name="reg_a_isvec")
        self.in2_isvec = Signal(1, name="reg_b_isvec")
        self.in3_isvec = Signal(1, name="reg_c_isvec")
        self.o_isvec = Signal(1, name="reg_o_isvec")
        self.o2_isvec = Signal(1, name="reg_o2_isvec")
        self.no_out_vec = Signal(1, name="no_out_vec") # no outputs are vectors

    def get_col_subset(self, opkls):
        """returns the base-class column subset with the extra decoder
        columns that elaborate() reads from self.dec.op added to it.
        """
        subset = super().get_col_subset(opkls)
        for column in ("asmcode",
                       "in1_sel", "in2_sel", "in3_sel", "out_sel",
                       "sv_in1", "sv_in2", "sv_in3", "sv_out",
                       "sv_cr_in", "sv_cr_out",
                       "SV_Etype", "SV_Ptype",
                       "lk", "internal_op", "form"):
            subset.add(column)
        return subset

    def elaborate(self, platform):
        """builds on the base-class Module: adds register, CR and SVP64
        EXTRA sub-decoders, fills in the temporary decode record
        (self.e_tmp) and then either copies it to the output (self.e)
        or replaces the output with a trap.

        returns the nmigen Module.
        """
        m = super().elaborate(platform)
        comb = m.d.comb
        state = self.state
        e_out, op, do_out = self.e, self.dec.op, self.e.do
        dec_spr, msr, ext_irq = state.dec, state.msr, state.eint
        rc_out = self.dec_rc.rc_out.data
        e = self.e_tmp
        do = e.do

        # fill in for a normal instruction (not an exception)
        # copy over if non-exception, non-privileged etc. is detected

        # set up submodule decoders
        m.submodules.dec_a = dec_a = DecodeA(self.dec)
        m.submodules.dec_b = dec_b = DecodeB(self.dec)
        m.submodules.dec_c = dec_c = DecodeC(self.dec)
        m.submodules.dec_o = dec_o = DecodeOut(self.dec)
        m.submodules.dec_o2 = dec_o2 = DecodeOut2(self.dec)
        m.submodules.dec_cr_in = self.dec_cr_in = DecodeCRIn(self.dec)
        m.submodules.dec_cr_out = self.dec_cr_out = DecodeCROut(self.dec)

        # and SVP64 Extra decoders
        m.submodules.crout_svdec = crout_svdec = SVP64CRExtra()
        m.submodules.crin_svdec = crin_svdec = SVP64CRExtra()
        m.submodules.crin_svdec_b = crin_svdec_b = SVP64CRExtra()
        m.submodules.crin_svdec_o = crin_svdec_o = SVP64CRExtra()
        m.submodules.in1_svdec = in1_svdec = SVP64RegExtra()
        m.submodules.in2_svdec = in2_svdec = SVP64RegExtra()
        m.submodules.in3_svdec = in3_svdec = SVP64RegExtra()
        m.submodules.o_svdec = o_svdec = SVP64RegExtra()
        m.submodules.o2_svdec = o2_svdec = SVP64RegExtra()

        # debug access to crout_svdec (used in get_pdecode_cr_out)
        self.crout_svdec = crout_svdec

        # copy instruction through...
        for i in [do.insn, dec_a.insn_in, dec_b.insn_in,
                  self.dec_cr_in.insn_in, self.dec_cr_out.insn_in,
                  dec_c.insn_in, dec_o.insn_in, dec_o2.insn_in]:
            comb += i.eq(self.dec.opcode_in)

        # CR setup
        comb += self.dec_cr_in.sel_in.eq(op.cr_in)
        comb += self.dec_cr_out.sel_in.eq(op.cr_out)
        comb += self.dec_cr_out.rc_in.eq(rc_out)

        # CR register info
        comb += self.do_copy("read_cr_whole", self.dec_cr_in.whole_reg)
        comb += self.do_copy("write_cr_whole", self.dec_cr_out.whole_reg)

        # now do the SVP64 munging.  op.SV_Etype and op.sv_in1 comes from
        # PowerDecoder which in turn comes from LDST-RM*.csv and RM-*.csv
        # which in turn were auto-generated by sv_analysis.py
        extra = self.sv_rm.extra            # SVP64 extra bits 10:18

        #######
        # CR out
        comb += crout_svdec.idx.eq(op.sv_cr_out)  # SVP64 CR out
        comb += self.cr_out_isvec.eq(crout_svdec.isvec)

        #######
        # CR in - index selection slightly different due to shared CR field
        cr_a_idx = Signal(SVEXTRA)
        cr_b_idx = Signal(SVEXTRA)

        # these change slightly, when decoding BA/BB.  really should have
        # their own separate CSV column: sv_cr_in1 and sv_cr_in2, but hey
        comb += cr_a_idx.eq(op.sv_cr_in)
        comb += cr_b_idx.eq(SVEXTRA.NONE)
        with m.If(op.sv_cr_in == SVEXTRA.Idx_1_2.value):
            # defaults above are overridden: A takes Idx1, B takes Idx2
            comb += cr_a_idx.eq(SVEXTRA.Idx1)
            comb += cr_b_idx.eq(SVEXTRA.Idx2)

        comb += self.cr_in_isvec.eq(crin_svdec.isvec)
        comb += self.cr_in_b_isvec.eq(crin_svdec_b.isvec)
        comb += self.cr_in_o_isvec.eq(crin_svdec_o.isvec)

        # indices are slightly different, BA/BB mess sorted above
        comb += crin_svdec.idx.eq(cr_a_idx)       # SVP64 CR in A
        comb += crin_svdec_b.idx.eq(cr_b_idx)     # SVP64 CR in B
        comb += crin_svdec_o.idx.eq(op.sv_cr_out) # SVP64 CR out

        # ...and subdecoders' input fields
        comb += dec_a.sel_in.eq(op.in1_sel)
        comb += dec_b.sel_in.eq(op.in2_sel)
        comb += dec_c.sel_in.eq(op.in3_sel)
        comb += dec_o.sel_in.eq(op.out_sel)
        comb += dec_o2.sel_in.eq(op.out_sel)
        if hasattr(do, "lk"):
            comb += dec_o2.lk.eq(do.lk)

        # get SVSTATE srcstep (TODO: elwidth, dststep etc.) needed below
        srcstep = Signal.like(self.state.svstate.srcstep)
        comb += srcstep.eq(self.state.svstate.srcstep)

        # registers a, b, c and out and out2 (LD/ST EA)
        for to_reg, fromreg, svdec in (
            (e.read_reg1, dec_a.reg_out, in1_svdec),
            (e.read_reg2, dec_b.reg_out, in2_svdec),
            (e.read_reg3, dec_c.reg_out, in3_svdec),
            (e.write_reg, dec_o.reg_out, o_svdec),
            (e.write_ea, dec_o2.reg_out, o2_svdec)):
            comb += svdec.extra.eq(extra)        # EXTRA field of SVP64 RM
            comb += svdec.etype.eq(op.SV_Etype)  # EXTRA2/3 for this insn
            comb += svdec.reg_in.eq(fromreg.data) # reg number from subdecoder
            comb += to_reg.ok.eq(fromreg.ok)
            # detect if Vectorised: add srcstep if yes.  TODO: a LOT.
            # this trick only holds when elwidth=default and in single-pred
            with m.If(svdec.isvec):
                comb += to_reg.data.eq(srcstep+svdec.reg_out) # 7-bit output
            with m.Else():
                comb += to_reg.data.eq(svdec.reg_out) # 7-bit output

        comb += in1_svdec.idx.eq(op.sv_in1)  # SVP64 reg #1 (matches in1_sel)
        comb += in2_svdec.idx.eq(op.sv_in2)  # SVP64 reg #2 (matches in2_sel)
        comb += in3_svdec.idx.eq(op.sv_in3)  # SVP64 reg #3 (matches in3_sel)
        comb += o_svdec.idx.eq(op.sv_out)    # SVP64 output (matches out_sel)
        # XXX TODO - work out where this should come from.  the problem is
        # that LD-with-update is implied (computed from "is instruction in
        # "update mode" rather than specified cleanly as its own CSV column
        #comb += o2_svdec.idx.eq(op.sv_out)    # SVP64 output (implicit)

        # output reg-is-vectorised (and when no output is vectorised)
        comb += self.in1_isvec.eq(in1_svdec.isvec)
        comb += self.in2_isvec.eq(in2_svdec.isvec)
        comb += self.in3_isvec.eq(in3_svdec.isvec)
        comb += self.o_isvec.eq(o_svdec.isvec)
        comb += self.o2_isvec.eq(o2_svdec.isvec)
        # TODO: include SPRs and CRs here!  must be True when *all* are scalar
        comb += self.no_out_vec.eq((~o2_svdec.isvec) & (~o_svdec.isvec) &
                                   (~crout_svdec.isvec))

        # SPRs out
        comb += e.read_spr1.eq(dec_a.spr_out)
        comb += e.write_spr.eq(dec_o.spr_out)

        # Fast regs out
        comb += e.read_fast1.eq(dec_a.fast_out)
        comb += e.read_fast2.eq(dec_b.fast_out)
        comb += e.write_fast1.eq(dec_o.fast_out)
        comb += e.write_fast2.eq(dec_o2.fast_out)

        # condition registers (CR)
        for to_reg, cr, name, svdec in (
            (e.read_cr1, self.dec_cr_in, "cr_bitfield", crin_svdec),
            (e.read_cr2, self.dec_cr_in, "cr_bitfield_b", crin_svdec_b),
            (e.read_cr3, self.dec_cr_in, "cr_bitfield_o", crin_svdec_o),
            (e.write_cr, self.dec_cr_out, "cr_bitfield", crout_svdec)):
            fromreg = getattr(cr, name)
            comb += svdec.extra.eq(extra)        # EXTRA field of SVP64 RM
            comb += svdec.etype.eq(op.SV_Etype)  # EXTRA2/3 for this insn
            comb += svdec.cr_in.eq(fromreg.data) # 3-bit (CR0/BC/BFA)
            with m.If(svdec.isvec):
                # check if this is CR0 or CR1: treated differently
                # (does not "listen" to EXTRA2/3 spec for a start)
                # also: the CRs start from completely different locations
                with m.If(cr.sv_override == 1): # CR0
                    offs = SVP64CROffs.CR0
                    comb += to_reg.data.eq(srcstep+offs)
                with m.Elif(cr.sv_override == 2): # CR1
                    # use the named offset, as the CR0 case does: this
                    # branch previously hard-coded "+1", leaving offs unused
                    offs = SVP64CROffs.CR1
                    comb += to_reg.data.eq(srcstep+offs)
                with m.Else():
                    comb += to_reg.data.eq(srcstep+svdec.cr_out) # 7-bit out
            with m.Else():
                comb += to_reg.data.eq(svdec.cr_out) # 7-bit output
            comb += to_reg.ok.eq(fromreg.ok)

        # sigh this is exactly the sort of thing for which the
        # decoder is designed to not need.  MTSPR, MFSPR and others need
        # access to the XER bits.  however setting e.oe is not appropriate
        with m.If(op.internal_op == MicrOp.OP_MFSPR):
            comb += e.xer_in.eq(0b111) # SO, CA, OV
        with m.If(op.internal_op == MicrOp.OP_CMP):
            comb += e.xer_in.eq(1<<XERRegs.SO) # SO
        with m.If(op.internal_op == MicrOp.OP_MTSPR):
            comb += e.xer_out.eq(1)

        # set the trapaddr to 0x700 for a td/tw/tdi/twi operation
        with m.If(op.internal_op == MicrOp.OP_TRAP):
            # *DO NOT* call self.trap here.  that would reset absolutely
            # everything including destroying read of RA and RB.
            comb += self.do_copy("trapaddr", 0x70) # strip first nibble

        ####################
        # ok so the instruction's been decoded, blah blah, however
        # now we need to determine if it's actually going to go ahead...
        # *or* if in fact it's a privileged operation, whether there's
        # an external interrupt, etc. etc.  this is a simple priority
        # if-elif-elif sequence.  LD/ST exceptions are checked first,
        # then decrement, then EINT, then privileged operation, and
        # finally illegal instruction.

        # check if instruction is privileged
        is_priv_insn = instr_is_priv(m, op.internal_op, e.do.insn)

        # different IRQ conditions
        ext_irq_ok = Signal()
        dec_irq_ok = Signal()
        priv_ok = Signal()
        illeg_ok = Signal()
        exc = self.exc

        comb += ext_irq_ok.eq(ext_irq & msr[MSR.EE]) # v3.0B p944 (MSR.EE)
        comb += dec_irq_ok.eq(dec_spr[63] & msr[MSR.EE]) # 6.5.11 p1076
        comb += priv_ok.eq(is_priv_insn & msr[MSR.PR])
        comb += illeg_ok.eq(op.internal_op == MicrOp.OP_ILLEGAL)

        # LD/ST exceptions.  TestIssuer copies the exception info at us
        # after a failed LD/ST.
        with m.If(exc.happened):
            with m.If(exc.alignment):
                self.trap(m, TT.PRIV, 0x600)
            with m.Elif(exc.instr_fault):
                with m.If(exc.segment_fault):
                    self.trap(m, TT.PRIV, 0x480)
                with m.Else():
                    # pass exception info to trap to create SRR1
                    self.trap(m, TT.MEMEXC, 0x400, exc)
            with m.Else():
                with m.If(exc.segment_fault):
                    self.trap(m, TT.PRIV, 0x380)
                with m.Else():
                    self.trap(m, TT.PRIV, 0x300)

        # decrement counter (v3.0B p1099): TODO 32-bit version (MSR.LPCR)
        with m.Elif(dec_irq_ok):
            self.trap(m, TT.DEC, 0x900)   # v3.0B 6.5 p1065

        # external interrupt? only if MSR.EE set
        with m.Elif(ext_irq_ok):
            self.trap(m, TT.EINT, 0x500)

        # privileged instruction trap
        with m.Elif(priv_ok):
            self.trap(m, TT.PRIV, 0x700)

        # illegal instruction must redirect to trap.  this is done by
        # *overwriting* the decoded instruction and starting again.
        # (note: the same goes for interrupts and for privileged operations,
        # just with different trapaddr and traptype)
        with m.Elif(illeg_ok):
            # illegal instruction trap
            self.trap(m, TT.ILLEG, 0x700)

        # no exception, just copy things to the output
        with m.Else():
            comb += e_out.eq(e)

        ####################
        # follow-up after trap/irq to set up SRR0/1

        # trap: (note e.insn_type so this includes OP_ILLEGAL) set up fast regs
        # Note: OP_SC could actually be modified to just be a trap
        with m.If((do_out.insn_type == MicrOp.OP_TRAP) |
                  (do_out.insn_type == MicrOp.OP_SC)):
            # TRAP write fast1 = SRR0
            comb += e_out.write_fast1.data.eq(FastRegs.SRR0) # constant: SRR0
            comb += e_out.write_fast1.ok.eq(1)
            # TRAP write fast2 = SRR1
            comb += e_out.write_fast2.data.eq(FastRegs.SRR1) # constant: SRR1
            comb += e_out.write_fast2.ok.eq(1)

        # RFID: needs to read SRR0/1
        with m.If(do_out.insn_type == MicrOp.OP_RFID):
            # RFID read fast1 = SRR0
            comb += e_out.read_fast1.data.eq(FastRegs.SRR0) # constant: SRR0
            comb += e_out.read_fast1.ok.eq(1)
            # RFID read fast2 = SRR1
            comb += e_out.read_fast2.data.eq(FastRegs.SRR1) # constant: SRR1
            comb += e_out.read_fast2.ok.eq(1)

        # annoying simulator bug
        if hasattr(e_out, "asmcode") and hasattr(self.dec.op, "asmcode"):
            comb += e_out.asmcode.eq(self.dec.op.asmcode)

        return m

    def trap(self, m, traptype, trapaddr, exc=None):
        """trap: this basically "rewrites" the decoded instruction as a trap

        * m:        the Module being built (statements go into m.d.comb,
                    in whatever m.If/m.Elif context the caller is in)
        * traptype: value copied to the "traptype" field
        * trapaddr: trap vector: stored shifted right by 4 in "trapaddr"
        * exc:      optional exception record copied to "ldst_exc"
        """
        comb = m.d.comb
        e = self.e
        comb += e.eq(0) # reset eeeeeverything

        # start again
        # NOTE(review): the third do_copy argument is defined by the base
        # class (not visible from here): presumably it forces the copy
        comb += self.do_copy("insn", self.dec.opcode_in, True)
        comb += self.do_copy("insn_type", MicrOp.OP_TRAP, True)
        comb += self.do_copy("fn_unit", Function.TRAP, True)
        comb += self.do_copy("trapaddr", trapaddr >> 4, True) # bottom 4 bits
        comb += self.do_copy("traptype", traptype, True)  # request type
        comb += self.do_copy("ldst_exc", exc, True)  # LD/ST exception info
        comb += self.do_copy("msr", self.state.msr, True) # copy of MSR "state"
        comb += self.do_copy("cia", self.state.pc, True)  # copy of PC "state"
1362
1363
1364 # SVP64 Prefix fields: see https://libre-soc.org/openpower/sv/svp64/
1365 # identifies if an instruction is a SVP64-encoded prefix, and extracts
1366 # the 24-bit SVP64 context (RM) if it is
class SVP64PrefixDecoder(Elaboratable):
    """Detects an SVP64 prefix word and extracts its 24-bit RM field.

    inputs:  raw_opcode_in (32-bit), bigendian
    outputs: opcode_in (raw opcode, byte-reversed when bigendian is set),
             is_svp64_mode, svp64_rm (only driven when is_svp64_mode)
    """

    def __init__(self):
        self.opcode_in = Signal(32, reset_less=True)
        self.raw_opcode_in = Signal.like(self.opcode_in, reset_less=True)
        self.is_svp64_mode = Signal(1, reset_less=True)
        self.svp64_rm = Signal(24, reset_less=True)
        self.bigendian = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb

        # sigh copied this from TopPowerDecoder
        # raw opcode in assumed to be in LE order: byte-reverse it to get BE
        # (Cat is LSB-first, so walk the four bytes from the top one down)
        raw_le = self.raw_opcode_in
        raw_be = Cat(*[raw_le[i:i+8] for i in range(24, -8, -8)])
        comb += self.opcode_in.eq(Mux(self.bigendian, raw_be, raw_le))

        # start identifying if the incoming opcode is an SVP64 prefix
        major = sel(m, self.opcode_in, SVP64P.OPC)
        ident = sel(m, self.opcode_in, SVP64P.SVP64_7_9)

        is_ext01 = (major == Const(1, 6))        # EXT01
        has_ident = (ident == Const(0b11, 2))    # identifier bits
        comb += self.is_svp64_mode.eq(is_ext01 & has_ident)

        # now grab the 24-bit ReMap context bits, but only for a prefix
        with m.If(self.is_svp64_mode):
            rm = sel(m, self.opcode_in, SVP64P.RM)
            comb += self.svp64_rm.eq(rm)

        return m

    def ports(self):
        return [
            self.opcode_in,
            self.raw_opcode_in,
            self.is_svp64_mode,
            self.svp64_rm,
            self.bigendian,
        ]
1409
def get_rdflags(e, cu):
    """Concatenate the read-enable flag of every source operand of `cu`.

    for each of the cu.n_src inputs, the regfile and register name are
    looked up with cu.get_in_spec and turned into a read flag by
    regspec_decode_read (given `e`).  the list of flags is printed
    (debug output) and returned as a single Cat, first source first.
    """
    def flag_for(idx):
        regfile, regname, _ = cu.get_in_spec(idx)
        rdflag, _read = regspec_decode_read(e, regfile, regname)
        return rdflag

    rdl = [flag_for(idx) for idx in range(cu.n_src)]
    print("rdflags", rdl)
    return Cat(*rdl)
1418
1419
if __name__ == '__main__':
    # write out RTLIL for the SVP64 prefix decoder on its own.
    # (this previously instantiated "SVP64PowerDecoder": the class defined
    # in this file is SVP64PrefixDecoder)
    svp64 = SVP64PrefixDecoder()
    vl = rtlil.convert(svp64, ports=svp64.ports())
    with open("svp64_dec.il", "w") as f:
        f.write(vl)

    # ...and for the full second-stage decoder
    pdecode = create_pdecode()
    dec2 = PowerDecode2(pdecode)
    vl = rtlil.convert(dec2, ports=dec2.ports() + pdecode.ports())
    with open("dec2.il", "w") as f:
        f.write(vl)