X-Git-Url: https://git.libre-soc.org/?p=soc.git;a=blobdiff_plain;f=src%2Fsoc%2Fdecoder%2Fpower_decoder2.py;h=1633d1912aa468bf44b8f4ed1568cbc28a1ddd82;hp=1dc9b37f141fd95559be53bb973dc863a7b8308e;hb=eb30c83009e3a01162ea1de537884e0b8bf6a154;hpb=756a92619526c8704d149bb5996509f9cc08ea2d diff --git a/src/soc/decoder/power_decoder2.py b/src/soc/decoder/power_decoder2.py index 1dc9b37f..1633d191 100644 --- a/src/soc/decoder/power_decoder2.py +++ b/src/soc/decoder/power_decoder2.py @@ -2,27 +2,85 @@ based on Anton Blanchard microwatt decode2.vhdl +Note: OP_TRAP is used for exceptions and interrupts (micro-code style) by +over-riding the internal opcode when an exception is needed. """ + from nmigen import Module, Elaboratable, Signal, Mux, Const, Cat, Repl, Record from nmigen.cli import rtlil +from soc.regfile.regfiles import XERRegs +from nmutil.picker import PriorityPicker from nmutil.iocontrol import RecordObject from nmutil.extend import exts -from soc.decoder.power_regspec_map import regspec_decode +from soc.experiment.mem_types import LDSTException + +from soc.decoder.power_regspec_map import regspec_decode_read +from soc.decoder.power_regspec_map import regspec_decode_write from soc.decoder.power_decoder import create_pdecode -from soc.decoder.power_enums import (InternalOp, CryIn, Function, +from soc.decoder.power_enums import (MicrOp, CryIn, Function, CRInSel, CROutSel, LdstLen, In1Sel, In2Sel, In3Sel, - OutSel, SPR, RC) + OutSel, SPR, RC, LDSTMode) +from soc.decoder.decode2execute1 import (Decode2ToExecute1Type, Data, + Decode2ToOperand) +from soc.consts import MSR from soc.regfile.regfiles import FastRegs +from soc.consts import TT +from soc.config.state import CoreState +from soc.regfile.util import spr_to_fast + + +def decode_spr_num(spr): + return Cat(spr[5:10], spr[0:5]) + + +def instr_is_priv(m, op, insn): + """determines if the instruction is privileged or not + """ + comb = m.d.comb + is_priv_insn = Signal(reset_less=True) + with m.Switch(op): + with m.Case(MicrOp.OP_ATTN, MicrOp.OP_MFMSR, MicrOp.OP_MTMSRD, + MicrOp.OP_MTMSR, MicrOp.OP_RFID): + comb += is_priv_insn.eq(1) + # XXX TODO + #with m.Case(MicrOp.OP_TLBIE) : comb += is_priv_insn.eq(1) + with m.Case(MicrOp.OP_MFSPR, MicrOp.OP_MTSPR): + with m.If(insn[20]): # field XFX.spr[-1] i think + comb += is_priv_insn.eq(1) + return is_priv_insn + + +class SPRMap(Elaboratable): + """SPRMap: maps POWER9 SPR numbers to internal enum values, fast and slow + """ + + def __init__(self): + self.spr_i = Signal(10, reset_less=True) + self.spr_o = Data(SPR, name="spr_o") + self.fast_o = Data(3, name="fast_o") + + def elaborate(self, platform): + m = Module() + with m.Switch(self.spr_i): + for i, x in enumerate(SPR): + with m.Case(x.value): + m.d.comb += self.spr_o.data.eq(i) + m.d.comb += self.spr_o.ok.eq(1) + for x, v in spr_to_fast.items(): + with m.Case(x.value): + m.d.comb += self.fast_o.data.eq(v) + m.d.comb += self.fast_o.ok.eq(1) + return m + class DecodeA(Elaboratable): """DecodeA from instruction - decodes register RA, whether immediate-zero, implicit and - explicit CSRs + decodes register RA, implicit and explicit CSRs """ def __init__(self, dec): @@ -30,13 +88,13 @@ class DecodeA(Elaboratable): self.sel_in = Signal(In1Sel, reset_less=True) self.insn_in = Signal(32, reset_less=True) self.reg_out = Data(5, name="reg_a") - self.immz_out = Signal(reset_less=True) - self.spr_out = Data(10, "spr_a") + self.spr_out = Data(SPR, "spr_a") self.fast_out = Data(3, "fast_a") def elaborate(self, platform): m = Module() comb = m.d.comb + m.submodules.sprmap = sprmap = SPRMap() # select Register A field ra = Signal(5, reset_less=True) @@ -47,11 +105,6 @@ class DecodeA(Elaboratable): comb += self.reg_out.data.eq(ra) comb += self.reg_out.ok.eq(1) - # zero immediate requested - with m.If((self.sel_in == In1Sel.RA_OR_ZERO) & - (self.reg_out.data == Const(0, 5))): - comb += self.immz_out.eq(1) - # some Logic/ALU ops have RS as the 3rd arg, but no "RA". with m.If(self.sel_in == In1Sel.RS): comb += self.reg_out.data.eq(self.dec.RS) @@ -59,40 +112,56 @@ class DecodeA(Elaboratable): # decode Fast-SPR based on instruction type op = self.dec.op - # BC or BCREG: potential implicit register (CTR) NOTE: same in DecodeOut - with m.If(op.internal_op == InternalOp.OP_BC): - with m.If(~self.dec.BO[2]): # 3.0B p38 BO2=0, use CTR reg - comb += self.fast_out.data.eq(FastRegs.CTR) # constant: CTR - comb += self.fast_out.ok.eq(1) - with m.Elif(op.internal_op == InternalOp.OP_BCREG): - xo9 = self.dec.FormXL.XO[9] # 3.0B p38 top bit of XO - xo5 = self.dec.FormXL.XO[5] # 3.0B p38 - with m.If(xo9 & ~xo5): - comb += self.fast_out.data.eq(FastRegs.CTR) # constant: CTR - comb += self.fast_out.ok.eq(1) - - # MFSPR move from SPRs - with m.If(op.internal_op == InternalOp.OP_MFSPR): - # XXX TODO: fast/slow SPR decoding and mapping - comb += self.spr_out.data.eq(self.dec.SPR) # SPR field, XFX - comb += self.spr_out.ok.eq(1) + with m.Switch(op.internal_op): + + # BC or BCREG: implicit register (CTR) NOTE: same in DecodeOut + with m.Case(MicrOp.OP_BC): + with m.If(~self.dec.BO[2]): # 3.0B p38 BO2=0, use CTR reg + # constant: CTR + comb += self.fast_out.data.eq(FastRegs.CTR) + comb += self.fast_out.ok.eq(1) + with m.Case(MicrOp.OP_BCREG): + xo9 = self.dec.FormXL.XO[9] # 3.0B p38 top bit of XO + xo5 = self.dec.FormXL.XO[5] # 3.0B p38 + with m.If(xo9 & ~xo5): + # constant: CTR + comb += self.fast_out.data.eq(FastRegs.CTR) + comb += self.fast_out.ok.eq(1) + + # MFSPR move from SPRs + with m.Case(MicrOp.OP_MFSPR): + spr = Signal(10, reset_less=True) + comb += spr.eq(decode_spr_num(self.dec.SPR)) # from XFX + comb += sprmap.spr_i.eq(spr) + comb += self.spr_out.eq(sprmap.spr_o) + comb += self.fast_out.eq(sprmap.fast_o) return m -class Data(Record): +class DecodeAImm(Elaboratable): + """DecodeA immediate from instruction - def __init__(self, width, name): - name_ok = "%s_ok" % name - layout = ((name, width), (name_ok, 1)) - Record.__init__(self, layout) - self.data = getattr(self, name) # convenience - self.ok = getattr(self, name_ok) # convenience - self.data.reset_less = True # grrr - self.reset_less = True # grrr + decodes register RA, whether immediate-zero, implicit and + explicit CSRs + """ - def ports(self): - return [self.data, self.ok] + def __init__(self, dec): + self.dec = dec + self.sel_in = Signal(In1Sel, reset_less=True) + self.immz_out = Signal(reset_less=True) + + def elaborate(self, platform): + m = Module() + comb = m.d.comb + + # zero immediate requested + ra = Signal(5, reset_less=True) + comb += ra.eq(self.dec.RA) + with m.If((self.sel_in == In1Sel.RA_OR_ZERO) & (ra == Const(0, 5))): + comb += self.immz_out.eq(1) + + return m class DecodeB(Elaboratable): @@ -109,7 +178,6 @@ class DecodeB(Elaboratable): self.sel_in = Signal(In2Sel, reset_less=True) self.insn_in = Signal(32, reset_less=True) self.reg_out = Data(5, "reg_b") - self.imm_out = Data(64, "imm_b") self.fast_out = Data(3, "fast_b") def elaborate(self, platform): @@ -122,56 +190,84 @@ class DecodeB(Elaboratable): comb += self.reg_out.data.eq(self.dec.RB) comb += self.reg_out.ok.eq(1) with m.Case(In2Sel.RS): - comb += self.reg_out.data.eq(self.dec.RS) # for M-Form shiftrot + # for M-Form shiftrot + comb += self.reg_out.data.eq(self.dec.RS) comb += self.reg_out.ok.eq(1) - with m.Case(In2Sel.CONST_UI): + + # decode SPR2 based on instruction type + op = self.dec.op + # BCREG implicitly uses LR or TAR for 2nd reg + # CTR however is already in fast_spr1 *not* 2. + with m.If(op.internal_op == MicrOp.OP_BCREG): + xo9 = self.dec.FormXL.XO[9] # 3.0B p38 top bit of XO + xo5 = self.dec.FormXL.XO[5] # 3.0B p38 + with m.If(~xo9): + comb += self.fast_out.data.eq(FastRegs.LR) + comb += self.fast_out.ok.eq(1) + with m.Elif(xo5): + comb += self.fast_out.data.eq(FastRegs.TAR) + comb += self.fast_out.ok.eq(1) + + return m + + +class DecodeBImm(Elaboratable): + """DecodeB immediate from instruction + """ + def __init__(self, dec): + self.dec = dec + self.sel_in = Signal(In2Sel, reset_less=True) + self.imm_out = Data(64, "imm_b") + + def elaborate(self, platform): + m = Module() + comb = m.d.comb + + # select Register B Immediate + with m.Switch(self.sel_in): + with m.Case(In2Sel.CONST_UI): # unsigned comb += self.imm_out.data.eq(self.dec.UI) comb += self.imm_out.ok.eq(1) - with m.Case(In2Sel.CONST_SI): # TODO: sign-extend here? - comb += self.imm_out.data.eq( - exts(self.dec.SI, 16, 64)) + with m.Case(In2Sel.CONST_SI): # sign-extended 16-bit + si = Signal(16, reset_less=True) + comb += si.eq(self.dec.SI) + comb += self.imm_out.data.eq(exts(si, 16, 64)) comb += self.imm_out.ok.eq(1) - with m.Case(In2Sel.CONST_UI_HI): - comb += self.imm_out.data.eq(self.dec.UI<<16) + with m.Case(In2Sel.CONST_SI_HI): # sign-extended 16+16=32 bit + si_hi = Signal(32, reset_less=True) + comb += si_hi.eq(self.dec.SI << 16) + comb += self.imm_out.data.eq(exts(si_hi, 32, 64)) comb += self.imm_out.ok.eq(1) - with m.Case(In2Sel.CONST_SI_HI): # TODO: sign-extend here? - comb += self.imm_out.data.eq(self.dec.SI<<16) - comb += self.imm_out.data.eq( - exts(self.dec.SI << 16, 32, 64)) + with m.Case(In2Sel.CONST_UI_HI): # unsigned + ui = Signal(16, reset_less=True) + comb += ui.eq(self.dec.UI) + comb += self.imm_out.data.eq(ui << 16) comb += self.imm_out.ok.eq(1) - with m.Case(In2Sel.CONST_LI): - comb += self.imm_out.data.eq(self.dec.LI<<2) + with m.Case(In2Sel.CONST_LI): # sign-extend 24+2=26 bit + li = Signal(26, reset_less=True) + comb += li.eq(self.dec.LI << 2) + comb += self.imm_out.data.eq(exts(li, 26, 64)) comb += self.imm_out.ok.eq(1) - with m.Case(In2Sel.CONST_BD): - comb += self.imm_out.data.eq(self.dec.BD<<2) + with m.Case(In2Sel.CONST_BD): # sign-extend (14+2)=16 bit + bd = Signal(16, reset_less=True) + comb += bd.eq(self.dec.BD << 2) + comb += self.imm_out.data.eq(exts(bd, 16, 64)) comb += self.imm_out.ok.eq(1) - with m.Case(In2Sel.CONST_DS): - comb += self.imm_out.data.eq(self.dec.DS<<2) + with m.Case(In2Sel.CONST_DS): # sign-extended (14+2=16) bit + ds = Signal(16, reset_less=True) + comb += ds.eq(self.dec.DS << 2) + comb += self.imm_out.data.eq(exts(ds, 16, 64)) comb += self.imm_out.ok.eq(1) - with m.Case(In2Sel.CONST_M1): - comb += self.imm_out.data.eq(~Const(0, 64)) # all 1s + with m.Case(In2Sel.CONST_M1): # signed (-1) + comb += self.imm_out.data.eq(~Const(0, 64)) # all 1s comb += self.imm_out.ok.eq(1) - with m.Case(In2Sel.CONST_SH): + with m.Case(In2Sel.CONST_SH): # unsigned - for shift comb += self.imm_out.data.eq(self.dec.sh) comb += self.imm_out.ok.eq(1) - with m.Case(In2Sel.CONST_SH32): + with m.Case(In2Sel.CONST_SH32): # unsigned - for shift comb += self.imm_out.data.eq(self.dec.SH32) comb += self.imm_out.ok.eq(1) - # decode SPR2 based on instruction type - op = self.dec.op - # BCREG implicitly uses LR or TAR for 2nd reg - # CTR however is already in fast_spr1 *not* 2. - with m.If(op.internal_op == InternalOp.OP_BCREG): - xo9 = self.dec.FormXL.XO[9] # 3.0B p38 top bit of XO - xo5 = self.dec.FormXL.XO[5] # 3.0B p38 - with m.If(~xo9): - comb += self.fast_out.data.eq(FastRegs.LR) - comb += self.fast_out.ok.eq(1) - with m.Elif(xo5): - comb += self.fast_out.data.eq(FastRegs.TAR) - comb += self.fast_out.ok.eq(1) - return m @@ -194,7 +290,8 @@ class DecodeC(Elaboratable): # select Register C field with m.Switch(self.sel_in): with m.Case(In3Sel.RB): - comb += self.reg_out.data.eq(self.dec.RB) # for M-Form shiftrot + # for M-Form shiftrot + comb += self.reg_out.data.eq(self.dec.RB) comb += self.reg_out.ok.eq(1) with m.Case(In3Sel.RS): comb += self.reg_out.data.eq(self.dec.RS) @@ -214,12 +311,13 @@ class DecodeOut(Elaboratable): self.sel_in = Signal(OutSel, reset_less=True) self.insn_in = Signal(32, reset_less=True) self.reg_out = Data(5, "reg_o") - self.spr_out = Data(10, "spr_o") + self.spr_out = Data(SPR, "spr_o") self.fast_out = Data(3, "fast_o") def elaborate(self, platform): m = Module() comb = m.d.comb + m.submodules.sprmap = sprmap = SPRMap() op = self.dec.op # select Register out field @@ -231,40 +329,28 @@ class DecodeOut(Elaboratable): comb += self.reg_out.data.eq(self.dec.RA) comb += self.reg_out.ok.eq(1) with m.Case(OutSel.SPR): - comb += self.spr_out.data.eq(self.dec.SPR) # from XFX - comb += self.spr_out.ok.eq(1) - # TODO MTSPR 1st spr (fast) - with m.If(op.internal_op == InternalOp.OP_MTSPR): - pass - """ - sprn := decode_spr_num(f_in.insn); - v.ispr1 := fast_spr_num(sprn); - -- Make slow SPRs single issue - if is_fast_spr(v.ispr1) = '0' then - v.decode.sgl_pipe := '1'; - -- send MMU-related SPRs to loadstore1 - case sprn is - when SPR_DAR | SPR_DSISR | SPR_PID | SPR_PRTBL => - v.decode.unit := LDST; - when others => - end case; - end if; - """ - - - # BC or BCREG: potential implicit register (CTR) NOTE: same in DecodeA - op = self.dec.op - with m.If((op.internal_op == InternalOp.OP_BC) | - (op.internal_op == InternalOp.OP_BCREG)): - with m.If(~self.dec.BO[2]): # 3.0B p38 BO2=0, use CTR reg - comb += self.fast_out.data.eq(FastRegs.CTR) # constant: CTR + spr = Signal(10, reset_less=True) + comb += spr.eq(decode_spr_num(self.dec.SPR)) # from XFX + # MFSPR move to SPRs - needs mapping + with m.If(op.internal_op == MicrOp.OP_MTSPR): + comb += sprmap.spr_i.eq(spr) + comb += self.spr_out.eq(sprmap.spr_o) + comb += self.fast_out.eq(sprmap.fast_o) + + with m.Switch(op.internal_op): + + # BC or BCREG: implicit register (CTR) NOTE: same in DecodeA + with m.Case(MicrOp.OP_BC, MicrOp.OP_BCREG): + with m.If(~self.dec.BO[2]): # 3.0B p38 BO2=0, use CTR reg + # constant: CTR + comb += self.fast_out.data.eq(FastRegs.CTR) + comb += self.fast_out.ok.eq(1) + + # RFID 1st spr (fast) + with m.Case(MicrOp.OP_RFID): + comb += self.fast_out.data.eq(FastRegs.SRR0) # constant: SRR0 comb += self.fast_out.ok.eq(1) - # RFID 1st spr (fast) - with m.If(op.internal_op == InternalOp.OP_RFID): - comb += self.fast_out.data.eq(FastRegs.SRR0) # constant: SRR0 - comb += self.fast_out.ok.eq(1) - return m @@ -286,22 +372,26 @@ class DecodeOut2(Elaboratable): m = Module() comb = m.d.comb - # update mode LD/ST uses read-reg A also as an output - with m.If(self.dec.op.upd): - comb += self.reg_out.eq(self.dec.RA) - comb += self.reg_out.ok.eq(1) + if hasattr(self.dec.op, "upd"): + # update mode LD/ST uses read-reg A also as an output + with m.If(self.dec.op.upd == LDSTMode.update): + comb += self.reg_out.eq(self.dec.RA) + comb += self.reg_out.ok.eq(1) - # BC or BCREG: potential implicit register (LR) output + # B, BC or BCREG: potential implicit register (LR) output + # these give bl, bcl, bclrl, etc. op = self.dec.op - with m.If((op.internal_op == InternalOp.OP_BC) | - (op.internal_op == InternalOp.OP_BCREG)): - with m.If(self.lk): # "link" mode - comb += self.fast_out.data.eq(FastRegs.LR) # constant: LR - comb += self.fast_out.ok.eq(1) + with m.Switch(op.internal_op): + + # BC* implicit register (LR) + with m.Case(MicrOp.OP_BC, MicrOp.OP_B, MicrOp.OP_BCREG): + with m.If(self.lk): # "link" mode + comb += self.fast_out.data.eq(FastRegs.LR) # constant: LR + comb += self.fast_out.ok.eq(1) - # RFID 2nd spr (fast) - with m.If(op.internal_op == InternalOp.OP_RFID): - comb += self.fast_out.data.eq(FastRegs.SRR1) # constant: SRR1 + # RFID 2nd spr (fast) + with m.Case(MicrOp.OP_RFID): + comb += self.fast_out.data.eq(FastRegs.SRR1) # constant: SRR1 comb += self.fast_out.ok.eq(1) return m @@ -312,6 +402,7 @@ class DecodeRC(Elaboratable): decodes Record bit Rc """ + def __init__(self, dec): self.dec = dec self.sel_in = Signal(RC, reset_less=True) @@ -348,6 +439,7 @@ class DecodeOE(Elaboratable): -- actual POWER9 does if we set it on those instructions, for now we -- test that further down when assigning to the multiplier oe input. """ + def __init__(self, dec): self.dec = dec self.sel_in = Signal(RC, reset_less=True) @@ -357,15 +449,33 @@ class DecodeOE(Elaboratable): def elaborate(self, platform): m = Module() comb = m.d.comb + op = self.dec.op - # select OE bit out field - with m.Switch(self.sel_in): - with m.Case(RC.RC): - comb += self.oe_out.data.eq(self.dec.OE) - comb += self.oe_out.ok.eq(1) + with m.Switch(op.internal_op): + + # mulhw, mulhwu, mulhd, mulhdu - these *ignore* OE + # also rotate + # XXX ARGH! ignoring OE causes incompatibility with microwatt + # http://lists.libre-soc.org/pipermail/libre-soc-dev/2020-August/000302.html + with m.Case(MicrOp.OP_MUL_H64, MicrOp.OP_MUL_H32, + MicrOp.OP_EXTS, MicrOp.OP_CNTZ, + MicrOp.OP_SHL, MicrOp.OP_SHR, MicrOp.OP_RLC, + MicrOp.OP_LOAD, MicrOp.OP_STORE, + MicrOp.OP_RLCL, MicrOp.OP_RLCR, + MicrOp.OP_EXTSWSLI): + pass + + # all other ops decode OE field + with m.Default(): + # select OE bit out field + with m.Switch(self.sel_in): + with m.Case(RC.RC): + comb += self.oe_out.data.eq(self.dec.OE) + comb += self.oe_out.ok.eq(1) return m + class DecodeCRIn(Elaboratable): """Decodes input CR from instruction @@ -380,20 +490,24 @@ class DecodeCRIn(Elaboratable): self.cr_bitfield = Data(3, "cr_bitfield") self.cr_bitfield_b = Data(3, "cr_bitfield_b") self.cr_bitfield_o = Data(3, "cr_bitfield_o") - self.whole_reg = Signal(reset_less=True) + self.whole_reg = Data(8, "cr_fxm") def elaborate(self, platform): m = Module() + m.submodules.ppick = ppick = PriorityPicker(8, reverse_i=True, + reverse_o=True) + comb = m.d.comb + op = self.dec.op comb += self.cr_bitfield.ok.eq(0) comb += self.cr_bitfield_b.ok.eq(0) - comb += self.whole_reg.eq(0) + comb += self.whole_reg.ok.eq(0) with m.Switch(self.sel_in): with m.Case(CRInSel.NONE): - pass # No bitfield activated + pass # No bitfield activated with m.Case(CRInSel.CR0): - comb += self.cr_bitfield.data.eq(0) + comb += self.cr_bitfield.data.eq(0) # CR0 (MSB0 numbering) comb += self.cr_bitfield.ok.eq(1) with m.Case(CRInSel.BI): comb += self.cr_bitfield.data.eq(self.dec.BI[2:5]) @@ -412,7 +526,16 @@ class DecodeCRIn(Elaboratable): comb += self.cr_bitfield.data.eq(self.dec.BC[2:5]) comb += self.cr_bitfield.ok.eq(1) with m.Case(CRInSel.WHOLE_REG): - comb += self.whole_reg.eq(1) + comb += self.whole_reg.ok.eq(1) + move_one = Signal(reset_less=True) + comb += move_one.eq(self.insn_in[20]) # MSB0 bit 11 + with m.If((op.internal_op == MicrOp.OP_MFCR) & move_one): + # must one-hot the FXM field + comb += ppick.i.eq(self.dec.FXM) + comb += self.whole_reg.data.eq(ppick.o) + with m.Else(): + # otherwise use all of it + comb += self.whole_reg.data.eq(0xff) return m @@ -430,224 +553,478 @@ class DecodeCROut(Elaboratable): self.sel_in = Signal(CROutSel, reset_less=True) self.insn_in = Signal(32, reset_less=True) self.cr_bitfield = Data(3, "cr_bitfield") - self.whole_reg = Signal(reset_less=True) + self.whole_reg = Data(8, "cr_fxm") def elaborate(self, platform): m = Module() comb = m.d.comb + op = self.dec.op + m.submodules.ppick = ppick = PriorityPicker(8, reverse_i=True, + reverse_o=True) comb += self.cr_bitfield.ok.eq(0) - comb += self.whole_reg.eq(0) + comb += self.whole_reg.ok.eq(0) with m.Switch(self.sel_in): with m.Case(CROutSel.NONE): - pass # No bitfield activated + pass # No bitfield activated with m.Case(CROutSel.CR0): - comb += self.cr_bitfield.data.eq(0) - comb += self.cr_bitfield.ok.eq(self.rc_in) # only when RC=1 + comb += self.cr_bitfield.data.eq(0) # CR0 (MSB0 numbering) + comb += self.cr_bitfield.ok.eq(self.rc_in) # only when RC=1 with m.Case(CROutSel.BF): - comb += self.cr_bitfield.data.eq(self.dec.FormX.BF[0:-1]) + comb += self.cr_bitfield.data.eq(self.dec.FormX.BF) comb += self.cr_bitfield.ok.eq(1) with m.Case(CROutSel.BT): comb += self.cr_bitfield.data.eq(self.dec.FormXL.BT[2:5]) comb += self.cr_bitfield.ok.eq(1) with m.Case(CROutSel.WHOLE_REG): - comb += self.whole_reg.eq(1) + comb += self.whole_reg.ok.eq(1) + move_one = Signal(reset_less=True) + comb += move_one.eq(self.insn_in[20]) + with m.If((op.internal_op == MicrOp.OP_MTCRF)): + with m.If(move_one): + # must one-hot the FXM field + comb += ppick.i.eq(self.dec.FXM) + with m.If(ppick.en_o): + comb += self.whole_reg.data.eq(ppick.o) + with m.Else(): + comb += self.whole_reg.data.eq(0b00000001) # CR7 + with m.Else(): + comb += self.whole_reg.data.eq(self.dec.FXM) + with m.Else(): + # otherwise use all of it + comb += self.whole_reg.data.eq(0xff) return m +# dictionary of Input Record field names that, if they exist, +# will need a corresponding CSV Decoder file column (actually, PowerOp) +# to be decoded (this includes the single bit names) +record_names = {'insn_type': 'internal_op', + 'fn_unit': 'function_unit', + 'rc': 'rc_sel', + 'oe': 'rc_sel', + 'zero_a': 'in1_sel', + 'imm_data': 'in2_sel', + 'invert_in': 'inv_a', + 'invert_out': 'inv_out', + 'rc': 'cr_out', + 'oe': 'cr_in', + 'output_carry': 'cry_out', + 'input_carry': 'cry_in', + 'is_32bit': 'is_32b', + 'is_signed': 'sgn', + 'lk': 'lk', + 'data_len': 'ldst_len', + 'byte_reverse': 'br', + 'sign_extend': 'sgn_ext', + 'ldst_mode': 'upd', + } + + +class PowerDecodeSubset(Elaboratable): + """PowerDecodeSubset: dynamic subset decoder + """ + def __init__(self, dec, opkls=None, fn_name=None, final=False, state=None): + + self.final = final + self.opkls = opkls + self.fn_name = fn_name + if opkls is None: + opkls = Decode2ToOperand + self.do = opkls(fn_name) + col_subset = self.get_col_subset(self.do) + + # only needed for "main" PowerDecode2 + if not self.final: + self.e = Decode2ToExecute1Type(name=self.fn_name, do=self.do) + + # create decoder if one not already given + if dec is None: + dec = create_pdecode(name=fn_name, col_subset=col_subset, + row_subset=self.rowsubsetfn) + self.dec = dec -class XerBits: - def __init__(self): - self.ca = Signal(2, reset_less=True) - self.ov = Signal(2, reset_less=True) - self.so = Signal(reset_less=True) - - def ports(self): - return [self.ca, self.ov, self.so] - - -class Decode2ToExecute1Type(RecordObject): - - def __init__(self, name=None): - - RecordObject.__init__(self, name=name) - - self.valid = Signal(reset_less=True) - self.insn_type = Signal(InternalOp, reset_less=True) - self.fn_unit = Signal(Function, reset_less=True) - self.nia = Signal(64, reset_less=True) - self.write_reg = Data(5, name="rego") - self.write_ea = Data(5, name="ea") # for LD/ST in update mode - self.read_reg1 = Data(5, name="reg1") - self.read_reg2 = Data(5, name="reg2") - self.read_reg3 = Data(5, name="reg3") - self.imm_data = Data(64, name="imm") - self.write_spr = Data(10, name="spro") - self.read_spr1 = Data(10, name="spr1") - self.read_spr2 = Data(10, name="spr2") - - self.read_fast1 = Data(3, name="fast1") - self.read_fast2 = Data(3, name="fast2") - self.write_fast1 = Data(3, name="fasto1") - self.write_fast2 = Data(3, name="fasto2") - - self.read_cr1 = Data(3, name="cr_in1") - self.read_cr2 = Data(3, name="cr_in2") - self.read_cr3 = Data(3, name="cr_in2") - self.read_cr_whole = Signal(reset_less=True) - self.write_cr = Data(3, name="cr_out") - self.write_cr_whole = Signal(reset_less=True) - self.lk = Signal(reset_less=True) - self.rc = Data(1, "rc") - self.oe = Data(1, "oe") - self.invert_a = Signal(reset_less=True) - self.zero_a = Signal(reset_less=True) - self.invert_out = Signal(reset_less=True) - self.input_carry = Signal(CryIn, reset_less=True) - self.output_carry = Signal(reset_less=True) - self.input_cr = Signal(reset_less=True) # instr. has a CR as input - self.output_cr = Signal(reset_less=True) # instr. has a CR as output - self.is_32bit = Signal(reset_less=True) - self.is_signed = Signal(reset_less=True) - self.insn = Signal(32, reset_less=True) - self.data_len = Signal(4, reset_less=True) # bytes - self.byte_reverse = Signal(reset_less=True) - self.sign_extend = Signal(reset_less=True)# do we need this? - self.update = Signal(reset_less=True) # LD/ST is "update" variant - - -class PowerDecode2(Elaboratable): + # state information needed by the Decoder + if state is None: + state = CoreState("dec2") + self.state = state - def __init__(self, dec): + def get_col_subset(self, do): + subset = {'cr_in', 'cr_out', 'rc_sel'} # needed, non-optional + for k, v in record_names.items(): + if hasattr(do, k): + subset.add(v) + print ("get_col_subset", self.fn_name, do.fields, subset) + return subset - self.dec = dec - self.e = Decode2ToExecute1Type() + def rowsubsetfn(self, opcode, row): + return row['unit'] == self.fn_name def ports(self): return self.dec.ports() + self.e.ports() + def needs_field(self, field, op_field): + if self.final: + do = self.do + else: + do = self.e_tmp.do + return hasattr(do, field) and self.op_get(op_field) is not None + + def do_copy(self, field, val, final=False): + if final or self.final: + do = self.do + else: + do = self.e_tmp.do + if hasattr(do, field) and val is not None: + return getattr(do, field).eq(val) + return [] + + def op_get(self, op_field): + return getattr(self.dec.op, op_field, None) + def elaborate(self, platform): m = Module() comb = m.d.comb + state = self.state + op, do = self.dec.op, self.do + msr, cia = state.msr, state.pc + + # fill in for a normal instruction (not an exception) + # copy over if non-exception, non-privileged etc. is detected + if not self.final: + if self.fn_name is None: + name = "tmp" + else: + name = self.fn_name + "tmp" + self.e_tmp = Decode2ToExecute1Type(name=name, opkls=self.opkls) # set up submodule decoders m.submodules.dec = self.dec - m.submodules.dec_a = dec_a = DecodeA(self.dec) - m.submodules.dec_b = dec_b = DecodeB(self.dec) - m.submodules.dec_c = dec_c = DecodeC(self.dec) - m.submodules.dec_o = dec_o = DecodeOut(self.dec) - m.submodules.dec_o2 = dec_o2 = DecodeOut2(self.dec) m.submodules.dec_rc = dec_rc = DecodeRC(self.dec) m.submodules.dec_oe = dec_oe = DecodeOE(self.dec) - m.submodules.dec_cr_in = dec_cr_in = DecodeCRIn(self.dec) - m.submodules.dec_cr_out = dec_cr_out = DecodeCROut(self.dec) + m.submodules.dec_cr_in = self.dec_cr_in = DecodeCRIn(self.dec) + m.submodules.dec_cr_out = self.dec_cr_out = DecodeCROut(self.dec) # copy instruction through... - for i in [self.e.insn, dec_a.insn_in, dec_b.insn_in, - dec_c.insn_in, dec_o.insn_in, dec_o2.insn_in, dec_rc.insn_in, - dec_oe.insn_in, dec_cr_in.insn_in, dec_cr_out.insn_in]: + for i in [do.insn, + dec_rc.insn_in, dec_oe.insn_in, + self.dec_cr_in.insn_in, self.dec_cr_out.insn_in]: comb += i.eq(self.dec.opcode_in) # ...and subdecoders' input fields - comb += dec_a.sel_in.eq(self.dec.op.in1_sel) - comb += dec_b.sel_in.eq(self.dec.op.in2_sel) - comb += dec_c.sel_in.eq(self.dec.op.in3_sel) - comb += dec_o.sel_in.eq(self.dec.op.out_sel) - comb += dec_o2.sel_in.eq(self.dec.op.out_sel) - comb += dec_o2.lk.eq(self.e.lk) - comb += dec_rc.sel_in.eq(self.dec.op.rc_sel) - comb += dec_oe.sel_in.eq(self.dec.op.rc_sel) # XXX should be OE sel - comb += dec_cr_in.sel_in.eq(self.dec.op.cr_in) - comb += dec_cr_out.sel_in.eq(self.dec.op.cr_out) - comb += dec_cr_out.rc_in.eq(dec_rc.rc_out.data) - - # decode LD/ST length - with m.Switch(self.dec.op.ldst_len): - with m.Case(LdstLen.is1B): - comb += self.e.data_len.eq(1) - with m.Case(LdstLen.is2B): - comb += self.e.data_len.eq(2) - with m.Case(LdstLen.is4B): - comb += self.e.data_len.eq(4) - with m.Case(LdstLen.is8B): - comb += self.e.data_len.eq(8) - - comb += self.e.nia.eq(0) # XXX TODO - fu = self.dec.op.function_unit - itype = Mux(fu == Function.NONE, - InternalOp.OP_ILLEGAL, - self.dec.op.internal_op) - comb += self.e.insn_type.eq(itype) - comb += self.e.fn_unit.eq(fu) - - # registers a, b, c and out and out2 (LD/ST EA) - comb += self.e.read_reg1.eq(dec_a.reg_out) - comb += self.e.read_reg2.eq(dec_b.reg_out) - comb += self.e.read_reg3.eq(dec_c.reg_out) - comb += self.e.write_reg.eq(dec_o.reg_out) - comb += self.e.write_ea.eq(dec_o2.reg_out) - comb += self.e.imm_data.eq(dec_b.imm_out) # immediate in RB (usually) - comb += self.e.zero_a.eq(dec_a.immz_out) # RA==0 detected + comb += dec_rc.sel_in.eq(op.rc_sel) + comb += dec_oe.sel_in.eq(op.rc_sel) # XXX should be OE sel + comb += self.dec_cr_in.sel_in.eq(op.cr_in) + comb += self.dec_cr_out.sel_in.eq(op.cr_out) + comb += self.dec_cr_out.rc_in.eq(dec_rc.rc_out.data) + + # copy "state" over + comb += self.do_copy("msr", msr) + comb += self.do_copy("cia", cia) + + # set up instruction, pick fn unit + # no op: defaults to OP_ILLEGAL + comb += self.do_copy("insn_type", self.op_get("internal_op")) + + #function unit for decoded instruction + fn = self.op_get("function_unit") + spr = Signal(10, reset_less=True) + comb += spr.eq(decode_spr_num(self.dec.SPR)) # from XFX + + # for first test only forward SPR 18 to mmu + with m.If(self.dec.op.internal_op == MicrOp.OP_MTSPR): + with m.If((spr == 18) | (spr == 19)): + comb += self.do_copy("fn_unit",Function.MMU) + with m.Else(): + comb += self.do_copy("fn_unit",fn) + with m.If(self.dec.op.internal_op == MicrOp.OP_MFSPR): + with m.If((spr == 18) | (spr == 19)): + comb += self.do_copy("fn_unit",Function.MMU) + with m.Else(): + comb += self.do_copy("fn_unit",fn) + with m.Else(): + comb += self.do_copy("fn_unit",fn) + + # immediates + if self.needs_field("zero_a", "in1_sel"): + m.submodules.dec_ai = dec_ai = DecodeAImm(self.dec) + comb += dec_ai.sel_in.eq(op.in1_sel) + comb += self.do_copy("zero_a", dec_ai.immz_out) # RA==0 detected + if self.needs_field("imm_data", "in2_sel"): + m.submodules.dec_bi = dec_bi = DecodeBImm(self.dec) + comb += dec_bi.sel_in.eq(op.in2_sel) + comb += self.do_copy("imm_data", dec_bi.imm_out) # imm in RB # rc and oe out - comb += self.e.rc.eq(dec_rc.rc_out) - comb += self.e.oe.eq(dec_oe.oe_out) + comb += self.do_copy("rc", dec_rc.rc_out) + comb += self.do_copy("oe", dec_oe.oe_out) - # SPRs out - comb += self.e.read_spr1.eq(dec_a.spr_out) - comb += self.e.write_spr.eq(dec_o.spr_out) + # CR in/out + comb += self.do_copy("read_cr_whole", self.dec_cr_in.whole_reg) + comb += self.do_copy("write_cr_whole", self.dec_cr_out.whole_reg) + comb += self.do_copy("write_cr0", self.dec_cr_out.cr_bitfield.ok) - # Fast regs out - comb += self.e.read_fast1.eq(dec_a.fast_out) - comb += self.e.read_fast2.eq(dec_b.fast_out) - comb += self.e.write_fast1.eq(dec_o.fast_out) - comb += self.e.write_fast2.eq(dec_o2.fast_out) + comb += self.do_copy("input_cr", self.op_get("cr_in")) # CR in + comb += self.do_copy("output_cr", self.op_get("cr_out")) # CR out - comb += self.e.read_cr1.eq(dec_cr_in.cr_bitfield) - comb += self.e.read_cr2.eq(dec_cr_in.cr_bitfield_b) - comb += self.e.read_cr3.eq(dec_cr_in.cr_bitfield_o) - comb += self.e.read_cr_whole.eq(dec_cr_in.whole_reg) + # decoded/selected instruction flags + comb += self.do_copy("data_len", self.op_get("ldst_len")) + comb += self.do_copy("invert_in", self.op_get("inv_a")) + comb += self.do_copy("invert_out", self.op_get("inv_out")) + comb += self.do_copy("input_carry", self.op_get("cry_in")) + comb += self.do_copy("output_carry", self.op_get("cry_out")) + comb += self.do_copy("is_32bit", self.op_get("is_32b")) + comb += self.do_copy("is_signed", self.op_get("sgn")) + lk = self.op_get("lk") + if lk is not None: + with m.If(lk): + comb += self.do_copy("lk", self.dec.LK) # XXX TODO: accessor + + comb += self.do_copy("byte_reverse", self.op_get("br")) + comb += self.do_copy("sign_extend", self.op_get("sgn_ext")) + comb += self.do_copy("ldst_mode", self.op_get("upd")) # LD/ST mode - comb += self.e.write_cr.eq(dec_cr_out.cr_bitfield) - comb += self.e.write_cr_whole.eq(dec_cr_out.whole_reg) + return m - # decoded/selected instruction flags - comb += self.e.invert_a.eq(self.dec.op.inv_a) - comb += self.e.invert_out.eq(self.dec.op.inv_out) - comb += self.e.input_carry.eq(self.dec.op.cry_in) # carry comes in - comb += self.e.output_carry.eq(self.dec.op.cry_out) # carry goes out - comb += self.e.is_32bit.eq(self.dec.op.is_32b) - comb += self.e.is_signed.eq(self.dec.op.sgn) - with m.If(self.dec.op.lk): - comb += self.e.lk.eq(self.dec.LK) # XXX TODO: accessor - comb += self.e.byte_reverse.eq(self.dec.op.br) - comb += self.e.sign_extend.eq(self.dec.op.sgn_ext) - comb += self.e.update.eq(self.dec.op.upd) # LD/ST "update" mode. +class PowerDecode2(PowerDecodeSubset): + """PowerDecode2: the main instruction decoder. + whilst PowerDecode is responsible for decoding the actual opcode, this + module encapsulates further specialist, sparse information and + expansion of fields that is inconvenient to have in the CSV files. + for example: the encoding of the immediates, which are detected + and expanded out to their full value from an annotated (enum) + representation. - # These should be removed eventually - comb += self.e.input_cr.eq(self.dec.op.cr_in) # condition reg comes in - comb += self.e.output_cr.eq(self.dec.op.cr_out) # condition reg goes in + implicit register usage is also set up, here. for example: OP_BC + requires implicitly reading CTR, OP_RFID requires implicitly writing + to SRR1 and so on. + in addition, PowerDecoder2 is responsible for detecting whether + instructions are illegal (or privileged) or not, and instead of + just leaving at that, *replacing* the instruction to execute with + a suitable alternative (trap). + + LDSTExceptions are done the cycle _after_ they're detected (after + they come out of LDSTCompUnit). basically despite the instruction + being decoded, the results of the decode are completely ignored + and "exception.happened" used to set the "actual" instruction to + "OP_TRAP". the LDSTException data structure gets filled in, + in the CompTrapOpSubset and that's what it fills in SRR. + + to make this work, TestIssuer must notice "exception.happened" + after the (failed) LD/ST and copies the LDSTException info from + the output, into here (PowerDecoder2). without incrementing PC. + """ + + def __init__(self, dec, opkls=None, fn_name=None, final=False, state=None): + super().__init__(dec, opkls, fn_name, final, state) + self.exc = LDSTException("dec2_exc") + + def get_col_subset(self, opkls): + subset = super().get_col_subset(opkls) + subset.add("in1_sel") + subset.add("asmcode") + subset.add("in2_sel") + subset.add("in3_sel") + subset.add("out_sel") + subset.add("lk") + subset.add("internal_op") + subset.add("form") + return subset + + def elaborate(self, platform): + m = super().elaborate(platform) + comb = m.d.comb + state = self.state + e_out, op, do_out = self.e, self.dec.op, self.e.do + dec_spr, msr, cia, ext_irq = state.dec, state.msr, state.pc, state.eint + e = self.e_tmp + do = e.do + + # fill in for a normal instruction (not an exception) + # copy over if non-exception, non-privileged etc. is detected + + # set up submodule decoders + m.submodules.dec_a = dec_a = DecodeA(self.dec) + m.submodules.dec_b = dec_b = DecodeB(self.dec) + m.submodules.dec_c = dec_c = DecodeC(self.dec) + m.submodules.dec_o = dec_o = DecodeOut(self.dec) + m.submodules.dec_o2 = dec_o2 = DecodeOut2(self.dec) + + # copy instruction through... + for i in [do.insn, dec_a.insn_in, dec_b.insn_in, + dec_c.insn_in, dec_o.insn_in, dec_o2.insn_in]: + comb += i.eq(self.dec.opcode_in) + + # ...and subdecoders' input fields + comb += dec_a.sel_in.eq(op.in1_sel) + comb += dec_b.sel_in.eq(op.in2_sel) + comb += dec_c.sel_in.eq(op.in3_sel) + comb += dec_o.sel_in.eq(op.out_sel) + comb += dec_o2.sel_in.eq(op.out_sel) + if hasattr(do, "lk"): + comb += dec_o2.lk.eq(do.lk) + + # registers a, b, c and out and out2 (LD/ST EA) + comb += e.read_reg1.eq(dec_a.reg_out) + comb += e.read_reg2.eq(dec_b.reg_out) + comb += e.read_reg3.eq(dec_c.reg_out) + comb += e.write_reg.eq(dec_o.reg_out) + comb += e.write_ea.eq(dec_o2.reg_out) + + # SPRs out + comb += e.read_spr1.eq(dec_a.spr_out) + comb += e.write_spr.eq(dec_o.spr_out) + + # Fast regs out + comb += e.read_fast1.eq(dec_a.fast_out) + comb += e.read_fast2.eq(dec_b.fast_out) + comb += e.write_fast1.eq(dec_o.fast_out) + comb += e.write_fast2.eq(dec_o2.fast_out) + + # condition registers (CR) + comb += e.read_cr1.eq(self.dec_cr_in.cr_bitfield) + comb += e.read_cr2.eq(self.dec_cr_in.cr_bitfield_b) + comb += e.read_cr3.eq(self.dec_cr_in.cr_bitfield_o) + comb += e.write_cr.eq(self.dec_cr_out.cr_bitfield) + + # sigh this is exactly the sort of thing for which the + # decoder is designed to not need. MTSPR, MFSPR and others need + # access to the XER bits. however setting e.oe is not appropriate + with m.If(op.internal_op == MicrOp.OP_MFSPR): + comb += e.xer_in.eq(0b111) # SO, CA, OV + with m.If(op.internal_op == MicrOp.OP_CMP): + comb += e.xer_in.eq(1<> 4, True) # bottom 4 bits + comb += self.do_copy("traptype", traptype, True) # request type + comb += self.do_copy("ldst_exc", exc, True) # request type + comb += self.do_copy("msr", self.state.msr, True) # copy of MSR "state" + comb += self.do_copy("cia", self.state.pc, True) # copy of PC "state" + + +def get_rdflags(e, cu): + rdl = [] + for idx in range(cu.n_src): + regfile, regname, _ = cu.get_in_spec(idx) + rdflag, read = regspec_decode_read(e, regfile, regname) + rdl.append(rdflag) + print("rdflags", rdl) + return Cat(*rdl) if __name__ == '__main__': @@ -656,4 +1033,3 @@ if __name__ == '__main__': vl = rtlil.convert(dec2, ports=dec2.ports() + pdecode.ports()) with open("dec2.il", "w") as f: f.write(vl) -