X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fsoc%2Fdecoder%2Fpower_decoder2.py;h=cd9bae1bd0e40f15adebbccb870ea7cf94d59e9c;hb=b90ce1976820244dbd710d2c612933db7d5eece9;hp=389c98b58aefb79ad6dfc26031f84d4fe3841a44;hpb=a7bf25865a70088e96fabdd68c6a565c86196b88;p=soc.git diff --git a/src/soc/decoder/power_decoder2.py b/src/soc/decoder/power_decoder2.py index 389c98b5..cd9bae1b 100644 --- a/src/soc/decoder/power_decoder2.py +++ b/src/soc/decoder/power_decoder2.py @@ -14,14 +14,19 @@ from nmutil.picker import PriorityPicker from nmutil.iocontrol import RecordObject from nmutil.extend import exts +from soc.experiment.mem_types import LDSTException + from soc.decoder.power_regspec_map import regspec_decode_read from soc.decoder.power_regspec_map import regspec_decode_write from soc.decoder.power_decoder import create_pdecode from soc.decoder.power_enums import (MicrOp, CryIn, Function, CRInSel, CROutSel, LdstLen, In1Sel, In2Sel, In3Sel, - OutSel, SPR, RC, LDSTMode) -from soc.decoder.decode2execute1 import Decode2ToExecute1Type, Data + OutSel, SPR, RC, LDSTMode, + SVEXTRA, SVEtype) +from soc.decoder.decode2execute1 import (Decode2ToExecute1Type, Data, + Decode2ToOperand) +from soc.sv.svp64 import SVP64Rec from soc.consts import MSR from soc.regfile.regfiles import FastRegs @@ -74,6 +79,140 @@ class SPRMap(Elaboratable): return m +class SVP64ExtraSpec(Elaboratable): + """SVP64ExtraSpec - decodes SVP64 Extra specification. + + selects the required EXTRA2/3 field. + + see https://libre-soc.org/openpower/sv/svp64/ + """ + def __init__(self): + self.extra = Signal(10, reset_less=True) + self.etype = Signal(SVEtype, reset_less=True) # 2 or 3 bits + self.idx = Signal(SVEXTRA, reset_less=True) # which part of extra + self.spec = Signal(3) # EXTRA spec for the register + + def elaborate(self, platform): + m = Module() + comb = m.d.comb + spec = self.spec + + # back in the LDSTRM-* and RM-* files generated by sv_analysis.py + # we marked every op with an Etype: EXTRA2 or EXTRA3, and also said + # which of the 4 (or 3 for EXTRA3) sub-fields of bits 10:18 contain + # the register-extension information. extract those now + with m.Switch(self.etype): + # 2-bit index selection mode + with m.Case(SVEtype.EXTRA2): + with m.Switch(self.idx): + with m.Case(SVEXTRA.Idx0): # 1st 2 bits + comb += spec[1:3].eq(self.extra[0:2]) + with m.Case(SVEXTRA.Idx1): # 2nd 2 bits + comb += spec[1:3].eq(self.extra[2:4]) + with m.Case(SVEXTRA.Idx2): # 3rd 2 bits + comb += spec[1:3].eq(self.extra[4:6]) + with m.Case(SVEXTRA.Idx3): # 4th 2 bits + comb += spec[1:3].eq(self.extra[6:8]) + # 3-bit index selection mode + with m.Case(SVEtype.EXTRA3): + with m.Switch(self.idx): + with m.Case(SVEXTRA.Idx0): # 1st 3 bits + comb += spec.eq(self.extra[0:3]) + with m.Case(SVEXTRA.Idx1): # 2nd 3 bits + comb += spec.eq(self.extra[3:6]) + with m.Case(SVEXTRA.Idx2): # 3rd 3 bits + comb += spec.eq(self.extra[6:9]) + # cannot fit more than 9 bits so there is no 4th thing + + return m + + +class SVP64RegExtra(SVP64ExtraSpec): + """SVP64RegExtra - decodes SVP64 Extra fields to determine reg extension + + incoming 5-bit GPR/FP is turned into a 7-bit and marked as scalar/vector + depending on info in one of the positions in the EXTRA field. + + designed so that "no change" to the 5-bit register number occurs if + SV either does not apply or the relevant EXTRA2/3 field bits are zero. + + see https://libre-soc.org/openpower/sv/svp64/ + """ + def __init__(self): + SVP64ExtraSpec.__init__(self) + self.reg_in = Signal(5) # incoming reg number (5 bits, RA, RB) + self.reg_out = Signal(7) # extra-augmented output (7 bits) + self.isvec = Signal(1) # reg is marked as vector if true + + def elaborate(self, platform): + m = super().elaborate(platform) # select required EXTRA2/3 + comb = m.d.comb + + # first get the spec. if not changed it's "scalar identity behaviour" + # which is zero which is ok. + spec = self.spec + + # now decode it. bit 2 is "scalar/vector". note that spec could be zero + # from above, which (by design) has the effect of "no change", below. + + # simple: isvec is top bit of spec + comb += self.isvec.eq(spec[2]) + + # decode vector differently from scalar + with m.If(self.isvec): + # Vector: shifted up, extra in LSBs (RA << 2) | spec[0:1] + comb += self.reg_out.eq(Cat(spec[:2], self.reg_in)) + with m.Else(): + # Scalar: not shifted up, extra in MSBs RA | (spec[0:1] << 5) + comb += self.reg_out.eq(Cat(self.reg_in, spec[:2])) + + return m + + +class SVP64CRExtra(SVP64ExtraSpec): + """SVP64CRExtra - decodes SVP64 Extra fields to determine CR extension + + incoming 3-bit CR is turned into a 7-bit and marked as scalar/vector + depending on info in one of the positions in the EXTRA field. + + yes, really, 128 CRs. INT is 128, FP is 128, therefore CRs are 128. + + designed so that "no change" to the 3-bit CR register number occurs if + SV either does not apply or the relevant EXTRA2/3 field bits are zero. + + see https://libre-soc.org/openpower/sv/svp64/appendix + """ + def __init__(self): + SVP64ExtraSpec.__init__(self) + self.cr_in = Signal(3) # incoming CR number (3 bits, BA[2:5], BFA) + self.cr_out = Signal(7) # extra-augmented CR output (7 bits) + self.isvec = Signal(1) # reg is marked as vector if true + + def elaborate(self, platform): + m = super().elaborate(platform) # select required EXTRA2/3 + comb = m.d.comb + + # first get the spec. if not changed it's "scalar identity behaviour" + # which is zero which is ok. + spec = self.spec + + # now decode it. bit 2 is "scalar/vector". note that spec could be zero + # from above, which (by design) has the effect of "no change", below. + + # simple: isvec is top bit of spec + comb += self.isvec.eq(spec[2]) + + # decode vector differently from scalar, insert bits 0 and 1 accordingly + with m.If(self.isvec): + # Vector: shifted up, extra in LSBs (CR << 4) | (spec[0:1] << 2) + comb += self.cr_out.eq(Cat(Const(0, 2), spec[:2], self.cr_in)) + with m.Else(): + # Scalar: not shifted up, extra in MSBs CR | (spec[0:1] << 3) + comb += self.cr_out.eq(Cat(self.cr_in, spec[:2])) + + return m + + class DecodeA(Elaboratable): """DecodeA from instruction @@ -82,16 +221,23 @@ class DecodeA(Elaboratable): def __init__(self, dec): self.dec = dec + self.sv_rm = SVP64Rec() # SVP64 RM field self.sel_in = Signal(In1Sel, reset_less=True) self.insn_in = Signal(32, reset_less=True) - self.reg_out = Data(5, name="reg_a") + self.reg_out = Data(7, name="reg_a") + self.reg_isvec = Signal(1, name="reg_a_isvec") # TODO: in reg_out self.spr_out = Data(SPR, "spr_a") self.fast_out = Data(3, "fast_a") def elaborate(self, platform): m = Module() comb = m.d.comb + op = self.dec.op m.submodules.sprmap = sprmap = SPRMap() + m.submodules.svdec = svdec = SVP64RegExtra() + + # get the 5-bit reg data before svp64-munging it into 7-bit plus isvec + reg = Signal(5, reset_less=True) # select Register A field ra = Signal(5, reset_less=True) @@ -99,16 +245,32 @@ class DecodeA(Elaboratable): with m.If((self.sel_in == In1Sel.RA) | ((self.sel_in == In1Sel.RA_OR_ZERO) & (ra != Const(0, 5)))): - comb += self.reg_out.data.eq(ra) + comb += reg.eq(ra) comb += self.reg_out.ok.eq(1) # some Logic/ALU ops have RS as the 3rd arg, but no "RA". + # moved it to 1st position (in1_sel)... because + rs = Signal(5, reset_less=True) + comb += rs.eq(self.dec.RS) with m.If(self.sel_in == In1Sel.RS): - comb += self.reg_out.data.eq(self.dec.RS) + comb += reg.eq(rs) comb += self.reg_out.ok.eq(1) + # now do the SVP64 munging. op.SV_Etype and op.sv_in1 comes from + # PowerDecoder which in turn comes from LDST-RM*.csv and RM-*.csv + # which in turn were auto-generated by sv_analysis.py + + extra = self.sv_rm.extra # SVP64 extra bits 10:18 + comb += svdec.extra.eq(extra) # EXTRA field of SVP64 RM + comb += svdec.etype.eq(op.SV_Etype) # EXTRA2/3 for this insn + comb += svdec.idx.eq(op.sv_in1) # SVP64 reg #1 (matches in1_sel) + comb += svdec.reg_in.eq(reg) # 5-bit (RA, RS) + + # outputs: 7-bit reg number and whether it's vectorised + comb += self.reg_out.data.eq(svdec.reg_out) + comb += self.reg_isvec.eq(svdec.isvec) + # decode Fast-SPR based on instruction type - op = self.dec.op with m.Switch(op.internal_op): # BC or BCREG: implicit register (CTR) NOTE: same in DecodeOut @@ -172,27 +334,45 @@ class DecodeB(Elaboratable): def __init__(self, dec): self.dec = dec + self.sv_rm = SVP64Rec() # SVP64 RM field self.sel_in = Signal(In2Sel, reset_less=True) self.insn_in = Signal(32, reset_less=True) - self.reg_out = Data(5, "reg_b") + self.reg_out = Data(7, "reg_b") + self.reg_isvec = Signal(1, name="reg_b_isvec") # TODO: in reg_out self.fast_out = Data(3, "fast_b") def elaborate(self, platform): m = Module() comb = m.d.comb + op = self.dec.op + m.submodules.svdec = svdec = SVP64RegExtra() + + # get the 5-bit reg data before svp64-munging it into 7-bit plus isvec + reg = Signal(5, reset_less=True) # select Register B field with m.Switch(self.sel_in): with m.Case(In2Sel.RB): - comb += self.reg_out.data.eq(self.dec.RB) + comb += reg.eq(self.dec.RB) comb += self.reg_out.ok.eq(1) with m.Case(In2Sel.RS): # for M-Form shiftrot - comb += self.reg_out.data.eq(self.dec.RS) + comb += reg.eq(self.dec.RS) comb += self.reg_out.ok.eq(1) + # now do the SVP64 munging. different from DecodeA only by sv_in2 + + extra = self.sv_rm.extra # SVP64 extra bits 10:18 + comb += svdec.extra.eq(extra) # EXTRA field of SVP64 RM + comb += svdec.etype.eq(op.SV_Etype) # EXTRA2/3 for this insn + comb += svdec.idx.eq(op.sv_in2) # SVP64 reg #2 (matches in2_sel) + comb += svdec.reg_in.eq(reg) # 5-bit (RA, RS) + + # outputs: 7-bit reg number and whether it's vectorised + comb += self.reg_out.data.eq(svdec.reg_out) + comb += self.reg_isvec.eq(svdec.isvec) + # decode SPR2 based on instruction type - op = self.dec.op # BCREG implicitly uses LR or TAR for 2nd reg # CTR however is already in fast_spr1 *not* 2. with m.If(op.internal_op == MicrOp.OP_BCREG): @@ -276,24 +456,43 @@ class DecodeC(Elaboratable): def __init__(self, dec): self.dec = dec + self.sv_rm = SVP64Rec() # SVP64 RM field self.sel_in = Signal(In3Sel, reset_less=True) self.insn_in = Signal(32, reset_less=True) - self.reg_out = Data(5, "reg_c") + self.reg_out = Data(7, "reg_c") + self.reg_isvec = Signal(1, name="reg_c_isvec") # TODO: in reg_out def elaborate(self, platform): m = Module() comb = m.d.comb + op = self.dec.op + m.submodules.svdec = svdec = SVP64RegExtra() + + # get the 5-bit reg data before svp64-munging it into 7-bit plus isvec + reg = Signal(5, reset_less=True) # select Register C field with m.Switch(self.sel_in): with m.Case(In3Sel.RB): # for M-Form shiftrot - comb += self.reg_out.data.eq(self.dec.RB) + comb += reg.eq(self.dec.RB) comb += self.reg_out.ok.eq(1) with m.Case(In3Sel.RS): - comb += self.reg_out.data.eq(self.dec.RS) + comb += reg.eq(self.dec.RS) comb += self.reg_out.ok.eq(1) + # now do the SVP64 munging. different from DecodeA only by sv_in3 + + extra = self.sv_rm.extra # SVP64 extra bits 10:18 + comb += svdec.extra.eq(extra) # EXTRA field of SVP64 RM + comb += svdec.etype.eq(op.SV_Etype) # EXTRA2/3 for this insn + comb += svdec.idx.eq(op.sv_in3) # SVP64 reg #3 (matches in3_sel) + comb += svdec.reg_in.eq(reg) # 5-bit (RA, RS) + + # outputs: 7-bit reg number and whether it's vectorised + comb += self.reg_out.data.eq(svdec.reg_out) + comb += self.reg_isvec.eq(svdec.isvec) + return m @@ -305,9 +504,11 @@ class DecodeOut(Elaboratable): def __init__(self, dec): self.dec = dec + self.sv_rm = SVP64Rec() # SVP64 RM field self.sel_in = Signal(OutSel, reset_less=True) self.insn_in = Signal(32, reset_less=True) - self.reg_out = Data(5, "reg_o") + self.reg_out = Data(7, "reg_o") + self.reg_isvec = Signal(1, name="reg_o_isvec") # TODO: in reg_out self.spr_out = Data(SPR, "spr_o") self.fast_out = Data(3, "fast_o") @@ -316,14 +517,18 @@ class DecodeOut(Elaboratable): comb = m.d.comb m.submodules.sprmap = sprmap = SPRMap() op = self.dec.op + m.submodules.svdec = svdec = SVP64RegExtra() + + # get the 5-bit reg data before svp64-munging it into 7-bit plus isvec + reg = Signal(5, reset_less=True) # select Register out field with m.Switch(self.sel_in): with m.Case(OutSel.RT): - comb += self.reg_out.data.eq(self.dec.RT) + comb += reg.eq(self.dec.RT) comb += self.reg_out.ok.eq(1) with m.Case(OutSel.RA): - comb += self.reg_out.data.eq(self.dec.RA) + comb += reg.eq(self.dec.RA) comb += self.reg_out.ok.eq(1) with m.Case(OutSel.SPR): spr = Signal(10, reset_less=True) @@ -334,6 +539,19 @@ class DecodeOut(Elaboratable): comb += self.spr_out.eq(sprmap.spr_o) comb += self.fast_out.eq(sprmap.fast_o) + # now do the SVP64 munging. different from DecodeA only by sv_out + + extra = self.sv_rm.extra # SVP64 extra bits 10:18 + comb += svdec.extra.eq(extra) # EXTRA field of SVP64 RM + comb += svdec.etype.eq(op.SV_Etype) # EXTRA2/3 for this insn + comb += svdec.idx.eq(op.sv_out) # SVP64 reg out1 (matches out_sel) + comb += svdec.reg_in.eq(reg) # 5-bit (RA, RS) + + # outputs: 7-bit reg number and whether it's vectorised + comb += self.reg_out.data.eq(svdec.reg_out) + comb += self.reg_isvec.eq(svdec.isvec) + + # determine Fast Reg with m.Switch(op.internal_op): # BC or BCREG: implicit register (CTR) NOTE: same in DecodeA @@ -354,29 +572,44 @@ class DecodeOut(Elaboratable): class DecodeOut2(Elaboratable): """DecodeOut2 from instruction - decodes output registers + decodes output registers (2nd one). note that RA is *implicit* below, + which now causes problems with SVP64 + + TODO: SVP64 is a little more complex, here. svp64 allows extending + by one more destination by having one more EXTRA field. RA-as-src + is not the same as RA-as-dest. limited in that it's the same first + 5 bits (from the v3.0B opcode), but still kinda cool. mostly used + for operations that have src-as-dest: mostly this is LD/ST-with-update + but there are others. """ def __init__(self, dec): self.dec = dec + self.sv_rm = SVP64Rec() # SVP64 RM field self.sel_in = Signal(OutSel, reset_less=True) self.lk = Signal(reset_less=True) self.insn_in = Signal(32, reset_less=True) - self.reg_out = Data(5, "reg_o") - self.fast_out = Data(3, "fast_o") + self.reg_out = Data(7, "reg_o2") + #self.reg_isvec = Signal(1, name="reg_o2_isvec") # TODO: in reg_out + self.fast_out = Data(3, "fast_o2") def elaborate(self, platform): m = Module() comb = m.d.comb + op = self.dec.op + #m.submodules.svdec = svdec = SVP64RegExtra() - # update mode LD/ST uses read-reg A also as an output - with m.If(self.dec.op.upd == LDSTMode.update): - comb += self.reg_out.eq(self.dec.RA) - comb += self.reg_out.ok.eq(1) + # get the 5-bit reg data before svp64-munging it into 7-bit plus isvec + #reg = Signal(5, reset_less=True) + + if hasattr(self.dec.op, "upd"): + # update mode LD/ST uses read-reg A also as an output + with m.If(self.dec.op.upd == LDSTMode.update): + comb += self.reg_out.data.eq(self.dec.RA) + comb += self.reg_out.ok.eq(1) # B, BC or BCREG: potential implicit register (LR) output # these give bl, bcl, bclrl, etc. - op = self.dec.op with m.Switch(op.internal_op): # BC* implicit register (LR) @@ -481,45 +714,73 @@ class DecodeCRIn(Elaboratable): def __init__(self, dec): self.dec = dec + self.sv_rm = SVP64Rec() # SVP64 RM field self.sel_in = Signal(CRInSel, reset_less=True) self.insn_in = Signal(32, reset_less=True) self.cr_bitfield = Data(3, "cr_bitfield") self.cr_bitfield_b = Data(3, "cr_bitfield_b") self.cr_bitfield_o = Data(3, "cr_bitfield_o") + self.cr_isvec = Signal(1, name="cr_isvec") + self.cr_b_isvec = Signal(1, name="cr_b_isvec") + self.cr_o_isvec = Signal(1, name="cr_o_isvec") self.whole_reg = Data(8, "cr_fxm") def elaborate(self, platform): m = Module() + comb = m.d.comb + op = self.dec.op + m.submodules.svdec = svdec = SVP64CRExtra() + m.submodules.svdec_b = svdec_b = SVP64CRExtra() + m.submodules.svdec_o = svdec_o = SVP64CRExtra() m.submodules.ppick = ppick = PriorityPicker(8, reverse_i=True, reverse_o=True) - comb = m.d.comb - op = self.dec.op + # get the 3-bit reg data before svp64-munging it into 7-bit plus isvec + cr_bf = Signal(3, reset_less=True) + cr_bf_b = Signal(3, reset_less=True) + cr_bf_o = Signal(3, reset_less=True) + + # index selection slightly different due to shared CR field sigh + cr_a_idx = Signal(SVEXTRA) + cr_b_idx = Signal(SVEXTRA) + + # zero-initialisation comb += self.cr_bitfield.ok.eq(0) comb += self.cr_bitfield_b.ok.eq(0) + comb += self.cr_bitfield_o.ok.eq(0) comb += self.whole_reg.ok.eq(0) + + # these change slighly, when decoding BA/BB. really should have + # their own separate CSV column: sv_cr_in1 and sv_cr_in2, but hey + comb += cr_a_idx.eq(op.sv_cr_in) + comb += cr_b_idx.eq(SVEXTRA.NONE) + with m.If(op.sv_cr_in == SVEXTRA.Idx_1_2.value): + comb += cr_a_idx.eq(SVEXTRA.Idx1) + comb += cr_b_idx.eq(SVEXTRA.Idx2) + + # select the relevant CR bitfields with m.Switch(self.sel_in): with m.Case(CRInSel.NONE): pass # No bitfield activated with m.Case(CRInSel.CR0): - comb += self.cr_bitfield.data.eq(0) # CR0 (MSB0 numbering) + comb += cr_bf.eq(0) # CR0 (MSB0 numbering) comb += self.cr_bitfield.ok.eq(1) with m.Case(CRInSel.BI): - comb += self.cr_bitfield.data.eq(self.dec.BI[2:5]) + comb += cr_bf.eq(self.dec.BI[2:5]) comb += self.cr_bitfield.ok.eq(1) with m.Case(CRInSel.BFA): - comb += self.cr_bitfield.data.eq(self.dec.FormX.BFA) + comb += cr_bf.eq(self.dec.FormX.BFA) comb += self.cr_bitfield.ok.eq(1) with m.Case(CRInSel.BA_BB): - comb += self.cr_bitfield.data.eq(self.dec.BA[2:5]) + comb += cr_bf.eq(self.dec.BA[2:5]) comb += self.cr_bitfield.ok.eq(1) - comb += self.cr_bitfield_b.data.eq(self.dec.BB[2:5]) + comb += cr_bf_b.eq(self.dec.BB[2:5]) comb += self.cr_bitfield_b.ok.eq(1) - comb += self.cr_bitfield_o.data.eq(self.dec.BT[2:5]) + comb += cr_bf_o.eq(self.dec.BT[2:5]) comb += self.cr_bitfield_o.ok.eq(1) with m.Case(CRInSel.BC): - comb += self.cr_bitfield.data.eq(self.dec.BC[2:5]) + comb += cr_bf.eq(self.dec.BC[2:5]) comb += self.cr_bitfield.ok.eq(1) with m.Case(CRInSel.WHOLE_REG): comb += self.whole_reg.ok.eq(1) @@ -533,6 +794,34 @@ class DecodeCRIn(Elaboratable): # otherwise use all of it comb += self.whole_reg.data.eq(0xff) + # now do the SVP64 munging. + extra = self.sv_rm.extra # SVP64 extra bits 10:18 + + comb += svdec.extra.eq(extra) # EXTRA field of SVP64 RM + comb += svdec.etype.eq(op.SV_Etype) # EXTRA2/3 for this insn + comb += svdec.cr_in.eq(cr_bf) # 3-bit (BFA) + + comb += svdec_b.extra.eq(extra) # EXTRA field of SVP64 RM + comb += svdec_b.etype.eq(op.SV_Etype) # EXTRA2/3 for this insn + comb += svdec_b.cr_in.eq(cr_bf_b) # 3-bit (BB[2:5]) + + comb += svdec_o.extra.eq(extra) # EXTRA field of SVP64 RM + comb += svdec_o.etype.eq(op.SV_Etype) # EXTRA2/3 for this insn + comb += svdec_o.cr_in.eq(cr_bf_o) # 3-bit (BT[2:5]) + + # indices are slightly different, BA/BB mess sorted above + comb += svdec.idx.eq(cr_a_idx) # SVP64 CR in A + comb += svdec_b.idx.eq(cr_b_idx) # SVP64 CR in B + comb += svdec_o.idx.eq(op.sv_cr_out) # SVP64 CR out + + # outputs: 7-bit reg number and whether it's vectorised + comb += self.cr_bitfield.data.eq(svdec.cr_out) + comb += self.cr_isvec.eq(svdec.isvec) + comb += self.cr_bitfield_b.data.eq(svdec_b.cr_out) + comb += self.cr_b_isvec.eq(svdec_b.isvec) + comb += self.cr_bitfield_o.data.eq(svdec_o.cr_out) + comb += self.cr_o_isvec.eq(svdec_o.isvec) + return m @@ -545,6 +834,7 @@ class DecodeCROut(Elaboratable): def __init__(self, dec): self.dec = dec + self.sv_rm = SVP64Rec() # SVP64 RM field self.rc_in = Signal(reset_less=True) self.sel_in = Signal(CROutSel, reset_less=True) self.insn_in = Signal(32, reset_less=True) @@ -592,49 +882,87 @@ class DecodeCROut(Elaboratable): return m +# dictionary of Input Record field names that, if they exist, +# will need a corresponding CSV Decoder file column (actually, PowerOp) +# to be decoded (this includes the single bit names) +record_names = {'insn_type': 'internal_op', + 'fn_unit': 'function_unit', + 'rc': 'rc_sel', + 'oe': 'rc_sel', + 'zero_a': 'in1_sel', + 'imm_data': 'in2_sel', + 'invert_in': 'inv_a', + 'invert_out': 'inv_out', + 'rc': 'cr_out', + 'oe': 'cr_in', + 'output_carry': 'cry_out', + 'input_carry': 'cry_in', + 'is_32bit': 'is_32b', + 'is_signed': 'sgn', + 'lk': 'lk', + 'data_len': 'ldst_len', + 'byte_reverse': 'br', + 'sign_extend': 'sgn_ext', + 'ldst_mode': 'upd', + } + class PowerDecodeSubset(Elaboratable): """PowerDecodeSubset: dynamic subset decoder + only fields actually requested are copied over. hence, "subset" (duh). """ + def __init__(self, dec, opkls=None, fn_name=None, final=False, state=None): - def __init__(self, dec, opkls=None, fn_name=None, col_subset=None, - final=False, state=None): - + self.sv_rm = SVP64Rec(name="dec_svp64") # SVP64 RM field self.final = final + self.opkls = opkls + self.fn_name = fn_name + if opkls is None: + opkls = Decode2ToOperand + self.do = opkls(fn_name) + col_subset = self.get_col_subset(self.do) + + # only needed for "main" PowerDecode2 + if not self.final: + self.e = Decode2ToExecute1Type(name=self.fn_name, do=self.do) + + # create decoder if one not already given if dec is None: - self.opkls = opkls - self.fn_name = fn_name - self.dec = create_pdecode(name=fn_name, col_subset=col_subset, + dec = create_pdecode(name=fn_name, col_subset=col_subset, row_subset=self.rowsubsetfn) - else: - self.dec = dec - self.opkls = None - self.fn_name = None - self.e = Decode2ToExecute1Type(name=self.fn_name, opkls=self.opkls) + self.dec = dec # state information needed by the Decoder - if state is not None: - self.state = state - else: - self.state = CoreState("dec2") + if state is None: + state = CoreState("dec2") + self.state = state + + def get_col_subset(self, do): + subset = {'sv_cr_in', 'sv_cr_out', 'SV_Etype', + 'cr_in', 'cr_out', 'rc_sel'} # needed, non-optional + for k, v in record_names.items(): + if hasattr(do, k): + subset.add(v) + print ("get_col_subset", self.fn_name, do.fields, subset) + return subset def rowsubsetfn(self, opcode, row): return row['unit'] == self.fn_name def ports(self): - return self.dec.ports() + self.e.ports() + return self.dec.ports() + self.e.ports() + self.sv_rm.ports() def needs_field(self, field, op_field): if self.final: - do = self.e.do + do = self.do else: do = self.e_tmp.do return hasattr(do, field) and self.op_get(op_field) is not None def do_copy(self, field, val, final=False): if final or self.final: - do = self.e.do + do = self.do else: do = self.e_tmp.do if hasattr(do, field) and val is not None: @@ -648,20 +976,17 @@ class PowerDecodeSubset(Elaboratable): m = Module() comb = m.d.comb state = self.state - e_out, op, do_out = self.e, self.dec.op, self.e.do + op, do = self.dec.op, self.do msr, cia = state.msr, state.pc # fill in for a normal instruction (not an exception) # copy over if non-exception, non-privileged etc. is detected - if self.final: - e = self.e - else: + if not self.final: if self.fn_name is None: name = "tmp" else: name = self.fn_name + "tmp" - self.e_tmp = e = Decode2ToExecute1Type(name=name, opkls=self.opkls) - do = e.do + self.e_tmp = Decode2ToExecute1Type(name=name, opkls=self.opkls) # set up submodule decoders m.submodules.dec = self.dec @@ -680,6 +1005,8 @@ class PowerDecodeSubset(Elaboratable): comb += dec_rc.sel_in.eq(op.rc_sel) comb += dec_oe.sel_in.eq(op.rc_sel) # XXX should be OE sel comb += self.dec_cr_in.sel_in.eq(op.cr_in) + comb += self.dec_cr_in.sv_rm.eq(self.sv_rm) + comb += self.dec_cr_out.sv_rm.eq(self.sv_rm) comb += self.dec_cr_out.sel_in.eq(op.cr_out) comb += self.dec_cr_out.rc_in.eq(dec_rc.rc_out.data) @@ -687,10 +1014,24 @@ class PowerDecodeSubset(Elaboratable): comb += self.do_copy("msr", msr) comb += self.do_copy("cia", cia) - # set up instruction, pick fn unit + # set up instruction type # no op: defaults to OP_ILLEGAL comb += self.do_copy("insn_type", self.op_get("internal_op")) - comb += self.do_copy("fn_unit", self.op_get("function_unit")) + + # function unit for decoded instruction: requires minor redirect + # for SPR set/get + fn = self.op_get("function_unit") + spr = Signal(10, reset_less=True) + comb += spr.eq(decode_spr_num(self.dec.SPR)) # from XFX + + # for first test only forward SPRs 18 and 19 to MMU, when + # operation is MTSPR or MFSPR. TODO: add other MMU SPRs + with m.If(((self.dec.op.internal_op == MicrOp.OP_MTSPR) | + (self.dec.op.internal_op == MicrOp.OP_MFSPR)) & + ((spr == SPR.DSISR) | (spr == SPR.DAR))): + comb += self.do_copy("fn_unit", Function.MMU) + with m.Else(): + comb += self.do_copy("fn_unit",fn) # immediates if self.needs_field("zero_a", "in1_sel"): @@ -752,8 +1093,43 @@ class PowerDecode2(PowerDecodeSubset): instructions are illegal (or privileged) or not, and instead of just leaving at that, *replacing* the instruction to execute with a suitable alternative (trap). + + LDSTExceptions are done the cycle _after_ they're detected (after + they come out of LDSTCompUnit). basically despite the instruction + being decoded, the results of the decode are completely ignored + and "exception.happened" used to set the "actual" instruction to + "OP_TRAP". the LDSTException data structure gets filled in, + in the CompTrapOpSubset and that's what it fills in SRR. + + to make this work, TestIssuer must notice "exception.happened" + after the (failed) LD/ST and copies the LDSTException info from + the output, into here (PowerDecoder2). without incrementing PC. """ + def __init__(self, dec, opkls=None, fn_name=None, final=False, state=None): + super().__init__(dec, opkls, fn_name, final, state) + self.exc = LDSTException("dec2_exc") + + def get_col_subset(self, opkls): + subset = super().get_col_subset(opkls) + subset.add("asmcode") + subset.add("in1_sel") + subset.add("in2_sel") + subset.add("in3_sel") + subset.add("out_sel") + subset.add("sv_in1") + subset.add("sv_in2") + subset.add("sv_in3") + subset.add("sv_out") + subset.add("sv_cr_in") + subset.add("sv_cr_out") + subset.add("SV_Etype") + subset.add("SV_Ptype") + subset.add("lk") + subset.add("internal_op") + subset.add("form") + return subset + def elaborate(self, platform): m = super().elaborate(platform) comb = m.d.comb @@ -778,6 +1154,11 @@ class PowerDecode2(PowerDecodeSubset): dec_c.insn_in, dec_o.insn_in, dec_o2.insn_in]: comb += i.eq(self.dec.opcode_in) + # ... and svp64 rm + for i in [dec_a.insn_in, dec_b.insn_in, + dec_c.insn_in, dec_o.insn_in, dec_o2.insn_in]: + comb += i.eq(self.sv_rm) + # ...and subdecoders' input fields comb += dec_a.sel_in.eq(op.in1_sel) comb += dec_b.sel_in.eq(op.in2_sel) @@ -788,11 +1169,14 @@ class PowerDecode2(PowerDecodeSubset): comb += dec_o2.lk.eq(do.lk) # registers a, b, c and out and out2 (LD/ST EA) - comb += e.read_reg1.eq(dec_a.reg_out) - comb += e.read_reg2.eq(dec_b.reg_out) - comb += e.read_reg3.eq(dec_c.reg_out) - comb += e.write_reg.eq(dec_o.reg_out) - comb += e.write_ea.eq(dec_o2.reg_out) + for to_reg, fromreg in ( + (e.read_reg1, dec_a.reg_out), + (e.read_reg2, dec_b.reg_out), + (e.read_reg3, dec_c.reg_out), + (e.write_reg, dec_o.reg_out), + (e.write_ea, dec_o2.reg_out)): + comb += to_reg.data.eq(fromreg.data) + comb += to_reg.ok.eq(fromreg.ok) # SPRs out comb += e.read_spr1.eq(dec_a.spr_out) @@ -805,10 +1189,13 @@ class PowerDecode2(PowerDecodeSubset): comb += e.write_fast2.eq(dec_o2.fast_out) # condition registers (CR) - comb += e.read_cr1.eq(self.dec_cr_in.cr_bitfield) - comb += e.read_cr2.eq(self.dec_cr_in.cr_bitfield_b) - comb += e.read_cr3.eq(self.dec_cr_in.cr_bitfield_o) - comb += e.write_cr.eq(self.dec_cr_out.cr_bitfield) + for to_reg, fromreg in ( + (e.read_cr1, self.dec_cr_in.cr_bitfield), + (e.read_cr2, self.dec_cr_in.cr_bitfield_b), + (e.read_cr3, self.dec_cr_in.cr_bitfield_o), + (e.write_cr, self.dec_cr_out.cr_bitfield)): + comb += to_reg.data.eq(fromreg.data) + comb += to_reg.ok.eq(fromreg.ok) # sigh this is exactly the sort of thing for which the # decoder is designed to not need. MTSPR, MFSPR and others need @@ -842,14 +1229,32 @@ class PowerDecode2(PowerDecodeSubset): dec_irq_ok = Signal() priv_ok = Signal() illeg_ok = Signal() + exc = self.exc comb += ext_irq_ok.eq(ext_irq & msr[MSR.EE]) # v3.0B p944 (MSR.EE) - comb += dec_irq_ok.eq(dec_spr[63] & msr[MSR.EE]) # v3.0B 6.5.11 p1076 + comb += dec_irq_ok.eq(dec_spr[63] & msr[MSR.EE]) # 6.5.11 p1076 comb += priv_ok.eq(is_priv_insn & msr[MSR.PR]) comb += illeg_ok.eq(op.internal_op == MicrOp.OP_ILLEGAL) + # LD/ST exceptions. TestIssuer copies the exception info at us + # after a failed LD/ST. + with m.If(exc.happened): + with m.If(exc.alignment): + self.trap(m, TT.PRIV, 0x600) + with m.Elif(exc.instr_fault): + with m.If(exc.segment_fault): + self.trap(m, TT.PRIV, 0x480) + with m.Else(): + # pass exception info to trap to create SRR1 + self.trap(m, TT.MEMEXC, 0x400, exc) + with m.Else(): + with m.If(exc.segment_fault): + self.trap(m, TT.PRIV, 0x380) + with m.Else(): + self.trap(m, TT.PRIV, 0x300) + # decrement counter (v3.0B p1099): TODO 32-bit version (MSR.LPCR) - with m.If(dec_irq_ok): + with m.Elif(dec_irq_ok): self.trap(m, TT.DEC, 0x900) # v3.0B 6.5 p1065 # external interrupt? only if MSR.EE set @@ -895,9 +1300,13 @@ class PowerDecode2(PowerDecodeSubset): comb += e_out.read_fast2.data.eq(FastRegs.SRR1) # constant: SRR1 comb += e_out.read_fast2.ok.eq(1) + # annoying simulator bug + if hasattr(e_out, "asmcode") and hasattr(self.dec.op, "asmcode"): + comb += e_out.asmcode.eq(self.dec.op.asmcode) + return m - def trap(self, m, traptype, trapaddr): + def trap(self, m, traptype, trapaddr, exc=None): """trap: this basically "rewrites" the decoded instruction as a trap """ comb = m.d.comb @@ -910,6 +1319,7 @@ class PowerDecode2(PowerDecodeSubset): comb += self.do_copy("fn_unit", Function.TRAP, True) comb += self.do_copy("trapaddr", trapaddr >> 4, True) # bottom 4 bits comb += self.do_copy("traptype", traptype, True) # request type + comb += self.do_copy("ldst_exc", exc, True) # request type comb += self.do_copy("msr", self.state.msr, True) # copy of MSR "state" comb += self.do_copy("cia", self.state.pc, True) # copy of PC "state"