X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fsoc%2Fdecoder%2Fpower_decoder2.py;h=6cf06f34c677591e573d942d33272c30ded3ebf1;hb=5696642162bcad72c24c58d46bbca29beffa490c;hp=046b715b19df68803c1c289cc2812b4c37ed31bf;hpb=f59bf30527aba7a557651260b3dc2b1312fa689a;p=soc.git diff --git a/src/soc/decoder/power_decoder2.py b/src/soc/decoder/power_decoder2.py index 046b715b..6cf06f34 100644 --- a/src/soc/decoder/power_decoder2.py +++ b/src/soc/decoder/power_decoder2.py @@ -8,23 +8,32 @@ over-riding the internal opcode when an exception is needed. from nmigen import Module, Elaboratable, Signal, Mux, Const, Cat, Repl, Record from nmigen.cli import rtlil +from soc.regfile.regfiles import XERRegs +from nmutil.picker import PriorityPicker from nmutil.iocontrol import RecordObject from nmutil.extend import exts +from soc.experiment.mem_types import LDSTException + from soc.decoder.power_regspec_map import regspec_decode_read from soc.decoder.power_regspec_map import regspec_decode_write from soc.decoder.power_decoder import create_pdecode from soc.decoder.power_enums import (MicrOp, CryIn, Function, CRInSel, CROutSel, LdstLen, In1Sel, In2Sel, In3Sel, - OutSel, SPR, RC, LDSTMode) -from soc.decoder.decode2execute1 import Decode2ToExecute1Type, Data -from soc.consts import MSR + OutSel, SPR, RC, LDSTMode, + SVEXTRA, SVEtype) +from soc.decoder.decode2execute1 import (Decode2ToExecute1Type, Data, + Decode2ToOperand) +from soc.sv.svp64 import SVP64Rec +from soc.consts import (MSR, sel, SPEC, EXTRA2, EXTRA3, SVP64P, field, + SPEC_SIZE, SPECb, SPEC_AUG_SIZE) from soc.regfile.regfiles import FastRegs from soc.consts import TT from soc.config.state import CoreState +from soc.regfile.util import spr_to_fast def decode_spr_num(spr): @@ -40,8 +49,7 @@ def instr_is_priv(m, op, insn): with m.Case(MicrOp.OP_ATTN, MicrOp.OP_MFMSR, MicrOp.OP_MTMSRD, MicrOp.OP_MTMSR, MicrOp.OP_RFID): comb += is_priv_insn.eq(1) - # XXX TODO - #with m.Case(MicrOp.OP_TLBIE) : comb += is_priv_insn.eq(1) + with m.Case(MicrOp.OP_TLBIE) : comb += is_priv_insn.eq(1) with m.Case(MicrOp.OP_MFSPR, MicrOp.OP_MTSPR): with m.If(insn[20]): # field XFX.spr[-1] i think comb += is_priv_insn.eq(1) @@ -49,27 +57,177 @@ def instr_is_priv(m, op, insn): class SPRMap(Elaboratable): - """SPRMap: maps POWER9 SPR numbers to internal enum values + """SPRMap: maps POWER9 SPR numbers to internal enum values, fast and slow """ def __init__(self): self.spr_i = Signal(10, reset_less=True) - self.spr_o = Signal(SPR, reset_less=True) + self.spr_o = Data(SPR, name="spr_o") + self.fast_o = Data(3, name="fast_o") def elaborate(self, platform): m = Module() with m.Switch(self.spr_i): for i, x in enumerate(SPR): with m.Case(x.value): - m.d.comb += self.spr_o.eq(i) + m.d.comb += self.spr_o.data.eq(i) + m.d.comb += self.spr_o.ok.eq(1) + for x, v in spr_to_fast.items(): + with m.Case(x.value): + m.d.comb += self.fast_o.data.eq(v) + m.d.comb += self.fast_o.ok.eq(1) + return m + + +class SVP64ExtraSpec(Elaboratable): + """SVP64ExtraSpec - decodes SVP64 Extra specification. + + selects the required EXTRA2/3 field. + + see https://libre-soc.org/openpower/sv/svp64/ + """ + def __init__(self): + self.extra = Signal(9, reset_less=True) + self.etype = Signal(SVEtype, reset_less=True) # 2 or 3 bits + self.idx = Signal(SVEXTRA, reset_less=True) # which part of extra + self.spec = Signal(3) # EXTRA spec for the register + + def elaborate(self, platform): + m = Module() + comb = m.d.comb + spec = self.spec + extra = self.extra + + # back in the LDSTRM-* and RM-* files generated by sv_analysis.py + # we marked every op with an Etype: EXTRA2 or EXTRA3, and also said + # which of the 4 (or 3 for EXTRA3) sub-fields of bits 10:18 contain + # the register-extension information. extract those now + with m.Switch(self.etype): + # 2-bit index selection mode + with m.Case(SVEtype.EXTRA2): + with m.Switch(self.idx): + with m.Case(SVEXTRA.Idx0): # 1st 2 bits [0:1] + comb += spec[SPEC.VEC].eq(extra[EXTRA2.IDX0_VEC]) + comb += spec[SPEC.MSB].eq(extra[EXTRA2.IDX0_MSB]) + with m.Case(SVEXTRA.Idx1): # 2nd 2 bits [2:3] + comb += spec[SPEC.VEC].eq(extra[EXTRA2.IDX1_VEC]) + comb += spec[SPEC.MSB].eq(extra[EXTRA2.IDX1_MSB]) + with m.Case(SVEXTRA.Idx2): # 3rd 2 bits [4:5] + comb += spec[SPEC.VEC].eq(extra[EXTRA2.IDX2_VEC]) + comb += spec[SPEC.MSB].eq(extra[EXTRA2.IDX2_MSB]) + with m.Case(SVEXTRA.Idx3): # 4th 2 bits [6:7] + comb += spec[SPEC.VEC].eq(extra[EXTRA2.IDX3_VEC]) + comb += spec[SPEC.MSB].eq(extra[EXTRA2.IDX3_MSB]) + # 3-bit index selection mode + with m.Case(SVEtype.EXTRA3): + with m.Switch(self.idx): + with m.Case(SVEXTRA.Idx0): # 1st 3 bits [0:2] + comb += spec.eq(sel(extra, EXTRA3.IDX0)) + with m.Case(SVEXTRA.Idx1): # 2nd 3 bits [3:5] + comb += spec.eq(sel(extra, EXTRA3.IDX1)) + with m.Case(SVEXTRA.Idx2): # 3rd 3 bits [6:8] + comb += spec.eq(sel(extra, EXTRA3.IDX2)) + # cannot fit more than 9 bits so there is no 4th thing + + return m + + +class SVP64RegExtra(SVP64ExtraSpec): + """SVP64RegExtra - decodes SVP64 Extra fields to determine reg extension + + incoming 5-bit GPR/FP is turned into a 7-bit and marked as scalar/vector + depending on info in one of the positions in the EXTRA field. + + designed so that "no change" to the 5-bit register number occurs if + SV either does not apply or the relevant EXTRA2/3 field bits are zero. + + see https://libre-soc.org/openpower/sv/svp64/ + """ + def __init__(self): + SVP64ExtraSpec.__init__(self) + self.reg_in = Signal(5) # incoming reg number (5 bits, RA, RB) + self.reg_out = Signal(7) # extra-augmented output (7 bits) + self.isvec = Signal(1) # reg is marked as vector if true + + def elaborate(self, platform): + m = super().elaborate(platform) # select required EXTRA2/3 + comb = m.d.comb + + # first get the spec. if not changed it's "scalar identity behaviour" + # which is zero which is ok. + spec = self.spec + + # now decode it. bit 0 is "scalar/vector". note that spec could be zero + # from above, which (by design) has the effect of "no change", below. + + # simple: isvec is top bit of spec + comb += self.isvec.eq(spec[SPEC.VEC]) + # extra bits for register number augmentation + spec_aug = Signal(SPEC_AUG_SIZE) + comb += spec_aug.eq(field(spec, SPECb.MSB, SPECb.LSB, SPEC_SIZE)) + + # decode vector differently from scalar + with m.If(self.isvec): + # Vector: shifted up, extra in LSBs (RA << 2) | spec[1:2] + comb += self.reg_out.eq(Cat(spec_aug, self.reg_in)) + with m.Else(): + # Scalar: not shifted up, extra in MSBs RA | (spec[1:2] << 5) + comb += self.reg_out.eq(Cat(self.reg_in, spec_aug)) + + return m + + +class SVP64CRExtra(SVP64ExtraSpec): + """SVP64CRExtra - decodes SVP64 Extra fields to determine CR extension + + incoming 3-bit CR is turned into a 7-bit and marked as scalar/vector + depending on info in one of the positions in the EXTRA field. + + yes, really, 128 CRs. INT is 128, FP is 128, therefore CRs are 128. + + designed so that "no change" to the 3-bit CR register number occurs if + SV either does not apply or the relevant EXTRA2/3 field bits are zero. + + see https://libre-soc.org/openpower/sv/svp64/appendix + """ + def __init__(self): + SVP64ExtraSpec.__init__(self) + self.cr_in = Signal(3) # incoming CR number (3 bits, BA[0:2], BFA) + self.cr_out = Signal(7) # extra-augmented CR output (7 bits) + self.isvec = Signal(1) # reg is marked as vector if true + + def elaborate(self, platform): + m = super().elaborate(platform) # select required EXTRA2/3 + comb = m.d.comb + + # first get the spec. if not changed it's "scalar identity behaviour" + # which is zero which is ok. + spec = self.spec + + # now decode it. bit 0 is "scalar/vector". note that spec could be zero + # from above, which (by design) has the effect of "no change", below. + + # simple: isvec is top bit of spec + comb += self.isvec.eq(spec[SPEC.VEC]) + # extra bits for register number augmentation + spec_aug = Signal(SPEC_AUG_SIZE) + comb += spec_aug.eq(field(spec, SPECb.MSB, SPECb.LSB, SPEC_SIZE)) + + # decode vector differently from scalar, insert bits 1 and 2 accordingly + with m.If(self.isvec): + # Vector: shifted up, extra in LSBs (CR << 4) | (spec[1:2] << 2) + comb += self.cr_out.eq(Cat(Const(0, 2), spec_aug, self.cr_in)) + with m.Else(): + # Scalar: not shifted up, extra in MSBs CR | (spec[1:2] << 3) + comb += self.cr_out.eq(Cat(self.cr_in, spec_aug)) + return m class DecodeA(Elaboratable): """DecodeA from instruction - decodes register RA, whether immediate-zero, implicit and - explicit CSRs + decodes register RA, implicit and explicit CSRs """ def __init__(self, dec): @@ -77,13 +235,14 @@ class DecodeA(Elaboratable): self.sel_in = Signal(In1Sel, reset_less=True) self.insn_in = Signal(32, reset_less=True) self.reg_out = Data(5, name="reg_a") - self.immz_out = Signal(reset_less=True) self.spr_out = Data(SPR, "spr_a") self.fast_out = Data(3, "fast_a") def elaborate(self, platform): m = Module() comb = m.d.comb + op = self.dec.op + reg = self.reg_out m.submodules.sprmap = sprmap = SPRMap() # select Register A field @@ -92,21 +251,18 @@ class DecodeA(Elaboratable): with m.If((self.sel_in == In1Sel.RA) | ((self.sel_in == In1Sel.RA_OR_ZERO) & (ra != Const(0, 5)))): - comb += self.reg_out.data.eq(ra) - comb += self.reg_out.ok.eq(1) - - # zero immediate requested - with m.If((self.sel_in == In1Sel.RA_OR_ZERO) & - (self.reg_out.data == Const(0, 5))): - comb += self.immz_out.eq(1) + comb += reg.data.eq(ra) + comb += reg.ok.eq(1) # some Logic/ALU ops have RS as the 3rd arg, but no "RA". + # moved it to 1st position (in1_sel)... because + rs = Signal(5, reset_less=True) + comb += rs.eq(self.dec.RS) with m.If(self.sel_in == In1Sel.RS): - comb += self.reg_out.data.eq(self.dec.RS) - comb += self.reg_out.ok.eq(1) + comb += reg.data.eq(rs) + comb += reg.ok.eq(1) # decode Fast-SPR based on instruction type - op = self.dec.op with m.Switch(op.internal_op): # BC or BCREG: implicit register (CTR) NOTE: same in DecodeOut @@ -127,31 +283,34 @@ class DecodeA(Elaboratable): with m.Case(MicrOp.OP_MFSPR): spr = Signal(10, reset_less=True) comb += spr.eq(decode_spr_num(self.dec.SPR)) # from XFX - with m.Switch(spr): - # fast SPRs - with m.Case(SPR.CTR.value): - comb += self.fast_out.data.eq(FastRegs.CTR) - comb += self.fast_out.ok.eq(1) - with m.Case(SPR.LR.value): - comb += self.fast_out.data.eq(FastRegs.LR) - comb += self.fast_out.ok.eq(1) - with m.Case(SPR.TAR.value): - comb += self.fast_out.data.eq(FastRegs.TAR) - comb += self.fast_out.ok.eq(1) - with m.Case(SPR.SRR0.value): - comb += self.fast_out.data.eq(FastRegs.SRR0) - comb += self.fast_out.ok.eq(1) - with m.Case(SPR.SRR1.value): - comb += self.fast_out.data.eq(FastRegs.SRR1) - comb += self.fast_out.ok.eq(1) - with m.Case(SPR.XER.value): - pass # do nothing - # : map to internal SPR numbers - # XXX TODO: dec and tb not to go through mapping. - with m.Default(): - comb += sprmap.spr_i.eq(spr) - comb += self.spr_out.data.eq(sprmap.spr_o) - comb += self.spr_out.ok.eq(1) + comb += sprmap.spr_i.eq(spr) + comb += self.spr_out.eq(sprmap.spr_o) + comb += self.fast_out.eq(sprmap.fast_o) + + return m + + +class DecodeAImm(Elaboratable): + """DecodeA immediate from instruction + + decodes register RA, whether immediate-zero, implicit and + explicit CSRs + """ + + def __init__(self, dec): + self.dec = dec + self.sel_in = Signal(In1Sel, reset_less=True) + self.immz_out = Signal(reset_less=True) + + def elaborate(self, platform): + m = Module() + comb = m.d.comb + + # zero immediate requested + ra = Signal(5, reset_less=True) + comb += ra.eq(self.dec.RA) + with m.If((self.sel_in == In1Sel.RA_OR_ZERO) & (ra == Const(0, 5))): + comb += self.immz_out.eq(1) return m @@ -169,23 +328,56 @@ class DecodeB(Elaboratable): self.dec = dec self.sel_in = Signal(In2Sel, reset_less=True) self.insn_in = Signal(32, reset_less=True) - self.reg_out = Data(5, "reg_b") - self.imm_out = Data(64, "imm_b") + self.reg_out = Data(7, "reg_b") + self.reg_isvec = Signal(1, name="reg_b_isvec") # TODO: in reg_out self.fast_out = Data(3, "fast_b") def elaborate(self, platform): m = Module() comb = m.d.comb + op = self.dec.op + reg = self.reg_out # select Register B field with m.Switch(self.sel_in): with m.Case(In2Sel.RB): - comb += self.reg_out.data.eq(self.dec.RB) - comb += self.reg_out.ok.eq(1) + comb += reg.data.eq(self.dec.RB) + comb += reg.ok.eq(1) with m.Case(In2Sel.RS): # for M-Form shiftrot - comb += self.reg_out.data.eq(self.dec.RS) - comb += self.reg_out.ok.eq(1) + comb += reg.data.eq(self.dec.RS) + comb += reg.ok.eq(1) + + # decode SPR2 based on instruction type + # BCREG implicitly uses LR or TAR for 2nd reg + # CTR however is already in fast_spr1 *not* 2. + with m.If(op.internal_op == MicrOp.OP_BCREG): + xo9 = self.dec.FormXL.XO[9] # 3.0B p38 top bit of XO + xo5 = self.dec.FormXL.XO[5] # 3.0B p38 + with m.If(~xo9): + comb += self.fast_out.data.eq(FastRegs.LR) + comb += self.fast_out.ok.eq(1) + with m.Elif(xo5): + comb += self.fast_out.data.eq(FastRegs.TAR) + comb += self.fast_out.ok.eq(1) + + return m + + +class DecodeBImm(Elaboratable): + """DecodeB immediate from instruction + """ + def __init__(self, dec): + self.dec = dec + self.sel_in = Signal(In2Sel, reset_less=True) + self.imm_out = Data(64, "imm_b") + + def elaborate(self, platform): + m = Module() + comb = m.d.comb + + # select Register B Immediate + with m.Switch(self.sel_in): with m.Case(In2Sel.CONST_UI): # unsigned comb += self.imm_out.data.eq(self.dec.UI) comb += self.imm_out.ok.eq(1) @@ -229,20 +421,6 @@ class DecodeB(Elaboratable): comb += self.imm_out.data.eq(self.dec.SH32) comb += self.imm_out.ok.eq(1) - # decode SPR2 based on instruction type - op = self.dec.op - # BCREG implicitly uses LR or TAR for 2nd reg - # CTR however is already in fast_spr1 *not* 2. - with m.If(op.internal_op == MicrOp.OP_BCREG): - xo9 = self.dec.FormXL.XO[9] # 3.0B p38 top bit of XO - xo5 = self.dec.FormXL.XO[5] # 3.0B p38 - with m.If(~xo9): - comb += self.fast_out.data.eq(FastRegs.LR) - comb += self.fast_out.ok.eq(1) - with m.Elif(xo5): - comb += self.fast_out.data.eq(FastRegs.TAR) - comb += self.fast_out.ok.eq(1) - return m @@ -261,16 +439,18 @@ class DecodeC(Elaboratable): def elaborate(self, platform): m = Module() comb = m.d.comb + op = self.dec.op + reg = self.reg_out # select Register C field with m.Switch(self.sel_in): with m.Case(In3Sel.RB): # for M-Form shiftrot - comb += self.reg_out.data.eq(self.dec.RB) - comb += self.reg_out.ok.eq(1) + comb += reg.data.eq(self.dec.RB) + comb += reg.ok.eq(1) with m.Case(In3Sel.RS): - comb += self.reg_out.data.eq(self.dec.RS) - comb += self.reg_out.ok.eq(1) + comb += reg.data.eq(self.dec.RS) + comb += reg.ok.eq(1) return m @@ -294,46 +474,26 @@ class DecodeOut(Elaboratable): comb = m.d.comb m.submodules.sprmap = sprmap = SPRMap() op = self.dec.op + reg = self.reg_out # select Register out field with m.Switch(self.sel_in): with m.Case(OutSel.RT): - comb += self.reg_out.data.eq(self.dec.RT) - comb += self.reg_out.ok.eq(1) + comb += reg.data.eq(self.dec.RT) + comb += reg.ok.eq(1) with m.Case(OutSel.RA): - comb += self.reg_out.data.eq(self.dec.RA) - comb += self.reg_out.ok.eq(1) + comb += reg.data.eq(self.dec.RA) + comb += reg.ok.eq(1) with m.Case(OutSel.SPR): spr = Signal(10, reset_less=True) comb += spr.eq(decode_spr_num(self.dec.SPR)) # from XFX - # TODO MTSPR 1st spr (fast) + # MFSPR move to SPRs - needs mapping with m.If(op.internal_op == MicrOp.OP_MTSPR): - with m.Switch(spr): - # fast SPRs - with m.Case(SPR.CTR.value): - comb += self.fast_out.data.eq(FastRegs.CTR) - comb += self.fast_out.ok.eq(1) - with m.Case(SPR.LR.value): - comb += self.fast_out.data.eq(FastRegs.LR) - comb += self.fast_out.ok.eq(1) - with m.Case(SPR.TAR.value): - comb += self.fast_out.data.eq(FastRegs.TAR) - comb += self.fast_out.ok.eq(1) - with m.Case(SPR.SRR0.value): - comb += self.fast_out.data.eq(FastRegs.SRR0) - comb += self.fast_out.ok.eq(1) - with m.Case(SPR.SRR1.value): - comb += self.fast_out.data.eq(FastRegs.SRR1) - comb += self.fast_out.ok.eq(1) - with m.Case(SPR.XER.value): - pass # do nothing - # : map to internal SPR numbers - # XXX TODO: dec and tb not to go through mapping. - with m.Default(): - comb += sprmap.spr_i.eq(spr) - comb += self.spr_out.data.eq(sprmap.spr_o) - comb += self.spr_out.ok.eq(1) + comb += sprmap.spr_i.eq(spr) + comb += self.spr_out.eq(sprmap.spr_o) + comb += self.fast_out.eq(sprmap.fast_o) + # determine Fast Reg with m.Switch(op.internal_op): # BC or BCREG: implicit register (CTR) NOTE: same in DecodeA @@ -354,7 +514,15 @@ class DecodeOut(Elaboratable): class DecodeOut2(Elaboratable): """DecodeOut2 from instruction - decodes output registers + decodes output registers (2nd one). note that RA is *implicit* below, + which now causes problems with SVP64 + + TODO: SVP64 is a little more complex, here. svp64 allows extending + by one more destination by having one more EXTRA field. RA-as-src + is not the same as RA-as-dest. limited in that it's the same first + 5 bits (from the v3.0B opcode), but still kinda cool. mostly used + for operations that have src-as-dest: mostly this is LD/ST-with-update + but there are others. """ def __init__(self, dec): @@ -362,21 +530,26 @@ class DecodeOut2(Elaboratable): self.sel_in = Signal(OutSel, reset_less=True) self.lk = Signal(reset_less=True) self.insn_in = Signal(32, reset_less=True) - self.reg_out = Data(5, "reg_o") - self.fast_out = Data(3, "fast_o") + self.reg_out = Data(5, "reg_o2") + self.fast_out = Data(3, "fast_o2") def elaborate(self, platform): m = Module() comb = m.d.comb + op = self.dec.op + #m.submodules.svdec = svdec = SVP64RegExtra() - # update mode LD/ST uses read-reg A also as an output - with m.If(self.dec.op.upd == LDSTMode.update): - comb += self.reg_out.eq(self.dec.RA) - comb += self.reg_out.ok.eq(1) + # get the 5-bit reg data before svp64-munging it into 7-bit plus isvec + #reg = Signal(5, reset_less=True) + + if hasattr(self.dec.op, "upd"): + # update mode LD/ST uses read-reg A also as an output + with m.If(self.dec.op.upd == LDSTMode.update): + comb += self.reg_out.data.eq(self.dec.RA) + comb += self.reg_out.ok.eq(1) # B, BC or BCREG: potential implicit register (LR) output # these give bl, bcl, bclrl, etc. - op = self.dec.op with m.Switch(op.internal_op): # BC* implicit register (LR) @@ -451,8 +624,12 @@ class DecodeOE(Elaboratable): # mulhw, mulhwu, mulhd, mulhdu - these *ignore* OE # also rotate + # XXX ARGH! ignoring OE causes incompatibility with microwatt + # http://lists.libre-soc.org/pipermail/libre-soc-dev/2020-August/000302.html with m.Case(MicrOp.OP_MUL_H64, MicrOp.OP_MUL_H32, + MicrOp.OP_EXTS, MicrOp.OP_CNTZ, MicrOp.OP_SHL, MicrOp.OP_SHR, MicrOp.OP_RLC, + MicrOp.OP_LOAD, MicrOp.OP_STORE, MicrOp.OP_RLCL, MicrOp.OP_RLCR, MicrOp.OP_EXTSWSLI): pass @@ -482,20 +659,27 @@ class DecodeCRIn(Elaboratable): self.cr_bitfield = Data(3, "cr_bitfield") self.cr_bitfield_b = Data(3, "cr_bitfield_b") self.cr_bitfield_o = Data(3, "cr_bitfield_o") - self.whole_reg = Signal(reset_less=True) + self.whole_reg = Data(8, "cr_fxm") def elaborate(self, platform): m = Module() comb = m.d.comb + op = self.dec.op + m.submodules.ppick = ppick = PriorityPicker(8, reverse_i=True, + reverse_o=True) + # zero-initialisation comb += self.cr_bitfield.ok.eq(0) comb += self.cr_bitfield_b.ok.eq(0) - comb += self.whole_reg.eq(0) + comb += self.cr_bitfield_o.ok.eq(0) + comb += self.whole_reg.ok.eq(0) + + # select the relevant CR bitfields with m.Switch(self.sel_in): with m.Case(CRInSel.NONE): pass # No bitfield activated with m.Case(CRInSel.CR0): - comb += self.cr_bitfield.data.eq(0) + comb += self.cr_bitfield.data.eq(0) # CR0 (MSB0 numbering) comb += self.cr_bitfield.ok.eq(1) with m.Case(CRInSel.BI): comb += self.cr_bitfield.data.eq(self.dec.BI[2:5]) @@ -514,7 +698,16 @@ class DecodeCRIn(Elaboratable): comb += self.cr_bitfield.data.eq(self.dec.BC[2:5]) comb += self.cr_bitfield.ok.eq(1) with m.Case(CRInSel.WHOLE_REG): - comb += self.whole_reg.eq(1) + comb += self.whole_reg.ok.eq(1) + move_one = Signal(reset_less=True) + comb += move_one.eq(self.insn_in[20]) # MSB0 bit 11 + with m.If((op.internal_op == MicrOp.OP_MFCR) & move_one): + # must one-hot the FXM field + comb += ppick.i.eq(self.dec.FXM) + comb += self.whole_reg.data.eq(ppick.o) + with m.Else(): + # otherwise use all of it + comb += self.whole_reg.data.eq(0xff) return m @@ -532,19 +725,23 @@ class DecodeCROut(Elaboratable): self.sel_in = Signal(CROutSel, reset_less=True) self.insn_in = Signal(32, reset_less=True) self.cr_bitfield = Data(3, "cr_bitfield") - self.whole_reg = Signal(reset_less=True) + self.whole_reg = Data(8, "cr_fxm") def elaborate(self, platform): m = Module() comb = m.d.comb + op = self.dec.op + m.submodules.ppick = ppick = PriorityPicker(8, reverse_i=True, + reverse_o=True) comb += self.cr_bitfield.ok.eq(0) - comb += self.whole_reg.eq(0) + comb += self.whole_reg.ok.eq(0) + with m.Switch(self.sel_in): with m.Case(CROutSel.NONE): pass # No bitfield activated with m.Case(CROutSel.CR0): - comb += self.cr_bitfield.data.eq(0) + comb += self.cr_bitfield.data.eq(0) # CR0 (MSB0 numbering) comb += self.cr_bitfield.ok.eq(self.rc_in) # only when RC=1 with m.Case(CROutSel.BF): comb += self.cr_bitfield.data.eq(self.dec.FormX.BF) @@ -553,12 +750,232 @@ class DecodeCROut(Elaboratable): comb += self.cr_bitfield.data.eq(self.dec.FormXL.BT[2:5]) comb += self.cr_bitfield.ok.eq(1) with m.Case(CROutSel.WHOLE_REG): - comb += self.whole_reg.eq(1) + comb += self.whole_reg.ok.eq(1) + move_one = Signal(reset_less=True) + comb += move_one.eq(self.insn_in[20]) + with m.If((op.internal_op == MicrOp.OP_MTCRF)): + with m.If(move_one): + # must one-hot the FXM field + comb += ppick.i.eq(self.dec.FXM) + with m.If(ppick.en_o): + comb += self.whole_reg.data.eq(ppick.o) + with m.Else(): + comb += self.whole_reg.data.eq(0b00000001) # CR7 + with m.Else(): + comb += self.whole_reg.data.eq(self.dec.FXM) + with m.Else(): + # otherwise use all of it + comb += self.whole_reg.data.eq(0xff) return m +# dictionary of Input Record field names that, if they exist, +# will need a corresponding CSV Decoder file column (actually, PowerOp) +# to be decoded (this includes the single bit names) +record_names = {'insn_type': 'internal_op', + 'fn_unit': 'function_unit', + 'rc': 'rc_sel', + 'oe': 'rc_sel', + 'zero_a': 'in1_sel', + 'imm_data': 'in2_sel', + 'invert_in': 'inv_a', + 'invert_out': 'inv_out', + 'rc': 'cr_out', + 'oe': 'cr_in', + 'output_carry': 'cry_out', + 'input_carry': 'cry_in', + 'is_32bit': 'is_32b', + 'is_signed': 'sgn', + 'lk': 'lk', + 'data_len': 'ldst_len', + 'byte_reverse': 'br', + 'sign_extend': 'sgn_ext', + 'ldst_mode': 'upd', + } + + +class PowerDecodeSubset(Elaboratable): + """PowerDecodeSubset: dynamic subset decoder + + only fields actually requested are copied over. hence, "subset" (duh). + """ + def __init__(self, dec, opkls=None, fn_name=None, final=False, state=None): + + self.sv_rm = SVP64Rec(name="dec_svp64") # SVP64 RM field + self.final = final + self.opkls = opkls + self.fn_name = fn_name + if opkls is None: + opkls = Decode2ToOperand + self.do = opkls(fn_name) + col_subset = self.get_col_subset(self.do) + + # only needed for "main" PowerDecode2 + if not self.final: + self.e = Decode2ToExecute1Type(name=self.fn_name, do=self.do) + + # create decoder if one not already given + if dec is None: + dec = create_pdecode(name=fn_name, col_subset=col_subset, + row_subset=self.rowsubsetfn) + self.dec = dec + + # state information needed by the Decoder + if state is None: + state = CoreState("dec2") + self.state = state + + def get_col_subset(self, do): + subset = { 'cr_in', 'cr_out', 'rc_sel'} # needed, non-optional + for k, v in record_names.items(): + if hasattr(do, k): + subset.add(v) + print ("get_col_subset", self.fn_name, do.fields, subset) + return subset + + def rowsubsetfn(self, opcode, row): + return row['unit'] == self.fn_name + + def ports(self): + return self.dec.ports() + self.e.ports() + self.sv_rm.ports() + + def needs_field(self, field, op_field): + if self.final: + do = self.do + else: + do = self.e_tmp.do + return hasattr(do, field) and self.op_get(op_field) is not None + + def do_copy(self, field, val, final=False): + if final or self.final: + do = self.do + else: + do = self.e_tmp.do + if hasattr(do, field) and val is not None: + return getattr(do, field).eq(val) + return [] + + def op_get(self, op_field): + return getattr(self.dec.op, op_field, None) + + def elaborate(self, platform): + m = Module() + comb = m.d.comb + state = self.state + op, do = self.dec.op, self.do + msr, cia = state.msr, state.pc + + # fill in for a normal instruction (not an exception) + # copy over if non-exception, non-privileged etc. is detected + if not self.final: + if self.fn_name is None: + name = "tmp" + else: + name = self.fn_name + "tmp" + self.e_tmp = Decode2ToExecute1Type(name=name, opkls=self.opkls) + + # set up submodule decoders + m.submodules.dec = self.dec + m.submodules.dec_rc = dec_rc = DecodeRC(self.dec) + m.submodules.dec_oe = dec_oe = DecodeOE(self.dec) + m.submodules.dec_cr_in = self.dec_cr_in = DecodeCRIn(self.dec) + m.submodules.dec_cr_out = self.dec_cr_out = DecodeCROut(self.dec) + + # copy instruction through... + for i in [do.insn, + dec_rc.insn_in, dec_oe.insn_in, + self.dec_cr_in.insn_in, self.dec_cr_out.insn_in]: + comb += i.eq(self.dec.opcode_in) -class PowerDecode2(Elaboratable): + # ...and subdecoders' input fields + comb += dec_rc.sel_in.eq(op.rc_sel) + comb += dec_oe.sel_in.eq(op.rc_sel) # XXX should be OE sel + comb += self.dec_cr_in.sel_in.eq(op.cr_in) + comb += self.dec_cr_out.sel_in.eq(op.cr_out) + comb += self.dec_cr_out.rc_in.eq(dec_rc.rc_out.data) + + # copy "state" over + comb += self.do_copy("msr", msr) + comb += self.do_copy("cia", cia) + + # set up instruction type + # no op: defaults to OP_ILLEGAL + if self.fn_name=="MMU": + # mmu is special case: needs SPR opcode as well + mmu0 = self.mmu0_spr_dec + with m.If(((mmu0.dec.op.internal_op == MicrOp.OP_MTSPR) | + (mmu0.dec.op.internal_op == MicrOp.OP_MFSPR))): + comb += self.do_copy("insn_type", mmu0.op_get("internal_op")) + with m.Else(): + comb += self.do_copy("insn_type", self.op_get("internal_op")) + else: + comb += self.do_copy("insn_type", self.op_get("internal_op")) + + # function unit for decoded instruction: requires minor redirect + # for SPR set/get + fn = self.op_get("function_unit") + spr = Signal(10, reset_less=True) + comb += spr.eq(decode_spr_num(self.dec.SPR)) # from XFX + + # XXX BUG - don't use hardcoded magic constants. + # also use ".value" otherwise the test fails. bit of a pain + # https://bugs.libre-soc.org/show_bug.cgi?id=603 + + SPR_PID = 48 # TODO read docs for POWER9 + # Microwatt doesn't implement the partition table + # instead has PRTBL register (SPR) to point to process table + SPR_PRTBL = 720 # see common.vhdl in microwatt, not in POWER9 + with m.If(((self.dec.op.internal_op == MicrOp.OP_MTSPR) | + (self.dec.op.internal_op == MicrOp.OP_MFSPR)) & + ((spr == SPR.DSISR) | (spr == SPR.DAR) + | (spr==SPR_PRTBL) | (spr==SPR_PID))): + comb += self.do_copy("fn_unit", Function.MMU) + with m.Else(): + comb += self.do_copy("fn_unit",fn) + + # immediates + if self.needs_field("zero_a", "in1_sel"): + m.submodules.dec_ai = dec_ai = DecodeAImm(self.dec) + comb += dec_ai.sel_in.eq(op.in1_sel) + comb += self.do_copy("zero_a", dec_ai.immz_out) # RA==0 detected + if self.needs_field("imm_data", "in2_sel"): + m.submodules.dec_bi = dec_bi = DecodeBImm(self.dec) + comb += dec_bi.sel_in.eq(op.in2_sel) + comb += self.do_copy("imm_data", dec_bi.imm_out) # imm in RB + + # rc and oe out + comb += self.do_copy("rc", dec_rc.rc_out) + comb += self.do_copy("oe", dec_oe.oe_out) + + # CR in/out + comb += self.do_copy("read_cr_whole", self.dec_cr_in.whole_reg) + comb += self.do_copy("write_cr_whole", self.dec_cr_out.whole_reg) + comb += self.do_copy("write_cr0", self.dec_cr_out.cr_bitfield.ok) + + comb += self.do_copy("input_cr", self.op_get("cr_in")) # CR in + comb += self.do_copy("output_cr", self.op_get("cr_out")) # CR out + + # decoded/selected instruction flags + comb += self.do_copy("data_len", self.op_get("ldst_len")) + comb += self.do_copy("invert_in", self.op_get("inv_a")) + comb += self.do_copy("invert_out", self.op_get("inv_out")) + comb += self.do_copy("input_carry", self.op_get("cry_in")) + comb += self.do_copy("output_carry", self.op_get("cry_out")) + comb += self.do_copy("is_32bit", self.op_get("is_32b")) + comb += self.do_copy("is_signed", self.op_get("sgn")) + lk = self.op_get("lk") + if lk is not None: + with m.If(lk): + comb += self.do_copy("lk", self.dec.LK) # XXX TODO: accessor + + comb += self.do_copy("byte_reverse", self.op_get("br")) + comb += self.do_copy("sign_extend", self.op_get("sgn_ext")) + comb += self.do_copy("ldst_mode", self.op_get("upd")) # LD/ST mode + + return m + + +class PowerDecode2(PowerDecodeSubset): """PowerDecode2: the main instruction decoder. whilst PowerDecode is responsible for decoding the actual opcode, this @@ -576,82 +993,172 @@ class PowerDecode2(Elaboratable): instructions are illegal (or privileged) or not, and instead of just leaving at that, *replacing* the instruction to execute with a suitable alternative (trap). - """ - def __init__(self, dec): + LDSTExceptions are done the cycle _after_ they're detected (after + they come out of LDSTCompUnit). basically despite the instruction + being decoded, the results of the decode are completely ignored + and "exception.happened" used to set the "actual" instruction to + "OP_TRAP". the LDSTException data structure gets filled in, + in the CompTrapOpSubset and that's what it fills in SRR. - self.dec = dec - self.e = Decode2ToExecute1Type() - - # state information needed by the Decoder (TODO: this as a Record) - self.state = CoreState("dec2") + to make this work, TestIssuer must notice "exception.happened" + after the (failed) LD/ST and copies the LDSTException info from + the output, into here (PowerDecoder2). without incrementing PC. + """ - def ports(self): - return self.dec.ports() + self.e.ports() + def __init__(self, dec, opkls=None, fn_name=None, final=False, state=None): + super().__init__(dec, opkls, fn_name, final, state) + self.exc = LDSTException("dec2_exc") + + self.cr_out_isvec = Signal(1, name="cr_out_isvec") + self.cr_in_isvec = Signal(1, name="cr_in_isvec") + self.cr_in_b_isvec = Signal(1, name="cr_in_b_isvec") + self.cr_in_o_isvec = Signal(1, name="cr_in_o_isvec") + self.in1_isvec = Signal(1, name="reg_a_isvec") + self.in2_isvec = Signal(1, name="reg_b_isvec") + self.in3_isvec = Signal(1, name="reg_c_isvec") + self.o_isvec = Signal(1, name="reg_o_isvec") + self.o2_isvec = Signal(1, name="reg_o2_isvec") + self.no_out_vec = Signal(1, name="no_out_vec") # no outputs are vectors + + def get_col_subset(self, opkls): + subset = super().get_col_subset(opkls) + subset.add("asmcode") + subset.add("in1_sel") + subset.add("in2_sel") + subset.add("in3_sel") + subset.add("out_sel") + subset.add("sv_in1") + subset.add("sv_in2") + subset.add("sv_in3") + subset.add("sv_out") + subset.add("sv_cr_in") + subset.add("sv_cr_out") + subset.add("SV_Etype") + subset.add("SV_Ptype") + subset.add("lk") + subset.add("internal_op") + subset.add("form") + return subset def elaborate(self, platform): - m = Module() + m = super().elaborate(platform) comb = m.d.comb + state = self.state e_out, op, do_out = self.e, self.dec.op, self.e.do - msr, cia = self.state.msr, self.state.pc + dec_spr, msr, cia, ext_irq = state.dec, state.msr, state.pc, state.eint + e = self.e_tmp + do = e.do # fill in for a normal instruction (not an exception) # copy over if non-exception, non-privileged etc. is detected - e = Decode2ToExecute1Type() - do = e.do # set up submodule decoders - m.submodules.dec = self.dec m.submodules.dec_a = dec_a = DecodeA(self.dec) m.submodules.dec_b = dec_b = DecodeB(self.dec) m.submodules.dec_c = dec_c = DecodeC(self.dec) m.submodules.dec_o = dec_o = DecodeOut(self.dec) m.submodules.dec_o2 = dec_o2 = DecodeOut2(self.dec) - m.submodules.dec_rc = dec_rc = DecodeRC(self.dec) - m.submodules.dec_oe = dec_oe = DecodeOE(self.dec) - m.submodules.dec_cr_in = dec_cr_in = DecodeCRIn(self.dec) - m.submodules.dec_cr_out = dec_cr_out = DecodeCROut(self.dec) + + # and SVP64 Extra decoders + m.submodules.crout_svdec = crout_svdec = SVP64CRExtra() + m.submodules.crin_svdec = crin_svdec = SVP64CRExtra() + m.submodules.crin_svdec_b = crin_svdec_b = SVP64CRExtra() + m.submodules.crin_svdec_o = crin_svdec_o = SVP64CRExtra() + m.submodules.in1_svdec = in1_svdec = SVP64RegExtra() + m.submodules.in2_svdec = in2_svdec = SVP64RegExtra() + m.submodules.in3_svdec = in3_svdec = SVP64RegExtra() + m.submodules.o_svdec = o_svdec = SVP64RegExtra() + m.submodules.o2_svdec = o2_svdec = SVP64RegExtra() + + # get the 5-bit reg data before svp64-munging it into 7-bit plus isvec + reg = Signal(5, reset_less=True) # copy instruction through... for i in [do.insn, dec_a.insn_in, dec_b.insn_in, - dec_c.insn_in, dec_o.insn_in, dec_o2.insn_in, dec_rc.insn_in, - dec_oe.insn_in, dec_cr_in.insn_in, dec_cr_out.insn_in]: + dec_c.insn_in, dec_o.insn_in, dec_o2.insn_in]: comb += i.eq(self.dec.opcode_in) + # now do the SVP64 munging. op.SV_Etype and op.sv_in1 comes from + # PowerDecoder which in turn comes from LDST-RM*.csv and RM-*.csv + # which in turn were auto-generated by sv_analysis.py + extra = self.sv_rm.extra # SVP64 extra bits 10:18 + + ####### + # CR out + comb += crout_svdec.idx.eq(op.sv_cr_out) # SVP64 CR out + comb += self.cr_out_isvec.eq(crout_svdec.isvec) + + ####### + # CR in - index selection slightly different due to shared CR field sigh + cr_a_idx = Signal(SVEXTRA) + cr_b_idx = Signal(SVEXTRA) + + # these change slightly, when decoding BA/BB. really should have + # their own separate CSV column: sv_cr_in1 and sv_cr_in2, but hey + comb += cr_a_idx.eq(op.sv_cr_in) + comb += cr_b_idx.eq(SVEXTRA.NONE) + with m.If(op.sv_cr_in == SVEXTRA.Idx_1_2.value): + comb += cr_a_idx.eq(SVEXTRA.Idx1) + comb += cr_b_idx.eq(SVEXTRA.Idx2) + + comb += self.cr_in_isvec.eq(crin_svdec.isvec) + comb += self.cr_in_b_isvec.eq(crin_svdec_b.isvec) + comb += self.cr_in_o_isvec.eq(crin_svdec_o.isvec) + + # indices are slightly different, BA/BB mess sorted above + comb += crin_svdec.idx.eq(cr_a_idx) # SVP64 CR in A + comb += crin_svdec_b.idx.eq(cr_b_idx) # SVP64 CR in B + comb += crin_svdec_o.idx.eq(op.sv_cr_out) # SVP64 CR out + # ...and subdecoders' input fields comb += dec_a.sel_in.eq(op.in1_sel) comb += dec_b.sel_in.eq(op.in2_sel) comb += dec_c.sel_in.eq(op.in3_sel) comb += dec_o.sel_in.eq(op.out_sel) comb += dec_o2.sel_in.eq(op.out_sel) - comb += dec_o2.lk.eq(do.lk) - comb += dec_rc.sel_in.eq(op.rc_sel) - comb += dec_oe.sel_in.eq(op.rc_sel) # XXX should be OE sel - comb += dec_cr_in.sel_in.eq(op.cr_in) - comb += dec_cr_out.sel_in.eq(op.cr_out) - comb += dec_cr_out.rc_in.eq(dec_rc.rc_out.data) + if hasattr(do, "lk"): + comb += dec_o2.lk.eq(do.lk) - # copy "state" over - comb += do.msr.eq(msr) - comb += do.cia.eq(cia) - - # set up instruction, pick fn unit - # no op: defaults to OP_ILLEGAL - comb += do.insn_type.eq(op.internal_op) - comb += do.fn_unit.eq(op.function_unit) + # get SVSTATE srcstep (TODO: elwidth, dststep etc.) needed below + srcstep = Signal.like(self.state.svstate.srcstep) + comb += srcstep.eq(self.state.svstate.srcstep) # registers a, b, c and out and out2 (LD/ST EA) - comb += e.read_reg1.eq(dec_a.reg_out) - comb += e.read_reg2.eq(dec_b.reg_out) - comb += e.read_reg3.eq(dec_c.reg_out) - comb += e.write_reg.eq(dec_o.reg_out) - comb += e.write_ea.eq(dec_o2.reg_out) - comb += do.imm_data.eq(dec_b.imm_out) # immediate in RB (usually) - comb += do.zero_a.eq(dec_a.immz_out) # RA==0 detected - - # rc and oe out - comb += do.rc.eq(dec_rc.rc_out) - comb += do.oe.eq(dec_oe.oe_out) + for to_reg, fromreg, svdec in ( + (e.read_reg1, dec_a.reg_out, in1_svdec), + (e.read_reg2, dec_b.reg_out, in2_svdec), + (e.read_reg3, dec_c.reg_out, in3_svdec), + (e.write_reg, dec_o.reg_out, o_svdec), + (e.write_ea, dec_o2.reg_out, o2_svdec)): + comb += svdec.extra.eq(extra) # EXTRA field of SVP64 RM + comb += svdec.etype.eq(op.SV_Etype) # EXTRA2/3 for this insn + comb += svdec.reg_in.eq(fromreg.data) # 3-bit (CR0/BC/BFA) + comb += to_reg.ok.eq(fromreg.ok) + # detect if Vectorised: add srcstep if yes. TODO: a LOT. + # this trick only holds when elwidth=default and in single-pred + with m.If(svdec.isvec): + comb += to_reg.data.eq(srcstep+svdec.reg_out) # 7-bit output + with m.Else(): + comb += to_reg.data.eq(svdec.reg_out) # 7-bit output + + comb += in1_svdec.idx.eq(op.sv_in1) # SVP64 reg #1 (matches in1_sel) + comb += in2_svdec.idx.eq(op.sv_in2) # SVP64 reg #2 (matches in2_sel) + comb += in3_svdec.idx.eq(op.sv_in3) # SVP64 reg #3 (matches in3_sel) + comb += o_svdec.idx.eq(op.sv_out) # SVP64 output (matches out_sel) + # XXX TODO - work out where this should come from. the problem is + # that LD-with-update is implied (computed from "is instruction in + # "update mode" rather than specified cleanly as its own CSV column + #comb += o2_svdec.idx.eq(op.sv_out) # SVP64 output (implicit) + + # output reg-is-vectorised (and when no output is vectorised) + comb += self.in1_isvec.eq(in1_svdec.isvec) + comb += self.in2_isvec.eq(in2_svdec.isvec) + comb += self.in3_isvec.eq(in3_svdec.isvec) + comb += self.o_isvec.eq(o_svdec.isvec) + comb += self.o2_isvec.eq(o2_svdec.isvec) + # TODO: include SPRs and CRs here! must be True when *all* are scalar + comb += self.no_out_vec.eq((~o2_svdec.isvec) & (~o_svdec.isvec)) # SPRs out comb += e.read_spr1.eq(dec_a.spr_out) @@ -664,58 +1171,93 @@ class PowerDecode2(Elaboratable): comb += e.write_fast2.eq(dec_o2.fast_out) # condition registers (CR) - comb += e.read_cr1.eq(dec_cr_in.cr_bitfield) - comb += e.read_cr2.eq(dec_cr_in.cr_bitfield_b) - comb += e.read_cr3.eq(dec_cr_in.cr_bitfield_o) - comb += e.write_cr.eq(dec_cr_out.cr_bitfield) - - comb += do.read_cr_whole.eq(dec_cr_in.whole_reg) - comb += do.write_cr_whole.eq(dec_cr_out.whole_reg) - comb += do.write_cr0.eq(dec_cr_out.cr_bitfield.ok) - - # decoded/selected instruction flags - comb += do.data_len.eq(op.ldst_len) - comb += do.invert_in.eq(op.inv_a) - comb += do.invert_out.eq(op.inv_out) - comb += do.input_carry.eq(op.cry_in) # carry comes in - comb += do.output_carry.eq(op.cry_out) # carry goes out - comb += do.is_32bit.eq(op.is_32b) - comb += do.is_signed.eq(op.sgn) - with m.If(op.lk): - comb += do.lk.eq(self.dec.LK) # XXX TODO: accessor - - comb += do.byte_reverse.eq(op.br) - comb += do.sign_extend.eq(op.sgn_ext) - comb += do.ldst_mode.eq(op.upd) # LD/ST mode (update, cache-inhibit) - - # These should be removed eventually - comb += do.input_cr.eq(op.cr_in) # condition reg comes in - comb += do.output_cr.eq(op.cr_out) # condition reg goes in + for to_reg, fromreg, svdec in ( + (e.read_cr1, self.dec_cr_in.cr_bitfield, crin_svdec), + (e.read_cr2, self.dec_cr_in.cr_bitfield_b, crin_svdec_b), + (e.read_cr3, self.dec_cr_in.cr_bitfield_o, crin_svdec_o), + (e.write_cr, self.dec_cr_out.cr_bitfield, crout_svdec)): + comb += svdec.extra.eq(extra) # EXTRA field of SVP64 RM + comb += svdec.etype.eq(op.SV_Etype) # EXTRA2/3 for this insn + comb += svdec.cr_in.eq(fromreg.data) # 3-bit (CR0/BC/BFA) + with m.If(svdec.isvec): + comb += to_reg.data.eq(srcstep+svdec.cr_out) # 7-bit output + with m.Else(): + comb += to_reg.data.eq(svdec.cr_out) # 7-bit output + comb += to_reg.ok.eq(fromreg.ok) # sigh this is exactly the sort of thing for which the # decoder is designed to not need. MTSPR, MFSPR and others need # access to the XER bits. however setting e.oe is not appropriate with m.If(op.internal_op == MicrOp.OP_MFSPR): - comb += e.xer_in.eq(1) + comb += e.xer_in.eq(0b111) # SO, CA, OV + with m.If(op.internal_op == MicrOp.OP_CMP): + comb += e.xer_in.eq(1<> 4) # cut bottom 4 bits - comb += do.traptype.eq(traptype) # request type - comb += do.msr.eq(self.state.msr) # copy of MSR "state" - comb += do.cia.eq(self.state.pc) # copy of PC "state" + comb += self.do_copy("insn", self.dec.opcode_in, True) + comb += self.do_copy("insn_type", MicrOp.OP_TRAP, True) + comb += self.do_copy("fn_unit", Function.TRAP, True) + comb += self.do_copy("trapaddr", trapaddr >> 4, True) # bottom 4 bits + comb += self.do_copy("traptype", traptype, True) # request type + comb += self.do_copy("ldst_exc", exc, True) # request type + comb += self.do_copy("msr", self.state.msr, True) # copy of MSR "state" + comb += self.do_copy("cia", self.state.pc, True) # copy of PC "state" + +# SVP64 Prefix fields: see https://libre-soc.org/openpower/sv/svp64/ +# identifies if an instruction is a SVP64-encoded prefix, and extracts +# the 24-bit SVP64 context (RM) if it is +class SVP64PrefixDecoder(Elaboratable): + + def __init__(self): + self.opcode_in = Signal(32, reset_less=True) + self.raw_opcode_in = Signal.like(self.opcode_in, reset_less=True) + self.is_svp64_mode = Signal(1, reset_less=True) + self.svp64_rm = Signal(24, reset_less=True) + self.bigendian = Signal(reset_less=True) + + def elaborate(self, platform): + m = Module() + opcode_in = self.opcode_in + comb = m.d.comb + # sigh copied this from TopPowerDecoder + # raw opcode in assumed to be in LE order: byte-reverse it to get BE + raw_le = self.raw_opcode_in + l = [] + for i in range(0, 32, 8): + l.append(raw_le[i:i+8]) + l.reverse() + raw_be = Cat(*l) + comb += opcode_in.eq(Mux(self.bigendian, raw_be, raw_le)) + + # start identifying if the incoming opcode is SVP64 prefix) + major = Signal(6, reset_less=True) + ident = Signal(2, reset_less=True) + + comb += major.eq(sel(opcode_in, SVP64P.OPC)) + comb += ident.eq(sel(opcode_in, SVP64P.SVP64_7_9)) + + comb += self.is_svp64_mode.eq( + (major == Const(1, 6)) & # EXT01 + (ident == Const(0b11, 2)) # identifier bits + ) + + with m.If(self.is_svp64_mode): + # now grab the 24-bit ReMap context bits, + comb += self.svp64_rm.eq(sel(opcode_in, SVP64P.RM)) + + return m + + def ports(self): + return [self.opcode_in, self.raw_opcode_in, self.is_svp64_mode, + self.svp64_rm, self.bigendian] def get_rdflags(e, cu): rdl = [] @@ -773,6 +1371,10 @@ def get_rdflags(e, cu): if __name__ == '__main__': + svp64 = SVP64PowerDecoder() + vl = rtlil.convert(svp64, ports=svp64.ports()) + with open("svp64_dec.il", "w") as f: + f.write(vl) pdecode = create_pdecode() dec2 = PowerDecode2(pdecode) vl = rtlil.convert(dec2, ports=dec2.ports() + pdecode.ports())