X-Git-Url: https://git.libre-soc.org/?p=soc.git;a=blobdiff_plain;f=src%2Fsoc%2Fdecoder%2Fpower_decoder2.py;h=3042da41caeba1f0b302c6e068635f167ed688ea;hp=e3b6175dd735110a3a9439f912d495ea6f2b2b25;hb=5963eef6679f6833b6b8f854868d90480e3753b2;hpb=0fa0da7ccac38dca3c3bfe0d4ab4e15a27367ef2 diff --git a/src/soc/decoder/power_decoder2.py b/src/soc/decoder/power_decoder2.py index e3b6175d..3042da41 100644 --- a/src/soc/decoder/power_decoder2.py +++ b/src/soc/decoder/power_decoder2.py @@ -8,6 +8,8 @@ over-riding the internal opcode when an exception is needed. from nmigen import Module, Elaboratable, Signal, Mux, Const, Cat, Repl, Record from nmigen.cli import rtlil +from nmutil.util import sel + from soc.regfile.regfiles import XERRegs from nmutil.picker import PriorityPicker @@ -16,6 +18,8 @@ from nmutil.extend import exts from soc.experiment.mem_types import LDSTException +from soc.decoder.power_svp64_prefix import SVP64PrefixDecoder +from soc.decoder.power_svp64_extra import SVP64CRExtra, SVP64RegExtra from soc.decoder.power_regspec_map import regspec_decode_read from soc.decoder.power_regspec_map import regspec_decode_write from soc.decoder.power_decoder import create_pdecode @@ -27,8 +31,8 @@ from soc.decoder.power_enums import (MicrOp, CryIn, Function, from soc.decoder.decode2execute1 import (Decode2ToExecute1Type, Data, Decode2ToOperand) from soc.sv.svp64 import SVP64Rec -from soc.consts import (MSR, sel, SPEC, EXTRA2, EXTRA3, SVP64P, field, - SPEC_SIZE, SPECb, SPEC_AUG_SIZE) +from soc.consts import (MSR, SPEC, EXTRA2, EXTRA3, SVP64P, field, + SPEC_SIZE, SPECb, SPEC_AUG_SIZE, SVP64CROffs) from soc.regfile.regfiles import FastRegs from soc.consts import TT @@ -79,151 +83,6 @@ class SPRMap(Elaboratable): return m -class SVP64ExtraSpec(Elaboratable): - """SVP64ExtraSpec - decodes SVP64 Extra specification. - - selects the required EXTRA2/3 field. - - see https://libre-soc.org/openpower/sv/svp64/ - """ - def __init__(self): - self.extra = Signal(9, reset_less=True) - self.etype = Signal(SVEtype, reset_less=True) # 2 or 3 bits - self.idx = Signal(SVEXTRA, reset_less=True) # which part of extra - self.spec = Signal(3) # EXTRA spec for the register - - def elaborate(self, platform): - m = Module() - comb = m.d.comb - spec = self.spec - extra = self.extra - - # back in the LDSTRM-* and RM-* files generated by sv_analysis.py - # we marked every op with an Etype: EXTRA2 or EXTRA3, and also said - # which of the 4 (or 3 for EXTRA3) sub-fields of bits 10:18 contain - # the register-extension information. extract those now - with m.Switch(self.etype): - # 2-bit index selection mode - with m.Case(SVEtype.EXTRA2): - with m.Switch(self.idx): - with m.Case(SVEXTRA.Idx0): # 1st 2 bits [0:1] - comb += spec[SPEC.VEC].eq(extra[EXTRA2.IDX0_VEC]) - comb += spec[SPEC.MSB].eq(extra[EXTRA2.IDX0_MSB]) - with m.Case(SVEXTRA.Idx1): # 2nd 2 bits [2:3] - comb += spec[SPEC.VEC].eq(extra[EXTRA2.IDX1_VEC]) - comb += spec[SPEC.MSB].eq(extra[EXTRA2.IDX1_MSB]) - with m.Case(SVEXTRA.Idx2): # 3rd 2 bits [4:5] - comb += spec[SPEC.VEC].eq(extra[EXTRA2.IDX2_VEC]) - comb += spec[SPEC.MSB].eq(extra[EXTRA2.IDX2_MSB]) - with m.Case(SVEXTRA.Idx3): # 4th 2 bits [6:7] - comb += spec[SPEC.VEC].eq(extra[EXTRA2.IDX3_VEC]) - comb += spec[SPEC.MSB].eq(extra[EXTRA2.IDX3_MSB]) - # 3-bit index selection mode - with m.Case(SVEtype.EXTRA3): - with m.Switch(self.idx): - with m.Case(SVEXTRA.Idx0): # 1st 3 bits [0:2] - comb += spec.eq(sel(extra, EXTRA3.IDX0)) - with m.Case(SVEXTRA.Idx1): # 2nd 3 bits [3:5] - comb += spec.eq(sel(extra, EXTRA3.IDX1)) - with m.Case(SVEXTRA.Idx2): # 3rd 3 bits [6:8] - comb += spec.eq(sel(extra, EXTRA3.IDX2)) - # cannot fit more than 9 bits so there is no 4th thing - - return m - - -class SVP64RegExtra(SVP64ExtraSpec): - """SVP64RegExtra - decodes SVP64 Extra fields to determine reg extension - - incoming 5-bit GPR/FP is turned into a 7-bit and marked as scalar/vector - depending on info in one of the positions in the EXTRA field. - - designed so that "no change" to the 5-bit register number occurs if - SV either does not apply or the relevant EXTRA2/3 field bits are zero. - - see https://libre-soc.org/openpower/sv/svp64/ - """ - def __init__(self): - SVP64ExtraSpec.__init__(self) - self.reg_in = Signal(5) # incoming reg number (5 bits, RA, RB) - self.reg_out = Signal(7) # extra-augmented output (7 bits) - self.isvec = Signal(1) # reg is marked as vector if true - - def elaborate(self, platform): - m = super().elaborate(platform) # select required EXTRA2/3 - comb = m.d.comb - - # first get the spec. if not changed it's "scalar identity behaviour" - # which is zero which is ok. - spec = self.spec - - # now decode it. bit 0 is "scalar/vector". note that spec could be zero - # from above, which (by design) has the effect of "no change", below. - - # simple: isvec is top bit of spec - comb += self.isvec.eq(spec[SPEC.VEC]) - # extra bits for register number augmentation - spec_aug = Signal(SPEC_AUG_SIZE) - comb += spec_aug.eq(field(spec, SPECb.MSB, SPECb.LSB, SPEC_SIZE)) - - # decode vector differently from scalar - with m.If(self.isvec): - # Vector: shifted up, extra in LSBs (RA << 2) | spec[1:2] - comb += self.reg_out.eq(Cat(spec_aug, self.reg_in)) - with m.Else(): - # Scalar: not shifted up, extra in MSBs RA | (spec[1:2] << 5) - comb += self.reg_out.eq(Cat(self.reg_in, spec_aug)) - - return m - - -class SVP64CRExtra(SVP64ExtraSpec): - """SVP64CRExtra - decodes SVP64 Extra fields to determine CR extension - - incoming 3-bit CR is turned into a 7-bit and marked as scalar/vector - depending on info in one of the positions in the EXTRA field. - - yes, really, 128 CRs. INT is 128, FP is 128, therefore CRs are 128. - - designed so that "no change" to the 3-bit CR register number occurs if - SV either does not apply or the relevant EXTRA2/3 field bits are zero. - - see https://libre-soc.org/openpower/sv/svp64/appendix - """ - def __init__(self): - SVP64ExtraSpec.__init__(self) - self.cr_in = Signal(3) # incoming CR number (3 bits, BA[0:2], BFA) - self.cr_out = Signal(7) # extra-augmented CR output (7 bits) - self.isvec = Signal(1) # reg is marked as vector if true - - def elaborate(self, platform): - m = super().elaborate(platform) # select required EXTRA2/3 - comb = m.d.comb - - # first get the spec. if not changed it's "scalar identity behaviour" - # which is zero which is ok. - spec = self.spec - - # now decode it. bit 0 is "scalar/vector". note that spec could be zero - # from above, which (by design) has the effect of "no change", below. - - # simple: isvec is top bit of spec - comb += self.isvec.eq(spec[SPEC.VEC]) - # extra bits for register number augmentation - spec_aug = Signal(SPEC_AUG_SIZE) - comb += spec_aug.eq(field(spec, SPECb.MSB, SPECb.LSB, SPEC_SIZE)) - - # decode vector differently from scalar, insert bits 1 and 2 accordingly - with m.If(self.isvec): - # Vector: shifted up, extra in LSBs (CR << 4) | (spec[1:2] << 2) - comb += self.cr_out.eq(Cat(Const(0, 2), spec_aug, self.cr_in)) - with m.Else(): - # Scalar: not shifted up, extra in MSBs CR | (spec[1:2] << 3) - comb += self.cr_out.eq(Cat(self.cr_in, spec_aug)) - - return m - - class DecodeA(Elaboratable): """DecodeA from instruction @@ -746,6 +605,12 @@ class DecodeCROut(Elaboratable): comb += self.whole_reg.ok.eq(0) comb += self.sv_override.eq(0) + # please note these MUST match (setting of cr_bitfield.ok) exactly + # with write_cr0 below in PowerDecoder2. the reason it's separated + # is to avoid having duplicate copies of DecodeCROut in multiple + # PowerDecoderSubsets. register decoding should be a one-off in + # PowerDecoder2. see https://bugs.libre-soc.org/show_bug.cgi?id=606 + with m.Switch(self.sel_in): with m.Case(CROutSel.NONE): pass # No bitfield activated @@ -813,9 +678,12 @@ class PowerDecodeSubset(Elaboratable): only fields actually requested are copied over. hence, "subset" (duh). """ - def __init__(self, dec, opkls=None, fn_name=None, final=False, state=None): + def __init__(self, dec, opkls=None, fn_name=None, final=False, state=None, + svp64_en=True): - self.sv_rm = SVP64Rec(name="dec_svp64") # SVP64 RM field + self.svp64_en = svp64_en + if svp64_en: + self.sv_rm = SVP64Rec(name="dec_svp64") # SVP64 RM field self.final = final self.opkls = opkls self.fn_name = fn_name @@ -848,10 +716,29 @@ class PowerDecodeSubset(Elaboratable): return subset def rowsubsetfn(self, opcode, row): - return row['unit'] == self.fn_name + """select per-Function-Unit subset of opcodes to be processed + + normally this just looks at the "unit" column. MMU is different + in that it processes specific SPR set/get operations that the SPR + pipeline should not. + """ + return (row['unit'] == self.fn_name or + # sigh a dreadful hack: MTSPR and MFSPR need to be processed + # by the MMU pipeline so we direct those opcodes to MMU **AND** + # SPR pipelines, then selectively weed out the SPRs that should + # or should not not go to each pipeline, further down. + # really this should be done by modifying the CSV syntax + # to support multiple tasks (unit column multiple entries) + # see https://bugs.libre-soc.org/show_bug.cgi?id=310 + (self.fn_name == 'MMU' and row['unit'] == 'SPR' and + row['internal op'] in ['OP_MTSPR', 'OP_MFSPR']) + ) def ports(self): - return self.dec.ports() + self.e.ports() + self.sv_rm.ports() + ports = self.dec.ports() + self.e.ports() + if self.svp64_en: + ports += self.sv_rm.ports() + return ports def needs_field(self, field, op_field): if self.final: @@ -878,7 +765,6 @@ class PowerDecodeSubset(Elaboratable): state = self.state op, do = self.dec.op, self.do msr, cia = state.msr, state.pc - # fill in for a normal instruction (not an exception) # copy over if non-exception, non-privileged etc. is detected if not self.final: @@ -890,23 +776,16 @@ class PowerDecodeSubset(Elaboratable): # set up submodule decoders m.submodules.dec = self.dec - m.submodules.dec_rc = dec_rc = DecodeRC(self.dec) + m.submodules.dec_rc = self.dec_rc = dec_rc = DecodeRC(self.dec) m.submodules.dec_oe = dec_oe = DecodeOE(self.dec) - m.submodules.dec_cr_in = self.dec_cr_in = DecodeCRIn(self.dec) - m.submodules.dec_cr_out = self.dec_cr_out = DecodeCROut(self.dec) # copy instruction through... - for i in [do.insn, - dec_rc.insn_in, dec_oe.insn_in, - self.dec_cr_in.insn_in, self.dec_cr_out.insn_in]: + for i in [do.insn, dec_rc.insn_in, dec_oe.insn_in, ]: comb += i.eq(self.dec.opcode_in) # ...and subdecoders' input fields comb += dec_rc.sel_in.eq(op.rc_sel) comb += dec_oe.sel_in.eq(op.rc_sel) # XXX should be OE sel - comb += self.dec_cr_in.sel_in.eq(op.cr_in) - comb += self.dec_cr_out.sel_in.eq(op.cr_out) - comb += self.dec_cr_out.rc_in.eq(dec_rc.rc_out.data) # copy "state" over comb += self.do_copy("msr", msr) @@ -914,16 +793,8 @@ class PowerDecodeSubset(Elaboratable): # set up instruction type # no op: defaults to OP_ILLEGAL - if self.fn_name=="MMU": - # mmu is special case: needs SPR opcode as well - mmu0 = self.mmu0_spr_dec - with m.If(((mmu0.dec.op.internal_op == MicrOp.OP_MTSPR) | - (mmu0.dec.op.internal_op == MicrOp.OP_MFSPR))): - comb += self.do_copy("insn_type", mmu0.op_get("internal_op")) - with m.Else(): - comb += self.do_copy("insn_type", self.op_get("internal_op")) - else: - comb += self.do_copy("insn_type", self.op_get("internal_op")) + internal_op = self.op_get("internal_op") + comb += self.do_copy("insn_type", internal_op) # function unit for decoded instruction: requires minor redirect # for SPR set/get @@ -931,21 +802,27 @@ class PowerDecodeSubset(Elaboratable): spr = Signal(10, reset_less=True) comb += spr.eq(decode_spr_num(self.dec.SPR)) # from XFX - # XXX BUG - don't use hardcoded magic constants. - # also use ".value" otherwise the test fails. bit of a pain - # https://bugs.libre-soc.org/show_bug.cgi?id=603 - - SPR_PID = 48 # TODO read docs for POWER9 # Microwatt doesn't implement the partition table # instead has PRTBL register (SPR) to point to process table - SPR_PRTBL = 720 # see common.vhdl in microwatt, not in POWER9 - with m.If(((self.dec.op.internal_op == MicrOp.OP_MTSPR) | - (self.dec.op.internal_op == MicrOp.OP_MFSPR)) & - ((spr == SPR.DSISR) | (spr == SPR.DAR) - | (spr==SPR_PRTBL) | (spr==SPR_PID))): - comb += self.do_copy("fn_unit", Function.MMU) + is_spr_mv = Signal() + is_mmu_spr = Signal() + comb += is_spr_mv.eq((internal_op == MicrOp.OP_MTSPR) | + (internal_op == MicrOp.OP_MFSPR)) + comb += is_mmu_spr.eq((spr == SPR.DSISR.value) | + (spr == SPR.DAR.value) | + (spr == SPR.PRTBL.value) | + (spr == SPR.PIDR.value)) + # MMU must receive MMU SPRs + with m.If(is_spr_mv & (fn == Function.SPR) & is_mmu_spr): + comb += self.do_copy("fn_unit", Function.NONE) + comb += self.do_copy("insn_type", MicrOp.OP_ILLEGAL) + # SPR pipe must *not* receive MMU SPRs + with m.Elif(is_spr_mv & (fn == Function.MMU) & ~is_mmu_spr): + comb += self.do_copy("fn_unit", Function.NONE) + comb += self.do_copy("insn_type", MicrOp.OP_ILLEGAL) + # all others ok with m.Else(): - comb += self.do_copy("fn_unit",fn) + comb += self.do_copy("fn_unit", fn) # immediates if self.needs_field("zero_a", "in1_sel"): @@ -961,10 +838,14 @@ class PowerDecodeSubset(Elaboratable): comb += self.do_copy("rc", dec_rc.rc_out) comb += self.do_copy("oe", dec_oe.oe_out) - # CR in/out - comb += self.do_copy("read_cr_whole", self.dec_cr_in.whole_reg) - comb += self.do_copy("write_cr_whole", self.dec_cr_out.whole_reg) - comb += self.do_copy("write_cr0", self.dec_cr_out.cr_bitfield.ok) + # CR in/out - note: these MUST match with what happens in + # DecodeCROut! + rc_out = self.dec_rc.rc_out.data + with m.Switch(op.cr_out): + with m.Case(CROutSel.CR0, CROutSel.CR1): + comb += self.do_copy("write_cr0", rc_out) # only when RC=1 + with m.Case(CROutSel.BF, CROutSel.BT): + comb += self.do_copy("write_cr0", 1) comb += self.do_copy("input_cr", self.op_get("cr_in")) # CR in comb += self.do_copy("output_cr", self.op_get("cr_out")) # CR out @@ -1020,20 +901,26 @@ class PowerDecode2(PowerDecodeSubset): the output, into here (PowerDecoder2). without incrementing PC. """ - def __init__(self, dec, opkls=None, fn_name=None, final=False, state=None): - super().__init__(dec, opkls, fn_name, final, state) + def __init__(self, dec, opkls=None, fn_name=None, final=False, + state=None, svp64_en=True): + super().__init__(dec, opkls, fn_name, final, state, svp64_en) self.exc = LDSTException("dec2_exc") - self.cr_out_isvec = Signal(1, name="cr_out_isvec") - self.cr_in_isvec = Signal(1, name="cr_in_isvec") - self.cr_in_b_isvec = Signal(1, name="cr_in_b_isvec") - self.cr_in_o_isvec = Signal(1, name="cr_in_o_isvec") - self.in1_isvec = Signal(1, name="reg_a_isvec") - self.in2_isvec = Signal(1, name="reg_b_isvec") - self.in3_isvec = Signal(1, name="reg_c_isvec") - self.o_isvec = Signal(1, name="reg_o_isvec") - self.o2_isvec = Signal(1, name="reg_o2_isvec") - self.no_out_vec = Signal(1, name="no_out_vec") # no outputs are vectors + if self.svp64_en: + self.cr_out_isvec = Signal(1, name="cr_out_isvec") + self.cr_in_isvec = Signal(1, name="cr_in_isvec") + self.cr_in_b_isvec = Signal(1, name="cr_in_b_isvec") + self.cr_in_o_isvec = Signal(1, name="cr_in_o_isvec") + self.in1_isvec = Signal(1, name="reg_a_isvec") + self.in2_isvec = Signal(1, name="reg_b_isvec") + self.in3_isvec = Signal(1, name="reg_c_isvec") + self.o_isvec = Signal(1, name="reg_o_isvec") + self.o2_isvec = Signal(1, name="reg_o2_isvec") + self.no_in_vec = Signal(1, name="no_in_vec") # no inputs vector + self.no_out_vec = Signal(1, name="no_out_vec") # no outputs vector + else: + self.no_in_vec = Const(1, 1) + self.no_out_vec = Const(1, 1) def get_col_subset(self, opkls): subset = super().get_col_subset(opkls) @@ -1042,14 +929,15 @@ class PowerDecode2(PowerDecodeSubset): subset.add("in2_sel") subset.add("in3_sel") subset.add("out_sel") - subset.add("sv_in1") - subset.add("sv_in2") - subset.add("sv_in3") - subset.add("sv_out") - subset.add("sv_cr_in") - subset.add("sv_cr_out") - subset.add("SV_Etype") - subset.add("SV_Ptype") + if self.svp64_en: + subset.add("sv_in1") + subset.add("sv_in2") + subset.add("sv_in3") + subset.add("sv_out") + subset.add("sv_cr_in") + subset.add("sv_cr_out") + subset.add("SV_Etype") + subset.add("SV_Ptype") subset.add("lk") subset.add("internal_op") subset.add("form") @@ -1061,6 +949,7 @@ class PowerDecode2(PowerDecodeSubset): state = self.state e_out, op, do_out = self.e, self.dec.op, self.e.do dec_spr, msr, cia, ext_irq = state.dec, state.msr, state.pc, state.eint + rc_out = self.dec_rc.rc_out.data e = self.e_tmp do = e.do @@ -1073,60 +962,41 @@ class PowerDecode2(PowerDecodeSubset): m.submodules.dec_c = dec_c = DecodeC(self.dec) m.submodules.dec_o = dec_o = DecodeOut(self.dec) m.submodules.dec_o2 = dec_o2 = DecodeOut2(self.dec) + m.submodules.dec_cr_in = self.dec_cr_in = DecodeCRIn(self.dec) + m.submodules.dec_cr_out = self.dec_cr_out = DecodeCROut(self.dec) - # and SVP64 Extra decoders - m.submodules.crout_svdec = crout_svdec = SVP64CRExtra() - m.submodules.crin_svdec = crin_svdec = SVP64CRExtra() - m.submodules.crin_svdec_b = crin_svdec_b = SVP64CRExtra() - m.submodules.crin_svdec_o = crin_svdec_o = SVP64CRExtra() - m.submodules.in1_svdec = in1_svdec = SVP64RegExtra() - m.submodules.in2_svdec = in2_svdec = SVP64RegExtra() - m.submodules.in3_svdec = in3_svdec = SVP64RegExtra() - m.submodules.o_svdec = o_svdec = SVP64RegExtra() - m.submodules.o2_svdec = o2_svdec = SVP64RegExtra() - - # debug access to crout_svdec (used in get_pdecode_cr_out) - self.crout_svdec = crout_svdec + if self.svp64_en: + # and SVP64 Extra decoders + m.submodules.crout_svdec = crout_svdec = SVP64CRExtra() + m.submodules.crin_svdec = crin_svdec = SVP64CRExtra() + m.submodules.crin_svdec_b = crin_svdec_b = SVP64CRExtra() + m.submodules.crin_svdec_o = crin_svdec_o = SVP64CRExtra() + m.submodules.in1_svdec = in1_svdec = SVP64RegExtra() + m.submodules.in2_svdec = in2_svdec = SVP64RegExtra() + m.submodules.in3_svdec = in3_svdec = SVP64RegExtra() + m.submodules.o_svdec = o_svdec = SVP64RegExtra() + m.submodules.o2_svdec = o2_svdec = SVP64RegExtra() + + # debug access to crout_svdec (used in get_pdecode_cr_out) + self.crout_svdec = crout_svdec # get the 5-bit reg data before svp64-munging it into 7-bit plus isvec reg = Signal(5, reset_less=True) # copy instruction through... for i in [do.insn, dec_a.insn_in, dec_b.insn_in, + self.dec_cr_in.insn_in, self.dec_cr_out.insn_in, dec_c.insn_in, dec_o.insn_in, dec_o2.insn_in]: comb += i.eq(self.dec.opcode_in) - # now do the SVP64 munging. op.SV_Etype and op.sv_in1 comes from - # PowerDecoder which in turn comes from LDST-RM*.csv and RM-*.csv - # which in turn were auto-generated by sv_analysis.py - extra = self.sv_rm.extra # SVP64 extra bits 10:18 - - ####### - # CR out - comb += crout_svdec.idx.eq(op.sv_cr_out) # SVP64 CR out - comb += self.cr_out_isvec.eq(crout_svdec.isvec) - - ####### - # CR in - index selection slightly different due to shared CR field sigh - cr_a_idx = Signal(SVEXTRA) - cr_b_idx = Signal(SVEXTRA) - - # these change slightly, when decoding BA/BB. really should have - # their own separate CSV column: sv_cr_in1 and sv_cr_in2, but hey - comb += cr_a_idx.eq(op.sv_cr_in) - comb += cr_b_idx.eq(SVEXTRA.NONE) - with m.If(op.sv_cr_in == SVEXTRA.Idx_1_2.value): - comb += cr_a_idx.eq(SVEXTRA.Idx1) - comb += cr_b_idx.eq(SVEXTRA.Idx2) - - comb += self.cr_in_isvec.eq(crin_svdec.isvec) - comb += self.cr_in_b_isvec.eq(crin_svdec_b.isvec) - comb += self.cr_in_o_isvec.eq(crin_svdec_o.isvec) - - # indices are slightly different, BA/BB mess sorted above - comb += crin_svdec.idx.eq(cr_a_idx) # SVP64 CR in A - comb += crin_svdec_b.idx.eq(cr_b_idx) # SVP64 CR in B - comb += crin_svdec_o.idx.eq(op.sv_cr_out) # SVP64 CR out + # CR setup + comb += self.dec_cr_in.sel_in.eq(op.cr_in) + comb += self.dec_cr_out.sel_in.eq(op.cr_out) + comb += self.dec_cr_out.rc_in.eq(rc_out) + + # CR register info + comb += self.do_copy("read_cr_whole", self.dec_cr_in.whole_reg) + comb += self.do_copy("write_cr_whole", self.dec_cr_out.whole_reg) # ...and subdecoders' input fields comb += dec_a.sel_in.eq(op.in1_sel) @@ -1137,45 +1007,128 @@ class PowerDecode2(PowerDecodeSubset): if hasattr(do, "lk"): comb += dec_o2.lk.eq(do.lk) - # get SVSTATE srcstep (TODO: elwidth, dststep etc.) needed below - srcstep = Signal.like(self.state.svstate.srcstep) - comb += srcstep.eq(self.state.svstate.srcstep) - - # registers a, b, c and out and out2 (LD/ST EA) - for to_reg, fromreg, svdec in ( - (e.read_reg1, dec_a.reg_out, in1_svdec), - (e.read_reg2, dec_b.reg_out, in2_svdec), - (e.read_reg3, dec_c.reg_out, in3_svdec), - (e.write_reg, dec_o.reg_out, o_svdec), - (e.write_ea, dec_o2.reg_out, o2_svdec)): - comb += svdec.extra.eq(extra) # EXTRA field of SVP64 RM - comb += svdec.etype.eq(op.SV_Etype) # EXTRA2/3 for this insn - comb += svdec.reg_in.eq(fromreg.data) # 3-bit (CR0/BC/BFA) - comb += to_reg.ok.eq(fromreg.ok) - # detect if Vectorised: add srcstep if yes. TODO: a LOT. - # this trick only holds when elwidth=default and in single-pred - with m.If(svdec.isvec): - comb += to_reg.data.eq(srcstep+svdec.reg_out) # 7-bit output - with m.Else(): - comb += to_reg.data.eq(svdec.reg_out) # 7-bit output - - comb += in1_svdec.idx.eq(op.sv_in1) # SVP64 reg #1 (matches in1_sel) - comb += in2_svdec.idx.eq(op.sv_in2) # SVP64 reg #2 (matches in2_sel) - comb += in3_svdec.idx.eq(op.sv_in3) # SVP64 reg #3 (matches in3_sel) - comb += o_svdec.idx.eq(op.sv_out) # SVP64 output (matches out_sel) - # XXX TODO - work out where this should come from. the problem is - # that LD-with-update is implied (computed from "is instruction in - # "update mode" rather than specified cleanly as its own CSV column - #comb += o2_svdec.idx.eq(op.sv_out) # SVP64 output (implicit) - - # output reg-is-vectorised (and when no output is vectorised) - comb += self.in1_isvec.eq(in1_svdec.isvec) - comb += self.in2_isvec.eq(in2_svdec.isvec) - comb += self.in3_isvec.eq(in3_svdec.isvec) - comb += self.o_isvec.eq(o_svdec.isvec) - comb += self.o2_isvec.eq(o2_svdec.isvec) - # TODO: include SPRs and CRs here! must be True when *all* are scalar - comb += self.no_out_vec.eq((~o2_svdec.isvec) & (~o_svdec.isvec)) + if self.svp64_en: + # now do the SVP64 munging. op.SV_Etype and op.sv_in1 comes from + # PowerDecoder which in turn comes from LDST-RM*.csv and RM-*.csv + # which in turn were auto-generated by sv_analysis.py + extra = self.sv_rm.extra # SVP64 extra bits 10:18 + + ####### + # CR out + comb += crout_svdec.idx.eq(op.sv_cr_out) # SVP64 CR out + comb += self.cr_out_isvec.eq(crout_svdec.isvec) + + ####### + # CR in - selection slightly different due to shared CR field sigh + cr_a_idx = Signal(SVEXTRA) + cr_b_idx = Signal(SVEXTRA) + + # these change slightly, when decoding BA/BB. really should have + # their own separate CSV column: sv_cr_in1 and sv_cr_in2, but hey + comb += cr_a_idx.eq(op.sv_cr_in) + comb += cr_b_idx.eq(SVEXTRA.NONE) + with m.If(op.sv_cr_in == SVEXTRA.Idx_1_2.value): + comb += cr_a_idx.eq(SVEXTRA.Idx1) + comb += cr_b_idx.eq(SVEXTRA.Idx2) + + comb += self.cr_in_isvec.eq(crin_svdec.isvec) + comb += self.cr_in_b_isvec.eq(crin_svdec_b.isvec) + comb += self.cr_in_o_isvec.eq(crin_svdec_o.isvec) + + # indices are slightly different, BA/BB mess sorted above + comb += crin_svdec.idx.eq(cr_a_idx) # SVP64 CR in A + comb += crin_svdec_b.idx.eq(cr_b_idx) # SVP64 CR in B + comb += crin_svdec_o.idx.eq(op.sv_cr_out) # SVP64 CR out + + # get SVSTATE srcstep (TODO: elwidth, dststep etc.) needed below + srcstep = Signal.like(self.state.svstate.srcstep) + comb += srcstep.eq(self.state.svstate.srcstep) + + # registers a, b, c and out and out2 (LD/ST EA) + for to_reg, fromreg, svdec in ( + (e.read_reg1, dec_a.reg_out, in1_svdec), + (e.read_reg2, dec_b.reg_out, in2_svdec), + (e.read_reg3, dec_c.reg_out, in3_svdec), + (e.write_reg, dec_o.reg_out, o_svdec), + (e.write_ea, dec_o2.reg_out, o2_svdec)): + comb += svdec.extra.eq(extra) # EXTRA field of SVP64 RM + comb += svdec.etype.eq(op.SV_Etype) # EXTRA2/3 for this insn + comb += svdec.reg_in.eq(fromreg.data) # 3-bit (CR0/BC/BFA) + comb += to_reg.ok.eq(fromreg.ok) + # detect if Vectorised: add srcstep if yes. TODO: a LOT. + # this trick only holds when elwidth=default and in single-pred + with m.If(svdec.isvec): + comb += to_reg.data.eq(srcstep+svdec.reg_out) # 7-bit output + with m.Else(): + comb += to_reg.data.eq(svdec.reg_out) # 7-bit output + + comb += in1_svdec.idx.eq(op.sv_in1) # SVP64 reg #1 (in1_sel) + comb += in2_svdec.idx.eq(op.sv_in2) # SVP64 reg #2 (in2_sel) + comb += in3_svdec.idx.eq(op.sv_in3) # SVP64 reg #3 (in3_sel) + comb += o_svdec.idx.eq(op.sv_out) # SVP64 output (out_sel) + # XXX TODO - work out where this should come from. the problem is + # that LD-with-update is implied (computed from "is instruction in + # "update mode" rather than specified cleanly as its own CSV column + #comb += o2_svdec.idx.eq(op.sv_out) # SVP64 output (implicit) + + # output reg-is-vectorised (and when no in/out is vectorised) + comb += self.in1_isvec.eq(in1_svdec.isvec) + comb += self.in2_isvec.eq(in2_svdec.isvec) + comb += self.in3_isvec.eq(in3_svdec.isvec) + comb += self.o_isvec.eq(o_svdec.isvec) + comb += self.o2_isvec.eq(o2_svdec.isvec) + # TODO add SPRs here. must be True when *all* are scalar + l = map(lambda svdec: svdec.isvec, [in1_svdec, in2_svdec, in3_svdec, + crin_svdec, crin_svdec_b, crin_svdec_o]) + comb += self.no_in_vec.eq(~Cat(*l).bool()) # all input scalar + l = map(lambda svdec: svdec.isvec, [o2_svdec, o_svdec, crout_svdec]) + comb += self.no_out_vec.eq(~Cat(*l).bool()) # all output scalar + + # condition registers (CR) + for to_reg, cr, name, svdec in ( + (e.read_cr1, self.dec_cr_in, "cr_bitfield", crin_svdec), + (e.read_cr2, self.dec_cr_in, "cr_bitfield_b", crin_svdec_b), + (e.read_cr3, self.dec_cr_in, "cr_bitfield_o", crin_svdec_o), + (e.write_cr, self.dec_cr_out, "cr_bitfield", crout_svdec)): + fromreg = getattr(cr, name) + comb += svdec.extra.eq(extra) # EXTRA field of SVP64 RM + comb += svdec.etype.eq(op.SV_Etype) # EXTRA2/3 for this insn + comb += svdec.cr_in.eq(fromreg.data) # 3-bit (CR0/BC/BFA) + with m.If(svdec.isvec): + # check if this is CR0 or CR1: treated differently + # (does not "listen" to EXTRA2/3 spec for a start) + # also: the CRs start from completely different locations + with m.If(cr.sv_override == 1): # CR0 + offs = SVP64CROffs.CR0 + comb += to_reg.data.eq(srcstep+offs) + with m.Elif(cr.sv_override == 2): # CR1 + offs = SVP64CROffs.CR1 + comb += to_reg.data.eq(srcstep+1) + with m.Else(): + comb += to_reg.data.eq(srcstep+svdec.cr_out) # 7-bit out + with m.Else(): + comb += to_reg.data.eq(svdec.cr_out) # 7-bit output + comb += to_reg.ok.eq(fromreg.ok) + + else: + # connect up to/from read/write GPRs + for to_reg, fromreg in ((e.read_reg1, dec_a.reg_out), + (e.read_reg2, dec_b.reg_out), + (e.read_reg3, dec_c.reg_out), + (e.write_reg, dec_o.reg_out), + (e.write_ea, dec_o2.reg_out)): + comb += to_reg.data.eq(fromreg.data) + comb += to_reg.ok.eq(fromreg.ok) + + # connect up to/from read/write CRs + for to_reg, cr, name in ( + (e.read_cr1, self.dec_cr_in, "cr_bitfield", ), + (e.read_cr2, self.dec_cr_in, "cr_bitfield_b", ), + (e.read_cr3, self.dec_cr_in, "cr_bitfield_o", ), + (e.write_cr, self.dec_cr_out, "cr_bitfield", )): + fromreg = getattr(cr, name) + comb += to_reg.data.eq(fromreg.data) + comb += to_reg.ok.eq(fromreg.ok) # SPRs out comb += e.read_spr1.eq(dec_a.spr_out) @@ -1187,29 +1140,6 @@ class PowerDecode2(PowerDecodeSubset): comb += e.write_fast1.eq(dec_o.fast_out) comb += e.write_fast2.eq(dec_o2.fast_out) - # condition registers (CR) - for to_reg, cr, name, svdec in ( - (e.read_cr1, self.dec_cr_in, "cr_bitfield", crin_svdec), - (e.read_cr2, self.dec_cr_in, "cr_bitfield_b", crin_svdec_b), - (e.read_cr3, self.dec_cr_in, "cr_bitfield_o", crin_svdec_o), - (e.write_cr, self.dec_cr_out, "cr_bitfield", crout_svdec)): - fromreg = getattr(cr, name) - comb += svdec.extra.eq(extra) # EXTRA field of SVP64 RM - comb += svdec.etype.eq(op.SV_Etype) # EXTRA2/3 for this insn - comb += svdec.cr_in.eq(fromreg.data) # 3-bit (CR0/BC/BFA) - with m.If(svdec.isvec): - # check if this is CR0 or CR1: treated differently - # (does not "listen" to EXTRA2/3 spec - with m.If(cr.sv_override == 1): # CR0 - comb += to_reg.data.eq(srcstep+0) # XXX TODO CR0 offset - with m.Elif(cr.sv_override == 2): # CR1 - comb += to_reg.data.eq(srcstep+1) # XXX TODO CR1 offset - with m.Else(): - comb += to_reg.data.eq(srcstep+svdec.cr_out) # 7-bit output - with m.Else(): - comb += to_reg.data.eq(svdec.cr_out) # 7-bit output - comb += to_reg.ok.eq(fromreg.ok) - # sigh this is exactly the sort of thing for which the # decoder is designed to not need. MTSPR, MFSPR and others need # access to the XER bits. however setting e.oe is not appropriate @@ -1337,53 +1267,6 @@ class PowerDecode2(PowerDecodeSubset): comb += self.do_copy("cia", self.state.pc, True) # copy of PC "state" -# SVP64 Prefix fields: see https://libre-soc.org/openpower/sv/svp64/ -# identifies if an instruction is a SVP64-encoded prefix, and extracts -# the 24-bit SVP64 context (RM) if it is -class SVP64PrefixDecoder(Elaboratable): - - def __init__(self): - self.opcode_in = Signal(32, reset_less=True) - self.raw_opcode_in = Signal.like(self.opcode_in, reset_less=True) - self.is_svp64_mode = Signal(1, reset_less=True) - self.svp64_rm = Signal(24, reset_less=True) - self.bigendian = Signal(reset_less=True) - - def elaborate(self, platform): - m = Module() - opcode_in = self.opcode_in - comb = m.d.comb - # sigh copied this from TopPowerDecoder - # raw opcode in assumed to be in LE order: byte-reverse it to get BE - raw_le = self.raw_opcode_in - l = [] - for i in range(0, 32, 8): - l.append(raw_le[i:i+8]) - l.reverse() - raw_be = Cat(*l) - comb += opcode_in.eq(Mux(self.bigendian, raw_be, raw_le)) - - # start identifying if the incoming opcode is SVP64 prefix) - major = Signal(6, reset_less=True) - ident = Signal(2, reset_less=True) - - comb += major.eq(sel(opcode_in, SVP64P.OPC)) - comb += ident.eq(sel(opcode_in, SVP64P.SVP64_7_9)) - - comb += self.is_svp64_mode.eq( - (major == Const(1, 6)) & # EXT01 - (ident == Const(0b11, 2)) # identifier bits - ) - - with m.If(self.is_svp64_mode): - # now grab the 24-bit ReMap context bits, - comb += self.svp64_rm.eq(sel(opcode_in, SVP64P.RM)) - - return m - - def ports(self): - return [self.opcode_in, self.raw_opcode_in, self.is_svp64_mode, - self.svp64_rm, self.bigendian] def get_rdflags(e, cu): rdl = [] @@ -1396,10 +1279,6 @@ def get_rdflags(e, cu): if __name__ == '__main__': - svp64 = SVP64PowerDecoder() - vl = rtlil.convert(svp64, ports=svp64.ports()) - with open("svp64_dec.il", "w") as f: - f.write(vl) pdecode = create_pdecode() dec2 = PowerDecode2(pdecode) vl = rtlil.convert(dec2, ports=dec2.ports() + pdecode.ports())