X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fsoc%2Fdecoder%2Fpower_decoder2.py;h=640acef86e7b8c8c74c95fcbba75125f4d445bec;hb=fc4e7f45c011e099118005ec5ca0b07272dcf244;hp=7c435ca2e05fd44fb9737056ff0eb1249472c1ca;hpb=e287b19830b48b9d166f83ebb7fa31e8ac55feee;p=soc.git diff --git a/src/soc/decoder/power_decoder2.py b/src/soc/decoder/power_decoder2.py index 7c435ca2..640acef8 100644 --- a/src/soc/decoder/power_decoder2.py +++ b/src/soc/decoder/power_decoder2.py @@ -221,11 +221,9 @@ class DecodeA(Elaboratable): def __init__(self, dec): self.dec = dec - self.sv_rm = SVP64Rec() # SVP64 RM field self.sel_in = Signal(In1Sel, reset_less=True) self.insn_in = Signal(32, reset_less=True) - self.reg_out = Data(7, name="reg_a") - self.reg_isvec = Signal(1, name="reg_a_isvec") # TODO: in reg_out + self.reg_out = Data(5, name="reg_a") self.spr_out = Data(SPR, "spr_a") self.fast_out = Data(3, "fast_a") @@ -233,11 +231,8 @@ class DecodeA(Elaboratable): m = Module() comb = m.d.comb op = self.dec.op + reg = self.reg_out m.submodules.sprmap = sprmap = SPRMap() - m.submodules.svdec = svdec = SVP64RegExtra() - - # get the 5-bit reg data before svp64-munging it into 7-bit plus isvec - reg = Signal(5, reset_less=True) # select Register A field ra = Signal(5, reset_less=True) @@ -245,30 +240,16 @@ class DecodeA(Elaboratable): with m.If((self.sel_in == In1Sel.RA) | ((self.sel_in == In1Sel.RA_OR_ZERO) & (ra != Const(0, 5)))): - comb += reg.eq(ra) - comb += self.reg_out.ok.eq(1) + comb += reg.data.eq(ra) + comb += reg.ok.eq(1) # some Logic/ALU ops have RS as the 3rd arg, but no "RA". # moved it to 1st position (in1_sel)... because rs = Signal(5, reset_less=True) comb += rs.eq(self.dec.RS) with m.If(self.sel_in == In1Sel.RS): - comb += reg.eq(rs) - comb += self.reg_out.ok.eq(1) - - # now do the SVP64 munging. op.SV_Etype and op.sv_in1 comes from - # PowerDecoder which in turn comes from LDST-RM*.csv and RM-*.csv - # which in turn were auto-generated by sv_analysis.py - - extra = self.sv_rm.extra # SVP64 extra bits 10:18 - comb += svdec.extra.eq(extra) # EXTRA field of SVP64 RM - comb += svdec.etype.eq(op.SV_Etype) # EXTRA2/3 for this insn - comb += svdec.idx.eq(op.sv_in1) # SVP64 reg #1 (matches in1_sel) - comb += svdec.reg_in.eq(reg) # 5-bit (RA, RS) - - # outputs: 7-bit reg number and whether it's vectorised - comb += self.reg_out.data.eq(svdec.reg_out) - comb += self.reg_isvec.eq(svdec.isvec) + comb += reg.data.eq(rs) + comb += reg.ok.eq(1) # decode Fast-SPR based on instruction type with m.Switch(op.internal_op): @@ -334,7 +315,6 @@ class DecodeB(Elaboratable): def __init__(self, dec): self.dec = dec - self.sv_rm = SVP64Rec() # SVP64 RM field self.sel_in = Signal(In2Sel, reset_less=True) self.insn_in = Signal(32, reset_less=True) self.reg_out = Data(7, "reg_b") @@ -345,32 +325,17 @@ class DecodeB(Elaboratable): m = Module() comb = m.d.comb op = self.dec.op - m.submodules.svdec = svdec = SVP64RegExtra() - - # get the 5-bit reg data before svp64-munging it into 7-bit plus isvec - reg = Signal(5, reset_less=True) + reg = self.reg_out # select Register B field with m.Switch(self.sel_in): with m.Case(In2Sel.RB): - comb += reg.eq(self.dec.RB) - comb += self.reg_out.ok.eq(1) + comb += reg.data.eq(self.dec.RB) + comb += reg.ok.eq(1) with m.Case(In2Sel.RS): # for M-Form shiftrot - comb += reg.eq(self.dec.RS) - comb += self.reg_out.ok.eq(1) - - # now do the SVP64 munging. different from DecodeA only by sv_in2 - - extra = self.sv_rm.extra # SVP64 extra bits 10:18 - comb += svdec.extra.eq(extra) # EXTRA field of SVP64 RM - comb += svdec.etype.eq(op.SV_Etype) # EXTRA2/3 for this insn - comb += svdec.idx.eq(op.sv_in2) # SVP64 reg #2 (matches in2_sel) - comb += svdec.reg_in.eq(reg) # 5-bit (RA, RS) - - # outputs: 7-bit reg number and whether it's vectorised - comb += self.reg_out.data.eq(svdec.reg_out) - comb += self.reg_isvec.eq(svdec.isvec) + comb += reg.data.eq(self.dec.RS) + comb += reg.ok.eq(1) # decode SPR2 based on instruction type # BCREG implicitly uses LR or TAR for 2nd reg @@ -456,42 +421,25 @@ class DecodeC(Elaboratable): def __init__(self, dec): self.dec = dec - self.sv_rm = SVP64Rec() # SVP64 RM field self.sel_in = Signal(In3Sel, reset_less=True) self.insn_in = Signal(32, reset_less=True) - self.reg_out = Data(7, "reg_c") - self.reg_isvec = Signal(1, name="reg_c_isvec") # TODO: in reg_out + self.reg_out = Data(5, "reg_c") def elaborate(self, platform): m = Module() comb = m.d.comb op = self.dec.op - m.submodules.svdec = svdec = SVP64RegExtra() - - # get the 5-bit reg data before svp64-munging it into 7-bit plus isvec - reg = Signal(5, reset_less=True) + reg = self.reg_out # select Register C field with m.Switch(self.sel_in): with m.Case(In3Sel.RB): # for M-Form shiftrot - comb += reg.eq(self.dec.RB) - comb += self.reg_out.ok.eq(1) + comb += reg.data.eq(self.dec.RB) + comb += reg.ok.eq(1) with m.Case(In3Sel.RS): - comb += reg.eq(self.dec.RS) - comb += self.reg_out.ok.eq(1) - - # now do the SVP64 munging. different from DecodeA only by sv_in3 - - extra = self.sv_rm.extra # SVP64 extra bits 10:18 - comb += svdec.extra.eq(extra) # EXTRA field of SVP64 RM - comb += svdec.etype.eq(op.SV_Etype) # EXTRA2/3 for this insn - comb += svdec.idx.eq(op.sv_in3) # SVP64 reg #3 (matches in3_sel) - comb += svdec.reg_in.eq(reg) # 5-bit (RA, RS) - - # outputs: 7-bit reg number and whether it's vectorised - comb += self.reg_out.data.eq(svdec.reg_out) - comb += self.reg_isvec.eq(svdec.isvec) + comb += reg.data.eq(self.dec.RS) + comb += reg.ok.eq(1) return m @@ -504,11 +452,9 @@ class DecodeOut(Elaboratable): def __init__(self, dec): self.dec = dec - self.sv_rm = SVP64Rec() # SVP64 RM field self.sel_in = Signal(OutSel, reset_less=True) self.insn_in = Signal(32, reset_less=True) - self.reg_out = Data(7, "reg_o") - self.reg_isvec = Signal(1, name="reg_o_isvec") # TODO: in reg_out + self.reg_out = Data(5, "reg_o") self.spr_out = Data(SPR, "spr_o") self.fast_out = Data(3, "fast_o") @@ -517,19 +463,16 @@ class DecodeOut(Elaboratable): comb = m.d.comb m.submodules.sprmap = sprmap = SPRMap() op = self.dec.op - m.submodules.svdec = svdec = SVP64RegExtra() - - # get the 5-bit reg data before svp64-munging it into 7-bit plus isvec - reg = Signal(5, reset_less=True) + reg = self.reg_out # select Register out field with m.Switch(self.sel_in): with m.Case(OutSel.RT): - comb += reg.eq(self.dec.RT) - comb += self.reg_out.ok.eq(1) + comb += reg.data.eq(self.dec.RT) + comb += reg.ok.eq(1) with m.Case(OutSel.RA): - comb += reg.eq(self.dec.RA) - comb += self.reg_out.ok.eq(1) + comb += reg.data.eq(self.dec.RA) + comb += reg.ok.eq(1) with m.Case(OutSel.SPR): spr = Signal(10, reset_less=True) comb += spr.eq(decode_spr_num(self.dec.SPR)) # from XFX @@ -539,18 +482,6 @@ class DecodeOut(Elaboratable): comb += self.spr_out.eq(sprmap.spr_o) comb += self.fast_out.eq(sprmap.fast_o) - # now do the SVP64 munging. different from DecodeA only by sv_out - - extra = self.sv_rm.extra # SVP64 extra bits 10:18 - comb += svdec.extra.eq(extra) # EXTRA field of SVP64 RM - comb += svdec.etype.eq(op.SV_Etype) # EXTRA2/3 for this insn - comb += svdec.idx.eq(op.sv_out) # SVP64 reg out1 (matches out_sel) - comb += svdec.reg_in.eq(reg) # 5-bit (RA, RS) - - # outputs: 7-bit reg number and whether it's vectorised - comb += self.reg_out.data.eq(svdec.reg_out) - comb += self.reg_isvec.eq(svdec.isvec) - # determine Fast Reg with m.Switch(op.internal_op): @@ -585,12 +516,10 @@ class DecodeOut2(Elaboratable): def __init__(self, dec): self.dec = dec - self.sv_rm = SVP64Rec() # SVP64 RM field self.sel_in = Signal(OutSel, reset_less=True) self.lk = Signal(reset_less=True) self.insn_in = Signal(32, reset_less=True) - self.reg_out = Data(7, "reg_o2") - #self.reg_isvec = Signal(1, name="reg_o2_isvec") # TODO: in reg_out + self.reg_out = Data(5, "reg_o2") self.fast_out = Data(3, "fast_o2") def elaborate(self, platform): @@ -714,7 +643,6 @@ class DecodeCRIn(Elaboratable): def __init__(self, dec): self.dec = dec - self.sv_rm = SVP64Rec() # SVP64 RM field self.sel_in = Signal(CRInSel, reset_less=True) self.insn_in = Signal(32, reset_less=True) self.cr_bitfield = Data(3, "cr_bitfield") @@ -961,7 +889,16 @@ class PowerDecodeSubset(Elaboratable): # set up instruction type # no op: defaults to OP_ILLEGAL - comb += self.do_copy("insn_type", self.op_get("internal_op")) + if self.fn_name=="MMU": + # mmu is special case: needs SPR opcode as well + mmu0 = self.mmu0_spr_dec + with m.If(((mmu0.dec.op.internal_op == MicrOp.OP_MTSPR) | + (mmu0.dec.op.internal_op == MicrOp.OP_MFSPR))): + comb += self.do_copy("insn_type", mmu0.op_get("internal_op")) + with m.Else(): + comb += self.do_copy("insn_type", self.op_get("internal_op")) + else: + comb += self.do_copy("insn_type", self.op_get("internal_op")) # function unit for decoded instruction: requires minor redirect # for SPR set/get @@ -969,11 +906,13 @@ class PowerDecodeSubset(Elaboratable): spr = Signal(10, reset_less=True) comb += spr.eq(decode_spr_num(self.dec.SPR)) # from XFX - # for first test only forward SPRs 18 and 19 to MMU, when - # operation is MTSPR or MFSPR. TODO: add other MMU SPRs + SPR_PID = 48 # TODO read docs for POWER9 + # Microwatt doesn't implement the partition table + # instead has PRTBL register (SPR) to point to process table + SPR_PRTBL = 720 # see common.vhdl in microwatt, not in POWER9 with m.If(((self.dec.op.internal_op == MicrOp.OP_MTSPR) | (self.dec.op.internal_op == MicrOp.OP_MFSPR)) & - ((spr == SPR.DSISR) | (spr == SPR.DAR))): + ((spr == SPR.DSISR) | (spr == SPR.DAR) | (spr==SPR_PRTBL) | (spr==SPR_PID))): comb += self.do_copy("fn_unit", Function.MMU) with m.Else(): comb += self.do_copy("fn_unit",fn) @@ -1059,6 +998,11 @@ class PowerDecode2(PowerDecodeSubset): self.cr_in_isvec = Signal(1, name="cr_in_isvec") self.cr_in_b_isvec = Signal(1, name="cr_in_b_isvec") self.cr_in_o_isvec = Signal(1, name="cr_in_o_isvec") + self.in1_isvec = Signal(1, name="reg_a_isvec") + self.in2_isvec = Signal(1, name="reg_b_isvec") + self.in3_isvec = Signal(1, name="reg_c_isvec") + self.o_isvec = Signal(1, name="reg_o_isvec") + self.o2_isvec = Signal(1, name="reg_o2_isvec") def get_col_subset(self, opkls): subset = super().get_col_subset(opkls) @@ -1104,18 +1048,23 @@ class PowerDecode2(PowerDecodeSubset): m.submodules.crin_svdec = crin_svdec = SVP64CRExtra() m.submodules.crin_svdec_b = crin_svdec_b = SVP64CRExtra() m.submodules.crin_svdec_o = crin_svdec_o = SVP64CRExtra() + m.submodules.in1_svdec = in1_svdec = SVP64RegExtra() + m.submodules.in2_svdec = in2_svdec = SVP64RegExtra() + m.submodules.in3_svdec = in3_svdec = SVP64RegExtra() + m.submodules.o_svdec = o_svdec = SVP64RegExtra() + m.submodules.o2_svdec = o2_svdec = SVP64RegExtra() + + # get the 5-bit reg data before svp64-munging it into 7-bit plus isvec + reg = Signal(5, reset_less=True) # copy instruction through... for i in [do.insn, dec_a.insn_in, dec_b.insn_in, dec_c.insn_in, dec_o.insn_in, dec_o2.insn_in]: comb += i.eq(self.dec.opcode_in) - # ... and svp64 rm - for i in [dec_a.insn_in, dec_b.insn_in, - dec_c.insn_in, dec_o.insn_in, dec_o2.insn_in]: - comb += i.eq(self.sv_rm) - - # now do the SVP64 munging. + # now do the SVP64 munging. op.SV_Etype and op.sv_in1 comes from + # PowerDecoder which in turn comes from LDST-RM*.csv and RM-*.csv + # which in turn were auto-generated by sv_analysis.py extra = self.sv_rm.extra # SVP64 extra bits 10:18 ####### @@ -1128,7 +1077,7 @@ class PowerDecode2(PowerDecodeSubset): cr_a_idx = Signal(SVEXTRA) cr_b_idx = Signal(SVEXTRA) - # these change slighly, when decoding BA/BB. really should have + # these change slightly, when decoding BA/BB. really should have # their own separate CSV column: sv_cr_in1 and sv_cr_in2, but hey comb += cr_a_idx.eq(op.sv_cr_in) comb += cr_b_idx.eq(SVEXTRA.NONE) @@ -1154,15 +1103,42 @@ class PowerDecode2(PowerDecodeSubset): if hasattr(do, "lk"): comb += dec_o2.lk.eq(do.lk) + # get SVSTATE srcstep (TODO: elwidth, dststep etc.) needed below + srcstep = Signal.like(self.state.svstate.srcstep) + comb += srcstep.eq(self.state.svstate.srcstep) + # registers a, b, c and out and out2 (LD/ST EA) - for to_reg, fromreg in ( - (e.read_reg1, dec_a.reg_out), - (e.read_reg2, dec_b.reg_out), - (e.read_reg3, dec_c.reg_out), - (e.write_reg, dec_o.reg_out), - (e.write_ea, dec_o2.reg_out)): - comb += to_reg.data.eq(fromreg.data) + for to_reg, fromreg, svdec in ( + (e.read_reg1, dec_a.reg_out, in1_svdec), + (e.read_reg2, dec_b.reg_out, in2_svdec), + (e.read_reg3, dec_c.reg_out, in3_svdec), + (e.write_reg, dec_o.reg_out, o_svdec), + (e.write_ea, dec_o2.reg_out, o2_svdec)): + comb += svdec.extra.eq(extra) # EXTRA field of SVP64 RM + comb += svdec.etype.eq(op.SV_Etype) # EXTRA2/3 for this insn + comb += svdec.reg_in.eq(fromreg.data) # 3-bit (CR0/BC/BFA) comb += to_reg.ok.eq(fromreg.ok) + # detect if Vectorised: add srcstep if yes. TODO: a LOT. + # this trick only holds when elwidth=default and in single-pred + with m.If(svdec.isvec): + comb += to_reg.data.eq(srcstep+svdec.reg_out) # 7-bit output + with m.Else(): + comb += to_reg.data.eq(svdec.reg_out) # 7-bit output + + comb += in1_svdec.idx.eq(op.sv_in1) # SVP64 reg #1 (matches in1_sel) + comb += in2_svdec.idx.eq(op.sv_in2) # SVP64 reg #2 (matches in2_sel) + comb += in3_svdec.idx.eq(op.sv_in3) # SVP64 reg #3 (matches in3_sel) + comb += o_svdec.idx.eq(op.sv_out) # SVP64 output (matches out_sel) + # XXX TODO - work out where this should come from. the problem is + # that LD-with-update is implied (computed from "is instruction in + # "update mode" rather than specified cleanly as its own CSV column + #comb += o2_svdec.idx.eq(op.sv_out) # SVP64 output (implicit) + + comb += self.in1_isvec.eq(in1_svdec.isvec) + comb += self.in2_isvec.eq(in2_svdec.isvec) + comb += self.in3_isvec.eq(in3_svdec.isvec) + comb += self.o_isvec.eq(o_svdec.isvec) + comb += self.o2_isvec.eq(o2_svdec.isvec) # SPRs out comb += e.read_spr1.eq(dec_a.spr_out) @@ -1313,6 +1289,53 @@ class PowerDecode2(PowerDecodeSubset): comb += self.do_copy("cia", self.state.pc, True) # copy of PC "state" +# SVP64 Prefix fields: see https://libre-soc.org/openpower/sv/svp64/ +# identifies if an instruction is a SVP64-encoded prefix, and extracts +# the 24-bit SVP64 context (RM) if it is +class SVP64PrefixDecoder(Elaboratable): + + def __init__(self): + self.opcode_in = Signal(32, reset_less=True) + self.raw_opcode_in = Signal.like(self.opcode_in, reset_less=True) + self.is_svp64_mode = Signal(1, reset_less=True) + self.svp64_rm = Signal(24, reset_less=True) + self.bigendian = Signal(reset_less=True) + + def elaborate(self, platform): + m = Module() + comb = m.d.comb + # sigh copied this from TopPowerDecoder + # raw opcode in assumed to be in LE order: byte-reverse it to get BE + raw_le = self.raw_opcode_in + l = [] + for i in range(0, 32, 8): + l.append(raw_le[i:i+8]) + l.reverse() + raw_be = Cat(*l) + comb += self.opcode_in.eq(Mux(self.bigendian, raw_be, raw_le)) + + # start identifying if the incoming opcode is SVP64 prefix) + major = Signal(6, reset_less=True) + + comb += major.eq(self.opcode_in[26:32]) + comb += self.is_svp64_mode.eq((major == Const(1, 6)) & # EXT01 + self.opcode_in[31-7] & # identifier + self.opcode_in[31-9]) # bits + + # now grab the 24-bit ReMap context bits, + rmfields = [6, 8] + list(range(10,32)) # SVP64 24-bit RM + l = [] + for idx in rmfields: + l.append(self.opcode_in[32-idx]) + with m.If(self.is_svp64_mode): + comb += self.svp64_rm.eq(Cat(*l)) + + return m + + def ports(self): + return [self.opcode_in, self.raw_opcode_in, self.is_svp64_mode, + self.svp64_rm, self.bigendian] + def get_rdflags(e, cu): rdl = [] for idx in range(cu.n_src): @@ -1324,6 +1347,10 @@ def get_rdflags(e, cu): if __name__ == '__main__': + svp64 = SVP64PowerDecoder() + vl = rtlil.convert(svp64, ports=svp64.ports()) + with open("svp64_dec.il", "w") as f: + f.write(vl) pdecode = create_pdecode() dec2 = PowerDecode2(pdecode) vl = rtlil.convert(dec2, ports=dec2.ports() + pdecode.ports())