From e653b5cb1e6e5a8c2070841a67f3c812d2a0f90c Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Mon, 8 Mar 2021 12:10:00 +0000 Subject: [PATCH] add option to cut out SVP64 from PowerDecoder2 --- src/soc/decoder/power_decoder2.py | 264 ++++++++++++++++-------------- 1 file changed, 145 insertions(+), 119 deletions(-) diff --git a/src/soc/decoder/power_decoder2.py b/src/soc/decoder/power_decoder2.py index 6fa183a8..b6032edb 100644 --- a/src/soc/decoder/power_decoder2.py +++ b/src/soc/decoder/power_decoder2.py @@ -678,9 +678,12 @@ class PowerDecodeSubset(Elaboratable): only fields actually requested are copied over. hence, "subset" (duh). """ - def __init__(self, dec, opkls=None, fn_name=None, final=False, state=None): + def __init__(self, dec, opkls=None, fn_name=None, final=False, state=None, + svp64_en=True): - self.sv_rm = SVP64Rec(name="dec_svp64") # SVP64 RM field + self.svp64_en = svp64_en + if svp64_en: + self.sv_rm = SVP64Rec(name="dec_svp64") # SVP64 RM field self.final = final self.opkls = opkls self.fn_name = fn_name @@ -732,7 +735,10 @@ class PowerDecodeSubset(Elaboratable): ) def ports(self): - return self.dec.ports() + self.e.ports() + self.sv_rm.ports() + ports = self.dec.ports() + self.e.ports() + if self.svp64_en: + ports += self.sv_rm.ports() + return ports def needs_field(self, field, op_field): if self.final: @@ -895,8 +901,9 @@ class PowerDecode2(PowerDecodeSubset): the output, into here (PowerDecoder2). without incrementing PC. """ - def __init__(self, dec, opkls=None, fn_name=None, final=False, state=None): - super().__init__(dec, opkls, fn_name, final, state) + def __init__(self, dec, opkls=None, fn_name=None, final=False, + state=None, svp64_en=True): + super().__init__(dec, opkls, fn_name, final, state, svp64_en) self.exc = LDSTException("dec2_exc") self.cr_out_isvec = Signal(1, name="cr_out_isvec") @@ -953,19 +960,20 @@ class PowerDecode2(PowerDecodeSubset): m.submodules.dec_cr_in = self.dec_cr_in = DecodeCRIn(self.dec) m.submodules.dec_cr_out = self.dec_cr_out = DecodeCROut(self.dec) - # and SVP64 Extra decoders - m.submodules.crout_svdec = crout_svdec = SVP64CRExtra() - m.submodules.crin_svdec = crin_svdec = SVP64CRExtra() - m.submodules.crin_svdec_b = crin_svdec_b = SVP64CRExtra() - m.submodules.crin_svdec_o = crin_svdec_o = SVP64CRExtra() - m.submodules.in1_svdec = in1_svdec = SVP64RegExtra() - m.submodules.in2_svdec = in2_svdec = SVP64RegExtra() - m.submodules.in3_svdec = in3_svdec = SVP64RegExtra() - m.submodules.o_svdec = o_svdec = SVP64RegExtra() - m.submodules.o2_svdec = o2_svdec = SVP64RegExtra() - - # debug access to crout_svdec (used in get_pdecode_cr_out) - self.crout_svdec = crout_svdec + if self.svp64_en: + # and SVP64 Extra decoders + m.submodules.crout_svdec = crout_svdec = SVP64CRExtra() + m.submodules.crin_svdec = crin_svdec = SVP64CRExtra() + m.submodules.crin_svdec_b = crin_svdec_b = SVP64CRExtra() + m.submodules.crin_svdec_o = crin_svdec_o = SVP64CRExtra() + m.submodules.in1_svdec = in1_svdec = SVP64RegExtra() + m.submodules.in2_svdec = in2_svdec = SVP64RegExtra() + m.submodules.in3_svdec = in3_svdec = SVP64RegExtra() + m.submodules.o_svdec = o_svdec = SVP64RegExtra() + m.submodules.o2_svdec = o2_svdec = SVP64RegExtra() + + # debug access to crout_svdec (used in get_pdecode_cr_out) + self.crout_svdec = crout_svdec # get the 5-bit reg data before svp64-munging it into 7-bit plus isvec reg = Signal(5, reset_less=True) @@ -985,38 +993,6 @@ class PowerDecode2(PowerDecodeSubset): comb += self.do_copy("read_cr_whole", self.dec_cr_in.whole_reg) comb += self.do_copy("write_cr_whole", self.dec_cr_out.whole_reg) - # now do the SVP64 munging. op.SV_Etype and op.sv_in1 comes from - # PowerDecoder which in turn comes from LDST-RM*.csv and RM-*.csv - # which in turn were auto-generated by sv_analysis.py - extra = self.sv_rm.extra # SVP64 extra bits 10:18 - - ####### - # CR out - comb += crout_svdec.idx.eq(op.sv_cr_out) # SVP64 CR out - comb += self.cr_out_isvec.eq(crout_svdec.isvec) - - ####### - # CR in - index selection slightly different due to shared CR field sigh - cr_a_idx = Signal(SVEXTRA) - cr_b_idx = Signal(SVEXTRA) - - # these change slightly, when decoding BA/BB. really should have - # their own separate CSV column: sv_cr_in1 and sv_cr_in2, but hey - comb += cr_a_idx.eq(op.sv_cr_in) - comb += cr_b_idx.eq(SVEXTRA.NONE) - with m.If(op.sv_cr_in == SVEXTRA.Idx_1_2.value): - comb += cr_a_idx.eq(SVEXTRA.Idx1) - comb += cr_b_idx.eq(SVEXTRA.Idx2) - - comb += self.cr_in_isvec.eq(crin_svdec.isvec) - comb += self.cr_in_b_isvec.eq(crin_svdec_b.isvec) - comb += self.cr_in_o_isvec.eq(crin_svdec_o.isvec) - - # indices are slightly different, BA/BB mess sorted above - comb += crin_svdec.idx.eq(cr_a_idx) # SVP64 CR in A - comb += crin_svdec_b.idx.eq(cr_b_idx) # SVP64 CR in B - comb += crin_svdec_o.idx.eq(op.sv_cr_out) # SVP64 CR out - # ...and subdecoders' input fields comb += dec_a.sel_in.eq(op.in1_sel) comb += dec_b.sel_in.eq(op.in2_sel) @@ -1026,49 +1002,125 @@ class PowerDecode2(PowerDecodeSubset): if hasattr(do, "lk"): comb += dec_o2.lk.eq(do.lk) - # get SVSTATE srcstep (TODO: elwidth, dststep etc.) needed below - srcstep = Signal.like(self.state.svstate.srcstep) - comb += srcstep.eq(self.state.svstate.srcstep) - - # registers a, b, c and out and out2 (LD/ST EA) - for to_reg, fromreg, svdec in ( - (e.read_reg1, dec_a.reg_out, in1_svdec), - (e.read_reg2, dec_b.reg_out, in2_svdec), - (e.read_reg3, dec_c.reg_out, in3_svdec), - (e.write_reg, dec_o.reg_out, o_svdec), - (e.write_ea, dec_o2.reg_out, o2_svdec)): - comb += svdec.extra.eq(extra) # EXTRA field of SVP64 RM - comb += svdec.etype.eq(op.SV_Etype) # EXTRA2/3 for this insn - comb += svdec.reg_in.eq(fromreg.data) # 3-bit (CR0/BC/BFA) - comb += to_reg.ok.eq(fromreg.ok) - # detect if Vectorised: add srcstep if yes. TODO: a LOT. - # this trick only holds when elwidth=default and in single-pred - with m.If(svdec.isvec): - comb += to_reg.data.eq(srcstep+svdec.reg_out) # 7-bit output - with m.Else(): - comb += to_reg.data.eq(svdec.reg_out) # 7-bit output - - comb += in1_svdec.idx.eq(op.sv_in1) # SVP64 reg #1 (matches in1_sel) - comb += in2_svdec.idx.eq(op.sv_in2) # SVP64 reg #2 (matches in2_sel) - comb += in3_svdec.idx.eq(op.sv_in3) # SVP64 reg #3 (matches in3_sel) - comb += o_svdec.idx.eq(op.sv_out) # SVP64 output (matches out_sel) - # XXX TODO - work out where this should come from. the problem is - # that LD-with-update is implied (computed from "is instruction in - # "update mode" rather than specified cleanly as its own CSV column - #comb += o2_svdec.idx.eq(op.sv_out) # SVP64 output (implicit) - - # output reg-is-vectorised (and when no input or output is vectorised) - comb += self.in1_isvec.eq(in1_svdec.isvec) - comb += self.in2_isvec.eq(in2_svdec.isvec) - comb += self.in3_isvec.eq(in3_svdec.isvec) - comb += self.o_isvec.eq(o_svdec.isvec) - comb += self.o2_isvec.eq(o2_svdec.isvec) - # TODO add SPRs here. must be True when *all* are scalar - l = map(lambda svdec: svdec.isvec, [in1_svdec, in2_svdec, in3_svdec, - crin_svdec, crin_svdec_b, crin_svdec_o]) - comb += self.no_in_vec.eq(~Cat(*l).bool()) # all input scalar - l = map(lambda svdec: svdec.isvec, [o2_svdec, o_svdec, crout_svdec]) - comb += self.no_out_vec.eq(~Cat(*l).bool()) # all output scalar + if self.svp64_en: + # now do the SVP64 munging. op.SV_Etype and op.sv_in1 comes from + # PowerDecoder which in turn comes from LDST-RM*.csv and RM-*.csv + # which in turn were auto-generated by sv_analysis.py + extra = self.sv_rm.extra # SVP64 extra bits 10:18 + + ####### + # CR out + comb += crout_svdec.idx.eq(op.sv_cr_out) # SVP64 CR out + comb += self.cr_out_isvec.eq(crout_svdec.isvec) + + ####### + # CR in - selection slightly different due to shared CR field sigh + cr_a_idx = Signal(SVEXTRA) + cr_b_idx = Signal(SVEXTRA) + + # these change slightly, when decoding BA/BB. really should have + # their own separate CSV column: sv_cr_in1 and sv_cr_in2, but hey + comb += cr_a_idx.eq(op.sv_cr_in) + comb += cr_b_idx.eq(SVEXTRA.NONE) + with m.If(op.sv_cr_in == SVEXTRA.Idx_1_2.value): + comb += cr_a_idx.eq(SVEXTRA.Idx1) + comb += cr_b_idx.eq(SVEXTRA.Idx2) + + comb += self.cr_in_isvec.eq(crin_svdec.isvec) + comb += self.cr_in_b_isvec.eq(crin_svdec_b.isvec) + comb += self.cr_in_o_isvec.eq(crin_svdec_o.isvec) + + # indices are slightly different, BA/BB mess sorted above + comb += crin_svdec.idx.eq(cr_a_idx) # SVP64 CR in A + comb += crin_svdec_b.idx.eq(cr_b_idx) # SVP64 CR in B + comb += crin_svdec_o.idx.eq(op.sv_cr_out) # SVP64 CR out + + # get SVSTATE srcstep (TODO: elwidth, dststep etc.) needed below + srcstep = Signal.like(self.state.svstate.srcstep) + comb += srcstep.eq(self.state.svstate.srcstep) + + # registers a, b, c and out and out2 (LD/ST EA) + for to_reg, fromreg, svdec in ( + (e.read_reg1, dec_a.reg_out, in1_svdec), + (e.read_reg2, dec_b.reg_out, in2_svdec), + (e.read_reg3, dec_c.reg_out, in3_svdec), + (e.write_reg, dec_o.reg_out, o_svdec), + (e.write_ea, dec_o2.reg_out, o2_svdec)): + comb += svdec.extra.eq(extra) # EXTRA field of SVP64 RM + comb += svdec.etype.eq(op.SV_Etype) # EXTRA2/3 for this insn + comb += svdec.reg_in.eq(fromreg.data) # 3-bit (CR0/BC/BFA) + comb += to_reg.ok.eq(fromreg.ok) + # detect if Vectorised: add srcstep if yes. TODO: a LOT. + # this trick only holds when elwidth=default and in single-pred + with m.If(svdec.isvec): + comb += to_reg.data.eq(srcstep+svdec.reg_out) # 7-bit output + with m.Else(): + comb += to_reg.data.eq(svdec.reg_out) # 7-bit output + + comb += in1_svdec.idx.eq(op.sv_in1) # SVP64 reg #1 (in1_sel) + comb += in2_svdec.idx.eq(op.sv_in2) # SVP64 reg #2 (in2_sel) + comb += in3_svdec.idx.eq(op.sv_in3) # SVP64 reg #3 (in3_sel) + comb += o_svdec.idx.eq(op.sv_out) # SVP64 output (out_sel) + # XXX TODO - work out where this should come from. the problem is + # that LD-with-update is implied (computed from "is instruction in + # "update mode" rather than specified cleanly as its own CSV column + #comb += o2_svdec.idx.eq(op.sv_out) # SVP64 output (implicit) + + # output reg-is-vectorised (and when no in/out is vectorised) + comb += self.in1_isvec.eq(in1_svdec.isvec) + comb += self.in2_isvec.eq(in2_svdec.isvec) + comb += self.in3_isvec.eq(in3_svdec.isvec) + comb += self.o_isvec.eq(o_svdec.isvec) + comb += self.o2_isvec.eq(o2_svdec.isvec) + # TODO add SPRs here. must be True when *all* are scalar + l = map(lambda svdec: svdec.isvec, [in1_svdec, in2_svdec, in3_svdec, + crin_svdec, crin_svdec_b, crin_svdec_o]) + comb += self.no_in_vec.eq(~Cat(*l).bool()) # all input scalar + l = map(lambda svdec: svdec.isvec, [o2_svdec, o_svdec, crout_svdec]) + comb += self.no_out_vec.eq(~Cat(*l).bool()) # all output scalar + + # condition registers (CR) + for to_reg, cr, name, svdec in ( + (e.read_cr1, self.dec_cr_in, "cr_bitfield", crin_svdec), + (e.read_cr2, self.dec_cr_in, "cr_bitfield_b", crin_svdec_b), + (e.read_cr3, self.dec_cr_in, "cr_bitfield_o", crin_svdec_o), + (e.write_cr, self.dec_cr_out, "cr_bitfield", crout_svdec)): + fromreg = getattr(cr, name) + comb += svdec.extra.eq(extra) # EXTRA field of SVP64 RM + comb += svdec.etype.eq(op.SV_Etype) # EXTRA2/3 for this insn + comb += svdec.cr_in.eq(fromreg.data) # 3-bit (CR0/BC/BFA) + with m.If(svdec.isvec): + # check if this is CR0 or CR1: treated differently + # (does not "listen" to EXTRA2/3 spec for a start) + # also: the CRs start from completely different locations + with m.If(cr.sv_override == 1): # CR0 + offs = SVP64CROffs.CR0 + comb += to_reg.data.eq(srcstep+offs) + with m.Elif(cr.sv_override == 2): # CR1 + offs = SVP64CROffs.CR1 + comb += to_reg.data.eq(srcstep+1) + with m.Else(): + comb += to_reg.data.eq(srcstep+svdec.cr_out) # 7-bit out + with m.Else(): + comb += to_reg.data.eq(svdec.cr_out) # 7-bit output + comb += to_reg.ok.eq(fromreg.ok) + + else: + for to_reg, fromreg in ( + (e.read_reg1, dec_a.reg_out), + (e.read_reg2, dec_b.reg_out), + (e.read_reg3, dec_c.reg_out), + (e.write_reg, dec_o.reg_out), + (e.write_ea, dec_o2.reg_out), + (e.read_cr1, self.dec_cr_in), + (e.read_cr2, self.dec_cr_in), + (e.read_cr3, self.dec_cr_in), + (e.write_cr, self.dec_cr_out )): + comb += to_reg.data.eq(fromreg.data) + comb += to_reg.ok.eq(fromreg.ok) + + comb += self.no_in_vec.eq(1) + comb += self.no_out_vec.eq(1) # SPRs out comb += e.read_spr1.eq(dec_a.spr_out) @@ -1080,32 +1132,6 @@ class PowerDecode2(PowerDecodeSubset): comb += e.write_fast1.eq(dec_o.fast_out) comb += e.write_fast2.eq(dec_o2.fast_out) - # condition registers (CR) - for to_reg, cr, name, svdec in ( - (e.read_cr1, self.dec_cr_in, "cr_bitfield", crin_svdec), - (e.read_cr2, self.dec_cr_in, "cr_bitfield_b", crin_svdec_b), - (e.read_cr3, self.dec_cr_in, "cr_bitfield_o", crin_svdec_o), - (e.write_cr, self.dec_cr_out, "cr_bitfield", crout_svdec)): - fromreg = getattr(cr, name) - comb += svdec.extra.eq(extra) # EXTRA field of SVP64 RM - comb += svdec.etype.eq(op.SV_Etype) # EXTRA2/3 for this insn - comb += svdec.cr_in.eq(fromreg.data) # 3-bit (CR0/BC/BFA) - with m.If(svdec.isvec): - # check if this is CR0 or CR1: treated differently - # (does not "listen" to EXTRA2/3 spec for a start) - # also: the CRs start from completely different locations - with m.If(cr.sv_override == 1): # CR0 - offs = SVP64CROffs.CR0 - comb += to_reg.data.eq(srcstep+offs) - with m.Elif(cr.sv_override == 2): # CR1 - offs = SVP64CROffs.CR1 - comb += to_reg.data.eq(srcstep+1) - with m.Else(): - comb += to_reg.data.eq(srcstep+svdec.cr_out) # 7-bit output - with m.Else(): - comb += to_reg.data.eq(svdec.cr_out) # 7-bit output - comb += to_reg.ok.eq(fromreg.ok) - # sigh this is exactly the sort of thing for which the # decoder is designed to not need. MTSPR, MFSPR and others need # access to the XER bits. however setting e.oe is not appropriate -- 2.30.2