From d652efe4157cfc39bc08685a4c7b968a1d9c236e Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Sun, 5 Jul 2020 16:40:30 +0100 Subject: [PATCH] big reorg on PowerDecoder2, actually Decode2Execute1Type plan is to move the decoding of instruction fields closer to the CompUnits --- src/soc/decoder/decode2execute1.py | 16 +++-- src/soc/decoder/isa/caller.py | 30 ++++---- src/soc/decoder/power_decoder2.py | 69 ++++++++++--------- src/soc/decoder/power_regspec_map.py | 10 +-- src/soc/fu/alu/alu_input_record.py | 4 +- src/soc/fu/alu/test/test_pipe_caller.py | 8 +-- src/soc/fu/branch/br_input_record.py | 2 +- src/soc/fu/branch/test/test_pipe_caller.py | 4 +- src/soc/fu/common_output_stage.py | 2 +- .../fu/compunits/test/test_alu_compunit.py | 4 +- .../fu/compunits/test/test_branch_compunit.py | 2 +- src/soc/fu/compunits/test/test_compunit.py | 2 +- src/soc/fu/compunits/test/test_cr_compunit.py | 2 +- .../compunits/test/test_logical_compunit.py | 4 +- .../compunits/test/test_shiftrot_compunit.py | 6 +- .../fu/compunits/test/test_spr_compunit.py | 4 +- .../fu/compunits/test/test_trap_compunit.py | 4 +- src/soc/fu/cr/cr_input_record.py | 2 +- src/soc/fu/cr/test/test_pipe_caller.py | 6 +- src/soc/fu/div/test/test_pipe_caller.py | 8 +-- src/soc/fu/ldst/ldst_input_record.py | 2 +- src/soc/fu/logical/logical_input_record.py | 4 +- src/soc/fu/logical/test/test_pipe_caller.py | 4 +- src/soc/fu/shift_rot/sr_input_record.py | 4 +- src/soc/fu/shift_rot/test/test_pipe_caller.py | 4 +- src/soc/fu/spr/spr_input_record.py | 2 +- src/soc/fu/spr/test/test_pipe_caller.py | 4 +- src/soc/fu/test/common.py | 26 +++---- src/soc/fu/trap/test/test_pipe_caller.py | 4 +- src/soc/fu/trap/trap_input_record.py | 2 +- src/soc/simple/core.py | 3 +- src/soc/simple/test/test_core.py | 4 +- 32 files changed, 129 insertions(+), 123 deletions(-) diff --git a/src/soc/decoder/decode2execute1.py b/src/soc/decoder/decode2execute1.py index d969fa11..b4f75200 100644 --- 
a/src/soc/decoder/decode2execute1.py +++ b/src/soc/decoder/decode2execute1.py @@ -36,8 +36,6 @@ class Decode2ToOperand(RecordObject): self.lk = Signal(reset_less=True) self.rc = Data(1, "rc") self.oe = Data(1, "oe") - self.xer_in = Signal(reset_less=True) # xer might be read - self.xer_out = Signal(reset_less=True) # xer might be written self.invert_a = Signal(reset_less=True) self.zero_a = Signal(reset_less=True) self.input_carry = Signal(CryIn, reset_less=True) @@ -54,13 +52,16 @@ class Decode2ToOperand(RecordObject): self.update = Signal(reset_less=True) # LD/ST is "update" variant self.traptype = Signal(5, reset_less=True) # see trap main_stage.py self.trapaddr = Signal(13, reset_less=True) + self.read_cr_whole = Signal(reset_less=True) + self.write_cr_whole = Signal(reset_less=True) + self.write_cr0 = Signal(reset_less=True) -class Decode2ToExecute1Type(Decode2ToOperand): +class Decode2ToExecute1Type(RecordObject): def __init__(self, name=None, asmcode=True): - Decode2ToOperand.__init__(self, name=name) + RecordObject.__init__(self, name=name) if asmcode: self.asmcode = Signal(8, reset_less=True) # only for simulator @@ -74,6 +75,9 @@ class Decode2ToExecute1Type(Decode2ToOperand): self.read_spr1 = Data(SPR, name="spr1") #self.read_spr2 = Data(SPR, name="spr2") # only one needed + self.xer_in = Signal(reset_less=True) # xer might be read + self.xer_out = Signal(reset_less=True) # xer might be written + self.read_fast1 = Data(3, name="fast1") self.read_fast2 = Data(3, name="fast2") self.write_fast1 = Data(3, name="fasto1") @@ -82,7 +86,7 @@ class Decode2ToExecute1Type(Decode2ToOperand): self.read_cr1 = Data(3, name="cr_in1") self.read_cr2 = Data(3, name="cr_in2") self.read_cr3 = Data(3, name="cr_in2") - self.read_cr_whole = Signal(reset_less=True) self.write_cr = Data(3, name="cr_out") - self.write_cr_whole = Signal(reset_less=True) + # decode operand data + self.do = Decode2ToOperand(name) diff --git a/src/soc/decoder/isa/caller.py 
b/src/soc/decoder/isa/caller.py index 16701072..35f02bd5 100644 --- a/src/soc/decoder/isa/caller.py +++ b/src/soc/decoder/isa/caller.py @@ -366,13 +366,13 @@ class ISACaller: self.namespace['CA32'] = self.spr['XER'][XER_bits['CA32']].value def handle_carry_(self, inputs, outputs, already_done): - inv_a = yield self.dec2.e.invert_a + inv_a = yield self.dec2.e.do.invert_a if inv_a: inputs[0] = ~inputs[0] - imm_ok = yield self.dec2.e.imm_data.ok + imm_ok = yield self.dec2.e.do.imm_data.ok if imm_ok: - imm = yield self.dec2.e.imm_data.data + imm = yield self.dec2.e.do.imm_data.data inputs.append(SelectableInt(imm, 64)) assert len(outputs) >= 1 print ("outputs", repr(outputs)) @@ -402,13 +402,13 @@ class ISACaller: self.spr['XER'][XER_bits['CA32']] = cy32 def handle_overflow(self, inputs, outputs, div_overflow): - inv_a = yield self.dec2.e.invert_a + inv_a = yield self.dec2.e.do.invert_a if inv_a: inputs[0] = ~inputs[0] - imm_ok = yield self.dec2.e.imm_data.ok + imm_ok = yield self.dec2.e.do.imm_data.ok if imm_ok: - imm = yield self.dec2.e.imm_data.data + imm = yield self.dec2.e.do.imm_data.data inputs.append(SelectableInt(imm, 64)) assert len(outputs) >= 1 print ("handle_overflow", inputs, outputs, div_overflow) @@ -492,11 +492,11 @@ class ISACaller: asmop = insns.get(asmcode, None) # sigh reconstruct the assembly instruction name - ov_en = yield self.dec2.e.oe.oe - ov_ok = yield self.dec2.e.oe.ok + ov_en = yield self.dec2.e.do.oe.oe + ov_ok = yield self.dec2.e.do.oe.ok if ov_en & ov_ok: asmop += "." 
- lk = yield self.dec2.e.lk + lk = yield self.dec2.e.do.lk if lk: asmop += "l" int_op = yield self.dec2.dec.op.internal_op @@ -507,7 +507,7 @@ class ISACaller: if AA: asmop += "a" if int_op == InternalOp.OP_MFCR.value: - dec_insn = yield self.dec2.e.insn + dec_insn = yield self.dec2.e.do.insn if dec_insn & (1<<20) != 0: # sigh asmop = 'mfocrf' else: @@ -515,7 +515,7 @@ class ISACaller: # XXX TODO: for whatever weird reason this doesn't work # https://bugs.libre-soc.org/show_bug.cgi?id=390 if int_op == InternalOp.OP_MTCRF.value: - dec_insn = yield self.dec2.e.insn + dec_insn = yield self.dec2.e.do.insn if dec_insn & (1<<20) != 0: # sigh asmop = 'mtocrf' else: @@ -578,7 +578,7 @@ class ISACaller: already_done |= 2 print ("carry already done?", bin(already_done)) - carry_en = yield self.dec2.e.output_carry + carry_en = yield self.dec2.e.do.output_carry if carry_en: yield from self.handle_carry_(inputs, results, already_done) @@ -589,13 +589,13 @@ class ISACaller: if name == 'overflow': overflow = output - ov_en = yield self.dec2.e.oe.oe - ov_ok = yield self.dec2.e.oe.ok + ov_en = yield self.dec2.e.do.oe.oe + ov_ok = yield self.dec2.e.do.oe.ok print ("internal overflow", overflow) if ov_en & ov_ok: yield from self.handle_overflow(inputs, results, overflow) - rc_en = yield self.dec2.e.rc.data + rc_en = yield self.dec2.e.do.rc.data if rc_en: self.handle_comparison(results) diff --git a/src/soc/decoder/power_decoder2.py b/src/soc/decoder/power_decoder2.py index 125fef67..1c2f504c 100644 --- a/src/soc/decoder/power_decoder2.py +++ b/src/soc/decoder/power_decoder2.py @@ -551,7 +551,7 @@ class PowerDecode2(Elaboratable): def elaborate(self, platform): m = Module() comb = m.d.comb - e, op = self.e, self.dec.op + e, op, do = self.e, self.dec.op, self.e.do # set up submodule decoders m.submodules.dec = self.dec @@ -566,7 +566,7 @@ class PowerDecode2(Elaboratable): m.submodules.dec_cr_out = dec_cr_out = DecodeCROut(self.dec) # copy instruction through... 
- for i in [e.insn, dec_a.insn_in, dec_b.insn_in, + for i in [do.insn, dec_a.insn_in, dec_b.insn_in, dec_c.insn_in, dec_o.insn_in, dec_o2.insn_in, dec_rc.insn_in, dec_oe.insn_in, dec_cr_in.insn_in, dec_cr_out.insn_in]: comb += i.eq(self.dec.opcode_in) @@ -577,7 +577,7 @@ class PowerDecode2(Elaboratable): comb += dec_c.sel_in.eq(op.in3_sel) comb += dec_o.sel_in.eq(op.out_sel) comb += dec_o2.sel_in.eq(op.out_sel) - comb += dec_o2.lk.eq(e.lk) + comb += dec_o2.lk.eq(do.lk) comb += dec_rc.sel_in.eq(op.rc_sel) comb += dec_oe.sel_in.eq(op.rc_sel) # XXX should be OE sel comb += dec_cr_in.sel_in.eq(op.cr_in) @@ -588,8 +588,8 @@ class PowerDecode2(Elaboratable): comb += e.nia.eq(0) # XXX TODO (or remove? not sure yet) fu = op.function_unit itype = Mux(fu == Function.NONE, InternalOp.OP_ILLEGAL, op.internal_op) - comb += e.insn_type.eq(itype) - comb += e.fn_unit.eq(fu) + comb += do.insn_type.eq(itype) + comb += do.fn_unit.eq(fu) # registers a, b, c and out and out2 (LD/ST EA) comb += e.read_reg1.eq(dec_a.reg_out) @@ -597,12 +597,12 @@ class PowerDecode2(Elaboratable): comb += e.read_reg3.eq(dec_c.reg_out) comb += e.write_reg.eq(dec_o.reg_out) comb += e.write_ea.eq(dec_o2.reg_out) - comb += e.imm_data.eq(dec_b.imm_out) # immediate in RB (usually) - comb += e.zero_a.eq(dec_a.immz_out) # RA==0 detected + comb += do.imm_data.eq(dec_b.imm_out) # immediate in RB (usually) + comb += do.zero_a.eq(dec_a.immz_out) # RA==0 detected # rc and oe out - comb += e.rc.eq(dec_rc.rc_out) - comb += e.oe.eq(dec_oe.oe_out) + comb += do.rc.eq(dec_rc.rc_out) + comb += do.oe.eq(dec_oe.oe_out) # SPRs out comb += e.read_spr1.eq(dec_a.spr_out) @@ -618,29 +618,30 @@ class PowerDecode2(Elaboratable): comb += e.read_cr1.eq(dec_cr_in.cr_bitfield) comb += e.read_cr2.eq(dec_cr_in.cr_bitfield_b) comb += e.read_cr3.eq(dec_cr_in.cr_bitfield_o) - comb += e.read_cr_whole.eq(dec_cr_in.whole_reg) - comb += e.write_cr.eq(dec_cr_out.cr_bitfield) - comb += e.write_cr_whole.eq(dec_cr_out.whole_reg) + + comb += 
do.read_cr_whole.eq(dec_cr_in.whole_reg) + comb += do.write_cr_whole.eq(dec_cr_out.whole_reg) + comb += do.write_cr0.eq(dec_cr_out.cr_bitfield.ok) # decoded/selected instruction flags - comb += e.data_len.eq(op.ldst_len) - comb += e.invert_a.eq(op.inv_a) - comb += e.invert_out.eq(op.inv_out) - comb += e.input_carry.eq(op.cry_in) # carry comes in - comb += e.output_carry.eq(op.cry_out) # carry goes out - comb += e.is_32bit.eq(op.is_32b) - comb += e.is_signed.eq(op.sgn) + comb += do.data_len.eq(op.ldst_len) + comb += do.invert_a.eq(op.inv_a) + comb += do.invert_out.eq(op.inv_out) + comb += do.input_carry.eq(op.cry_in) # carry comes in + comb += do.output_carry.eq(op.cry_out) # carry goes out + comb += do.is_32bit.eq(op.is_32b) + comb += do.is_signed.eq(op.sgn) with m.If(op.lk): - comb += e.lk.eq(self.dec.LK) # XXX TODO: accessor + comb += do.lk.eq(self.dec.LK) # XXX TODO: accessor - comb += e.byte_reverse.eq(op.br) - comb += e.sign_extend.eq(op.sgn_ext) - comb += e.update.eq(op.upd) # LD/ST "update" mode. + comb += do.byte_reverse.eq(op.br) + comb += do.sign_extend.eq(op.sgn_ext) + comb += do.update.eq(op.upd) # LD/ST "update" mode. # These should be removed eventually - comb += e.input_cr.eq(op.cr_in) # condition reg comes in - comb += e.output_cr.eq(op.cr_out) # condition reg goes in + comb += do.input_cr.eq(op.cr_in) # condition reg comes in + comb += do.output_cr.eq(op.cr_out) # condition reg goes out # sigh this is exactly the sort of thing for which the # decoder is designed to not need. MTSPR, MFSPR and others need @@ -652,7 +653,7 @@ class PowerDecode2(Elaboratable): # set the trapaddr to 0x700 for a td/tw/tdi/twi operation with m.If(op.internal_op == InternalOp.OP_TRAP): - comb += e.trapaddr.eq(0x70) # addr=0x700 (strip first nibble) + comb += do.trapaddr.eq(0x70) # addr=0x700 (strip first nibble) # illegal instruction must redirect to trap. this is done by # *overwriting* the decoded instruction and starting again. 
@@ -661,16 +662,16 @@ class PowerDecode2(Elaboratable): with m.If(op.internal_op == InternalOp.OP_ILLEGAL): comb += e.eq(0) # reset eeeeeverything # start again - comb += e.insn.eq(self.dec.opcode_in) - comb += e.insn_type.eq(InternalOp.OP_TRAP) - comb += e.fn_unit.eq(Function.TRAP) - comb += e.trapaddr.eq(0x70) # addr=0x700 (strip first nibble) - comb += e.traptype.eq(TT_ILLEG) # request illegal instruction + comb += do.insn.eq(self.dec.opcode_in) + comb += do.insn_type.eq(InternalOp.OP_TRAP) + comb += do.fn_unit.eq(Function.TRAP) + comb += do.trapaddr.eq(0x70) # addr=0x700 (strip first nibble) + comb += do.traptype.eq(TT_ILLEG) # request illegal instruction # trap: (note e.insn_type so this includes OP_ILLEGAL) set up fast regs # Note: OP_SC could actually be modified to just be a trap - with m.If((e.insn_type == InternalOp.OP_TRAP) | - (e.insn_type == InternalOp.OP_SC)): + with m.If((do.insn_type == InternalOp.OP_TRAP) | + (do.insn_type == InternalOp.OP_SC)): # TRAP write fast1 = SRR0 comb += e.write_fast1.data.eq(FastRegs.SRR0) # constant: SRR0 comb += e.write_fast1.ok.eq(1) @@ -679,7 +680,7 @@ class PowerDecode2(Elaboratable): comb += e.write_fast2.ok.eq(1) # RFID: needs to read SRR0/1 - with m.If(e.insn_type == InternalOp.OP_RFID): + with m.If(do.insn_type == InternalOp.OP_RFID): # TRAP read fast1 = SRR0 comb += e.read_fast1.data.eq(FastRegs.SRR0) # constant: SRR0 comb += e.read_fast1.ok.eq(1) diff --git a/src/soc/decoder/power_regspec_map.py b/src/soc/decoder/power_regspec_map.py index 2d4f3fd9..7413cdd6 100644 --- a/src/soc/decoder/power_regspec_map.py +++ b/src/soc/decoder/power_regspec_map.py @@ -60,7 +60,7 @@ def regspec_decode_read(e, regfile, name): # CRRegs register numbering is *unary* encoded # *sigh*. numbering inverted on part-CRs. because POWER. 
if name == 'full_cr': # full CR - return e.read_cr_whole, 0b11111111 + return e.do.read_cr_whole, 0b11111111 if name == 'cr_a': # CR A return e.read_cr1.ok, 1<<(7-e.read_cr1.data) if name == 'cr_b': # CR B @@ -76,11 +76,11 @@ def regspec_decode_read(e, regfile, name): CA = 1<