From 47789aac637860792919c9d227c2ada0dbed6bd6 Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Mon, 10 Jun 2019 06:03:25 +0100 Subject: [PATCH] added in the LD/ST Comp Unit (not connected up yet) and the code didn't fall over. amazing that the unit test still runs, first time. particularly that the number of INT ALUs was reduced from 4 to 2 --- src/experiment/compldst.py | 7 +-- src/experiment/score6600.py | 105 +++++++++++++++++++++++++++++++----- 2 files changed, 95 insertions(+), 17 deletions(-) diff --git a/src/experiment/compldst.py b/src/experiment/compldst.py index f67d75b0..3e2d379e 100644 --- a/src/experiment/compldst.py +++ b/src/experiment/compldst.py @@ -103,7 +103,7 @@ class LDSTCompUnit(Elaboratable): comb += reset_s.eq(self.go_st_i | self.go_die_i) comb += reset_r.eq(self.go_rd_i | self.go_die_i) # this one is slightly different, issue_alu_i selects go_wr_i) - a_sel = Mux(self.isalu_i, self.go_wr_i, self.go_ad_i ) + a_sel = Mux(self.isalu_i, self.go_wr_i, self.go_ad_i) comb += reset_a.eq(a_sel| self.go_die_i) # opcode decode @@ -159,7 +159,7 @@ class LDSTCompUnit(Elaboratable): comb += self.sto_rel_o.eq(sto_l.q & busy_o & self.shadown_i) # address release only happens on LD/ST, and is shadowed. - comb += self.adr_rel_o.eq(adr_l.q & op_ldst & busy_o & self.shadownn_i) + comb += self.adr_rel_o.eq(adr_l.q & op_ldst & busy_o & self.shadown_i) # request release enabled based on if op is a LD/ST or a plain ALU # if op is a LD/ST, req_rel activates from the *address* latch @@ -189,7 +189,8 @@ class LDSTCompUnit(Elaboratable): latchregister(m, src2_or_imm, self.alu.b, src_sel) # create a latch/register for the operand - latchregister(m, Cat(op_alu, 0), self.alu.op, self.issue_i) + alu_op = Cat(op_alu, 0, op_is_imm) # using alu_hier, here. 
+ latchregister(m, alu_op, self.alu.op, self.issue_i) # and one for the output from the ALU data_r = Signal(self.rwid, reset_less=True) # Dest register diff --git a/src/experiment/score6600.py b/src/experiment/score6600.py index 7c66f208..ea6765a5 100644 --- a/src/experiment/score6600.py +++ b/src/experiment/score6600.py @@ -10,8 +10,10 @@ from scoreboard.group_picker import GroupPicker from scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode from scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord from scoreboard.instruction_q import Instruction, InstructionQ +from scoreboard.memfu import MemFunctionUnits from compalu import ComputationUnitNoDelay +from compldst import LDSTCompUnit from alu_hier import ALU, BranchALU from nmutil.latch import SRLatch @@ -85,13 +87,14 @@ class CompUnitsBase(Elaboratable): Computation Unit" as defined by Mitch Alsup (see section 11.4.9.3) """ - def __init__(self, rwid, units): + def __init__(self, rwid, units, ldstmode=False): """ Inputs: * :rwid: bit width of register file(s) - both FP and INT * :units: sequence of ALUs (or CompUnitsBase derivatives) """ self.units = units + self.ldstmode = ldstmode self.rwid = rwid self.rwid = rwid if units and isinstance(units[0], CompUnitsBase): @@ -109,11 +112,19 @@ class CompUnitsBase(Elaboratable): self.go_wr_i = Signal(n_units, reset_less=True) self.shadown_i = Signal(n_units, reset_less=True) self.go_die_i = Signal(n_units, reset_less=True) + if ldstmode: + self.go_ad_i = Signal(n_units, reset_less=True) # outputs self.busy_o = Signal(n_units, reset_less=True) self.rd_rel_o = Signal(n_units, reset_less=True) self.req_rel_o = Signal(n_units, reset_less=True) + if ldstmode: + self.adr_rel_o = Signal(n_units, reset_less=True) + self.sto_rel_o = Signal(n_units, reset_less=True) + self.req_rel_o = Signal(n_units, reset_less=True) + self.load_mem_o = Signal(n_units, reset_less=True) + self.stwd_mem_o = Signal(n_units, reset_less=True) # in/out register data (note: not 
register#, actual data) self.data_o = Signal(rwid, reset_less=True) @@ -166,12 +177,70 @@ class CompUnitsBase(Elaboratable): comb += alu.src1_i.eq(self.src1_i) comb += alu.src2_i.eq(self.src2_i) + if not self.ldstmode: + return m + + ldmem_l = [] + stmem_l = [] + go_ad_l = [] + adr_rel_l = [] + sto_rel_l = [] + for alu in self.units: + adr_rel_l.append(alu.adr_rel_o) + sto_rel_l.append(alu.sto_rel_o) + ldmem_l.append(alu.load_mem_o) + stmem_l.append(alu.stwd_mem_o) + go_ad_l.append(alu.go_ad_i) + comb += self.adr_rel_o.eq(Cat(*adr_rel_l)) + comb += self.sto_rel_o.eq(Cat(*sto_rel_l)) + comb += self.load_mem_o.eq(Cat(*ldmem_l)) + comb += self.stwd_mem_o.eq(Cat(*stmem_l)) + comb += Cat(*go_ad_l).eq(self.go_ad_i) + + return m + + +class CompUnitLDSTs(CompUnitsBase): + + def __init__(self, rwid, opwid, mem): + """ Inputs: + + * :rwid: bit width of register file(s) - both FP and INT + * :opwid: operand bit width + """ + self.opwid = opwid + + # inputs + self.oper_i = Signal(opwid, reset_less=True) + self.imm_i = Signal(rwid, reset_less=True) + + # Int ALUs + add1 = ALU(rwid) + add2 = ALU(rwid) + + units = [] + for alu in [add1, add2]: + aluopwid = 4 # see compldst.py for "internal" opcode + units.append(LDSTCompUnit(rwid, aluopwid, alu, mem)) + + CompUnitsBase.__init__(self, rwid, units, ldstmode=True) + + def elaborate(self, platform): + m = CompUnitsBase.elaborate(self, platform) + comb = m.d.comb + + # hand the same operation to all units, 4 lower bits though + for alu in self.units: + comb += alu.oper_i[0:4].eq(self.oper_i) + comb += alu.imm_i.eq(self.imm_i) + comb += alu.isalu_i.eq(0) + return m class CompUnitALUs(CompUnitsBase): - def __init__(self, rwid, opwid): + def __init__(self, rwid, opwid, n_alus): """ Inputs: * :rwid: bit width of register file(s) - both FP and INT @@ -184,13 +253,12 @@ class CompUnitALUs(CompUnitsBase): self.imm_i = Signal(rwid, reset_less=True) # Int ALUs - add = ALU(rwid) - sub = ALU(rwid) - mul = ALU(rwid) - shf = ALU(rwid) + alus = [] 
+ for i in range(n_alus): + alus.append(ALU(rwid)) units = [] - for alu in [add, sub, mul, shf]: + for alu in alus: aluopwid = 3 # extra bit for immediate mode units.append(ComputationUnitNoDelay(rwid, aluopwid, alu)) @@ -200,7 +268,7 @@ class CompUnitALUs(CompUnitsBase): m = CompUnitsBase.elaborate(self, platform) comb = m.d.comb - # hand the same operation to all units, only lower 2 bits though + # hand the same operation to all units, only lower 3 bits though for alu in self.units: comb += alu.oper_i[0:3].eq(self.oper_i) comb += alu.imm_i.eq(self.imm_i) @@ -376,23 +444,32 @@ class Scoreboard(Elaboratable): fp_src1 = self.fpregs.read_port("src1") fp_src2 = self.fpregs.read_port("src2") - # Int ALUs and Comp Units + # Int ALUs and BR ALUs n_int_alus = 5 - cua = CompUnitALUs(self.rwid, 3) - cub = CompUnitBR(self.rwid, 3) - m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cub]) + cua = CompUnitALUs(self.rwid, 3, n_alus=4) + cub = CompUnitBR(self.rwid, 3) # 1 BR ALUs + + # LDST Comp Units + n_ldsts = 2 + cul = CompUnitLDSTs(self.rwid, 3, None) + + # Comp Units + m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cub, cul]) bgt = cub.bgt # get at the branch computation unit br1 = cub.br1 # Int FUs m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus) + # Memory FUs + m.submodules.memfus = memfus = MemFunctionUnits(n_ldsts, 11) + # Count of number of FUs n_intfus = n_int_alus n_fp_fus = 0 # for now - # Integer Priority Picker 1: Adder + Subtractor - intpick1 = GroupPicker(n_intfus) # picks between add, sub, mul and shf + # Integer Priority Picker 1: Adder + Subtractor (and LD/ST) + intpick1 = GroupPicker(n_intfus) # picks 1 reader and 1 writer to intreg m.submodules.intpick1 = intpick1 # INT/FP Issue Unit -- 2.30.2