From: Luke Kenneth Casson Leighton Date: Wed, 29 May 2019 00:10:49 +0000 (+0100) Subject: group computation units together X-Git-Tag: div_pipeline~1938 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=598565150f049215f68e1eb3bf76d908cfda341d;p=soc.git group computation units together --- diff --git a/src/experiment/score6600.py b/src/experiment/score6600.py index 9d6525e8..f1061106 100644 --- a/src/experiment/score6600.py +++ b/src/experiment/score6600.py @@ -19,20 +19,30 @@ from random import randint, seed from copy import deepcopy -class CompUnits(Elaboratable): - - def __init__(self, rwid, n_units): +class CompUnitsBase(Elaboratable): + """ Computation Unit Base class. + + Amazingly, this class works recursively. It's supposed to just + look after some ALUs (that can handle the same operations), + grouping them together, however it turns out that the same code + can also group *groups* of Computation Units together as well. + """ + def __init__(self, rwid, units): """ Inputs: * :rwid: bit width of register file(s) - both FP and INT - * :n_units: number of ALUs - - Note: bgt unit is returned so that a shadow unit can be created - for it - + * :units: sequence of ALUs (or CompUnitsBase derivatives) """ - self.n_units = n_units + self.units = units self.rwid = rwid + if units and isinstance(units[0], CompUnitsBase): + self.n_units = 0 + for u in self.units: + self.n_units += u.n_units + else: + self.n_units = len(units) + + n_units = self.n_units # inputs self.issue_i = Signal(n_units, reset_less=True) @@ -47,38 +57,17 @@ class CompUnits(Elaboratable): self.req_rel_o = Signal(n_units, reset_less=True) # in/out register data (note: not register#, actual data) - self.dest_o = Signal(rwid, reset_less=True) - self.src1_data_i = Signal(rwid, reset_less=True) - self.src2_data_i = Signal(rwid, reset_less=True) - - # Branch ALU and CU - self.bgt = BranchALU(self.rwid) - self.br1 = ComputationUnitNoDelay(self.rwid, 3, self.bgt) + self.data_o = Signal(rwid, reset_less=True) + self.src1_i = Signal(rwid, reset_less=True) + self.src2_i = Signal(rwid, reset_less=True) def elaborate(self, platform): m = Module() comb = m.d.comb - sync = m.d.sync - # Int ALUs - add = ALU(self.rwid) - sub = ALU(self.rwid) - mul = ALU(self.rwid) - shf = ALU(self.rwid) - bgt = self.bgt - - m.submodules.comp1 = comp1 = ComputationUnitNoDelay(self.rwid, 2, add) - m.submodules.comp2 = comp2 = ComputationUnitNoDelay(self.rwid, 2, sub) - m.submodules.comp3 = comp3 = ComputationUnitNoDelay(self.rwid, 2, mul) - m.submodules.comp4 = comp4 = ComputationUnitNoDelay(self.rwid, 2, shf) - m.submodules.br1 = br1 = self.br1 - int_alus = [comp1, comp2, comp3, comp4, br1] - - comb += comp1.oper_i.eq(Const(0, 2)) # op=add - comb += comp2.oper_i.eq(Const(1, 2)) # op=sub - comb += comp3.oper_i.eq(Const(2, 2)) # op=mul - comb += comp4.oper_i.eq(Const(3, 2)) # op=shf - comb += br1.oper_i.eq(Const(4, 3)) # op=bgt + for i, alu in enumerate(self.units): + print ("elaborate comp%d" % i, self, alu) + setattr(m.submodules, "comp%d" % i, alu) go_rd_l = [] go_wr_l = [] @@ -88,7 +77,7 @@ class CompUnits(Elaboratable): rd_rel_l = [] shadow_l = [] godie_l = [] - for alu in int_alus: + for alu in self.units: req_rel_l.append(alu.req_rel_o) rd_rel_l.append(alu.rd_rel_o) shadow_l.append(alu.shadown_i) @@ -109,13 +98,80 @@ class CompUnits(Elaboratable): # connect data register input/output # merge (OR) all integer FU / ALU outputs to a single value - # bit of a hack: treereduce needs a list with an item named "dest_o" - dest_o = treereduce(int_alus) - comb += self.dest_o.eq(dest_o) + # bit of a hack: treereduce needs a list with an item named "data_o" + if self.units: + data_o = treereduce(self.units) + comb += self.data_o.eq(data_o) + + for i, alu in enumerate(self.units): + comb += alu.src1_i.eq(self.src1_i) + comb += alu.src2_i.eq(self.src2_i) + + return m + + +class CompUnitALUs(CompUnitsBase): + + def __init__(self, rwid): + """ Inputs: + + * :rwid: bit width of register file(s) - both FP and INT + """ + + # Int ALUs + add = ALU(rwid) + sub = ALU(rwid) + mul = ALU(rwid) + shf = ALU(rwid) + + units = [] + for alu in [add, sub, mul, shf]: + units.append(ComputationUnitNoDelay(rwid, 2, alu)) + + print ("alu units", units) + CompUnitsBase.__init__(self, rwid, units) + print ("alu base init done") + + def elaborate(self, platform): + print ("alu elaborate start") + m = CompUnitsBase.elaborate(self, platform) + print ("alu elaborate done") + comb = m.d.comb + + comb += self.units[0].oper_i.eq(Const(0, 2)) # op=add + comb += self.units[1].oper_i.eq(Const(1, 2)) # op=sub + comb += self.units[2].oper_i.eq(Const(2, 2)) # op=mul + comb += self.units[3].oper_i.eq(Const(3, 2)) # op=shf + + return m + + +class CompUnitBR(CompUnitsBase): + + def __init__(self, rwid): + """ Inputs: + + * :rwid: bit width of register file(s) - both FP and INT + + Note: bgt unit is returned so that a shadow unit can be created + for it + + """ + + # Branch ALU and CU + self.bgt = BranchALU(rwid) + self.br1 = ComputationUnitNoDelay(rwid, 3, self.bgt) + print ("br units", [self.br1]) + CompUnitsBase.__init__(self, rwid, [self.br1]) + print ("br base init done") + + def elaborate(self, platform): + print ("br elaborate start") + m = CompUnitsBase.elaborate(self, platform) + print ("br elaborate done") + comb = m.d.comb - for i, alu in enumerate(int_alus): - comb += alu.src1_i.eq(self.src1_data_i) - comb += alu.src2_i.eq(self.src2_data_i) + comb += self.br1.oper_i.eq(Const(4, 3)) # op=bgt return m @@ -248,9 +304,11 @@ class Scoreboard(Elaboratable): # Int ALUs and Comp Units n_int_alus = 5 - m.submodules.cu = cu = CompUnits(self.rwid, n_int_alus) - comb += cu.go_die_i.eq(0) - bgt = cu.bgt # get at the branch computation unit + cua = CompUnitALUs(self.rwid) + cub = CompUnitBR(self.rwid) + m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cub]) + bgt = cub.bgt # get at the branch computation unit + br1 = cub.br1 # Int FUs m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus) @@ -393,7 +451,7 @@ class Scoreboard(Elaboratable): comb += bshadow.reset_i[0:n_int_fus].eq(shreset[0:n_int_fus]) bactive = Signal(reset_less=True) - comb += bactive.eq((bspec.active_i | cu.br1.issue_i) & ~cu.br1.go_wr_i) + comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i) # instruction being issued (fn_issue_o) has a shadow cast by the branch with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)): @@ -406,7 +464,7 @@ class Scoreboard(Elaboratable): # whether the branch succeeded or failed, plus, link up to the # "recorder" of whether the instruction was under shadow or not - with m.If(cu.br1.issue_i): + with m.If(br1.issue_i): sync += bspec.active_i.eq(1) with m.If(self.branch_succ_i): comb += bspec.good_i.eq(fn_issue_o & 0x1f) @@ -415,12 +473,12 @@ class Scoreboard(Elaboratable): # branch is active (TODO: a better signal: this is over-using the # go_write signal - actually the branch should not be "writing") - with m.If(cu.br1.go_wr_i): - sync += self.branch_direction_o.eq(cu.br1.data_o+Const(1, 2)) + with m.If(br1.go_wr_i): + sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2)) sync += bspec.active_i.eq(0) comb += bspec.br_i.eq(1) # branch occurs if data == 1, failed if data == 0 - comb += bspec.br_ok_i.eq(cu.br1.data_o == 1) + comb += bspec.br_ok_i.eq(br1.data_o == 1) for i in range(n_int_fus): # *expected* direction of the branch matched against *actual* comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i]) @@ -436,9 +494,9 @@ class Scoreboard(Elaboratable): comb += int_src2.ren.eq(intfus.src2_rsel_o) # connect ALUs to regfule - comb += int_dest.data_i.eq(cu.dest_o) - comb += cu.src1_data_i.eq(int_src1.data_o) - comb += cu.src2_data_i.eq(int_src2.data_o) + comb += int_dest.data_i.eq(cu.data_o) + comb += cu.src1_i.eq(int_src1.data_o) + comb += cu.src2_i.eq(int_src2.data_o) # connect ALU Computation Units comb += cu.go_rd_i[0:n_int_fus].eq(go_rd_o[0:n_int_fus])