src/soc/experiment/compalu_multi.py

   1 from nmigen.compat.sim import run_simulation
   2 from nmigen.cli import verilog, rtlil
   3 from nmigen import Module, Signal, Mux, Elaboratable, Repl, Array, Record
   4 from nmigen.hdl.rec import (DIR_FANIN, DIR_FANOUT)
   5
   6 from nmutil.latch import SRLatch, latchregister
   7 from nmutil.iocontrol import RecordObject
   8
   9 from soc.decoder.power_decoder2 import Data
  10 from soc.decoder.power_enums import InternalOp
  11
  12
  13 """ Computation Unit (aka "ALU Manager").
  14
  15     This module runs a "revolving door" set of three latches, based on
  16     * Issue
  17     * Go_Read
  18     * Go_Write
  19     where one of them cannot be set on any given cycle.
  20
  21     * When issue is first raised, a busy signal is sent out.
  22       The src1 and src2 registers and the operand can be latched in
  23       at this point
  24
  25     * Read request is set, which is acknowledged through the Scoreboard
  26       to the priority picker, which generates (one and only one) Go_Read
  27       at a time.  One of those will (eventually) be this Computation Unit.
  28
  29     * Once Go_Read is set, the src1/src2/operand latch door shuts (locking
  30       src1/src2/operand in place), and the ALU is told to proceed.
  31
  32     * when the ALU pipeline is ready, this activates "write request release",
  33       and the ALU's output is captured into a temporary register.
  34
  35     * Write request release is *HELD UP* (prevented from proceeding) if shadowN
  36       is asserted LOW.  This is how all speculation, precise exceptions,
  37       predication - everything - is achieved.
  38
  39     * Write request release will go through a similar process as Read request,
  40       resulting (eventually) in Go_Write being asserted.
  41
  42     * When Go_Write is asserted, two things happen: (1) the data in the temp
  43       register is placed combinatorially onto the output, and (2) the
  44       req_l latch is cleared, busy is dropped, and the Comp Unit is back
  45       through its revolving door to do another task.
  46
  47     Note that the read and write latches are held synchronously for one cycle,
  48     i.e. that when Go_Read comes in, one cycle is given in which the incoming
  49     register (broadcast over a Regfile Read Port) may have time to be latched.
  50
  51     It is REQUIRED that Go_Read be held valid only for one cycle, and it is
  52     REQUIRED that the corresponding Read_Req be dropped exactly one cycle after
  53     Go_Read is asserted HI.
  54
  55     Likewise for Go_Write: this is asserted for one cycle, and Req_Writes must
  56     likewise be dropped exactly one cycle after assertion of Go_Write.
  57
  58     When Go_Die is asserted then strictly speaking the entire FSM should be
  59     fully reset and that includes sending a cancellation request to the ALU.
  60     (XXX TODO: alu "go die" is not presently wired up)
  61 """
  62
  63 def go_record(n, name):
  64     r = Record([('go', n, DIR_FANIN),
  65                 ('rel', n, DIR_FANOUT)], name=name)
  66     r.go.reset_less = True
  67     r.rel.reset_less = True
  68     return r
  69
  70
  71 class CompUnitRecord(RecordObject):
  72     """CompUnitRecord
  73
  74     base class for Computation Units, to provide a uniform API
  75     and allow "record.connect" etc. to be used, particularly when
  76     it comes to connecting multiple Computation Units up as a block
  77     (very laborious)
  78
  79     LDSTCompUnitRecord should derive from this class and add the
  80     additional signals it requires
  81
  82     :subkls:      the class (not an instance) needed to construct the opcode
  83     """
  84     def __init__(self, subkls, rwid, n_src, n_dst, name=None):
  85         RecordObject.__init__(self, name)
  86         self._n_src, self._n_dst = n_src, n_dst
  87         self._rwid = rwid
  88         self._subkls = subkls
  89
  90         src = []
  91         for i in range(n_src):
  92             j = i + 1 # name numbering to match src1/src2
  93             name = "src%d_i" % j
  94             sreg = Signal(rwid, name=name, reset_less=True)
  95             setattr(self, name, sreg)
  96             src.append(sreg)
  97         self._src_i = src
  98
  99         dst = []
 100         for i in range(n_dst):
 101             j = i + 1 # name numbering to match dest1/2...
 102             name = "dest%d_i" % j
 103             dreg = Signal(rwid, name=name, reset_less=True)
 104             setattr(self, name, dreg)
 105             dst.append(dreg)
 106         self._dest = dst
 107
 108         self.rd = go_record(n_src, name="rd") # read in, req out
 109         self.wr = go_record(n_dst, name="wr") # write in, req out
 110         self.issue_i = Signal(reset_less=True) # fn issue in
 111         self.shadown_i = Signal(reset=1) # shadow function, defaults to ON
 112         self.go_die_i = Signal() # go die (reset)
 113
 114         # operation / data input
 115         self.oper_i = subkls() # operand
 116
 117         # output (busy/done)
 118         self.busy_o = Signal(reset_less=True) # fn busy out
 119         self.done_o = Signal(reset_less=True)
 120
 121
 122 class MultiCompUnit(Elaboratable):
 123     def __init__(self, rwid, alu, opsubsetkls, n_src=2, n_dst=1):
 124         """MultiCompUnit
 125
 126         * :rwid:        width of register latches (TODO: allocate per regspec)
 127         * :alu:         the ALU (pipeline, FSM) - must conform to nmutil Pipe API
 128         * :opsubsetkls: the subset of Decode2ExecuteType
 129         * :n_src:       number of src operands
 130         * :n_dst:       number of destination operands
 131         """
 132         self.n_src, self.n_dst = n_src, n_dst
 133         self.rwid = rwid
 134         self.opsubsetkls = opsubsetkls
 135         self.alu = alu # actual ALU - set as a "submodule" of the CU
 136         self.cu = cu = CompUnitRecord(opsubsetkls, rwid, n_src, n_dst)
 137
 138         for i in range(n_src):
 139             j = i + 1 # name numbering to match src1/src2
 140             name = "src%d_i" % j
 141             setattr(self, name, getattr(cu, name))
 142
 143         for i in range(n_dst):
 144             j = i + 1 # name numbering to match dest1/2...
 145             name = "dest%d_i" % j
 146             setattr(self, name, getattr(cu, name))
 147
 148         # convenience names
 149         self.rd = cu.rd
 150         self.wr = cu.wr
 151         self.go_rd_i = self.rd.go # temporary naming
 152         self.go_wr_i = self.wr.go # temporary naming
 153         self.rd_rel_o = self.rd.rel # temporary naming
 154         self.req_rel_o = self.wr.rel # temporary naming
 155         self.issue_i = cu.issue_i
 156         self.shadown_i = cu.shadown_i
 157         self.go_die_i = cu.go_die_i
 158
 159         # operation / data input
 160         self.oper_i = cu.oper_i
 161         self.src_i = cu._src_i
 162
 163         self.busy_o = cu.busy_o
 164         self.dest = cu._dest
 165         self.data_o = self.dest[0] # Dest out
 166         self.done_o = cu.done_o
 167
 168     def elaborate(self, platform):
 169         m = Module()
 170         m.submodules.alu = self.alu
 171         m.submodules.src_l = src_l = SRLatch(False, self.n_src, name="src")
 172         m.submodules.opc_l = opc_l = SRLatch(sync=False, name="opc")
 173         m.submodules.req_l = req_l = SRLatch(False, self.n_dst, name="req")
 174         m.submodules.rst_l = rst_l = SRLatch(sync=False, name="rst")
 175         m.submodules.rok_l = rok_l = SRLatch(sync=False, name="rdok")
 176
 177         # ALU only proceeds when all src are ready.  rd_rel_o is delayed
 178         # so combine it with go_rd_i.  if all bits are set we're good
 179         all_rd = Signal(reset_less=True)
 180         m.d.comb += all_rd.eq(self.busy_o & rok_l.q &
 181                     (((~self.rd.rel) | self.rd.go).all()))
 182
 183         # write_requests all done
 184         # req_done works because any one of the last of the writes
 185         # is enough, when combined with when read-phase is done (rst_l.q)
 186         wr_any = Signal(reset_less=True)
 187         req_done = Signal(reset_less=True)
 188         m.d.comb += self.done_o.eq(self.busy_o & ~(self.wr.rel.bool()))
 189         m.d.comb += wr_any.eq(self.wr.go.bool())
 190         m.d.comb += req_done.eq(rst_l.q & wr_any)
 191
 192         # shadow/go_die
 193         reset = Signal(reset_less=True)
 194         rst_r = Signal(reset_less=True) # reset latch off
 195         reset_w = Signal(self.n_dst, reset_less=True)
 196         reset_r = Signal(self.n_src, reset_less=True)
 197         m.d.comb += reset.eq(req_done | self.go_die_i)
 198         m.d.comb += rst_r.eq(self.issue_i | self.go_die_i)
 199         m.d.comb += reset_w.eq(self.wr.go | Repl(self.go_die_i, self.n_dst))
 200         m.d.comb += reset_r.eq(self.rd.go | Repl(self.go_die_i, self.n_src))
 201
 202         # read-done,wr-proceed latch
 203         m.d.comb += rok_l.s.eq(self.issue_i)  # set up when issue starts
 204         m.d.comb += rok_l.r.eq(self.alu.p_ready_o) # off when ALU acknowledges
 205
 206         # wr-done, back-to-start latch
 207         m.d.comb += rst_l.s.eq(all_rd)     # set when read-phase is fully done
 208         m.d.comb += rst_l.r.eq(rst_r)        # *off* on issue
 209
 210         # opcode latch (not using go_rd_i) - inverted so that busy resets to 0
 211         m.d.sync += opc_l.s.eq(self.issue_i)       # set on issue
 212         m.d.sync += opc_l.r.eq(self.alu.n_valid_o & req_done) # reset on ALU
 213
 214         # src operand latch (not using go_wr_i)
 215         m.d.sync += src_l.s.eq(Repl(self.issue_i, self.n_src))
 216         m.d.sync += src_l.r.eq(reset_r)
 217
 218         # dest operand latch (not using issue_i)
 219         m.d.sync += req_l.s.eq(Repl(all_rd, self.n_dst))
 220         m.d.sync += req_l.r.eq(reset_w)
 221
 222         # create a latch/register for the operand
 223         oper_r = self.opsubsetkls()
 224         latchregister(m, self.oper_i, oper_r, self.issue_i, "oper_r")
 225
 226         # and for each output from the ALU
 227         drl = []
 228         for i in range(self.n_dst):
 229             name = "data_r%d" % i
 230             data_r = Signal(self.rwid, name=name, reset_less=True)
 231             latchregister(m, self.alu.out[i], data_r, req_l.q[i], name)
 232             drl.append(data_r)
 233
 234         # pass the operation to the ALU
 235         m.d.comb += self.alu.op.eq(oper_r)
 236
 237         # create list of src/alu-src/src-latch.  override 2nd one below
 238         sl = []
 239         for i in range(self.n_src):
 240             sl.append([self.src_i[i], self.alu.i[i], src_l.q[i]])
 241
 242         # select immediate if opcode says so.  however also change the latch
 243         # to trigger *from* the opcode latch instead.
 244         op_is_imm = oper_r.imm_data.imm_ok
 245         src2_or_imm = Signal(self.rwid, reset_less=True)
 246         src_sel = Signal(reset_less=True)
 247         m.d.comb += src_sel.eq(Mux(op_is_imm, opc_l.q, src_l.q[1]))
 248         m.d.comb += src2_or_imm.eq(Mux(op_is_imm, oper_r.imm_data.imm,
 249                                                   self.src2_i))
 250         # overwrite 2nd src-latch with immediate-muxed stuff
 251         sl[1][0] = src2_or_imm
 252         sl[1][2] = src_sel
 253
 254         # create a latch/register for src1/src2
 255         for i in range(self.n_src):
 256             src, alusrc, latch = sl[i]
 257             latchregister(m, src, alusrc, latch, name="src_r%d" % i)
 258
 259         # -----
 260         # outputs
 261         # -----
 262
 263         # all request signals gated by busy_o.  prevents picker problems
 264         m.d.comb += self.busy_o.eq(opc_l.q) # busy out
 265         bro = Repl(self.busy_o, self.n_src)
 266         m.d.comb += self.rd.rel.eq(src_l.q & bro) # src1/src2 req rel
 267
 268         # on a go_read, tell the ALU we're accepting data.
 269         # NOTE: this spells TROUBLE if the ALU isn't ready!
 270         # go_read is only valid for one clock!
 271         with m.If(all_rd):                           # src operands ready, GO!
 272             with m.If(~self.alu.p_ready_o):          # no ACK yet
 273                 m.d.comb += self.alu.p_valid_i.eq(1) # so indicate valid
 274
 275         brd = Repl(self.busy_o & self.shadown_i, self.n_dst)
 276         # only proceed if ALU says its output is valid
 277         with m.If(self.alu.n_valid_o):
 278             # when ALU ready, write req release out. waits for shadow
 279             m.d.comb += self.wr.rel.eq(req_l.q & brd)
 280             # when output latch is ready, and ALU says ready, accept ALU output
 281             with m.If(reset):
 282                 m.d.comb += self.alu.n_ready_i.eq(1) # tells ALU "thanks got it"
 283
 284         # output the data from the latch on go_write
 285         for i in range(self.n_dst):
 286             with m.If(self.wr.go[i]):
 287                 m.d.comb += self.dest[i].eq(drl[i])
 288
 289         return m
 290
 291     def __iter__(self):
 292         yield self.rd.go
 293         yield self.wr.go
 294         yield self.issue_i
 295         yield self.shadown_i
 296         yield self.go_die_i
 297         yield from self.oper_i.ports()
 298         yield self.src1_i
 299         yield self.src2_i
 300         yield self.busy_o
 301         yield self.rd.rel
 302         yield self.wr.rel
 303         yield self.data_o
 304
 305     def ports(self):
 306         return list(self)
 307
 308
 309 def op_sim(dut, a, b, op, inv_a=0, imm=0, imm_ok=0):
 310     yield dut.issue_i.eq(0)
 311     yield
 312     yield dut.src_i[0].eq(a)
 313     yield dut.src_i[1].eq(b)
 314     yield dut.oper_i.insn_type.eq(op)
 315     yield dut.oper_i.invert_a.eq(inv_a)
 316     yield dut.oper_i.imm_data.imm.eq(imm)
 317     yield dut.oper_i.imm_data.imm_ok.eq(imm_ok)
 318     yield dut.issue_i.eq(1)
 319     yield
 320     yield dut.issue_i.eq(0)
 321     yield
 322     yield dut.rd.go.eq(0b11)
 323     while True:
 324         yield
 325         rd_rel_o = yield dut.rd.rel
 326         print ("rd_rel", rd_rel_o)
 327         if rd_rel_o:
 328             break
 329     yield
 330     yield dut.rd.go.eq(0)
 331     req_rel_o = yield dut.wr.rel
 332     result = yield dut.data_o
 333     print ("req_rel", req_rel_o, result)
 334     while True:
 335         req_rel_o = yield dut.wr.rel
 336         result = yield dut.data_o
 337         print ("req_rel", req_rel_o, result)
 338         if req_rel_o:
 339             break
 340         yield
 341     yield dut.wr.go[0].eq(1)
 342     yield
 343     result = yield dut.data_o
 344     print ("result", result)
 345     yield dut.wr.go[0].eq(0)
 346     yield
 347     return result
 348
 349
 350 def scoreboard_sim(dut):
 351     result = yield from op_sim(dut, 5, 2, InternalOp.OP_ADD, inv_a=0,
 352                                     imm=8, imm_ok=1)
 353     assert result == 13
 354
 355     result = yield from op_sim(dut, 5, 2, InternalOp.OP_ADD)
 356     assert result == 7
 357
 358     result = yield from op_sim(dut, 5, 2, InternalOp.OP_ADD, inv_a=1)
 359     assert result == 65532
 360
 361
 362 def test_scoreboard():
 363     from alu_hier import ALU
 364     from soc.fu.alu.alu_input_record import CompALUOpSubset
 365
 366     m = Module()
 367     alu = ALU(16)
 368     dut = MultiCompUnit(16, alu, CompALUOpSubset)
 369     m.submodules.cu = dut
 370
 371     vl = rtlil.convert(dut, ports=dut.ports())
 372     with open("test_compalu.il", "w") as f:
 373         f.write(vl)
 374
 375     run_simulation(m, scoreboard_sim(dut), vcd_name='test_compalu.vcd')
 376
# when executed as a script, run the Comp Unit test directly
if __name__ == '__main__':
    test_scoreboard()