src/soc/experiment/compalu_multi.py

   1 from nmigen.compat.sim import run_simulation
   2 from nmigen.cli import verilog, rtlil
   3 from nmigen import Module, Signal, Mux, Elaboratable, Repl, Array, Record
   4 from nmigen.hdl.rec import (DIR_FANIN, DIR_FANOUT)
   5
   6 from nmutil.latch import SRLatch, latchregister
   7 from nmutil.iocontrol import RecordObject
   8
   9 from soc.decoder.power_decoder2 import Data
  10 from soc.decoder.power_enums import InternalOp
  11
  12
  13 """ Computation Unit (aka "ALU Manager").
  14
  15     This module runs a "revolving door" set of three latches, based on
  16     * Issue
  17     * Go_Read
  18     * Go_Write
  19     where one of them cannot be set on any given cycle.
  20
  21     * When issue is first raised, a busy signal is sent out.
  22       The src1 and src2 registers and the operand can be latched in
  23       at this point
  24
  25     * Read request is set, which is acknowledged through the Scoreboard
  26       to the priority picker, which generates (one and only one) Go_Read
  27       at a time.  One of those will (eventually) be this Computation Unit.
  28
  29     * Once Go_Read is set, the src1/src2/operand latch door shuts (locking
  30       src1/src2/operand in place), and the ALU is told to proceed.
  31
  32     * when the ALU pipeline is ready, this activates "write request release",
  33       and the ALU's output is captured into a temporary register.
  34
  35     * Write request release is *HELD UP* (prevented from proceeding) if shadowN
  36       is asserted LOW.  This is how all speculation, precise exceptions,
  37       predication - everything - is achieved.
  38
  39     * Write request release will go through a similar process as Read request,
  40       resulting (eventually) in Go_Write being asserted.
  41
  42     * When Go_Write is asserted, two things happen: (1) the data in the temp
  43       register is placed combinatorially onto the output, and (2) the
  44       req_l latch is cleared, busy is dropped, and the Comp Unit is back
  45       through its revolving door to do another task.
  46
  47     Note that the read and write latches are held synchronously for one cycle,
  48     i.e. that when Go_Read comes in, one cycle is given in which the incoming
  49     register (broadcast over a Regfile Read Port) may have time to be latched.
  50
  51     It is REQUIRED that Go_Read be held valid only for one cycle, and it is
  52     REQUIRED that the corresponding Read_Req be dropped exactly one cycle after
  53     Go_Read is asserted HI.
  54
  55     Likewise for Go_Write: this is asserted for one cycle, and Req_Writes must
  56     likewise be dropped exactly one cycle after assertion of Go_Write.
  57
  58     When Go_Die is asserted then strictly speaking the entire FSM should be
  59     fully reset and that includes sending a cancellation request to the ALU.
  60     (XXX TODO: alu "go die" is not presently wired up)
  61 """
  62
  63 def go_record(n, name):
  64     r = Record([('go', n, DIR_FANIN),
  65                 ('rel', n, DIR_FANOUT)], name=name)
  66     r.go.reset_less = True
  67     r.rel.reset_less = True
  68     return r
  69
  70
  71 def get_regspec_bitwidth(regspec, srcdest, idx):
  72     bitspec = regspec[srcdest][idx]
  73     wid = 0
  74     print (bitspec)
  75     for ranges in bitspec[2].split(","):
  76         ranges = ranges.split(":")
  77         print (ranges)
  78         if len(ranges) == 1: # only one bit
  79             wid += 1
  80         else:
  81             start, end = map(int, ranges)
  82             wid += (end-start)+1
  83     return wid
  84
  85
  86 class CompUnitRecord(RecordObject):
  87     """CompUnitRecord
  88
  89     base class for Computation Units, to provide a uniform API
  90     and allow "record.connect" etc. to be used, particularly when
  91     it comes to connecting multiple Computation Units up as a block
  92     (very laborious)
  93
  94     LDSTCompUnitRecord should derive from this class and add the
  95     additional signals it requires
  96
  97     :subkls:      the class (not an instance) needed to construct the opcode
  98     :rwid:        either an integer (specifies width of all regs) or a "regspec"
  99     """
 100     def __init__(self, subkls, rwid, n_src=None, n_dst=None, name=None):
 101         RecordObject.__init__(self, name)
 102         self._rwid = rwid
 103         if isinstance(rwid, int):
 104             # rwid: integer (covers all registers)
 105             self._n_src, self._n_dst = n_src, n_dst
 106         else:
 107             # rwid: a regspec.
 108             self._n_src, self._n_dst = len(rwid[0]), len(rwid[1])
 109         self._subkls = subkls
 110
 111         src = []
 112         for i in range(n_src):
 113             j = i + 1 # name numbering to match src1/src2
 114             name = "src%d_i" % j
 115             rw = self._get_srcwid(i)
 116             sreg = Signal(rw, name=name, reset_less=True)
 117             setattr(self, name, sreg)
 118             src.append(sreg)
 119         self._src_i = src
 120
 121         dst = []
 122         for i in range(n_dst):
 123             j = i + 1 # name numbering to match dest1/2...
 124             name = "dest%d_i" % j
 125             rw = self._get_dstwid(i)
 126             dreg = Signal(rw, name=name, reset_less=True)
 127             setattr(self, name, dreg)
 128             dst.append(dreg)
 129         self._dest = dst
 130
 131         self.rd = go_record(n_src, name="rd") # read in, req out
 132         self.wr = go_record(n_dst, name="wr") # write in, req out
 133         self.issue_i = Signal(reset_less=True) # fn issue in
 134         self.shadown_i = Signal(reset=1) # shadow function, defaults to ON
 135         self.go_die_i = Signal() # go die (reset)
 136
 137         # operation / data input
 138         self.oper_i = subkls() # operand
 139
 140         # output (busy/done)
 141         self.busy_o = Signal(reset_less=True) # fn busy out
 142         self.done_o = Signal(reset_less=True)
 143
 144     def _get_dstwid(self, i):
 145         if isinstance(self._rwid, int):
 146             return self._rwid
 147         return get_regspec_bitwidth(self._rwid, 1, i)
 148
 149     def _get_srcwid(self, i):
 150         if isinstance(self._rwid, int):
 151             return self._rwid
 152         return get_regspec_bitwidth(self._rwid, 0, i)
 153
 154 class MultiCompUnit(Elaboratable):
 155     def __init__(self, rwid, alu, opsubsetkls, n_src=2, n_dst=1):
 156         """MultiCompUnit
 157
 158         * :rwid:        width of register latches (TODO: allocate per regspec)
 159         * :alu:         the ALU (pipeline, FSM) - must conform to nmutil Pipe API
 160         * :opsubsetkls: the subset of Decode2ExecuteType
 161         * :n_src:       number of src operands
 162         * :n_dst:       number of destination operands
 163         """
 164         self.n_src, self.n_dst = n_src, n_dst
 165         self.rwid = rwid
 166         self.opsubsetkls = opsubsetkls
 167         self.alu = alu # actual ALU - set as a "submodule" of the CU
 168         self.cu = cu = CompUnitRecord(opsubsetkls, rwid, n_src, n_dst)
 169
 170         for i in range(n_src):
 171             j = i + 1 # name numbering to match src1/src2
 172             name = "src%d_i" % j
 173             setattr(self, name, getattr(cu, name))
 174
 175         for i in range(n_dst):
 176             j = i + 1 # name numbering to match dest1/2...
 177             name = "dest%d_i" % j
 178             setattr(self, name, getattr(cu, name))
 179
 180         # convenience names
 181         self.rd = cu.rd
 182         self.wr = cu.wr
 183         self.go_rd_i = self.rd.go # temporary naming
 184         self.go_wr_i = self.wr.go # temporary naming
 185         self.rd_rel_o = self.rd.rel # temporary naming
 186         self.req_rel_o = self.wr.rel # temporary naming
 187         self.issue_i = cu.issue_i
 188         self.shadown_i = cu.shadown_i
 189         self.go_die_i = cu.go_die_i
 190
 191         # operation / data input
 192         self.oper_i = cu.oper_i
 193         self.src_i = cu._src_i
 194
 195         self.busy_o = cu.busy_o
 196         self.dest = cu._dest
 197         self.data_o = self.dest[0] # Dest out
 198         self.done_o = cu.done_o
 199
 200     def elaborate(self, platform):
 201         m = Module()
 202         m.submodules.alu = self.alu
 203         m.submodules.src_l = src_l = SRLatch(False, self.n_src, name="src")
 204         m.submodules.opc_l = opc_l = SRLatch(sync=False, name="opc")
 205         m.submodules.req_l = req_l = SRLatch(False, self.n_dst, name="req")
 206         m.submodules.rst_l = rst_l = SRLatch(sync=False, name="rst")
 207         m.submodules.rok_l = rok_l = SRLatch(sync=False, name="rdok")
 208
 209         # ALU only proceeds when all src are ready.  rd_rel_o is delayed
 210         # so combine it with go_rd_i.  if all bits are set we're good
 211         all_rd = Signal(reset_less=True)
 212         m.d.comb += all_rd.eq(self.busy_o & rok_l.q &
 213                     (((~self.rd.rel) | self.rd.go).all()))
 214
 215         # write_requests all done
 216         # req_done works because any one of the last of the writes
 217         # is enough, when combined with when read-phase is done (rst_l.q)
 218         wr_any = Signal(reset_less=True)
 219         req_done = Signal(reset_less=True)
 220         m.d.comb += self.done_o.eq(self.busy_o & ~(self.wr.rel.bool()))
 221         m.d.comb += wr_any.eq(self.wr.go.bool())
 222         m.d.comb += req_done.eq(rst_l.q & wr_any)
 223
 224         # shadow/go_die
 225         reset = Signal(reset_less=True)
 226         rst_r = Signal(reset_less=True) # reset latch off
 227         reset_w = Signal(self.n_dst, reset_less=True)
 228         reset_r = Signal(self.n_src, reset_less=True)
 229         m.d.comb += reset.eq(req_done | self.go_die_i)
 230         m.d.comb += rst_r.eq(self.issue_i | self.go_die_i)
 231         m.d.comb += reset_w.eq(self.wr.go | Repl(self.go_die_i, self.n_dst))
 232         m.d.comb += reset_r.eq(self.rd.go | Repl(self.go_die_i, self.n_src))
 233
 234         # read-done,wr-proceed latch
 235         m.d.comb += rok_l.s.eq(self.issue_i)  # set up when issue starts
 236         m.d.comb += rok_l.r.eq(self.alu.p_ready_o) # off when ALU acknowledges
 237
 238         # wr-done, back-to-start latch
 239         m.d.comb += rst_l.s.eq(all_rd)     # set when read-phase is fully done
 240         m.d.comb += rst_l.r.eq(rst_r)        # *off* on issue
 241
 242         # opcode latch (not using go_rd_i) - inverted so that busy resets to 0
 243         m.d.sync += opc_l.s.eq(self.issue_i)       # set on issue
 244         m.d.sync += opc_l.r.eq(self.alu.n_valid_o & req_done) # reset on ALU
 245
 246         # src operand latch (not using go_wr_i)
 247         m.d.sync += src_l.s.eq(Repl(self.issue_i, self.n_src))
 248         m.d.sync += src_l.r.eq(reset_r)
 249
 250         # dest operand latch (not using issue_i)
 251         m.d.sync += req_l.s.eq(Repl(all_rd, self.n_dst))
 252         m.d.sync += req_l.r.eq(reset_w)
 253
 254         # create a latch/register for the operand
 255         oper_r = self.opsubsetkls()
 256         latchregister(m, self.oper_i, oper_r, self.issue_i, "oper_r")
 257
 258         # and for each output from the ALU
 259         drl = []
 260         for i in range(self.n_dst):
 261             name = "data_r%d" % i
 262             data_r = Signal(self.cu._get_srcwid(i), name=name, reset_less=True)
 263             latchregister(m, self.alu.out[i], data_r, req_l.q[i], name)
 264             drl.append(data_r)
 265
 266         # pass the operation to the ALU
 267         m.d.comb += self.alu.op.eq(oper_r)
 268
 269         # create list of src/alu-src/src-latch.  override 1st and 2nd one below.
 270         # in the case, for ALU and Logical pipelines, we assume RB is the 2nd operand
 271         # in the input "regspec".  see for example soc.fu.alu.pipe_data.ALUInputData
 272         # TODO: assume RA is the 1st operand, zero_a detection is needed.
 273         sl = []
 274         for i in range(self.n_src):
 275             sl.append([self.src_i[i], self.alu.i[i], src_l.q[i]])
 276
 277         # if the operand subset has "zero_a" we implicitly assume that means
 278         # src_i[0] is an INT register type where zero can be multiplexed in, instead.
 279         # see https://bugs.libre-soc.org/show_bug.cgi?id=336
 280         #if hasattr(oper_r, "zero_a"):
 281             # select zero immediate if opcode says so.  however also change the latch
 282             # to trigger *from* the opcode latch instead.
 283             # ...
 284             # ...
 285
 286         # if the operand subset has "imm_data" we implicitly assume that means
 287         # "this is an INT ALU/Logical FU jobbie, RB is multiplexed with the immediate"
 288         if hasattr(oper_r, "imm_data"):
 289             # select immediate if opcode says so.  however also change the latch
 290             # to trigger *from* the opcode latch instead.
 291             op_is_imm = oper_r.imm_data.imm_ok
 292             src2_or_imm = Signal(self.cu._get_srcwid(1), reset_less=True)
 293             src_sel = Signal(reset_less=True)
 294             m.d.comb += src_sel.eq(Mux(op_is_imm, opc_l.q, src_l.q[1]))
 295             m.d.comb += src2_or_imm.eq(Mux(op_is_imm, oper_r.imm_data.imm,
 296                                                       self.src2_i))
 297             # overwrite 2nd src-latch with immediate-muxed stuff
 298             sl[1][0] = src2_or_imm
 299             sl[1][2] = src_sel
 300
 301         # create a latch/register for src1/src2 (even if it is a copy of an immediate)
 302         for i in range(self.n_src):
 303             src, alusrc, latch = sl[i]
 304             latchregister(m, src, alusrc, latch, name="src_r%d" % i)
 305
 306         # -----
 307         # outputs
 308         # -----
 309
 310         # all request signals gated by busy_o.  prevents picker problems
 311         m.d.comb += self.busy_o.eq(opc_l.q) # busy out
 312         bro = Repl(self.busy_o, self.n_src)
 313         m.d.comb += self.rd.rel.eq(src_l.q & bro) # src1/src2 req rel
 314
 315         # on a go_read, tell the ALU we're accepting data.
 316         # NOTE: this spells TROUBLE if the ALU isn't ready!
 317         # go_read is only valid for one clock!
 318         with m.If(all_rd):                           # src operands ready, GO!
 319             with m.If(~self.alu.p_ready_o):          # no ACK yet
 320                 m.d.comb += self.alu.p_valid_i.eq(1) # so indicate valid
 321
 322         brd = Repl(self.busy_o & self.shadown_i, self.n_dst)
 323         # only proceed if ALU says its output is valid
 324         with m.If(self.alu.n_valid_o):
 325             # when ALU ready, write req release out. waits for shadow
 326             m.d.comb += self.wr.rel.eq(req_l.q & brd)
 327             # when output latch is ready, and ALU says ready, accept ALU output
 328             with m.If(reset):
 329                 m.d.comb += self.alu.n_ready_i.eq(1) # tells ALU "thanks got it"
 330
 331         # output the data from the latch on go_write
 332         for i in range(self.n_dst):
 333             with m.If(self.wr.go[i]):
 334                 m.d.comb += self.dest[i].eq(drl[i])
 335
 336         return m
 337
 338     def __iter__(self):
 339         yield self.rd.go
 340         yield self.wr.go
 341         yield self.issue_i
 342         yield self.shadown_i
 343         yield self.go_die_i
 344         yield from self.oper_i.ports()
 345         yield self.src1_i
 346         yield self.src2_i
 347         yield self.busy_o
 348         yield self.rd.rel
 349         yield self.wr.rel
 350         yield self.data_o
 351
 352     def ports(self):
 353         return list(self)
 354
 355
 356 def op_sim(dut, a, b, op, inv_a=0, imm=0, imm_ok=0):
 357     yield dut.issue_i.eq(0)
 358     yield
 359     yield dut.src_i[0].eq(a)
 360     yield dut.src_i[1].eq(b)
 361     yield dut.oper_i.insn_type.eq(op)
 362     yield dut.oper_i.invert_a.eq(inv_a)
 363     yield dut.oper_i.imm_data.imm.eq(imm)
 364     yield dut.oper_i.imm_data.imm_ok.eq(imm_ok)
 365     yield dut.issue_i.eq(1)
 366     yield
 367     yield dut.issue_i.eq(0)
 368     yield
 369     yield dut.rd.go.eq(0b11)
 370     while True:
 371         yield
 372         rd_rel_o = yield dut.rd.rel
 373         print ("rd_rel", rd_rel_o)
 374         if rd_rel_o:
 375             break
 376     yield
 377     yield dut.rd.go.eq(0)
 378     req_rel_o = yield dut.wr.rel
 379     result = yield dut.data_o
 380     print ("req_rel", req_rel_o, result)
 381     while True:
 382         req_rel_o = yield dut.wr.rel
 383         result = yield dut.data_o
 384         print ("req_rel", req_rel_o, result)
 385         if req_rel_o:
 386             break
 387         yield
 388     yield dut.wr.go[0].eq(1)
 389     yield
 390     result = yield dut.data_o
 391     print ("result", result)
 392     yield dut.wr.go[0].eq(0)
 393     yield
 394     return result
 395
 396
 397 def scoreboard_sim(dut):
 398     result = yield from op_sim(dut, 5, 2, InternalOp.OP_ADD, inv_a=0,
 399                                     imm=8, imm_ok=1)
 400     assert result == 13
 401
 402     result = yield from op_sim(dut, 5, 2, InternalOp.OP_ADD)
 403     assert result == 7
 404
 405     result = yield from op_sim(dut, 5, 2, InternalOp.OP_ADD, inv_a=1)
 406     assert result == 65532
 407
 408
 409 def test_compunit():
 410     from alu_hier import ALU
 411     from soc.fu.alu.alu_input_record import CompALUOpSubset
 412
 413     m = Module()
 414     alu = ALU(16)
 415     dut = MultiCompUnit(16, alu, CompALUOpSubset)
 416     m.submodules.cu = dut
 417
 418     vl = rtlil.convert(dut, ports=dut.ports())
 419     with open("test_compunit1.il", "w") as f:
 420         f.write(vl)
 421
 422     run_simulation(m, scoreboard_sim(dut), vcd_name='test_compunit1.vcd')
 423
 424
 425 def test_compunit_regspec1():
 426     from alu_hier import ALU
 427     from soc.fu.alu.alu_input_record import CompALUOpSubset
 428
 429     inspec = [('INT', 'a', '0:15'),
 430               ('INT', 'b', '0:15')]
 431     outspec = [('INT', 'o', '0:15'),
 432               ]
 433
 434     regspec = (inspec, outspec)
 435
 436     m = Module()
 437     alu = ALU(16)
 438     dut = MultiCompUnit(regspec, alu, CompALUOpSubset)
 439     m.submodules.cu = dut
 440
 441     vl = rtlil.convert(dut, ports=dut.ports())
 442     with open("test_compunit_regspec1.il", "w") as f:
 443         f.write(vl)
 444
 445     run_simulation(m, scoreboard_sim(dut), vcd_name='test_compunit1.vcd')
 446
 447
 448 if __name__ == '__main__':
 449     test_compunit()
 450     test_compunit_regspec1()