src/soc/experiment/alu_hier.py

   1 """*Experimental* ALU: based on nmigen alu_hier.py, includes branch-compare ALU
   2
   3 This ALU is *deliberately* designed to add in (unnecessary) delays into
   4 different operations so as to be able to test the 6600-style matrices
   5 and the CompUnits.  Countdown timers wait for (defined) periods before
   6 indicating that the output is valid
   7
   8 A "real" integer ALU would place the answers onto the output bus after
   9 only one cycle (sync)
  10 """
  11
  12 from nmigen import Elaboratable, Signal, Module, Const, Mux, Array
  13 from nmigen.hdl.rec import Record, Layout
  14 from nmigen.cli import main
  15 from nmigen.cli import verilog, rtlil
  16 from nmigen.compat.sim import run_simulation
  17 from nmigen.back.pysim import Simulator, Settle
  18
  19 from soc.decoder.power_enums import InternalOp, Function, CryIn
  20
  21 from soc.fu.alu.alu_input_record import CompALUOpSubset
  22 from soc.fu.cr.cr_input_record import CompCROpSubset
  23
  24 import operator
  25
  26
  27
  28
  29 class Adder(Elaboratable):
  30     def __init__(self, width):
  31         self.invert_a = Signal()
  32         self.a   = Signal(width)
  33         self.b   = Signal(width)
  34         self.o   = Signal(width, name="add_o")
  35
  36     def elaborate(self, platform):
  37         m = Module()
  38         with m.If(self.invert_a):
  39             m.d.comb += self.o.eq((~self.a) + self.b)
  40         with m.Else():
  41             m.d.comb += self.o.eq(self.a + self.b)
  42         return m
  43
  44
  45 class Subtractor(Elaboratable):
  46     def __init__(self, width):
  47         self.a   = Signal(width)
  48         self.b   = Signal(width)
  49         self.o   = Signal(width, name="sub_o")
  50
  51     def elaborate(self, platform):
  52         m = Module()
  53         m.d.comb += self.o.eq(self.a - self.b)
  54         return m
  55
  56
  57 class Multiplier(Elaboratable):
  58     def __init__(self, width):
  59         self.a   = Signal(width)
  60         self.b   = Signal(width)
  61         self.o   = Signal(width, name="mul_o")
  62
  63     def elaborate(self, platform):
  64         m = Module()
  65         m.d.comb += self.o.eq(self.a * self.b)
  66         return m
  67
  68
  69 class Shifter(Elaboratable):
  70     def __init__(self, width):
  71         self.width = width
  72         self.a   = Signal(width)
  73         self.b   = Signal(width)
  74         self.o   = Signal(width, name="shf_o")
  75
  76     def elaborate(self, platform):
  77         m = Module()
  78         btrunc = Signal(self.width)
  79         m.d.comb += btrunc.eq(self.b & Const((1<<self.width)-1))
  80         m.d.comb += self.o.eq(self.a >> btrunc)
  81         return m
  82
  83 class Dummy:
  84     pass
  85
  86
  87 class DummyALU(Elaboratable):
  88     def __init__(self, width):
  89         self.p = Dummy() # make look like nmutil pipeline API
  90         self.p.data_i = Dummy()
  91         self.p.data_i.ctx = Dummy()
  92         self.n = Dummy() # make look like nmutil pipeline API
  93         self.n.data_o = Dummy()
  94         self.p.valid_i = Signal()
  95         self.p.ready_o = Signal()
  96         self.n.ready_i = Signal()
  97         self.n.valid_o = Signal()
  98         self.counter   = Signal(4)
  99         self.op  = CompCROpSubset()
 100         i = []
 101         i.append(Signal(width, name="i1"))
 102         i.append(Signal(width, name="i2"))
 103         i.append(Signal(width, name="i3"))
 104         self.i = Array(i)
 105         self.a, self.b, self.c = i[0], i[1], i[2]
 106         self.out = Array([Signal(width, name="alu_o")])
 107         self.o = self.out[0]
 108         self.width = width
 109         # more "look like nmutil pipeline API"
 110         self.p.data_i.ctx.op = self.op
 111         self.p.data_i.a = self.a
 112         self.p.data_i.b = self.b
 113         self.p.data_i.c = self.c
 114         self.n.data_o.o = self.o
 115
 116     def elaborate(self, platform):
 117         m = Module()
 118
 119         go_now = Signal(reset_less=True) # testing no-delay ALU
 120
 121         with m.If(self.p.valid_i):
 122             # input is valid. next check, if we already said "ready" or not
 123             with m.If(~self.p.ready_o):
 124                 # we didn't say "ready" yet, so say so and initialise
 125                 m.d.sync += self.p.ready_o.eq(1)
 126
 127                 m.d.sync += self.o.eq(self.a)
 128                 m.d.comb += go_now.eq(1)
 129                 m.d.sync += self.counter.eq(1)
 130
 131         with m.Else():
 132             # input says no longer valid, so drop ready as well.
 133             # a "proper" ALU would have had to sync in the opcode and a/b ops
 134             m.d.sync += self.p.ready_o.eq(0)
 135
 136         # ok so the counter's running: when it gets to 1, fire the output
 137         with m.If((self.counter == 1) | go_now):
 138             # set the output as valid if the recipient is ready for it
 139             m.d.sync += self.n.valid_o.eq(1)
 140         with m.If(self.n.ready_i & self.n.valid_o):
 141             m.d.sync += self.n.valid_o.eq(0)
 142             # recipient said it was ready: reset back to known-good.
 143             m.d.sync += self.counter.eq(0) # reset the counter
 144             m.d.sync += self.o.eq(0) # clear the output for tidiness sake
 145
 146         # countdown to 1 (transition from 1 to 0 only on acknowledgement)
 147         with m.If(self.counter > 1):
 148             m.d.sync += self.counter.eq(self.counter - 1)
 149
 150         return m
 151
 152     def __iter__(self):
 153         yield from self.op.ports()
 154         yield self.a
 155         yield self.b
 156         yield self.c
 157         yield self.o
 158
 159     def ports(self):
 160         return list(self)
 161
 162
 163 class ALU(Elaboratable):
 164     def __init__(self, width):
 165         self.p = Dummy() # make look like nmutil pipeline API
 166         self.p.data_i = Dummy()
 167         self.p.data_i.ctx = Dummy()
 168         self.n = Dummy() # make look like nmutil pipeline API
 169         self.n.data_o = Dummy()
 170         self.p.valid_i = Signal()
 171         self.p.ready_o = Signal()
 172         self.n.ready_i = Signal()
 173         self.n.valid_o = Signal()
 174         self.counter   = Signal(4)
 175         self.op = CompALUOpSubset(name="op")
 176         i = []
 177         i.append(Signal(width, name="i1"))
 178         i.append(Signal(width, name="i2"))
 179         self.i = Array(i)
 180         self.a, self.b = i[0], i[1]
 181         self.out = Array([Signal(width, name="alu_o")])
 182         self.o = self.out[0]
 183         self.width = width
 184         # more "look like nmutil pipeline API"
 185         self.p.data_i.ctx.op = self.op
 186         self.p.data_i.a = self.a
 187         self.p.data_i.b = self.b
 188         self.n.data_o.o = self.o
 189
 190     def elaborate(self, platform):
 191         m = Module()
 192         add = Adder(self.width)
 193         mul = Multiplier(self.width)
 194         shf = Shifter(self.width)
 195         sub = Subtractor(self.width)
 196
 197         m.submodules.add = add
 198         m.submodules.mul = mul
 199         m.submodules.shf = shf
 200         m.submodules.sub = sub
 201
 202         # really should not activate absolutely all ALU inputs like this
 203         for mod in [add, mul, shf, sub]:
 204             m.d.comb += [
 205                 mod.a.eq(self.a),
 206                 mod.b.eq(self.b),
 207             ]
 208
 209         # pass invert (and carry later)
 210         m.d.comb += add.invert_a.eq(self.op.invert_a)
 211
 212         go_now = Signal(reset_less=True) # testing no-delay ALU
 213
 214         # ALU sequencer is idle when the count is zero
 215         alu_idle = Signal(reset_less=True)
 216         m.d.comb += alu_idle.eq(self.counter == 0)
 217
 218         # ALU sequencer is done when the count is one
 219         alu_done = Signal(reset_less=True)
 220         m.d.comb += alu_done.eq(self.counter == 1)
 221
 222         # select handshake handling according to ALU type
 223         with m.If(go_now):
 224             # with a combinatorial, no-delay ALU, just pass through
 225             # the handshake signals to the other side
 226             m.d.comb += self.p.ready_o.eq(self.n.ready_i)
 227             m.d.comb += self.n.valid_o.eq(self.p.valid_i)
 228         with m.Else():
 229             # sequential ALU handshake:
 230             # ready_o responds to valid_i, but only if the ALU is idle
 231             m.d.comb += self.p.ready_o.eq(alu_idle)
 232             # select the internally generated valid_o, above
 233             m.d.comb += self.n.valid_o.eq(alu_done)
 234
 235         # hold the ALU result until ready_o is asserted
 236         alu_r = Signal(self.width)
 237
 238         with m.If(alu_idle):
 239             with m.If(self.p.valid_i):
 240
 241                 # as this is a "fake" pipeline, just grab the output right now
 242                 with m.If(self.op.insn_type == InternalOp.OP_ADD):
 243                     m.d.sync += alu_r.eq(add.o)
 244                 with m.Elif(self.op.insn_type == InternalOp.OP_MUL_L64):
 245                     m.d.sync += alu_r.eq(mul.o)
 246                 with m.Elif(self.op.insn_type == InternalOp.OP_SHR):
 247                     m.d.sync += alu_r.eq(shf.o)
 248                 # SUB is zero-delay, no need to register
 249
 250                 # NOTE: all of these are fake, just something to test
 251
 252                 # MUL, to take 5 instructions
 253                 with m.If(self.op.insn_type == InternalOp.OP_MUL_L64):
 254                     m.d.sync += self.counter.eq(5)
 255                 # SHIFT to take 1, straight away
 256                 with m.Elif(self.op.insn_type == InternalOp.OP_SHR):
 257                     m.d.sync += self.counter.eq(1)
 258                 # ADD/SUB to take 3
 259                 with m.Elif(self.op.insn_type == InternalOp.OP_ADD):
 260                     m.d.sync += self.counter.eq(3)
 261                 # others to take no delay
 262                 with m.Else():
 263                     m.d.comb += go_now.eq(1)
 264
 265         with m.Elif(~alu_done | self.n.ready_i):
 266             # decrement the counter while the ALU is neither idle nor finished
 267             m.d.sync += self.counter.eq(self.counter - 1)
 268
 269         # choose between zero-delay output, or registered
 270         with m.If(go_now):
 271             m.d.comb += self.o.eq(sub.o)
 272         # only present the result at the last computation cycle
 273         with m.Elif(alu_done):
 274             m.d.comb += self.o.eq(alu_r)
 275
 276         return m
 277
 278     def __iter__(self):
 279         yield from self.op.ports()
 280         yield self.a
 281         yield self.b
 282         yield self.o
 283         yield self.p.valid_i
 284         yield self.p.ready_o
 285         yield self.n.valid_o
 286         yield self.n.ready_i
 287
 288     def ports(self):
 289         return list(self)
 290
 291
 292 class BranchOp(Elaboratable):
 293     def __init__(self, width, op):
 294         self.a   = Signal(width)
 295         self.b   = Signal(width)
 296         self.o   = Signal(width)
 297         self.op = op
 298
 299     def elaborate(self, platform):
 300         m = Module()
 301         m.d.comb += self.o.eq(Mux(self.op(self.a, self.b), 1, 0))
 302         return m
 303
 304
 305 class BranchALU(Elaboratable):
 306     def __init__(self, width):
 307         self.p = Dummy() # make look like nmutil pipeline API
 308         self.p.data_i = Dummy()
 309         self.p.data_i.ctx = Dummy()
 310         self.n = Dummy() # make look like nmutil pipeline API
 311         self.n.data_o = Dummy()
 312         self.p.valid_i = Signal()
 313         self.p.ready_o = Signal()
 314         self.n.ready_i = Signal()
 315         self.n.valid_o = Signal()
 316         self.counter   = Signal(4)
 317         self.op  = Signal(2)
 318         i = []
 319         i.append(Signal(width, name="i1"))
 320         i.append(Signal(width, name="i2"))
 321         self.i = Array(i)
 322         self.a, self.b = i[0], i[1]
 323         self.out = Array([Signal(width)])
 324         self.o = self.out[0]
 325         self.width = width
 326
 327     def elaborate(self, platform):
 328         m = Module()
 329         bgt = BranchOp(self.width, operator.gt)
 330         blt = BranchOp(self.width, operator.lt)
 331         beq = BranchOp(self.width, operator.eq)
 332         bne = BranchOp(self.width, operator.ne)
 333
 334         m.submodules.bgt = bgt
 335         m.submodules.blt = blt
 336         m.submodules.beq = beq
 337         m.submodules.bne = bne
 338         for mod in [bgt, blt, beq, bne]:
 339             m.d.comb += [
 340                 mod.a.eq(self.a),
 341                 mod.b.eq(self.b),
 342             ]
 343
 344         go_now = Signal(reset_less=True) # testing no-delay ALU
 345         with m.If(self.p.valid_i):
 346             # input is valid. next check, if we already said "ready" or not
 347             with m.If(~self.p.ready_o):
 348                 # we didn't say "ready" yet, so say so and initialise
 349                 m.d.sync += self.p.ready_o.eq(1)
 350
 351                 # as this is a "fake" pipeline, just grab the output right now
 352                 with m.Switch(self.op):
 353                     for i, mod in enumerate([bgt, blt, beq, bne]):
 354                         with m.Case(i):
 355                             m.d.sync += self.o.eq(mod.o)
 356                 m.d.sync += self.counter.eq(5) # branch to take 5 cycles (fake)
 357                 #m.d.comb += go_now.eq(1)
 358         with m.Else():
 359             # input says no longer valid, so drop ready as well.
 360             # a "proper" ALU would have had to sync in the opcode and a/b ops
 361             m.d.sync += self.p.ready_o.eq(0)
 362
 363         # ok so the counter's running: when it gets to 1, fire the output
 364         with m.If((self.counter == 1) | go_now):
 365             # set the output as valid if the recipient is ready for it
 366             m.d.sync += self.n.valid_o.eq(1)
 367         with m.If(self.n.ready_i & self.n.valid_o):
 368             m.d.sync += self.n.valid_o.eq(0)
 369             # recipient said it was ready: reset back to known-good.
 370             m.d.sync += self.counter.eq(0) # reset the counter
 371             m.d.sync += self.o.eq(0) # clear the output for tidiness sake
 372
 373         # countdown to 1 (transition from 1 to 0 only on acknowledgement)
 374         with m.If(self.counter > 1):
 375             m.d.sync += self.counter.eq(self.counter - 1)
 376
 377         return m
 378
 379     def __iter__(self):
 380         yield self.op
 381         yield self.a
 382         yield self.b
 383         yield self.o
 384
 385     def ports(self):
 386         return list(self)
 387
 388 def run_op(dut, a, b, op, inv_a=0):
 389     yield dut.a.eq(a)
 390     yield dut.b.eq(b)
 391     yield dut.op.insn_type.eq(op)
 392     yield dut.op.invert_a.eq(inv_a)
 393     yield dut.n.ready_i.eq(0)
 394     yield dut.p.valid_i.eq(1)
 395
 396     # if valid_o rose on the very first cycle, it is a
 397     # zero-delay ALU
 398     yield Settle()
 399     vld = yield dut.n.valid_o
 400     if vld:
 401         # special case for zero-delay ALU
 402         # we must raise ready_i first, since the combinatorial ALU doesn't
 403         # have any storage, and doesn't dare to assert ready_o back to us
 404         # until we accepted the output data
 405         yield dut.n.ready_i.eq(1)
 406         result = yield dut.o
 407         yield
 408         yield dut.p.valid_i.eq(0)
 409         yield dut.n.ready_i.eq(0)
 410         yield
 411         return result
 412
 413     yield
 414
 415     # wait for the ALU to accept our input data
 416     while True:
 417         rdy = yield dut.p.ready_o
 418         if rdy:
 419             break
 420         yield
 421
 422     yield dut.p.valid_i.eq(0)
 423
 424     # wait for the ALU to present the output data
 425     while True:
 426         yield Settle()
 427         vld = yield dut.n.valid_o
 428         if vld:
 429             break
 430         yield
 431
 432     # latch the result and lower read_i
 433     yield dut.n.ready_i.eq(1)
 434     result = yield dut.o
 435     yield
 436     yield dut.n.ready_i.eq(0)
 437     yield
 438
 439     return result
 440
 441
 442 def alu_sim(dut):
 443     result = yield from run_op(dut, 5, 3, InternalOp.OP_ADD)
 444     print ("alu_sim add", result)
 445     assert (result == 8)
 446
 447     result = yield from run_op(dut, 2, 3, InternalOp.OP_MUL_L64)
 448     print ("alu_sim mul", result)
 449     assert (result == 6)
 450
 451     result = yield from run_op(dut, 5, 3, InternalOp.OP_ADD, inv_a=1)
 452     print ("alu_sim add-inv", result)
 453     assert (result == 65533)
 454
 455     # test zero-delay ALU
 456     # don't have OP_SUB, so use any other
 457     result = yield from run_op(dut, 5, 3, InternalOp.OP_NOP)
 458     print ("alu_sim sub", result)
 459     assert (result == 2)
 460
 461     result = yield from run_op(dut, 13, 2, InternalOp.OP_SHR)
 462     print ("alu_sim shr", result)
 463     assert (result == 3)
 464
 465
 466 def test_alu():
 467     alu = ALU(width=16)
 468     run_simulation(alu, {"sync": alu_sim(alu)}, vcd_name='test_alusim.vcd')
 469
 470     vl = rtlil.convert(alu, ports=alu.ports())
 471     with open("test_alu.il", "w") as f:
 472         f.write(vl)
 473
 474
 475 def test_alu_parallel():
 476     m = Module()
 477     m.submodules.alu = dut = ALU(width=16)
 478     sim = Simulator(m)
 479     sim.add_clock(1e-6)
 480
 481     def send(a, b, op, inv_a=0):
 482         yield dut.a.eq(a)
 483         yield dut.b.eq(b)
 484         yield dut.op.insn_type.eq(op)
 485         yield dut.op.invert_a.eq(inv_a)
 486         yield dut.p.valid_i.eq(1)
 487         while True:
 488             yield
 489             rdy = yield dut.p.ready_o
 490             if rdy:
 491                 break
 492         yield dut.p.valid_i.eq(0)
 493
 494     def receive():
 495         yield dut.n.ready_i.eq(1)
 496         while True:
 497             valid = yield dut.n.valid_o
 498             if valid:
 499                 break
 500             yield
 501         result = yield dut.o
 502         yield dut.n.ready_i.eq(0)
 503         return result
 504
 505     def producer():
 506         yield from send(5, 3, InternalOp.OP_ADD)
 507
 508     def consumer():
 509         result = yield from receive()
 510         assert (result == 8)
 511
 512     sim.add_sync_process(producer)
 513     sim.add_sync_process(consumer)
 514     sim_writer = sim.write_vcd(
 515         "test_alu_parallel.vcd",
 516         "test_alu_parallel.gtkw",
 517         traces=dut.ports()
 518     )
 519     with sim_writer:
 520         sim.run()
 521
 522
 523 if __name__ == "__main__":
 524     test_alu()
 525     test_alu_parallel()
 526
 527     # alu = BranchALU(width=16)
 528     # vl = rtlil.convert(alu, ports=alu.ports())
 529     # with open("test_branch_alu.il", "w") as f:
 530     #     f.write(vl)
 531