src/soc/experiment/alu_hier.py

   1 """*Experimental* ALU: based on nmigen alu_hier.py, includes branch-compare ALU
   2
   3 This ALU is *deliberately* designed to add in (unnecessary) delays into
   4 different operations so as to be able to test the 6600-style matrices
   5 and the CompUnits.  Countdown timers wait for (defined) periods before
   6 indicating that the output is valid
   7
   8 A "real" integer ALU would place the answers onto the output bus after
   9 only one cycle (sync)
  10 """
  11
  12 from nmigen import Elaboratable, Signal, Module, Const, Mux, Array
  13 from nmigen.hdl.rec import Record, Layout
  14 from nmigen.cli import main
  15 from nmigen.cli import verilog, rtlil
  16 from nmigen.compat.sim import run_simulation
  17
  18 from soc.decoder.power_enums import InternalOp, Function, CryIn
  19
  20 from soc.fu.alu.alu_input_record import CompALUOpSubset
  21 from soc.fu.cr.cr_input_record import CompCROpSubset
  22
  23 import operator
  24
  25
  26
  27
  28 class Adder(Elaboratable):
  29     def __init__(self, width):
  30         self.invert_a = Signal()
  31         self.a   = Signal(width)
  32         self.b   = Signal(width)
  33         self.o   = Signal(width, name="add_o")
  34
  35     def elaborate(self, platform):
  36         m = Module()
  37         with m.If(self.invert_a):
  38             m.d.comb += self.o.eq((~self.a) + self.b)
  39         with m.Else():
  40             m.d.comb += self.o.eq(self.a + self.b)
  41         return m
  42
  43
  44 class Subtractor(Elaboratable):
  45     def __init__(self, width):
  46         self.a   = Signal(width)
  47         self.b   = Signal(width)
  48         self.o   = Signal(width, name="sub_o")
  49
  50     def elaborate(self, platform):
  51         m = Module()
  52         m.d.comb += self.o.eq(self.a - self.b)
  53         return m
  54
  55
  56 class Multiplier(Elaboratable):
  57     def __init__(self, width):
  58         self.a   = Signal(width)
  59         self.b   = Signal(width)
  60         self.o   = Signal(width, name="mul_o")
  61
  62     def elaborate(self, platform):
  63         m = Module()
  64         m.d.comb += self.o.eq(self.a * self.b)
  65         return m
  66
  67
  68 class Shifter(Elaboratable):
  69     def __init__(self, width):
  70         self.width = width
  71         self.a   = Signal(width)
  72         self.b   = Signal(width)
  73         self.o   = Signal(width, name="shf_o")
  74
  75     def elaborate(self, platform):
  76         m = Module()
  77         btrunc = Signal(self.width)
  78         m.d.comb += btrunc.eq(self.b & Const((1<<self.width)-1))
  79         m.d.comb += self.o.eq(self.a >> btrunc)
  80         return m
  81
  82 class Dummy:
  83     pass
  84
  85
  86 class DummyALU(Elaboratable):
  87     def __init__(self, width):
  88         self.p = Dummy() # make look like nmutil pipeline API
  89         self.p.data_i = Dummy()
  90         self.p.data_i.ctx = Dummy()
  91         self.n = Dummy() # make look like nmutil pipeline API
  92         self.n.data_o = Dummy()
  93         self.p.valid_i = Signal()
  94         self.p.ready_o = Signal()
  95         self.n.ready_i = Signal()
  96         self.n.valid_o = Signal()
  97         self.counter   = Signal(4)
  98         self.op  = CompCROpSubset()
  99         i = []
 100         i.append(Signal(width, name="i1"))
 101         i.append(Signal(width, name="i2"))
 102         i.append(Signal(width, name="i3"))
 103         self.i = Array(i)
 104         self.a, self.b, self.c = i[0], i[1], i[2]
 105         self.out = Array([Signal(width, name="alu_o")])
 106         self.o = self.out[0]
 107         self.width = width
 108         # more "look like nmutil pipeline API"
 109         self.p.data_i.ctx.op = self.op
 110         self.p.data_i.a = self.a
 111         self.p.data_i.b = self.b
 112         self.p.data_i.c = self.c
 113         self.n.data_o.o = self.o
 114
 115     def elaborate(self, platform):
 116         m = Module()
 117
 118         go_now = Signal(reset_less=True) # testing no-delay ALU
 119
 120         with m.If(self.p.valid_i):
 121             # input is valid. next check, if we already said "ready" or not
 122             with m.If(~self.p.ready_o):
 123                 # we didn't say "ready" yet, so say so and initialise
 124                 m.d.sync += self.p.ready_o.eq(1)
 125
 126                 m.d.sync += self.o.eq(self.a)
 127                 m.d.comb += go_now.eq(1)
 128                 m.d.sync += self.counter.eq(1)
 129
 130         with m.Else():
 131             # input says no longer valid, so drop ready as well.
 132             # a "proper" ALU would have had to sync in the opcode and a/b ops
 133             m.d.sync += self.p.ready_o.eq(0)
 134
 135         # ok so the counter's running: when it gets to 1, fire the output
 136         with m.If((self.counter == 1) | go_now):
 137             # set the output as valid if the recipient is ready for it
 138             m.d.sync += self.n.valid_o.eq(1)
 139         with m.If(self.n.ready_i & self.n.valid_o):
 140             m.d.sync += self.n.valid_o.eq(0)
 141             # recipient said it was ready: reset back to known-good.
 142             m.d.sync += self.counter.eq(0) # reset the counter
 143             m.d.sync += self.o.eq(0) # clear the output for tidiness sake
 144
 145         # countdown to 1 (transition from 1 to 0 only on acknowledgement)
 146         with m.If(self.counter > 1):
 147             m.d.sync += self.counter.eq(self.counter - 1)
 148
 149         return m
 150
 151     def __iter__(self):
 152         yield from self.op.ports()
 153         yield self.a
 154         yield self.b
 155         yield self.c
 156         yield self.o
 157
 158     def ports(self):
 159         return list(self)
 160
 161
 162 class ALU(Elaboratable):
 163     def __init__(self, width):
 164         self.p = Dummy() # make look like nmutil pipeline API
 165         self.p.data_i = Dummy()
 166         self.p.data_i.ctx = Dummy()
 167         self.n = Dummy() # make look like nmutil pipeline API
 168         self.n.data_o = Dummy()
 169         self.p.valid_i = Signal()
 170         self.p.ready_o = Signal()
 171         self.n.ready_i = Signal()
 172         self.n.valid_o = Signal()
 173         self.counter   = Signal(4)
 174         self.op = CompALUOpSubset(name="op")
 175         i = []
 176         i.append(Signal(width, name="i1"))
 177         i.append(Signal(width, name="i2"))
 178         self.i = Array(i)
 179         self.a, self.b = i[0], i[1]
 180         self.out = Array([Signal(width, name="alu_o")])
 181         self.o = self.out[0]
 182         self.width = width
 183         # more "look like nmutil pipeline API"
 184         self.p.data_i.ctx.op = self.op
 185         self.p.data_i.a = self.a
 186         self.p.data_i.b = self.b
 187         self.n.data_o.o = self.o
 188
 189     def elaborate(self, platform):
 190         m = Module()
 191         add = Adder(self.width)
 192         mul = Multiplier(self.width)
 193         shf = Shifter(self.width)
 194         sub = Subtractor(self.width)
 195
 196         m.submodules.add = add
 197         m.submodules.mul = mul
 198         m.submodules.shf = shf
 199         m.submodules.sub = sub
 200
 201         # really should not activate absolutely all ALU inputs like this
 202         for mod in [add, mul, shf, sub]:
 203             m.d.comb += [
 204                 mod.a.eq(self.a),
 205                 mod.b.eq(self.b),
 206             ]
 207
 208         # pass invert (and carry later)
 209         m.d.comb += add.invert_a.eq(self.op.invert_a)
 210
 211         go_now = Signal(reset_less=True) # testing no-delay ALU
 212
 213         # ALU sequencer is idle when the count is zero
 214         alu_idle = Signal(reset_less=True)
 215         m.d.comb += alu_idle.eq(self.counter == 0)
 216
 217         # ALU sequencer is done when the count is one
 218         alu_done = Signal(reset_less=True)
 219         m.d.comb += alu_done.eq(self.counter == 1)
 220
 221         # in a sequential ALU, valid_o rises when the ALU is done
 222         # and falls when acknowledged by ready_i
 223         valid_o = Signal()
 224         with m.If(self.n.ready_i):
 225             m.d.sync += valid_o.eq(0)
 226         with m.Elif(alu_done):
 227             m.d.sync += valid_o.eq(1)
 228
 229         # select handshake handling according to ALU type
 230         with m.If(go_now):
 231             # with a combinatorial, no-delay ALU, just pass through
 232             # the handshake signals to the other side
 233             m.d.comb += self.p.ready_o.eq(self.n.ready_i)
 234             m.d.comb += self.n.valid_o.eq(self.p.valid_i)
 235         with m.Else():
 236             # sequential ALU handshake:
 237             # ready_o responds to valid_i, but only if the ALU is idle
 238             m.d.comb += self.p.ready_o.eq(self.p.valid_i & alu_idle)
 239             # select the internally generated valid_o, above
 240             m.d.comb += self.n.valid_o.eq(valid_o | alu_done)
 241
 242         # hold the ALU result until ready_o is asserted
 243         alu_r = Signal(self.width)
 244
 245         with m.If(alu_idle):
 246             with m.If(self.p.valid_i):
 247
 248                 # as this is a "fake" pipeline, just grab the output right now
 249                 with m.If(self.op.insn_type == InternalOp.OP_ADD):
 250                     m.d.sync += alu_r.eq(add.o)
 251                 with m.Elif(self.op.insn_type == InternalOp.OP_MUL_L64):
 252                     m.d.sync += alu_r.eq(mul.o)
 253                 with m.Elif(self.op.insn_type == InternalOp.OP_SHR):
 254                     m.d.sync += alu_r.eq(shf.o)
 255                 # SUB is zero-delay, no need to register
 256
 257                 # NOTE: all of these are fake, just something to test
 258
 259                 # MUL, to take 5 instructions
 260                 with m.If(self.op.insn_type == InternalOp.OP_MUL_L64):
 261                     m.d.sync += self.counter.eq(5)
 262                 # SHIFT to take 7
 263                 with m.Elif(self.op.insn_type == InternalOp.OP_SHR):
 264                     m.d.sync += self.counter.eq(7)
 265                 # ADD/SUB to take 1, straight away
 266                 with m.Elif(self.op.insn_type == InternalOp.OP_ADD):
 267                     m.d.sync += self.counter.eq(1)
 268                 # others to take no delay
 269                 with m.Else():
 270                     m.d.comb += go_now.eq(1)
 271
 272         with m.Else():
 273             # decrement the counter while the ALU is not idle
 274             m.d.sync += self.counter.eq(self.counter - 1)
 275
 276         # choose between zero-delay output, or registered
 277         with m.If(go_now):
 278             m.d.comb += self.o.eq(sub.o)
 279         with m.Else():
 280             m.d.comb += self.o.eq(alu_r)
 281
 282         return m
 283
 284     def __iter__(self):
 285         yield from self.op.ports()
 286         yield self.a
 287         yield self.b
 288         yield self.o
 289
 290     def ports(self):
 291         return list(self)
 292
 293
 294 class BranchOp(Elaboratable):
 295     def __init__(self, width, op):
 296         self.a   = Signal(width)
 297         self.b   = Signal(width)
 298         self.o   = Signal(width)
 299         self.op = op
 300
 301     def elaborate(self, platform):
 302         m = Module()
 303         m.d.comb += self.o.eq(Mux(self.op(self.a, self.b), 1, 0))
 304         return m
 305
 306
 307 class BranchALU(Elaboratable):
 308     def __init__(self, width):
 309         self.p = Dummy() # make look like nmutil pipeline API
 310         self.p.data_i = Dummy()
 311         self.p.data_i.ctx = Dummy()
 312         self.n = Dummy() # make look like nmutil pipeline API
 313         self.n.data_o = Dummy()
 314         self.p.valid_i = Signal()
 315         self.p.ready_o = Signal()
 316         self.n.ready_i = Signal()
 317         self.n.valid_o = Signal()
 318         self.counter   = Signal(4)
 319         self.op  = Signal(2)
 320         i = []
 321         i.append(Signal(width, name="i1"))
 322         i.append(Signal(width, name="i2"))
 323         self.i = Array(i)
 324         self.a, self.b = i[0], i[1]
 325         self.out = Array([Signal(width)])
 326         self.o = self.out[0]
 327         self.width = width
 328
 329     def elaborate(self, platform):
 330         m = Module()
 331         bgt = BranchOp(self.width, operator.gt)
 332         blt = BranchOp(self.width, operator.lt)
 333         beq = BranchOp(self.width, operator.eq)
 334         bne = BranchOp(self.width, operator.ne)
 335
 336         m.submodules.bgt = bgt
 337         m.submodules.blt = blt
 338         m.submodules.beq = beq
 339         m.submodules.bne = bne
 340         for mod in [bgt, blt, beq, bne]:
 341             m.d.comb += [
 342                 mod.a.eq(self.a),
 343                 mod.b.eq(self.b),
 344             ]
 345
 346         go_now = Signal(reset_less=True) # testing no-delay ALU
 347         with m.If(self.p.valid_i):
 348             # input is valid. next check, if we already said "ready" or not
 349             with m.If(~self.p.ready_o):
 350                 # we didn't say "ready" yet, so say so and initialise
 351                 m.d.sync += self.p.ready_o.eq(1)
 352
 353                 # as this is a "fake" pipeline, just grab the output right now
 354                 with m.Switch(self.op):
 355                     for i, mod in enumerate([bgt, blt, beq, bne]):
 356                         with m.Case(i):
 357                             m.d.sync += self.o.eq(mod.o)
 358                 m.d.sync += self.counter.eq(5) # branch to take 5 cycles (fake)
 359                 #m.d.comb += go_now.eq(1)
 360         with m.Else():
 361             # input says no longer valid, so drop ready as well.
 362             # a "proper" ALU would have had to sync in the opcode and a/b ops
 363             m.d.sync += self.p.ready_o.eq(0)
 364
 365         # ok so the counter's running: when it gets to 1, fire the output
 366         with m.If((self.counter == 1) | go_now):
 367             # set the output as valid if the recipient is ready for it
 368             m.d.sync += self.n.valid_o.eq(1)
 369         with m.If(self.n.ready_i & self.n.valid_o):
 370             m.d.sync += self.n.valid_o.eq(0)
 371             # recipient said it was ready: reset back to known-good.
 372             m.d.sync += self.counter.eq(0) # reset the counter
 373             m.d.sync += self.o.eq(0) # clear the output for tidiness sake
 374
 375         # countdown to 1 (transition from 1 to 0 only on acknowledgement)
 376         with m.If(self.counter > 1):
 377             m.d.sync += self.counter.eq(self.counter - 1)
 378
 379         return m
 380
 381     def __iter__(self):
 382         yield self.op
 383         yield self.a
 384         yield self.b
 385         yield self.o
 386
 387     def ports(self):
 388         return list(self)
 389
 390 def run_op(dut, a, b, op, inv_a=0):
 391     yield dut.a.eq(a)
 392     yield dut.b.eq(b)
 393     yield dut.op.insn_type.eq(op)
 394     yield dut.op.invert_a.eq(inv_a)
 395     yield dut.n.ready_i.eq(0)
 396     yield dut.p.valid_i.eq(1)
 397     yield
 398
 399     # if valid_o rose on the very first cycle, it is a
 400     # zero-delay ALU
 401     vld = yield dut.n.valid_o
 402     if vld:
 403         # special case for zero-delay ALU
 404         # we must raise ready_i first, since the combinatorial ALU doesn't
 405         # have any storage, and doesn't dare to assert ready_o back to us
 406         # until we accepted the output data
 407         yield dut.n.ready_i.eq(1)
 408         result = yield dut.o
 409         yield
 410         yield dut.p.valid_i.eq(0)
 411         yield dut.n.ready_i.eq(0)
 412         yield
 413         return result
 414
 415     # wait for the ALU to accept our input data
 416     while True:
 417         rdy = yield dut.p.ready_o
 418         if rdy:
 419             break
 420         yield
 421
 422     yield dut.p.valid_i.eq(0)
 423
 424     # wait for the ALU to present the output data
 425     while True:
 426         vld = yield dut.n.valid_o
 427         if vld:
 428             break
 429         yield
 430
 431     # latch the result and lower read_i
 432     yield dut.n.ready_i.eq(1)
 433     result = yield dut.o
 434     yield
 435     yield dut.n.ready_i.eq(0)
 436     yield
 437
 438     return result
 439
 440
 441 def alu_sim(dut):
 442     result = yield from run_op(dut, 5, 3, InternalOp.OP_ADD)
 443     print ("alu_sim add", result)
 444     assert (result == 8)
 445
 446     result = yield from run_op(dut, 2, 3, InternalOp.OP_MUL_L64)
 447     print ("alu_sim mul", result)
 448     assert (result == 6)
 449
 450     result = yield from run_op(dut, 5, 3, InternalOp.OP_ADD, inv_a=1)
 451     print ("alu_sim add-inv", result)
 452     assert (result == 65533)
 453
 454     # test zero-delay ALU
 455     # don't have OP_SUB, so use any other
 456     result = yield from run_op(dut, 5, 3, InternalOp.OP_NOP)
 457     print ("alu_sim sub", result)
 458     assert (result == 2)
 459
 460
 461 def test_alu():
 462     alu = ALU(width=16)
 463     run_simulation(alu, {"sync": alu_sim(alu)}, vcd_name='test_alusim.vcd')
 464
 465     vl = rtlil.convert(alu, ports=alu.ports())
 466     with open("test_alu.il", "w") as f:
 467         f.write(vl)
 468
 469
if __name__ == "__main__":
    # when run as a script: simulate the ALU and write out its RTLIL
    test_alu()

    # disabled: equivalent RTLIL dump for the branch-compare ALU
    # alu = BranchALU(width=16)
    # vl = rtlil.convert(alu, ports=alu.ports())
    # with open("test_branch_alu.il", "w") as f:
    #     f.write(vl)
 477