src/soc/experiment/alu_hier.py

   1 """*Experimental* ALU: based on nmigen alu_hier.py, includes branch-compare ALU
   2
   3 This ALU is *deliberately* designed to add in (unnecessary) delays into
   4 different operations so as to be able to test the 6600-style matrices
   5 and the CompUnits.  Countdown timers wait for (defined) periods before
   6 indicating that the output is valid
   7
   8 A "real" integer ALU would place the answers onto the output bus after
   9 only one cycle (sync)
  10 """
  11
  12 from nmigen import Elaboratable, Signal, Module, Const, Mux, Array
  13 from nmigen.hdl.rec import Record, Layout
  14 from nmigen.cli import main
  15 from nmigen.cli import verilog, rtlil
  16 from nmigen.compat.sim import run_simulation
  17
  18 from soc.decoder.power_enums import InternalOp, Function, CryIn
  19
  20 from soc.fu.alu.alu_input_record import CompALUOpSubset
  21 from soc.fu.cr.cr_input_record import CompCROpSubset
  22
  23 import operator
  24
  25
  26
  27
  28 class Adder(Elaboratable):
  29     def __init__(self, width):
  30         self.invert_a = Signal()
  31         self.a   = Signal(width)
  32         self.b   = Signal(width)
  33         self.o   = Signal(width, name="add_o")
  34
  35     def elaborate(self, platform):
  36         m = Module()
  37         with m.If(self.invert_a):
  38             m.d.comb += self.o.eq((~self.a) + self.b)
  39         with m.Else():
  40             m.d.comb += self.o.eq(self.a + self.b)
  41         return m
  42
  43
  44 class Subtractor(Elaboratable):
  45     def __init__(self, width):
  46         self.a   = Signal(width)
  47         self.b   = Signal(width)
  48         self.o   = Signal(width, name="sub_o")
  49
  50     def elaborate(self, platform):
  51         m = Module()
  52         m.d.comb += self.o.eq(self.a - self.b)
  53         return m
  54
  55
  56 class Multiplier(Elaboratable):
  57     def __init__(self, width):
  58         self.a   = Signal(width)
  59         self.b   = Signal(width)
  60         self.o   = Signal(width, name="mul_o")
  61
  62     def elaborate(self, platform):
  63         m = Module()
  64         m.d.comb += self.o.eq(self.a * self.b)
  65         return m
  66
  67
  68 class Shifter(Elaboratable):
  69     def __init__(self, width):
  70         self.width = width
  71         self.a   = Signal(width)
  72         self.b   = Signal(width)
  73         self.o   = Signal(width, name="shf_o")
  74
  75     def elaborate(self, platform):
  76         m = Module()
  77         btrunc = Signal(self.width)
  78         m.d.comb += btrunc.eq(self.b & Const((1<<self.width)-1))
  79         m.d.comb += self.o.eq(self.a >> btrunc)
  80         return m
  81
  82 class Dummy:
  83     pass
  84
  85
  86 class DummyALU(Elaboratable):
  87     def __init__(self, width):
  88         self.p = Dummy() # make look like nmutil pipeline API
  89         self.p.data_i = Dummy()
  90         self.p.data_i.ctx = Dummy()
  91         self.n = Dummy() # make look like nmutil pipeline API
  92         self.n.data_o = Dummy()
  93         self.p.valid_i = Signal()
  94         self.p.ready_o = Signal()
  95         self.n.ready_i = Signal()
  96         self.n.valid_o = Signal()
  97         self.counter   = Signal(4)
  98         self.op  = CompCROpSubset()
  99         i = []
 100         i.append(Signal(width, name="i1"))
 101         i.append(Signal(width, name="i2"))
 102         i.append(Signal(width, name="i3"))
 103         self.i = Array(i)
 104         self.a, self.b, self.c = i[0], i[1], i[2]
 105         self.out = Array([Signal(width, name="alu_o")])
 106         self.o = self.out[0]
 107         self.width = width
 108         # more "look like nmutil pipeline API"
 109         self.p.data_i.ctx.op = self.op
 110         self.p.data_i.a = self.a
 111         self.p.data_i.b = self.b
 112         self.p.data_i.c = self.c
 113         self.n.data_o.o = self.o
 114
 115     def elaborate(self, platform):
 116         m = Module()
 117
 118         go_now = Signal(reset_less=True) # testing no-delay ALU
 119
 120         with m.If(self.p.valid_i):
 121             # input is valid. next check, if we already said "ready" or not
 122             with m.If(~self.p.ready_o):
 123                 # we didn't say "ready" yet, so say so and initialise
 124                 m.d.sync += self.p.ready_o.eq(1)
 125
 126                 m.d.sync += self.o.eq(self.a)
 127                 m.d.comb += go_now.eq(1)
 128                 m.d.sync += self.counter.eq(1)
 129
 130         with m.Else():
 131             # input says no longer valid, so drop ready as well.
 132             # a "proper" ALU would have had to sync in the opcode and a/b ops
 133             m.d.sync += self.p.ready_o.eq(0)
 134
 135         # ok so the counter's running: when it gets to 1, fire the output
 136         with m.If((self.counter == 1) | go_now):
 137             # set the output as valid if the recipient is ready for it
 138             m.d.sync += self.n.valid_o.eq(1)
 139         with m.If(self.n.ready_i & self.n.valid_o):
 140             m.d.sync += self.n.valid_o.eq(0)
 141             # recipient said it was ready: reset back to known-good.
 142             m.d.sync += self.counter.eq(0) # reset the counter
 143             m.d.sync += self.o.eq(0) # clear the output for tidiness sake
 144
 145         # countdown to 1 (transition from 1 to 0 only on acknowledgement)
 146         with m.If(self.counter > 1):
 147             m.d.sync += self.counter.eq(self.counter - 1)
 148
 149         return m
 150
 151     def __iter__(self):
 152         yield from self.op.ports()
 153         yield self.a
 154         yield self.b
 155         yield self.c
 156         yield self.o
 157
 158     def ports(self):
 159         return list(self)
 160
 161
 162 class ALU(Elaboratable):
 163     def __init__(self, width):
 164         self.p = Dummy() # make look like nmutil pipeline API
 165         self.p.data_i = Dummy()
 166         self.p.data_i.ctx = Dummy()
 167         self.n = Dummy() # make look like nmutil pipeline API
 168         self.n.data_o = Dummy()
 169         self.p.valid_i = Signal()
 170         self.p.ready_o = Signal()
 171         self.n.ready_i = Signal()
 172         self.n.valid_o = Signal()
 173         self.counter   = Signal(4)
 174         self.op = CompALUOpSubset(name="op")
 175         i = []
 176         i.append(Signal(width, name="i1"))
 177         i.append(Signal(width, name="i2"))
 178         self.i = Array(i)
 179         self.a, self.b = i[0], i[1]
 180         self.out = Array([Signal(width, name="alu_o")])
 181         self.o = self.out[0]
 182         self.width = width
 183         # more "look like nmutil pipeline API"
 184         self.p.data_i.ctx.op = self.op
 185         self.p.data_i.a = self.a
 186         self.p.data_i.b = self.b
 187         self.n.data_o.o = self.o
 188
 189     def elaborate(self, platform):
 190         m = Module()
 191         add = Adder(self.width)
 192         mul = Multiplier(self.width)
 193         shf = Shifter(self.width)
 194         sub = Subtractor(self.width)
 195
 196         m.submodules.add = add
 197         m.submodules.mul = mul
 198         m.submodules.shf = shf
 199         m.submodules.sub = sub
 200
 201         # really should not activate absolutely all ALU inputs like this
 202         for mod in [add, mul, shf, sub]:
 203             m.d.comb += [
 204                 mod.a.eq(self.a),
 205                 mod.b.eq(self.b),
 206             ]
 207
 208         # pass invert (and carry later)
 209         m.d.comb += add.invert_a.eq(self.op.invert_a)
 210
 211         go_now = Signal(reset_less=True) # testing no-delay ALU
 212
 213         # ALU sequencer is idle when the count is zero
 214         alu_idle = Signal(reset_less=True)
 215         m.d.comb += alu_idle.eq(self.counter == 0)
 216
 217         # ALU sequencer is done when the count is one
 218         alu_done = Signal(reset_less=True)
 219         m.d.comb += alu_done.eq(self.counter == 1)
 220
 221         # select handshake handling according to ALU type
 222         with m.If(go_now):
 223             # with a combinatorial, no-delay ALU, just pass through
 224             # the handshake signals to the other side
 225             m.d.comb += self.p.ready_o.eq(self.n.ready_i)
 226             m.d.comb += self.n.valid_o.eq(self.p.valid_i)
 227         with m.Else():
 228             # sequential ALU handshake:
 229             # ready_o responds to valid_i, but only if the ALU is idle
 230             m.d.comb += self.p.ready_o.eq(self.p.valid_i & alu_idle)
 231             # select the internally generated valid_o, above
 232             m.d.comb += self.n.valid_o.eq(alu_done)
 233
 234         # hold the ALU result until ready_o is asserted
 235         alu_r = Signal(self.width)
 236
 237         with m.If(alu_idle):
 238             with m.If(self.p.valid_i):
 239
 240                 # as this is a "fake" pipeline, just grab the output right now
 241                 with m.If(self.op.insn_type == InternalOp.OP_ADD):
 242                     m.d.sync += alu_r.eq(add.o)
 243                 with m.Elif(self.op.insn_type == InternalOp.OP_MUL_L64):
 244                     m.d.sync += alu_r.eq(mul.o)
 245                 with m.Elif(self.op.insn_type == InternalOp.OP_SHR):
 246                     m.d.sync += alu_r.eq(shf.o)
 247                 # SUB is zero-delay, no need to register
 248
 249                 # NOTE: all of these are fake, just something to test
 250
 251                 # MUL, to take 5 instructions
 252                 with m.If(self.op.insn_type == InternalOp.OP_MUL_L64):
 253                     m.d.sync += self.counter.eq(5)
 254                 # SHIFT to take 1, straight away
 255                 with m.Elif(self.op.insn_type == InternalOp.OP_SHR):
 256                     m.d.sync += self.counter.eq(1)
 257                 # ADD/SUB to take 3
 258                 with m.Elif(self.op.insn_type == InternalOp.OP_ADD):
 259                     m.d.sync += self.counter.eq(3)
 260                 # others to take no delay
 261                 with m.Else():
 262                     m.d.comb += go_now.eq(1)
 263
 264         with m.Elif(~alu_done | self.n.ready_i):
 265             # decrement the counter while the ALU is neither idle nor finished
 266             m.d.sync += self.counter.eq(self.counter - 1)
 267
 268         # choose between zero-delay output, or registered
 269         with m.If(go_now):
 270             m.d.comb += self.o.eq(sub.o)
 271         with m.Else():
 272             m.d.comb += self.o.eq(alu_r)
 273
 274         return m
 275
 276     def __iter__(self):
 277         yield from self.op.ports()
 278         yield self.a
 279         yield self.b
 280         yield self.o
 281
 282     def ports(self):
 283         return list(self)
 284
 285
 286 class BranchOp(Elaboratable):
 287     def __init__(self, width, op):
 288         self.a   = Signal(width)
 289         self.b   = Signal(width)
 290         self.o   = Signal(width)
 291         self.op = op
 292
 293     def elaborate(self, platform):
 294         m = Module()
 295         m.d.comb += self.o.eq(Mux(self.op(self.a, self.b), 1, 0))
 296         return m
 297
 298
 299 class BranchALU(Elaboratable):
 300     def __init__(self, width):
 301         self.p = Dummy() # make look like nmutil pipeline API
 302         self.p.data_i = Dummy()
 303         self.p.data_i.ctx = Dummy()
 304         self.n = Dummy() # make look like nmutil pipeline API
 305         self.n.data_o = Dummy()
 306         self.p.valid_i = Signal()
 307         self.p.ready_o = Signal()
 308         self.n.ready_i = Signal()
 309         self.n.valid_o = Signal()
 310         self.counter   = Signal(4)
 311         self.op  = Signal(2)
 312         i = []
 313         i.append(Signal(width, name="i1"))
 314         i.append(Signal(width, name="i2"))
 315         self.i = Array(i)
 316         self.a, self.b = i[0], i[1]
 317         self.out = Array([Signal(width)])
 318         self.o = self.out[0]
 319         self.width = width
 320
 321     def elaborate(self, platform):
 322         m = Module()
 323         bgt = BranchOp(self.width, operator.gt)
 324         blt = BranchOp(self.width, operator.lt)
 325         beq = BranchOp(self.width, operator.eq)
 326         bne = BranchOp(self.width, operator.ne)
 327
 328         m.submodules.bgt = bgt
 329         m.submodules.blt = blt
 330         m.submodules.beq = beq
 331         m.submodules.bne = bne
 332         for mod in [bgt, blt, beq, bne]:
 333             m.d.comb += [
 334                 mod.a.eq(self.a),
 335                 mod.b.eq(self.b),
 336             ]
 337
 338         go_now = Signal(reset_less=True) # testing no-delay ALU
 339         with m.If(self.p.valid_i):
 340             # input is valid. next check, if we already said "ready" or not
 341             with m.If(~self.p.ready_o):
 342                 # we didn't say "ready" yet, so say so and initialise
 343                 m.d.sync += self.p.ready_o.eq(1)
 344
 345                 # as this is a "fake" pipeline, just grab the output right now
 346                 with m.Switch(self.op):
 347                     for i, mod in enumerate([bgt, blt, beq, bne]):
 348                         with m.Case(i):
 349                             m.d.sync += self.o.eq(mod.o)
 350                 m.d.sync += self.counter.eq(5) # branch to take 5 cycles (fake)
 351                 #m.d.comb += go_now.eq(1)
 352         with m.Else():
 353             # input says no longer valid, so drop ready as well.
 354             # a "proper" ALU would have had to sync in the opcode and a/b ops
 355             m.d.sync += self.p.ready_o.eq(0)
 356
 357         # ok so the counter's running: when it gets to 1, fire the output
 358         with m.If((self.counter == 1) | go_now):
 359             # set the output as valid if the recipient is ready for it
 360             m.d.sync += self.n.valid_o.eq(1)
 361         with m.If(self.n.ready_i & self.n.valid_o):
 362             m.d.sync += self.n.valid_o.eq(0)
 363             # recipient said it was ready: reset back to known-good.
 364             m.d.sync += self.counter.eq(0) # reset the counter
 365             m.d.sync += self.o.eq(0) # clear the output for tidiness sake
 366
 367         # countdown to 1 (transition from 1 to 0 only on acknowledgement)
 368         with m.If(self.counter > 1):
 369             m.d.sync += self.counter.eq(self.counter - 1)
 370
 371         return m
 372
 373     def __iter__(self):
 374         yield self.op
 375         yield self.a
 376         yield self.b
 377         yield self.o
 378
 379     def ports(self):
 380         return list(self)
 381
 382 def run_op(dut, a, b, op, inv_a=0):
 383     from nmigen.back.pysim import Settle
 384     yield dut.a.eq(a)
 385     yield dut.b.eq(b)
 386     yield dut.op.insn_type.eq(op)
 387     yield dut.op.invert_a.eq(inv_a)
 388     yield dut.n.ready_i.eq(0)
 389     yield dut.p.valid_i.eq(1)
 390
 391     # if valid_o rose on the very first cycle, it is a
 392     # zero-delay ALU
 393     yield Settle()
 394     vld = yield dut.n.valid_o
 395     if vld:
 396         # special case for zero-delay ALU
 397         # we must raise ready_i first, since the combinatorial ALU doesn't
 398         # have any storage, and doesn't dare to assert ready_o back to us
 399         # until we accepted the output data
 400         yield dut.n.ready_i.eq(1)
 401         result = yield dut.o
 402         yield
 403         yield dut.p.valid_i.eq(0)
 404         yield dut.n.ready_i.eq(0)
 405         yield
 406         return result
 407
 408     yield
 409
 410     # wait for the ALU to accept our input data
 411     while True:
 412         rdy = yield dut.p.ready_o
 413         if rdy:
 414             break
 415         yield
 416
 417     yield dut.p.valid_i.eq(0)
 418
 419     # wait for the ALU to present the output data
 420     while True:
 421         yield Settle()
 422         vld = yield dut.n.valid_o
 423         if vld:
 424             break
 425         yield
 426
 427     # latch the result and lower read_i
 428     yield dut.n.ready_i.eq(1)
 429     result = yield dut.o
 430     yield
 431     yield dut.n.ready_i.eq(0)
 432     yield
 433
 434     return result
 435
 436
 437 def alu_sim(dut):
 438     result = yield from run_op(dut, 5, 3, InternalOp.OP_ADD)
 439     print ("alu_sim add", result)
 440     assert (result == 8)
 441
 442     result = yield from run_op(dut, 2, 3, InternalOp.OP_MUL_L64)
 443     print ("alu_sim mul", result)
 444     assert (result == 6)
 445
 446     result = yield from run_op(dut, 5, 3, InternalOp.OP_ADD, inv_a=1)
 447     print ("alu_sim add-inv", result)
 448     assert (result == 65533)
 449
 450     # test zero-delay ALU
 451     # don't have OP_SUB, so use any other
 452     result = yield from run_op(dut, 5, 3, InternalOp.OP_NOP)
 453     print ("alu_sim sub", result)
 454     assert (result == 2)
 455
 456     result = yield from run_op(dut, 13, 2, InternalOp.OP_SHR)
 457     print ("alu_sim shr", result)
 458     assert (result == 3)
 459
 460
 461 def test_alu():
 462     alu = ALU(width=16)
 463     run_simulation(alu, {"sync": alu_sim(alu)}, vcd_name='test_alusim.vcd')
 464
 465     vl = rtlil.convert(alu, ports=alu.ports())
 466     with open("test_alu.il", "w") as f:
 467         f.write(vl)
 468
 469
 470 if __name__ == "__main__":
 471     test_alu()
 472
 473     # alu = BranchALU(width=16)
 474     # vl = rtlil.convert(alu, ports=alu.ports())
 475     # with open("test_branch_alu.il", "w") as f:
 476     #     f.write(vl)
 477