src/soc/experiment/alu_hier.py

   1 """*Experimental* ALU: based on nmigen alu_hier.py, includes branch-compare ALU
   2
   3 This ALU is *deliberately* designed to add in (unnecessary) delays into
   4 different operations so as to be able to test the 6600-style matrices
   5 and the CompUnits.  Countdown timers wait for (defined) periods before
   6 indicating that the output is valid
   7
   8 A "real" integer ALU would place the answers onto the output bus after
   9 only one cycle (sync)
  10 """
  11
  12 from nmigen import Elaboratable, Signal, Module, Const, Mux, Array
  13 from nmigen.hdl.rec import Record, Layout
  14 from nmigen.cli import main
  15 from nmigen.cli import verilog, rtlil
  16 from nmigen.compat.sim import run_simulation
  17
  18 from soc.decoder.power_enums import InternalOp, Function, CryIn
  19
  20 from soc.fu.alu.alu_input_record import CompALUOpSubset
  21 from soc.fu.cr.cr_input_record import CompCROpSubset
  22
  23 import operator
  24
  25
  26
  27
  28 class Adder(Elaboratable):
  29     def __init__(self, width):
  30         self.invert_a = Signal()
  31         self.a   = Signal(width)
  32         self.b   = Signal(width)
  33         self.o   = Signal(width, name="add_o")
  34
  35     def elaborate(self, platform):
  36         m = Module()
  37         with m.If(self.invert_a):
  38             m.d.comb += self.o.eq((~self.a) + self.b)
  39         with m.Else():
  40             m.d.comb += self.o.eq(self.a + self.b)
  41         return m
  42
  43
  44 class Subtractor(Elaboratable):
  45     def __init__(self, width):
  46         self.a   = Signal(width)
  47         self.b   = Signal(width)
  48         self.o   = Signal(width, name="sub_o")
  49
  50     def elaborate(self, platform):
  51         m = Module()
  52         m.d.comb += self.o.eq(self.a - self.b)
  53         return m
  54
  55
  56 class Multiplier(Elaboratable):
  57     def __init__(self, width):
  58         self.a   = Signal(width)
  59         self.b   = Signal(width)
  60         self.o   = Signal(width, name="mul_o")
  61
  62     def elaborate(self, platform):
  63         m = Module()
  64         m.d.comb += self.o.eq(self.a * self.b)
  65         return m
  66
  67
  68 class Shifter(Elaboratable):
  69     def __init__(self, width):
  70         self.width = width
  71         self.a   = Signal(width)
  72         self.b   = Signal(width)
  73         self.o   = Signal(width, name="shf_o")
  74
  75     def elaborate(self, platform):
  76         m = Module()
  77         btrunc = Signal(self.width)
  78         m.d.comb += btrunc.eq(self.b & Const((1<<self.width)-1))
  79         m.d.comb += self.o.eq(self.a >> btrunc)
  80         return m
  81
  82 class Dummy:
  83     pass
  84
  85
  86 class DummyALU(Elaboratable):
  87     def __init__(self, width):
  88         self.p = Dummy() # make look like nmutil pipeline API
  89         self.p.data_i = Dummy()
  90         self.p.data_i.ctx = Dummy()
  91         self.n = Dummy() # make look like nmutil pipeline API
  92         self.n.data_o = Dummy()
  93         self.p.valid_i = Signal()
  94         self.p.ready_o = Signal()
  95         self.n.ready_i = Signal()
  96         self.n.valid_o = Signal()
  97         self.counter   = Signal(4)
  98         self.op  = CompCROpSubset()
  99         i = []
 100         i.append(Signal(width, name="i1"))
 101         i.append(Signal(width, name="i2"))
 102         i.append(Signal(width, name="i3"))
 103         self.i = Array(i)
 104         self.a, self.b, self.c = i[0], i[1], i[2]
 105         self.out = Array([Signal(width, name="alu_o")])
 106         self.o = self.out[0]
 107         self.width = width
 108         # more "look like nmutil pipeline API"
 109         self.p.data_i.ctx.op = self.op
 110         self.p.data_i.a = self.a
 111         self.p.data_i.b = self.b
 112         self.p.data_i.c = self.c
 113         self.n.data_o.o = self.o
 114
 115     def elaborate(self, platform):
 116         m = Module()
 117
 118         go_now = Signal(reset_less=True) # testing no-delay ALU
 119
 120         with m.If(self.p.valid_i):
 121             # input is valid. next check, if we already said "ready" or not
 122             with m.If(~self.p.ready_o):
 123                 # we didn't say "ready" yet, so say so and initialise
 124                 m.d.sync += self.p.ready_o.eq(1)
 125
 126                 m.d.sync += self.o.eq(self.a)
 127                 m.d.comb += go_now.eq(1)
 128                 m.d.sync += self.counter.eq(1)
 129
 130         with m.Else():
 131             # input says no longer valid, so drop ready as well.
 132             # a "proper" ALU would have had to sync in the opcode and a/b ops
 133             m.d.sync += self.p.ready_o.eq(0)
 134
 135         # ok so the counter's running: when it gets to 1, fire the output
 136         with m.If((self.counter == 1) | go_now):
 137             # set the output as valid if the recipient is ready for it
 138             m.d.sync += self.n.valid_o.eq(1)
 139         with m.If(self.n.ready_i & self.n.valid_o):
 140             m.d.sync += self.n.valid_o.eq(0)
 141             # recipient said it was ready: reset back to known-good.
 142             m.d.sync += self.counter.eq(0) # reset the counter
 143             m.d.sync += self.o.eq(0) # clear the output for tidiness sake
 144
 145         # countdown to 1 (transition from 1 to 0 only on acknowledgement)
 146         with m.If(self.counter > 1):
 147             m.d.sync += self.counter.eq(self.counter - 1)
 148
 149         return m
 150
 151     def __iter__(self):
 152         yield from self.op.ports()
 153         yield self.a
 154         yield self.b
 155         yield self.c
 156         yield self.o
 157
 158     def ports(self):
 159         return list(self)
 160
 161
 162 class ALU(Elaboratable):
 163     def __init__(self, width):
 164         self.p = Dummy() # make look like nmutil pipeline API
 165         self.p.data_i = Dummy()
 166         self.p.data_i.ctx = Dummy()
 167         self.n = Dummy() # make look like nmutil pipeline API
 168         self.n.data_o = Dummy()
 169         self.p.valid_i = Signal()
 170         self.p.ready_o = Signal()
 171         self.n.ready_i = Signal()
 172         self.n.valid_o = Signal()
 173         self.counter   = Signal(4)
 174         self.op = CompALUOpSubset(name="op")
 175         i = []
 176         i.append(Signal(width, name="i1"))
 177         i.append(Signal(width, name="i2"))
 178         self.i = Array(i)
 179         self.a, self.b = i[0], i[1]
 180         self.out = Array([Signal(width, name="alu_o")])
 181         self.o = self.out[0]
 182         self.width = width
 183         # more "look like nmutil pipeline API"
 184         self.p.data_i.ctx.op = self.op
 185         self.p.data_i.a = self.a
 186         self.p.data_i.b = self.b
 187         self.n.data_o.o = self.o
 188
 189     def elaborate(self, platform):
 190         m = Module()
 191         add = Adder(self.width)
 192         mul = Multiplier(self.width)
 193         shf = Shifter(self.width)
 194         sub = Subtractor(self.width)
 195
 196         m.submodules.add = add
 197         m.submodules.mul = mul
 198         m.submodules.shf = shf
 199         m.submodules.sub = sub
 200
 201         # really should not activate absolutely all ALU inputs like this
 202         for mod in [add, mul, shf, sub]:
 203             m.d.comb += [
 204                 mod.a.eq(self.a),
 205                 mod.b.eq(self.b),
 206             ]
 207
 208         # pass invert (and carry later)
 209         m.d.comb += add.invert_a.eq(self.op.invert_a)
 210
 211         go_now = Signal(reset_less=True) # testing no-delay ALU
 212
 213         # ALU sequencer is idle when the count is zero
 214         alu_idle = Signal(reset_less=True)
 215         m.d.comb += alu_idle.eq(self.counter == 0)
 216
 217         # ALU sequencer is done when the count is one
 218         alu_done = Signal(reset_less=True)
 219         m.d.comb += alu_done.eq(self.counter == 1)
 220
 221         # select handshake handling according to ALU type
 222         with m.If(go_now):
 223             # with a combinatorial, no-delay ALU, just pass through
 224             # the handshake signals to the other side
 225             m.d.comb += self.p.ready_o.eq(self.n.ready_i)
 226             m.d.comb += self.n.valid_o.eq(self.p.valid_i)
 227         with m.Else():
 228             # sequential ALU handshake:
 229             # ready_o responds to valid_i, but only if the ALU is idle
 230             m.d.comb += self.p.ready_o.eq(alu_idle)
 231             # select the internally generated valid_o, above
 232             m.d.comb += self.n.valid_o.eq(alu_done)
 233
 234         # hold the ALU result until ready_o is asserted
 235         alu_r = Signal(self.width)
 236
 237         with m.If(alu_idle):
 238             with m.If(self.p.valid_i):
 239
 240                 # as this is a "fake" pipeline, just grab the output right now
 241                 with m.If(self.op.insn_type == InternalOp.OP_ADD):
 242                     m.d.sync += alu_r.eq(add.o)
 243                 with m.Elif(self.op.insn_type == InternalOp.OP_MUL_L64):
 244                     m.d.sync += alu_r.eq(mul.o)
 245                 with m.Elif(self.op.insn_type == InternalOp.OP_SHR):
 246                     m.d.sync += alu_r.eq(shf.o)
 247                 # SUB is zero-delay, no need to register
 248
 249                 # NOTE: all of these are fake, just something to test
 250
 251                 # MUL, to take 5 instructions
 252                 with m.If(self.op.insn_type == InternalOp.OP_MUL_L64):
 253                     m.d.sync += self.counter.eq(5)
 254                 # SHIFT to take 1, straight away
 255                 with m.Elif(self.op.insn_type == InternalOp.OP_SHR):
 256                     m.d.sync += self.counter.eq(1)
 257                 # ADD/SUB to take 3
 258                 with m.Elif(self.op.insn_type == InternalOp.OP_ADD):
 259                     m.d.sync += self.counter.eq(3)
 260                 # others to take no delay
 261                 with m.Else():
 262                     m.d.comb += go_now.eq(1)
 263
 264         with m.Elif(~alu_done | self.n.ready_i):
 265             # decrement the counter while the ALU is neither idle nor finished
 266             m.d.sync += self.counter.eq(self.counter - 1)
 267
 268         # choose between zero-delay output, or registered
 269         with m.If(go_now):
 270             m.d.comb += self.o.eq(sub.o)
 271         with m.Else():
 272             m.d.comb += self.o.eq(alu_r)
 273
 274         return m
 275
 276     def __iter__(self):
 277         yield from self.op.ports()
 278         yield self.a
 279         yield self.b
 280         yield self.o
 281         yield self.p.valid_i
 282         yield self.p.ready_o
 283         yield self.n.valid_o
 284         yield self.n.ready_i
 285
 286     def ports(self):
 287         return list(self)
 288
 289
 290 class BranchOp(Elaboratable):
 291     def __init__(self, width, op):
 292         self.a   = Signal(width)
 293         self.b   = Signal(width)
 294         self.o   = Signal(width)
 295         self.op = op
 296
 297     def elaborate(self, platform):
 298         m = Module()
 299         m.d.comb += self.o.eq(Mux(self.op(self.a, self.b), 1, 0))
 300         return m
 301
 302
 303 class BranchALU(Elaboratable):
 304     def __init__(self, width):
 305         self.p = Dummy() # make look like nmutil pipeline API
 306         self.p.data_i = Dummy()
 307         self.p.data_i.ctx = Dummy()
 308         self.n = Dummy() # make look like nmutil pipeline API
 309         self.n.data_o = Dummy()
 310         self.p.valid_i = Signal()
 311         self.p.ready_o = Signal()
 312         self.n.ready_i = Signal()
 313         self.n.valid_o = Signal()
 314         self.counter   = Signal(4)
 315         self.op  = Signal(2)
 316         i = []
 317         i.append(Signal(width, name="i1"))
 318         i.append(Signal(width, name="i2"))
 319         self.i = Array(i)
 320         self.a, self.b = i[0], i[1]
 321         self.out = Array([Signal(width)])
 322         self.o = self.out[0]
 323         self.width = width
 324
 325     def elaborate(self, platform):
 326         m = Module()
 327         bgt = BranchOp(self.width, operator.gt)
 328         blt = BranchOp(self.width, operator.lt)
 329         beq = BranchOp(self.width, operator.eq)
 330         bne = BranchOp(self.width, operator.ne)
 331
 332         m.submodules.bgt = bgt
 333         m.submodules.blt = blt
 334         m.submodules.beq = beq
 335         m.submodules.bne = bne
 336         for mod in [bgt, blt, beq, bne]:
 337             m.d.comb += [
 338                 mod.a.eq(self.a),
 339                 mod.b.eq(self.b),
 340             ]
 341
 342         go_now = Signal(reset_less=True) # testing no-delay ALU
 343         with m.If(self.p.valid_i):
 344             # input is valid. next check, if we already said "ready" or not
 345             with m.If(~self.p.ready_o):
 346                 # we didn't say "ready" yet, so say so and initialise
 347                 m.d.sync += self.p.ready_o.eq(1)
 348
 349                 # as this is a "fake" pipeline, just grab the output right now
 350                 with m.Switch(self.op):
 351                     for i, mod in enumerate([bgt, blt, beq, bne]):
 352                         with m.Case(i):
 353                             m.d.sync += self.o.eq(mod.o)
 354                 m.d.sync += self.counter.eq(5) # branch to take 5 cycles (fake)
 355                 #m.d.comb += go_now.eq(1)
 356         with m.Else():
 357             # input says no longer valid, so drop ready as well.
 358             # a "proper" ALU would have had to sync in the opcode and a/b ops
 359             m.d.sync += self.p.ready_o.eq(0)
 360
 361         # ok so the counter's running: when it gets to 1, fire the output
 362         with m.If((self.counter == 1) | go_now):
 363             # set the output as valid if the recipient is ready for it
 364             m.d.sync += self.n.valid_o.eq(1)
 365         with m.If(self.n.ready_i & self.n.valid_o):
 366             m.d.sync += self.n.valid_o.eq(0)
 367             # recipient said it was ready: reset back to known-good.
 368             m.d.sync += self.counter.eq(0) # reset the counter
 369             m.d.sync += self.o.eq(0) # clear the output for tidiness sake
 370
 371         # countdown to 1 (transition from 1 to 0 only on acknowledgement)
 372         with m.If(self.counter > 1):
 373             m.d.sync += self.counter.eq(self.counter - 1)
 374
 375         return m
 376
 377     def __iter__(self):
 378         yield self.op
 379         yield self.a
 380         yield self.b
 381         yield self.o
 382
 383     def ports(self):
 384         return list(self)
 385
 386 def run_op(dut, a, b, op, inv_a=0):
 387     from nmigen.back.pysim import Settle
 388     yield dut.a.eq(a)
 389     yield dut.b.eq(b)
 390     yield dut.op.insn_type.eq(op)
 391     yield dut.op.invert_a.eq(inv_a)
 392     yield dut.n.ready_i.eq(0)
 393     yield dut.p.valid_i.eq(1)
 394
 395     # if valid_o rose on the very first cycle, it is a
 396     # zero-delay ALU
 397     yield Settle()
 398     vld = yield dut.n.valid_o
 399     if vld:
 400         # special case for zero-delay ALU
 401         # we must raise ready_i first, since the combinatorial ALU doesn't
 402         # have any storage, and doesn't dare to assert ready_o back to us
 403         # until we accepted the output data
 404         yield dut.n.ready_i.eq(1)
 405         result = yield dut.o
 406         yield
 407         yield dut.p.valid_i.eq(0)
 408         yield dut.n.ready_i.eq(0)
 409         yield
 410         return result
 411
 412     yield
 413
 414     # wait for the ALU to accept our input data
 415     while True:
 416         rdy = yield dut.p.ready_o
 417         if rdy:
 418             break
 419         yield
 420
 421     yield dut.p.valid_i.eq(0)
 422
 423     # wait for the ALU to present the output data
 424     while True:
 425         yield Settle()
 426         vld = yield dut.n.valid_o
 427         if vld:
 428             break
 429         yield
 430
 431     # latch the result and lower read_i
 432     yield dut.n.ready_i.eq(1)
 433     result = yield dut.o
 434     yield
 435     yield dut.n.ready_i.eq(0)
 436     yield
 437
 438     return result
 439
 440
 441 def alu_sim(dut):
 442     result = yield from run_op(dut, 5, 3, InternalOp.OP_ADD)
 443     print ("alu_sim add", result)
 444     assert (result == 8)
 445
 446     result = yield from run_op(dut, 2, 3, InternalOp.OP_MUL_L64)
 447     print ("alu_sim mul", result)
 448     assert (result == 6)
 449
 450     result = yield from run_op(dut, 5, 3, InternalOp.OP_ADD, inv_a=1)
 451     print ("alu_sim add-inv", result)
 452     assert (result == 65533)
 453
 454     # test zero-delay ALU
 455     # don't have OP_SUB, so use any other
 456     result = yield from run_op(dut, 5, 3, InternalOp.OP_NOP)
 457     print ("alu_sim sub", result)
 458     assert (result == 2)
 459
 460     result = yield from run_op(dut, 13, 2, InternalOp.OP_SHR)
 461     print ("alu_sim shr", result)
 462     assert (result == 3)
 463
 464
 465 def test_alu():
 466     alu = ALU(width=16)
 467     run_simulation(alu, {"sync": alu_sim(alu)}, vcd_name='test_alusim.vcd')
 468
 469     vl = rtlil.convert(alu, ports=alu.ports())
 470     with open("test_alu.il", "w") as f:
 471         f.write(vl)
 472
 473
if __name__ == "__main__":
    # when run as a script: simulate the ALU and write out its RTLIL
    test_alu()

    # disabled: RTLIL conversion of the branch-compare ALU
    # alu = BranchALU(width=16)
    # vl = rtlil.convert(alu, ports=alu.ports())
    # with open("test_branch_alu.il", "w") as f:
    #     f.write(vl)
 481