src/soc/experiment/alu_hier.py

"""*Experimental* ALU: based on nmigen alu_hier.py, includes branch-compare ALU

This ALU is *deliberately* designed to add (unnecessary) delays to
different operations so as to be able to test the 6600-style matrices
and the CompUnits.  Countdown timers wait for (defined) periods before
indicating that the output is valid.

A "real" integer ALU would place the answers onto the output bus after
only one cycle (sync).
"""
  11
  12 from nmigen import Elaboratable, Signal, Module, Const, Mux, Array
  13 from nmigen.hdl.rec import Record, Layout
  14 from nmigen.cli import main
  15 from nmigen.cli import verilog, rtlil
  16 from nmigen.compat.sim import run_simulation
  17
  18 from soc.decoder.power_enums import InternalOp, Function, CryIn
  19
  20 import operator
  21
  22
  23 class CompALUOpSubset(Record):
  24     """CompALUOpSubset
  25
  26     a copy of the relevant subset information from Decode2Execute1Type
  27     needed for ALU operations.  use with eq_from_execute1 (below) to
  28     grab subsets.
  29     """
  30     def __init__(self, name=None):
  31         layout = (('insn_type', InternalOp),
  32                   ('fn_unit', Function),
  33                   ('nia', 64),
  34                   ('imm_data', Layout((("imm", 64), ("imm_ok", 1)))),
  35                     #'cr = Signal(32, reset_less=True) # NO: this is from the CR SPR
  36                     #'xerc = XerBits() # NO: this is from the XER SPR
  37                   ('lk', 1),
  38                   ('rc', Layout((("rc", 1), ("rc_ok", 1)))),
  39                   ('oe', Layout((("oe", 1), ("oe_ok", 1)))),
  40                   ('invert_a', 1),
  41                   ('invert_out', 1),
  42                   ('input_carry', CryIn),
  43                   ('output_carry', 1),
  44                   ('input_cr', 1),
  45                   ('output_cr', 1),
  46                   ('is_32bit', 1),
  47                   ('is_signed', 1),
  48                   ('data_len', 4), # TODO: should be in separate CompLDSTSubset
  49                   ('byte_reverse', 1),
  50                   ('sign_extend', 1))
  51
  52         Record.__init__(self, Layout(layout), name=name)
  53
  54         # grrr.  Record does not have kwargs
  55         self.insn_type.reset_less = True
  56         self.fn_unit.reset_less = True
  57         self.nia.reset_less = True
  58         #self.cr = Signal(32, reset_less = True
  59         #self.xerc = XerBits(
  60         self.lk.reset_less = True
  61         self.invert_a.reset_less = True
  62         self.invert_out.reset_less = True
  63         self.input_carry.reset_less = True
  64         self.output_carry.reset_less = True
  65         self.input_cr.reset_less = True
  66         self.output_cr.reset_less = True
  67         self.is_32bit.reset_less = True
  68         self.is_signed.reset_less = True
  69         self.data_len.reset_less = True
  70         self.byte_reverse.reset_less = True
  71         self.sign_extend.reset_less = True
  72
  73     def eq_from_execute1(self, other):
  74         """ use this to copy in from Decode2Execute1Type
  75         """
  76         res = []
  77         for fname, sig in self.fields.items():
  78             eqfrom = other.fields[fname]
  79             res.append(sig.eq(eqfrom))
  80         return res
  81
  82     def ports(self):
  83         return [self.insn_type,
  84                 self.nia,
  85                 #self.cr,
  86                 #self.xerc,
  87                 self.lk,
  88                 self.invert_a,
  89                 self.invert_out,
  90                 self.input_carry,
  91                 self.output_carry,
  92                 self.input_cr,
  93                 self.output_cr,
  94                 self.is_32bit,
  95                 self.is_signed,
  96                 self.data_len,
  97                 self.byte_reverse,
  98                 self.sign_extend,
  99         ]
 100
 101
 102 class Adder(Elaboratable):
 103     def __init__(self, width):
 104         self.invert_a = Signal()
 105         self.a   = Signal(width)
 106         self.b   = Signal(width)
 107         self.o   = Signal(width)
 108
 109     def elaborate(self, platform):
 110         m = Module()
 111         with m.If(self.invert_a):
 112             m.d.comb += self.o.eq((~self.a) + self.b)
 113         with m.Else():
 114             m.d.comb += self.o.eq(self.a + self.b)
 115         return m
 116
 117
 118 class Subtractor(Elaboratable):
 119     def __init__(self, width):
 120         self.a   = Signal(width)
 121         self.b   = Signal(width)
 122         self.o   = Signal(width)
 123
 124     def elaborate(self, platform):
 125         m = Module()
 126         m.d.comb += self.o.eq(self.a - self.b)
 127         return m
 128
 129
 130 class Multiplier(Elaboratable):
 131     def __init__(self, width):
 132         self.a   = Signal(width)
 133         self.b   = Signal(width)
 134         self.o   = Signal(width)
 135
 136     def elaborate(self, platform):
 137         m = Module()
 138         m.d.comb += self.o.eq(self.a * self.b)
 139         return m
 140
 141
 142 class Shifter(Elaboratable):
 143     def __init__(self, width):
 144         self.width = width
 145         self.a   = Signal(width)
 146         self.b   = Signal(width)
 147         self.o   = Signal(width)
 148
 149     def elaborate(self, platform):
 150         m = Module()
 151         btrunc = Signal(self.width)
 152         m.d.comb += btrunc.eq(self.b & Const((1<<self.width)-1))
 153         m.d.comb += self.o.eq(self.a >> btrunc)
 154         return m
 155
 156
 157 class ALU(Elaboratable):
 158     def __init__(self, width):
 159         self.p_valid_i = Signal()
 160         self.p_ready_o = Signal()
 161         self.n_ready_i = Signal()
 162         self.n_valid_o = Signal()
 163         self.counter   = Signal(4)
 164         self.op  = CompALUOpSubset()
 165         i = []
 166         i.append(Signal(width, name="i1"))
 167         i.append(Signal(width, name="i2"))
 168         self.i = Array(i)
 169         self.a, self.b = i[0], i[1]
 170         self.out = Array([Signal(width)])
 171         self.o = self.out[0]
 172         self.width = width
 173
 174     def elaborate(self, platform):
 175         m = Module()
 176         add = Adder(self.width)
 177         mul = Multiplier(self.width)
 178         shf = Shifter(self.width)
 179
 180         m.submodules.add = add
 181         m.submodules.mul = mul
 182         m.submodules.shf = shf
 183
 184         # really should not activate absolutely all ALU inputs like this
 185         for mod in [add, mul, shf]:
 186             m.d.comb += [
 187                 mod.a.eq(self.a),
 188                 mod.b.eq(self.b),
 189             ]
 190
 191         # pass invert (and carry later)
 192         m.d.comb += add.invert_a.eq(self.op.invert_a)
 193
 194         go_now = Signal(reset_less=True) # testing no-delay ALU
 195
 196         with m.If(self.p_valid_i):
 197             # input is valid. next check, if we already said "ready" or not
 198             with m.If(~self.p_ready_o):
 199                 # we didn't say "ready" yet, so say so and initialise
 200                 m.d.sync += self.p_ready_o.eq(1)
 201
 202                 # as this is a "fake" pipeline, just grab the output right now
 203                 with m.If(self.op.insn_type == InternalOp.OP_ADD):
 204                     m.d.sync += self.o.eq(add.o)
 205                 with m.Elif(self.op.insn_type == InternalOp.OP_MUL_L64):
 206                     m.d.sync += self.o.eq(mul.o)
 207                 with m.Elif(self.op.insn_type == InternalOp.OP_SHR):
 208                     m.d.sync += self.o.eq(shf.o)
 209                 # TODO: SUB
 210
 211                 # NOTE: all of these are fake, just something to test
 212
 213                 # MUL, to take 5 instructions
 214                 with m.If(self.op.insn_type == InternalOp.OP_MUL_L64):
 215                     m.d.sync += self.counter.eq(5)
 216                 # SHIFT to take 7
 217                 with m.Elif(self.op.insn_type == InternalOp.OP_SHR):
 218                     m.d.sync += self.counter.eq(7)
 219                 # ADD/SUB to take 2, straight away
 220                 with m.If(self.op.insn_type == InternalOp.OP_ADD):
 221                     m.d.sync += self.counter.eq(3)
 222                 # others to take 1, straight away
 223                 with m.Else():
 224                     m.d.comb += go_now.eq(1)
 225                     m.d.sync += self.counter.eq(1)
 226
 227         with m.Else():
 228             # input says no longer valid, so drop ready as well.
 229             # a "proper" ALU would have had to sync in the opcode and a/b ops
 230             m.d.sync += self.p_ready_o.eq(0)
 231
 232         # ok so the counter's running: when it gets to 1, fire the output
 233         with m.If((self.counter == 1) | go_now):
 234             # set the output as valid if the recipient is ready for it
 235             m.d.sync += self.n_valid_o.eq(1)
 236         with m.If(self.n_ready_i & self.n_valid_o):
 237             m.d.sync += self.n_valid_o.eq(0)
 238             # recipient said it was ready: reset back to known-good.
 239             m.d.sync += self.counter.eq(0) # reset the counter
 240             m.d.sync += self.o.eq(0) # clear the output for tidiness sake
 241
 242         # countdown to 1 (transition from 1 to 0 only on acknowledgement)
 243         with m.If(self.counter > 1):
 244             m.d.sync += self.counter.eq(self.counter - 1)
 245
 246         return m
 247
 248     def __iter__(self):
 249         yield from self.op.ports()
 250         yield self.a
 251         yield self.b
 252         yield self.o
 253
 254     def ports(self):
 255         return list(self)
 256
 257
 258 class BranchOp(Elaboratable):
 259     def __init__(self, width, op):
 260         self.a   = Signal(width)
 261         self.b   = Signal(width)
 262         self.o   = Signal(width)
 263         self.op = op
 264
 265     def elaborate(self, platform):
 266         m = Module()
 267         m.d.comb += self.o.eq(Mux(self.op(self.a, self.b), 1, 0))
 268         return m
 269
 270
 271 class BranchALU(Elaboratable):
 272     def __init__(self, width):
 273         self.p_valid_i = Signal()
 274         self.p_ready_o = Signal()
 275         self.n_ready_i = Signal()
 276         self.n_valid_o = Signal()
 277         self.counter   = Signal(4)
 278         self.op  = Signal(2)
 279         i = []
 280         i.append(Signal(width, name="i1"))
 281         i.append(Signal(width, name="i2"))
 282         self.i = Array(i)
 283         self.a, self.b = i[0], i[1]
 284         self.out = Array([Signal(width)])
 285         self.o = self.out[0]
 286         self.width = width
 287
 288     def elaborate(self, platform):
 289         m = Module()
 290         bgt = BranchOp(self.width, operator.gt)
 291         blt = BranchOp(self.width, operator.lt)
 292         beq = BranchOp(self.width, operator.eq)
 293         bne = BranchOp(self.width, operator.ne)
 294
 295         m.submodules.bgt = bgt
 296         m.submodules.blt = blt
 297         m.submodules.beq = beq
 298         m.submodules.bne = bne
 299         for mod in [bgt, blt, beq, bne]:
 300             m.d.comb += [
 301                 mod.a.eq(self.a),
 302                 mod.b.eq(self.b),
 303             ]
 304
 305         go_now = Signal(reset_less=True) # testing no-delay ALU
 306         with m.If(self.p_valid_i):
 307             # input is valid. next check, if we already said "ready" or not
 308             with m.If(~self.p_ready_o):
 309                 # we didn't say "ready" yet, so say so and initialise
 310                 m.d.sync += self.p_ready_o.eq(1)
 311
 312                 # as this is a "fake" pipeline, just grab the output right now
 313                 with m.Switch(self.op):
 314                     for i, mod in enumerate([bgt, blt, beq, bne]):
 315                         with m.Case(i):
 316                             m.d.sync += self.o.eq(mod.o)
 317                 m.d.sync += self.counter.eq(5) # branch to take 5 cycles (fake)
 318                 #m.d.comb += go_now.eq(1)
 319         with m.Else():
 320             # input says no longer valid, so drop ready as well.
 321             # a "proper" ALU would have had to sync in the opcode and a/b ops
 322             m.d.sync += self.p_ready_o.eq(0)
 323
 324         # ok so the counter's running: when it gets to 1, fire the output
 325         with m.If((self.counter == 1) | go_now):
 326             # set the output as valid if the recipient is ready for it
 327             m.d.sync += self.n_valid_o.eq(1)
 328         with m.If(self.n_ready_i & self.n_valid_o):
 329             m.d.sync += self.n_valid_o.eq(0)
 330             # recipient said it was ready: reset back to known-good.
 331             m.d.sync += self.counter.eq(0) # reset the counter
 332             m.d.sync += self.o.eq(0) # clear the output for tidiness sake
 333
 334         # countdown to 1 (transition from 1 to 0 only on acknowledgement)
 335         with m.If(self.counter > 1):
 336             m.d.sync += self.counter.eq(self.counter - 1)
 337
 338         return m
 339
 340     def __iter__(self):
 341         yield self.op
 342         yield self.a
 343         yield self.b
 344         yield self.o
 345
 346     def ports(self):
 347         return list(self)
 348
 349 def run_op(dut, a, b, op, inv_a=0):
 350     yield dut.a.eq(a)
 351     yield dut.b.eq(b)
 352     yield dut.op.insn_type.eq(op)
 353     yield dut.op.invert_a.eq(inv_a)
 354     yield dut.n_ready_i.eq(0)
 355     yield dut.p_valid_i.eq(1)
 356     yield
 357     while True:
 358         yield
 359         n_valid_o = yield dut.n_valid_o
 360         if n_valid_o:
 361             break
 362     yield
 363
 364     result = yield dut.o
 365     yield dut.p_valid_i.eq(0)
 366     yield dut.n_ready_i.eq(0)
 367     yield
 368
 369     return result
 370
 371
 372 def alu_sim(dut):
 373     result = yield from run_op(dut, 5, 3, InternalOp.OP_ADD)
 374     print ("alu_sim add", result)
 375     assert (result == 8)
 376
 377     result = yield from run_op(dut, 2, 3, InternalOp.OP_MUL_L64)
 378     print ("alu_sim mul", result)
 379     assert (result == 6)
 380
 381     result = yield from run_op(dut, 5, 3, InternalOp.OP_ADD, inv_a=1)
 382     print ("alu_sim add-inv", result)
 383     assert (result == 65533)
 384
 385
 386 def test_alu():
 387     alu = ALU(width=16)
 388     run_simulation(alu, alu_sim(alu), vcd_name='test_alusim.vcd')
 389
 390     vl = rtlil.convert(alu, ports=alu.ports())
 391     with open("test_alu.il", "w") as f:
 392         f.write(vl)
 393
 394
 395 if __name__ == "__main__":
 396     test_alu()
 397
 398     alu = BranchALU(width=16)
 399     vl = rtlil.convert(alu, ports=alu.ports())
 400     with open("test_branch_alu.il", "w") as f:
 401         f.write(vl)
 402