src/soc/experiment/alu_hier.py

   1 """*Experimental* ALU: based on nmigen alu_hier.py, includes branch-compare ALU
   2
   3 This ALU is *deliberately* designed to add in (unnecessary) delays into
   4 different operations so as to be able to test the 6600-style matrices
   5 and the CompUnits.  Countdown timers wait for (defined) periods before
   6 indicating that the output is valid
   7
   8 A "real" integer ALU would place the answers onto the output bus after
   9 only one cycle (sync)
  10 """
  11
  12 from nmigen import Elaboratable, Signal, Module, Const, Mux
  13 from nmigen.hdl.rec import Record, Layout
  14 from nmigen.cli import main
  15 from nmigen.cli import verilog, rtlil
  16 from nmigen.compat.sim import run_simulation
  17 from nmutil.extend import exts
  18 from nmutil.gtkw import write_gtkw
  19
  20 # NOTE: to use cxxsim, export NMIGEN_SIM_MODE=cxxsim from the shell
  21 # Also, check out the cxxsim nmigen branch, and latest yosys from git
  22 from nmutil.sim_tmp_alternative import (Simulator, nmigen_sim_top_module,
  23                                         is_engine_pysim)
  24
  25 from openpower.decoder.decode2execute1 import Data
  26 from openpower.decoder.power_enums import MicrOp, Function, CryIn
  27
  28 from soc.fu.alu.alu_input_record import CompALUOpSubset
  29 from soc.fu.cr.cr_input_record import CompCROpSubset
  30
  31 from soc.fu.pipe_data import FUBaseData
  32 from soc.fu.alu.pipe_data import CommonPipeSpec
  33 from soc.fu.compunits.compunits import FunctionUnitBaseSingle
  34
  35 import operator
  36
  37
  38 class Adder(Elaboratable):
  39     def __init__(self, width):
  40         self.invert_in = Signal()
  41         self.a = Signal(width)
  42         self.b = Signal(width)
  43         self.o = Signal(width, name="add_o")
  44
  45     def elaborate(self, platform):
  46         m = Module()
  47         with m.If(self.invert_in):
  48             m.d.comb += self.o.eq((~self.a) + self.b)
  49         with m.Else():
  50             m.d.comb += self.o.eq(self.a + self.b)
  51         return m
  52
  53
  54 class Subtractor(Elaboratable):
  55     def __init__(self, width):
  56         self.a = Signal(width)
  57         self.b = Signal(width)
  58         self.o = Signal(width, name="sub_o")
  59
  60     def elaborate(self, platform):
  61         m = Module()
  62         m.d.comb += self.o.eq(self.a - self.b)
  63         return m
  64
  65
  66 class Multiplier(Elaboratable):
  67     def __init__(self, width):
  68         self.a = Signal(width)
  69         self.b = Signal(width)
  70         self.o = Signal(width, name="mul_o")
  71
  72     def elaborate(self, platform):
  73         m = Module()
  74         m.d.comb += self.o.eq(self.a * self.b)
  75         return m
  76
  77
  78 class Shifter(Elaboratable):
  79     def __init__(self, width):
  80         self.width = width
  81         self.a = Signal(width)
  82         self.b = Signal(width)
  83         self.o = Signal(width, name="shf_o")
  84
  85     def elaborate(self, platform):
  86         m = Module()
  87         btrunc = Signal(self.width)
  88         m.d.comb += btrunc.eq(self.b & Const((1 << self.width)-1))
  89         m.d.comb += self.o.eq(self.a >> btrunc)
  90         return m
  91
  92
  93 class SignExtend(Elaboratable):
  94     def __init__(self, width):
  95         self.width = width
  96         self.a = Signal(width)
  97         self.o = Signal(width, name="exts_o")
  98
  99     def elaborate(self, platform):
 100         m = Module()
 101         m.d.comb += self.o.eq(exts(self.a, 8, self.width))
 102         return m
 103
 104
 105 class Dummy:
 106     pass
 107
 108
class DummyALU(Elaboratable):
    """Three-input test ALU that simply copies input ``a`` to the output.

    Inputs ``a``, ``b`` and ``c`` (also available as the list ``i``) are
    ``width`` bits wide; ``b`` and ``c`` are not used by ``elaborate``.
    The single output ``o`` is registered.  ``p`` and ``n`` carry the
    ready/valid handshake signals, arranged to resemble the nmutil
    pipeline API.
    """

    def __init__(self, width):
        self.p = Dummy()  # make look like nmutil pipeline API
        self.p.i_data = Dummy()
        self.p.i_data.ctx = Dummy()
        self.n = Dummy()  # make look like nmutil pipeline API
        self.n.o_data = Dummy()
        # handshake: p side faces the producer, n side faces the consumer
        self.p.i_valid = Signal()
        self.p.o_ready = Signal()
        self.n.i_ready = Signal()
        self.n.o_valid = Signal()
        self.counter = Signal(4)
        self.op = CompCROpSubset()
        i = []
        i.append(Signal(width, name="i1"))
        i.append(Signal(width, name="i2"))
        i.append(Signal(width, name="i3"))
        self.i = i
        self.a, self.b, self.c = i[0], i[1], i[2]
        self.out = tuple([Signal(width, name="alu_o")])
        self.o = self.out[0]
        self.width = width
        # more "look like nmutil pipeline API"
        self.p.i_data.ctx.op = self.op
        self.p.i_data.a = self.a
        self.p.i_data.b = self.b
        self.p.i_data.c = self.c
        self.n.o_data.o = self.o

    def elaborate(self, platform):
        """Build the handshake state machine and the a -> o copy."""
        m = Module()

        go_now = Signal(reset_less=True)  # testing no-delay ALU

        with m.If(self.p.i_valid):
            # input is valid. next check, if we already said "ready" or not
            with m.If(~self.p.o_ready):
                # we didn't say "ready" yet, so say so and initialise
                m.d.sync += self.p.o_ready.eq(1)

                # the "operation": register input a as the result
                m.d.sync += self.o.eq(self.a)
                # request o_valid straight away (see the check below)
                m.d.comb += go_now.eq(1)
                m.d.sync += self.counter.eq(1)

        with m.Else():
            # input says no longer valid, so drop ready as well.
            # a "proper" ALU would have had to sync in the opcode and a/b ops
            m.d.sync += self.p.o_ready.eq(0)

        # ok so the counter's running: when it gets to 1, fire the output
        with m.If((self.counter == 1) | go_now):
            # set the output as valid if the recipient is ready for it
            m.d.sync += self.n.o_valid.eq(1)
        with m.If(self.n.i_ready & self.n.o_valid):
            m.d.sync += self.n.o_valid.eq(0)
            # recipient said it was ready: reset back to known-good.
            m.d.sync += self.counter.eq(0)  # reset the counter
            m.d.sync += self.o.eq(0)  # clear the output for tidiness sake

        # countdown to 1 (transition from 1 to 0 only on acknowledgement)
        with m.If(self.counter > 1):
            m.d.sync += self.counter.eq(self.counter - 1)

        return m

    def __iter__(self):
        """Yield the op fields, the three inputs and the output.

        The handshake signals are not included.
        """
        yield from self.op.ports()
        yield self.a
        yield self.b
        yield self.c
        yield self.o

    def ports(self):
        """Return the signals from ``__iter__`` as a list."""
        return list(self)
 183
 184 #####################
 185 # converting even this dummy ALU over to the FunctionUnit RegSpecs API
 186 # which, errr, note that the regspecs are totally ignored below, but
 187 # at least the widths are all 64-bit so it's okay.
 188 #####################
 189
 190 # input (and output) for logical initial stage (common input)
 191
 192
 193 class ALUInputData(FUBaseData):
 194     regspec = [('INT', 'a', '0:63'),  # RA
 195                ('INT', 'b', '0:63'),  # RB/immediate
 196                ]
 197
 198     def __init__(self, pspec):
 199         super().__init__(pspec, False)
 200
 201
 202 # output from ALU final stage
 203 class ALUOutputData(FUBaseData):
 204     regspec = [('INT', 'o', '0:63'),        # RT
 205                ]
 206
 207     def __init__(self, pspec):
 208         super().__init__(pspec, True)
 209
 210
 211 # ALU pipe specification class
 212 class ALUPipeSpec(CommonPipeSpec):
 213     regspec = (ALUInputData.regspec, ALUOutputData.regspec)
 214     opsubsetkls = CompALUOpSubset
 215
 216
class ALUFunctionUnit(FunctionUnitBaseSingle):
    """Function Unit wrapping the test ALU via the RegSpecs API."""
    # class ALUFunctionUnit(FunctionUnitBaseMulti):
    fnunit = Function.ALU

    def __init__(self, idx, parent_pspec):
        # NOTE(review): idx is accepted but not used; a literal 1 is passed
        # in the third position instead.  If that base-class argument is
        # the unit index, this looks unintended -- confirm against
        # FunctionUnitBaseSingle before changing.
        super().__init__(ALUPipeSpec, ALU, 1, parent_pspec)
 223
 224
 225 class ALU(Elaboratable):
 226     def __init__(self, width):
 227         # XXX major temporary hack: attempting to convert
 228         # ALU over to RegSpecs API, FunctionUnitBaseSingle passes in
 229         # a regspec here which we can't cope with.  therefore, errr...
 230         # just throw it away and set the width to 64
 231         if not isinstance(width, int):
 232             width = 64
 233         # TODO, really this should just inherit from ControlBase it would
 234         # be a lot less messy.
 235         self.p = Dummy()  # make look like nmutil pipeline API
 236         self.p.i_data = Dummy()
 237         self.p.i_data.ctx = Dummy()
 238         self.n = Dummy()  # make look like nmutil pipeline API
 239         self.n.o_data = Dummy()
 240         self.p.i_valid = Signal()
 241         self.p.o_ready = Signal()
 242         self.n.i_ready = Signal()
 243         self.n.o_valid = Signal()
 244         self.counter = Signal(4)
 245         self.op = CompALUOpSubset(name="op")
 246         i = []
 247         i.append(Signal(width, name="i1"))
 248         i.append(Signal(width, name="i2"))
 249         self.i = i
 250         self.a, self.b = i[0], i[1]
 251         out = []
 252         out.append(Data(width, name="alu_o"))
 253         out.append(Data(width, name="alu_cr"))
 254         self.out = tuple(out)
 255         self.o = self.out[0]
 256         self.cr = self.out[1]
 257         self.width = width
 258         # more "look like nmutil ControlBase pipeline API" stuff
 259         self.p.i_data.ctx.op = self.op
 260         self.p.i_data.a = self.a
 261         self.p.i_data.b = self.b
 262         self.n.o_data.o = self.o
 263         self.n.o_data.cr = self.cr
 264
 265     def elaborate(self, platform):
 266         m = Module()
 267         add = Adder(self.width)
 268         mul = Multiplier(self.width)
 269         shf = Shifter(self.width)
 270         sub = Subtractor(self.width)
 271         ext_sign = SignExtend(self.width)
 272
 273         m.submodules.add = add
 274         m.submodules.mul = mul
 275         m.submodules.shf = shf
 276         m.submodules.sub = sub
 277         m.submodules.ext_sign = ext_sign
 278
 279         # really should not activate absolutely all ALU inputs like this
 280         for mod in [add, mul, shf, sub]:
 281             m.d.comb += [
 282                 mod.a.eq(self.a),
 283                 mod.b.eq(self.b),
 284             ]
 285         # EXTS sign extends the first input
 286         with m.If(self.op.insn_type == MicrOp.OP_EXTS):
 287             m.d.comb += ext_sign.a.eq(self.a)
 288         # EXTSWSLI sign extends the second input
 289         with m.Elif(self.op.insn_type == MicrOp.OP_EXTSWSLI):
 290             m.d.comb += ext_sign.a.eq(self.b)
 291
 292         # pass invert (and carry later)
 293         m.d.comb += add.invert_in.eq(self.op.invert_in)
 294
 295         go_now = Signal(reset_less=True)  # testing no-delay ALU
 296
 297         # ALU sequencer is idle when the count is zero
 298         alu_idle = Signal(reset_less=True)
 299         m.d.comb += alu_idle.eq(self.counter == 0)
 300
 301         # ALU sequencer is done when the count is one
 302         alu_done = Signal(reset_less=True)
 303         m.d.comb += alu_done.eq(self.counter == 1)
 304
 305         # select handshake handling according to ALU type
 306         with m.If(go_now):
 307             # with a combinatorial, no-delay ALU, just pass through
 308             # the handshake signals to the other side
 309             m.d.comb += self.p.o_ready.eq(self.n.i_ready)
 310             m.d.comb += self.n.o_valid.eq(self.p.i_valid)
 311         with m.Else():
 312             # sequential ALU handshake:
 313             # o_ready responds to i_valid, but only if the ALU is idle
 314             m.d.comb += self.p.o_ready.eq(alu_idle)
 315             # select the internally generated o_valid, above
 316             m.d.comb += self.n.o_valid.eq(alu_done)
 317
 318         # hold the ALU result until o_ready is asserted
 319         alu_r = Signal(self.width)
 320
 321         # output masks
 322         # NOP and ILLEGAL don't output anything
 323         with m.If((self.op.insn_type != MicrOp.OP_NOP) &
 324                   (self.op.insn_type != MicrOp.OP_ILLEGAL)):
 325             m.d.comb += self.o.ok.eq(1)
 326         # CR is output when rc bit is active
 327         m.d.comb += self.cr.ok.eq(self.op.rc.rc)
 328
 329         with m.If(alu_idle):
 330             with m.If(self.p.i_valid):
 331
 332                 # as this is a "fake" pipeline, just grab the output right now
 333                 with m.If(self.op.insn_type == MicrOp.OP_ADD):
 334                     m.d.sync += alu_r.eq(add.o)
 335                 with m.Elif(self.op.insn_type == MicrOp.OP_MUL_L64):
 336                     m.d.sync += alu_r.eq(mul.o)
 337                 with m.Elif(self.op.insn_type == MicrOp.OP_SHR):
 338                     m.d.sync += alu_r.eq(shf.o)
 339                 with m.Elif(self.op.insn_type == MicrOp.OP_EXTS):
 340                     m.d.sync += alu_r.eq(ext_sign.o)
 341                 with m.Elif(self.op.insn_type == MicrOp.OP_EXTSWSLI):
 342                     m.d.sync += alu_r.eq(ext_sign.o)
 343                 # SUB is zero-delay, no need to register
 344
 345                 # NOTE: all of these are fake, just something to test
 346
 347                 # MUL, to take 5 instructions
 348                 with m.If(self.op.insn_type == MicrOp.OP_MUL_L64):
 349                     m.d.sync += self.counter.eq(5)
 350                 # SHIFT to take 1, straight away
 351                 with m.Elif(self.op.insn_type == MicrOp.OP_SHR):
 352                     m.d.sync += self.counter.eq(1)
 353                 # ADD/SUB to take 3
 354                 with m.Elif(self.op.insn_type == MicrOp.OP_ADD):
 355                     m.d.sync += self.counter.eq(3)
 356                 # EXTS to take 1
 357                 with m.Elif(self.op.insn_type == MicrOp.OP_EXTS):
 358                     m.d.sync += self.counter.eq(1)
 359                 # EXTSWSLI to take 1
 360                 with m.Elif(self.op.insn_type == MicrOp.OP_EXTSWSLI):
 361                     m.d.sync += self.counter.eq(1)
 362                 # others to take no delay
 363                 with m.Else():
 364                     m.d.comb += go_now.eq(1)
 365
 366         with m.Elif(~alu_done | self.n.i_ready):
 367             # decrement the counter while the ALU is neither idle nor finished
 368             m.d.sync += self.counter.eq(self.counter - 1)
 369
 370         # choose between zero-delay output, or registered
 371         with m.If(go_now):
 372             m.d.comb += self.o.data.eq(sub.o)
 373         # only present the result at the last computation cycle
 374         with m.Elif(alu_done):
 375             m.d.comb += self.o.data.eq(alu_r)
 376
 377         # determine condition register bits based on the data output value
 378         with m.If(~self.o.data.any()):
 379             m.d.comb += self.cr.data.eq(0b001)
 380         with m.Elif(self.o.data[-1]):
 381             m.d.comb += self.cr.data.eq(0b010)
 382         with m.Else():
 383             m.d.comb += self.cr.data.eq(0b100)
 384
 385         return m
 386
 387     def __iter__(self):
 388         yield from self.op.ports()
 389         yield self.a
 390         yield self.b
 391         yield from self.o.ports()
 392         yield self.p.i_valid
 393         yield self.p.o_ready
 394         yield self.n.o_valid
 395         yield self.n.i_ready
 396
 397     def ports(self):
 398         return list(self)
 399
 400
 401 class BranchOp(Elaboratable):
 402     def __init__(self, width, op):
 403         self.a = Signal(width)
 404         self.b = Signal(width)
 405         self.o = Signal(width)
 406         self.op = op
 407
 408     def elaborate(self, platform):
 409         m = Module()
 410         m.d.comb += self.o.eq(Mux(self.op(self.a, self.b), 1, 0))
 411         return m
 412
 413
class BranchALU(Elaboratable):
    """Test ALU that compares ``a`` with ``b``, with a fake delay.

    The 2-bit ``op`` selects the comparison: 0 = greater-than,
    1 = less-than, 2 = equal, 3 = not-equal.  The registered output
    ``o`` is 1 when the comparison holds and 0 otherwise.  ``p`` and
    ``n`` carry the ready/valid handshake signals, arranged to resemble
    the nmutil pipeline API.
    """

    def __init__(self, width):
        self.p = Dummy()  # make look like nmutil pipeline API
        self.p.i_data = Dummy()
        self.p.i_data.ctx = Dummy()
        self.n = Dummy()  # make look like nmutil pipeline API
        self.n.o_data = Dummy()
        # handshake: p side faces the producer, n side faces the consumer
        self.p.i_valid = Signal()
        self.p.o_ready = Signal()
        self.n.i_ready = Signal()
        self.n.o_valid = Signal()
        self.counter = Signal(4)
        self.op = Signal(2)
        i = []
        i.append(Signal(width, name="i1"))
        i.append(Signal(width, name="i2"))
        self.i = i
        self.a, self.b = i[0], i[1]
        self.out = tuple([Signal(width)])
        self.o = self.out[0]
        self.width = width

    def elaborate(self, platform):
        """Build the four comparators and the handshake state machine."""
        m = Module()
        bgt = BranchOp(self.width, operator.gt)
        blt = BranchOp(self.width, operator.lt)
        beq = BranchOp(self.width, operator.eq)
        bne = BranchOp(self.width, operator.ne)

        m.submodules.bgt = bgt
        m.submodules.blt = blt
        m.submodules.beq = beq
        m.submodules.bne = bne
        # every comparator sees both operands all the time
        for mod in [bgt, blt, beq, bne]:
            m.d.comb += [
                mod.a.eq(self.a),
                mod.b.eq(self.b),
            ]

        go_now = Signal(reset_less=True)  # testing no-delay ALU
        with m.If(self.p.i_valid):
            # input is valid. next check, if we already said "ready" or not
            with m.If(~self.p.o_ready):
                # we didn't say "ready" yet, so say so and initialise
                m.d.sync += self.p.o_ready.eq(1)

                # as this is a "fake" pipeline, just grab the output right now
                # (the case index matches the comparator's list position)
                with m.Switch(self.op):
                    for i, mod in enumerate([bgt, blt, beq, bne]):
                        with m.Case(i):
                            m.d.sync += self.o.eq(mod.o)
                # branch to take 5 cycles (fake)
                m.d.sync += self.counter.eq(5)
                # go_now is never driven here, so it stays at 0
                #m.d.comb += go_now.eq(1)
        with m.Else():
            # input says no longer valid, so drop ready as well.
            # a "proper" ALU would have had to sync in the opcode and a/b ops
            m.d.sync += self.p.o_ready.eq(0)

        # ok so the counter's running: when it gets to 1, fire the output
        with m.If((self.counter == 1) | go_now):
            # set the output as valid if the recipient is ready for it
            m.d.sync += self.n.o_valid.eq(1)
        with m.If(self.n.i_ready & self.n.o_valid):
            m.d.sync += self.n.o_valid.eq(0)
            # recipient said it was ready: reset back to known-good.
            m.d.sync += self.counter.eq(0)  # reset the counter
            m.d.sync += self.o.eq(0)  # clear the output for tidiness sake

        # countdown to 1 (transition from 1 to 0 only on acknowledgement)
        with m.If(self.counter > 1):
            m.d.sync += self.counter.eq(self.counter - 1)

        return m

    def __iter__(self):
        """Yield the op selector, both inputs and the output.

        The handshake signals are not included.
        """
        yield self.op
        yield self.a
        yield self.b
        yield self.o

    def ports(self):
        """Return the signals from ``__iter__`` as a list."""
        return list(self)
 497
 498
def run_op(dut, a, b, op, inv_a=0):
    """Simulation generator: run one operation on ``dut`` and return o.data.

    Presents the operands and operation with i_valid and i_ready raised,
    waits for o_ready, clears the inputs, waits for o_valid, reads the
    result and finally lowers i_ready.  Use with ``yield from``.

    :param dut: the ALU under test
    :param a: value for input ``a``
    :param b: value for input ``b``
    :param op: value for ``op.insn_type``
    :param inv_a: value for ``op.invert_in``
    :returns: the value read from ``dut.o.data``
    """
    yield dut.a.eq(a)
    yield dut.b.eq(b)
    yield dut.op.insn_type.eq(op)
    yield dut.op.invert_in.eq(inv_a)
    # NOTE(review): i_ready is set to 0 here and then to 1 two lines
    # below with no clock tick in between; the first write looks redundant
    yield dut.n.i_ready.eq(0)
    yield dut.p.i_valid.eq(1)
    yield dut.n.i_ready.eq(1)
    yield

    # wait for the ALU to accept our input data
    while not (yield dut.p.o_ready):
        yield

    # input accepted: withdraw i_valid and clear the operands / operation
    yield dut.p.i_valid.eq(0)
    yield dut.a.eq(0)
    yield dut.b.eq(0)
    yield dut.op.insn_type.eq(0)
    yield dut.op.invert_in.eq(0)

    # wait for the ALU to present the output data
    while not (yield dut.n.o_valid):
        yield

    # latch the result and lower i_ready
    result = yield dut.o.data
    yield dut.n.i_ready.eq(0)

    return result
 528
 529
 530 def alu_sim(dut):
 531     result = yield from run_op(dut, 5, 3, MicrOp.OP_ADD)
 532     print("alu_sim add", result)
 533     assert (result == 8)
 534
 535     result = yield from run_op(dut, 2, 3, MicrOp.OP_MUL_L64)
 536     print("alu_sim mul", result)
 537     assert (result == 6)
 538
 539     result = yield from run_op(dut, 5, 3, MicrOp.OP_ADD, inv_a=1)
 540     print("alu_sim add-inv", result)
 541     assert (result == 65533)
 542
 543     # test zero-delay ALU
 544     # don't have OP_SUB, so use any other
 545     result = yield from run_op(dut, 5, 3, MicrOp.OP_CMP)
 546     print("alu_sim sub", result)
 547     assert (result == 2)
 548
 549     result = yield from run_op(dut, 13, 2, MicrOp.OP_SHR)
 550     print("alu_sim shr", result)
 551     assert (result == 3)
 552
 553
 554 def test_alu():
 555     alu = ALU(width=16)
 556     write_alu_gtkw("test_alusim.gtkw", clk_period=10e-9)
 557     run_simulation(alu, {"sync": alu_sim(alu)}, vcd_name='test_alusim.vcd')
 558
 559     vl = rtlil.convert(alu, ports=alu.ports())
 560     with open("test_alu.il", "w") as f:
 561         f.write(vl)
 562
 563
def test_alu_parallel():
    """Exercise a 16-bit ALU with separate producer and consumer processes.

    The producer feeds operations without waiting for their results; the
    consumer collects and checks the results in order.  Writes
    test_alu_parallel.gtkw and test_alu_parallel.vcd.
    """
    # Compare with the sequential test implementation, above.
    m = Module()
    m.submodules.alu = dut = ALU(width=16)
    write_alu_gtkw("test_alu_parallel.gtkw", sub_module='alu',
                   pysim=is_engine_pysim())

    sim = Simulator(m)
    sim.add_clock(1e-6)

    def send(a, b, op, inv_a=0, rc=0):
        """Present one operation and hold it until o_ready is seen."""
        # present input data and assert i_valid
        yield dut.a.eq(a)
        yield dut.b.eq(b)
        yield dut.op.insn_type.eq(op)
        yield dut.op.invert_in.eq(inv_a)
        yield dut.op.rc.rc.eq(rc)
        yield dut.p.i_valid.eq(1)
        yield
        # wait for o_ready to be asserted
        while not (yield dut.p.o_ready):
            yield
        # clear input data and negate i_valid
        # if send is called again immediately afterwards, there will be no
        # visible transition (they will not be negated, after all)
        yield dut.p.i_valid.eq(0)
        yield dut.a.eq(0)
        yield dut.b.eq(0)
        yield dut.op.insn_type.eq(0)
        yield dut.op.invert_in.eq(0)
        yield dut.op.rc.rc.eq(0)

    def receive():
        """Wait for o_valid and return the tuple (o.data, cr.data)."""
        # signal readiness to receive data
        yield dut.n.i_ready.eq(1)
        yield
        # wait for o_valid to be asserted
        while not (yield dut.n.o_valid):
            yield
        # read results
        result = yield dut.o.data
        cr = yield dut.cr.data
        # negate i_ready
        # if receive is called again immediately afterwards, there will be no
        # visible transition (it will not be negated, after all)
        yield dut.n.i_ready.eq(0)
        return result, cr

    def producer():
        """Send the test operations, with idle cycles in between."""
        # send a few test cases, interspersed with wait states
        # note that, for this test, we do not wait for the result to be ready,
        # before presenting the next input
        # 5 + 3
        yield from send(5, 3, MicrOp.OP_ADD)
        yield
        yield
        # 2 * 3
        yield from send(2, 3, MicrOp.OP_MUL_L64, rc=1)
        # (-6) + 3
        yield from send(5, 3, MicrOp.OP_ADD, inv_a=1, rc=1)
        yield
        # 5 - 3
        # note that this is a zero-delay operation
        yield from send(5, 3, MicrOp.OP_CMP)
        yield
        yield
        # NOP
        yield from send(5, 3, MicrOp.OP_NOP)
        # 13 >> 2
        yield from send(13, 2, MicrOp.OP_SHR)
        # sign extend 13
        yield from send(13, 2, MicrOp.OP_EXTS)
        # sign extend -128 (8 bits)
        yield from send(0x80, 2, MicrOp.OP_EXTS, rc=1)
        # sign extend -128 (8 bits), taken from the second operand
        yield from send(2, 0x80, MicrOp.OP_EXTSWSLI)
        # 5 - 5
        yield from send(5, 5, MicrOp.OP_CMP, rc=1)

    def consumer():
        """Receive each result in order and check it."""
        # receive and check results, interspersed with wait states
        # the consumer is not in step with the producer, but the
        # order of the results are preserved
        yield
        # 5 + 3 = 8
        result = yield from receive()
        assert result[0] == 8
        # 2 * 3 = 6
        # 6 > 0 => CR = 0b100
        result = yield from receive()
        assert result == (6, 0b100)
        yield
        yield
        # (-6) + 3 = -3
        # -3 < 0 => CR = 0b010
        result = yield from receive()
        assert result == (65533, 0b010)  # unsigned equivalent to -3
        # 5 - 3 = 2
        # note that this is a zero-delay operation
        # this, and the previous result, will be received back-to-back
        # (check the output waveform to see this)
        result = yield from receive()
        assert result[0] == 2
        yield
        yield
        # NOP
        yield from receive()
        # 13 >> 2 = 3
        result = yield from receive()
        assert result[0] == 3
        # sign extend 13 = 13
        result = yield from receive()
        assert result[0] == 13
        # sign extend -128 (8 bits) = -128 (16 bits)
        # -128 < 0 => CR = 0b010
        result = yield from receive()
        assert result == (0xFF80, 0b010)
        # sign extend -128 (8 bits) = -128 (16 bits)
        result = yield from receive()
        assert result[0] == 0xFF80
        # 5 - 5 = 0
        # 0 == 0 => CR = 0b001
        result = yield from receive()
        assert result == (0, 0b001)

    sim.add_sync_process(producer)
    sim.add_sync_process(consumer)
    sim_writer = sim.write_vcd("test_alu_parallel.vcd")
    with sim_writer:
        sim.run()
 694
 695
 696 def write_alu_gtkw(gtkw_name, clk_period=1e-6, sub_module=None,
 697                    pysim=True):
 698     """Common function to write the GTKWave documents for this module"""
 699     gtkwave_desc = [
 700         'clk',
 701         'i1[15:0]',
 702         'i2[15:0]',
 703         'op__insn_type' if pysim else 'op__insn_type[6:0]',
 704         'op__invert_in',
 705         'i_valid',
 706         'o_ready',
 707         'o_valid',
 708         'i_ready',
 709         'alu_o[15:0]',
 710         'alu_o_ok',
 711         'alu_cr[15:0]',
 712         'alu_cr_ok'
 713     ]
 714     # determine the module name of the DUT
 715     module = 'top'
 716     if sub_module is not None:
 717         module = nmigen_sim_top_module + sub_module
 718     vcd_name = gtkw_name.replace('.gtkw', '.vcd')
 719     write_gtkw(gtkw_name, vcd_name, gtkwave_desc, module=module,
 720                loc=__file__, clk_period=clk_period, base='signed')
 721
 722
 723 if __name__ == "__main__":
 724     test_alu()
 725     test_alu_parallel()
 726
 727     # alu = BranchALU(width=16)
 728     # vl = rtlil.convert(alu, ports=alu.ports())
 729     # with open("test_branch_alu.il", "w") as f:
 730     #     f.write(vl)