src/soc/experiment/alu_hier.py

   1 """*Experimental* ALU: based on nmigen alu_hier.py, includes branch-compare ALU
   2
   3 This ALU is *deliberately* designed to add in (unnecessary) delays into
   4 different operations so as to be able to test the 6600-style matrices
   5 and the CompUnits.  Countdown timers wait for (defined) periods before
   6 indicating that the output is valid
   7
   8 A "real" integer ALU would place the answers onto the output bus after
   9 only one cycle (sync)
  10 """
  11
  12 from nmigen import Elaboratable, Signal, Module, Const, Mux, Array
  13 from nmigen.hdl.rec import Record, Layout
  14 from nmigen.cli import main
  15 from nmigen.cli import verilog, rtlil
  16 from nmigen.compat.sim import run_simulation
  17 from nmutil.extend import exts
  18 from nmutil.gtkw import write_gtkw
  19
  20 # NOTE: to use cxxsim, export NMIGEN_SIM_MODE=cxxsim from the shell
  21 # Also, check out the cxxsim nmigen branch, and latest yosys from git
  22 from nmutil.sim_tmp_alternative import (Simulator, nmigen_sim_top_module,
  23                                         is_engine_pysim)
  24
  25 from soc.decoder.decode2execute1 import Data
  26 from soc.decoder.power_enums import MicrOp, Function, CryIn
  27
  28 from soc.fu.alu.alu_input_record import CompALUOpSubset
  29 from soc.fu.cr.cr_input_record import CompCROpSubset
  30
  31 import operator
  32
  33
  34 class Adder(Elaboratable):
  35     def __init__(self, width):
  36         self.invert_in = Signal()
  37         self.a = Signal(width)
  38         self.b = Signal(width)
  39         self.o = Signal(width, name="add_o")
  40
  41     def elaborate(self, platform):
  42         m = Module()
  43         with m.If(self.invert_in):
  44             m.d.comb += self.o.eq((~self.a) + self.b)
  45         with m.Else():
  46             m.d.comb += self.o.eq(self.a + self.b)
  47         return m
  48
  49
  50 class Subtractor(Elaboratable):
  51     def __init__(self, width):
  52         self.a = Signal(width)
  53         self.b = Signal(width)
  54         self.o = Signal(width, name="sub_o")
  55
  56     def elaborate(self, platform):
  57         m = Module()
  58         m.d.comb += self.o.eq(self.a - self.b)
  59         return m
  60
  61
  62 class Multiplier(Elaboratable):
  63     def __init__(self, width):
  64         self.a = Signal(width)
  65         self.b = Signal(width)
  66         self.o = Signal(width, name="mul_o")
  67
  68     def elaborate(self, platform):
  69         m = Module()
  70         m.d.comb += self.o.eq(self.a * self.b)
  71         return m
  72
  73
  74 class Shifter(Elaboratable):
  75     def __init__(self, width):
  76         self.width = width
  77         self.a = Signal(width)
  78         self.b = Signal(width)
  79         self.o = Signal(width, name="shf_o")
  80
  81     def elaborate(self, platform):
  82         m = Module()
  83         btrunc = Signal(self.width)
  84         m.d.comb += btrunc.eq(self.b & Const((1 << self.width)-1))
  85         m.d.comb += self.o.eq(self.a >> btrunc)
  86         return m
  87
  88
  89 class SignExtend(Elaboratable):
  90     def __init__(self, width):
  91         self.width = width
  92         self.a = Signal(width)
  93         self.o = Signal(width, name="exts_o")
  94
  95     def elaborate(self, platform):
  96         m = Module()
  97         m.d.comb += self.o.eq(exts(self.a, 8, self.width))
  98         return m
  99
 100
 101 class Dummy:
 102     pass
 103
 104
class DummyALU(Elaboratable):
    """Stand-in ALU that copies input ``a`` to the output, with handshake.

    Exposes three ``width``-bit inputs (i1, i2, i3 as ``a``, ``b``, ``c``),
    one ``width``-bit output ("alu_o" as ``o``) and a CompCROpSubset ``op``.
    ``elaborate`` only reads ``a``: ``b``, ``c`` and ``op`` are exposed but
    are not used by the logic below.

    The ``p`` / ``n`` attribute trees mimic the nmutil pipeline API:
    ``p.valid_i`` / ``p.ready_o`` on the input side and
    ``n.valid_o`` / ``n.ready_i`` on the output side.
    """

    def __init__(self, width):
        self.p = Dummy()  # make look like nmutil pipeline API
        self.p.data_i = Dummy()
        self.p.data_i.ctx = Dummy()
        self.n = Dummy()  # make look like nmutil pipeline API
        self.n.data_o = Dummy()
        self.p.valid_i = Signal()
        self.p.ready_o = Signal()
        self.n.ready_i = Signal()
        self.n.valid_o = Signal()
        self.counter = Signal(4)
        self.op = CompCROpSubset()
        i = []
        i.append(Signal(width, name="i1"))
        i.append(Signal(width, name="i2"))
        i.append(Signal(width, name="i3"))
        self.i = Array(i)
        self.a, self.b, self.c = i[0], i[1], i[2]
        self.out = Array([Signal(width, name="alu_o")])
        self.o = self.out[0]
        self.width = width
        # more "look like nmutil pipeline API"
        self.p.data_i.ctx.op = self.op
        self.p.data_i.a = self.a
        self.p.data_i.b = self.b
        self.p.data_i.c = self.c
        self.n.data_o.o = self.o

    def elaborate(self, platform):
        """Build the handshake logic; the statement order below matters."""
        m = Module()

        go_now = Signal(reset_less=True)  # testing no-delay ALU

        with m.If(self.p.valid_i):
            # input is valid. next check, if we already said "ready" or not
            with m.If(~self.p.ready_o):
                # we didn't say "ready" yet, so say so and initialise
                m.d.sync += self.p.ready_o.eq(1)

                # the "result" is simply a registered copy of input a
                m.d.sync += self.o.eq(self.a)
                m.d.comb += go_now.eq(1)
                m.d.sync += self.counter.eq(1)

        with m.Else():
            # input says no longer valid, so drop ready as well.
            # a "proper" ALU would have had to sync in the opcode and a/b ops
            m.d.sync += self.p.ready_o.eq(0)

        # ok so the counter's running: when it gets to 1, fire the output
        with m.If((self.counter == 1) | go_now):
            # set the output as valid if the recipient is ready for it
            m.d.sync += self.n.valid_o.eq(1)
        # this block comes later, so its valid_o assignment takes precedence
        # over the one above when both conditions hold
        with m.If(self.n.ready_i & self.n.valid_o):
            m.d.sync += self.n.valid_o.eq(0)
            # recipient said it was ready: reset back to known-good.
            m.d.sync += self.counter.eq(0)  # reset the counter
            m.d.sync += self.o.eq(0)  # clear the output for tidiness sake

        # countdown to 1 (transition from 1 to 0 only on acknowledgement)
        with m.If(self.counter > 1):
            m.d.sync += self.counter.eq(self.counter - 1)

        return m

    def __iter__(self):
        """Yield the op ports followed by the a, b, c and o signals."""
        yield from self.op.ports()
        yield self.a
        yield self.b
        yield self.c
        yield self.o

    def ports(self):
        """Return the signals from ``__iter__`` as a list."""
        return list(self)
 179
 180
class ALU(Elaboratable):
    """Fake multi-cycle integer ALU with a valid/ready handshake.

    Operations are selected by ``op.insn_type``.  On acceptance the counter
    is loaded with a per-operation value and counts down; the result is
    presented while the counter is one:

    * OP_MUL_L64: counter starts at 5
    * OP_ADD: counter starts at 3 (``op.invert_in`` inverts operand a)
    * OP_SHR, OP_EXTS, OP_EXTSWSLI: counter starts at 1
    * anything else: zero-delay, the subtractor output is passed through
      combinatorially (OP_NOP leaves ``o.ok`` clear)

    Outputs are two ``Data`` records: ``o`` ("alu_o", ``width`` bits) and
    ``cr`` ("alu_cr", 3 bits).  ``cr.ok`` follows ``op.rc.rc``.

    The ``p`` / ``n`` attribute trees mimic the nmutil pipeline API.
    """

    def __init__(self, width):
        self.p = Dummy()  # make look like nmutil pipeline API
        self.p.data_i = Dummy()
        self.p.data_i.ctx = Dummy()
        self.n = Dummy()  # make look like nmutil pipeline API
        self.n.data_o = Dummy()
        self.p.valid_i = Signal()
        self.p.ready_o = Signal()
        self.n.ready_i = Signal()
        self.n.valid_o = Signal()
        self.counter = Signal(4)
        self.op = CompALUOpSubset(name="op")
        i = []
        i.append(Signal(width, name="i1"))
        i.append(Signal(width, name="i2"))
        self.i = Array(i)
        self.a, self.b = i[0], i[1]
        out = []
        out.append(Data(width, name="alu_o"))
        out.append(Data(3, name="alu_cr"))
        self.out = Array(out)
        self.o = self.out[0]
        self.cr = self.out[1]
        self.width = width
        # more "look like nmutil pipeline API"
        self.p.data_i.ctx.op = self.op
        self.p.data_i.a = self.a
        self.p.data_i.b = self.b
        self.n.data_o.o = self.o

    def elaborate(self, platform):
        """Build the sub-units, the sequencer and the output selection."""
        m = Module()
        add = Adder(self.width)
        mul = Multiplier(self.width)
        shf = Shifter(self.width)
        sub = Subtractor(self.width)
        ext_sign = SignExtend(self.width)

        m.submodules.add = add
        m.submodules.mul = mul
        m.submodules.shf = shf
        m.submodules.sub = sub
        m.submodules.ext_sign = ext_sign

        # really should not activate absolutely all ALU inputs like this
        for mod in [add, mul, shf, sub]:
            m.d.comb += [
                mod.a.eq(self.a),
                mod.b.eq(self.b),
            ]
        # EXTS sign extends the first input
        with m.If(self.op.insn_type == MicrOp.OP_EXTS):
            m.d.comb += ext_sign.a.eq(self.a)
        # EXTSWSLI sign extends the second input
        with m.Elif(self.op.insn_type == MicrOp.OP_EXTSWSLI):
            m.d.comb += ext_sign.a.eq(self.b)

        # pass invert (and carry later)
        m.d.comb += add.invert_in.eq(self.op.invert_in)

        # driven further down, in the "others" branch of the delay selection
        go_now = Signal(reset_less=True)  # testing no-delay ALU

        # ALU sequencer is idle when the count is zero
        alu_idle = Signal(reset_less=True)
        m.d.comb += alu_idle.eq(self.counter == 0)

        # ALU sequencer is done when the count is one
        alu_done = Signal(reset_less=True)
        m.d.comb += alu_done.eq(self.counter == 1)

        # select handshake handling according to ALU type
        with m.If(go_now):
            # with a combinatorial, no-delay ALU, just pass through
            # the handshake signals to the other side
            m.d.comb += self.p.ready_o.eq(self.n.ready_i)
            m.d.comb += self.n.valid_o.eq(self.p.valid_i)
        with m.Else():
            # sequential ALU handshake:
            # ready_o is asserted whenever the ALU is idle
            m.d.comb += self.p.ready_o.eq(alu_idle)
            # valid_o is asserted while the sequencer is done (count is one)
            m.d.comb += self.n.valid_o.eq(alu_done)

        # registered ALU result: captured when an operation is accepted,
        # presented on the output while alu_done
        alu_r = Signal(self.width)

        # condition register output enable
        cr_ok_r = Signal()

        # NOP doesn't output anything
        with m.If(self.op.insn_type != MicrOp.OP_NOP):
            m.d.comb += self.o.ok.eq(1)
        with m.If(alu_idle):
            with m.If(self.p.valid_i):

                # as this is a "fake" pipeline, just grab the output right now
                with m.If(self.op.insn_type == MicrOp.OP_ADD):
                    m.d.sync += alu_r.eq(add.o)
                with m.Elif(self.op.insn_type == MicrOp.OP_MUL_L64):
                    m.d.sync += alu_r.eq(mul.o)
                with m.Elif(self.op.insn_type == MicrOp.OP_SHR):
                    m.d.sync += alu_r.eq(shf.o)
                with m.Elif(self.op.insn_type == MicrOp.OP_EXTS):
                    m.d.sync += alu_r.eq(ext_sign.o)
                with m.Elif(self.op.insn_type == MicrOp.OP_EXTSWSLI):
                    m.d.sync += alu_r.eq(ext_sign.o)
                # SUB is zero-delay, no need to register

                # NOTE: all of these are fake, just something to test

                # MUL: load the counter with 5
                with m.If(self.op.insn_type == MicrOp.OP_MUL_L64):
                    m.d.sync += self.counter.eq(5)
                # SHIFT: load the counter with 1, result valid straight away
                with m.Elif(self.op.insn_type == MicrOp.OP_SHR):
                    m.d.sync += self.counter.eq(1)
                # ADD: load the counter with 3
                with m.Elif(self.op.insn_type == MicrOp.OP_ADD):
                    m.d.sync += self.counter.eq(3)
                # EXTS: load the counter with 1
                with m.Elif(self.op.insn_type == MicrOp.OP_EXTS):
                    m.d.sync += self.counter.eq(1)
                # EXTSWSLI: load the counter with 1
                with m.Elif(self.op.insn_type == MicrOp.OP_EXTSWSLI):
                    m.d.sync += self.counter.eq(1)
                # others to take no delay
                with m.Else():
                    m.d.comb += go_now.eq(1)

                # store rc bit, to enable cr output later
                m.d.sync += cr_ok_r.eq(self.op.rc.rc)

        with m.Elif(~alu_done | self.n.ready_i):
            # decrement the counter while the ALU is neither idle nor
            # finished; once finished (count is one), only step back to
            # idle when the recipient asserts ready_i
            m.d.sync += self.counter.eq(self.counter - 1)

        # choose between zero-delay output, or registered
        with m.If(go_now):
            with m.If(self.o.ok):
                m.d.comb += self.o.data.eq(sub.o)
            m.d.comb += self.cr.ok.eq(self.op.rc.rc)
        # only present the result at the last computation cycle
        with m.Elif(alu_done):
            with m.If(self.o.ok):
                m.d.comb += self.o.data.eq(alu_r)
            m.d.comb += self.cr.ok.eq(cr_ok_r)

        # determine condition register bits based on the data output value:
        # 0b001 when the data is zero, 0b010 when its top bit is set,
        # 0b100 otherwise
        with m.If(self.cr.ok):
            with m.If(~self.o.data.any()):
                m.d.comb += self.cr.data.eq(0b001)
            with m.Elif(self.o.data[-1]):
                m.d.comb += self.cr.data.eq(0b010)
            with m.Else():
                m.d.comb += self.cr.data.eq(0b100)

        return m

    def __iter__(self):
        """Yield the op ports, a, b, the o ports and the handshake signals."""
        # NOTE(review): the ports of self.cr are not yielded here -- confirm
        # whether that omission is intended
        yield from self.op.ports()
        yield self.a
        yield self.b
        yield from self.o.ports()
        yield self.p.valid_i
        yield self.p.ready_o
        yield self.n.valid_o
        yield self.n.ready_i

    def ports(self):
        """Return the signals from ``__iter__`` as a list."""
        return list(self)
 352
 353
 354 class BranchOp(Elaboratable):
 355     def __init__(self, width, op):
 356         self.a = Signal(width)
 357         self.b = Signal(width)
 358         self.o = Signal(width)
 359         self.op = op
 360
 361     def elaborate(self, platform):
 362         m = Module()
 363         m.d.comb += self.o.eq(Mux(self.op(self.a, self.b), 1, 0))
 364         return m
 365
 366
class BranchALU(Elaboratable):
    """Fake branch-compare ALU with a fixed delay and valid/ready handshake.

    The 2-bit ``op`` selects the comparison applied to ``a`` and ``b``:
    0 = greater-than, 1 = less-than, 2 = equal, 3 = not-equal.  The
    comparison result is registered into ``o`` when the input is accepted,
    and the counter is loaded with 5.

    The ``p`` / ``n`` attribute trees mimic the nmutil pipeline API.
    """

    def __init__(self, width):
        self.p = Dummy()  # make look like nmutil pipeline API
        self.p.data_i = Dummy()
        self.p.data_i.ctx = Dummy()
        self.n = Dummy()  # make look like nmutil pipeline API
        self.n.data_o = Dummy()
        self.p.valid_i = Signal()
        self.p.ready_o = Signal()
        self.n.ready_i = Signal()
        self.n.valid_o = Signal()
        self.counter = Signal(4)
        self.op = Signal(2)
        i = []
        i.append(Signal(width, name="i1"))
        i.append(Signal(width, name="i2"))
        self.i = Array(i)
        self.a, self.b = i[0], i[1]
        self.out = Array([Signal(width)])
        self.o = self.out[0]
        self.width = width

    def elaborate(self, platform):
        """Build the comparison units and the handshake/countdown logic."""
        m = Module()
        bgt = BranchOp(self.width, operator.gt)
        blt = BranchOp(self.width, operator.lt)
        beq = BranchOp(self.width, operator.eq)
        bne = BranchOp(self.width, operator.ne)

        m.submodules.bgt = bgt
        m.submodules.blt = blt
        m.submodules.beq = beq
        m.submodules.bne = bne
        # every comparison unit sees both operands all the time
        for mod in [bgt, blt, beq, bne]:
            m.d.comb += [
                mod.a.eq(self.a),
                mod.b.eq(self.b),
            ]

        # never driven here (the assignment below is commented out)
        go_now = Signal(reset_less=True)  # testing no-delay ALU
        with m.If(self.p.valid_i):
            # input is valid. next check, if we already said "ready" or not
            with m.If(~self.p.ready_o):
                # we didn't say "ready" yet, so say so and initialise
                m.d.sync += self.p.ready_o.eq(1)

                # as this is a "fake" pipeline, just grab the output right now
                # (the case index matches the position in the list below)
                with m.Switch(self.op):
                    for i, mod in enumerate([bgt, blt, beq, bne]):
                        with m.Case(i):
                            m.d.sync += self.o.eq(mod.o)
                # branch to take 5 cycles (fake)
                m.d.sync += self.counter.eq(5)
                #m.d.comb += go_now.eq(1)
        with m.Else():
            # input says no longer valid, so drop ready as well.
            # a "proper" ALU would have had to sync in the opcode and a/b ops
            m.d.sync += self.p.ready_o.eq(0)

        # ok so the counter's running: when it gets to 1, fire the output
        with m.If((self.counter == 1) | go_now):
            # set the output as valid if the recipient is ready for it
            m.d.sync += self.n.valid_o.eq(1)
        # this block comes later, so its valid_o assignment takes precedence
        # over the one above when both conditions hold
        with m.If(self.n.ready_i & self.n.valid_o):
            m.d.sync += self.n.valid_o.eq(0)
            # recipient said it was ready: reset back to known-good.
            m.d.sync += self.counter.eq(0)  # reset the counter
            m.d.sync += self.o.eq(0)  # clear the output for tidiness sake

        # countdown to 1 (transition from 1 to 0 only on acknowledgement)
        with m.If(self.counter > 1):
            m.d.sync += self.counter.eq(self.counter - 1)

        return m

    def __iter__(self):
        """Yield the op, a, b and o signals."""
        yield self.op
        yield self.a
        yield self.b
        yield self.o

    def ports(self):
        """Return the signals from ``__iter__`` as a list."""
        return list(self)
 450
 451
 452 def run_op(dut, a, b, op, inv_a=0):
 453     yield dut.a.eq(a)
 454     yield dut.b.eq(b)
 455     yield dut.op.insn_type.eq(op)
 456     yield dut.op.invert_in.eq(inv_a)
 457     yield dut.n.ready_i.eq(0)
 458     yield dut.p.valid_i.eq(1)
 459     yield dut.n.ready_i.eq(1)
 460     yield
 461
 462     # wait for the ALU to accept our input data
 463     while not (yield dut.p.ready_o):
 464         yield
 465
 466     yield dut.p.valid_i.eq(0)
 467     yield dut.a.eq(0)
 468     yield dut.b.eq(0)
 469     yield dut.op.insn_type.eq(0)
 470     yield dut.op.invert_in.eq(0)
 471
 472     # wait for the ALU to present the output data
 473     while not (yield dut.n.valid_o):
 474         yield
 475
 476     # latch the result and lower read_i
 477     result = yield dut.o.data
 478     yield dut.n.ready_i.eq(0)
 479
 480     return result
 481
 482
 483 def alu_sim(dut):
 484     result = yield from run_op(dut, 5, 3, MicrOp.OP_ADD)
 485     print("alu_sim add", result)
 486     assert (result == 8)
 487
 488     result = yield from run_op(dut, 2, 3, MicrOp.OP_MUL_L64)
 489     print("alu_sim mul", result)
 490     assert (result == 6)
 491
 492     result = yield from run_op(dut, 5, 3, MicrOp.OP_ADD, inv_a=1)
 493     print("alu_sim add-inv", result)
 494     assert (result == 65533)
 495
 496     # test zero-delay ALU
 497     # don't have OP_SUB, so use any other
 498     result = yield from run_op(dut, 5, 3, MicrOp.OP_CMP)
 499     print("alu_sim sub", result)
 500     assert (result == 2)
 501
 502     result = yield from run_op(dut, 13, 2, MicrOp.OP_SHR)
 503     print("alu_sim shr", result)
 504     assert (result == 3)
 505
 506
 507 def test_alu():
 508     alu = ALU(width=16)
 509     write_alu_gtkw("test_alusim.gtkw", clk_period=10e-9)
 510     run_simulation(alu, {"sync": alu_sim(alu)}, vcd_name='test_alusim.vcd')
 511
 512     vl = rtlil.convert(alu, ports=alu.ports())
 513     with open("test_alu.il", "w") as f:
 514         f.write(vl)
 515
 516
def test_alu_parallel():
    """Drive a 16-bit ALU from independent producer and consumer processes.

    The producer sends operations without waiting for their results; the
    consumer collects and checks the results in the same order.  Writes
    test_alu_parallel.gtkw and test_alu_parallel.vcd.
    """
    # Compare with the sequential test implementation, above.
    m = Module()
    m.submodules.alu = dut = ALU(width=16)
    write_alu_gtkw("test_alu_parallel.gtkw", sub_module='alu',
                   pysim=is_engine_pysim())

    sim = Simulator(m)
    sim.add_clock(1e-6)

    def send(a, b, op, inv_a=0, rc=0):
        """Present one operation and hold it until ready_o is seen."""
        # present input data and assert valid_i
        yield dut.a.eq(a)
        yield dut.b.eq(b)
        yield dut.op.insn_type.eq(op)
        yield dut.op.invert_in.eq(inv_a)
        yield dut.op.rc.rc.eq(rc)
        yield dut.p.valid_i.eq(1)
        yield
        # wait for ready_o to be asserted
        while not (yield dut.p.ready_o):
            yield
        # clear input data and negate valid_i
        # if send is called again immediately afterwards, there will be no
        # visible transition (they will not be negated, after all)
        yield dut.p.valid_i.eq(0)
        yield dut.a.eq(0)
        yield dut.b.eq(0)
        yield dut.op.insn_type.eq(0)
        yield dut.op.invert_in.eq(0)
        yield dut.op.rc.rc.eq(0)

    def receive():
        """Wait for one result; return the tuple (o.data, cr.data)."""
        # signal readiness to receive data
        yield dut.n.ready_i.eq(1)
        yield
        # wait for valid_o to be asserted
        while not (yield dut.n.valid_o):
            yield
        # read results
        result = yield dut.o.data
        cr = yield dut.cr.data
        # negate ready_i
        # if receive is called again immediately afterwards, there will be no
        # visible transition (it will not be negated, after all)
        yield dut.n.ready_i.eq(0)
        return result, cr

    def producer():
        """Send the test operations, interspersed with wait states."""
        # send a few test cases, interspersed with wait states
        # note that, for this test, we do not wait for the result to be ready,
        # before presenting the next input
        # 5 + 3
        yield from send(5, 3, MicrOp.OP_ADD)
        yield
        yield
        # 2 * 3
        yield from send(2, 3, MicrOp.OP_MUL_L64, rc=1)
        # (-6) + 3
        yield from send(5, 3, MicrOp.OP_ADD, inv_a=1, rc=1)
        yield
        # 5 - 3
        # note that this is a zero-delay operation
        yield from send(5, 3, MicrOp.OP_CMP)
        yield
        yield
        # NOP
        yield from send(5, 3, MicrOp.OP_NOP)
        # 13 >> 2
        yield from send(13, 2, MicrOp.OP_SHR)
        # sign extend 13
        yield from send(13, 2, MicrOp.OP_EXTS)
        # sign extend -128 (8 bits)
        yield from send(0x80, 2, MicrOp.OP_EXTS, rc=1)
        # sign extend -128 (8 bits), taken from the second operand
        yield from send(2, 0x80, MicrOp.OP_EXTSWSLI)

    def consumer():
        """Receive the results in order and check each one."""
        # receive and check results, interspersed with wait states
        # the consumer is not in step with the producer, but the
        # order of the results is preserved
        yield
        # 5 + 3 = 8
        result = yield from receive()
        assert result[0] == 8
        # 2 * 3 = 6
        result = yield from receive()
        assert result == (6, 0b100)
        yield
        yield
        # (-6) + 3 = -3
        result = yield from receive()
        assert result == (65533, 0b010)  # unsigned equivalent to -3
        # 5 - 3 = 2
        # note that this is a zero-delay operation
        # this, and the previous result, will be received back-to-back
        # (check the output waveform to see this)
        result = yield from receive()
        assert result[0] == 2
        yield
        yield
        # NOP
        yield from receive()
        # 13 >> 2 = 3
        result = yield from receive()
        assert result[0] == 3
        # sign extend 13 = 13
        result = yield from receive()
        assert result[0] == 13
        # sign extend -128 (8 bits) = -128 (16 bits)
        result = yield from receive()
        assert result == (0xFF80, 0b010)
        # sign extend -128 (8 bits) = -128 (16 bits)
        result = yield from receive()
        assert result[0] == 0xFF80

    sim.add_sync_process(producer)
    sim.add_sync_process(consumer)
    sim_writer = sim.write_vcd("test_alu_parallel.vcd")
    with sim_writer:
        sim.run()
 638
 639
 640 def write_alu_gtkw(gtkw_name, clk_period=1e-6, sub_module=None,
 641                    pysim=True):
 642     """Common function to write the GTKWave documents for this module"""
 643     gtkwave_desc = [
 644         'clk',
 645         'i1[15:0]',
 646         'i2[15:0]',
 647         'op__insn_type' if pysim else 'op__insn_type[6:0]',
 648         'op__invert_in',
 649         'valid_i',
 650         'ready_o',
 651         'valid_o',
 652         'ready_i',
 653         'alu_o[15:0]',
 654         'alu_o_ok',
 655         'alu_cr[2:0]',
 656         'alu_cr_ok'
 657     ]
 658     # determine the module name of the DUT
 659     module = 'top'
 660     if sub_module is not None:
 661         module = nmigen_sim_top_module + sub_module
 662     vcd_name = gtkw_name.replace('.gtkw', '.vcd')
 663     write_gtkw(gtkw_name, vcd_name, gtkwave_desc, module=module,
 664                loc=__file__, clk_period=clk_period, base='signed')
 665
 666
 667 if __name__ == "__main__":
 668     test_alu()
 669     test_alu_parallel()
 670
 671     # alu = BranchALU(width=16)
 672     # vl = rtlil.convert(alu, ports=alu.ports())
 673     # with open("test_branch_alu.il", "w") as f:
 674     #     f.write(vl)