src/soc/experiment/alu_hier.py

   1 """*Experimental* ALU: based on nmigen alu_hier.py, includes branch-compare ALU
   2
   3 This ALU is *deliberately* designed to add in (unnecessary) delays into
   4 different operations so as to be able to test the 6600-style matrices
   5 and the CompUnits.  Countdown timers wait for (defined) periods before
   6 indicating that the output is valid
   7
   8 A "real" integer ALU would place the answers onto the output bus after
   9 only one cycle (sync)
  10 """
  11
  12 from nmigen import Elaboratable, Signal, Module, Const, Mux, Array
  13 from nmigen.hdl.rec import Record, Layout
  14 from nmigen.cli import main
  15 from nmigen.cli import verilog, rtlil
  16 from nmigen.compat.sim import run_simulation
  17 from nmutil.extend import exts
  18 from nmutil.gtkw import write_gtkw
  19
  20 # NOTE: to use cxxsim, export NMIGEN_SIM_MODE=cxxsim from the shell
  21 # Also, check out the cxxsim nmigen branch, and latest yosys from git
  22 from nmutil.sim_tmp_alternative import (Simulator, nmigen_sim_top_module,
  23                                         is_engine_pysim)
  24
  25 from soc.decoder.decode2execute1 import Data
  26 from soc.decoder.power_enums import MicrOp, Function, CryIn
  27
  28 from soc.fu.alu.alu_input_record import CompALUOpSubset
  29 from soc.fu.cr.cr_input_record import CompCROpSubset
  30
  31 import operator
  32
  33
  34 class Adder(Elaboratable):
  35     def __init__(self, width):
  36         self.invert_in = Signal()
  37         self.a = Signal(width)
  38         self.b = Signal(width)
  39         self.o = Signal(width, name="add_o")
  40
  41     def elaborate(self, platform):
  42         m = Module()
  43         with m.If(self.invert_in):
  44             m.d.comb += self.o.eq((~self.a) + self.b)
  45         with m.Else():
  46             m.d.comb += self.o.eq(self.a + self.b)
  47         return m
  48
  49
  50 class Subtractor(Elaboratable):
  51     def __init__(self, width):
  52         self.a = Signal(width)
  53         self.b = Signal(width)
  54         self.o = Signal(width, name="sub_o")
  55
  56     def elaborate(self, platform):
  57         m = Module()
  58         m.d.comb += self.o.eq(self.a - self.b)
  59         return m
  60
  61
  62 class Multiplier(Elaboratable):
  63     def __init__(self, width):
  64         self.a = Signal(width)
  65         self.b = Signal(width)
  66         self.o = Signal(width, name="mul_o")
  67
  68     def elaborate(self, platform):
  69         m = Module()
  70         m.d.comb += self.o.eq(self.a * self.b)
  71         return m
  72
  73
  74 class Shifter(Elaboratable):
  75     def __init__(self, width):
  76         self.width = width
  77         self.a = Signal(width)
  78         self.b = Signal(width)
  79         self.o = Signal(width, name="shf_o")
  80
  81     def elaborate(self, platform):
  82         m = Module()
  83         btrunc = Signal(self.width)
  84         m.d.comb += btrunc.eq(self.b & Const((1 << self.width)-1))
  85         m.d.comb += self.o.eq(self.a >> btrunc)
  86         return m
  87
  88
  89 class SignExtend(Elaboratable):
  90     def __init__(self, width):
  91         self.width = width
  92         self.a = Signal(width)
  93         self.o = Signal(width, name="exts_o")
  94
  95     def elaborate(self, platform):
  96         m = Module()
  97         m.d.comb += self.o.eq(exts(self.a, 8, self.width))
  98         return m
  99
 100
 101 class Dummy:
 102     pass
 103
 104
 105 class DummyALU(Elaboratable):
 106     def __init__(self, width):
 107         self.p = Dummy()  # make look like nmutil pipeline API
 108         self.p.data_i = Dummy()
 109         self.p.data_i.ctx = Dummy()
 110         self.n = Dummy()  # make look like nmutil pipeline API
 111         self.n.data_o = Dummy()
 112         self.p.valid_i = Signal()
 113         self.p.ready_o = Signal()
 114         self.n.ready_i = Signal()
 115         self.n.valid_o = Signal()
 116         self.counter = Signal(4)
 117         self.op = CompCROpSubset()
 118         i = []
 119         i.append(Signal(width, name="i1"))
 120         i.append(Signal(width, name="i2"))
 121         i.append(Signal(width, name="i3"))
 122         self.i = Array(i)
 123         self.a, self.b, self.c = i[0], i[1], i[2]
 124         self.out = Array([Signal(width, name="alu_o")])
 125         self.o = self.out[0]
 126         self.width = width
 127         # more "look like nmutil pipeline API"
 128         self.p.data_i.ctx.op = self.op
 129         self.p.data_i.a = self.a
 130         self.p.data_i.b = self.b
 131         self.p.data_i.c = self.c
 132         self.n.data_o.o = self.o
 133
 134     def elaborate(self, platform):
 135         m = Module()
 136
 137         go_now = Signal(reset_less=True)  # testing no-delay ALU
 138
 139         with m.If(self.p.valid_i):
 140             # input is valid. next check, if we already said "ready" or not
 141             with m.If(~self.p.ready_o):
 142                 # we didn't say "ready" yet, so say so and initialise
 143                 m.d.sync += self.p.ready_o.eq(1)
 144
 145                 m.d.sync += self.o.eq(self.a)
 146                 m.d.comb += go_now.eq(1)
 147                 m.d.sync += self.counter.eq(1)
 148
 149         with m.Else():
 150             # input says no longer valid, so drop ready as well.
 151             # a "proper" ALU would have had to sync in the opcode and a/b ops
 152             m.d.sync += self.p.ready_o.eq(0)
 153
 154         # ok so the counter's running: when it gets to 1, fire the output
 155         with m.If((self.counter == 1) | go_now):
 156             # set the output as valid if the recipient is ready for it
 157             m.d.sync += self.n.valid_o.eq(1)
 158         with m.If(self.n.ready_i & self.n.valid_o):
 159             m.d.sync += self.n.valid_o.eq(0)
 160             # recipient said it was ready: reset back to known-good.
 161             m.d.sync += self.counter.eq(0)  # reset the counter
 162             m.d.sync += self.o.eq(0)  # clear the output for tidiness sake
 163
 164         # countdown to 1 (transition from 1 to 0 only on acknowledgement)
 165         with m.If(self.counter > 1):
 166             m.d.sync += self.counter.eq(self.counter - 1)
 167
 168         return m
 169
 170     def __iter__(self):
 171         yield from self.op.ports()
 172         yield self.a
 173         yield self.b
 174         yield self.c
 175         yield self.o
 176
 177     def ports(self):
 178         return list(self)
 179
 180
 181 class ALU(Elaboratable):
 182     def __init__(self, width):
 183         self.p = Dummy()  # make look like nmutil pipeline API
 184         self.p.data_i = Dummy()
 185         self.p.data_i.ctx = Dummy()
 186         self.n = Dummy()  # make look like nmutil pipeline API
 187         self.n.data_o = Dummy()
 188         self.p.valid_i = Signal()
 189         self.p.ready_o = Signal()
 190         self.n.ready_i = Signal()
 191         self.n.valid_o = Signal()
 192         self.counter = Signal(4)
 193         self.op = CompALUOpSubset(name="op")
 194         i = []
 195         i.append(Signal(width, name="i1"))
 196         i.append(Signal(width, name="i2"))
 197         self.i = Array(i)
 198         self.a, self.b = i[0], i[1]
 199         out = []
 200         out.append(Data(width, name="alu_o"))
 201         out.append(Data(3, name="alu_cr"))
 202         self.out = Array(out)
 203         self.o = self.out[0]
 204         self.cr = self.out[1]
 205         self.width = width
 206         # more "look like nmutil pipeline API"
 207         self.p.data_i.ctx.op = self.op
 208         self.p.data_i.a = self.a
 209         self.p.data_i.b = self.b
 210         self.n.data_o.o = self.o
 211
 212     def elaborate(self, platform):
 213         m = Module()
 214         add = Adder(self.width)
 215         mul = Multiplier(self.width)
 216         shf = Shifter(self.width)
 217         sub = Subtractor(self.width)
 218         ext_sign = SignExtend(self.width)
 219
 220         m.submodules.add = add
 221         m.submodules.mul = mul
 222         m.submodules.shf = shf
 223         m.submodules.sub = sub
 224         m.submodules.ext_sign = ext_sign
 225
 226         # really should not activate absolutely all ALU inputs like this
 227         for mod in [add, mul, shf, sub]:
 228             m.d.comb += [
 229                 mod.a.eq(self.a),
 230                 mod.b.eq(self.b),
 231             ]
 232         # EXTS sign extends the first input
 233         with m.If(self.op.insn_type == MicrOp.OP_EXTS):
 234             m.d.comb += ext_sign.a.eq(self.a)
 235         # EXTSWSLI sign extends the second input
 236         with m.Elif(self.op.insn_type == MicrOp.OP_EXTSWSLI):
 237             m.d.comb += ext_sign.a.eq(self.b)
 238
 239         # pass invert (and carry later)
 240         m.d.comb += add.invert_in.eq(self.op.invert_in)
 241
 242         go_now = Signal(reset_less=True)  # testing no-delay ALU
 243
 244         # ALU sequencer is idle when the count is zero
 245         alu_idle = Signal(reset_less=True)
 246         m.d.comb += alu_idle.eq(self.counter == 0)
 247
 248         # ALU sequencer is done when the count is one
 249         alu_done = Signal(reset_less=True)
 250         m.d.comb += alu_done.eq(self.counter == 1)
 251
 252         # select handshake handling according to ALU type
 253         with m.If(go_now):
 254             # with a combinatorial, no-delay ALU, just pass through
 255             # the handshake signals to the other side
 256             m.d.comb += self.p.ready_o.eq(self.n.ready_i)
 257             m.d.comb += self.n.valid_o.eq(self.p.valid_i)
 258         with m.Else():
 259             # sequential ALU handshake:
 260             # ready_o responds to valid_i, but only if the ALU is idle
 261             m.d.comb += self.p.ready_o.eq(alu_idle)
 262             # select the internally generated valid_o, above
 263             m.d.comb += self.n.valid_o.eq(alu_done)
 264
 265         # hold the ALU result until ready_o is asserted
 266         alu_r = Signal(self.width)
 267
 268         # output masks
 269         # NOP and ILLEGAL don't output anything
 270         with m.If((self.op.insn_type != MicrOp.OP_NOP) &
 271                   (self.op.insn_type != MicrOp.OP_ILLEGAL)):
 272             m.d.comb += self.o.ok.eq(1)
 273         # CR is output when rc bit is active
 274         m.d.comb += self.cr.ok.eq(self.op.rc.rc)
 275
 276         with m.If(alu_idle):
 277             with m.If(self.p.valid_i):
 278
 279                 # as this is a "fake" pipeline, just grab the output right now
 280                 with m.If(self.op.insn_type == MicrOp.OP_ADD):
 281                     m.d.sync += alu_r.eq(add.o)
 282                 with m.Elif(self.op.insn_type == MicrOp.OP_MUL_L64):
 283                     m.d.sync += alu_r.eq(mul.o)
 284                 with m.Elif(self.op.insn_type == MicrOp.OP_SHR):
 285                     m.d.sync += alu_r.eq(shf.o)
 286                 with m.Elif(self.op.insn_type == MicrOp.OP_EXTS):
 287                     m.d.sync += alu_r.eq(ext_sign.o)
 288                 with m.Elif(self.op.insn_type == MicrOp.OP_EXTSWSLI):
 289                     m.d.sync += alu_r.eq(ext_sign.o)
 290                 # SUB is zero-delay, no need to register
 291
 292                 # NOTE: all of these are fake, just something to test
 293
 294                 # MUL, to take 5 instructions
 295                 with m.If(self.op.insn_type == MicrOp.OP_MUL_L64):
 296                     m.d.sync += self.counter.eq(5)
 297                 # SHIFT to take 1, straight away
 298                 with m.Elif(self.op.insn_type == MicrOp.OP_SHR):
 299                     m.d.sync += self.counter.eq(1)
 300                 # ADD/SUB to take 3
 301                 with m.Elif(self.op.insn_type == MicrOp.OP_ADD):
 302                     m.d.sync += self.counter.eq(3)
 303                 # EXTS to take 1
 304                 with m.Elif(self.op.insn_type == MicrOp.OP_EXTS):
 305                     m.d.sync += self.counter.eq(1)
 306                 # EXTSWSLI to take 1
 307                 with m.Elif(self.op.insn_type == MicrOp.OP_EXTSWSLI):
 308                     m.d.sync += self.counter.eq(1)
 309                 # others to take no delay
 310                 with m.Else():
 311                     m.d.comb += go_now.eq(1)
 312
 313         with m.Elif(~alu_done | self.n.ready_i):
 314             # decrement the counter while the ALU is neither idle nor finished
 315             m.d.sync += self.counter.eq(self.counter - 1)
 316
 317         # choose between zero-delay output, or registered
 318         with m.If(go_now):
 319             m.d.comb += self.o.data.eq(sub.o)
 320         # only present the result at the last computation cycle
 321         with m.Elif(alu_done):
 322             m.d.comb += self.o.data.eq(alu_r)
 323
 324         # determine condition register bits based on the data output value
 325         with m.If(~self.o.data.any()):
 326             m.d.comb += self.cr.data.eq(0b001)
 327         with m.Elif(self.o.data[-1]):
 328             m.d.comb += self.cr.data.eq(0b010)
 329         with m.Else():
 330             m.d.comb += self.cr.data.eq(0b100)
 331
 332         return m
 333
 334     def __iter__(self):
 335         yield from self.op.ports()
 336         yield self.a
 337         yield self.b
 338         yield from self.o.ports()
 339         yield self.p.valid_i
 340         yield self.p.ready_o
 341         yield self.n.valid_o
 342         yield self.n.ready_i
 343
 344     def ports(self):
 345         return list(self)
 346
 347
 348 class BranchOp(Elaboratable):
 349     def __init__(self, width, op):
 350         self.a = Signal(width)
 351         self.b = Signal(width)
 352         self.o = Signal(width)
 353         self.op = op
 354
 355     def elaborate(self, platform):
 356         m = Module()
 357         m.d.comb += self.o.eq(Mux(self.op(self.a, self.b), 1, 0))
 358         return m
 359
 360
 361 class BranchALU(Elaboratable):
 362     def __init__(self, width):
 363         self.p = Dummy()  # make look like nmutil pipeline API
 364         self.p.data_i = Dummy()
 365         self.p.data_i.ctx = Dummy()
 366         self.n = Dummy()  # make look like nmutil pipeline API
 367         self.n.data_o = Dummy()
 368         self.p.valid_i = Signal()
 369         self.p.ready_o = Signal()
 370         self.n.ready_i = Signal()
 371         self.n.valid_o = Signal()
 372         self.counter = Signal(4)
 373         self.op = Signal(2)
 374         i = []
 375         i.append(Signal(width, name="i1"))
 376         i.append(Signal(width, name="i2"))
 377         self.i = Array(i)
 378         self.a, self.b = i[0], i[1]
 379         self.out = Array([Signal(width)])
 380         self.o = self.out[0]
 381         self.width = width
 382
 383     def elaborate(self, platform):
 384         m = Module()
 385         bgt = BranchOp(self.width, operator.gt)
 386         blt = BranchOp(self.width, operator.lt)
 387         beq = BranchOp(self.width, operator.eq)
 388         bne = BranchOp(self.width, operator.ne)
 389
 390         m.submodules.bgt = bgt
 391         m.submodules.blt = blt
 392         m.submodules.beq = beq
 393         m.submodules.bne = bne
 394         for mod in [bgt, blt, beq, bne]:
 395             m.d.comb += [
 396                 mod.a.eq(self.a),
 397                 mod.b.eq(self.b),
 398             ]
 399
 400         go_now = Signal(reset_less=True)  # testing no-delay ALU
 401         with m.If(self.p.valid_i):
 402             # input is valid. next check, if we already said "ready" or not
 403             with m.If(~self.p.ready_o):
 404                 # we didn't say "ready" yet, so say so and initialise
 405                 m.d.sync += self.p.ready_o.eq(1)
 406
 407                 # as this is a "fake" pipeline, just grab the output right now
 408                 with m.Switch(self.op):
 409                     for i, mod in enumerate([bgt, blt, beq, bne]):
 410                         with m.Case(i):
 411                             m.d.sync += self.o.eq(mod.o)
 412                 # branch to take 5 cycles (fake)
 413                 m.d.sync += self.counter.eq(5)
 414                 #m.d.comb += go_now.eq(1)
 415         with m.Else():
 416             # input says no longer valid, so drop ready as well.
 417             # a "proper" ALU would have had to sync in the opcode and a/b ops
 418             m.d.sync += self.p.ready_o.eq(0)
 419
 420         # ok so the counter's running: when it gets to 1, fire the output
 421         with m.If((self.counter == 1) | go_now):
 422             # set the output as valid if the recipient is ready for it
 423             m.d.sync += self.n.valid_o.eq(1)
 424         with m.If(self.n.ready_i & self.n.valid_o):
 425             m.d.sync += self.n.valid_o.eq(0)
 426             # recipient said it was ready: reset back to known-good.
 427             m.d.sync += self.counter.eq(0)  # reset the counter
 428             m.d.sync += self.o.eq(0)  # clear the output for tidiness sake
 429
 430         # countdown to 1 (transition from 1 to 0 only on acknowledgement)
 431         with m.If(self.counter > 1):
 432             m.d.sync += self.counter.eq(self.counter - 1)
 433
 434         return m
 435
 436     def __iter__(self):
 437         yield self.op
 438         yield self.a
 439         yield self.b
 440         yield self.o
 441
 442     def ports(self):
 443         return list(self)
 444
 445
 446 def run_op(dut, a, b, op, inv_a=0):
 447     yield dut.a.eq(a)
 448     yield dut.b.eq(b)
 449     yield dut.op.insn_type.eq(op)
 450     yield dut.op.invert_in.eq(inv_a)
 451     yield dut.n.ready_i.eq(0)
 452     yield dut.p.valid_i.eq(1)
 453     yield dut.n.ready_i.eq(1)
 454     yield
 455
 456     # wait for the ALU to accept our input data
 457     while not (yield dut.p.ready_o):
 458         yield
 459
 460     yield dut.p.valid_i.eq(0)
 461     yield dut.a.eq(0)
 462     yield dut.b.eq(0)
 463     yield dut.op.insn_type.eq(0)
 464     yield dut.op.invert_in.eq(0)
 465
 466     # wait for the ALU to present the output data
 467     while not (yield dut.n.valid_o):
 468         yield
 469
 470     # latch the result and lower read_i
 471     result = yield dut.o.data
 472     yield dut.n.ready_i.eq(0)
 473
 474     return result
 475
 476
 477 def alu_sim(dut):
 478     result = yield from run_op(dut, 5, 3, MicrOp.OP_ADD)
 479     print("alu_sim add", result)
 480     assert (result == 8)
 481
 482     result = yield from run_op(dut, 2, 3, MicrOp.OP_MUL_L64)
 483     print("alu_sim mul", result)
 484     assert (result == 6)
 485
 486     result = yield from run_op(dut, 5, 3, MicrOp.OP_ADD, inv_a=1)
 487     print("alu_sim add-inv", result)
 488     assert (result == 65533)
 489
 490     # test zero-delay ALU
 491     # don't have OP_SUB, so use any other
 492     result = yield from run_op(dut, 5, 3, MicrOp.OP_CMP)
 493     print("alu_sim sub", result)
 494     assert (result == 2)
 495
 496     result = yield from run_op(dut, 13, 2, MicrOp.OP_SHR)
 497     print("alu_sim shr", result)
 498     assert (result == 3)
 499
 500
 501 def test_alu():
 502     alu = ALU(width=16)
 503     write_alu_gtkw("test_alusim.gtkw", clk_period=10e-9)
 504     run_simulation(alu, {"sync": alu_sim(alu)}, vcd_name='test_alusim.vcd')
 505
 506     vl = rtlil.convert(alu, ports=alu.ports())
 507     with open("test_alu.il", "w") as f:
 508         f.write(vl)
 509
 510
 511 def test_alu_parallel():
 512     # Compare with the sequential test implementation, above.
 513     m = Module()
 514     m.submodules.alu = dut = ALU(width=16)
 515     write_alu_gtkw("test_alu_parallel.gtkw", sub_module='alu',
 516                    pysim=is_engine_pysim())
 517
 518     sim = Simulator(m)
 519     sim.add_clock(1e-6)
 520
 521     def send(a, b, op, inv_a=0, rc=0):
 522         # present input data and assert valid_i
 523         yield dut.a.eq(a)
 524         yield dut.b.eq(b)
 525         yield dut.op.insn_type.eq(op)
 526         yield dut.op.invert_in.eq(inv_a)
 527         yield dut.op.rc.rc.eq(rc)
 528         yield dut.p.valid_i.eq(1)
 529         yield
 530         # wait for ready_o to be asserted
 531         while not (yield dut.p.ready_o):
 532             yield
 533         # clear input data and negate valid_i
 534         # if send is called again immediately afterwards, there will be no
 535         # visible transition (they will not be negated, after all)
 536         yield dut.p.valid_i.eq(0)
 537         yield dut.a.eq(0)
 538         yield dut.b.eq(0)
 539         yield dut.op.insn_type.eq(0)
 540         yield dut.op.invert_in.eq(0)
 541         yield dut.op.rc.rc.eq(0)
 542
 543     def receive():
 544         # signal readiness to receive data
 545         yield dut.n.ready_i.eq(1)
 546         yield
 547         # wait for valid_o to be asserted
 548         while not (yield dut.n.valid_o):
 549             yield
 550         # read results
 551         result = yield dut.o.data
 552         cr = yield dut.cr.data
 553         # negate ready_i
 554         # if receive is called again immediately afterwards, there will be no
 555         # visible transition (it will not be negated, after all)
 556         yield dut.n.ready_i.eq(0)
 557         return result, cr
 558
 559     def producer():
 560         # send a few test cases, interspersed with wait states
 561         # note that, for this test, we do not wait for the result to be ready,
 562         # before presenting the next input
 563         # 5 + 3
 564         yield from send(5, 3, MicrOp.OP_ADD)
 565         yield
 566         yield
 567         # 2 * 3
 568         yield from send(2, 3, MicrOp.OP_MUL_L64, rc=1)
 569         # (-6) + 3
 570         yield from send(5, 3, MicrOp.OP_ADD, inv_a=1, rc=1)
 571         yield
 572         # 5 - 3
 573         # note that this is a zero-delay operation
 574         yield from send(5, 3, MicrOp.OP_CMP)
 575         yield
 576         yield
 577         # NOP
 578         yield from send(5, 3, MicrOp.OP_NOP)
 579         # 13 >> 2
 580         yield from send(13, 2, MicrOp.OP_SHR)
 581         # sign extent 13
 582         yield from send(13, 2, MicrOp.OP_EXTS)
 583         # sign extend -128 (8 bits)
 584         yield from send(0x80, 2, MicrOp.OP_EXTS, rc=1)
 585         # sign extend -128 (8 bits)
 586         yield from send(2, 0x80, MicrOp.OP_EXTSWSLI)
 587
 588     def consumer():
 589         # receive and check results, interspersed with wait states
 590         # the consumer is not in step with the producer, but the
 591         # order of the results are preserved
 592         yield
 593         # 5 + 3 = 8
 594         result = yield from receive()
 595         assert result[0] == 8
 596         # 2 * 3 = 6
 597         result = yield from receive()
 598         assert result == (6, 0b100)
 599         yield
 600         yield
 601         # (-6) + 3 = -3
 602         result = yield from receive()
 603         assert result == (65533, 0b010)  # unsigned equivalent to -2
 604         # 5 - 3 = 2
 605         # note that this is a zero-delay operation
 606         # this, and the previous result, will be received back-to-back
 607         # (check the output waveform to see this)
 608         result = yield from receive()
 609         assert result[0] == 2
 610         yield
 611         yield
 612         # NOP
 613         yield from receive()
 614         # 13 >> 2 = 3
 615         result = yield from receive()
 616         assert result[0] == 3
 617         # sign extent 13 = 13
 618         result = yield from receive()
 619         assert result[0] == 13
 620         # sign extend -128 (8 bits) = -128 (16 bits)
 621         result = yield from receive()
 622         assert result == (0xFF80, 0b010)
 623         # sign extend -128 (8 bits) = -128 (16 bits)
 624         result = yield from receive()
 625         assert result[0] == 0xFF80
 626
 627     sim.add_sync_process(producer)
 628     sim.add_sync_process(consumer)
 629     sim_writer = sim.write_vcd("test_alu_parallel.vcd")
 630     with sim_writer:
 631         sim.run()
 632
 633
 634 def write_alu_gtkw(gtkw_name, clk_period=1e-6, sub_module=None,
 635                    pysim=True):
 636     """Common function to write the GTKWave documents for this module"""
 637     gtkwave_desc = [
 638         'clk',
 639         'i1[15:0]',
 640         'i2[15:0]',
 641         'op__insn_type' if pysim else 'op__insn_type[6:0]',
 642         'op__invert_in',
 643         'valid_i',
 644         'ready_o',
 645         'valid_o',
 646         'ready_i',
 647         'alu_o[15:0]',
 648         'alu_o_ok',
 649         'alu_cr[2:0]',
 650         'alu_cr_ok'
 651     ]
 652     # determine the module name of the DUT
 653     module = 'top'
 654     if sub_module is not None:
 655         module = nmigen_sim_top_module + sub_module
 656     vcd_name = gtkw_name.replace('.gtkw', '.vcd')
 657     write_gtkw(gtkw_name, vcd_name, gtkwave_desc, module=module,
 658                loc=__file__, clk_period=clk_period, base='signed')
 659
 660
 661 if __name__ == "__main__":
 662     test_alu()
 663     test_alu_parallel()
 664
 665     # alu = BranchALU(width=16)
 666     # vl = rtlil.convert(alu, ports=alu.ports())
 667     # with open("test_branch_alu.il", "w") as f:
 668     #     f.write(vl)