src/soc/experiment/alu_hier.py

   1 """*Experimental* ALU: based on nmigen alu_hier.py, includes branch-compare ALU
   2
This ALU is *deliberately* designed to add (unnecessary) delays to
different operations so as to be able to test the 6600-style matrices
and the CompUnits.  Countdown timers wait for (defined) periods before
indicating that the output is valid.
   7
A "real" integer ALU would place the answers onto the output bus after
only one cycle (sync).
  10 """
  11
  12 from nmigen import Elaboratable, Signal, Module, Const, Mux, Array
  13 from nmigen.hdl.rec import Record, Layout
  14 from nmigen.cli import main
  15 from nmigen.cli import verilog, rtlil
  16 from nmigen.compat.sim import run_simulation
  17 from nmutil.extend import exts
  18 from nmutil.gtkw import write_gtkw
  19
  20 # NOTE: to use cxxsim, export NMIGEN_SIM_MODE=cxxsim from the shell
  21 # Also, check out the cxxsim nmigen branch, and latest yosys from git
  22 from nmutil.sim_tmp_alternative import (Simulator, nmigen_sim_top_module,
  23                                         is_engine_pysim)
  24
  25 from soc.decoder.decode2execute1 import Data
  26 from soc.decoder.power_enums import MicrOp, Function, CryIn
  27
  28 from soc.fu.alu.alu_input_record import CompALUOpSubset
  29 from soc.fu.cr.cr_input_record import CompCROpSubset
  30
  31 import operator
  32
  33
  34 class Adder(Elaboratable):
  35     def __init__(self, width):
  36         self.invert_in = Signal()
  37         self.a = Signal(width)
  38         self.b = Signal(width)
  39         self.o = Signal(width, name="add_o")
  40
  41     def elaborate(self, platform):
  42         m = Module()
  43         with m.If(self.invert_in):
  44             m.d.comb += self.o.eq((~self.a) + self.b)
  45         with m.Else():
  46             m.d.comb += self.o.eq(self.a + self.b)
  47         return m
  48
  49
  50 class Subtractor(Elaboratable):
  51     def __init__(self, width):
  52         self.a = Signal(width)
  53         self.b = Signal(width)
  54         self.o = Signal(width, name="sub_o")
  55
  56     def elaborate(self, platform):
  57         m = Module()
  58         m.d.comb += self.o.eq(self.a - self.b)
  59         return m
  60
  61
  62 class Multiplier(Elaboratable):
  63     def __init__(self, width):
  64         self.a = Signal(width)
  65         self.b = Signal(width)
  66         self.o = Signal(width, name="mul_o")
  67
  68     def elaborate(self, platform):
  69         m = Module()
  70         m.d.comb += self.o.eq(self.a * self.b)
  71         return m
  72
  73
  74 class Shifter(Elaboratable):
  75     def __init__(self, width):
  76         self.width = width
  77         self.a = Signal(width)
  78         self.b = Signal(width)
  79         self.o = Signal(width, name="shf_o")
  80
  81     def elaborate(self, platform):
  82         m = Module()
  83         btrunc = Signal(self.width)
  84         m.d.comb += btrunc.eq(self.b & Const((1 << self.width)-1))
  85         m.d.comb += self.o.eq(self.a >> btrunc)
  86         return m
  87
  88
  89 class SignExtend(Elaboratable):
  90     def __init__(self, width):
  91         self.width = width
  92         self.a = Signal(width)
  93         self.o = Signal(width, name="exts_o")
  94
  95     def elaborate(self, platform):
  96         m = Module()
  97         m.d.comb += self.o.eq(exts(self.a, 8, self.width))
  98         return m
  99
 100
 101 class Dummy:
 102     pass
 103
 104
class DummyALU(Elaboratable):
    """Fake three-input "ALU" which simply copies input ``a`` to its output.

    Inputs ``b`` and ``c`` are declared (and listed as ports) but are not
    read by ``elaborate``.  The handshake signals live on ``p`` (input side:
    ``valid_i`` / ``ready_o``) and ``n`` (output side: ``valid_o`` /
    ``ready_i``), mimicking the nmutil pipeline API.

    NOTE: several signals are assigned from more than one ``m.If`` block in
    the sync domain; the order of those blocks is significant, so do not
    reorder them.
    """

    def __init__(self, width):
        self.p = Dummy()  # make look like nmutil pipeline API
        self.p.data_i = Dummy()
        self.p.data_i.ctx = Dummy()
        self.n = Dummy()  # make look like nmutil pipeline API
        self.n.data_o = Dummy()
        # input-side handshake
        self.p.valid_i = Signal()
        self.p.ready_o = Signal()
        # output-side handshake
        self.n.ready_i = Signal()
        self.n.valid_o = Signal()
        self.counter = Signal(4)
        self.op = CompCROpSubset()
        # the three operands, also available by index through self.i
        i = []
        i.append(Signal(width, name="i1"))
        i.append(Signal(width, name="i2"))
        i.append(Signal(width, name="i3"))
        self.i = Array(i)
        self.a, self.b, self.c = i[0], i[1], i[2]
        # single output, also available by index through self.out
        self.out = Array([Signal(width, name="alu_o")])
        self.o = self.out[0]
        self.width = width
        # more "look like nmutil pipeline API"
        self.p.data_i.ctx.op = self.op
        self.p.data_i.a = self.a
        self.p.data_i.b = self.b
        self.p.data_i.c = self.c
        self.n.data_o.o = self.o

    def elaborate(self, platform):
        m = Module()

        go_now = Signal(reset_less=True)  # testing no-delay ALU

        with m.If(self.p.valid_i):
            # input is valid. next check, if we already said "ready" or not
            with m.If(~self.p.ready_o):
                # we didn't say "ready" yet, so say so and initialise
                m.d.sync += self.p.ready_o.eq(1)

                # the "operation": latch input a into the output
                m.d.sync += self.o.eq(self.a)
                m.d.comb += go_now.eq(1)
                m.d.sync += self.counter.eq(1)

        with m.Else():
            # input says no longer valid, so drop ready as well.
            # a "proper" ALU would have had to sync in the opcode and a/b ops
            m.d.sync += self.p.ready_o.eq(0)

        # ok so the counter's running: when it gets to 1, fire the output
        with m.If((self.counter == 1) | go_now):
            # set the output as valid if the recipient is ready for it
            m.d.sync += self.n.valid_o.eq(1)
        # this block comes after the one above and also assigns valid_o
        with m.If(self.n.ready_i & self.n.valid_o):
            m.d.sync += self.n.valid_o.eq(0)
            # recipient said it was ready: reset back to known-good.
            m.d.sync += self.counter.eq(0)  # reset the counter
            m.d.sync += self.o.eq(0)  # clear the output for tidiness sake

        # countdown to 1 (transition from 1 to 0 only on acknowledgement)
        with m.If(self.counter > 1):
            m.d.sync += self.counter.eq(self.counter - 1)

        return m

    def __iter__(self):
        # yields the opcode record's ports, then the operands and the output
        yield from self.op.ports()
        yield self.a
        yield self.b
        yield self.c
        yield self.o

    def ports(self):
        """Return the signals produced by ``__iter__`` as a list."""
        return list(self)
 179
 180
class ALU(Elaboratable):
    """Fake two-input ALU with deliberately different per-operation delays.

    The operation is selected by ``op.insn_type``:

    * ``OP_ADD``: adder result (optionally with ``a`` inverted, see
      ``op.invert_in``), counter loaded with 3
    * ``OP_MUL_L64``: multiplier result, counter loaded with 5
    * ``OP_SHR``: shifter result, counter loaded with 1
    * ``OP_EXTS``: sign-extension of ``a``, counter loaded with 1
    * ``OP_EXTSWSLI``: sign-extension of ``b``, counter loaded with 1
    * anything else: zero-delay; the subtractor output is passed straight
      through and the handshake signals are forwarded combinatorially

    For the delayed operations the result is captured into a register when
    the input is accepted, and presented on ``o.data`` with ``n.valid_o``
    once the counter has counted down to 1.

    The output ``o`` is a ``Data`` record: this class drives its ``data``
    and ``ok`` members.
    """

    def __init__(self, width):
        self.p = Dummy()  # make look like nmutil pipeline API
        self.p.data_i = Dummy()
        self.p.data_i.ctx = Dummy()
        self.n = Dummy()  # make look like nmutil pipeline API
        self.n.data_o = Dummy()
        # input-side handshake
        self.p.valid_i = Signal()
        self.p.ready_o = Signal()
        # output-side handshake
        self.n.ready_i = Signal()
        self.n.valid_o = Signal()
        self.counter = Signal(4)
        self.op = CompALUOpSubset(name="op")
        # the two operands, also available by index through self.i
        i = []
        i.append(Signal(width, name="i1"))
        i.append(Signal(width, name="i2"))
        self.i = Array(i)
        self.a, self.b = i[0], i[1]
        # single output record, also available by index through self.out
        self.out = Array([Data(width, name="alu_o")])
        self.o = self.out[0]
        self.width = width
        # more "look like nmutil pipeline API"
        self.p.data_i.ctx.op = self.op
        self.p.data_i.a = self.a
        self.p.data_i.b = self.b
        self.n.data_o.o = self.o

    def elaborate(self, platform):
        m = Module()
        add = Adder(self.width)
        mul = Multiplier(self.width)
        shf = Shifter(self.width)
        sub = Subtractor(self.width)
        ext_sign = SignExtend(self.width)

        m.submodules.add = add
        m.submodules.mul = mul
        m.submodules.shf = shf
        m.submodules.sub = sub
        m.submodules.ext_sign = ext_sign

        # really should not activate absolutely all ALU inputs like this
        for mod in [add, mul, shf, sub]:
            m.d.comb += [
                mod.a.eq(self.a),
                mod.b.eq(self.b),
            ]
        # EXTS sign extends the first input
        with m.If(self.op.insn_type == MicrOp.OP_EXTS):
            m.d.comb += ext_sign.a.eq(self.a)
        # EXTSWSLI sign extends the second input
        with m.Elif(self.op.insn_type == MicrOp.OP_EXTSWSLI):
            m.d.comb += ext_sign.a.eq(self.b)

        # pass invert (and carry later)
        m.d.comb += add.invert_in.eq(self.op.invert_in)

        # raised (further down) for operations that have no delay at all
        go_now = Signal(reset_less=True)  # testing no-delay ALU

        # ALU sequencer is idle when the count is zero
        alu_idle = Signal(reset_less=True)
        m.d.comb += alu_idle.eq(self.counter == 0)

        # ALU sequencer is done when the count is one
        alu_done = Signal(reset_less=True)
        m.d.comb += alu_done.eq(self.counter == 1)

        # select handshake handling according to ALU type
        with m.If(go_now):
            # with a combinatorial, no-delay ALU, just pass through
            # the handshake signals to the other side
            m.d.comb += self.p.ready_o.eq(self.n.ready_i)
            m.d.comb += self.n.valid_o.eq(self.p.valid_i)
        with m.Else():
            # sequential ALU handshake:
            # ready_o is asserted whenever the ALU is idle
            m.d.comb += self.p.ready_o.eq(alu_idle)
            # valid_o is asserted while the sequencer is "done"
            m.d.comb += self.n.valid_o.eq(alu_done)

        # hold the ALU result until the output is acknowledged (ready_i)
        alu_r = Signal(self.width)

        # NOP doesn't output anything: "ok" is only raised for other opcodes
        with m.If(self.op.insn_type != MicrOp.OP_NOP):
            m.d.comb += self.o.ok.eq(1)
        with m.If(alu_idle):
            with m.If(self.p.valid_i):

                # as this is a "fake" pipeline, just grab the output right now
                with m.If(self.op.insn_type == MicrOp.OP_ADD):
                    m.d.sync += alu_r.eq(add.o)
                with m.Elif(self.op.insn_type == MicrOp.OP_MUL_L64):
                    m.d.sync += alu_r.eq(mul.o)
                with m.Elif(self.op.insn_type == MicrOp.OP_SHR):
                    m.d.sync += alu_r.eq(shf.o)
                with m.Elif(self.op.insn_type == MicrOp.OP_EXTS):
                    m.d.sync += alu_r.eq(ext_sign.o)
                with m.Elif(self.op.insn_type == MicrOp.OP_EXTSWSLI):
                    m.d.sync += alu_r.eq(ext_sign.o)
                # SUB is zero-delay, no need to register

                # NOTE: all of these are fake, just something to test

                # MUL: load the counter with 5
                with m.If(self.op.insn_type == MicrOp.OP_MUL_L64):
                    m.d.sync += self.counter.eq(5)
                # SHIFT: load the counter with 1 (done straight away)
                with m.Elif(self.op.insn_type == MicrOp.OP_SHR):
                    m.d.sync += self.counter.eq(1)
                # ADD: load the counter with 3
                with m.Elif(self.op.insn_type == MicrOp.OP_ADD):
                    m.d.sync += self.counter.eq(3)
                # EXTS: load the counter with 1
                with m.Elif(self.op.insn_type == MicrOp.OP_EXTS):
                    m.d.sync += self.counter.eq(1)
                # EXTSWSLI: load the counter with 1
                with m.Elif(self.op.insn_type == MicrOp.OP_EXTSWSLI):
                    m.d.sync += self.counter.eq(1)
                # others to take no delay
                with m.Else():
                    m.d.comb += go_now.eq(1)

        with m.Elif(~alu_done | self.n.ready_i):
            # not idle: count down while not yet done; once done, the
            # final step (1 -> 0, back to idle) waits for ready_i
            m.d.sync += self.counter.eq(self.counter - 1)

        # choose between zero-delay output, or registered
        with m.If(go_now):
            m.d.comb += self.o.data.eq(sub.o)
        # only present the result at the last computation cycle
        with m.Elif(alu_done):
            m.d.comb += self.o.data.eq(alu_r)

        return m

    def __iter__(self):
        # opcode record ports, operands, output record ports, handshake
        yield from self.op.ports()
        yield self.a
        yield self.b
        yield from self.o.ports()
        yield self.p.valid_i
        yield self.p.ready_o
        yield self.n.valid_o
        yield self.n.ready_i

    def ports(self):
        """Return the signals produced by ``__iter__`` as a list."""
        return list(self)
 329
 330
 331 class BranchOp(Elaboratable):
 332     def __init__(self, width, op):
 333         self.a = Signal(width)
 334         self.b = Signal(width)
 335         self.o = Signal(width)
 336         self.op = op
 337
 338     def elaborate(self, platform):
 339         m = Module()
 340         m.d.comb += self.o.eq(Mux(self.op(self.a, self.b), 1, 0))
 341         return m
 342
 343
 344 class BranchALU(Elaboratable):
 345     def __init__(self, width):
 346         self.p = Dummy()  # make look like nmutil pipeline API
 347         self.p.data_i = Dummy()
 348         self.p.data_i.ctx = Dummy()
 349         self.n = Dummy()  # make look like nmutil pipeline API
 350         self.n.data_o = Dummy()
 351         self.p.valid_i = Signal()
 352         self.p.ready_o = Signal()
 353         self.n.ready_i = Signal()
 354         self.n.valid_o = Signal()
 355         self.counter = Signal(4)
 356         self.op = Signal(2)
 357         i = []
 358         i.append(Signal(width, name="i1"))
 359         i.append(Signal(width, name="i2"))
 360         self.i = Array(i)
 361         self.a, self.b = i[0], i[1]
 362         self.out = Array([Signal(width)])
 363         self.o = self.out[0]
 364         self.width = width
 365
 366     def elaborate(self, platform):
 367         m = Module()
 368         bgt = BranchOp(self.width, operator.gt)
 369         blt = BranchOp(self.width, operator.lt)
 370         beq = BranchOp(self.width, operator.eq)
 371         bne = BranchOp(self.width, operator.ne)
 372
 373         m.submodules.bgt = bgt
 374         m.submodules.blt = blt
 375         m.submodules.beq = beq
 376         m.submodules.bne = bne
 377         for mod in [bgt, blt, beq, bne]:
 378             m.d.comb += [
 379                 mod.a.eq(self.a),
 380                 mod.b.eq(self.b),
 381             ]
 382
 383         go_now = Signal(reset_less=True)  # testing no-delay ALU
 384         with m.If(self.p.valid_i):
 385             # input is valid. next check, if we already said "ready" or not
 386             with m.If(~self.p.ready_o):
 387                 # we didn't say "ready" yet, so say so and initialise
 388                 m.d.sync += self.p.ready_o.eq(1)
 389
 390                 # as this is a "fake" pipeline, just grab the output right now
 391                 with m.Switch(self.op):
 392                     for i, mod in enumerate([bgt, blt, beq, bne]):
 393                         with m.Case(i):
 394                             m.d.sync += self.o.eq(mod.o)
 395                 # branch to take 5 cycles (fake)
 396                 m.d.sync += self.counter.eq(5)
 397                 #m.d.comb += go_now.eq(1)
 398         with m.Else():
 399             # input says no longer valid, so drop ready as well.
 400             # a "proper" ALU would have had to sync in the opcode and a/b ops
 401             m.d.sync += self.p.ready_o.eq(0)
 402
 403         # ok so the counter's running: when it gets to 1, fire the output
 404         with m.If((self.counter == 1) | go_now):
 405             # set the output as valid if the recipient is ready for it
 406             m.d.sync += self.n.valid_o.eq(1)
 407         with m.If(self.n.ready_i & self.n.valid_o):
 408             m.d.sync += self.n.valid_o.eq(0)
 409             # recipient said it was ready: reset back to known-good.
 410             m.d.sync += self.counter.eq(0)  # reset the counter
 411             m.d.sync += self.o.eq(0)  # clear the output for tidiness sake
 412
 413         # countdown to 1 (transition from 1 to 0 only on acknowledgement)
 414         with m.If(self.counter > 1):
 415             m.d.sync += self.counter.eq(self.counter - 1)
 416
 417         return m
 418
 419     def __iter__(self):
 420         yield self.op
 421         yield self.a
 422         yield self.b
 423         yield self.o
 424
 425     def ports(self):
 426         return list(self)
 427
 428
 429 def run_op(dut, a, b, op, inv_a=0):
 430     yield dut.a.eq(a)
 431     yield dut.b.eq(b)
 432     yield dut.op.insn_type.eq(op)
 433     yield dut.op.invert_in.eq(inv_a)
 434     yield dut.n.ready_i.eq(0)
 435     yield dut.p.valid_i.eq(1)
 436     yield dut.n.ready_i.eq(1)
 437     yield
 438
 439     # wait for the ALU to accept our input data
 440     while not (yield dut.p.ready_o):
 441         yield
 442
 443     yield dut.p.valid_i.eq(0)
 444     yield dut.a.eq(0)
 445     yield dut.b.eq(0)
 446     yield dut.op.insn_type.eq(0)
 447     yield dut.op.invert_in.eq(0)
 448
 449     # wait for the ALU to present the output data
 450     while not (yield dut.n.valid_o):
 451         yield
 452
 453     # latch the result and lower read_i
 454     result = yield dut.o.data
 455     yield dut.n.ready_i.eq(0)
 456
 457     return result
 458
 459
 460 def alu_sim(dut):
 461     result = yield from run_op(dut, 5, 3, MicrOp.OP_ADD)
 462     print("alu_sim add", result)
 463     assert (result == 8)
 464
 465     result = yield from run_op(dut, 2, 3, MicrOp.OP_MUL_L64)
 466     print("alu_sim mul", result)
 467     assert (result == 6)
 468
 469     result = yield from run_op(dut, 5, 3, MicrOp.OP_ADD, inv_a=1)
 470     print("alu_sim add-inv", result)
 471     assert (result == 65533)
 472
 473     # test zero-delay ALU
 474     # don't have OP_SUB, so use any other
 475     result = yield from run_op(dut, 5, 3, MicrOp.OP_CMP)
 476     print("alu_sim sub", result)
 477     assert (result == 2)
 478
 479     result = yield from run_op(dut, 13, 2, MicrOp.OP_SHR)
 480     print("alu_sim shr", result)
 481     assert (result == 3)
 482
 483
 484 def test_alu():
 485     alu = ALU(width=16)
 486     write_alu_gtkw("test_alusim.gtkw", clk_period=10e-9)
 487     run_simulation(alu, {"sync": alu_sim(alu)}, vcd_name='test_alusim.vcd')
 488
 489     vl = rtlil.convert(alu, ports=alu.ports())
 490     with open("test_alu.il", "w") as f:
 491         f.write(vl)
 492
 493
 494 def test_alu_parallel():
 495     # Compare with the sequential test implementation, above.
 496     m = Module()
 497     m.submodules.alu = dut = ALU(width=16)
 498     write_alu_gtkw("test_alu_parallel.gtkw", sub_module='alu',
 499                    pysim=is_engine_pysim())
 500
 501     sim = Simulator(m)
 502     sim.add_clock(1e-6)
 503
 504     def send(a, b, op, inv_a=0):
 505         # present input data and assert valid_i
 506         yield dut.a.eq(a)
 507         yield dut.b.eq(b)
 508         yield dut.op.insn_type.eq(op)
 509         yield dut.op.invert_in.eq(inv_a)
 510         yield dut.p.valid_i.eq(1)
 511         yield
 512         # wait for ready_o to be asserted
 513         while not (yield dut.p.ready_o):
 514             yield
 515         # clear input data and negate valid_i
 516         # if send is called again immediately afterwards, there will be no
 517         # visible transition (they will not be negated, after all)
 518         yield dut.p.valid_i.eq(0)
 519         yield dut.a.eq(0)
 520         yield dut.b.eq(0)
 521         yield dut.op.insn_type.eq(0)
 522         yield dut.op.invert_in.eq(0)
 523
 524     def receive():
 525         # signal readiness to receive data
 526         yield dut.n.ready_i.eq(1)
 527         yield
 528         # wait for valid_o to be asserted
 529         while not (yield dut.n.valid_o):
 530             yield
 531         # read result
 532         result = yield dut.o.data
 533         # negate ready_i
 534         # if receive is called again immediately afterwards, there will be no
 535         # visible transition (it will not be negated, after all)
 536         yield dut.n.ready_i.eq(0)
 537         return result
 538
 539     def producer():
 540         # send a few test cases, interspersed with wait states
 541         # note that, for this test, we do not wait for the result to be ready,
 542         # before presenting the next input
 543         # 5 + 3
 544         yield from send(5, 3, MicrOp.OP_ADD)
 545         yield
 546         yield
 547         # 2 * 3
 548         yield from send(2, 3, MicrOp.OP_MUL_L64)
 549         # (-5) + 3
 550         yield from send(5, 3, MicrOp.OP_ADD, inv_a=1)
 551         yield
 552         # 5 - 3
 553         # note that this is a zero-delay operation
 554         yield from send(5, 3, MicrOp.OP_NOP)
 555         yield
 556         yield
 557         # 13 >> 2
 558         yield from send(13, 2, MicrOp.OP_SHR)
 559         # sign extent 13
 560         yield from send(13, 2, MicrOp.OP_EXTS)
 561         # sign extend -128 (8 bits)
 562         yield from send(0x80, 2, MicrOp.OP_EXTS)
 563         # sign extend -128 (8 bits)
 564         yield from send(2, 0x80, MicrOp.OP_EXTSWSLI)
 565
 566     def consumer():
 567         # receive and check results, interspersed with wait states
 568         # the consumer is not in step with the producer, but the
 569         # order of the results are preserved
 570         yield
 571         # 5 + 3 = 8
 572         result = yield from receive()
 573         assert (result == 8)
 574         # 2 * 3 = 6
 575         result = yield from receive()
 576         assert (result == 6)
 577         yield
 578         yield
 579         # (-5) + 3 = -2
 580         result = yield from receive()
 581         assert (result == 65533)  # unsigned equivalent to -2
 582         # 5 - 3 = 2
 583         # note that this is a zero-delay operation
 584         # this, and the previous result, will be received back-to-back
 585         # (check the output waveform to see this)
 586         result = yield from receive()
 587         assert (result == 2)
 588         yield
 589         yield
 590         # 13 >> 2 = 3
 591         result = yield from receive()
 592         assert (result == 3)
 593         # sign extent 13 = 13
 594         result = yield from receive()
 595         assert (result == 13)
 596         # sign extend -128 (8 bits) = -128 (16 bits)
 597         result = yield from receive()
 598         assert (result == 0xFF80)
 599         # sign extend -128 (8 bits) = -128 (16 bits)
 600         result = yield from receive()
 601         assert (result == 0xFF80)
 602
 603     sim.add_sync_process(producer)
 604     sim.add_sync_process(consumer)
 605     sim_writer = sim.write_vcd("test_alu_parallel.vcd")
 606     with sim_writer:
 607         sim.run()
 608
 609
 610 def write_alu_gtkw(gtkw_name, clk_period=1e-6, sub_module=None,
 611                    pysim=True):
 612     """Common function to write the GTKWave documents for this module"""
 613     gtkwave_desc = [
 614         'clk',
 615         'i1[15:0]',
 616         'i2[15:0]',
 617         'op__insn_type' if pysim else 'op__insn_type[6:0]',
 618         'op__invert_in',
 619         'valid_i',
 620         'ready_o',
 621         'valid_o',
 622         'ready_i',
 623         'alu_o[15:0]',
 624     ]
 625     # determine the module name of the DUT
 626     module = 'top'
 627     if sub_module is not None:
 628         module = nmigen_sim_top_module + sub_module
 629     vcd_name = gtkw_name.replace('.gtkw', '.vcd')
 630     write_gtkw(gtkw_name, vcd_name, gtkwave_desc, module=module,
 631                loc=__file__, clk_period=clk_period, base='signed')
 632
 633
 634 if __name__ == "__main__":
 635     test_alu()
 636     test_alu_parallel()
 637
 638     # alu = BranchALU(width=16)
 639     # vl = rtlil.convert(alu, ports=alu.ports())
 640     # with open("test_branch_alu.il", "w") as f:
 641     #     f.write(vl)