Add zero CR test case and fix comments

[soc.git] / src / soc / experiment / alu_hier.py
diff --git a/src/soc/experiment/alu_hier.py b/src/soc/experiment/alu_hier.py

index 59bca26e358051b9579a9686833c8a9c93a1e393..7aecaf692910b611333cc2b79480d1130fab6521 100644 (file)
--- a/src/soc/experiment/alu_hier.py
+++ b/src/soc/experiment/alu_hier.py
@@ -14,8 +14,15 @@ from nmigen.hdl.rec import Record, Layout
  from nmigen.cli import main
  from nmigen.cli import verilog, rtlil
  from nmigen.compat.sim import run_simulation
-from nmigen.back.pysim import Simulator, Settle
+from nmutil.extend import exts
+from nmutil.gtkw import write_gtkw
  
+# NOTE: to use cxxsim, export NMIGEN_SIM_MODE=cxxsim from the shell
+# Also, check out the cxxsim nmigen branch, and latest yosys from git
+from nmutil.sim_tmp_alternative import (Simulator, nmigen_sim_top_module,
+                                        is_engine_pysim)
+
+from soc.decoder.decode2execute1 import Data
  from soc.decoder.power_enums import MicrOp, Function, CryIn
  
  from soc.fu.alu.alu_input_record import CompALUOpSubset
@@ -26,14 +33,14 @@ import operator
  
  class Adder(Elaboratable):
      def __init__(self, width):
-        self.invert_a = Signal()
+        self.invert_in = Signal()
          self.a = Signal(width)
          self.b = Signal(width)
          self.o = Signal(width, name="add_o")
  
      def elaborate(self, platform):
          m = Module()
-        with m.If(self.invert_a):
+        with m.If(self.invert_in):
              m.d.comb += self.o.eq((~self.a) + self.b)
          with m.Else():
              m.d.comb += self.o.eq(self.a + self.b)
@@ -79,6 +86,18 @@ class Shifter(Elaboratable):
          return m
  
  
+class SignExtend(Elaboratable):
+    def __init__(self, width):
+        self.width = width
+        self.a = Signal(width)
+        self.o = Signal(width, name="exts_o")
+
+    def elaborate(self, platform):
+        m = Module()
+        m.d.comb += self.o.eq(exts(self.a, 8, self.width))
+        return m
+
+
  class Dummy:
      pass
  
@@ -177,14 +196,19 @@ class ALU(Elaboratable):
          i.append(Signal(width, name="i2"))
          self.i = Array(i)
          self.a, self.b = i[0], i[1]
-        self.out = Array([Signal(width, name="alu_o")])
+        out = []
+        out.append(Data(width, name="alu_o"))
+        out.append(Data(width, name="alu_cr"))
+        self.out = Array(out)
          self.o = self.out[0]
+        self.cr = self.out[1]
          self.width = width
          # more "look like nmutil pipeline API"
          self.p.data_i.ctx.op = self.op
          self.p.data_i.a = self.a
          self.p.data_i.b = self.b
          self.n.data_o.o = self.o
+        self.n.data_o.cr = self.cr
  
      def elaborate(self, platform):
          m = Module()
@@ -192,11 +216,13 @@ class ALU(Elaboratable):
          mul = Multiplier(self.width)
          shf = Shifter(self.width)
          sub = Subtractor(self.width)
+        ext_sign = SignExtend(self.width)
  
          m.submodules.add = add
          m.submodules.mul = mul
          m.submodules.shf = shf
          m.submodules.sub = sub
+        m.submodules.ext_sign = ext_sign
  
          # really should not activate absolutely all ALU inputs like this
          for mod in [add, mul, shf, sub]:
@@ -204,9 +230,15 @@ class ALU(Elaboratable):
                  mod.a.eq(self.a),
                  mod.b.eq(self.b),
              ]
+        # EXTS sign extends the first input
+        with m.If(self.op.insn_type == MicrOp.OP_EXTS):
+            m.d.comb += ext_sign.a.eq(self.a)
+        # EXTSWSLI sign extends the second input
+        with m.Elif(self.op.insn_type == MicrOp.OP_EXTSWSLI):
+            m.d.comb += ext_sign.a.eq(self.b)
  
          # pass invert (and carry later)
-        m.d.comb += add.invert_a.eq(self.op.invert_a)
+        m.d.comb += add.invert_in.eq(self.op.invert_in)
  
          go_now = Signal(reset_less=True)  # testing no-delay ALU
  
@@ -234,6 +266,14 @@ class ALU(Elaboratable):
          # hold the ALU result until ready_o is asserted
          alu_r = Signal(self.width)
  
+        # output masks
+        # NOP and ILLEGAL don't output anything
+        with m.If((self.op.insn_type != MicrOp.OP_NOP) &
+                  (self.op.insn_type != MicrOp.OP_ILLEGAL)):
+            m.d.comb += self.o.ok.eq(1)
+        # CR is output when rc bit is active
+        m.d.comb += self.cr.ok.eq(self.op.rc.rc)
+
          with m.If(alu_idle):
              with m.If(self.p.valid_i):
  
@@ -244,6 +284,10 @@ class ALU(Elaboratable):
                      m.d.sync += alu_r.eq(mul.o)
                  with m.Elif(self.op.insn_type == MicrOp.OP_SHR):
                      m.d.sync += alu_r.eq(shf.o)
+                with m.Elif(self.op.insn_type == MicrOp.OP_EXTS):
+                    m.d.sync += alu_r.eq(ext_sign.o)
+                with m.Elif(self.op.insn_type == MicrOp.OP_EXTSWSLI):
+                    m.d.sync += alu_r.eq(ext_sign.o)
                  # SUB is zero-delay, no need to register
  
                  # NOTE: all of these are fake, just something to test
@@ -257,6 +301,12 @@ class ALU(Elaboratable):
                  # ADD/SUB to take 3
                  with m.Elif(self.op.insn_type == MicrOp.OP_ADD):
                      m.d.sync += self.counter.eq(3)
+                # EXTS to take 1
+                with m.Elif(self.op.insn_type == MicrOp.OP_EXTS):
+                    m.d.sync += self.counter.eq(1)
+                # EXTSWSLI to take 1
+                with m.Elif(self.op.insn_type == MicrOp.OP_EXTSWSLI):
+                    m.d.sync += self.counter.eq(1)
                  # others to take no delay
                  with m.Else():
                      m.d.comb += go_now.eq(1)
@@ -267,10 +317,18 @@ class ALU(Elaboratable):
  
          # choose between zero-delay output, or registered
          with m.If(go_now):
-            m.d.comb += self.o.eq(sub.o)
+            m.d.comb += self.o.data.eq(sub.o)
          # only present the result at the last computation cycle
          with m.Elif(alu_done):
-            m.d.comb += self.o.eq(alu_r)
+            m.d.comb += self.o.data.eq(alu_r)
+
+        # determine condition register bits based on the data output value
+        with m.If(~self.o.data.any()):
+            m.d.comb += self.cr.data.eq(0b001)
+        with m.Elif(self.o.data[-1]):
+            m.d.comb += self.cr.data.eq(0b010)
+        with m.Else():
+            m.d.comb += self.cr.data.eq(0b100)
  
          return m
  
@@ -278,7 +336,7 @@ class ALU(Elaboratable):
          yield from self.op.ports()
          yield self.a
          yield self.b
-        yield self.o
+        yield from self.o.ports()
          yield self.p.valid_i
          yield self.p.ready_o
          yield self.n.valid_o
@@ -390,7 +448,7 @@ def run_op(dut, a, b, op, inv_a=0):
      yield dut.a.eq(a)
      yield dut.b.eq(b)
      yield dut.op.insn_type.eq(op)
-    yield dut.op.invert_a.eq(inv_a)
+    yield dut.op.invert_in.eq(inv_a)
      yield dut.n.ready_i.eq(0)
      yield dut.p.valid_i.eq(1)
      yield dut.n.ready_i.eq(1)
@@ -404,14 +462,14 @@ def run_op(dut, a, b, op, inv_a=0):
      yield dut.a.eq(0)
      yield dut.b.eq(0)
      yield dut.op.insn_type.eq(0)
-    yield dut.op.invert_a.eq(0)
+    yield dut.op.invert_in.eq(0)
  
      # wait for the ALU to present the output data
      while not (yield dut.n.valid_o):
          yield
  
      # latch the result and lower read_i
-    result = yield dut.o
+    result = yield dut.o.data
      yield dut.n.ready_i.eq(0)
  
      return result
@@ -432,7 +490,7 @@ def alu_sim(dut):
  
      # test zero-delay ALU
      # don't have OP_SUB, so use any other
-    result = yield from run_op(dut, 5, 3, MicrOp.OP_NOP)
+    result = yield from run_op(dut, 5, 3, MicrOp.OP_CMP)
      print("alu_sim sub", result)
      assert (result == 2)
  
@@ -443,6 +501,7 @@ def alu_sim(dut):
  
  def test_alu():
      alu = ALU(width=16)
+    write_alu_gtkw("test_alusim.gtkw", clk_period=10e-9)
      run_simulation(alu, {"sync": alu_sim(alu)}, vcd_name='test_alusim.vcd')
  
      vl = rtlil.convert(alu, ports=alu.ports())
@@ -454,15 +513,19 @@ def test_alu_parallel():
      # Compare with the sequential test implementation, above.
      m = Module()
      m.submodules.alu = dut = ALU(width=16)
+    write_alu_gtkw("test_alu_parallel.gtkw", sub_module='alu',
+                   pysim=is_engine_pysim())
+
      sim = Simulator(m)
      sim.add_clock(1e-6)
  
-    def send(a, b, op, inv_a=0):
+    def send(a, b, op, inv_a=0, rc=0):
          # present input data and assert valid_i
          yield dut.a.eq(a)
          yield dut.b.eq(b)
          yield dut.op.insn_type.eq(op)
-        yield dut.op.invert_a.eq(inv_a)
+        yield dut.op.invert_in.eq(inv_a)
+        yield dut.op.rc.rc.eq(rc)
          yield dut.p.valid_i.eq(1)
          yield
          # wait for ready_o to be asserted
@@ -475,7 +538,8 @@ def test_alu_parallel():
          yield dut.a.eq(0)
          yield dut.b.eq(0)
          yield dut.op.insn_type.eq(0)
-        yield dut.op.invert_a.eq(0)
+        yield dut.op.invert_in.eq(0)
+        yield dut.op.rc.rc.eq(0)
  
      def receive():
          # signal readiness to receive data
@@ -484,13 +548,14 @@ def test_alu_parallel():
          # wait for valid_o to be asserted
          while not (yield dut.n.valid_o):
              yield
-        # read result
-        result = yield dut.o
+        # read results
+        result = yield dut.o.data
+        cr = yield dut.cr.data
          # negate ready_i
          # if receive is called again immediately afterwards, there will be no
          # visible transition (it will not be negated, after all)
          yield dut.n.ready_i.eq(0)
-        return result
+        return result, cr
  
      def producer():
          # send a few test cases, interspersed with wait states
@@ -501,17 +566,27 @@ def test_alu_parallel():
          yield
          yield
          # 2 * 3
-        yield from send(2, 3, MicrOp.OP_MUL_L64)
-        # (-5) + 3
-        yield from send(5, 3, MicrOp.OP_ADD, inv_a=1)
+        yield from send(2, 3, MicrOp.OP_MUL_L64, rc=1)
+        # (-6) + 3
+        yield from send(5, 3, MicrOp.OP_ADD, inv_a=1, rc=1)
          yield
          # 5 - 3
          # note that this is a zero-delay operation
-        yield from send(5, 3, MicrOp.OP_NOP)
+        yield from send(5, 3, MicrOp.OP_CMP)
          yield
          yield
+        # NOP
+        yield from send(5, 3, MicrOp.OP_NOP)
          # 13 >> 2
          yield from send(13, 2, MicrOp.OP_SHR)
+        # sign extend 13
+        yield from send(13, 2, MicrOp.OP_EXTS)
+        # sign extend -128 (8 bits)
+        yield from send(0x80, 2, MicrOp.OP_EXTS, rc=1)
+        # sign extend -128 (8 bits)
+        yield from send(2, 0x80, MicrOp.OP_EXTSWSLI)
+        # 5 - 5
+        yield from send(5, 5, MicrOp.OP_CMP, rc=1)
  
      def consumer():
          # receive and check results, interspersed with wait states
@@ -520,38 +595,79 @@ def test_alu_parallel():
          yield
          # 5 + 3 = 8
          result = yield from receive()
-        assert (result == 8)
+        assert result[0] == 8
          # 2 * 3 = 6
+        # 6 > 0 => CR = 0b100
          result = yield from receive()
-        assert (result == 6)
+        assert result == (6, 0b100)
          yield
          yield
-        # (-5) + 3 = -2
+        # (-6) + 3 = -3
+        # -3 < 0 => CR = 0b010
          result = yield from receive()
-        assert (result == 65533)  # unsigned equivalent to -2
+        assert result == (65533, 0b010)  # unsigned equivalent to -3
          # 5 - 3 = 2
          # note that this is a zero-delay operation
          # this, and the previous result, will be received back-to-back
          # (check the output waveform to see this)
          result = yield from receive()
-        assert (result == 2)
+        assert result[0] == 2
          yield
          yield
+        # NOP
+        yield from receive()
          # 13 >> 2 = 3
          result = yield from receive()
-        assert (result == 3)
+        assert result[0] == 3
+        # sign extend 13 = 13
+        result = yield from receive()
+        assert result[0] == 13
+        # sign extend -128 (8 bits) = -128 (16 bits)
+        # -128 < 0 => CR = 0b010
+        result = yield from receive()
+        assert result == (0xFF80, 0b010)
+        # sign extend -128 (8 bits) = -128 (16 bits)
+        result = yield from receive()
+        assert result[0] == 0xFF80
+        # 5 - 5 = 0
+        # 0 == 0 => CR = 0b001
+        result = yield from receive()
+        assert result == (0, 0b001)
  
      sim.add_sync_process(producer)
      sim.add_sync_process(consumer)
-    sim_writer = sim.write_vcd(
-        "test_alu_parallel.vcd",
-        "test_alu_parallel.gtkw",
-        traces=dut.ports()
-    )
+    sim_writer = sim.write_vcd("test_alu_parallel.vcd")
      with sim_writer:
          sim.run()
  
  
+def write_alu_gtkw(gtkw_name, clk_period=1e-6, sub_module=None,
+                   pysim=True):
+    """Common function to write the GTKWave documents for this module"""
+    gtkwave_desc = [
+        'clk',
+        'i1[15:0]',
+        'i2[15:0]',
+        'op__insn_type' if pysim else 'op__insn_type[6:0]',
+        'op__invert_in',
+        'valid_i',
+        'ready_o',
+        'valid_o',
+        'ready_i',
+        'alu_o[15:0]',
+        'alu_o_ok',
+        'alu_cr[15:0]',
+        'alu_cr_ok'
+    ]
+    # determine the module name of the DUT
+    module = 'top'
+    if sub_module is not None:
+        module = nmigen_sim_top_module + sub_module
+    vcd_name = gtkw_name.replace('.gtkw', '.vcd')
+    write_gtkw(gtkw_name, vcd_name, gtkwave_desc, module=module,
+               loc=__file__, clk_period=clk_period, base='signed')
+
+
  if __name__ == "__main__":
      test_alu()
      test_alu_parallel()