normalise XER regs carry/32 and SO

[soc.git] / src / soc / fu / logical / main_stage.py
diff --git a/src/soc/fu/logical/main_stage.py b/src/soc/fu/logical/main_stage.py

index 39c2400dd718e8ae6ff24acc805ecb10af36b4dd..4885708997b835b62a0be10b25bb2c5ee8cdf7ac 100644 (file)
--- a/src/soc/fu/logical/main_stage.py
+++ b/src/soc/fu/logical/main_stage.py
@@ -7,11 +7,11 @@
  
  from nmigen import (Module, Signal, Cat, Repl, Mux, Const, Array)
  from nmutil.pipemodbase import PipeModBase
-from soc.fu.logical.pipe_data import ALUInputData
+from nmutil.clz import CLZ
+from soc.fu.logical.pipe_data import LogicalInputData
  from soc.fu.alu.pipe_data import ALUOutputData
  from ieee754.part.partsig import PartitionedSignal
  from soc.decoder.power_enums import InternalOp
-from soc.countzero.countzero import ZeroCounter
  
  from soc.decoder.power_fields import DecodeFields
  from soc.decoder.power_fieldsn import SignalBitRange
@@ -20,7 +20,8 @@ from soc.decoder.power_fieldsn import SignalBitRange
  def array_of(count, bitwidth):
      res = []
      for i in range(count):
-        res.append(Signal(bitwidth, reset_less=True))
+        res.append(Signal(bitwidth, reset_less=True,
+                          name=f"pop_{bitwidth}_{i}"))
      return res
  
  
@@ -31,7 +32,7 @@ class LogicalMainStage(PipeModBase):
          self.fields.create_specs()
  
      def ispec(self):
-        return ALUInputData(self.pspec)
+        return LogicalInputData(self.pspec)
  
      def ospec(self):
          return ALUOutputData(self.pspec) # TODO: ALUIntermediateData
@@ -68,7 +69,7 @@ class LogicalMainStage(PipeModBase):
                  # creating arrays big enough to store the sum, each time
                  pc = [a]
                  # QTY32 2-bit (to take 2x 1-bit sums) etc.
-                work = [(32, 2), (16, 3), (8, 4), (4, 5), (2, 6), (1, 6)]
+                work = [(32, 2), (16, 3), (8, 4), (4, 5), (2, 6), (1, 7)]
                  for l, b in work:
                      pc.append(array_of(l, b))
                  pc8 = pc[3]     # array of 8 8-bit counts (popcntb)
@@ -82,14 +83,14 @@ class LogicalMainStage(PipeModBase):
                          comb += dst[i].eq(Cat(src[stt], Const(0, 1)) +
                                            Cat(src[end], Const(0, 1)))
                  # decode operation length
-                with m.If(op.data_len[2:4] == 0b00):
+                with m.If(op.data_len == 1):
                      # popcntb - pack 8x 4-bit answers into output
                      for i in range(8):
-                        comb += o[i*8:i*8+4].eq(pc8[i])
-                with m.Elif(op.data_len[3] == 0):
+                        comb += o[i*8:(i+1)*8].eq(pc8[i])
+                with m.Elif(op.data_len == 4):
                      # popcntw - pack 2x 5-bit answers into output
                      for i in range(2):
-                        comb += o[i*32:i*32+5].eq(pc32[i])
+                        comb += o[i*32:(i+1)*32].eq(pc32[i])
                  with m.Else():
                      # popcntd - put 1x 6-bit answer into output
                      comb += o.eq(popcnt[0])
@@ -110,18 +111,28 @@ class LogicalMainStage(PipeModBase):
              ###### cntlz #######
              with m.Case(InternalOp.OP_CNTZ):
                  XO = self.fields.FormX.XO[0:-1]
-                m.submodules.countz = countz = ZeroCounter()
-                comb += countz.rs_i.eq(a)
-                comb += countz.is_32bit_i.eq(op.is_32bit)
-                comb += countz.count_right_i.eq(XO[-1])
-                comb += o.eq(countz.result_o)
+                count_right = Signal(reset_less=True)
+                comb += count_right.eq(XO[-1])
+
+                cntz_i = Signal(64, reset_less=True)
+                a32 = Signal(32, reset_less=True)
+                comb += a32.eq(a[0:32])
+
+                with m.If(op.is_32bit):
+                    comb += cntz_i.eq(Mux(count_right, a32[::-1], a32))
+                with m.Else():
+                    comb += cntz_i.eq(Mux(count_right, a[::-1], a))
+
+                m.submodules.clz = clz = CLZ(64)
+                comb += clz.sig_in.eq(cntz_i)
+                comb += o.eq(Mux(op.is_32bit, clz.lz-32, clz.lz))
  
              ###### bpermd #######
              # TODO with m.Case(InternalOp.OP_BPERM): - not in microwatt
  
          ###### sticky overflow and context, both pass-through #####
  
-        comb += self.o.so.eq(self.i.so)
+        comb += self.o.xer_so.data.eq(self.i.xer_so)
          comb += self.o.ctx.eq(self.i.ctx)
  
          return m