also read LDST RM files

[soc.git] / src / soc / fu / mul / post_stage.py
diff --git a/src/soc/fu/mul/post_stage.py b/src/soc/fu/mul/post_stage.py

index 501b4ed5e4fb4f7952a1818c95a0391f41aa6a98..14d2d91114f5e8b318028bde087c38896da55be2 100644 (file)
--- a/src/soc/fu/mul/post_stage.py
+++ b/src/soc/fu/mul/post_stage.py
@@ -1,11 +1,17 @@
  # This stage is intended to do most of the work of analysing the multiply result
+"""
+bugreports/links:
+* https://libre-soc.org/openpower/isa/fixedarith/
+* https://bugs.libre-soc.org/show_bug.cgi?id=432
+* https://bugs.libre-soc.org/show_bug.cgi?id=323
+"""
  
  from nmigen import (Module, Signal, Cat, Repl, Mux, signed)
  from nmutil.pipemodbase import PipeModBase
-from soc.fu.alu.pipe_data import ALUOutputData
+from soc.fu.div.pipe_data import DivMulOutputData
  from soc.fu.mul.pipe_data import MulOutputData
  from ieee754.part.partsig import PartitionedSignal
-from soc.decoder.power_enums import InternalOp
+from soc.decoder.power_enums import MicrOp
  
  
  class MulMainStage3(PipeModBase):
@@ -16,65 +22,60 @@ class MulMainStage3(PipeModBase):
          return MulOutputData(self.pspec) # pipeline stage output format
  
      def ospec(self):
-        return ALUOutputData(self.pspec) # defines pipeline stage output format
+        return DivMulOutputData(self.pspec) # defines stage output format
  
      def elaborate(self, platform):
          m = Module()
          comb = m.d.comb
  
          # convenience variables
-        cry_o, o, cr0 = self.o.xer_ca, self.o.o, self.o.cr0
-        ov_o = self.o.xer_ov
-        o_i, cry_i, op = self.i.o, self.i.xer_ca, self.i.ctx.op
+        o, cr0 = self.o.o, self.o.cr0
+        ov_o, o_i, op = self.o.xer_ov, self.i.o, self.i.ctx.op
  
          # check if op is 32-bit, and get sign bit from operand a
          is_32bit = Signal(reset_less=True)
          comb += is_32bit.eq(op.is_32bit)
  
          # check negate: select signed/unsigned
-        o_s = Signal(signed(o.width * 2), reset_less=True)
-        mul_o = Signal(o.width * 2, reset_less=True)
-        comb += o_s.eq(-o_i)
-        comb += mul_o.eq(Mux(self.i.neg_res, o_s, o_i))
-        comb += o.ok.eq(1)
+        mul_o = Signal(o_i.width, reset_less=True)
+        comb += mul_o.eq(Mux(self.i.neg_res, -o_i, o_i))
  
+        # OP_MUL_nnn - select hi32/hi64/lo64 from result
          with m.Switch(op.insn_type):
              # hi-32 replicated twice
-            with m.Case(InternalOp.OP_MUL_H32):
+            with m.Case(MicrOp.OP_MUL_H32):
                  comb += o.data.eq(Repl(mul_o[32:64], 2))
+                comb += o.ok.eq(1)
              # hi-64 
-            with m.Case(InternalOp.OP_MUL_H64):
+            with m.Case(MicrOp.OP_MUL_H64):
                  comb += o.data.eq(mul_o[64:128])
+                comb += o.ok.eq(1)
              # lo-64 - overflow
-            with m.Default():
+            with m.Case(MicrOp.OP_MUL_L64):
+                # take the low 64 bits of the mul
                  comb += o.data.eq(mul_o[0:64])
+                comb += o.ok.eq(1)
  
-                # compute overflow
+                # compute overflow 32/64
                  mul_ov = Signal(reset_less=True)
                  with m.If(is_32bit):
-                    m32 = mul_o[32:64]
-                    comb += mul_ov.eq(m32.bool() & ~m32.all())
+                    # overflow unless the top 32 bits are the sign-extension
+                    # of the bottom 32 bits (bits 31..63 all identical).
+                    m31 = mul_o[31:64] # yes really bits 31 to 63 (incl)
+                    comb += mul_ov.eq(m31.bool() & ~m31.all())
                  with m.Else():
-                    m64 = mul_o[64:128]
-                    comb += mul_ov.eq(m64.bool() & ~m64.all())
-
-                # 32-bit (ov[1]) and 64-bit (ov[0]) overflow
-                ov = Signal(2, reset_less=True)
-                comb += ov[0].eq(mul_ov)
-                comb += ov[1].eq(mul_ov)
-                comb += ov_o.data.eq(ov)
-                comb += ov_o.ok.eq(1)
+                    # overflow unless the top 64 bits are the sign-extension
+                    # of the bottom 64 bits (bits 63..127 all identical).
+                    m63 = mul_o[63:128] # yes really bits 63 to 127 (incl)
+                    comb += mul_ov.eq(m63.bool() & ~m63.all())
  
-        # https://bugs.libre-soc.org/show_bug.cgi?id=319#c5
-        ca = Signal(2, reset_less=True)
-        comb += ca[0].eq(add_o[-1])                   # XER.CA
-        comb += ca[1].eq(add_o[33] ^ (a[32] ^ b[32])) # XER.CA32
-        comb += cry_o.data.eq(ca)
-        comb += cry_o.ok.eq(1)
+                # 32-bit (ov[1]) and 64-bit (ov[0]) overflow - both same
+                comb += ov_o.data.eq(Repl(mul_ov, 2)) # sets OV _and_ OV32
+                comb += ov_o.ok.eq(1)
  
          ###### sticky overflow and context, both pass-through #####
  
-        comb += self.o.xer_so.data.eq(self.i.xer_so)
+        comb += self.o.xer_so.eq(self.i.xer_so)
          comb += self.o.ctx.eq(self.i.ctx)
  
          return m