too much debug info going past, so add the test registers to the
[soc.git] / src / soc / fu / div / output_stage.py
index eb4461e061313f80665aced8adf91a70de3b4482..0fc31c391414b89228d62b3300f4217e9f69774f 100644 (file)
@@ -1,12 +1,15 @@
 # This stage is the setup stage that converts the inputs
 # into the values expected by DivPipeCore
+"""
+* https://bugs.libre-soc.org/show_bug.cgi?id=424
+"""
 
 from nmigen import (Module, Signal, Cat, Repl, Mux, Const, Array)
 from nmutil.pipemodbase import PipeModBase
 from soc.fu.logical.pipe_data import LogicalInputData
-from soc.fu.alu.pipe_data import ALUOutputData
+from soc.fu.div.pipe_data import DivMulOutputData
 from ieee754.part.partsig import PartitionedSignal
-from soc.decoder.power_enums import InternalOp
+from soc.decoder.power_enums import MicrOp
 
 from soc.decoder.power_fields import DecodeFields
 from soc.decoder.power_fieldsn import SignalBitRange
@@ -20,18 +23,20 @@ class DivOutputStage(PipeModBase):
         self.fields.create_specs()
         self.quotient_neg = Signal()
         self.remainder_neg = Signal()
-        self.quotient_64 = Signal(64)
+        self.quotient_65 = Signal(65)  # one extra spare bit for overflow
         self.remainder_64 = Signal(64)
 
     def ispec(self):
         return CoreOutputData(self.pspec)
 
     def ospec(self):
-        return ALUOutputData(self.pspec)
+        return DivMulOutputData(self.pspec)
 
     def elaborate(self, platform):
         m = Module()
         comb = m.d.comb
+
+        # convenience variables
         op = self.i.ctx.op
         abs_quotient = self.i.core.quotient_root
         fract_width = self.pspec.core_config.fract_width
@@ -43,10 +48,12 @@ class DivOutputStage(PipeModBase):
         abs_remainder = self.i.core.remainder[rem_start:rem_start+64]
         dividend_neg = self.i.dividend_neg
         divisor_neg = self.i.divisor_neg
-        quotient_64 = self.quotient_64
+        quotient_65 = self.quotient_65
         remainder_64 = self.remainder_64
 
+        # work out if sign of result is to be negative
         comb += self.quotient_neg.eq(dividend_neg ^ divisor_neg)
+
         # follows rules for truncating division
         comb += self.remainder_neg.eq(dividend_neg)
 
@@ -54,47 +61,88 @@ class DivOutputStage(PipeModBase):
         # result as negation of just the lower 32-bits, so we don't
         # need to do anything special before negating
         comb += [
-            quotient_64.eq(Mux(self.quotient_neg,
+            quotient_65.eq(Mux(self.quotient_neg,
                                -abs_quotient, abs_quotient)),
             remainder_64.eq(Mux(self.remainder_neg,
                                 -abs_remainder, abs_remainder))
         ]
 
+        # calculate overflow (xer_ov.ok is always asserted by this stage)
+        comb += self.o.xer_ov.ok.eq(1)
         xer_ov = self.o.xer_ov.data
 
-        # TODO(programmerjake): check code against instruction models
-
+        # see test_6_regression in div test_pipe_caller.py
+        # https://bugs.libre-soc.org/show_bug.cgi?id=425
         def calc_overflow(dive_abs_overflow, sign_bit_mask):
             nonlocal comb
             overflow = dive_abs_overflow | self.i.div_by_zero
+            ov = Signal(reset_less=True)
             with m.If(op.is_signed):
-                comb += xer_ov.eq(overflow
-                                  | (abs_quotient > sign_bit_mask)
-                                  | ((abs_quotient == sign_bit_mask)
-                                     & ~self.quotient_neg))
+                comb += ov.eq(overflow
+                              | (abs_quotient > sign_bit_mask)
+                              | ((abs_quotient == sign_bit_mask)
+                                 & ~self.quotient_neg))
             with m.Else():
-                comb += xer_ov.eq(overflow)
+                comb += ov.eq(overflow)
+            comb += xer_ov.eq(Repl(ov, 2))  # set OV _and_ OV32
 
+        # check 32/64 bit version of overflow
         with m.If(op.is_32bit):
-            calc_overflow(self.i.dive_abs_overflow_32, 0x8000_0000)
+            calc_overflow(self.i.dive_abs_ov32, 0x80000000)
+        with m.Else():
+            calc_overflow(self.i.dive_abs_ov64, 0x8000000000000000)
+
+        # microwatt overflow detection
+        ov = Signal(reset_less=True)
+        with m.If(self.i.div_by_zero):
+            comb += ov.eq(1)
+        with m.Elif(~op.is_32bit):
+            comb += ov.eq(self.i.dive_abs_ov64)
+            with m.If(op.is_signed & (quotient_65[64] ^ quotient_65[63])):
+                comb += ov.eq(1)
+        with m.Elif(op.is_signed):
+            comb += ov.eq(self.i.dive_abs_ov32)
+            with m.If(quotient_65[32] != quotient_65[31]):
+                comb += ov.eq(1)
         with m.Else():
-            calc_overflow(self.i.dive_abs_overflow_32, 0x8000_0000_0000_0000)
+            comb += ov.eq(self.i.dive_abs_ov32)
+        comb += xer_ov.eq(Repl(ov, 2))  # set OV _and_ OV32
 
         ##########################
-        # main switch for DIV
-
-        with m.Switch(op.insn_type):
-            # TODO(programmerjake): finish switch
-            with m.Case(InternalOp.OP_DIV, InternalOp.OP_DIVE):
-                with m.If(op.is_32bit):
-                    comb += dividend_in.eq(self.abs_dividend[0:32])
-                with m.Else():
-                    comb += dividend_in.eq(self.abs_dividend[0:64])
-            with m.Case(InternalOp.OP_MOD):
-                with m.If(op.is_32bit):
-                    comb += dividend_in.eq(self.abs_dividend[0:32] << 32)
-                with m.Else():
-                    comb += dividend_in.eq(self.abs_dividend[0:64] << 64)
+        # main switch for Div
+
+        comb += self.o.o.ok.eq(1)
+        o = self.o.o.data
+
+        with m.If(~ov):  # result is valid (no overflow)
+            with m.Switch(op.insn_type):
+                with m.Case(MicrOp.OP_DIVE):
+                    with m.If(op.is_32bit):
+                        with m.If(op.is_signed):
+                            # matches POWER9's divweo behavior
+                            comb += o.eq(quotient_65[0:32].as_unsigned())
+                        with m.Else():
+                            comb += o.eq(quotient_65[0:32].as_unsigned())
+                    with m.Else():
+                        comb += o.eq(quotient_65)
+                with m.Case(MicrOp.OP_DIV):
+                    with m.If(op.is_32bit):
+                        with m.If(op.is_signed):
+                            # matches POWER9's divwo behavior
+                            comb += o.eq(quotient_65[0:32].as_unsigned())
+                        with m.Else():
+                            comb += o.eq(quotient_65[0:32].as_unsigned())
+                    with m.Else():
+                        comb += o.eq(quotient_65)
+                with m.Case(MicrOp.OP_MOD):
+                    with m.If(op.is_32bit):
+                        with m.If(op.is_signed):
+                            # matches POWER9's modsw behavior
+                            comb += o.eq(remainder_64[0:32].as_signed())
+                        with m.Else():
+                            comb += o.eq(remainder_64[0:32].as_unsigned())
+                    with m.Else():
+                        comb += o.eq(remainder_64)
 
         ###### sticky overflow and context, both pass-through #####