From: Jacob Lifshay <programmerjake@gmail.com>
Date: Thu, 18 Jun 2020 02:56:07 +0000 (-0700)
Subject: working on adding rest of stage classes for div pipeline
X-Git-Tag: div_pipeline~331
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=3276f295fe3aeb6dd4a24cefe0ce0f082d91ec3a;p=soc.git

working on adding rest of stage classes for div pipeline
---

diff --git a/src/soc/fu/div/core_stages.py b/src/soc/fu/div/core_stages.py
new file mode 100644
index 00000000..cc4e6f7e
--- /dev/null
+++ b/src/soc/fu/div/core_stages.py
@@ -0,0 +1,92 @@
+# Pipeline stages that wrap the DivPipeCore setup, calculate and final
+# stages so that they can be chained as stages of the div pipeline
+
+from nmigen import (Module, Signal, Cat, Repl, Mux, Const, Array)
+from nmutil.pipemodbase import PipeModBase
+from soc.fu.logical.pipe_data import LogicalInputData
+from soc.fu.alu.pipe_data import ALUOutputData
+from ieee754.part.partsig import PartitionedSignal
+from soc.decoder.power_enums import InternalOp
+
+from soc.decoder.power_fields import DecodeFields
+from soc.decoder.power_fieldsn import SignalBitRange
+from soc.fu.div.pipe_data import CoreInputData, CoreInterstageData, CoreOutputData
+from ieee754.div_rem_sqrt_rsqrt.core import (DivPipeCoreSetupStage,
+                                             DivPipeCoreCalculateStage,
+                                             DivPipeCoreFinalStage)
+
+
+class DivCoreBaseStage(PipeModBase):
+    """Common base of the stages that each wrap one DivPipeCore stage.
+
+    The wrapped stage is built from `core_class` and kept in `self.core`;
+    the subclasses in this file provide `ispec()` and `ospec()`.
+    """
+
+    def __init__(self, pspec, modname, core_class, *args, **kwargs):
+        # any extra arguments are handed to `core_class` after the core config
+        super().__init__(pspec, modname)
+        self.core = core_class(pspec.core_config, *args, **kwargs)
+
+    def elaborate(self, platform):
+        """Connect the core submodule and pass the remaining data through."""
+        m = Module()
+        comb = m.d.comb
+        core = self.core
+
+        # everything except `core` is copied straight from input to output
+        comb += self.o.eq_without_core(self.i)
+
+        # the `core` part of the data is routed through the core submodule
+        m.submodules.core = core
+        comb += core.i.eq(self.i.core)
+        comb += self.o.core.eq(core.o)
+
+        return m
+
+
+class DivCoreSetupStage(DivCoreBaseStage):
+    """Wraps `DivPipeCoreSetupStage`: CoreInputData -> CoreInterstageData."""
+
+    def __init__(self, pspec):
+        core_class = DivPipeCoreSetupStage
+        super().__init__(pspec, "core_setup_stage", core_class)
+
+    def ispec(self):
+        return CoreInputData(self.pspec)
+
+    def ospec(self):
+        return CoreInterstageData(self.pspec)
+
+
+class DivCoreCalculateStage(DivCoreBaseStage):
+    """Wraps `DivPipeCoreCalculateStage`; CoreInterstageData in and out.
+
+    `stage_index` is forwarded to the wrapped stage and is also used to
+    build the `modname` passed to the base class.
+    """
+
+    def __init__(self, pspec, stage_index):
+        modname = f"core_calculate_stage_{stage_index}"
+        super().__init__(pspec, modname, DivPipeCoreCalculateStage,
+                         stage_index)
+
+    def ispec(self):
+        return CoreInterstageData(self.pspec)
+
+    def ospec(self):
+        return CoreInterstageData(self.pspec)
+
+
+class DivCoreFinalStage(DivCoreBaseStage):
+    """Wraps `DivPipeCoreFinalStage`: CoreInterstageData -> CoreOutputData."""
+
+    def __init__(self, pspec):
+        core_class = DivPipeCoreFinalStage
+        super().__init__(pspec, "core_final_stage", core_class)
+
+    def ispec(self):
+        return CoreInterstageData(self.pspec)
+
+    def ospec(self):
+        return CoreOutputData(self.pspec)
diff --git a/src/soc/fu/div/output_stage.py b/src/soc/fu/div/output_stage.py
new file mode 100644
index 00000000..eb4461e0
--- /dev/null
+++ b/src/soc/fu/div/output_stage.py
@@ -0,0 +1,123 @@
+# This stage is the output stage that converts the results produced by
+# DivPipeCore into the values output by the div pipeline
+
+from nmigen import (Module, Signal, Cat, Repl, Mux, Const, Array)
+from nmutil.pipemodbase import PipeModBase
+from soc.fu.logical.pipe_data import LogicalInputData
+from soc.fu.alu.pipe_data import ALUOutputData
+from ieee754.part.partsig import PartitionedSignal
+from soc.decoder.power_enums import InternalOp
+
+from soc.decoder.power_fields import DecodeFields
+from soc.decoder.power_fieldsn import SignalBitRange
+from soc.fu.div.pipe_data import CoreOutputData
+
+
+class DivOutputStage(PipeModBase):
+    """Output stage of the div pipeline (still work in progress).
+
+    Takes `CoreOutputData`, re-applies the signs to the absolute quotient
+    and remainder, drives the `xer_ov` output and produces `ALUOutputData`.
+    """
+
+    def __init__(self, pspec):
+        super().__init__(pspec, "output_stage")
+        self.fields = DecodeFields(SignalBitRange, [self.i.ctx.op.insn])
+        self.fields.create_specs()
+        # signs and signed 64-bit values of the two results
+        self.quotient_neg = Signal()
+        self.remainder_neg = Signal()
+        self.quotient_64 = Signal(64)
+        self.remainder_64 = Signal(64)
+
+    def ispec(self):
+        return CoreOutputData(self.pspec)
+
+    def ospec(self):
+        return ALUOutputData(self.pspec)
+
+    def elaborate(self, platform):
+        m = Module()
+        comb = m.d.comb
+        op = self.i.ctx.op
+        abs_quotient = self.i.core.quotient_root
+        fract_width = self.pspec.core_config.fract_width
+        # fract width of `DivPipeCoreOutputData.remainder`
+        remainder_fract_width = fract_width * 3
+        # fract width of `DivPipeCoreInputData.dividend`
+        dividend_fract_width = fract_width * 2
+        rem_start = remainder_fract_width - dividend_fract_width
+        abs_remainder = self.i.core.remainder[rem_start:rem_start+64]
+        dividend_neg = self.i.dividend_neg
+        divisor_neg = self.i.divisor_neg
+        quotient_64 = self.quotient_64
+        remainder_64 = self.remainder_64
+
+        comb += self.quotient_neg.eq(dividend_neg ^ divisor_neg)
+        # follows rules for truncating division
+        comb += self.remainder_neg.eq(dividend_neg)
+
+        # negation of a 64-bit value produces the same lower 32-bit
+        # result as negation of just the lower 32-bits, so we don't
+        # need to do anything special before negating
+        comb += [
+            quotient_64.eq(Mux(self.quotient_neg,
+                               -abs_quotient, abs_quotient)),
+            remainder_64.eq(Mux(self.remainder_neg,
+                                -abs_remainder, abs_remainder))
+        ]
+
+        xer_ov = self.o.xer_ov.data
+
+        # TODO(programmerjake): check code against instruction models
+
+        def calc_overflow(dive_abs_overflow, sign_bit_mask):
+            """Drive `xer_ov` for one operand width.
+
+            dive_abs_overflow: divide-extended overflow flag of that width
+            sign_bit_mask: constant with only that width's sign bit set
+            """
+            nonlocal comb
+            overflow = dive_abs_overflow | self.i.div_by_zero
+            with m.If(op.is_signed):
+                # a signed result also overflows when its magnitude is above
+                # the sign bit value, or equal to it for a positive result
+                comb += xer_ov.eq(overflow
+                                  | (abs_quotient > sign_bit_mask)
+                                  | ((abs_quotient == sign_bit_mask)
+                                     & ~self.quotient_neg))
+            with m.Else():
+                comb += xer_ov.eq(overflow)
+
+        with m.If(op.is_32bit):
+            calc_overflow(self.i.dive_abs_overflow_32, 0x8000_0000)
+        with m.Else():
+            # the 64-bit check needs the 64-bit divide-extended flag
+            calc_overflow(self.i.dive_abs_overflow_64, 0x8000_0000_0000_0000)
+
+        ##########################
+        # main switch for DIV
+
+        # NOTE(review): the cases below look copied from the setup stage:
+        # `dividend_in` is never defined in this method and
+        # `self.abs_dividend` is not set in `__init__`, so they have to be
+        # rewritten to drive the outputs before this stage can elaborate
+        with m.Switch(op.insn_type):
+            # TODO(programmerjake): finish switch
+            with m.Case(InternalOp.OP_DIV, InternalOp.OP_DIVE):
+                with m.If(op.is_32bit):
+                    comb += dividend_in.eq(self.abs_dividend[0:32])
+                with m.Else():
+                    comb += dividend_in.eq(self.abs_dividend[0:64])
+            with m.Case(InternalOp.OP_MOD):
+                with m.If(op.is_32bit):
+                    comb += dividend_in.eq(self.abs_dividend[0:32] << 32)
+                with m.Else():
+                    comb += dividend_in.eq(self.abs_dividend[0:64] << 64)
+
+        ###### sticky overflow and context, both pass-through #####
+
+        comb += self.o.xer_so.data.eq(self.i.xer_so)
+        comb += self.o.ctx.eq(self.i.ctx)
+
+        return m
diff --git a/src/soc/fu/div/pipe_data.py b/src/soc/fu/div/pipe_data.py
index c31641d6..f9df4e56 100644
--- a/src/soc/fu/div/pipe_data.py
+++ b/src/soc/fu/div/pipe_data.py
@@ -22,8 +22,15 @@ class CoreBaseData(ALUInputData):
     def __init__(self, pspec, core_data_class):
         super().__init__(pspec)
         self.core = core_data_class(pspec.core_config)
-        self.divisor_neg = Signal(1, reset_less=True)
-        self.dividend_neg = Signal(1, reset_less=True)
+        self.divisor_neg = Signal(reset_less=True)
+        self.dividend_neg = Signal(reset_less=True)
+        self.div_by_zero = Signal(reset_less=True)
+
+        # set when a divide extended instruction overflows because
+        # `abs_dividend >= abs_divisor` in the low 32 bits (`_32`) or in
+        # all 64 bits (`_64`); 0 for every other instruction
+        self.dive_abs_overflow_32 = Signal(reset_less=True)
+        self.dive_abs_overflow_64 = Signal(reset_less=True)
 
     def __iter__(self):
         yield from super().__iter__()
@@ -32,8 +39,17 @@ class CoreBaseData(ALUInputData):
         yield self.dividend_neg
 
     def eq(self, rhs):
+        """Return the assignments copying all of `rhs`, including `core`."""
+        return self.eq_without_core(rhs) + self.core.eq(rhs.core)
+
+    def eq_without_core(self, rhs):
+        """Return the assignments copying all of `rhs` except `rhs.core`."""
+        # every signal added in `__init__` has to be listed here, otherwise
+        # it is silently dropped by stages that pass data along with this
         return super().eq(rhs) + \
-            self.core.eq(rhs.core) + \
             [self.divisor_neg.eq(rhs.divisor_neg),
-             self.dividend_neg.eq(rhs.dividend_neg)]
+             self.dividend_neg.eq(rhs.dividend_neg),
+             self.div_by_zero.eq(rhs.div_by_zero),
+             self.dive_abs_overflow_32.eq(rhs.dive_abs_overflow_32),
+             self.dive_abs_overflow_64.eq(rhs.dive_abs_overflow_64)]
 
diff --git a/src/soc/fu/div/setup_stage.py b/src/soc/fu/div/setup_stage.py
index 6f04e839..6da1d733 100644
--- a/src/soc/fu/div/setup_stage.py
+++ b/src/soc/fu/div/setup_stage.py
@@ -16,7 +16,7 @@ from ieee754.div_rem_sqrt_rsqrt.core import DivPipeCoreOperation
 
 class DivSetupStage(PipeModBase):
     def __init__(self, pspec):
-        super().__init__(pspec, "main")
+        super().__init__(pspec, "setup_stage")
         self.fields = DecodeFields(SignalBitRange, [self.i.ctx.op.insn])
         self.fields.create_specs()
         self.abs_divisor = Signal(64)
@@ -50,11 +50,24 @@ class DivSetupStage(PipeModBase):
         comb += self.abs_divisor.eq(Mux(divisor_neg, -b, b))
         comb += self.abs_dividend.eq(Mux(dividend_neg, -a, a))
 
+        # divide-extended overflow flags, one per operand width; both are
+        # forced to 0 unless the instruction is OP_DIVE
+        comb += self.o.dive_abs_overflow_64.eq(
+            (self.abs_dividend >= self.abs_divisor)
+            & (op.insn_type == InternalOp.OP_DIVE))
+
+        comb += self.o.dive_abs_overflow_32.eq(
+            (self.abs_dividend[0:32] >= self.abs_divisor[0:32])
+            & (op.insn_type == InternalOp.OP_DIVE))
+
         with m.If(op.is_32bit):
             comb += divisor_in.eq(self.abs_divisor[0:32])
         with m.Else():
             comb += divisor_in.eq(self.abs_divisor[0:64])
 
+        # test the width-selected divisor, i.e. the local `divisor_in`
+        comb += self.o.div_by_zero.eq(divisor_in == 0)
+
         ##########################
         # main switch for DIV