FP16 DIV seems to be working

[ieee754fpu.git] / src / ieee754 / fpdiv / pipeline.py
diff --git a/src/ieee754/fpdiv/pipeline.py b/src/ieee754/fpdiv/pipeline.py

index 7c130fae0539a48c235b331f70b8e3f451da0eb6..f9ad69e35dbe0431c8d1f8a171b7063f2356f26a 100644 (file)
--- a/src/ieee754/fpdiv/pipeline.py
+++ b/src/ieee754/fpdiv/pipeline.py
@@ -1,4 +1,60 @@
-# IEEE Floating Point Divider Pipeline
+"""IEEE Floating Point Divider Pipeline
+
+Relevant bugreport: http://bugs.libre-riscv.org/show_bug.cgi?id=99
+
+Stack looks like this:
+
+scnorm   - FPDIVSpecialCasesDeNorm ispec FPADDBaseData
+------                             ospec FPSCData
+
+                StageChain: FPDIVSpecialCasesMod,
+                            FPAddDeNormMod
+
+pipediv0 - FPDivStagesSetup        ispec FPSCData
+--------                           ospec DivPipeInterstageData
+
+                StageChain: FPDivStage0Mod,
+                            DivPipeSetupStage,
+                            DivPipeCalculateStage,
+                            ...
+                            DivPipeCalculateStage
+
+pipediv1 - FPDivStagesIntermediate ispec DivPipeInterstageData
+--------                           ospec DivPipeInterstageData
+
+                StageChain: DivPipeCalculateStage,
+                            ...
+                            DivPipeCalculateStage
+...
+...
+
+pipediv5 - FPDivStageFinal         ispec FPDivStage0Data
+--------                           ospec FPAddStage1Data
+
+                StageChain: DivPipeCalculateStage,
+                            ...
+                            DivPipeCalculateStage,
+                            DivPipeFinalStage,
+                            FPDivStage2Mod
+
+normpack - FPNormToPack            ispec FPAddStage1Data
+--------                           ospec FPPackData
+
+                StageChain: Norm1ModSingle,
+                            RoundMod,
+                            CorrectionsMod,
+                            PackMod
+
+the number of combinatorial StageChains (n_comb_stages) in
+FPDivStages is an argument arranged to get the length of the whole
+pipeline down to sane numbers.
+
+the reason for keeping the number of stages down is that for every
+pipeline clock delay, a corresponding ReservationStation is needed.
+if there are 24 pipeline stages, we need a whopping TWENTY FOUR
+RS's.  that's far too many.  6 is just about an acceptable number.
+even 8 is starting to get alarmingly high.
+"""
  
  from nmigen import Module
  from nmigen.cli import main, verilog
@@ -8,30 +64,81 @@ from nmutil.concurrentunit import ReservationStations, num_bits
  
  from ieee754.fpcommon.getop import FPADDBaseData
  from ieee754.fpcommon.denorm import FPSCData
+from ieee754.fpcommon.fpbase import FPFormat
  from ieee754.fpcommon.pack import FPPackData
  from ieee754.fpcommon.normtopack import FPNormToPack
-from .specialcases import FPDIVSpecialCasesDeNorm
-from .divstages import FPDivStages
-
+from ieee754.fpdiv.specialcases import FPDIVSpecialCasesDeNorm
+from ieee754.fpdiv.divstages import (FPDivStagesSetup,
+                                     FPDivStagesIntermediate,
+                                     FPDivStagesFinal)
+from ieee754.pipeline import PipelineSpec
+from ieee754.div_rem_sqrt_rsqrt.core import DivPipeCoreConfig
  
  
  class FPDIVBasePipe(ControlBase):
-    def __init__(self, width, id_wid):
+    def __init__(self, pspec):
+        self.pspec = pspec
          ControlBase.__init__(self)
-        self.pipe1 = FPDIVSpecialCasesDeNorm(width, id_wid)
-        self.pipe2 = FPDivStages(width, id_wid)
-        self.pipe3 = FPNormToPack(width, id_wid)
  
-        self._eqs = self.connect([self.pipe1, self.pipe2, self.pipe3])
+        pipechain = []
+        max_n_comb_stages = 2  # TODO (depends on how many RS's we want)
+        # to which the answer: "as few as possible"
+        # is required.  too many ReservationStations
+        # means "big problems".
+
+        # XXX BUG - subtracting 4 from number of stages stops assert
+        # probably related to having to add 4 in FPDivMuxInOut
+        radix = pspec.log2_radix
+        n_stages = pspec.core_config.n_stages // max_n_comb_stages
+        print ("n_stages", pspec.core_config.n_stages, n_stages)
+        stage_idx = 0
+
+        for i in range(n_stages):
+
+            n_comb_stages = max_n_comb_stages
+            # needs to convert input from pipestart ospec
+            if i == 0:
+                kls = FPDivStagesSetup
+                #n_comb_stages -= 1  # reduce due to work done at start?
+
+            # needs to convert output to pipeend ispec
+            elif i == n_stages - 1:
+                kls = FPDivStagesFinal
+                #n_comb_stages -= 1  # FIXME - reduce due to work done at end?
+
+            # intermediary stage
+            else:
+                kls = FPDivStagesIntermediate
+
+            pipechain.append(kls(self.pspec, n_comb_stages, stage_idx))
+            stage_idx += n_comb_stages # increment so that each CalcStage
+                                       # gets a (correct) unique index
+
+        self.pipechain = pipechain
+
+        # start and end: unpack/specialcases then normalisation/packing
+        self.pipestart = pipestart = FPDIVSpecialCasesDeNorm(self.pspec)
+        self.pipeend = pipeend = FPNormToPack(self.pspec)
+
+        self._eqs = self.connect([pipestart] + pipechain + [pipeend])
  
      def elaborate(self, platform):
          m = ControlBase.elaborate(self, platform)
-        m.submodules.scnorm = self.pipe1
-        m.submodules.divstages = self.pipe2
-        m.submodules.normpack = self.pipe3
+
+        # add submodules
+        m.submodules.scnorm = self.pipestart
+        for i, p in enumerate(self.pipechain):
+            setattr(m.submodules, "pipediv%d" % i, p)
+        m.submodules.normpack = self.pipeend
+
+        # ControlBase.connect creates the "eqs" needed to connect each pipe
          m.d.comb += self._eqs
+
          return m
  
+def roundup(x, mod):
+    return x if x % mod == 0 else x + mod - x % mod
+
  
  class FPDIVMuxInOut(ReservationStations):
      """ Reservation-Station version of FPDIV pipeline.
@@ -41,15 +148,40 @@ class FPDIVMuxInOut(ReservationStations):
          * fan-out on outputs (an array of FPPackData: z,mid)
  
          Fan-in and Fan-out are combinatorial.
+
+        :op_wid: - set this to the width of an operator which can
+                   then be used to change the behaviour of the pipeline.
      """
-    def __init__(self, width, num_rows):
-        self.width = width
+
+    def __init__(self, width, num_rows, op_wid=1):
          self.id_wid = num_bits(width)
-        self.alu = FPDIVBasePipe(width, self.id_wid)
+        self.pspec = PipelineSpec(width, self.id_wid, op_wid)
+        # get the standard mantissa width, store in the pspec HOWEVER...
+        fmt = FPFormat.standard(width)
+        log2_radix = 2
+
+        # ...5 extra bits on the mantissa: MSB is zero, MSB-1 is 1
+        # then there is guard, round and sticky at the LSB end.
+        # also: round up to nearest radix
+        fmt.m_width = roundup(fmt.m_width + 5, log2_radix)
+        print ("width", fmt.m_width)
+
+        cfg = DivPipeCoreConfig(fmt.m_width, fmt.fraction_width, log2_radix)
+
+        self.pspec.fpformat = fmt
+        self.pspec.log2_radix = log2_radix
+        self.pspec.core_config = cfg
+
+        # XXX TODO - a class (or function?) that takes the pspec (right here)
+        # and creates... "something".  that "something" MUST have an eq function
+        # new_pspec = deepcopy(self.pspec)
+        # new_pspec.opkls = DivPipeCoreOperation
+        # self.alu = FPDIVBasePipe(new_pspec)
+        self.alu = FPDIVBasePipe(self.pspec)
          ReservationStations.__init__(self, num_rows)
  
      def i_specfn(self):
-        return FPADDBaseData(self.width, self.id_wid)
+        return FPADDBaseData(self.pspec)
  
      def o_specfn(self):
-        return FPPackData(self.width, self.id_wid)
+        return FPPackData(self.pspec)