add copyright / bugreport notice

[ieee754fpu.git] / src / ieee754 / fpdiv / pipeline.py
diff --git a/src/ieee754/fpdiv/pipeline.py b/src/ieee754/fpdiv/pipeline.py

index b265bd18e4166612c3a01331a2c7f710673ea0a8..c73d38148eb62bbd04f62359607e6106dc270e0d 100644 (file)
--- a/src/ieee754/fpdiv/pipeline.py
+++ b/src/ieee754/fpdiv/pipeline.py
@@ -1,6 +1,12 @@
-"""IEEE Floating Point Divider Pipeline
+"""IEEE754 Floating Point Divider Pipeline
  
-Relevant bugreport: http://bugs.libre-riscv.org/show_bug.cgi?id=99
+Copyright (C) 2019 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
+Copyright (C) 2019 Jacob Lifshay
+
+Relevant bugreports:
+* http://bugs.libre-riscv.org/show_bug.cgi?id=99
+* http://bugs.libre-riscv.org/show_bug.cgi?id=43
+* http://bugs.libre-riscv.org/show_bug.cgi?id=44
  
  Stack looks like this:
  
@@ -47,7 +53,8 @@ normpack - FPNormToPack            ispec FPAddStage1Data
  
  the number of combinatorial StageChains (n_comb_stages) in
  FPDivStages is an argument arranged to get the length of the whole
-pipeline down to sane numbers.
+pipeline down to sane numbers.  it specifies the number of "blocks"
+that will be combinatorially chained together.
  
  the reason for keeping the number of stages down is that for every
  pipeline clock delay, a corresponding ReservationStation is needed.
@@ -81,37 +88,39 @@ class FPDIVBasePipe(ControlBase):
          ControlBase.__init__(self)
  
          pipechain = []
-        max_n_comb_stages = 3  # TODO (depends on how many RS's we want)
          # keep the number of pipeline stages as few as possible:
          # every pipeline stage needs a ReservationStation, and too
          # many ReservationStations means "big problems".
  
-        # XXX BUG - subtracting 4 from number of stages stops assert
-        # probably related to having to add 4 in FPDivMuxInOut
-        radix = pspec.log2_radix
-        n_stages = pspec.core_config.n_stages // max_n_comb_stages
+        # get number of stages, set up loop.
+        n_stages = pspec.core_config.n_stages
+        max_n_comb_stages = self.pspec.n_comb_stages
+        print("n_stages", n_stages)
          stage_idx = 0
  
-        for i in range(n_stages):
+        end = False
+        while not end:
  
              n_comb_stages = max_n_comb_stages
              # needs to convert input from pipestart ospec
-            if i == 0:
-                kls = FPDivStagesSetup
-                #n_comb_stages -= 1  # reduce due to work done at start?
+            if stage_idx == 0:
+                n_comb_stages -= 1
+                kls = FPDivStagesSetup  # does n_comb_stages-1 calcs as well
  
              # needs to convert output to pipeend ispec
-            elif i == n_stages - 1:
-                kls = FPDivStagesFinal
-                #n_comb_stages -= 1  # FIXME - reduce due to work done at end?
+            elif stage_idx + n_comb_stages >= n_stages:
+                kls = FPDivStagesFinal  # does n_comb_stages-1 calcs as well
+                end = True
+                n_comb_stages = n_stages - stage_idx
  
              # intermediary stage
              else:
-                kls = FPDivStagesIntermediate
+                kls = FPDivStagesIntermediate  # does n_comb_stages calcs
  
+            # create (in each pipe) a StageChain n_comb_stages in length
              pipechain.append(kls(self.pspec, n_comb_stages, stage_idx))
-            stage_idx += n_comb_stages # increment so that each CalcStage
-                                       # gets a (correct) unique index
+            stage_idx += n_comb_stages  # increment so that each CalcStage
+            # gets a (correct) unique index
  
          self.pipechain = pipechain
  
@@ -135,6 +144,7 @@ class FPDIVBasePipe(ControlBase):
  
          return m
  
+
  def roundup(x, mod):
      return x if x % mod == 0 else x + mod - x % mod
  
@@ -152,22 +162,26 @@ class FPDIVMuxInOut(ReservationStations):
                     then be used to change the behaviour of the pipeline.
      """
  
-    def __init__(self, width, num_rows, op_wid=1):
-        self.id_wid = num_bits(width)
+    def __init__(self, width, num_rows, op_wid=2):
+        self.id_wid = num_bits(num_rows)
          self.pspec = PipelineSpec(width, self.id_wid, op_wid)
-        # get the standard mantissa width, store in the pspec HOWEVER...
+
+        # get the standard mantissa width, store in the pspec
          fmt = FPFormat.standard(width)
-        log2_radix = 2
+        log2_radix = 3     # tested options so far: 1, 2 and 3.
+        n_comb_stages = 2  # 2 compute stages per pipeline stage
  
-        # ...4 extra bits on the mantissa: MSB is zero, MSB-1 is 1
-        # then there is guard and round at the LSB end.
-        # also: round up to nearest radix
-        fmt.m_width = roundup(fmt.m_width + 4, log2_radix)
+        # extra bits needed: guard + round (sticky comes from remainder.bool())
+        fraction_width = fmt.fraction_width
+        fraction_width += 2
  
-        cfg = DivPipeCoreConfig(fmt.m_width, fmt.fraction_width, log2_radix)
+        # rounding width to a multiple of log2_radix is not needed,
+        # DivPipeCoreCalculateStage just internally reduces log2_radix on
+        # the last stage
+        cfg = DivPipeCoreConfig(fmt.width, fraction_width, log2_radix)
  
          self.pspec.fpformat = fmt
-        self.pspec.log2_radix = log2_radix
+        self.pspec.n_comb_stages = n_comb_stages
          self.pspec.core_config = cfg
  
          # XXX TODO - a class (or function?) that takes the pspec (right here)