add first cut at fu mul pipeline

author Luke Kenneth Casson Leighton <lkcl@lkcl.net>

Mon, 6 Jul 2020 15:34:31 +0000 (16:34 +0100)

committer Luke Kenneth Casson Leighton <lkcl@lkcl.net>

Mon, 6 Jul 2020 15:34:31 +0000 (16:34 +0100)
author Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Mon, 6 Jul 2020 15:34:31 +0000 (16:34 +0100)
committer Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Mon, 6 Jul 2020 15:34:31 +0000 (16:34 +0100)
diff --git a/src/soc/fu/mul/main_stage.py b/src/soc/fu/mul/main_stage.py

index ea40da35a37f67c846a233c7c7279928aa71886f..97ba81d7482e99760cd41a69dcb8aeb9499f2a55 100644 (file)
--- a/src/soc/fu/mul/main_stage.py
+++ b/src/soc/fu/mul/main_stage.py
@@ -1,79 +1,42 @@
-# This stage is intended to do most of the work of executing multiply
-# instructions, as well as carry and overflow generation. This module
-# however should not gate the carry or overflow, that's up to the
-# output stage
-from nmigen import (Module, Signal, Cat, Repl, Mux, Const)
+# This stage is intended to do the main work of an actual multiply
+
+from nmigen import Module
  from nmutil.pipemodbase import PipeModBase
-from soc.fu.alu.pipe_data import ALUOutputData
-from soc.fu.mul.pipe_data import MulInputData
+from soc.fu.mul.pipe_data import MulIntermediateData, MulOutputData
  from ieee754.part.partsig import PartitionedSignal
-from soc.decoder.power_enums import InternalOp
-from soc.fu.shift_rot.rotator import Rotator
-
-from soc.decoder.power_fields import DecodeFields
-from soc.decoder.power_fieldsn import SignalBitRange
  
  
-class ShiftRotMainStage(PipeModBase):
+class MulMainStage2(PipeModBase):
+    """Second multiply stage: performs the raw multiplication.
+
+    Multiplies the ``a`` and ``b`` operands from the previous stage and
+    passes the XER fields, result-negation flag and context through.
+    """
      def __init__(self, pspec):
-        super().__init__(pspec, "main")
-        self.fields = DecodeFields(SignalBitRange, [self.i.ctx.op.insn])
-        self.fields.create_specs()
+        super().__init__(pspec, "mul2")
  
      def ispec(self):
-        return MulInputData(self.pspec)
+        return MulIntermediateData(self.pspec) # pipeline stage input format
  
      def ospec(self):
-        return ALUOutputData(self.pspec)
+        return MulOutputData(self.pspec) # pipeline stage output format
  
      def elaborate(self, platform):
          m = Module()
          comb = m.d.comb
  
-        # obtain me and mb fields from instruction.
-        m_fields = self.fields.instrs['M']
-        md_fields = self.fields.instrs['MD']
-        mb = Signal(m_fields['MB'][0:-1].shape())
-        me = Signal(m_fields['ME'][0:-1].shape())
-        mb_extra = Signal(1, reset_less=True)
-        comb += mb.eq(m_fields['MB'][0:-1])
-        comb += me.eq(m_fields['ME'][0:-1])
-        comb += mb_extra.eq(md_fields['mb'][0:-1][0])
-
-        # set up microwatt rotator module
-        m.submodules.rotator = rotator = Rotator()
-        comb += [
-            rotator.me.eq(me),
-            rotator.mb.eq(mb),
-            rotator.mb_extra.eq(mb_extra),
-            rotator.rs.eq(self.i.rs),
-            rotator.ra.eq(self.i.ra),
-            rotator.shift.eq(self.i.rb),
-            rotator.is_32bit.eq(self.i.ctx.op.is_32bit),
-            rotator.arith.eq(self.i.ctx.op.is_signed),
-        ]
+        # convenience variables
+        a, b, o = self.i.a, self.i.b, self.o.o
  
-        # instruction rotate type
-        mode = Signal(3, reset_less=True)
-        with m.Switch(self.i.ctx.op.insn_type):
-            with m.Case(InternalOp.OP_SHL):  comb += mode.eq(0b000)
-            with m.Case(InternalOp.OP_SHR):  comb += mode.eq(0b001) # R-shift
-            with m.Case(InternalOp.OP_RLC):  comb += mode.eq(0b110) # clear LR
-            with m.Case(InternalOp.OP_RLCL): comb += mode.eq(0b010) # clear L
-            with m.Case(InternalOp.OP_RLCR): comb += mode.eq(0b100) # clear R
+        # actual multiply (TODO: split into stages)
+        comb += o.eq(a * b)
  
-        comb += Cat(rotator.right_shift,
-                    rotator.clear_left,
-                    rotator.clear_right).eq(mode)
-                
-        # outputs from the microwatt rotator module
-        # XXX TODO: carry32
-        comb += [self.o.o.eq(rotator.result_o),
-                 self.o.xer_ca[0].eq(rotator.carry_out_o)]
-
-        ###### sticky overflow and context, both pass-through #####
+        ###### xer and context, all pass-through #####
  
+        comb += self.o.xer_ca.data.eq(self.i.xer_ca)
+        comb += self.o.neg_res.eq(self.i.neg_res) # plain Signal, no .data
          comb += self.o.xer_so.data.eq(self.i.xer_so)
          comb += self.o.ctx.eq(self.i.ctx)
  
          return m
+
diff --git a/src/soc/fu/mul/pipe_data.py b/src/soc/fu/mul/pipe_data.py

index 495d503b4ff1bffffb02e251b19cb64a2c92dd80..429be0086cffe4b9d17f7b544a99d9f1a3cc6cda 100644 (file)
--- a/src/soc/fu/mul/pipe_data.py
+++ b/src/soc/fu/mul/pipe_data.py
@@ -1,10 +1,37 @@
+from nmigen import Signal
  from soc.fu.alu.alu_input_record import CompALUOpSubset
  from soc.fu.pipe_data import IntegerData, CommonPipeSpec
-from soc.fu.alu.pipe_data import ALUOutputData
-from soc.fu.shift_rot.pipe_data import ShoftRotInputData
+from soc.fu.alu.pipe_data import ALUOutputData, ALUInputData
  
  
-# TODO: replace CompALUOpSubset with CompShiftRotOpSubset
-class ShiftRotPipeSpec(CommonPipeSpec):
-    regspec = (ShiftRotInputData.regspec, ALUOutputData.regspec)
+class MulIntermediateData(ALUInputData):
+    """ALU input data extended with a ``neg_res`` flag.
+
+    ``neg_res`` is a single-bit signal telling later multiply stages
+    that the product must be negated.
+    """
+    def __init__(self, pspec):
+        super().__init__(pspec)
+
+        # bound as an attribute because the stages read/write .neg_res
+        self.neg_res = Signal(reset_less=True)
+        self.data.append(self.neg_res)
+
+
+class MulOutputData(IntegerData):
+    """Raw multiplier output: product ``o``, XER SO/CA and ``neg_res``."""
+    regspec = [('INT', 'o', '0:128'),
+               ('XER', 'xer_so', '32'), # XER bit 32: SO
+               ('XER', 'xer_ca', '34,45')] # XER bit 34/45: CA/CA32
+    def __init__(self, pspec):
+        super().__init__(pspec, False)
+
+        # bound as an attribute because the stages read/write .neg_res
+        self.neg_res = Signal(reset_less=True)
+        self.data.append(self.neg_res)
+
+
+class MulPipeSpec(CommonPipeSpec):
+    """Pipeline specification for the multiply function unit."""
+    regspec = (ALUInputData.regspec, ALUOutputData.regspec)
      opsubsetkls = CompALUOpSubset
diff --git a/src/soc/fu/mul/pipeline.py b/src/soc/fu/mul/pipeline.py

index e726d170d79cf01c5af88454998f14ef60e46d6e..d32d75297257e573b85c4249c0e421fe0550d93b 100644 (file)
--- a/src/soc/fu/mul/pipeline.py
+++ b/src/soc/fu/mul/pipeline.py
@@ -3,18 +3,35 @@ from nmutil.pipemodbase import PipeModBaseChain
  from soc.fu.shift_rot.input_stage import ShiftRotInputStage
  from soc.fu.shift_rot.main_stage import ShiftRotMainStage
  from soc.fu.alu.output_stage import ALUOutputStage
+# each multiply stage lives in its own module
+from soc.fu.mul.pre_stage import MulMainStage1
+from soc.fu.mul.main_stage import MulMainStage2
+from soc.fu.mul.post_stage import MulMainStage3
+# NOTE(review): ALUInputStage is used below but no import of it is visible
+# in this hunk -- confirm it is imported at the top of the file
+
  
  class MulStages1(PipeModBaseChain):
+    """First chain: ALU input stage followed by multiply stage 1."""
      def get_chain(self):
-        inp = ALUInputStage(self.pspec)
-        main = MulMainStage1(self.pspec)
+        inp = ALUInputStage(self.pspec)   # a-invert, carry etc
+        main = MulMainStage1(self.pspec)  # detect signed/32-bit
          return [inp, main]
  
+
  class MulStages2(PipeModBaseChain):
+    """Second chain: the multiply itself."""
      def get_chain(self):
-        main2 = MulMainStage2(self.pspec)
-        out = ALUOutputStage(self.pspec)
-        return [main2, out]
+        main2 = MulMainStage2(self.pspec) # actual multiply
+        return [main2]
+
+
+class MulStages3(PipeModBaseChain):
+    """Third chain: multiply stage 3 followed by the ALU output stage."""
+    def get_chain(self):
+        main3 = MulMainStage3(self.pspec) # select output bits, invert, set ov
+        out = ALUOutputStage(self.pspec)  # do CR, XER and out-invert etc.
+        return [main3, out]
  
  
  class ShiftRotBasePipe(ControlBase):
@@ -23,6 +32,7 @@ class ShiftRotBasePipe(ControlBase):
          self.pspec = pspec
          self.pipe1 = MulStages1(pspec)
          self.pipe2 = MulStages2(pspec)
+        self.pipe2 = MulStages3(pspec)
          self._eqs = self.connect([self.pipe1, self.pipe2])
  
      def elaborate(self, platform):
diff --git a/src/soc/fu/mul/post_stage.py b/src/soc/fu/mul/post_stage.py

new file mode 100644 (file)

index 0000000..501b4ed
--- /dev/null
+++ b/src/soc/fu/mul/post_stage.py
@@ -0,0 +1,85 @@
+# This stage is intended to do most of the work of analysing the multiply result
+
+from nmigen import (Module, Signal, Cat, Repl, Mux, signed)
+from nmutil.pipemodbase import PipeModBase
+from soc.fu.alu.pipe_data import ALUOutputData
+from soc.fu.mul.pipe_data import MulOutputData
+from ieee754.part.partsig import PartitionedSignal
+from soc.decoder.power_enums import InternalOp
+
+
+class MulMainStage3(PipeModBase):
+    """Final multiply stage: post-processes the raw product.
+
+    Optionally negates the product (when ``neg_res`` is set), selects
+    which bits of it form the result based on the instruction type, and
+    flags overflow for the low-half result.
+    """
+    def __init__(self, pspec):
+        super().__init__(pspec, "mul3")
+
+    def ispec(self):
+        return MulOutputData(self.pspec) # pipeline stage input format
+
+    def ospec(self):
+        return ALUOutputData(self.pspec) # defines pipeline stage output format
+
+    def elaborate(self, platform):
+        m = Module()
+        comb = m.d.comb
+
+        # convenience variables
+        o, ov_o = self.o.o, self.o.xer_ov
+        o_i, op = self.i.o, self.i.ctx.op
+
+        # check if op is 32-bit
+        is_32bit = Signal(reset_less=True)
+        comb += is_32bit.eq(op.is_32bit)
+
+        # check negate: select signed/unsigned
+        # NOTE(review): o is accessed via .data/.ok below; confirm that it
+        # also provides .width
+        o_s = Signal(signed(o.width * 2), reset_less=True)
+        mul_o = Signal(o.width * 2, reset_less=True)
+        comb += o_s.eq(-o_i)
+        comb += mul_o.eq(Mux(self.i.neg_res, o_s, o_i))
+        comb += o.ok.eq(1)
+
+        with m.Switch(op.insn_type):
+            # hi-32 replicated twice
+            with m.Case(InternalOp.OP_MUL_H32):
+                comb += o.data.eq(Repl(mul_o[32:64], 2))
+            # hi-64
+            with m.Case(InternalOp.OP_MUL_H64):
+                comb += o.data.eq(mul_o[64:128])
+            # lo-64 - overflow
+            with m.Default():
+                comb += o.data.eq(mul_o[0:64])
+
+                # compute overflow
+                mul_ov = Signal(reset_less=True)
+                with m.If(is_32bit):
+                    m32 = mul_o[32:64]
+                    comb += mul_ov.eq(m32.bool() & ~m32.all())
+                with m.Else():
+                    m64 = mul_o[64:128]
+                    comb += mul_ov.eq(m64.bool() & ~m64.all())
+
+                # 32-bit (ov[1]) and 64-bit (ov[0]) overflow
+                ov = Signal(2, reset_less=True)
+                comb += ov[0].eq(mul_ov)
+                comb += ov[1].eq(mul_ov)
+                comb += ov_o.data.eq(ov)
+                comb += ov_o.ok.eq(1)
+
+        # XER.CA/CA32 are not driven here: the Power ISA multiply
+        # instructions do not modify the carry bits, and this stage has
+        # no adder output or source operands to derive a carry from.
+
+        ###### sticky overflow and context, both pass-through #####
+
+        comb += self.o.xer_so.data.eq(self.i.xer_so)
+        comb += self.o.ctx.eq(self.i.ctx)
+
+        return m
+
diff --git a/src/soc/fu/mul/pre_stage.py b/src/soc/fu/mul/pre_stage.py

new file mode 100644 (file)

index 0000000..ff1e322
--- /dev/null
+++ b/src/soc/fu/mul/pre_stage.py
@@ -0,0 +1,60 @@
+# This stage prepares the operands for the multiply: it negates negative
+# signed operands and records whether the result must be negated
+from nmigen import (Module, Signal, Mux)
+from nmutil.pipemodbase import PipeModBase
+from soc.fu.alu.pipe_data import ALUInputData
+from soc.fu.mul.pipe_data import MulIntermediateData
+from ieee754.part.partsig import PartitionedSignal
+
+
+class MulMainStage1(PipeModBase):
+    """First multiply stage: strips the sign from the operands.
+
+    For signed operations a negative ``a`` or ``b`` is negated before
+    being passed on, and ``neg_res`` records whether exactly one of the
+    two was negative.
+    """
+    def __init__(self, pspec):
+        super().__init__(pspec, "mul1")
+
+    def ispec(self):
+        return ALUInputData(self.pspec) # defines pipeline stage input format
+
+    def ospec(self):
+        return MulIntermediateData(self.pspec) # pipeline stage output format
+
+    def elaborate(self, platform):
+        m = Module()
+        comb = m.d.comb
+
+        # convenience variables
+        a, b, op = self.i.a, self.i.b, self.i.ctx.op
+        a_o, b_o, neg_res_o = self.o.a, self.o.b, self.o.neg_res
+
+        # check if op is 32-bit
+        is_32bit = Signal(reset_less=True)
+        sign_a = Signal(reset_less=True)
+        sign_b = Signal(reset_less=True)
+        comb += is_32bit.eq(op.is_32bit)
+
+        # work out if a/b are negative (check 32-bit / signed)
+        comb += sign_a.eq(Mux(is_32bit, a[31], a[63]) & op.is_signed)
+        comb += sign_b.eq(Mux(is_32bit, b[31], b[63]) & op.is_signed)
+
+        # work out if result is negative sign
+        comb += neg_res_o.eq(sign_a ^ sign_b)
+
+        # negation of a 64-bit value produces the same lower 32-bit
+        # result as negation of just the lower 32-bits, so we don't
+        # need to do anything special before negating
+        comb += a_o.eq(Mux(sign_a, -a, a))
+        comb += b_o.eq(Mux(sign_b, -b, b))
+
+        ###### XER and context, all pass-through #####
+
+        comb += self.o.xer_ca.data.eq(self.i.xer_ca)
+        comb += self.o.xer_so.data.eq(self.i.xer_so)
+        comb += self.o.ctx.eq(self.i.ctx)
+
+        return m
+
author	Luke Kenneth Casson Leighton <lkcl@lkcl.net>
	Mon, 6 Jul 2020 15:34:31 +0000 (16:34 +0100)
committer	Luke Kenneth Casson Leighton <lkcl@lkcl.net>
	Mon, 6 Jul 2020 15:34:31 +0000 (16:34 +0100)
src/soc/fu/mul/main_stage.py		patch \| blob \| history
src/soc/fu/mul/pipe_data.py		patch \| blob \| history
src/soc/fu/mul/pipeline.py		patch \| blob \| history
src/soc/fu/mul/post_stage.py	[new file with mode: 0644]	patch \| blob
src/soc/fu/mul/pre_stage.py	[new file with mode: 0644]	patch \| blob