X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fsoc%2Ffu%2Falu%2Fmain_stage.py;h=4d5fe2313bb9184e63fff9b3f81422650d1d9ba4;hb=HEAD;hp=16950629c743a301c62b7712420aeb436cce0e76;hpb=b3d3ce90ac56e1d6b708c503cd37a1739b591428;p=soc.git

diff --git a/src/soc/fu/alu/main_stage.py b/src/soc/fu/alu/main_stage.py
index 16950629..1f17943c 100644
--- a/src/soc/fu/alu/main_stage.py
+++ b/src/soc/fu/alu/main_stage.py
@@ -4,16 +4,20 @@
 # however should not gate the carry or overflow, that's up to the
 # output stage
 
+# License: LGPLv3+
+# Copyright (C) 2020 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
 # Copyright (C) 2020 Michael Nolan <mtnolan2640@gmail.com>
+# (michael: note that there are multiple copyright holders)
+
 from nmigen import (Module, Signal, Cat, Repl, Mux, Const)
 from nmutil.pipemodbase import PipeModBase
-from nmutil.extend import exts
+from nmutil.extend import exts, extz
 from soc.fu.alu.pipe_data import ALUInputData, ALUOutputData
-from ieee754.part.partsig import PartitionedSignal
-from soc.decoder.power_enums import MicrOp
+from ieee754.part.partsig import SimdSignal
+from openpower.decoder.power_enums import MicrOp
 
-from soc.decoder.power_fields import DecodeFields
-from soc.decoder.power_fieldsn import SignalBitRange
+from openpower.decoder.power_fields import DecodeFields
+from openpower.decoder.power_fieldsn import SignalBitRange
 
 
 # microwatt calc_ov function.
@@ -34,12 +38,13 @@ class ALUMainStage(PipeModBase):
         return ALUOutputData(self.pspec) # defines pipeline stage output format
 
     def elaborate(self, platform):
+        XLEN = self.pspec.XLEN
         m = Module()
         comb = m.d.comb
 
         # convenience variables
         cry_o, o, cr0 = self.o.xer_ca, self.o.o, self.o.cr0
-        ov_o = self.o.xer_ov
+        xer_so_i, ov_o = self.i.xer_so, self.o.xer_ov
         a, b, cry_i, op = self.i.a, self.i.b, self.i.xer_ca, self.i.ctx.op
 
         # get L-field for OP_CMP
@@ -60,9 +65,16 @@ class ALUMainStage(PipeModBase):
 
         a_i = Signal.like(a)
         b_i = Signal.like(b)
-        with m.If(is_32bit):
-            comb += a_i.eq(exts(a, 32, 64))
-            comb += b_i.eq(exts(b, 32, 64))
+        with m.If(op.insn_type == MicrOp.OP_CMP): # another temporary hack
+            comb += a_i.eq(a)                     # reaaaally need to move CMP
+            comb += b_i.eq(b)                     # into trap pipeline
+        with m.Elif(is_32bit):
+            with m.If(op.is_signed):
+                comb += a_i.eq(exts(a, 32, XLEN))
+                comb += b_i.eq(exts(b, 32, XLEN))
+            with m.Else():
+                comb += a_i.eq(extz(a, 32, XLEN))
+                comb += b_i.eq(extz(b, 32, XLEN))
         with m.Else():
             comb += a_i.eq(a)
             comb += b_i.eq(b)
@@ -83,12 +95,51 @@ class ALUMainStage(PipeModBase):
             #### CMP, CMPL v3.0B p85-86
 
             with m.Case(MicrOp.OP_CMP):
+                a_n = Signal(XLEN) # temporary - inverted a
+                tval = Signal(5)
+                a_lt = Signal()
+                carry_32 = Signal()
+                carry_64 = Signal()
+                zerolo = Signal()
+                zerohi = Signal()
+                msb_a = Signal()
+                msb_b = Signal()
+                newcrf = Signal(4)
+
                 # this is supposed to be inverted (b-a, not a-b)
-                # however we have a trick: instead of adding either 2x 64-bit
-                # MUXes to invert a and b, or messing with a 64-bit output,
-                # swap +ve and -ve test in the *output* stage using an XOR gate
-                comb += o.data.eq(add_o[1:-1])
-                comb += o.ok.eq(0) # use o.data but do *not* actually output
+                comb += a_n.eq(~a) # sigh a gets inverted
+                if XLEN == 64:
+                    comb += carry_32.eq(add_o[33] ^ a[32] ^ b[32])
+                else:
+                    comb += carry_32.eq(add_o[XLEN+1])
+                comb += carry_64.eq(add_o[XLEN+1])
+
+                comb += zerolo.eq(~((a_n[0:32] ^ b[0:32]).bool()))
+                comb += zerohi.eq(~((a_n[32:XLEN] ^ b[32:XLEN]).bool()))
+
+                with m.If(zerolo & (is_32bit | zerohi)):
+                    # values are equal
+                    comb += tval[2].eq(1)
+                with m.Else():
+                    comb += msb_a.eq(Mux(is_32bit, a_n[31], a_n[XLEN-1]))
+                    comb += msb_b.eq(Mux(is_32bit, b[31], b[XLEN-1]))
+                    C0 = Const(0, 1)
+                    with m.If(msb_a != msb_b):
+                        # Subtraction might overflow, but
+                        # comparison is clear from MSB difference.
+                        # signed: MSB=0 is greater; unsigned: MSB=1 is greater
+                        comb += tval.eq(Cat(msb_a, msb_b, C0, msb_b, msb_a))
+                    with m.Else():
+                        # Subtraction cannot overflow since MSBs are equal.
+                        # carry = 1 indicates RA is smaller (signed or unsigned)
+                        comb += a_lt.eq(Mux(is_32bit, carry_32, carry_64))
+                        comb += tval.eq(Cat(~a_lt, a_lt, C0, ~a_lt, a_lt))
+                comb += cr0.data[0:2].eq(Cat(xer_so_i[0], tval[2]))
+                with m.If(op.is_signed):
+                    comb += cr0.data[2:4].eq(tval[3:5])
+                with m.Else():
+                    comb += cr0.data[2:4].eq(tval[0:2])
+                comb += cr0.ok.eq(1)
 
             ###################
             #### add v3.0B p67, p69-72
@@ -102,13 +153,21 @@ class ALUMainStage(PipeModBase):
                 # https://bugs.libre-soc.org/show_bug.cgi?id=319#c5
                 ca = Signal(2, reset_less=True)
                 comb += ca[0].eq(add_o[-1])                   # XER.CA
-                comb += ca[1].eq(add_o[33] ^ (a_i[32] ^ b_i[32])) # XER.CA32
+                if XLEN == 64:
+                    comb += ca[1].eq(add_o[33] ^ (a_i[32] ^ b_i[32])) # XER.CA32
+                else:
+                    comb += ca[1].eq(add_o[-1])                   # XER.CA32
                 comb += cry_o.data.eq(ca)
                 comb += cry_o.ok.eq(1)
                 # 32-bit (ov[1]) and 64-bit (ov[0]) overflow
                 ov = Signal(2, reset_less=True)
                 comb += ov[0].eq(calc_ov(a_i[-1], b_i[-1], ca[0], add_o[-2]))
-                comb += ov[1].eq(calc_ov(a_i[31], b_i[31], ca[1], add_o[32]))
+                if XLEN == 64:
+                    comb += ov[1].eq(calc_ov(a_i[31], b_i[31], ca[1],
+                                             add_o[32]))
+                else:
+                    comb += ov[1].eq(calc_ov(a_i[-1], b_i[-1], ca[0],
+                                            add_o[-2]))
                 comb += ov_o.data.eq(ov)
                 comb += ov_o.ok.eq(1)
 
@@ -117,11 +176,11 @@ class ALUMainStage(PipeModBase):
 
             with m.Case(MicrOp.OP_EXTS):
                 with m.If(op.data_len == 1):
-                    comb += o.data.eq(exts(a, 8, 64))
+                    comb += o.data.eq(exts(a, 8, XLEN))
                 with m.If(op.data_len == 2):
-                    comb += o.data.eq(exts(a, 16, 64))
+                    comb += o.data.eq(exts(a, 16, XLEN))
                 with m.If(op.data_len == 4):
-                    comb += o.data.eq(exts(a, 32, 64))
+                    comb += o.data.eq(exts(a, 32, XLEN))
                 comb += o.ok.eq(1) # output register
 
             ###################
@@ -140,7 +199,7 @@ class ALUMainStage(PipeModBase):
 
         ###### sticky overflow and context, both pass-through #####
 
-        comb += self.o.xer_so.data.eq(self.i.xer_so)
+        comb += self.o.xer_so.data.eq(xer_so_i)
         comb += self.o.ctx.eq(self.i.ctx)
 
         return m