From 3e5975ed2814bca9bcd5c07ba6c4d800e38b3f4d Mon Sep 17 00:00:00 2001
From: Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Date: Sat, 16 May 2020 19:17:13 +0100
Subject: [PATCH] code-munge on CR pipeline

---
 src/soc/cr/main_stage.py | 60 +++++++++++++++++++++++-----------------
 1 file changed, 35 insertions(+), 25 deletions(-)

diff --git a/src/soc/cr/main_stage.py b/src/soc/cr/main_stage.py
index 1552c4b1..97120123 100644
--- a/src/soc/cr/main_stage.py
+++ b/src/soc/cr/main_stage.py
@@ -5,6 +5,14 @@
 # This module however should not gate the carry or overflow, that's up
 # to the output stage
 
+# NOTE: we really should be doing the field decoding which
+# selectswhich bits of CR are to be read / written, back in the
+# decoder / insn-isue, have both self.i.cr and self.o.cr
+# be broken down into 4-bit-wide "registers", with their
+# own "Register File" (indexed by bt, ba and bb),
+# exactly how INT regs are done (by RA, RB, RS and RT)
+# however we are pushed for time so do it as *one* register.
+
 from nmigen import (Module, Signal, Cat, Repl, Mux, Const, Array)
 from nmutil.pipemodbase import PipeModBase
 from soc.cr.pipe_data import CRInputData, CROutputData
@@ -40,8 +48,9 @@ class CRMainStage(PipeModBase):
         xl_fields = self.fields.instrs['XL']
         xfx_fields = self.fields.instrs['XFX']
 
-        cr_output = Signal.like(self.i.cr)
-        comb += cr_output.eq(self.i.cr)
+        # default: cr_o remains same as cr input unless modified, below
+        cr_o = Signal.like(self.i.cr)
+        comb += cr_o.eq(self.i.cr)
 
         # Generate array for cr input so bits can be selected
         cr_arr = Array([Signal(name=f"cr_arr_{i}") for i in range(32)])
@@ -52,20 +61,9 @@ class CRMainStage(PipeModBase):
         # selected by a signal
         cr_out_arr = Array([Signal(name=f"cr_out_{i}") for i in range(32)])
         for i in range(32):
-            comb += cr_output[31-i].eq(cr_out_arr[i])
+            comb += cr_o[31-i].eq(cr_out_arr[i])
             comb += cr_out_arr[i].eq(cr_arr[i])
 
-        # crand/cror and friends get decoded to the same opcode, but
-        # one of the fields inside the instruction is a 4 bit lookup
-        # table. This lookup table gets indexed by bits a and b from
-        # the CR to determine what the resulting bit should be.
-
-        # Grab the lookup table for cr_op type instructions
-        lut = Signal(4, reset_less=True)
-        # There's no field, just have to grab it directly from the insn
-        comb += lut.eq(self.i.ctx.op.insn[6:10])
-
-
         # Ugh. mtocrf and mtcrf have one random bit differentiating
         # them. This bit is not in any particular field, so this
         # extracts that bit from the instruction
@@ -73,6 +71,7 @@ class CRMainStage(PipeModBase):
         comb += move_one.eq(self.i.ctx.op.insn[20])
 
         with m.Switch(op.insn_type):
+            ##### mcrf #####
             with m.Case(InternalOp.OP_MCRF):
                 # MCRF copies the 4 bits of crA to crB (for instance
                 # copying cr2 to cr1)
@@ -86,13 +85,15 @@ class CRMainStage(PipeModBase):
 
                 for i in range(4):
                     comb += cr_out_arr[bf*4 + i].eq(cr_arr[bfa*4 + i])
+
+            ##### crand, cror, crnor etc. #####
             with m.Case(InternalOp.OP_CROP):
                 # Get the bit selector fields from the instruction
                 bt = Signal(xl_fields['BT'][0:-1].shape())
-                comb += bt.eq(xl_fields['BT'][0:-1])
                 ba = Signal(xl_fields['BA'][0:-1].shape())
-                comb += ba.eq(xl_fields['BA'][0:-1])
                 bb = Signal(xl_fields['BB'][0:-1].shape())
+                comb += bt.eq(xl_fields['BT'][0:-1])
+                comb += ba.eq(xl_fields['BA'][0:-1])
                 comb += bb.eq(xl_fields['BB'][0:-1])
 
                 # Extract the two input bits from the CR
@@ -101,31 +102,40 @@ class CRMainStage(PipeModBase):
                 comb += bit_a.eq(cr_arr[ba])
                 comb += bit_b.eq(cr_arr[bb])
 
-                bit_out = Signal(reset_less=True)
+                # crand/cror and friends get decoded to the same opcode, but
+                # one of the fields inside the instruction is a 4 bit lookup
+                # table. This lookup table gets indexed by bits a and b from
+                # the CR to determine what the resulting bit should be.
+
+                # Grab the lookup table for cr_op type instructions
+                lut = Signal(4, reset_less=True)
+                # There's no field, just have to grab it directly from the insn
+                comb += lut.eq(self.i.ctx.op.insn[6:10])
 
                 # Use the two input bits to look up the result in the
                 # lookup table
+                bit_out = Signal(reset_less=True)
                 comb += bit_out.eq(Mux(bit_b,
                                        Mux(bit_a, lut[3], lut[1]),
                                        Mux(bit_a, lut[2], lut[0])))
                 # Set the output to the result above
                 comb += cr_out_arr[bt].eq(bit_out)
 
+            ##### mtcrf #####
             with m.Case(InternalOp.OP_MTCRF):
                 fxm = Signal(xfx_fields['FXM'][0:-1].shape())
                 comb += fxm.eq(xfx_fields['FXM'][0:-1])
 
-                # mtcrf
+                # replicate every fxm field in the insn to 4-bit, as a mask
+                fxl = [Repl(fxm[i], 4) for i in range(8)]
                 mask = Signal(32, reset_less=True)
+                comb += mask.eq(Cat(*fxl))
 
-                for i in range(8):
-                    comb += mask[i*4:(i+1)*4].eq(Repl(fxm[i], 4))
-
-                comb += cr_output.eq((self.i.a[0:32] & mask) |
-                                     (self.i.cr & ~mask))
-                    
+                # put input (RA) - mask-selected - into output CR, leave
+                # rest of CR alone.
+                comb += cr_o.eq((self.i.a[0:32] & mask) | (self.i.cr & ~mask))
 
-        comb += self.o.cr.eq(cr_output)
+        comb += self.o.cr.eq(cr_o)
         comb += self.o.ctx.eq(self.i.ctx)
 
         return m
-- 
2.30.2