From 3b0999b246d64716288f6806856510d3f0c5365c Mon Sep 17 00:00:00 2001
From: colepoirier <colepoirier@gmail.com>
Date: Wed, 20 May 2020 12:19:40 -0700
Subject: [PATCH] Added OP_BPERM (bpermd) to fu/logical pipeline, with test

---
 src/soc/fu/logical/main_stage.py            | 13 +++++---
 src/soc/fu/logical/test/test_pipe_caller.py | 33 ++++++++++++++-------
 2 files changed, 31 insertions(+), 15 deletions(-)

diff --git a/src/soc/fu/logical/main_stage.py b/src/soc/fu/logical/main_stage.py
index 48857089..8c3703e6 100644
--- a/src/soc/fu/logical/main_stage.py
+++ b/src/soc/fu/logical/main_stage.py
@@ -9,6 +9,7 @@ from nmigen import (Module, Signal, Cat, Repl, Mux, Const, Array)
 from nmutil.pipemodbase import PipeModBase
 from nmutil.clz import CLZ
 from soc.fu.logical.pipe_data import LogicalInputData
+from soc.fu.logical.bpermd import Bpermd
 from soc.fu.alu.pipe_data import ALUOutputData
 from ieee754.part.partsig import PartitionedSignal
 from soc.decoder.power_enums import InternalOp
@@ -35,7 +36,7 @@ class LogicalMainStage(PipeModBase):
         return LogicalInputData(self.pspec)
 
     def ospec(self):
-        return ALUOutputData(self.pspec) # TODO: ALUIntermediateData
+        return ALUOutputData(self.pspec)  # TODO: ALUIntermediateData
 
     def elaborate(self, platform):
         m = Module()
@@ -74,7 +75,7 @@ class LogicalMainStage(PipeModBase):
                     pc.append(array_of(l, b))
                 pc8 = pc[3]     # array of 8 8-bit counts (popcntb)
                 pc32 = pc[5]    # array of 2 32-bit counts (popcntw)
-                popcnt = pc[-1] # array of 1 64-bit count (popcntd)
+                popcnt = pc[-1]  # array of 1 64-bit count (popcntd)
                 # cascade-tree of adds
                 for idx, (l, b) in enumerate(work):
                     for i in range(l):
@@ -100,7 +101,7 @@ class LogicalMainStage(PipeModBase):
                 # strange instruction which XORs together the LSBs of each byte
                 par0 = Signal(reset_less=True)
                 par1 = Signal(reset_less=True)
-                comb += par0.eq(Cat(a[0] , a[8] , a[16], a[24]).xor())
+                comb += par0.eq(Cat(a[0], a[8], a[16], a[24]).xor())
                 comb += par1.eq(Cat(a[32], a[40], a[48], a[56]).xor())
                 with m.If(op.data_len[3] == 1):
                     comb += o.eq(par0 ^ par1)
@@ -128,7 +129,11 @@ class LogicalMainStage(PipeModBase):
                 comb += o.eq(Mux(op.is_32bit, clz.lz-32, clz.lz))
 
             ###### bpermd #######
-            # TODO with m.Case(InternalOp.OP_BPERM): - not in microwatt
+            with m.Case(InternalOp.OP_BPERM):  # via soc.fu.logical.bpermd
+                m.submodules.bpermd = bpermd = Bpermd(64)  # 64: width? confirm
+                comb += bpermd.rs.eq(a)  # a drives the rs port
+                comb += bpermd.rb.eq(b)  # b drives the rb port
+                comb += o.eq(bpermd.ra)  # the ra port is copied to output o
 
         ###### sticky overflow and context, both pass-through #####
 
diff --git a/src/soc/fu/logical/test/test_pipe_caller.py b/src/soc/fu/logical/test/test_pipe_caller.py
index 4a22308c..b3b46408 100644
--- a/src/soc/fu/logical/test/test_pipe_caller.py
+++ b/src/soc/fu/logical/test/test_pipe_caller.py
@@ -25,6 +25,7 @@ class TestCase:
         self.sprs = sprs
         self.name = name
 
+
 def get_rec_width(rec):
     recwidth = 0
     # Setup random inputs for dut.op
@@ -33,6 +34,7 @@ def get_rec_width(rec):
         recwidth += width
     return recwidth
 
+
 def set_alu_inputs(alu, dec2, sim):
     # TODO: see https://bugs.libre-soc.org/show_bug.cgi?id=305#c43
     # detect the immediate here (with m.If(self.i.ctx.op.imm_data.imm_ok))
@@ -65,7 +67,6 @@ def set_alu_inputs(alu, dec2, sim):
     yield alu.p.data_i.b.eq(data2)
 
 
-
 def set_extra_alu_inputs(alu, dec2, sim):
     carry = 1 if sim.spr['XER'][XER_bits['CA']] else 0
     carry32 = 1 if sim.spr['XER'][XER_bits['CA32']] else 0
@@ -73,7 +74,7 @@ def set_extra_alu_inputs(alu, dec2, sim):
     yield alu.p.data_i.xer_ca[1].eq(carry32)
     so = 1 if sim.spr['XER'][XER_bits['SO']] else 0
     yield alu.p.data_i.xer_so.eq(so)
-    
+
 
 # This test bench is a bit different than is usual. Initially when I
 # was writing it, I had all of the tests call a function to create a
@@ -100,6 +101,7 @@ class LogicalTestCase(FHDLTestCase):
     def __init__(self, name):
         super().__init__(name)
         self.test_name = name
+
     def run_tst_program(self, prog, initial_regs=[0] * 32, initial_sprs={}):
         tc = TestCase(prog, initial_regs, initial_sprs, self.test_name)
         test_data.append(tc)
@@ -110,19 +112,19 @@ class LogicalTestCase(FHDLTestCase):
             choice = random.choice(insns)
             lst = [f"{choice} 3, 1, 2"]
             initial_regs = [0] * 32
-            initial_regs[1] = random.randint(0, (1<<64)-1)
-            initial_regs[2] = random.randint(0, (1<<64)-1)
+            initial_regs[1] = random.randint(0, (1 << 64)-1)
+            initial_regs[2] = random.randint(0, (1 << 64)-1)
             self.run_tst_program(Program(lst), initial_regs)
 
     def test_rand_imm_logical(self):
         insns = ["andi.", "andis.", "ori", "oris", "xori", "xoris"]
         for i in range(10):
             choice = random.choice(insns)
-            imm = random.randint(0, (1<<16)-1)
+            imm = random.randint(0, (1 << 16)-1)
             lst = [f"{choice} 3, 1, {imm}"]
             print(lst)
             initial_regs = [0] * 32
-            initial_regs[1] = random.randint(0, (1<<64)-1)
+            initial_regs[1] = random.randint(0, (1 << 64)-1)
             self.run_tst_program(Program(lst), initial_regs)
 
     def test_cntz(self):
@@ -132,7 +134,7 @@ class LogicalTestCase(FHDLTestCase):
             lst = [f"{choice} 3, 1"]
             print(lst)
             initial_regs = [0] * 32
-            initial_regs[1] = random.randint(0, (1<<64)-1)
+            initial_regs[1] = random.randint(0, (1 << 64)-1)
             self.run_tst_program(Program(lst), initial_regs)
 
     def test_parity(self):
@@ -142,7 +144,7 @@ class LogicalTestCase(FHDLTestCase):
             lst = [f"{choice} 3, 1"]
             print(lst)
             initial_regs = [0] * 32
-            initial_regs[1] = random.randint(0, (1<<64)-1)
+            initial_regs[1] = random.randint(0, (1 << 64)-1)
             self.run_tst_program(Program(lst), initial_regs)
 
     def test_popcnt(self):
@@ -152,7 +154,7 @@ class LogicalTestCase(FHDLTestCase):
             lst = [f"{choice} 3, 1"]
             print(lst)
             initial_regs = [0] * 32
-            initial_regs[1] = random.randint(0, (1<<64)-1)
+            initial_regs[1] = random.randint(0, (1 << 64)-1)
             self.run_tst_program(Program(lst), initial_regs)
 
     def test_popcnt_edge(self):
@@ -170,6 +172,13 @@ class LogicalTestCase(FHDLTestCase):
         initial_regs[2] = 0xd0adb0000afec1de
         self.run_tst_program(Program(lst), initial_regs)
 
+    def test_bpermd(self):  # queue one fixed-operand bpermd test case
+        lst = ["bpermd 3, 1, 2"]  # presumably r3 <- bpermd(r1, r2); confirm
+        initial_regs = [0] * 32  # 32 entries, zero except 1 and 2 below
+        initial_regs[1] = 0xdeadbeefcafec0de
+        initial_regs[2] = 0xd0adb0000afec1de
+        self.run_tst_program(Program(lst), initial_regs)  # queued in test_data
+
     def test_ilang(self):
         rec = CompALUOpSubset()
 
@@ -206,6 +215,7 @@ class TestRunner(FHDLTestCase):
         sim = Simulator(m)
 
         sim.add_clock(1e-6)
+
         def process():
             for test in self.test_data:
                 print(test.name)
@@ -230,7 +240,7 @@ class TestRunner(FHDLTestCase):
                     self.assertEqual(fn_unit, Function.LOGICAL.value, code)
                     yield from set_alu_inputs(alu, pdecode2, simulator)
                     yield from set_extra_alu_inputs(alu, pdecode2, simulator)
-                    yield 
+                    yield
                     opname = code.split(' ')[0]
                     yield from simulator.call(opname)
                     index = simulator.pc.CIA.value//4
@@ -252,8 +262,9 @@ class TestRunner(FHDLTestCase):
 
         sim.add_sync_process(process)
         with sim.write_vcd("simulator.vcd", "simulator.gtkw",
-                            traces=[]):
+                           traces=[]):
             sim.run()
+
     def check_extra_alu_outputs(self, alu, dec2, sim, code):
         rc = yield dec2.e.rc.data
         if rc:
-- 
2.30.2