From 24c82e027fd4bfa3ff0c811407a4f9731365b03a Mon Sep 17 00:00:00 2001
From: Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Date: Wed, 7 Sep 2022 19:40:01 +0100
Subject: [PATCH] add 2nd parallel prefix test, this time subtract
 (non-commutative)

---
 .../decoder/isa/remap_preduce_yield.py        |  8 ++-
 .../isa/test_caller_svp64_parallel_reduce.py  | 49 ++++++++++++++++++-
 2 files changed, 53 insertions(+), 4 deletions(-)

diff --git a/src/openpower/decoder/isa/remap_preduce_yield.py b/src/openpower/decoder/isa/remap_preduce_yield.py
index b9433fb8..a8093803 100644
--- a/src/openpower/decoder/isa/remap_preduce_yield.py
+++ b/src/openpower/decoder/isa/remap_preduce_yield.py
@@ -2,6 +2,7 @@
 # the algorithm is in-place. it does not perform "MV" operations.
 # instead, where a masked-out value *should* be read from is tracked
 from copy import deepcopy
+import operator
 
 # python "yield" can be iterated. use this to make it clear how
 # the indices are generated by using natural-looking nested loops
@@ -76,7 +77,10 @@ def demo():
                "end", bin(lend)[2:], bin(rend)[2:])
 
 
-def preduce_y(vec, pred=None):
+def preduce_y(vec, pred=None, operation=None):
+    if operation is None:
+        operation = operator.add
+
     res = deepcopy(vec)
     xdim = len(vec)
     # set up an SVSHAPE
@@ -106,7 +110,7 @@ def preduce_y(vec, pred=None):
         r = shapes[1][idx]
         (l_idx, lend) = l
         (r_idx, rend) = r
-        res[l_idx] += res[r_idx]
+        res[l_idx] = operation(res[l_idx], res[r_idx])
     return res
 
 # run the demo
diff --git a/src/openpower/decoder/isa/test_caller_svp64_parallel_reduce.py b/src/openpower/decoder/isa/test_caller_svp64_parallel_reduce.py
index 7b607ab8..7e5aab65 100644
--- a/src/openpower/decoder/isa/test_caller_svp64_parallel_reduce.py
+++ b/src/openpower/decoder/isa/test_caller_svp64_parallel_reduce.py
@@ -26,12 +26,12 @@ class DecoderTestCase(FHDLTestCase):
         for i in range(32):
             self.assertEqual(sim.gpr(i), SelectableInt(expected[i], 64))
 
-    def test_sv_remap1(self):
+    def tst_sv_remap1(self):
         """>>> lst = ["svshape 7, 0, 0, 7, 0",
                         "svremap 31, 0, 1, 0, 0, 0, 0",
                        "sv.add *0, *8, *16"
                         ]
-                REMAP fmadds FRT, FRA, FRC, FRB
+                REMAP add RT,RA,RB
         """
         lst = SVP64Asm(["svshape 7, 0, 0, 7, 0",
                         "svremap 31, 0, 1, 0, 0, 0, 0",
@@ -70,6 +70,51 @@ class DecoderTestCase(FHDLTestCase):
                 self.assertEqual(v, expected[i])
 
 
+    def test_sv_remap2(self):
+        """>>> lst = ["svshape 7, 0, 0, 7, 0",
+                        "svremap 31, 1, 0, 0, 0, 0, 0", # different order
+                       "sv.subf *0, *0, *0"
+                        ]
+                REMAP sv.subf RT,RA,RB - inverted application of RA/RB
+                                         left/right as subf is RB-RA
+        """
+        lst = SVP64Asm(["svshape 7, 0, 0, 7, 0",
+                        "svremap 31, 1, 0, 0, 0, 0, 0",
+                       "sv.subf *0, *0, *0"
+                        ])
+        lst = list(lst)
+
+        gprs = [0] * 64
+        vec = [1, 2, 3, 4, 9, 5, 6]
+
+        # vec is loaded into GPRs 0-6 below; after the run the same
+        # registers are read back into res and compared to preduce_y
+
+
+        res = []
+        # store GPRs
+        for i, x in enumerate(vec):
+            gprs[i] = x
+
+        with Program(lst, bigendian=False) as program:
+            sim = self.run_tst_program(program, initial_regs=gprs)
+            print ("spr svshape0", sim.spr['SVSHAPE0'])
+            print ("    xdimsz", sim.spr['SVSHAPE0'].xdimsz)
+            print ("    ydimsz", sim.spr['SVSHAPE0'].ydimsz)
+            print ("    zdimsz", sim.spr['SVSHAPE0'].zdimsz)
+            print ("spr svshape1", sim.spr['SVSHAPE1'])
+            print ("spr svshape2", sim.spr['SVSHAPE2'])
+            print ("spr svshape3", sim.spr['SVSHAPE3'])
+            for i in range(7):
+                val = sim.gpr(i).value
+                res.append(val)
+                print ("i", i, val)
+            # confirm that the results are as expected, mask with 64-bit
+            expected = preduce_y(vec, operation=operator.sub)
+            for i, v in enumerate(res):
+                self.assertEqual(v&0xffffffffffffffff,
+                                 expected[i]&0xffffffffffffffff)
+
     def run_tst_program(self, prog, initial_regs=None,
                               svstate=None,
                               initial_mem=None,
-- 
2.30.2