X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fopenpower%2Fdecoder%2Fisa%2Ftest_caller_svp64_matrix.py;h=e77877de687c2a8baacc6782368c2945d85a33b5;hb=a67c21910a83dbd7abdcdd106c0bae2d78baf212;hp=6e86d16882138e9d45e5c35a3e99fd1a34c8dea7;hpb=c2814018538c5f0761f1fe2d435e4fd7da6d4488;p=openpower-isa.git

diff --git a/src/openpower/decoder/isa/test_caller_svp64_matrix.py b/src/openpower/decoder/isa/test_caller_svp64_matrix.py
index 6e86d168..e77877de 100644
--- a/src/openpower/decoder/isa/test_caller_svp64_matrix.py
+++ b/src/openpower/decoder/isa/test_caller_svp64_matrix.py
@@ -1,5 +1,5 @@
 from nmigen import Module, Signal
-from nmigen.back.pysim import Simulator, Delay, Settle
+from nmigen.sim import Simulator, Delay, Settle
 from nmutil.formaltest import FHDLTestCase
 import unittest
 from openpower.decoder.isa.caller import ISACaller
@@ -15,7 +15,6 @@ from openpower.sv.trans.svp64 import SVP64Asm
 from openpower.consts import SVP64CROffs
 from copy import deepcopy
 from openpower.decoder.helpers import fp64toselectable
-from openpower.decoder.isafunctions.double2single import DOUBLE2SINGLE
 from functools import reduce
 import operator
 
@@ -26,14 +25,16 @@ class DecoderTestCase(FHDLTestCase):
         for i in range(32):
             self.assertEqual(sim.gpr(i), SelectableInt(expected[i], 64))
 
-    def test_sv_remap(self):
-        """>>> lst = ["svremap 2, 2, 3, 0",
-                       "sv.fmadds 0.v, 8.v, 16.v, 0.v"
+    def tst_sv_remap1(self):
+        """>>> lst = ["svshape 2, 2, 3, 0, 0",
+                        "svremap 31, 1, 2, 3, 0, 0, 0",
+                       "sv.fmadds *0, *16, *32, *0"
                         ]
                 REMAP fmadds FRT, FRA, FRC, FRB
         """
-        lst = SVP64Asm(["svremap 2, 2, 3, 0",
-                       "sv.fmadds 0.v, 8.v, 16.v, 0.v"
+        lst = SVP64Asm(["svshape 2, 2, 3, 0, 0",
+                        "svremap 31, 1, 2, 3, 0, 0, 0",
+                       "sv.fmadds *0, *16, *32, *0"
                         ])
         lst = list(lst)
 
@@ -58,14 +59,15 @@ class DecoderTestCase(FHDLTestCase):
         print ("\t", yf)
 
         # and create a linear result2, same scheme
-        #result2 = [0] * (ydim1*xdim2)
+        #result1 = [0] * (ydim1*xdim2)
 
 
         res = []
         # store FPs
-        for i, (x, y) in enumerate(zip(xf, yf)):
-            fprs[i+8] = fp64toselectable(float(x))  # X matrix
-            fprs[i+16] = fp64toselectable(float(y)) # Y matrix
+        for i, x in enumerate(xf):
+            fprs[i+16] = fp64toselectable(float(x))  # X matrix
+        for i, y in enumerate(yf):
+            fprs[i+32] = fp64toselectable(float(y)) # Y matrix
             continue
             #t = DOUBLE2SINGLE(fp64toselectable(t)) # convert to Power single
             #u = DOUBLE2SINGLE(fp64toselectable(u)) # from double
@@ -73,16 +75,12 @@ class DecoderTestCase(FHDLTestCase):
             #print ("FFT", i, "in", a, b, "coeff", c, "mul",
             #       mul, "res", t, u)
 
-        # SVSTATE (in this case, VL=12, to cover all of matrix)
-        svstate = SVP64State()
-        svstate.vl[0:7] = 12 # VL
-        svstate.maxvl[0:7] = 12 # MAXVL
-        print ("SVSTATE", bin(svstate.spr.asint()))
-
         with Program(lst, bigendian=False) as program:
-            sim = self.run_tst_program(program, svstate=svstate,
-                                       initial_fprs=fprs)
+            sim = self.run_tst_program(program, initial_fprs=fprs)
             print ("spr svshape0", sim.spr['SVSHAPE0'])
+            print ("    xdimsz", sim.spr['SVSHAPE0'].xdimsz)
+            print ("    ydimsz", sim.spr['SVSHAPE0'].ydimsz)
+            print ("    zdimsz", sim.spr['SVSHAPE0'].zdimsz)
             print ("spr svshape1", sim.spr['SVSHAPE1'])
             print ("spr svshape2", sim.spr['SVSHAPE2'])
             print ("spr svshape3", sim.spr['SVSHAPE3'])
@@ -93,6 +91,175 @@ class DecoderTestCase(FHDLTestCase):
             #    self.assertEqual(sim.fpr(i+2), t)
             #    self.assertEqual(sim.fpr(i+6), u)
 
+    def tst_sv_remap2(self):
+        """>>> lst = ["svshape 4, 3, 3, 0, 0",
+                        "svremap 31, 1, 2, 3, 0, 0, 0",
+                       "sv.fmadds *0, *16, *32, *0"
+                        ]
+                REMAP fmadds FRT, FRA, FRC, FRB
+        """
+        lst = SVP64Asm(["svshape 4, 3, 3, 0, 0",
+                        "svremap 31, 1, 2, 3, 0, 0, 0",
+                       "sv.fmadds *0, *16, *32, *0"
+                        ])
+        lst = list(lst)
+
+        # 3x2 matrix
+        X1 = [[1, 2, 3],
+              [3, 4, 5],
+             ]
+        # 2x3 matrix
+        Y1 = [[6, 7],
+              [8, 9],
+              [10, 11],
+             ]
+
+        #### test matrices 2
+        # 3x3 matrix
+        X2 = [[12,7,3],
+            [4 ,5,6],
+            [7 ,8,9],
+            ]
+        # 4x3 matrix (4 columns, 3 rows)
+        Y2 = [[5,8,1,2],
+            [6,7,3,0],
+            [4,5,9,1]]
+
+        #### test matrices 3
+        # 3x4 matrix
+        X3 = [[12,7,3],
+            [4 ,5,6],
+            [7 ,8,9],
+            [2 ,0,1]]
+        # 5x3 matrix (5 columns, 3 rows)
+        Y3 = [[5,8,1,2,3],
+            [6,7,3,0,9],
+            [4,5,9,1,2]]
+
+        X = X2
+        Y = Y2
+
+        # get the dimensions of the 2 matrices
+        xdim1 = len(X[0])
+        ydim1 = len(X)
+        xdim2 = len(Y[0])
+        ydim2 = len(Y)
+
+        print ("xdim2 ydim1 ydim2", xdim2, ydim1, ydim2)
+
+        xf = reduce(operator.add, X)
+        yf = reduce(operator.add, Y)
+        print ("flattened X,Y")
+        print ("\t", xf)
+        print ("\t", yf)
+
+        # and create a linear result2, same scheme
+        #result1 = [0] * (ydim1*xdim2)
+
+
+        res = []
+        # store FPs
+        fprs = [0] * 64
+        for i, x in enumerate(xf):
+            fprs[i+16] = fp64toselectable(float(x))  # X matrix
+        for i, y in enumerate(yf):
+            fprs[i+32] = fp64toselectable(float(y)) # Y matrix
+            continue
+            #t = DOUBLE2SINGLE(fp64toselectable(t)) # convert to Power single
+            #u = DOUBLE2SINGLE(fp64toselectable(u)) # from double
+            #res.append((t, u))
+            #print ("FFT", i, "in", a, b, "coeff", c, "mul",
+            #       mul, "res", t, u)
+
+        with Program(lst, bigendian=False) as program:
+            sim = self.run_tst_program(program, initial_fprs=fprs)
+            print ("spr svshape0", sim.spr['SVSHAPE0'])
+            print ("    xdimsz", sim.spr['SVSHAPE0'].xdimsz)
+            print ("    ydimsz", sim.spr['SVSHAPE0'].ydimsz)
+            print ("    zdimsz", sim.spr['SVSHAPE0'].zdimsz)
+            print ("spr svshape1", sim.spr['SVSHAPE1'])
+            print ("spr svshape2", sim.spr['SVSHAPE2'])
+            print ("spr svshape3", sim.spr['SVSHAPE3'])
+            for i in range(16):
+                print ("i", i, float(sim.fpr(i)))
+            # confirm that the results are as expected
+            #for i, (t, u) in enumerate(res):
+            #    self.assertEqual(sim.fpr(i+2), t)
+            #    self.assertEqual(sim.fpr(i+6), u)
+
+    def test_sv_remap3_horizontal_or(self):
+        """>>> lst = ["svshape 3, 2, 1, 0, 0",
+                        "svremap 31, 1, 3, 1, 1, 1, 0",
+                        "sv.or *0, *0, *6"
+                        ]
+                REMAP horizontal-or using "or RA,RS,RB"
+                same trick can be applied to do horizontal-add
+                or horizontal-multiply.  just remember for multiply
+                to pre-load 1 (1.0) into the results first (or any other
+                scaling factor).
+
+                sv.or is horribly obscure because RA (the destination)
+                actually gets treated as RT by the REMAP subsystem.
+
+                The purpose here is to demonstrate a horizontal mapreduce
+                by using/abusing Matrix REMAP (ignoring the B-Matrix entirely)
+
+                if data is laid out in R G B R G B R G B format and
+                comprises tuples (R<<16 G<<8 B<<0) then a horizontal-or
+                may reduce down to (R<<16) | (G<<8) | (B<<0) on a per-row
+                basis.
+        """
+        # 3x4 matrix of data to be ORed together by row.
+        # Add any number of extra rows (up to 6) here (6 because sv.or *0,*0,*6)
+        X1 = [[0x1, 0x10, 0x100], # 0x111
+              [0x2, 0x40, 0x300], # 0x342
+              [0x9, 0x70, 0x800], # 0x879
+              [0x3, 0x71, 0x460], # overlaps (still ORed) - 0x473
+             ]
+
+        # get the dimensions of the array
+        xdim1 = len(X1[0])
+        ydim1 = len(X1)
+
+        lst = SVP64Asm(["svshape %d, %d, 1, 0, 0" % (xdim1, ydim1),
+                        # also works:
+                        # "svremap 31, 3, 0, 3, 1, 2, 0",
+                        # "sv.ternlogi *12, *0, *6, 250" # 0b11111110
+                        "svremap 31, 1, 3, 1, 1, 1, 0",
+                        "sv.or *0, *0, *6"
+                        ])
+        lst = list(lst)
+
+        print ("xdim1, ydim1", xdim1, ydim1)
+
+        expected = [0] * ydim1
+        for i, row in enumerate(X1):
+            expected[i] = reduce(operator.or_, row)
+            print ("\texpected ORed", hex(expected[i]))
+        xf = reduce(operator.add, X1)
+        print ("flattened X")
+        print ("\t", xf)
+
+        res = []
+        # store GPRs
+        gprs = [0] * 64
+        for i, x in enumerate(xf):
+            gprs[i+6] = x
+
+        with Program(lst, bigendian=False) as program:
+            sim = self.run_tst_program(program, gprs)
+            print ("spr svshape0", sim.spr['SVSHAPE0'])
+            print ("    xdimsz", sim.spr['SVSHAPE0'].xdimsz)
+            print ("    ydimsz", sim.spr['SVSHAPE0'].ydimsz)
+            print ("    zdimsz", sim.spr['SVSHAPE0'].zdimsz)
+            print ("spr svshape1", sim.spr['SVSHAPE1'])
+            print ("spr svshape2", sim.spr['SVSHAPE2'])
+            print ("spr svshape3", sim.spr['SVSHAPE3'])
+            for i in range(ydim1):
+                print ("i", i, sim.gpr(0+i), hex(expected[i]))
+            for i in range(ydim1):
+                self.assertEqual(sim.gpr(0+i), SelectableInt(expected[i], 64))
+
     def run_tst_program(self, prog, initial_regs=None,
                               svstate=None,
                               initial_mem=None,