REMAP parallel-reduce:
author: Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Tue, 6 Sep 2022 14:28:05 +0000 (15:28 +0100)
committer: Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Tue, 6 Sep 2022 14:28:05 +0000 (15:28 +0100)
https://bugs.libre-soc.org/show_bug.cgi?id=864
* add 0b0111 csv entry for svshape
* stop sv/trans/svp64.py raising exception for SVrm=0b0111
* add beginnings of svshape SVrm=0b0111 to simplev.mdwn
* add first unit test

openpower/isa/simplev.mdwn
openpower/isatables/minor_22.csv
src/openpower/decoder/isa/test_caller_svp64_parallel_reduce.py [new file with mode: 0644]
src/openpower/sv/trans/svp64.py

index 9ef43a2e966ae667ac8d6391fd871fb49f0ad887..61dabd984ccb98f1ac63a633bdcf95efa4a9556d 100644 (file)
@@ -265,6 +265,39 @@ Pseudo-code:
         else
             SVSHAPE0[30:31] <- 0b11          # DCT mode
         SVSHAPE0[6:11] <- 0b000101       # DCT "half-swap" mode
+    # set schedule up for parallel reduction
+    if (SVrm = 0b0111) then
+        # calculate the total number of operations (brute-force)
+        vlen[0:6] <- [0] * 7
+        itercount[0:6] <- (0b00 || SVxd) + 0b0000001
+        step[0:6] <- 0b0000001
+        i[0:6] <- 0b0000000
+        do while step <u itercount
+            newstep <- step[1:6] || 0b0
+            j[0:6] <- 0b0000000
+            do while (j+step <u itercount)
+                j <- j + newstep
+                i <- i + 1
+            step <- newstep
+        vlen[0:6] <- i
+        # VL in Matrix Multiply is xd*yd*zd
+        n <- (0b00 || SVxd) * (0b00 || SVyd) * (0b00 || SVzd)
+        vlen[0:6] <- n[14:20]
+        # set up template in SVSHAPE0, then copy to 1-3
+        SVSHAPE0[0:5] <- (0b0 || SVxd)   # xdim
+        SVSHAPE0[6:11] <- (0b0 || SVyd)   # ydim
+        SVSHAPE0[12:17] <- (0b0 || SVzd)   # zdim
+        SVSHAPE0[28:29] <- 0b11           # skip z
+        # copy
+        SVSHAPE1[0:31] <- SVSHAPE0[0:31]
+        SVSHAPE2[0:31] <- SVSHAPE0[0:31]
+        SVSHAPE3[0:31] <- SVSHAPE0[0:31]
+        # set up FRA
+        SVSHAPE1[18:20] <- 0b001          # permute x,z,y
+        SVSHAPE1[28:29] <- 0b01           # skip z
+        # FRC
+        SVSHAPE2[18:20] <- 0b001          # permute x,z,y
+        SVSHAPE2[28:29] <- 0b11           # skip y
     # set VL, MVL and Vertical-First
     SVSTATE[0:6] <- vlen
     SVSTATE[7:13] <- vlen
index 30a636da235fa7b0f912614bfd3f42041e6881b9..fa371d3529472e30569a72a812dbf066a0e38721 100644 (file)
@@ -14,7 +14,9 @@ opcode,unit,internal op,in1,in2,in3,out,CR in,CR out,inv A,inv out,cry in,cry ou
 0100-011001,VL,OP_SVSHAPE,NONE,NONE,NONE,NONE,NONE,NONE,0,0,ZERO,0,NONE,0,0,0,0,0,0,NONE,0,0,svshape,SVM,,1,unofficial until submitted and approved/renumbered by the opf isa wg
 0101-011001,VL,OP_SVSHAPE,NONE,NONE,NONE,NONE,NONE,NONE,0,0,ZERO,0,NONE,0,0,0,0,0,0,NONE,0,0,svshape,SVM,,1,unofficial until submitted and approved/renumbered by the opf isa wg
 0110-011001,VL,OP_SVSHAPE,NONE,NONE,NONE,NONE,NONE,NONE,0,0,ZERO,0,NONE,0,0,0,0,0,0,NONE,0,0,svshape,SVM,,1,unofficial until submitted and approved/renumbered by the opf isa wg
-#0111-011001, ... is reserved (SVrm=0111), next 2 are svshape, then carry on...
+# this one is for Parallel Reduction (may change to a different instruction)
+0111-011001,VL,OP_SVSHAPE,NONE,NONE,NONE,NONE,NONE,NONE,0,0,ZERO,0,NONE,0,0,0,0,0,0,NONE,0,0,svshape,SVM,,1,unofficial until submitted and approved/renumbered by the opf isa wg
+# next 2 are svshape, then carry on...
 1010-011001,VL,OP_SVSHAPE,NONE,NONE,NONE,NONE,NONE,NONE,0,0,ZERO,0,NONE,0,0,0,0,0,0,NONE,0,0,svshape,SVM,,1,unofficial until submitted and approved/renumbered by the opf isa wg
 1011-011001,VL,OP_SVSHAPE,NONE,NONE,NONE,NONE,NONE,NONE,0,0,ZERO,0,NONE,0,0,0,0,0,0,NONE,0,0,svshape,SVM,,1,unofficial until submitted and approved/renumbered by the opf isa wg
 1100-011001,VL,OP_SVSHAPE,NONE,NONE,NONE,NONE,NONE,NONE,0,0,ZERO,0,NONE,0,0,0,0,0,0,NONE,0,0,svshape,SVM,,1,unofficial until submitted and approved/renumbered by the opf isa wg
diff --git a/src/openpower/decoder/isa/test_caller_svp64_parallel_reduce.py b/src/openpower/decoder/isa/test_caller_svp64_parallel_reduce.py
new file mode 100644 (file)
index 0000000..5e9b7c6
--- /dev/null
@@ -0,0 +1,114 @@
+from nmigen import Module, Signal
+from nmigen.sim import Simulator, Delay, Settle
+from nmutil.formaltest import FHDLTestCase
+import unittest
+from openpower.decoder.isa.caller import ISACaller
+from openpower.decoder.power_decoder import (create_pdecode)
+from openpower.decoder.power_decoder2 import (PowerDecode2)
+from openpower.simulator.program import Program
+from openpower.decoder.isa.caller import ISACaller, SVP64State
+from openpower.decoder.selectable_int import SelectableInt
+from openpower.decoder.orderedset import OrderedSet
+from openpower.decoder.isa.all import ISA
+from openpower.decoder.isa.test_caller import Register, run_tst
+from openpower.sv.trans.svp64 import SVP64Asm
+from openpower.consts import SVP64CROffs
+from copy import deepcopy
+from openpower.decoder.helpers import fp64toselectable
+from functools import reduce
+import operator
+
+
+class DecoderTestCase(FHDLTestCase):
+
+    def _check_regs(self, sim, expected):
+        for i in range(32):
+            self.assertEqual(sim.gpr(i), SelectableInt(expected[i], 64))
+
+    def test_sv_remap1(self):
+        """>>> lst = ["svshape 2, 2, 0, 7, 0",
+                        "svremap 31, 1, 2, 3, 0, 0, 0",
+                       #"sv.fadd *0, *8, *16"
+                        ]
+                REMAP fmadds FRT, FRA, FRC, FRB
+        """
+        lst = SVP64Asm(["svshape 2, 2, 0, 7, 0",
+                        #"svremap 31, 1, 2, 3, 0, 0, 0",
+                       #"sv.fmadds *0, *16, *32, *0"
+                        ])
+        lst = list(lst)
+
+        fprs = [0] * 64
+        # 3x2 matrix
+        X1 = [[1, 2, 3],
+              [3, 4, 5],
+             ]
+        # 2x3 matrix
+        Y1 = [[6, 7],
+              [8, 9],
+              [10, 11],
+             ]
+
+        X = X1
+        Y = Y1
+
+        xf = reduce(operator.add, X)
+        yf = reduce(operator.add, Y)
+        print ("flattened X,Y")
+        print ("\t", xf)
+        print ("\t", yf)
+
+        # and create a linear result2, same scheme
+        #result1 = [0] * (ydim1*xdim2)
+
+
+        res = []
+        # store FPs
+        for i, x in enumerate(xf):
+            fprs[i+16] = fp64toselectable(float(x))  # X matrix
+        for i, y in enumerate(yf):
+            fprs[i+32] = fp64toselectable(float(y)) # Y matrix
+            continue
+            #t = DOUBLE2SINGLE(fp64toselectable(t)) # convert to Power single
+            #u = DOUBLE2SINGLE(fp64toselectable(u)) # from double
+            #res.append((t, u))
+            #print ("FFT", i, "in", a, b, "coeff", c, "mul",
+            #       mul, "res", t, u)
+
+        with Program(lst, bigendian=False) as program:
+            sim = self.run_tst_program(program, initial_fprs=fprs)
+            print ("spr svshape0", sim.spr['SVSHAPE0'])
+            print ("    xdimsz", sim.spr['SVSHAPE0'].xdimsz)
+            print ("    ydimsz", sim.spr['SVSHAPE0'].ydimsz)
+            print ("    zdimsz", sim.spr['SVSHAPE0'].zdimsz)
+            print ("spr svshape1", sim.spr['SVSHAPE1'])
+            print ("spr svshape2", sim.spr['SVSHAPE2'])
+            print ("spr svshape3", sim.spr['SVSHAPE3'])
+            for i in range(4):
+                print ("i", i, float(sim.fpr(i)))
+            # confirm that the results are as expected
+            #for i, (t, u) in enumerate(res):
+            #    self.assertEqual(sim.fpr(i+2), t)
+            #    self.assertEqual(sim.fpr(i+6), u)
+
+
+    def run_tst_program(self, prog, initial_regs=None,
+                              svstate=None,
+                              initial_mem=None,
+                              initial_fprs=None):
+        if initial_regs is None:
+            initial_regs = [0] * 32
+        simulator = run_tst(prog, initial_regs, mem=initial_mem,
+                                                initial_fprs=initial_fprs,
+                                                svstate=svstate)
+
+        print ("GPRs")
+        simulator.gpr.dump()
+        print ("FPRs")
+        simulator.fpr.dump()
+
+        return simulator
+
+
+if __name__ == "__main__":
+    unittest.main()
index eed7b315529d62ff55298cfecf22bcaf81085d50..43c347213bebfd0afbbc955c6fff2613c40f39a5 100644 (file)
@@ -173,7 +173,7 @@ def svshape(fields):
     SVzd -= 1
 
     # check SVrm for reserved (and svshape2) values
-    assert SVrm not in [0b0111, 0b1000, 0b1001], \
+    assert SVrm not in [0b1000, 0b1001], \
             "svshape reserved SVrm value %s" % bin(SVrm)
 
     return instruction(