bug #1183: attempt first ddffirst mapreduce mode

[openpower-isa.git] / openpower / isa / simplev.mdwn
diff --git a/openpower/isa/simplev.mdwn b/openpower/isa/simplev.mdwn

index 290454cbe4b9cb57ef2c0a928bded094f0282f23..33a02e6612065f290d840e15a596dfc2177de5e5 100644 (file)
--- a/openpower/isa/simplev.mdwn
+++ b/openpower/isa/simplev.mdwn
@@ -11,7 +11,7 @@ SVL-Form
  Pseudo-code:
  
      if SVi[3:4] = 0b11 then
-        # store subvl, pack and unpack in SVSTATE
+        # store pack and unpack in SVSTATE
          SVSTATE[53] <- SVi[5]
          SVSTATE[54] <- SVi[6]
          RT <- [0]*62 || SVSTATE[53:54]
@@ -164,10 +164,9 @@ Pseudo-code:
          # FRC (coefficients)
          SVSHAPE2[28:29] <- 0b10           # k schedule
      # set schedule up for (i)DCT Inner butterfly
-    # SVrm Mode 2 (Mode 6 for iDCT) is for pre-calculated coefficients,
      # SVrm Mode 4 (Mode 12 for iDCT) is for on-the-fly (Vertical-First Mode)
-    if ((SVrm = 0b0010) | (SVrm = 0b0100) |
-        (SVrm = 0b1010) | (SVrm = 0b1100)) then
+    if ((SVrm = 0b0100) |
+        (SVrm = 0b1100)) then
          # calculate O(N log2 N)
          n <- [0] * 3
          do while n < 5
@@ -181,17 +180,14 @@ Pseudo-code:
          SVSHAPE0[0:5] <- (0b0 || SVxd)   # xdim
          SVSHAPE0[12:17] <- (0b0 || SVzd)   # zdim - "striding" (2D DCT)
          mscale <- (0b0 || SVzd) + 1
-        if (SVrm = 0b1010) | (SVrm = 0b1100) then
+        if (SVrm = 0b1100) then
              SVSHAPE0[30:31] <- 0b11          # iDCT mode
              SVSHAPE0[18:20] <- 0b011         # iDCT Inner Butterfly sub-mode
          else
              SVSHAPE0[30:31] <- 0b01          # DCT mode
              SVSHAPE0[18:20] <- 0b001         # DCT Inner Butterfly sub-mode
              SVSHAPE0[21:23] <- 0b001         # "inverse" on outer loop
-        if (SVrm = 0b1100) | (SVrm = 0b0100) then
-            SVSHAPE0[6:11] <- 0b000011       # (i)DCT Inner Butterfly mode 4
-        else
-            SVSHAPE0[6:11] <- 0b000001       # (i)DCT Inner Butterfly mode 2
+        SVSHAPE0[6:11] <- 0b000011       # (i)DCT Inner Butterfly mode 4
          # copy
          SVSHAPE1[0:31] <- SVSHAPE0[0:31]
          SVSHAPE2[0:31] <- SVSHAPE0[0:31]
@@ -282,31 +278,58 @@ Pseudo-code:
          else
              SVSHAPE0[30:31] <- 0b11          # DCT mode
          SVSHAPE0[6:11] <- 0b000101       # DCT "half-swap" mode
-    # set schedule up for parallel reduction
+    # set schedule up for parallel reduction or prefix-sum
      if (SVrm = 0b0111) then
+        # SVyd = 2 selects scan/prefix-sum instead of parallel reduction
+        is_scan <- SVyd = 2
          # calculate the total number of operations (brute-force)
          vlen[0:6] <- [0] * 7
          itercount[0:6] <- (0b00 || SVxd) + 0b0000001
-        step[0:6] <- 0b0000001
-        i[0:6] <- 0b0000000
-        do while step <u itercount
-            newstep <- step[1:6] || 0b0
-            j[0:6] <- 0b0000000
-            do while (j+step <u itercount)
-                j <- j + newstep
-                i <- i + 1
-            step <- newstep
-        # VL in Parallel-Reduce is the number of operations
-        vlen[0:6] <- i
+        if is_scan then
+            # walk the prefix-sum schedule, but instead of performing
+            # each operation just count it by incrementing vlen
+            dist <- 1
+            vlen[0:6] <- 0
+            do while dist <u itercount
+                start <- dist * 2 - 1
+                step <- dist * 2
+                i <- start
+                do while i <u itercount
+                    vlen[0:6] <- vlen[0:6] + 1
+                    i <- i + step
+                dist <- dist * 2
+            dist <- dist / 2
+            do while dist != 0
+                i <- dist * 3 - 1
+                do while i <u itercount
+                    vlen[0:6] <- vlen[0:6] + 1
+                    i <- i + dist * 2
+                dist <- dist / 2
+        else
+            step <- 0b0000001
+            i <- 0b0000000
+            do while step <u itercount
+                newstep <- step[1:6] || 0b0
+                j[0:6] <- 0b0000000
+                do while (j+step <u itercount)
+                    j <- j + newstep
+                    i <- i + 1
+                step <- newstep
+            # VL in Parallel-Reduce is the number of operations
+            vlen[0:6] <- i
          # set up template in SVSHAPE0, then copy to 1. only 2 needed
          SVSHAPE0[0:5] <- (0b0 || SVxd)   # xdim
          SVSHAPE0[12:17] <- (0b0 || SVzd)   # zdim - "striding" (2D DCT)
          mscale <- (0b0 || SVzd) + 1
-        SVSHAPE0[30:31] <- 0b10          # parallel reduce submode
+        SVSHAPE0[30:31] <- 0b10          # parallel reduce/prefix submode
          # copy
          SVSHAPE1[0:31] <- SVSHAPE0[0:31]
-        # set up right operand (left operand 28:29 is zero)
-        SVSHAPE1[28:29] <- 0b01           # right operand
+        # set up submodes: parallel or prefix
+        SVSHAPE0[28:29] <- 0b00   # left operand
+        SVSHAPE1[28:29] <- 0b01   # right operand
+        if is_scan then
+            SVSHAPE0[28:29] <- 0b10   # left operand
+            SVSHAPE1[28:29] <- 0b11   # right operand
      # set VL, MVL and Vertical-First
      m[0:12] <- vlen * mscale
      maxvl[0:6] <- m[6:12]