add FFT SHAPE pseudocode in svremap, and a schedule in ISACaller
author Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Tue, 6 Jul 2021 18:55:41 +0000 (19:55 +0100)
committer Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Tue, 6 Jul 2021 18:55:41 +0000 (19:55 +0100)
openpower/isa/simplev.mdwn
src/openpower/decoder/isa/caller.py

index ec47d0f758973c62eb38663e613bf0ce08d01ee8..37b39e26a7f56b26880cb4fa3a4c37a9b91382b0 100644 (file)
@@ -41,26 +41,41 @@ SVM-Form
 
 Pseudo-code:
 
-    # hack: clear out all SVSHAPEs and set them up for multiply
+    # clear out all SVSHAPEs
     SVSHAPE0[0:31] <- [0] * 32
     SVSHAPE1[0:31] <- [0] * 32
     SVSHAPE2[0:31] <- [0] * 32
     SVSHAPE3[0:31] <- [0] * 32
-    # set up template in SVSHAPE0, then copy to 1-3
-    SVSHAPE0[0:5] <- (0b0 || SVxd)   # xdim
-    SVSHAPE0[6:11] <- (0b0 || SVyd)   # ydim
-    SVSHAPE0[12:17] <- (0b0 || SVzd)   # zdim
-    SVSHAPE0[28:29] <- 0b11           # skip z
-    # copy
-    SVSHAPE1[0:31] <- SVSHAPE0[0:31]
-    SVSHAPE2[0:31] <- SVSHAPE0[0:31]
-    SVSHAPE3[0:31] <- SVSHAPE0[0:31]
-    # set up FRA
-    SVSHAPE1[18:20] <- 0b001          # permute x,z,y
-    SVSHAPE1[28:29] <- 0b01           # skip z
-    # FRC
-    SVSHAPE2[18:20] <- 0b001          # permute x,z,y
-    SVSHAPE2[28:29] <- 0b11           # skip y
+    # set schedule up for multiply
+    if (SVRM = 0b00000) then
+        # set up template in SVSHAPE0, then copy to 1-3
+        SVSHAPE0[0:5] <- (0b0 || SVxd)   # xdim
+        SVSHAPE0[6:11] <- (0b0 || SVyd)   # ydim
+        SVSHAPE0[12:17] <- (0b0 || SVzd)   # zdim
+        SVSHAPE0[28:29] <- 0b11           # skip z
+        # copy
+        SVSHAPE1[0:31] <- SVSHAPE0[0:31]
+        SVSHAPE2[0:31] <- SVSHAPE0[0:31]
+        SVSHAPE3[0:31] <- SVSHAPE0[0:31]
+        # set up FRA
+        SVSHAPE1[18:20] <- 0b001          # permute x,z,y
+        SVSHAPE1[28:29] <- 0b01           # skip z
+        # FRC
+        SVSHAPE2[18:20] <- 0b001          # permute x,z,y
+        SVSHAPE2[28:29] <- 0b11           # skip y
+    # set schedule up for butterfly
+    if (SVRM = 0b00001) then
+        # set up template in SVSHAPE0, then copy to 1-2
+        # for FRA and FRT
+        SVSHAPE0[0:5] <- (0b0 || SVxd)   # xdim
+        SVSHAPE0[30:31] <- 0b01          # Butterfly mode
+        # copy
+        SVSHAPE1[0:31] <- SVSHAPE0[0:31]
+        SVSHAPE2[0:31] <- SVSHAPE0[0:31]
+        # set up FRB and FRS
+        SVSHAPE1[28:29] <- 0b01           # j+halfstep schedule
+        # FRC (coefficients)
+        SVSHAPE2[28:29] <- 0b10           # k schedule
 
 Special Registers Altered:
 
index d769031e95c0507a0b5ff83c9c64b3cf5015fba7..4f26d97a78a8bf27a8978c9e9a3f5e116726ce4e 100644 (file)
@@ -1218,29 +1218,43 @@ class ISACaller:
                 print ("    mode", shape.mode)
                 print ("    skip", shape.skip)
 
-            remaps = [SVSHAPE0.get_iterator(),
-                      SVSHAPE1.get_iterator(),
-                      SVSHAPE2.get_iterator(),
-                      SVSHAPE3.get_iterator(),
+            remaps = [(SVSHAPE0, SVSHAPE0.get_iterator()),
+                      (SVSHAPE1, SVSHAPE1.get_iterator()),
+                      (SVSHAPE2, SVSHAPE2.get_iterator()),
+                      (SVSHAPE3, SVSHAPE3.get_iterator()),
                      ]
             rremaps = []
-            for i, remap in enumerate(remaps):
+            for i, (shape, remap) in enumerate(remaps):
                 # XXX hardcoded! pick dststep for out (i==0) else srcstep
                 step = dststep if (i == 0) else srcstep
                 # this is terrible.  O(N^2) looking for the match. but hey.
                 for idx, remap_idx in enumerate(remap):
                     if idx == step:
                         break
-                if i == 0:
-                    yield self.dec2.o_step.eq(remap_idx)
-                    yield self.dec2.o2_step.eq(remap_idx)
-                elif i == 1:
-                    yield self.dec2.in1_step.eq(remap_idx)
-                elif i == 2:
-                    yield self.dec2.in3_step.eq(remap_idx)
-                elif i == 3:
-                    yield self.dec2.in2_step.eq(remap_idx)
-                rremaps.append((i, idx, remap_idx))
+                # multiply mode
+                if shape.mode == 0b00:
+                    if i == 0:
+                        yield self.dec2.o_step.eq(remap_idx)   # RT
+                        yield self.dec2.o2_step.eq(remap_idx)  # EA
+                    elif i == 1:
+                        yield self.dec2.in1_step.eq(remap_idx) # RA
+                    elif i == 2:
+                        yield self.dec2.in3_step.eq(remap_idx) # RB
+                    elif i == 3:
+                        yield self.dec2.in2_step.eq(remap_idx) # RC
+                # FFT butterfly mode
+                if shape.mode == 0b01:
+                    if i == 0:
+                        yield self.dec2.o_step.eq(remap_idx)   # RT
+                        yield self.dec2.in1_step.eq(remap_idx) # RA
+                    elif i == 1:
+                        yield self.dec2.in2_step.eq(remap_idx) # RB
+                        yield self.dec2.o2_step.eq(remap_idx)  # EA (FRS)
+                    elif i == 2:
+                        yield self.dec2.in3_step.eq(remap_idx) # RC
+                    elif i == 3:
+                        pass # no SVSHAPE3
+                rremaps.append((i, idx, remap_idx)) # debug printing
             for x in rremaps:
                 print ("shape remap", x)
         # after that, settle down (combinatorial) to let Vector reg numbers