add mode for half-swap, to be combined with LD-bit-reversed for loading DCT
author: Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Wed, 28 Jul 2021 11:16:18 +0000 (12:16 +0100)
committer: Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Wed, 28 Jul 2021 11:16:18 +0000 (12:16 +0100)
data

openpower/isa/simplev.mdwn
src/openpower/decoder/isa/remap_dct_yield.py
src/openpower/decoder/isa/svshape.py

index b8b565976e43b3b6a9a84f73b651e1d188c340a2..1d670ea05fbafc20fa811998bd89fa18c96b90de 100644 (file)
@@ -233,6 +233,13 @@ Pseudo-code:
         # for cos coefficient
         SVSHAPE1[28:29] <- 0b10           # ci schedule
         SVSHAPE2[28:29] <- 0b11           # size schedule
+    # set schedule up for DCT inverse of half-swapped ordering
+    if (SVRM = 0b0110) then
+        vlen[0:6] <- (0b00 || SVxd)
+        # set up template in SVSHAPE0
+        SVSHAPE0[0:5] <- (0b0 || SVxd)   # xdim
+        SVSHAPE0[30:31] <- 0b01          # DCT/FFT mode
+        SVSHAPE0[6:11] <- 0b000101       # DCT "half-swap" mode
     # set VL, MVL and Vertical-First
     SVSTATE[0:6] <- vlen
     SVSTATE[7:13] <- vlen
index 4d920558ef656e9fb1815215dfa48e606bcd86ad..a6bc3a82fa53dab7740e525037ca847fa2db130b 100644 (file)
@@ -41,6 +41,27 @@ def halfrev2(vec, pre_rev=True):
     return res
 
 
+# python "yield" can be iterated. use this to make it clear how
+# the indices are generated by using natural-looking nested loops
+#
+# Generator for the "half-swap" load/store schedule: yields n indices,
+# one at a time.  Fields read from SVSHAPE:
+#   * lims[0]   - n, the number of indices to generate
+#   * lims[1]   - "mode"; only reported in the debug print below
+#   * skip      - only reported in the debug print below
+#   * invxyz[0] - when true, the index list is yielded in reverse order
+def iterate_dct_inner_halfswap_loadstore(SVSHAPE):
+    # get indices to iterate over, in the required order
+    n = SVSHAPE.lims[0]
+    mode = SVSHAPE.lims[1]
+    # debug trace of the parameters the schedule is built from
+    print ("inner halfswap loadstore", n, mode, SVSHAPE.skip)
+
+    # reference list for not needing to do data-swaps, just swap what
+    # *indices* are referenced (two levels of indirection at the moment)
+    # pre-reverse the data-swap list so that it *ends up* in the order 0123..
+    ji = list(range(n))
+    # halfrev2 is defined earlier in this file; True is passed as its
+    # pre_rev argument.  NOTE(review): the exact permutation it produces
+    # is not visible from here - confirm against halfrev2 itself.
+    ji = halfrev2(ji, True)
+
+    # invert order if requested
+    if SVSHAPE.invxyz[0]:
+        ji.reverse()
+
+    # hand the indices out one by one, in list order
+    yield from ji
+
+
 # python "yield" can be iterated. use this to make it clear how
 # the indices are generated by using natural-looking nested loops
 def iterate_dct_inner_costable_indices(SVSHAPE):
index 8275e05e1e2d888f46301d3c5a2a3725a83c2098..88484ba2f3ea92a20e43583c92058512dd96d048 100644 (file)
@@ -5,7 +5,8 @@ from openpower.decoder.isa.remap_fft_yield import iterate_butterfly_indices
 from openpower.decoder.isa.remap_dct_yield import (
                                 iterate_dct_inner_butterfly_indices,
                                 iterate_dct_inner_costable_indices,
-                                iterate_dct_outer_butterfly_indices)
+                                iterate_dct_outer_butterfly_indices,
+                                iterate_dct_inner_halfswap_loadstore)
 from openpower.sv.svp64 import SVP64SHAPE
 import os
 from copy import deepcopy
@@ -127,6 +128,8 @@ class SVSHAPE(SelectableInt):
                 iterate_fn = iterate_dct_outer_butterfly_indices
             elif self.ydimsz == 5:
                 iterate_fn = iterate_dct_inner_costable_indices
+            elif self.ydimsz == 6:
+                iterate_fn = iterate_dct_inner_halfswap_loadstore
         # create a **NEW** iterator each time this is called
         return iterate_fn(deepcopy(self))