add LD bit-reversed unit test
author Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Sat, 26 Jun 2021 17:42:56 +0000 (18:42 +0100)
committer Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Sat, 26 Jun 2021 17:42:56 +0000 (18:42 +0100)
add LD/ST bit-reverse logic in ISACaller

src/openpower/decoder/helpers.py
src/openpower/decoder/isa/caller.py
src/openpower/decoder/isa/test_caller_svp64_ldst.py
src/openpower/decoder/power_fields.py
src/openpower/sv/trans/svp64.py

index b8ec70b782a110f5970843e553ff1d228d288a6d..802a3e3528045ba98a35d737a12bf245470c4b89 100644 (file)
@@ -378,7 +378,7 @@ def bitrev(val, VL):
     'width' bits of the integer 'val'
     """
     result = 0
-    width = VL.bit_length()
+    width = VL.bit_length()-1
     for _ in range(width):
         result = (result << 1) | (val & 1)
         val >>= 1
index d1e6bd99d6c5010d8f560e87e8a3a34570a082e8..d1a77a2a2c0f0743d9487e47526f727a6cc1dfa9 100644 (file)
@@ -28,7 +28,7 @@ from openpower.decoder.power_enums import (spr_dict, spr_byname, XER_bits,
 
 from openpower.decoder.power_enums import SVPtype
 
-from openpower.decoder.helpers import exts, gtu, ltu, undefined
+from openpower.decoder.helpers import (exts, gtu, ltu, undefined, bitrev)
 from openpower.consts import PIb, MSRb  # big-endian (PowerISA versions)
 from openpower.consts import SVP64CROffs
 from openpower.decoder.power_svp64 import SVP64RM, decode_extra
@@ -1185,33 +1185,51 @@ class ISACaller:
         # use info.form to detect
         replace_d = False # update / replace constant in pseudocode
         if self.is_svp64_mode:
-            D = yield self.dec2.dec.fields.FormD.D[0:16]
-            D = exts(D, 16) # sign-extend to integer
             ldstmode = yield self.dec2.rm_dec.ldstmode
+            # bitreverse mode reads SVD (or SVDS - TODO)
+            # *BUT*... because this is "overloading" of LD operations,
+            # it gets *STORED* into D (or DS, TODO)
+            if ldstmode == SVP64LDSTmode.BITREVERSE.value:
+                imm = yield self.dec2.dec.fields.FormSVD.SVD[0:11]
+                imm = exts(imm, 11) # sign-extend to integer
+                print ("bitrev SVD", imm)
+                replace_d = True
+            else:
+                imm = yield self.dec2.dec.fields.FormD.D[0:16]
+                imm = exts(imm, 16) # sign-extend to integer
             # get the right step. LD is from srcstep, ST is dststep
             op = yield self.dec2.e.do.insn_type
             offsmul = 0
             if op == MicrOp.OP_LOAD.value:
                 offsmul = srcstep
-                log("D-field src", D, offsmul)
+                log("D-field src", imm, offsmul)
             elif op == MicrOp.OP_STORE.value:
                 offsmul = dststep
-                log("D-field dst", D, offsmul)
+                log("D-field dst", imm, offsmul)
+            # bit-reverse mode
+            if ldstmode == SVP64LDSTmode.BITREVERSE.value:
+                # manually look up RC, sigh
+                RC = yield self.dec2.dec.RC[0:5]
+                RC = self.gpr(RC)
+                log ("RC", RC.value, "imm", imm, "offs", bin(offsmul),
+                     "rev", bin(bitrev(offsmul, vl)))
+                imm = SelectableInt((imm * bitrev(offsmul, vl)) << RC.value, 32)
             # Unit-Strided LD/ST adds offset*width to immediate
-            if ldstmode == SVP64LDSTmode.UNITSTRIDE.value:
+            elif ldstmode == SVP64LDSTmode.UNITSTRIDE.value:
                 ldst_len = yield self.dec2.e.do.data_len
-                D = SelectableInt(D + offsmul * ldst_len, 32)
+                imm = SelectableInt(imm + offsmul * ldst_len, 32)
                 replace_d = True
             # Element-strided multiplies the immediate by element step
             elif ldstmode == SVP64LDSTmode.ELSTRIDE.value:
-                D = SelectableInt(D * offsmul, 32)
+                imm = SelectableInt(imm * offsmul, 32)
                 replace_d = True
             ldst_ra_vec = yield self.dec2.rm_dec.ldst_ra_vec
             ldst_imz_in = yield self.dec2.rm_dec.ldst_imz_in
-            log("LDSTmode", ldstmode, offsmul, D, ldst_ra_vec, ldst_imz_in)
+            log("LDSTmode", ldstmode, SVP64LDSTmode.BITREVERSE.value,
+                            offsmul, imm, ldst_ra_vec, ldst_imz_in)
         # new replacement D
         if replace_d:
-            self.namespace['D'] = D
+            self.namespace['D'] = imm
 
         # "special" registers
         for special in info.special_regs:
index 0ebccea84e403c52332d6aaf43cdfa703da09780..99e4f517322d284a1f3de8a7a72864c4a1ff7df0 100644 (file)
@@ -22,7 +22,7 @@ class DecoderTestCase(FHDLTestCase):
         for i in range(32):
             self.assertEqual(sim.gpr(i), SelectableInt(expected[i], 64))
 
-    def test_sv_load_store_elementstride(self):
+    def tst_sv_load_store_elementstride(self):
         """>>> lst = ["addi 1, 0, 0x0010",
                         "addi 2, 0, 0x0008",
                         "addi 5, 0, 0x1234",
@@ -67,7 +67,7 @@ class DecoderTestCase(FHDLTestCase):
             self.assertEqual(sim.gpr(9), SelectableInt(0x1234, 64))
             self.assertEqual(sim.gpr(10), SelectableInt(0x1235, 64))
 
-    def test_sv_load_store_unitstride(self):
+    def tst_sv_load_store_unitstride(self):
         """>>> lst = ["addi 1, 0, 0x0010",
                         "addi 2, 0, 0x0008",
                         "addi 5, 0, 0x1234",
@@ -110,6 +110,75 @@ class DecoderTestCase(FHDLTestCase):
             self.assertEqual(sim.gpr(9), SelectableInt(0x1234, 64))
             self.assertEqual(sim.gpr(10), SelectableInt(0x1235, 64))
 
+    def test_sv_load_store_bitreverse(self):
+        """>>> lst = ["addi 1, 0, 0x0010",
+                        "addi 2, 0, 0x0000",
+                        "addi 5, 0, 0x101",
+                        "addi 6, 0, 0x202",
+                        "addi 7, 0, 0x303",
+                        "addi 8, 0, 0x404",
+                        "sv.stw 5.v, 0(1)",
+                        "sv.lwzbr 9.v, 4(1), 2"]
+
+        note: bitreverse mode is... odd.  it's the butterfly generator
+        from Cooley-Tukey FFT:
+        https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Data_reordering,_bit_reversal,_and_in-place_algorithms
+
+        bitreverse LD is computed as:
+        for i in range(VL):
+            EA = (RA|0) + ((EXTS(SVD) * bitreverse(i, VL)) << RC)
+
+        bitreversal of 0 1 2 3 in binary 0b00 0b01 0b10 0b11
+        produces       0 2 1 3 in binary 0b00 0b10 0b01 0b11
+
+        and thus creates the butterfly needed for one iteration of FFT.
+        the RC (shift) is to be able to offset the LDs by Radix-2 spans
+        """
+        lst = SVP64Asm(["addi 1, 0, 0x0010",
+                        "addi 2, 0, 0x0000",
+                        "addi 5, 0, 0x101",
+                        "addi 6, 0, 0x202",
+                        "addi 7, 0, 0x303",
+                        "addi 8, 0, 0x404",
+                        "sv.stw 5.v, 0(1)",  # scalar r1 + 0 + wordlen*offs
+                        "sv.lwzbr 9.v, 4(1), 2"]) # bit-reversed
+        lst = list(lst)
+
+        # SVSTATE (in this case, VL=4)
+        svstate = SVP64State()
+        svstate.vl[0:7] = 4 # VL
+        svstate.maxvl[0:7] = 4 # MAXVL
+        print ("SVSTATE", bin(svstate.spr.asint()))
+
+        with Program(lst, bigendian=False) as program:
+            sim = self.run_tst_program(program, svstate=svstate)
+            mem = sim.mem.dump(printout=False)
+            print (mem)
+
+            self.assertEqual(mem, [(16, 0x020200000101),
+                                   (24, 0x040400000303)])
+            print(sim.gpr(1))
+            # from STs
+            self.assertEqual(sim.gpr(5), SelectableInt(0x101, 64))
+            self.assertEqual(sim.gpr(6), SelectableInt(0x202, 64))
+            self.assertEqual(sim.gpr(7), SelectableInt(0x303, 64))
+            self.assertEqual(sim.gpr(8), SelectableInt(0x404, 64))
+            # r1=0x10, RC=0, offs=4: contents of memory expected at:
+            #    element 0:   EA = r1 + bitrev(0b00)*4 => 0x10 + 0b00*4 => 0x10
+            #    element 1:   EA = r1 + bitrev(0b01)*4 => 0x10 + 0b10*4 => 0x18
+            #    element 2:   EA = r1 + bitrev(0b10)*4 => 0x10 + 0b01*4 => 0x14
+            #    element 3:   EA = r1 + bitrev(0b11)*4 => 0x10 + 0b11*4 => 0x1c
+            # therefore loaded from (bit-reversed indexing):
+            #    r9  => mem[0x10] which was stored from r5
+            #    r10 => mem[0x18] which was stored from r7
+            #    r11 => mem[0x14] which was stored from r6
+            #    r12 => mem[0x1c] which was stored from r8
+            self.assertEqual(sim.gpr(9), SelectableInt(0x101, 64))
+            self.assertEqual(sim.gpr(10), SelectableInt(0x303, 64))
+            self.assertEqual(sim.gpr(11), SelectableInt(0x202, 64))
+            self.assertEqual(sim.gpr(12), SelectableInt(0x404, 64))
+
     def run_tst_program(self, prog, initial_regs=None,
                               svstate=None):
         if initial_regs is None:
index 8c9726253fdbe1d5ee9d9fa7c90564b0d57eb345..8f8d5544567c4ae658fb296214b0e08b22a86985 100644 (file)
@@ -146,7 +146,7 @@ class DecodeFields:
             "RT": self.FormX.RT,
             "RA": self.FormX.RA,
             "RB": self.FormX.RB,
-            "RC": self.FormVA.RB,
+            "RC": self.FormVA.RC,
             "SI": self.FormD.SI,
             "UI": self.FormD.UI,
             "L": self.FormD.L,
index de30a3854b750d7efbc85b1acc7bbc93824ba8eb..cb9c904c1269563174b25a7206ba6f18371bb824 100644 (file)
@@ -939,6 +939,7 @@ if __name__ == '__main__':
              'sv.addi win2.v, win.v, -1',
              'sv.add./mrr 5.v, 2.v, 1.v',
              'sv.lhzbr 5.v, 11(9.v), 15',
+             'sv.lwzbr 5.v, 11(9.v), 15',
     ]
     isa = SVP64Asm(lst, macros=macros)
     print ("list", list(isa))