starting on pospopcount assembler
author Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Tue, 21 Nov 2023 17:40:27 +0000 (17:40 +0000)
committer Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Fri, 22 Dec 2023 19:26:21 +0000 (19:26 +0000)
src/openpower/decoder/isa/test_caller_svp64_pospopcount.py

index dbefe2213d159371e3934ff3f743e52fae648e06..800218a86f81f7db7736edfe8ed9cc1d9e6e1de3 100644 (file)
@@ -32,48 +32,31 @@ class PosPopCountTestCase(FHDLTestCase):
         for i in range(32):
             self.assertEqual(sim.gpr(i), SelectableInt(expected[i], 64))
 
-    def test_sv_load_store_strncpy(self):
-        """>>> lst = [
-                    ]
-
-        strncpy using post-increment ld/st, sv.bc, and data-dependent ffirst.
-        note that /lf (Load-Fault) mode is not set in this example when it
-        should be. however implementing Load-Fault in ISACaller is tricky
-        (requires implementing multiple hardware models)
+    def test_sv_pospopcount(self):
+        """positional popcount
         """
-        maxvl = 4
         lst = SVP64Asm(
             [
                 "mtspr 9, 3",                   # move r3 to CTR
-                "addi 0,0,0",                   # initialise r0 to zero
+                "sv.addi *8, 0, 0",             # initialise r8-r15 to zero
                 # chr-copy loop starts here:
                 #   for (i = 0; i < n && src[i] != '\0'; i++)
                 #        dest[i] = src[i];
-                # VL (and r1) = MIN(CTR,MAXVL=4)
-                "setvl 1,0,%d,0,1,1" % maxvl,
-                # load VL bytes (update r10 addr)
-                "sv.lbzu/pi *16, 1(10)",         # should be /lf here as well
-                "sv.cmpi/ff=eq/vli *0,1,*16,0",  # cmp against zero, truncate VL
-                # store VL bytes (update r12 addr)
-                "sv.stbu/pi *16, 1(12)",
-                "sv.bc/all 0, *2, -0x1c",       # test CTR, stop if cmpi failed
-                # zeroing loop starts here:
-                #   for ( ; i < n; i++)
-                #       dest[i] = '\0';
-                # VL (and r1) = MIN(CTR,MAXVL=4)
-                "setvl 1,0,%d,0,1,1" % maxvl,
-                # store VL zeros (update r12 addr)
-                "sv.stbu/pi 0, 1(12)",
-                "sv.bc 16, *0, -0xc",           # dec CTR by VL, stop at zero
+                # VL = MIN(CTR,MAXVL=8), Rc=1 (CR0 set if CTR ends)
+                "setvl. 3,0,8,0,1,1",
+                # load VL bytes (update r4 addr) but compressed (dw=8)
+                "sv.lbzu/pi/dw=8 *16, 1(4)",   # should be /lf here as well
+                # branch back while CTR is still non-zero
+                "sv.bc 16, 0, -0x10",    # test CTR, stop if cmpi failed
             ]
         )
         lst = list(lst)
 
-        tst_string = "hello\x00bye\x00"
-        initial_regs = [0] * 32
-        initial_regs[3] = len(tst_string)  # including the zero
-        initial_regs[10] = 16  # load address
-        initial_regs[12] = 40  # store address
+        tst_array = [23,19,25,189,76,255,32,191,67,205,0,39,107]
+        tst_array = [1,2,3,4,5,6,7,8,9] #8,9,10,11,12,13]
+        initial_regs = [0] * 64
+        initial_regs[3] = len(tst_array)
+        initial_regs[4] = 16  # load address
 
         # some memory with identifying garbage in it
         initial_mem = {16: 0xf0f1_f2f3_f4f5_f6f7,
@@ -82,23 +65,15 @@ class PosPopCountTestCase(FHDLTestCase):
                        48: 0x9091_9293_9495_9697,
                        }
 
-        for i, c in enumerate(tst_string):
-            write_byte(initial_mem, 16+i, ord(c))
-
-        # now get the expected results: copy the string to the other address,
-        # but terminate at first zero (strncpy, duh)
-        expected_mem = deepcopy(initial_mem)
-        copyzeros = False
-        strlen = 0
-        for i, c in enumerate(tst_string):
-            c = ord(c)
-            if not copyzeros:
-                write_byte(expected_mem, 40+i, c)
-                strlen = i+1
-            else:
-                write_byte(expected_mem, 40+i, 0)
-            if c == 0:
-                copyzeros = True
+        # overwrite the garbage with the test data
+        for i, c in enumerate(tst_array):
+            write_byte(initial_mem, 16+i, c)
+
+        # now get the expected results: do a simple pospopcount
+        expected = [0]*8
+        for c in tst_array:
+            for j in range(8):
+                expected[j] += (c >> j) & 1
 
         with Program(lst, bigendian=False) as program:
             sim = self.run_tst_program(program, initial_mem=initial_mem,
@@ -110,17 +85,11 @@ class PosPopCountTestCase(FHDLTestCase):
             #    element 1:   r1=0x10, D=24, => EA = 0x10+24*1 = 40 (0x28)
             # therefore, at address 0x10 ==> 0x1234
             # therefore, at address 0x28 ==> 0x1235
-            for (k, val) in expected_mem.items():
-                print("mem, val", k, hex(val))
-            self.assertEqual(mem, list(expected_mem.items()))
-            print(sim.gpr(1))
-            # reg 10 (the LD EA) is expected to be nearest
-            # 16 + strlen, rounded up
-            rounded = ((strlen+maxvl-1) // maxvl) * maxvl
-            self.assertEqual(sim.gpr(10), SelectableInt(16+rounded, 64))
-            # whereas reg 10 (the ST EA) is expected to be 40+strlen
-            self.assertEqual(sim.gpr(12), SelectableInt(
-                40+len(tst_string), 64))
+            for (k, val) in enumerate(expected):
+                print("idx, count", k, val)
+            for (k, val) in enumerate(expected):
+                #self.assertEqual(mem, list(expected_mem.items()))
+                print(sim.gpr(k))
 
     def run_tst_program(self, prog, initial_regs=None,
                         svstate=None, initial_fprs=None,
@@ -134,8 +103,6 @@ class PosPopCountTestCase(FHDLTestCase):
                             mem=initial_mem)
         print("GPRs")
         simulator.gpr.dump()
-        print("FPRs")
-        simulator.fpr.dump()
         return simulator