add worked-out svp64 16-bit maddsubrs test case
author Jacob Lifshay <programmerjake@gmail.com>
Thu, 1 Jun 2023 06:46:23 +0000 (23:46 -0700)
committer Jacob Lifshay <programmerjake@gmail.com>
Thu, 1 Jun 2023 06:46:23 +0000 (23:46 -0700)
src/openpower/test/alu/maddsubrs_cases.py

index 366d63a7a17812d1ea7a280ea2ec84369d2d6dd3..0bb58b439ad85236fc536a3f1be9c677b8950a50 100644 (file)
@@ -1,14 +1,15 @@
 from openpower.sv.trans.svp64 import SVP64Asm
 import random
-from openpower.test.common import TestAccumulatorBase
+from openpower.test.common import TestAccumulatorBase, skip_case
 from openpower.endian import bigendian
 from openpower.simulator.program import Program
 from openpower.decoder.selectable_int import SelectableInt
 from openpower.decoder.power_enums import XER_bits
-from openpower.decoder.isa.caller import special_sprs
+from openpower.decoder.isa.caller import SVP64State
 from openpower.decoder.helpers import exts
 from openpower.test.state import ExpectedState
 import unittest
+import math
 
 class MADDSUBRSTestCase(TestAccumulatorBase):
 
@@ -92,6 +93,48 @@ class MADDSUBRSTestCase(TestAccumulatorBase):
         e.intregs[11] = 0xff0000000;
         self.add_case(Program(lst, bigendian), initial_regs, expected=e)
 
+    def case_maddsubrs_16bit_s14(self):
+        p = Program(list(SVP64Asm([
+            "sv.maddsubrs/w=16 *10,*20,14,*30",
+        ])), bigendian)
+
+        initial_regs = [0] * 32
+
+        # use somewhat reasonable i16 values since we're working in
+        # 2.14-bit fixed-point
+
+        initial_regs[10] = 0x1000_2000_3000_4000  # 0x0.4, 0x0.8, 0x0.c, 0x1.0
+
+        # 0x0.48d0, -0x0.0490, 0x0.d158, -0x0.48d4
+        initial_regs[20] = 0x1234_fedc_3456_edcb
+        cospi_16_64 = 11585  # from libvpx -- 0x0.b504 ~ 0.70709 ~ cos(pi/4)
+        initial_regs[30] = cospi_16_64 * 0x1_0001_0001_0001  # splat 4x
+
+        svstate = SVP64State()
+        svstate.vl = 4
+        svstate.maxvl = 4
+
+        e = ExpectedState(pc=8, int_regs=initial_regs)
+        e.intregs[10] = 0
+        e.intregs[11] = 0
+        for i in range(svstate.vl):
+            rt = (initial_regs[10] >> (i * 16)) & 0xFFFF  # extract element
+            rt -= (rt & 0x8000) << 1  # sign extend rt
+            ra = (initial_regs[20] >> (i * 16)) & 0xFFFF
+            ra -= (ra & 0x8000) << 1  # sign extend ra
+            rb = (initial_regs[30] >> (i * 16)) & 0xFFFF
+            rb -= (rb & 0x8000) << 1  # sign extend rb
+            s = rt + ra
+            d = rt - ra
+            # f64 is big enough to represent all relevant values exactly,
+            # so we can use float
+            rt = math.floor((s * rb) / (2 ** 14) + 0.5)  # mul & round & shr
+            rs = math.floor((d * rb) / (2 ** 14) + 0.5)
+            e.intregs[10] |= (rt & 0xFFFF) << (16 * i)  # insert element
+            e.intregs[11] |= (rs & 0xFFFF) << (16 * i)
+
+        self.add_case(p, initial_regs, expected=e, initial_svstate=svstate)
+
     def case_0_maddrs(self):
         isa = SVP64Asm(["maddsubrs 1,10,0,11",
                         "maddrs 1,10,0,12"])