Moved maddsubrs/maddrs/msubrs instructions to separate files
author Konstantinos Margaritis <konstantinos.margaritis@vectorcamp.gr>
Fri, 21 Jul 2023 14:05:54 +0000 (14:05 +0000)
committer Konstantinos Margaritis <konstantinos.margaritis@vectorcamp.gr>
Fri, 21 Jul 2023 22:31:33 +0000 (22:31 +0000)
As per Jacob's suggestion, simplified maddsubrs by removing masks
and fixing overflow problems.

13 files changed:
openpower/isa/butterfly.mdwn [deleted file]
openpower/isa/maddrs.mdwn [new file with mode: 0644]
openpower/isa/maddsubrs.mdwn [new file with mode: 0644]
openpower/isa/msubrs.mdwn [new file with mode: 0644]
openpower/isatables/RM-1P-2S1D.csv
openpower/isatables/minor_22.csv
src/openpower/decoder/isa/.gitignore
src/openpower/decoder/isa/caller.py
src/openpower/decoder/isa/test_caller_maddrs.py [new file with mode: 0644]
src/openpower/decoder/power_decoder2.py
src/openpower/decoder/power_enums.py
src/openpower/test/alu/maddrs_cases.py [new file with mode: 0644]
src/openpower/test/alu/maddsubrs_cases.py

diff --git a/openpower/isa/butterfly.mdwn b/openpower/isa/butterfly.mdwn
deleted file mode 100644 (file)
index 93a6c4b..0000000
+++ /dev/null
@@ -1,75 +0,0 @@
-<!-- SVP64 Butterfly DCT Instructions here described are based on -->
-
-<!-- PLEASE NOTE THESE ARE UNAPPROVED AND DRAFT, NOT SUBMITTED TO OPF ISA WG -->
-
-# [DRAFT] Integer Butterfly Multiply Add/Sub FFT/DCT
-
-A-Form
-
-* maddsubrs  RT,RA,SH,RB
-
-Pseudo-code:
-
-    n <- SH
-    sum <- (RT) + (RA)
-    diff <- (RT) - (RA)
-    prod1 <- MULS(RB, sum)
-    prod2 <- MULS(RB, diff)
-    if n = 0 then
-        prod1_lo <- prod1[XLEN:(XLEN*2) - 1]
-        prod2_lo <- prod2[XLEN:(XLEN*2) - 1]
-        RT <- prod1_lo
-        RS <- prod2_lo
-    else
-        round <- [0]*(XLEN*2)
-        round[XLEN*2 - n] <- 1
-        prod1 <- prod1 + round
-        prod2 <- prod2 + round
-        m <- MASK(XLEN - n - 2, XLEN - 1)
-        res1 <- prod1[XLEN - n:XLEN*2 - n - 1]
-        res2 <- prod2[XLEN - n:XLEN*2 - n - 1]
-        signbit1 <- prod1[0]
-        signbit2 <- prod2[0]
-        smask1 <- ([signbit1]*XLEN) & ¬m
-        smask2 <- ([signbit2]*XLEN) & ¬m
-        RT <- (res1 | smask1)
-        RS <- (res2 | smask2)
-
-Special Registers Altered:
-
-    None
-
-# [DRAFT] Integer Butterfly Multiply Add and Accumulate FFT/DCT
-
-A-Form
-
-* maddrs  RT,RA,SH,RB
-
-Pseudo-code:
-
-    n <- SH
-    prod <- MULS(RB, RA)
-    if n = 0 then
-        prod_lo <- prod[XLEN:(XLEN*2) - 1]
-        RT <- (RT) + prod_lo
-        RS <- (RS) - prod_lo
-    else
-        res1[0:XLEN*2-1] <- (EXTSXL((RT)[0], 1) || (RT)) + prod
-        res2[0:XLEN*2-1] <- (EXTSXL((RS)[0], 1) || (RS)) - prod
-        round <- [0]*XLEN*2
-        round[XLEN*2 - n] <- 1
-        res1 <- res1 + round
-        res2 <- res2 + round
-        signbit1 <- res1[0]
-        signbit2 <- res2[0]
-        m <- MASK(XLEN -n - 2, XLEN - 1)
-        res1 <- res1[XLEN - n:XLEN*2 - n -1]
-        res2 <- res2[XLEN - n:XLEN*2 - n -1]
-        smask1 <- ([signbit1]*XLEN) & ¬m
-        smask2 <- ([signbit2]*XLEN) & ¬m
-        RT <- (res1 | smask1)
-        RS <- (res2 | smask2)
-
-Special Registers Altered:
-
-    None
diff --git a/openpower/isa/maddrs.mdwn b/openpower/isa/maddrs.mdwn
new file mode 100644 (file)
index 0000000..f768044
--- /dev/null
@@ -0,0 +1,27 @@
+<!-- SVP64 Butterfly DCT Instructions here described are based on -->
+
+<!-- PLEASE NOTE THESE ARE UNAPPROVED AND DRAFT, NOT SUBMITTED TO OPF ISA WG -->
+
+# [DRAFT] Integer Butterfly Multiply Add and Accumulate FFT/DCT
+
+A-Form
+
+* maddrs  RT,RA,RB,SH
+
+Pseudo-code:
+
+    n <- SH
+    prod <- MULS(RB, RA)
+    if n = 0 then
+        prod_lo <- prod[XLEN:(XLEN*2) - 1]
+        RT <- (RT) + prod_lo
+    else
+        res[0:XLEN*2-1] <- (EXTSXL((RT)[0], 1) || (RT)) + prod
+        round <- [0]*XLEN*2
+        round[XLEN*2 - n] <- 1
+        res <- res + round
+        RT <- res[XLEN - n:XLEN*2 - n -1]
+
+Special Registers Altered:
+
+    None
diff --git a/openpower/isa/maddsubrs.mdwn b/openpower/isa/maddsubrs.mdwn
new file mode 100644 (file)
index 0000000..96efe5a
--- /dev/null
@@ -0,0 +1,35 @@
+<!-- SVP64 Butterfly DCT Instructions here described are based on -->
+
+<!-- PLEASE NOTE THESE ARE UNAPPROVED AND DRAFT, NOT SUBMITTED TO OPF ISA WG -->
+
+# [DRAFT] Integer Butterfly Multiply Add/Sub Round Shift for FFT/DCT
+
+A-Form
+
+* maddsubrs  RT,RA,RB,SH
+
+Pseudo-code:
+
+    n <- SH
+    sum <- (RT[0] || RT) + (RA[0] || RA)
+    diff <- (RT[0] || RT) - (RA[0] || RA)
+    prod1 <- MULS(RB, sum)
+    prod2 <- MULS(RB, diff)
+    if n = 0 then
+        prod1_lo <- prod1[XLEN+1:(XLEN*2)]
+        prod2_lo <- prod2[XLEN+1:(XLEN*2)]
+        RT <- prod1_lo
+        RS <- prod2_lo
+    else
+        round <- [0]*(XLEN*2 + 1)
+        round[XLEN*2 - n + 1] <- 1
+        prod1 <- prod1 + round
+        prod2 <- prod2 + round
+        res1 <- prod1[XLEN - n + 1:XLEN*2 - n]
+        res2 <- prod2[XLEN - n + 1:XLEN*2 - n]
+        RT <- res1
+        RS <- res2
+
+Special Registers Altered:
+
+    None
diff --git a/openpower/isa/msubrs.mdwn b/openpower/isa/msubrs.mdwn
new file mode 100644 (file)
index 0000000..a2d229d
--- /dev/null
@@ -0,0 +1,27 @@
+<!-- SVP64 Butterfly DCT Instructions here described are based on -->
+
+<!-- PLEASE NOTE THESE ARE UNAPPROVED AND DRAFT, NOT SUBMITTED TO OPF ISA WG -->
+
+# [DRAFT] Integer Butterfly Multiply Subtract and Accumulate FFT/DCT
+
+A-Form
+
+* msubrs  RT,RA,RB,SH
+
+Pseudo-code:
+
+    n <- SH
+    prod <- MULS(RB, RA)
+    if n = 0 then
+        prod_lo <- prod[XLEN:(XLEN*2) - 1]
+        RT <- (RT) - prod_lo
+    else
+        res[0:XLEN*2-1] <- (EXTSXL((RT)[0], 1) || (RT)) - prod
+        round <- [0]*XLEN*2
+        round[XLEN*2 - n] <- 1
+        res <- res + round
+        RT <- res[XLEN - n:XLEN*2 - n -1]
+
+Special Registers Altered:
+
+    None
diff --git a/openpower/isatables/RM-1P-2S1D.csv b/openpower/isatables/RM-1P-2S1D.csv
index 0de87cee148c426d96007f0f29232a80895f0f5c..3a51f6061ea90bb8c40728c9bc545835d6cab1ad 100644 (file)
@@ -26,6 +26,7 @@ modsw,NORMAL,,1P,EXTRA3,NO,d:RT,s:RA,s:RB,0,RA,RB,0,RT,0,0,0
 30/6=fmrgew,NORMAL,,1P,EXTRA3,NO,d:FRT,s:FRA,s:FRB,0,FRA,FRB,0,FRT,0,0,0
 maddsubrs,NORMAL,,1P,EXTRA3,NO,s:RT;d:RT,s:RA,s:RB,0,RA,0,RB,RT,0,0,0
 maddrs,NORMAL,,1P,EXTRA3,NO,s:RT;d:RT,s:RA,s:RB,0,RA,0,RB,RT,0,0,0
+msubrs,NORMAL,,1P,EXTRA3,NO,s:RT;d:RT,s:RA,s:RB,0,RA,0,RB,RT,0,0,0
 rlwnm,NORMAL,,1P,EXTRA3,NO,d:RA;d:CR0,s:RB,s:RS,0,0,RB,RS,RA,0,CR0,0
 minmax,NORMAL,,1P,EXTRA3,NO,d:RT;d:CR0,s:RA,s:RB,0,RA_OR_ZERO,RB,0,RT,0,CR0,0
 sadd,NORMAL,,1P,EXTRA3,NO,d:RT;d:CR0,s:RA,s:RB,0,RA,RB,0,RT,0,CR0,0
diff --git a/openpower/isatables/minor_22.csv b/openpower/isatables/minor_22.csv
index 5f14c358e8d5cc5f468ffbc2e1c9c726d7965a31..adf1c3a0c64edaf6e807c598858ebb8049d910cb 100644 (file)
@@ -42,3 +42,4 @@ opcode,unit,internal op,in1,in2,in3,out,CR in,CR out,inv A,inv out,cry in,cry ou
 -----01011-,ALU,OP_FISHMV,FRS,CONST_UI,NONE,FRS,NONE,NONE,0,0,ZERO,0,NONE,0,0,0,0,0,0,NONE,0,0,fishmv,DX,,1,unofficial until submitted and approved/renumbered by the opf isa wg
 ------01000,ALU,OP_MADDSUBRS,RA,CONST_SH,RB,RT,NONE,NONE,0,0,ZERO,0,NONE,0,0,0,0,1,0,NONE,0,0,maddsubrs,A,,1,unofficial until submitted and approved/renumbered by the opf isa wg
 ------01001,ALU,OP_MADDRS,RA,CONST_SH,RB,RT,NONE,NONE,0,0,ZERO,0,NONE,0,0,0,0,1,0,NONE,0,0,maddrs,A,,1,unofficial until submitted and approved/renumbered by the opf isa wg
+------01011,ALU,OP_MSUBRS,RA,CONST_SH,RB,RT,NONE,NONE,0,0,ZERO,0,NONE,0,0,0,0,1,0,NONE,0,0,msubrs,A,,1,unofficial until submitted and approved/renumbered by the opf isa wg
diff --git a/src/openpower/decoder/isa/.gitignore b/src/openpower/decoder/isa/.gitignore
index c72caceb5159f71d5596389c09995314cb4f2f22..45f8711bb8ff14a17444d6f55022c6d8433fa15d 100644 (file)
@@ -3,7 +3,6 @@
 /bcd.py
 /bitmanip.py
 /branch.py
-/butterfly.py
 /byterev.py
 /comparefixed.py
 /condition.py
@@ -21,6 +20,9 @@
 /fpmove.py
 /fpstore.py
 /fptrans.py
+/maddsubrs.py
+/maddrs.py
+/msubrs.py
 /pifixedload.py
 /pifixedstore.py
 /sprset.py
diff --git a/src/openpower/decoder/isa/caller.py b/src/openpower/decoder/isa/caller.py
index 30f2e5211639c8f4391167e050f41003037320e8..3b8d86948894b266ea0c71403d35bdc89fb0ac7c 100644 (file)
@@ -1968,7 +1968,7 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop):
                        "mffpr", "mffprs",
                        "ctfpr", "ctfprs",
                        "mtfpr", "mtfprs",
-                       "maddsubrs", "maddrs"
+                       "maddsubrs", "maddrs", "msubrs"
                        ]:
             illegal = False
             ins_name = dotstrp
diff --git a/src/openpower/decoder/isa/test_caller_maddrs.py b/src/openpower/decoder/isa/test_caller_maddrs.py
new file mode 100644 (file)
index 0000000..a8903cf
--- /dev/null
@@ -0,0 +1,27 @@
+""" Decoder tests
+
+related bugs:
+
+ *
+"""
+
+import unittest
+
+from openpower.test.alu.maddrs_cases import MADDRSTestCase
+from openpower.test.runner import TestRunnerBase
+
+# writing the test_caller invocation this way makes it work with pytest
+
+
+class TestMADDRS(TestRunnerBase):
+    def __init__(self, test):
+        assert test == 'test'
+        super().__init__(MADDRSTestCase().test_data)
+
+    def test(self):
+        # dummy function to make unittest try to test this class
+        pass
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/src/openpower/decoder/power_decoder2.py b/src/openpower/decoder/power_decoder2.py
index a2ce43512265d19b353318ebb78c70540c0dd543..83b813a618106cd280685ec8908f9b97c448fe66 100644 (file)
@@ -1082,6 +1082,7 @@ class PowerDecodeSubset(Elaboratable):
             with m.If((major == 22) & xo6.matches(
                     '-01000',  # maddsubrs
                     '-01001',  # maddrs
+                    '-01011',  # msubrs
                 )):
                 comb += self.implicit_rs.eq(1)
                 comb += self.extend_rb_maxvl.eq(1) # extend RB
diff --git a/src/openpower/decoder/power_enums.py b/src/openpower/decoder/power_enums.py
index aa49b23b85861f235f511a6f144ed847af9e9c0e..44ad2f547e400838c2570bae5d0052989aa7adb9 100644 (file)
@@ -791,6 +791,7 @@ _insns = [
     "maddhd", "maddhdu", "maddld",                      # INT multiply-and-add
     "maddsubrs",         # Integer DCT Butterfly Add Sub and Round Shift
     "maddrs",            # Integer DCT Butterfly Add and Accumulate and Round Shift
+    "msubrs",            # Integer DCT Butterfly Subtract from and Round Shift
     "mcrf", "mcrxr", "mcrxrx", "mfcr/mfocrf",           # CR mvs
     "mfmsr", "mfspr",
     "minmax",                     # AV bitmanip
@@ -954,7 +955,8 @@ class MicrOp(Enum):
     OP_SHADD = 103
     OP_MADDSUBRS = 104
     OP_MADDRS = 105
-    OP_BYTEREV = 106
+    OP_MSUBRS = 106
+    OP_BYTEREV = 107
 
 
 class SelType(Enum):
diff --git a/src/openpower/test/alu/maddrs_cases.py b/src/openpower/test/alu/maddrs_cases.py
new file mode 100644 (file)
index 0000000..b31ed49
--- /dev/null
@@ -0,0 +1,116 @@
+from openpower.insndb.asm import SVP64Asm
+from openpower.test.common import TestAccumulatorBase, skip_case
+from openpower.endian import bigendian
+from openpower.simulator.program import Program
+from openpower.decoder.isa.caller import SVP64State
+from openpower.test.state import ExpectedState
+from nmutil.sim_util import hash_256
+import math
+from fractions import Fraction
+
+
+class MADDRSTestCase(TestAccumulatorBase):
+    def case_0_maddrs(self):
+        isa = SVP64Asm(["maddsubrs 1,10,11,0",
+                        "maddrs 1,10,12,0",
+                        "msubrs 2,10,12,0"])
+        lst = list(isa)
+
+        initial_regs = [0] * 32
+        initial_regs[1] = 0x00000a71
+        initial_regs[10] = 0x0000e6b8
+        initial_regs[11] = 0x00002d41
+        initial_regs[12] = 0x00000d00
+
+        e = ExpectedState(pc=12)
+        e.intregs[1] = 0x3658c869
+        e.intregs[2] = 0xffffffffcd583ef9
+        e.intregs[10] = 0x0000e6b8
+        e.intregs[11] = 0x00002d41
+        e.intregs[12] = 0x00000d00
+        self.add_case(Program(lst, bigendian), initial_regs, expected=e)
+
+    def case_1_maddrs(self):
+        isa = SVP64Asm(["maddsubrs 1,10,11,0",
+                        "maddrs 1,10,12,14",
+                        "msubrs 2,10,12,14"])
+        lst = list(isa)
+
+        initial_regs = [0] * 32
+        initial_regs[1] = 0x00000a71
+        initial_regs[10] = 0x0000e6b8
+        initial_regs[11] = 0x00002d41
+        initial_regs[12] = 0x00000d00
+
+        e = ExpectedState(pc=12)
+        e.intregs[1] = 0x0000d963
+        e.intregs[2] = 0xffffffffffff3561
+        e.intregs[10] = 0x0000e6b8
+        e.intregs[11] = 0x00002d41
+        e.intregs[12] = 0x00000d00
+        self.add_case(Program(lst, bigendian), initial_regs, expected=e)
+
+    def maddrs_many_helper(self, width, shift, prog, case_idx):
+        # if {'width': width, 'shift': shift, 'case_idx': case_idx} \
+        #         != {'width': 8, 'shift': 1, 'case_idx': 0}:
+        #     return  # for debugging
+        gprs = [0] * 32
+        # make some reproducible random inputs
+        k = f"maddrs {width} {shift} {case_idx}"
+        gprs[10] = hash_256(k + " r10") % 2**64
+        gprs[11] = hash_256(k + " r11") % 2**64
+        gprs[20] = hash_256(k + " r20") % 2**64
+        gprs[30] = hash_256(k + " r30") % 2**64
+
+        svstate = SVP64State()
+        svstate.vl = 64 // width  # one full 64-bit register
+        svstate.maxvl = 64 // width
+
+        e = ExpectedState(pc=8, int_regs=gprs)
+        e.intregs[10] = 0
+        e.intregs[11] = 0
+        for i in range(svstate.vl):
+            # extract elements
+            rt = (gprs[10] >> (i * width)) % 2 ** width
+            rs = (gprs[11] >> (i * width)) % 2 ** width
+            ra = (gprs[20] >> (i * width)) % 2 ** width
+            rb = (gprs[30] >> (i * width)) % 2 ** width
+            if rt >= 2 ** (width - 1):
+                rt -= 2 ** width  # sign extend rt
+            if rs >= 2 ** (width - 1):
+                rs -= 2 ** width  # sign extend rs
+            if ra >= 2 ** (width - 1):
+                ra -= 2 ** width  # sign extend ra
+            if rb >= 2 ** (width - 1):
+                rb -= 2 ** width  # sign extend rb
+            prod = rb * ra
+            rt += prod
+            rs -= prod
+            factor = Fraction(1, 2 ** shift)  # shr factor
+            round_up = Fraction(1, 2)
+            # round & shr
+            rt = math.floor(rt * factor + round_up)
+            rs = math.floor(rs * factor + round_up)
+            # insert elements
+            e.intregs[10] |= (rt % 2 ** width) << (width * i)
+            e.intregs[11] |= (rs % 2 ** width) << (width * i)
+
+        with self.subTest(
+            width=width, shift=shift, case_idx=case_idx,
+            RT_in=hex(gprs[10]), RS_in=hex(gprs[11]),
+            RA_in=hex(gprs[20]), RB_in=hex(gprs[30]),
+            expected_RT=hex(e.intregs[10]), expected_RS=hex(e.intregs[11]),
+        ):
+            self.add_case(prog, gprs, expected=e, initial_svstate=svstate)
+
+    def case_maddrs_many(self):
+        for width in 8, 16, 32, 64:
+            shift_end = min(32, width)
+            for shift in range(0, shift_end, shift_end // 8):
+                w = "" if width == 64 else f"/w={width}"
+                prog = Program(list(SVP64Asm([
+                    f"sv.maddrs{w} *10,*20,*30,{shift}",
+                ])), bigendian)
+
+                for case_idx in range(25):
+                    self.maddrs_many_helper(width, shift, prog, case_idx)
diff --git a/src/openpower/test/alu/maddsubrs_cases.py b/src/openpower/test/alu/maddsubrs_cases.py
index c948272d87839ed8beb2a85ed8441ed3e5e8102d..e2087fa7e315a06ee88d99fea4bde042ac336786 100644 (file)
@@ -11,7 +11,7 @@ from fractions import Fraction
 
 class MADDSUBRSTestCase(TestAccumulatorBase):
     def case_0_maddsubrs(self):
-        isa = SVP64Asm(["maddsubrs 1,10,14,11"])
+        isa = SVP64Asm(["maddsubrs 1,10,11,14"])
         lst = list(isa)
 
         initial_regs = [0] * 32
@@ -27,7 +27,7 @@ class MADDSUBRSTestCase(TestAccumulatorBase):
         self.add_case(Program(lst, bigendian), initial_regs, expected=e)
 
     def case_1_maddsubrs(self):
-        isa = SVP64Asm(["maddsubrs 1,10,0,11"])
+        isa = SVP64Asm(["maddsubrs 1,10,11,0"])
         lst = list(isa)
 
         initial_regs = [0] * 32
@@ -43,7 +43,7 @@ class MADDSUBRSTestCase(TestAccumulatorBase):
         self.add_case(Program(lst, bigendian), initial_regs, expected=e)
 
     def case_2_maddsubrs(self):
-        isa = SVP64Asm(["maddsubrs 1,10,2,11"])
+        isa = SVP64Asm(["maddsubrs 1,10,11,2"])
         lst = list(isa)
 
         initial_regs = [0] * 32
@@ -59,7 +59,7 @@ class MADDSUBRSTestCase(TestAccumulatorBase):
         self.add_case(Program(lst, bigendian), initial_regs, expected=e)
 
     def case_3_maddsubrs(self):
-        isa = SVP64Asm(["maddsubrs 1,10,16,11"])
+        isa = SVP64Asm(["maddsubrs 1,10,11,16"])
         lst = list(isa)
 
         initial_regs = [0] * 32
@@ -75,7 +75,7 @@ class MADDSUBRSTestCase(TestAccumulatorBase):
         self.add_case(Program(lst, bigendian), initial_regs, expected=e)
 
     def case_4_maddsubrs(self):
-        isa = SVP64Asm(["maddsubrs 1,10,1,11"])
+        isa = SVP64Asm(["maddsubrs 1,10,11,1"])
         lst = list(isa)
 
         initial_regs = [0] * 32
@@ -92,7 +92,7 @@ class MADDSUBRSTestCase(TestAccumulatorBase):
 
     def case_maddsubrs_16bit_s14(self):
         p = Program(list(SVP64Asm([
-            "sv.maddsubrs/w=16 *10,*20,14,*30",
+            "sv.maddsubrs/w=16 *10,*20,*30,14",
         ])), bigendian)
 
         initial_regs = [0] * 32
@@ -189,111 +189,8 @@ class MADDSUBRSTestCase(TestAccumulatorBase):
             for shift in range(0, shift_end, shift_end // 8):
                 w = "" if width == 64 else f"/w={width}"
                 prog = Program(list(SVP64Asm([
-                    f"sv.maddsubrs{w} *10,*20,{shift},*30",
+                    f"sv.maddsubrs{w} *10,*20,*30,{shift}",
                 ])), bigendian)
 
                 for case_idx in range(25):
                     self.maddsubrs_many_helper(width, shift, prog, case_idx)
-
-    def case_0_maddrs(self):
-        isa = SVP64Asm(["maddsubrs 1,10,0,11",
-                        "maddrs 1,10,0,12"])
-        lst = list(isa)
-
-        initial_regs = [0] * 32
-        initial_regs[1] = 0x00000a71
-        initial_regs[10] = 0x0000e6b8
-        initial_regs[11] = 0x00002d41
-        initial_regs[12] = 0x00000d00
-
-        e = ExpectedState(pc=8)
-        e.intregs[1] = 0x3658c869
-        e.intregs[2] = 0xffffffffcd583ef9
-        e.intregs[10] = 0x0000e6b8
-        e.intregs[11] = 0x00002d41
-        e.intregs[12] = 0x00000d00
-        self.add_case(Program(lst, bigendian), initial_regs, expected=e)
-
-    def case_1_maddrs(self):
-        isa = SVP64Asm(["maddsubrs 1,10,0,11",
-                        "maddrs 1,10,14,12"])
-        lst = list(isa)
-
-        initial_regs = [0] * 32
-        initial_regs[1] = 0x00000a71
-        initial_regs[10] = 0x0000e6b8
-        initial_regs[11] = 0x00002d41
-        initial_regs[12] = 0x00000d00
-
-        e = ExpectedState(pc=8)
-        e.intregs[1] = 0x0000d963
-        e.intregs[2] = 0xffffffffffff3561
-        e.intregs[10] = 0x0000e6b8
-        e.intregs[11] = 0x00002d41
-        e.intregs[12] = 0x00000d00
-        self.add_case(Program(lst, bigendian), initial_regs, expected=e)
-
-    def maddrs_many_helper(self, width, shift, prog, case_idx):
-        # if {'width': width, 'shift': shift, 'case_idx': case_idx} \
-        #         != {'width': 8, 'shift': 1, 'case_idx': 0}:
-        #     return  # for debugging
-        gprs = [0] * 32
-        # make some reproducible random inputs
-        k = f"maddrs {width} {shift} {case_idx}"
-        gprs[10] = hash_256(k + " r10") % 2**64
-        gprs[11] = hash_256(k + " r11") % 2**64
-        gprs[20] = hash_256(k + " r20") % 2**64
-        gprs[30] = hash_256(k + " r30") % 2**64
-
-        svstate = SVP64State()
-        svstate.vl = 64 // width  # one full 64-bit register
-        svstate.maxvl = 64 // width
-
-        e = ExpectedState(pc=8, int_regs=gprs)
-        e.intregs[10] = 0
-        e.intregs[11] = 0
-        for i in range(svstate.vl):
-            # extract elements
-            rt = (gprs[10] >> (i * width)) % 2 ** width
-            rs = (gprs[11] >> (i * width)) % 2 ** width
-            ra = (gprs[20] >> (i * width)) % 2 ** width
-            rb = (gprs[30] >> (i * width)) % 2 ** width
-            if rt >= 2 ** (width - 1):
-                rt -= 2 ** width  # sign extend rt
-            if rs >= 2 ** (width - 1):
-                rs -= 2 ** width  # sign extend rs
-            if ra >= 2 ** (width - 1):
-                ra -= 2 ** width  # sign extend ra
-            if rb >= 2 ** (width - 1):
-                rb -= 2 ** width  # sign extend rb
-            prod = rb * ra
-            rt += prod
-            rs -= prod
-            factor = Fraction(1, 2 ** shift)  # shr factor
-            round_up = Fraction(1, 2)
-            # round & shr
-            rt = math.floor(rt * factor + round_up)
-            rs = math.floor(rs * factor + round_up)
-            # insert elements
-            e.intregs[10] |= (rt % 2 ** width) << (width * i)
-            e.intregs[11] |= (rs % 2 ** width) << (width * i)
-
-        with self.subTest(
-            width=width, shift=shift, case_idx=case_idx,
-            RT_in=hex(gprs[10]), RS_in=hex(gprs[11]),
-            RA_in=hex(gprs[20]), RB_in=hex(gprs[30]),
-            expected_RT=hex(e.intregs[10]), expected_RS=hex(e.intregs[11]),
-        ):
-            self.add_case(prog, gprs, expected=e, initial_svstate=svstate)
-
-    def case_maddrs_many(self):
-        for width in 8, 16, 32, 64:
-            shift_end = min(32, width)
-            for shift in range(0, shift_end, shift_end // 8):
-                w = "" if width == 64 else f"/w={width}"
-                prog = Program(list(SVP64Asm([
-                    f"sv.maddrs{w} *10,*20,{shift},*30",
-                ])), bigendian)
-
-                for case_idx in range(25):
-                    self.maddrs_many_helper(width, shift, prog, case_idx)