fix add-like CA/OV outputs
author Jacob Lifshay <programmerjake@gmail.com>
Thu, 30 Mar 2023 07:55:20 +0000 (00:55 -0700)
committer Jacob Lifshay <programmerjake@gmail.com>
Thu, 30 Mar 2023 07:55:20 +0000 (00:55 -0700)
this is a massive kludge, but that's what lkcl requested due to time constraints

openpower/isatables/RM-2P-1S1D.csv
openpower/isatables/minor_31.csv
src/openpower/decoder/isa/caller.py
src/openpower/decoder/power_enums.py
src/openpower/test/alu/alu_cases.py

index e2f314af303f28f5552a9920706c3116d806f38b..b37785ff68ad7f450f4f23d3e33ca04b0d18d1df 100644 (file)
@@ -6,7 +6,6 @@ setb,NORMAL,,2P,EXTRA3,EN,d:RT,s:BFA,0,0,0,0,0,RT,BFA,0,0
 22/7=mtfsf,NORMAL,,2P,EXTRA3,EN,d:CR1,s:FRB,0,0,0,FRB,0,0,0,CR1,0
 cmpli,CROP,,2P,EXTRA3,EN,d:BF,s:RA,0,0,RA,0,0,0,0,BF,0
 cmpi,CROP,,2P,EXTRA3,EN,d:BF,s:RA,0,0,RA,0,0,0,0,BF,0
-neg,NORMAL,,2P,EXTRA3,EN,d:RT,s:RA,0,0,RA,0,0,RT,0,0,0
 popcntb,NORMAL,,2P,EXTRA3,EN,d:RA,s:RS,0,0,RS,0,0,RA,0,0,0
 prtyw,NORMAL,,2P,EXTRA3,EN,d:RA,s:RS,0,0,RS,0,0,RA,0,0,0
 prtyd,NORMAL,,2P,EXTRA3,EN,d:RA,s:RS,0,0,RS,0,0,RA,0,0,0
@@ -16,7 +15,6 @@ mfspr,NORMAL,,2P,EXTRA3,EN,d:RS,s:SPR,0,0,SPR,0,0,RT,0,0,0
 popcntw,NORMAL,,2P,EXTRA3,EN,d:RA,s:RS,0,0,RS,0,0,RA,0,0,0
 mtspr,NORMAL,,2P,EXTRA3,EN,d:SPR,s:RS,0,0,RS,0,0,SPR,0,0,0
 popcntd,NORMAL,,2P,EXTRA3,EN,d:RA,s:RS,0,0,RS,0,0,RA,0,0,0
-nego,NORMAL,,2P,EXTRA3,EN,d:RT,s:RA,0,0,RA,0,0,RT,0,0,0
 addic,NORMAL,,2P,EXTRA3,EN,d:RT,s:RA,0,0,RA,0,0,RT,0,0,0
 addi,NORMAL,,2P,EXTRA3,EN,d:RT,s:RA,0,0,RA_OR_ZERO,0,0,RT,0,0,0
 addis,NORMAL,,2P,EXTRA3,EN,d:RT,s:RA,0,0,RA_OR_ZERO,0,0,RT,0,0,0
@@ -29,10 +27,12 @@ fishmv,NORMAL,,2P,EXTRA3,EN,d:FRS,s:FRS,0,0,FRS,0,0,FRS,0,0,0
 setvl,NORMAL,,2P,EXTRA3,EN,d:RT,s:RA,0,0,RA_OR_ZERO,0,0,RT_OR_ZERO,0,CR0,0
 cntlzw,NORMAL,,2P,EXTRA3,EN,d:RA;d:CR0,s:RS,0,0,RS,0,0,RA,0,CR0,0
 cntlzd,NORMAL,,2P,EXTRA3,EN,d:RA;d:CR0,s:RS,0,0,RS,0,0,RA,0,CR0,0
+neg,NORMAL,,2P,EXTRA3,EN,d:RT;d:CR0,s:RA,0,0,RA,0,0,RT,0,CR0,0
 subfze,NORMAL,,2P,EXTRA3,EN,d:RT;d:CR0,s:RA,0,0,RA,0,0,RT,0,CR0,0
 addze,NORMAL,,2P,EXTRA3,EN,d:RT;d:CR0,s:RA,0,0,RA,0,0,RT,0,CR0,0
 cnttzw,NORMAL,,2P,EXTRA3,EN,d:RA;d:CR0,s:RS,0,0,RS,0,0,RA,0,CR0,0
 cnttzd,NORMAL,,2P,EXTRA3,EN,d:RA;d:CR0,s:RS,0,0,RS,0,0,RA,0,CR0,0
+nego,NORMAL,,2P,EXTRA3,EN,d:RT;d:CR0,s:RA,0,0,RA,0,0,RT,0,CR0,0
 subfzeo,NORMAL,,2P,EXTRA3,EN,d:RT;d:CR0,s:RA,0,0,RA,0,0,RT,0,CR0,0
 addzeo,NORMAL,,2P,EXTRA3,EN,d:RT;d:CR0,s:RA,0,0,RA,0,0,RT,0,CR0,0
 extsh,NORMAL,,2P,EXTRA3,EN,d:RA;d:CR0,s:RS,0,0,RS,0,0,RA,0,CR0,0
index e4dd1ce47c1142f76e5531ef30bc572e918f8b79..0c977dbedc7e6b76e1503f920137b4d389e1bc54 100644 (file)
@@ -145,8 +145,8 @@ opcode,unit,internal op,in1,in2,in3,out,CR in,CR out,inv A,inv out,cry in,cry ou
 0b0011101011,MUL,OP_MUL_L64,RA,RB,NONE,RT,NONE,CR0,0,0,ZERO,0,NONE,0,0,0,0,1,1,RC,0,0,mullw,XO,,,
 0b1011101011,MUL,OP_MUL_L64,RA,RB,NONE,RT,NONE,CR0,0,0,ZERO,0,NONE,0,0,0,0,1,1,RC,0,0,mullwo,XO,,,
 0b0111011100,LOGICAL,OP_AND,RS,RB,NONE,RA,NONE,CR0,0,1,ZERO,0,NONE,0,0,0,0,0,0,RC,0,0,nand,X,,,
-0b0001101000,ALU,OP_ADD,RA,NONE,NONE,RT,NONE,NONE,1,0,ONE,0,NONE,0,0,0,0,0,0,RC,0,0,neg,XO,,,
-0b1001101000,ALU,OP_ADD,RA,NONE,NONE,RT,NONE,NONE,1,0,ONE,0,NONE,0,0,0,0,0,0,RC,0,0,nego,XO,,,
+0b0001101000,ALU,OP_ADD,RA,NONE,NONE,RT,NONE,CR0,1,0,ONE,0,NONE,0,0,0,0,0,0,RC,0,0,neg,XO,,,
+0b1001101000,ALU,OP_ADD,RA,NONE,NONE,RT,NONE,CR0,1,0,ONE,0,NONE,0,0,0,0,0,0,RC,0,0,nego,XO,,,
 0b0001111100,LOGICAL,OP_OR,RS,RB,NONE,RA,NONE,CR0,0,1,ZERO,0,NONE,0,0,0,0,0,0,RC,0,0,nor,X,,,
 0b0110111100,LOGICAL,OP_OR,RS,RB,NONE,RA,NONE,CR0,0,0,ZERO,0,NONE,0,0,0,0,0,0,RC,0,0,or,X,,,
 0b0110011100,LOGICAL,OP_OR,RS,RB,NONE,RA,NONE,CR0,1,0,ZERO,0,NONE,0,0,0,0,0,0,RC,0,0,orc,X,,,
index 61453f31492d81dd84c98ce6bac6945eb5886ef9..9feec8b492398fe50ad60199517ea5ad1bbadb91 100644 (file)
@@ -1333,6 +1333,8 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop):
         self.namespace['XER'] = self.spr['XER']
         self.namespace['CA'] = self.spr['XER'][XER_bits['CA']].value
         self.namespace['CA32'] = self.spr['XER'][XER_bits['CA32']].value
+        self.namespace['OV'] = self.spr['XER'][XER_bits['OV']].value
+        self.namespace['OV32'] = self.spr['XER'][XER_bits['OV32']].value
         self.namespace['XLEN'] = xlen
 
         # add some SVSTATE convenience variables
@@ -1367,7 +1369,96 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop):
             self.namespace['sz'] = SelectableInt(sz, 1)
             self.namespace['SNZ'] = SelectableInt(bc_snz, 1)
 
-    def handle_carry_(self, inputs, output, ca, ca32):
+    def get_kludged_op_add_ca_ov(self, inputs, inp_ca_ov):
+        def ca(a, b, ca_in, width):
+            mask = (1 << width) - 1
+            y = (a & mask) + (b & mask) + ca_in
+            return y >> width
+
+        asmcode = yield self.dec2.dec.op.asmcode
+        insn = insns.get(asmcode)
+        SI = yield self.dec2.dec.SI
+        SI &= 0xFFFF
+        CA, OV = inp_ca_ov
+        inputs = [i.value for i in inputs]
+        if SI & 0x8000:
+            SI -= 0x10000
+        if insn in ("add", "addo", "addc", "addco"):
+            a = inputs[0]
+            b = inputs[1]
+            ca_in = 0
+        elif insn == "addic" or insn == "addic.":
+            a = inputs[0]
+            b = SI
+            ca_in = 0
+        elif insn in ("subf", "subfo", "subfc", "subfco"):
+            a = ~inputs[0]
+            b = inputs[1]
+            ca_in = 1
+        elif insn == "subfic":
+            a = ~inputs[0]
+            b = SI
+            ca_in = 1
+        elif insn == "adde" or insn == "addeo":
+            a = inputs[0]
+            b = inputs[1]
+            ca_in = CA
+        elif insn == "subfe" or insn == "subfeo":
+            a = ~inputs[0]
+            b = inputs[1]
+            ca_in = CA
+        elif insn == "addme" or insn == "addmeo":
+            a = inputs[0]
+            b = ~0
+            ca_in = CA
+        elif insn == "addze" or insn == "addzeo":
+            a = inputs[0]
+            b = 0
+            ca_in = CA
+        elif insn == "subfme" or insn == "subfmeo":
+            a = ~inputs[0]
+            b = ~0
+            ca_in = CA
+        elif insn == "subfze" or insn == "subfzeo":
+            a = ~inputs[0]
+            b = 0
+            ca_in = CA
+        elif insn == "addex":
+            # CA[32] aren't actually written, just generate so we have
+            # something to return
+            ca64 = ov64 = ca(inputs[0], inputs[1], OV, 64)
+            ca32 = ov32 = ca(inputs[0], inputs[1], OV, 32)
+            return ca64, ca32, ov64, ov32
+        elif insn == "neg" or insn == "nego":
+            a = ~inputs[0]
+            b = 0
+            ca_in = 1
+        else:
+            raise NotImplementedError(
+                "op_add kludge unimplemented instruction: ", asmcode, insn)
+
+        ca64 = ca(a, b, ca_in, 64)
+        ca32 = ca(a, b, ca_in, 32)
+        ov64 = ca64 != ca(a, b, ca_in, 63)
+        ov32 = ca32 != ca(a, b, ca_in, 31)
+        return ca64, ca32, ov64, ov32
+
+    def handle_carry_(self, inputs, output, ca, ca32, inp_ca_ov):
+        op = yield self.dec2.e.do.insn_type
+        if op == MicrOp.OP_ADD.value and ca is None and ca32 is None:
+            retval = yield from self.get_kludged_op_add_ca_ov(
+                inputs, inp_ca_ov)
+            ca, ca32, ov, ov32 = retval
+            asmcode = yield self.dec2.dec.op.asmcode
+            if insns.get(asmcode) == 'addex':
+                # TODO: if 32-bit mode, set ov to ov32
+                self.spr['XER'][XER_bits['OV']] = ov
+                self.spr['XER'][XER_bits['OV32']] = ov32
+            else:
+                # TODO: if 32-bit mode, set ca to ca32
+                self.spr['XER'][XER_bits['CA']] = ca
+                self.spr['XER'][XER_bits['CA32']] = ca32
+            return
         inv_a = yield self.dec2.e.do.invert_in
         if inv_a:
             inputs[0] = ~inputs[0]
@@ -1412,7 +1503,17 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop):
         if ca32 is None:  # already written
             self.spr['XER'][XER_bits['CA32']] = cy32
 
-    def handle_overflow(self, inputs, output, div_overflow):
+    def handle_overflow(self, inputs, output, div_overflow, inp_ca_ov):
+        op = yield self.dec2.e.do.insn_type
+        if op == MicrOp.OP_ADD.value:
+            retval = yield from self.get_kludged_op_add_ca_ov(
+                inputs, inp_ca_ov)
+            ca, ca32, ov, ov32 = retval
+            # TODO: if 32-bit mode, set ov to ov32
+            self.spr['XER'][XER_bits['OV']] = ov
+            self.spr['XER'][XER_bits['OV32']] = ov32
+            self.spr['XER'][XER_bits['SO']] |= ov
+            return
         if hasattr(self.dec2.e.do, "invert_in"):
             inv_a = yield self.dec2.e.do.invert_in
             if inv_a:
@@ -1945,6 +2046,9 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop):
             end_loop = no_in_vec or srcstep == vl-1 or dststep == vl-1
             self.namespace['end_loop'] = SelectableInt(end_loop, 1)
 
+        inp_ca_ov = (self.spr['XER'][XER_bits['CA']].value,
+                     self.spr['XER'][XER_bits['OV']].value)
+
         # execute actual instruction here (finally)
         log("inputs", inputs)
         results = info.func(self, *inputs)
@@ -1974,12 +2078,13 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop):
 
         # detect if CA/CA32 already in outputs (sra*, basically)
         ca = outs.get("CA")
-        ca32 = outs.get("CA32 ")
+        ca32 = outs.get("CA32")
 
         log("carry already done?", ca, ca32, output_names)
         carry_en = yield self.dec2.e.do.output_carry
         if carry_en:
-            yield from self.handle_carry_(inputs, results[0], ca, ca32)
+            yield from self.handle_carry_(
+                inputs, results[0], ca, ca32, inp_ca_ov=inp_ca_ov)
 
         # get outout named "overflow" and "CR0"
         overflow = outs.get('overflow')
@@ -1991,7 +2096,8 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop):
             ov_ok = yield self.dec2.e.do.oe.ok
             log("internal overflow", ins_name, overflow, "en?", ov_en, ov_ok)
             if ov_en & ov_ok:
-                yield from self.handle_overflow(inputs, results[0], overflow)
+                yield from self.handle_overflow(
+                    inputs, results[0], overflow, inp_ca_ov=inp_ca_ov)
 
         # only do SVP64 dest predicated Rc=1 if dest-pred is not enabled
         rc_en = False
index 506b2dc7c5cd93076d58ece85bb2598b5aad9719..90a91e1dba50b4af8da59b9628d5d4c4ca634bdd 100644 (file)
@@ -689,6 +689,7 @@ _insns = [
     "NONE", "add", "addc", "addco", "adde", "addeo",
     "addi", "addic", "addic.", "addis",
     "addme", "addmeo", "addo", "addze", "addzeo",
+    "addex",
     "addg6s",
     "and", "andc", "andi.", "andis.",
     "attn",
index 14169d960d3ea60682eecd2d0b0a554ada52f1a4..c489cee73216da2cabc925b7544f2ccf14ec02e7 100644 (file)
@@ -1,5 +1,5 @@
 import random
-from openpower.test.common import TestAccumulatorBase
+from openpower.test.common import TestAccumulatorBase, skip_case
 from openpower.endian import bigendian
 from openpower.simulator.program import Program
 from openpower.decoder.selectable_int import SelectableInt
@@ -87,6 +87,46 @@ def check_addmeo_subfmeo_matches_reference(instr, case_filter, out):
 
 
 class ALUTestCase(TestAccumulatorBase):
+    def case_addex(self):
+        lst = [f"addex 3, 4, 5, 0"]
+        program = Program(lst, bigendian)
+        values = (*range(-2, 4), ~1 << 63, (1 << 63) - 1)
+        for ra in values:
+            ra %= 1 << 64
+            for rb in values:
+                rb %= 1 << 64
+                for ov in (0, 1):
+                    with self.subTest(ra=hex(ra), rb=hex(rb), ov=ov):
+                        initial_regs = [0] * 32
+                        initial_regs[4] = ra
+                        initial_regs[5] = rb
+                        initial_sprs = {}
+                        xer = SelectableInt(0, 64)
+                        xer[XER_bits['OV']] = ov
+                        initial_sprs[special_sprs['XER']] = xer
+                        e = ExpectedState(pc=4)
+                        v = ra + rb + ov
+                        v32 = (ra % (1 << 32)) + (rb % (1 << 32)) + ov
+                        ov = v >> 64
+                        ov32 = v32 >> 32
+                        e.intregs[3] = v % (1 << 64)
+                        e.intregs[4] = ra
+                        e.intregs[5] = rb
+                        e.ov = (ov32 << 1) | ov
+                        self.add_case(program, initial_regs,
+                                      initial_sprs=initial_sprs, expected=e)
+
+    def case_nego_(self):
+        lst = [f"nego. 3, 4"]
+        initial_regs = [0] * 32
+        initial_regs[4] = 0
+        e = ExpectedState(pc=4)
+        e.intregs[3] = 0
+        e.intregs[4] = 0
+        e.so = 0
+        e.ov = 0
+        e.crregs[0] = 2
+        self.add_case(Program(lst, bigendian), initial_regs, expected=e)
 
     def case_1_regression(self):
         lst = [f"add. 3, 1, 2"]
@@ -167,14 +207,14 @@ class ALUTestCase(TestAccumulatorBase):
                 e.intregs[3] = result & ((2**64)-1)
                 eq = 0
                 gt = 0
-                le = 0
+                lt = 0
                 if (e.intregs[3] & (1 << 63)) != 0:
-                    le = 1
+                    lt = 1
                 elif e.intregs[3] == 0:
                     eq = 1
                 else:
                     gt = 1
-                e.crregs[0] = (eq << 1) | (gt << 2) | (le << 3)
+                e.crregs[0] = (eq << 1) | (gt << 2) | (lt << 3)
             elif choice == "subf":
                 result = ~initial_regs[1] + initial_regs[2] + 1
                 e.intregs[3] = result & ((2**64)-1)
@@ -414,9 +454,9 @@ class ALUTestCase(TestAccumulatorBase):
             result = result & ((1 << 64)-1)  # round
             eq = 0
             gt = 0
-            le = 0
+            lt = 0
             if (result & (1 << 63)) != 0:
-                le = 1
+                lt = 1
             elif result == 0:
                 eq = 1
             else:
@@ -428,9 +468,9 @@ class ALUTestCase(TestAccumulatorBase):
             e.intregs[5] = result
             # carry_out goes into bit 0 of ca, carry_out32 into bit 1
             e.ca = carry_out | (carry_out32 << 1)
-            # eq goes into bit 1 of CR0, gt into bit 2, le into bit 3.
+            # eq goes into bit 1 of CR0, gt into bit 2, lt into bit 3.
             # SO goes into bit 0 but overflow doesn't occur here [we hope]
-            e.crregs[0] = (eq << 1) | (gt << 2) | (le << 3)
+            e.crregs[0] = (eq << 1) | (gt << 2) | (lt << 3)
 
             self.add_case(Program(lst, bigendian),
                           initial_regs, initial_sprs, expected=e)
@@ -661,13 +701,13 @@ class ALUTestCase(TestAccumulatorBase):
 
     def case_pia_ca_ov_cases(self):
         wanted_outputs = 'ca', 'ca32', 'ov', 'ov32', 'so'
+        # only test one variant of each instr --
+        # the variant with Rc=1 OV=1 as much as possible
         wanted_instrs = {
-            'addi', 'paddi', 'addis', 'add', 'addic', 'subf', 'subfic', 'addc',
-            'subfc', 'adde', 'subfe', 'addme', 'subfme', 'addze', 'subfze',
-            'addex', 'neg',
+            'addi', 'paddi', 'addis', 'addo.', 'addic.', 'subfo.', 'subfic',
+            'addco.', 'subfco.', 'addeo.', 'subfeo.', 'addmeo.', 'subfmeo.',
+            'addzeo.', 'subfzeo.', 'addex', 'nego.',
         }
-        wanted_instrs |= {i + 'o' for i in wanted_instrs}
-        # intentionally don't test Rc=1 instrs
         unary_inputs = {
             '0x0', '0x1', '0x2',
             '0xFFFFFFFFFFFFFFFF', '0xFFFFFFFFFFFFFFFE',
@@ -709,27 +749,33 @@ class ALUTestCase(TestAccumulatorBase):
                 continue
             if not any(i in case['native_outputs'] for i in wanted_outputs):
                 continue
-            if case.get('so') == True:
+            so_in = case.get('so', False)
+            ca_in = case.get('ca', False)
+            ca32_in = case.get('ca32', False)
+            ov_in = case.get('ov', False)
+            ov32_in = case.get('ov32', False)
+            if so_in:
                 continue
-            if case.get('ov32') == True:
+            if ov32_in:
                 continue
-            if case.get('ca32') == True:
+            if ca32_in:
                 continue
             initial_regs = [0] * 32
             initial_sprs = {}
             xer = SelectableInt(0, 64)
-            xer[XER_bits['CA']] = case.get('ca', False)
-            xer[XER_bits['OV']] = case.get('ov', False)
+            xer[XER_bits['CA']] = ca_in
+            xer[XER_bits['OV']] = ov_in
             initial_sprs[special_sprs['XER']] = xer
             e = ExpectedState(pc=4)
             e.intregs[3] = int(case['native_outputs']['rt'], 0)
-            ca_out = case['native_outputs'].get('ca', False)
-            ca32_out = case['native_outputs'].get('ca32', False)
-            ov_out = case['native_outputs'].get('ov', False)
-            ov32_out = case['native_outputs'].get('ov32', False)
+            ca_out = case['native_outputs'].get('ca', ca_in)
+            ca32_out = case['native_outputs'].get('ca32', ca32_in)
+            ov_out = case['native_outputs'].get('ov', ov_in)
+            ov32_out = case['native_outputs'].get('ov32', ov32_in)
+            so_out = case['native_outputs'].get('so', so_in)
             e.ca = ca_out | (ca32_out << 1)
             e.ov = ov_out | (ov32_out << 1)
-            e.so = int(case['native_outputs'].get('so', False))
+            e.so = int(so_out)
             if 'rb' in case:  # binary op
                 pass32 = matches(case, ra=binary_inputs32, rb=binary_inputs32)
                 pass64 = matches(case, ra=binary_inputs64, rb=binary_inputs64)
@@ -757,6 +803,13 @@ class ALUTestCase(TestAccumulatorBase):
                     continue
                 asm = f'{instr} 3, 4'
                 e.intregs[4] = initial_regs[4] = int(case['ra'], 0)
+            if 'cr0' in case['native_outputs']:
+                cr0 = case['native_outputs']['cr0']
+                v = cr0['lt'] << 3
+                v |= cr0['gt'] << 2
+                v |= cr0['eq'] << 1
+                v |= cr0['so']
+                e.crregs[0] = v
             with self.subTest(case=repr(case)):
                 if asm not in programs:
                     programs[asm] = Program([asm], bigendian)