From 325264104b56d82985a0a1227c1e33c13b836102 Mon Sep 17 00:00:00 2001
From: Sandipan Das <sandipan@linux.ibm.com>
Date: Sat, 6 Feb 2021 17:17:31 +0530
Subject: [PATCH] arch-power: Fix arithmetic instructions

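Power ISA v3.0 introduces two new XER bits, CA32 and OV32,
that record the carry and overflow out of bit 31 of the
result respectively, thereby changing the behaviour of the
add and subtract instructions that set them. Also, now
that 64-bit registers are being used, the nature of the
result, i.e. less than, greater than or equal to zero,
must be recorded in CR0 by a 64-bit signed comparison of
the result to zero. In addition, the word multiply
instructions now take their operands from the low-order
32 bits of the source registers, mulli multiplies the full
64-bit register, and the instructions that set OV
explicitly also set OV32. This fixes the following
instructions: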
  * Add Immediate (addi)
  * Add Immediate Shifted (addis)
  * Add (add[o][.])
  * Subtract From (subf[o][.])
  * Add Immediate Carrying (addic)
  * Add Immediate Carrying and Record (addic.)
  * Subtract From Immediate Carrying (subfic)
  * Add Carrying (addc[o][.])
  * Subtract From Carrying (subfc[o][.])
  * Add Extended (adde[o][.])
  * Subtract From Extended (subfe[o][.])
  * Add to Zero Extended (addze[o][.])
  * Subtract From Zero Extended (subfze[o][.])
  * Negate (neg[o][.])
  * Multiply Low Immediate (mulli)
  * Multiply Low Word (mullw[o][.])
  * Multiply High Word (mulhw[.])
  * Multiply High Word Unsigned (mulhwu[.])
  * Divide Word (divw[o][.])
  * Divide Word Unsigned (divwu[o][.])

Change-Id: I8c79f1dca8b19010ed7b734d7ec9bb598df428c3
Signed-off-by: Sandipan Das <sandipan@linux.ibm.com>
---
 src/arch/power/isa/decoder.isa         | 29 +++++++++++++-------------
 src/arch/power/isa/formats/integer.isa | 20 +++++++++++++++---
 src/arch/power/miscregs.hh             |  2 ++
 3 files changed, 34 insertions(+), 17 deletions(-)

diff --git a/src/arch/power/isa/decoder.isa b/src/arch/power/isa/decoder.isa
index f9fe68a9a..e678f0e7a 100644
--- a/src/arch/power/isa/decoder.isa
+++ b/src/arch/power/isa/decoder.isa
@@ -198,9 +198,8 @@ decode PO default Unknown::unknown() {
         true);
 
         7: mulli({{
-            int32_t src = Ra_sw;
-            int64_t prod = src * simm;
-            Rt = (uint32_t)prod;
+            int64_t res = Ra_sd * simm;
+            Rt = res;
         }});
     }
 
@@ -532,29 +531,31 @@ decode PO default Unknown::unknown() {
             // with destination register Rt.
             format IntArithCheckRcOp {
                 75: mulhw({{
-                    int64_t prod = Ra_sd * Rb_sd;
-                    Rt = prod >> 32;
+                    uint64_t res = (int64_t)Ra_sw * Rb_sw;
+                    res = res >> 32;
+                    Rt = res;
                 }});
 
                 11: mulhwu({{
-                    uint64_t prod = Ra_ud * Rb_ud;
-                    Rt = prod >> 32;
+                    uint64_t res = (uint64_t)Ra_uw * Rb_uw;
+                    res = res >> 32;
+                    Rt = res;
                 }});
 
                 235: mullw({{
-                    int64_t prod = Ra_sd * Rb_sd; Rt = prod;
-                    if (prod != (int32_t)prod) {
+                    int64_t res = (int64_t)Ra_sw * Rb_sw;
+                    if (res != (int32_t)res) {
                         setOV = true;
                     }
+                    Rt = res;
                 }},
                 true);
 
                 491: divw({{
                     int32_t src1 = Ra_sw;
                     int32_t src2 = Rb_sw;
-                    if ((src1 != 0x80000000 || src2 != 0xffffffff)
-                        && src2 != 0) {
-                        Rt = src1 / src2;
+                    if ((src1 != INT32_MIN || src2 != -1) && src2 != 0) {
+                        Rt = (uint32_t)(src1 / src2);
                     } else {
                         Rt = 0;
                         setOV = true;
@@ -563,8 +564,8 @@ decode PO default Unknown::unknown() {
                 true);
 
                 459: divwu({{
-                    uint32_t src1 = Ra_sw;
-                    uint32_t src2 = Rb_sw;
+                    uint32_t src1 = Ra_uw;
+                    uint32_t src2 = Rb_uw;
                     if (src2 != 0) {
                         Rt = src1 / src2;
                     } else {
diff --git a/src/arch/power/isa/formats/integer.isa b/src/arch/power/isa/formats/integer.isa
index 01ea9ba20..aa9cc2539 100644
--- a/src/arch/power/isa/formats/integer.isa
+++ b/src/arch/power/isa/formats/integer.isa
@@ -77,33 +77,47 @@ setXERCode = 'XER = xer;'
 
 computeCR0Code = '''
     Cr cr = CR;
-    cr.cr0 = makeCRField((int32_t)%(result)s, (int32_t)0, xer.so);
+    cr.cr0 = makeCRField((int64_t)%(result)s, (int64_t)0, xer.so);
     CR = cr;
 '''
 
 computeCACode = '''
-    if (findCarry(32, %(result)s, %(inputa)s, %(inputb)s)) {
+    if (findCarry(64, %(result)s, %(inputa)s, %(inputb)s)) {
         xer.ca = 1;
     } else {
         xer.ca = 0;
     }
+
+    if (findCarry(32, %(result)s, %(inputa)s, %(inputb)s)) {
+        xer.ca32 = 1;
+    } else {
+        xer.ca32 = 0;
+    }
 '''
 
 computeOVCode = '''
-    if (findOverflow(32, %(result)s, %(inputa)s, %(inputb)s)) {
+    if (findOverflow(64, %(result)s, %(inputa)s, %(inputb)s)) {
         xer.ov = 1;
         xer.so = 1;
     } else {
         xer.ov = 0;
     }
+
+    if (findOverflow(32, %(result)s, %(inputa)s, %(inputb)s)) {
+        xer.ov32 = 1;
+    } else {
+        xer.ov32 = 0;
+    }
 '''
 
 setOVCode = '''
     if (setOV) {
         xer.ov = 1;
+        xer.ov32 = 1;
         xer.so = 1;
     } else {
         xer.ov = 0;
+        xer.ov32 = 0;
     }
 '''
 
diff --git a/src/arch/power/miscregs.hh b/src/arch/power/miscregs.hh
index dc9e9afa1..1c9746b90 100644
--- a/src/arch/power/miscregs.hh
+++ b/src/arch/power/miscregs.hh
@@ -67,6 +67,8 @@ BitUnion32(Xer)
     Bitfield<31> so;
     Bitfield<30> ov;
     Bitfield<29> ca;
+    Bitfield<19> ov32;
+    Bitfield<18> ca32;
 EndBitUnion(Xer)
 
 BitUnion32(Fpscr)
-- 
2.30.2