From 6072698fe879c9f30c84d43c64bca4cae5c5f78a Mon Sep 17 00:00:00 2001
From: Sandipan Das <sandipan@linux.vnet.ibm.com>
Date: Thu, 7 Jun 2018 10:24:28 +0530
Subject: [PATCH] arch-power: Fix fixed-point arithmetic add and subtract
 instructions

This fixes the following arithmetic instructions:
  * Add Immediate (addi)
  * Add Immediate Shifted (addis)
  * Add (add[o][.])
  * Subtract From (subf[o][.])
  * Add Immediate Carrying (addic)
  * Add Immediate Carrying and Record (addic.)
  * Subtract From Immediate Carrying (subfic)
  * Add Carrying (addc[o][.])
  * Subtract From Carrying (subfc[o][.])
  * Add Extended (adde[o][.])
  * Subtract From Extended (subfe[o][.])
  * Add to Minus One Extended (addme[o][.])
  * Subtract From Minus One Extended (subfme[o][.])
  * Add to Zero Extended (addze[o][.])
  * Subtract From Zero Extended (subfze[o][.])
  * Negate (neg[o][.])

This also fixes disassembly generation for all of the above.

Change-Id: I431020a3f8b8610d6e18d1450848a50f477912cb
Signed-off-by: Sandipan Das <sandipan@linux.vnet.ibm.com>
---
 src/arch/power/insts/integer.cc        | 111 +++++++++++++++++++++++--
 src/arch/power/insts/integer.hh        |  39 +++++++++
 src/arch/power/isa/decoder.isa         |  40 +++++----
 src/arch/power/isa/formats/integer.isa |  58 ++++++++-----
 src/arch/power/miscregs.hh             |   2 +
 5 files changed, 206 insertions(+), 44 deletions(-)

diff --git a/src/arch/power/insts/integer.cc b/src/arch/power/insts/integer.cc
index 1f81a15dc..62a8b26fb 100644
--- a/src/arch/power/insts/integer.cc
+++ b/src/arch/power/insts/integer.cc
@@ -85,15 +85,110 @@ IntImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
 {
     stringstream ss;
 
+    ccprintf(ss, "%-10s ", mnemonic);
+
+    // Print the first destination only
+    if (_numDestRegs > 0) {
+        printReg(ss, _destRegIdx[0]);
+    }
+
+    // Print the source register
+    if (_numSrcRegs > 0) {
+        if (_numDestRegs > 0) {
+            ss << ", ";
+        }
+        printReg(ss, _srcRegIdx[0]);
+    }
+
+    // Print the immediate value last
+    ss << ", " << (int32_t)imm;
+
+    return ss.str();
+}
+
+// Disassembly for register-form arithmetic: mnemonic[o][.] dest, src[, src]
+string
+IntArithOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
+{
+    stringstream ss;
+    bool printSecondSrc = true;  // cleared below for single-source mnemonics
+
     // Generate the correct mnemonic
     string myMnemonic(mnemonic);
 
     // Special cases
-    if (!myMnemonic.compare("addi") && _numSrcRegs == 0) {
-        myMnemonic = "li";
-    } else if (!myMnemonic.compare("addis") && _numSrcRegs == 0) {
-        myMnemonic = "lis";
+    if (!myMnemonic.compare("addme") ||  // compare() == 0 means equal
+        !myMnemonic.compare("addze") ||
+        !myMnemonic.compare("subfme") ||
+        !myMnemonic.compare("subfze") ||
+        !myMnemonic.compare("neg")){
+        printSecondSrc = false;
     }
+
+    // Suffix the mnemonic with "o" when oeSet and "." when rcSet
+    if (oeSet) myMnemonic = myMnemonic + "o";
+    if (rcSet) myMnemonic = myMnemonic + ".";
+    ccprintf(ss, "%-10s ", myMnemonic);
+
+    // Only the first destination register is shown; any others are omitted
+    if (_numDestRegs > 0) {
+        printReg(ss, _destRegIdx[0]);
+    }
+
+    // First source register, comma-separated if a destination was printed
+    if (_numSrcRegs > 0) {
+        if (_numDestRegs > 0) {
+            ss << ", ";
+        }
+        printReg(ss, _srcRegIdx[0]);
+
+        // Second source register, skipped for the mnemonics matched above
+        if (_numSrcRegs > 1 && printSecondSrc) {
+            ss << ", ";
+            printReg(ss, _srcRegIdx[1]);
+        }
+    }
+
+    return ss.str();
+}
+
+// Disassembly for immediate adds, using li/lis/subi/subis/subic aliases
+string
+IntImmArithOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
+{
+    stringstream ss;
+    bool negateSimm = false;  // print -simm when shown as a subtract alias
+
+    // Start from the decoder's mnemonic; it may be replaced by an alias
+    string myMnemonic(mnemonic);
+
+    // Aliases: no source register => li/lis; negative simm => sub* forms
+    if (!myMnemonic.compare("addi")) {
+        if (_numSrcRegs == 0) {
+            myMnemonic = "li";
+        } else if (simm < 0) {
+            myMnemonic = "subi";
+            negateSimm = true;
+        }
+    } else if (!myMnemonic.compare("addis")) {
+        if (_numSrcRegs == 0) {
+            myMnemonic = "lis";
+        } else if (simm < 0) {
+            myMnemonic = "subis";
+            negateSimm = true;
+        }
+    } else if (!myMnemonic.compare("addic") && simm < 0) {
+        myMnemonic = "subic";
+        negateSimm = true;
+    } else if (!myMnemonic.compare("addic_")) {  // decoder name for addic.
+        if (simm < 0) {
+            myMnemonic = "subic.";
+            negateSimm = true;
+        } else {
+            myMnemonic = "addic.";
+        }
+    }
+
     ccprintf(ss, "%-10s ", myMnemonic);
 
     // Print the first destination only
@@ -109,8 +204,12 @@ IntImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
         printReg(ss, _srcRegIdx[0]);
     }
 
-    // Print the immediate value last
-    ss << ", " << (int32_t)imm;
+    // Immediate goes last; sub* aliases show the negated value (-simm)
+    if (negateSimm) {
+        ss << ", " << -simm;
+    } else {
+        ss << ", " << simm;
+    }
 
     return ss.str();
 }
diff --git a/src/arch/power/insts/integer.hh b/src/arch/power/insts/integer.hh
index 1e5e56c92..0c1513219 100644
--- a/src/arch/power/insts/integer.hh
+++ b/src/arch/power/insts/integer.hh
@@ -143,6 +143,45 @@ class IntImmOp : public IntOp
 };
 
 
+/**
+ * Base class for integer arithmetic operations; supplies their disassembly.
+ */
+class IntArithOp : public IntOp
+{
+  protected:
+
+    /// Constructor; passes every argument straight through to IntOp
+    IntArithOp(const char *mnem, MachInst _machInst, OpClass __opClass)
+      : IntOp(mnem, _machInst, __opClass)
+    {
+    }
+
+    std::string generateDisassembly(
+            Addr pc, const SymbolTable *symtab) const override;
+};
+
+
+/**
+ * Integer arithmetic operations taking a signed 16-bit immediate (simm).
+ */
+class IntImmArithOp : public IntArithOp
+{
+  protected:
+
+    int32_t simm;  ///< SI field of the instruction, sign-extended from 16 bits
+
+    /// Constructor; latches simm from the machine instruction's SI field
+    IntImmArithOp(const char *mnem, MachInst _machInst, OpClass __opClass)
+      : IntArithOp(mnem, _machInst, __opClass),
+        simm((int16_t)machInst.si)
+    {
+    }
+
+    std::string generateDisassembly(
+            Addr pc, const SymbolTable *symtab) const override;
+};
+
+
 /**
  * Class for integer operations with a shift.
  */
diff --git a/src/arch/power/isa/decoder.isa b/src/arch/power/isa/decoder.isa
index ac69be9ea..765408a67 100644
--- a/src/arch/power/isa/decoder.isa
+++ b/src/arch/power/isa/decoder.isa
@@ -190,26 +190,34 @@ decode PO default Unknown::unknown() {
     }
 
     format IntImmArithCheckRaOp {
-        14: addi({{ Rt = Ra + imm; }},
-                 {{ Rt = imm }});
-
-        15: addis({{ Rt = Ra + (imm << 16); }},
-                  {{ Rt = imm << 16; }});
+        14: addi({{ Rt = Ra + simm; }},
+                 {{ Rt = simm; }});
+        15: addis({{ Rt = Ra + (simm * (1 << 16)); }},
+                  {{ Rt = simm * (1 << 16); }});
     }
 
     format IntImmArithOp {
-        12: addic({{ uint32_t src = Ra; Rt = src + imm; }},
-                  [computeCA]);
-
-        13: addic_({{ uint32_t src = Ra; Rt = src + imm; }},
-                   [computeCA, computeCR0]);
-
-        8: subfic({{ int32_t src = ~Ra; Rt = src + imm + 1; }},
-                  [computeCA]);
+        12: addic({{
+            uint64_t src = Ra;
+            Rt = src + simm;
+        }},
+        true);
+
+        13: addic_({{
+            uint64_t src = Ra;
+            Rt = src + simm;
+        }},
+        true, true);
+
+        8: subfic({{
+            uint64_t src = ~Ra;
+            Rt = src + simm + 1;
+        }},
+        true);
 
         7: mulli({{
             int32_t src = Ra_sw;
-            int64_t prod = src * imm;
+            int64_t prod = (int64_t)src * simm;  // widen to avoid overflow
             Rt = (uint32_t)prod;
         }});
     }
@@ -540,11 +548,11 @@ decode PO default Unknown::unknown() {
                 104: neg({{ ~Ra }}, {{ 1 }});
                 138: adde({{ Ra }}, {{ Rb }}, {{ xer.ca }},
                           true);
-                234: addme({{ Ra }}, {{ (uint32_t)-1 }}, {{ xer.ca }},
+                234: addme({{ Ra }}, {{ -1ULL }}, {{ xer.ca }},
                            true);
                 136: subfe({{ ~Ra }}, {{ Rb }}, {{ xer.ca }},
                            true);
-                232: subfme({{ ~Ra }}, {{ (uint32_t)-1 }}, {{ xer.ca }},
+                232: subfme({{ ~Ra }}, {{ -1ULL }}, {{ xer.ca }},
                             true);
                 202: addze({{ Ra }}, {{ xer.ca }},
                            computeCA = true);
diff --git a/src/arch/power/isa/formats/integer.isa b/src/arch/power/isa/formats/integer.isa
index 4489dae47..eac6db29b 100644
--- a/src/arch/power/isa/formats/integer.isa
+++ b/src/arch/power/isa/formats/integer.isa
@@ -73,25 +73,37 @@ setXERCode = 'XER = xer;'
 
 computeCR0Code = '''
     Cr cr = CR;
-    cr.cr0 = makeCRField((int32_t)%(result)s, (int32_t)0, xer.so);
+    cr.cr0 = makeCRField((int64_t)%(result)s, (int64_t)0, xer.so);
     CR = cr;
 '''
 
 computeCACode = '''
-    if (findCarry(32, %(result)s, %(inputa)s, %(inputb)s)) {
+    if (findCarry(64, %(result)s, %(inputa)s, %(inputb)s)) {
         xer.ca = 1;
     } else {
         xer.ca = 0;
     }
+
+    if (findCarry(32, %(result)s, %(inputa)s, %(inputb)s)) {
+        xer.ca32 = 1;
+    } else {
+        xer.ca32 = 0;
+    }
 '''
 
 computeOVCode = '''
-    if (findOverflow(32, %(result)s, %(inputa)s, %(inputb)s)) {
+    if (findOverflow(64, %(result)s, %(inputa)s, %(inputb)s)) {
         xer.ov = 1;
         xer.so = 1;
     } else {
         xer.ov = 0;
     }
+
+    if (findOverflow(32, %(result)s, %(inputa)s, %(inputb)s)) {
+        xer.ov32 = 1;
+    } else {
+        xer.ov32 = 0;
+    }
 '''
 
 computeDivOVCode = '''
@@ -132,21 +144,23 @@ def format IntImmOp(code, inst_flags = []) {{
 // value in source register Ra, hence the use of src to hold the actual
 // value. The control flags include the use of code to compute the
 // carry bit or the CR0 code.
-def format IntImmArithOp(code, ctrl_flags = [], inst_flags = []) {{
+def format IntImmArithOp(code, computeCA = 0, computeCR0 = 0,
+                         inst_flags = []) {{
+
+    # Substitutions for the %(result)s/%(inputa)s/%(inputb)s placeholders
+    dict = {'result':'Rt', 'inputa':'src', 'inputb':'simm'}
 
-    # Set up the dictionary and deal with control flags
-    dict = {'result':'Rt', 'inputa':'src', 'inputb':'imm'}
-    if ctrl_flags:
+    # Read XER once if needed, then append the carry and/or CR0 updates
+    if computeCA or computeCR0:
         code += readXERCode
-        for val in ctrl_flags:
-            if val == 'computeCA':
-                code += computeCACode % dict + setXERCode
-            elif val == 'computeCR0':
-                code += computeCR0Code % dict
+    if computeCA:
+        code += computeCACode % dict + setXERCode
+    if computeCR0:
+        code += computeCR0Code % dict
 
     # Generate the class
     (header_output, decoder_output, decode_block, exec_output) = \
-        GenAluOp(name, Name, 'IntImmOp', code, inst_flags, BasicDecode,
+        GenAluOp(name, Name, 'IntImmArithOp', code, inst_flags, BasicDecode,
                  BasicConstructor)
 }};
 
@@ -159,12 +173,12 @@ def format IntImmArithCheckRaOp(code, code_ra0, inst_flags = []) {{
 
     # First the version where Ra is non-zero
     (header_output, decoder_output, decode_block, exec_output) = \
-        GenAluOp(name, Name, 'IntImmOp', code, inst_flags,
+        GenAluOp(name, Name, 'IntImmArithOp', code, inst_flags,
                  CheckRaDecode, BasicConstructor)
 
     # Now another version where Ra == 0
     (header_output_ra0, decoder_output_ra0, _, exec_output_ra0) = \
-        GenAluOp(name, Name + 'RaZero', 'IntImmOp', code_ra0, inst_flags,
+        GenAluOp(name, Name + 'RaZero', 'IntImmArithOp', code_ra0, inst_flags,
                  CheckRaDecode, BasicConstructor)
 
     # Finally, add to the other outputs
@@ -260,9 +274,9 @@ def format IntSumOp(src1, src2, ca = {{ 0 }}, computeCA = 0,
     dict = {'result':'Rt', 'inputa':'src1', 'inputb':'src2'}
 
     # Add code to set up variables and do the sum
-    code  = 'uint32_t src1 = ' + src1 + ';\n'
-    code += 'uint32_t src2 = ' + src2 + ';\n'
-    code += 'uint32_t ca = ' + ca + ';\n'
+    code  = 'uint64_t src1 = ' + src1 + ';\n'
+    code += 'uint64_t src2 = ' + src2 + ';\n'
+    code += 'uint64_t ca = ' + ca + ';\n'
     code += 'Rt = src1 + src2 + ca;\n'
 
     # Add code for calculating the carry, if needed
@@ -280,16 +294,16 @@ def format IntSumOp(src1, src2, ca = {{ 0 }}, computeCA = 0,
 
     # Generate the classes
     (header_output, decoder_output, decode_block, exec_output) = \
-        GenAluOp(name, Name, 'IntOp', code, inst_flags,
+        GenAluOp(name, Name, 'IntArithOp', code, inst_flags,
                  CheckRcOeDecode, BasicConstructor)
     (header_output_rc1, decoder_output_rc1, _, exec_output_rc1) = \
-        GenAluOp(name, Name + 'RcSet', 'IntOp', code_rc1, inst_flags,
+        GenAluOp(name, Name + 'RcSet', 'IntArithOp', code_rc1, inst_flags,
                  CheckRcOeDecode, IntRcConstructor)
     (header_output_oe1, decoder_output_oe1, _, exec_output_oe1) = \
-        GenAluOp(name, Name + 'OeSet', 'IntOp', code_oe1, inst_flags,
+        GenAluOp(name, Name + 'OeSet', 'IntArithOp', code_oe1, inst_flags,
                  CheckRcOeDecode, IntOeConstructor)
     (header_output_rc1_oe1, decoder_output_rc1_oe1, _, exec_output_rc1_oe1) = \
-        GenAluOp(name, Name + 'RcSetOeSet', 'IntOp', code_rc1_oe1,
+        GenAluOp(name, Name + 'RcSetOeSet', 'IntArithOp', code_rc1_oe1,
                  inst_flags, CheckRcOeDecode, IntRcOeConstructor)
 
     # Finally, add to the other outputs
diff --git a/src/arch/power/miscregs.hh b/src/arch/power/miscregs.hh
index eba97d439..444f88e40 100644
--- a/src/arch/power/miscregs.hh
+++ b/src/arch/power/miscregs.hh
@@ -57,6 +57,8 @@ BitUnion32(Xer)
     Bitfield<31> so;
     Bitfield<30> ov;
     Bitfield<29> ca;
+    Bitfield<19> ov32;
+    Bitfield<18> ca32;
 EndBitUnion(Xer)
 
 BitUnion32(Fpscr)
-- 
2.30.2