From: Sandipan Das <sandipan@linux.ibm.com>
Date: Sat, 6 Feb 2021 11:47:28 +0000 (+0530)
Subject: arch-power: Refactor arithmetic instructions
X-Git-Tag: develop-gem5-snapshot~53
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=4506c1652a5427361e2dfeafb044d8a5e1385e73;p=gem5.git

arch-power: Refactor arithmetic instructions

This changes the base classes for integer arithmetic
instructions and introduces two new classes that are used
to distinguish between instructions using register and
immediate operands.

Decoding has also been consolidated using formats that
generate code after determining whether an instruction records
carry and overflow, and whether it records the nature of the
result, i.e. less than, greater than or equal to zero.
However, for multiply and divide instructions, the code to
determine if an overflow has occurred has been moved to the
instruction definition itself. The formats have also been
updated to make use of the new base classes.

Change-Id: I23d70ac4bad4d25d876308db0b3564c092bf574c
Signed-off-by: Sandipan Das <sandipan@linux.ibm.com>
---

diff --git a/src/arch/power/insts/integer.hh b/src/arch/power/insts/integer.hh
index d81f98d87..9efda4337 100644
--- a/src/arch/power/insts/integer.hh
+++ b/src/arch/power/insts/integer.hh
@@ -141,6 +141,39 @@ class IntImmOp : public IntOp
 };
 
 
+/**
+ * Base class for integer arithmetic operations; it adds no state to IntOp.
+ */
+class IntArithOp : public IntOp
+{
+  protected:
+
+    /// Constructor; forwards all three arguments unchanged to IntOp.
+    IntArithOp(const char *mnem, MachInst _machInst, OpClass __opClass)
+      : IntOp(mnem, _machInst, __opClass)
+    {
+    }
+};
+
+
+/**
+ * Class for integer arithmetic operations with a signed immediate operand.
+ */
+class IntImmArithOp : public IntArithOp
+{
+  protected:
+
+    int32_t simm;  ///< Immediate: machInst.si cast to int16_t, then widened.
+
+    /// Constructor; initializes simm from the si field of machInst.
+    IntImmArithOp(const char *mnem, MachInst _machInst, OpClass __opClass)
+      : IntArithOp(mnem, _machInst, __opClass),
+        simm((int16_t)machInst.si)
+    {
+    }
+};
+
+
 /**
  * Class for integer operations with a shift.
  */
diff --git a/src/arch/power/isa/decoder.isa b/src/arch/power/isa/decoder.isa
index e27fd927c..f9fe68a9a 100644
--- a/src/arch/power/isa/decoder.isa
+++ b/src/arch/power/isa/decoder.isa
@@ -172,26 +172,34 @@ decode PO default Unknown::unknown() {
     }
 
     format IntImmArithCheckRaOp {
-        14: addi({{ Rt = Ra + imm; }},
-                 {{ Rt = imm }});
-
-        15: addis({{ Rt = Ra + (imm << 16); }},
-                  {{ Rt = imm << 16; }});
+        14: addi({{ Rt = Ra + simm; }},
+                 {{ Rt = simm; }});
+        15: addis({{ Rt = Ra + (simm << 16); }},
+                  {{ Rt = simm << 16; }});
     }
 
     format IntImmArithOp {
-        12: addic({{ uint32_t src = Ra; Rt = src + imm; }},
-                  [computeCA]);
-
-        13: addic_({{ uint32_t src = Ra; Rt = src + imm; }},
-                   [computeCA, computeCR0]);
-
-        8: subfic({{ int32_t src = ~Ra; Rt = src + imm + 1; }},
-                  [computeCA]);
+        12: addic({{
+            uint64_t src = Ra;
+            Rt = src + simm;
+        }},
+        true);
+
+        13: addic_({{
+            uint64_t src = Ra;
+            Rt = src + simm;
+        }},
+        true, true);
+
+        8: subfic({{
+            uint64_t src = ~Ra;
+            Rt = src + simm + 1;
+        }},
+        true);
 
         7: mulli({{
             int32_t src = Ra_sw;
-            int64_t prod = src * imm;
+            int64_t prod = (int64_t)src * simm;  // widen before multiplying
             Rt = (uint32_t)prod;
         }});
     }
@@ -508,11 +516,11 @@ decode PO default Unknown::unknown() {
                 104: neg({{ ~Ra }}, {{ 1 }});
                 138: adde({{ Ra }}, {{ Rb }}, {{ xer.ca }},
                           true);
-                234: addme({{ Ra }}, {{ (uint32_t)-1 }}, {{ xer.ca }},
+                234: addme({{ Ra }}, {{ -1ULL }}, {{ xer.ca }},
                            true);
                 136: subfe({{ ~Ra }}, {{ Rb }}, {{ xer.ca }},
                            true);
-                232: subfme({{ ~Ra }}, {{ (uint32_t)-1 }}, {{ xer.ca }},
+                232: subfme({{ ~Ra }}, {{ -1ULL }}, {{ xer.ca }},
                             true);
                 202: addze({{ Ra }}, {{ xer.ca }},
                            computeCA = true);
@@ -522,21 +530,22 @@ decode PO default Unknown::unknown() {
 
             // Arithmetic instructions all use source registers Ra and Rb,
             // with destination register Rt.
-            format IntArithOp {
+            format IntArithCheckRcOp {
                 75: mulhw({{
                     int64_t prod = Ra_sd * Rb_sd;
                     Rt = prod >> 32;
                 }});
+
                 11: mulhwu({{
                     uint64_t prod = Ra_ud * Rb_ud;
                     Rt = prod >> 32;
                 }});
-                235: mullw({{ int64_t prod = Ra_sd * Rb_sd; Rt = prod; }});
-                747: mullwo({{
-                    int64_t src1 = Ra_sd;
-                    int64_t src2 = Rb;
-                    int64_t prod = src1 * src2;
-                    Rt = prod;
+
+                235: mullw({{
+                    int64_t prod = Ra_sd * Rb_sd; Rt = prod;
+                    if (prod != (int32_t)prod) {
+                        setOV = true;
+                    }
                 }},
                 true);
 
@@ -548,18 +557,7 @@ decode PO default Unknown::unknown() {
                         Rt = src1 / src2;
                     } else {
                         Rt = 0;
-                    }
-                }});
-
-                1003: divwo({{
-                    int32_t src1 = Ra_sw;
-                    int32_t src2 = Rb_sw;
-                    if ((src1 != 0x80000000 || src2 != 0xffffffff)
-                        && src2 != 0) {
-                        Rt = src1 / src2;
-                    } else {
-                        Rt = 0;
-                        divSetOV = true;
+                        setOV = true;
                     }
                 }},
                 true);
@@ -571,18 +569,8 @@ decode PO default Unknown::unknown() {
                         Rt = src1 / src2;
                     } else {
                         Rt = 0;
+                        setOV = true;
                     }
-                }});
-
-                971: divwuo({{
-                  uint32_t src1 = Ra_sw;
-                  uint32_t src2 = Rb_sw;
-                  if (src2 != 0) {
-                      Rt = src1 / src2;
-                  } else {
-                      Rt = 0;
-                      divSetOV = true;
-                  }
                 }},
                 true);
             }
diff --git a/src/arch/power/isa/formats/integer.isa b/src/arch/power/isa/formats/integer.isa
index 50badce53..01ea9ba20 100644
--- a/src/arch/power/isa/formats/integer.isa
+++ b/src/arch/power/isa/formats/integer.isa
@@ -98,17 +98,12 @@ computeOVCode = '''
     }
 '''
 
-computeDivOVCode = '''
-    if (divSetOV) {
+setOVCode = '''
+    if (setOV) {
         xer.ov = 1;
         xer.so = 1;
     } else {
-        if (findOverflow(32, %(result)s, %(inputa)s, %(inputb)s)) {
-            xer.ov = 1;
-            xer.so = 1;
-        } else {
-            xer.ov = 0;
-        }
+        xer.ov = 0;
     }
 '''
 
@@ -136,21 +131,23 @@ def format IntImmOp(code, inst_flags = []) {{
 // value in source register Ra, hence the use of src to hold the actual
 // value. The control flags include the use of code to compute the
 // carry bit or the CR0 code.
-def format IntImmArithOp(code, ctrl_flags = [], inst_flags = []) {{
+def format IntImmArithOp(code, computeCA = 0, computeCR0 = 0,
+                         inst_flags = []) {{
+
+    # Set up the dictionary used to fill the carry and CR0 code templates
+    dict = {'result':'Rt', 'inputa':'src', 'inputb':'simm'}
 
-    # Set up the dictionary and deal with control flags
-    dict = {'result':'Rt', 'inputa':'src', 'inputb':'imm'}
-    if ctrl_flags:
+    # XER is read once if either flag is set; carry code is added before CR0
+    if computeCA or computeCR0:
         code += readXERCode
-        for val in ctrl_flags:
-            if val == 'computeCA':
-                code += computeCACode % dict + setXERCode
-            elif val == 'computeCR0':
-                code += computeCR0Code % dict
+    if computeCA:
+        code += computeCACode % dict + setXERCode
+    if computeCR0:
+        code += computeCR0Code % dict
 
     # Generate the class
     (header_output, decoder_output, decode_block, exec_output) = \
-        GenAluOp(name, Name, 'IntImmOp', code, inst_flags, BasicDecode,
+        GenAluOp(name, Name, 'IntImmArithOp', code, inst_flags, BasicDecode,
                  BasicConstructor)
 }};
 
@@ -163,12 +160,12 @@ def format IntImmArithCheckRaOp(code, code_ra0, inst_flags = []) {{
 
     # First the version where Ra is non-zero
     (header_output, decoder_output, decode_block, exec_output) = \
-        GenAluOp(name, Name, 'IntImmOp', code, inst_flags,
+        GenAluOp(name, Name, 'IntImmArithOp', code, inst_flags,
                  CheckRaDecode, BasicConstructor)
 
     # Now another version where Ra == 0
     (header_output_ra0, decoder_output_ra0, _, exec_output_ra0) = \
-        GenAluOp(name, Name + 'RaZero', 'IntImmOp', code_ra0, inst_flags,
+        GenAluOp(name, Name + 'RaZero', 'IntImmArithOp', code_ra0, inst_flags,
                  CheckRaDecode, BasicConstructor)
 
     # Finally, add to the other outputs
@@ -264,9 +261,9 @@ def format IntSumOp(src1, src2, ca = {{ 0 }}, computeCA = 0,
     dict = {'result':'Rt', 'inputa':'src1', 'inputb':'src2'}
 
     # Add code to set up variables and do the sum
-    code  = 'uint32_t src1 = ' + src1 + ';\n'
-    code += 'uint32_t src2 = ' + src2 + ';\n'
-    code += 'uint32_t ca = ' + ca + ';\n'
+    code  = 'uint64_t src1 = ' + src1 + ';\n'
+    code += 'uint64_t src2 = ' + src2 + ';\n'
+    code += 'uint64_t ca = ' + ca + ';\n'
     code += 'Rt = src1 + src2 + ca;\n'
 
     # Add code for calculating the carry, if needed
@@ -284,16 +281,16 @@ def format IntSumOp(src1, src2, ca = {{ 0 }}, computeCA = 0,
 
     # Generate the classes
     (header_output, decoder_output, decode_block, exec_output) = \
-        GenAluOp(name, Name, 'IntOp', code, inst_flags,
+        GenAluOp(name, Name, 'IntArithOp', code, inst_flags,
                  CheckRcOeDecode, BasicConstructor)
     (header_output_rc1, decoder_output_rc1, _, exec_output_rc1) = \
-        GenAluOp(name, Name + 'RcSet', 'IntOp', code_rc1, inst_flags,
+        GenAluOp(name, Name + 'RcSet', 'IntArithOp', code_rc1, inst_flags,
                  CheckRcOeDecode, IntRcConstructor)
     (header_output_oe1, decoder_output_oe1, _, exec_output_oe1) = \
-        GenAluOp(name, Name + 'OeSet', 'IntOp', code_oe1, inst_flags,
+        GenAluOp(name, Name + 'OeSet', 'IntArithOp', code_oe1, inst_flags,
                  CheckRcOeDecode, IntOeConstructor)
     (header_output_rc1_oe1, decoder_output_rc1_oe1, _, exec_output_rc1_oe1) = \
-        GenAluOp(name, Name + 'RcSetOeSet', 'IntOp', code_rc1_oe1,
+        GenAluOp(name, Name + 'RcSetOeSet', 'IntArithOp', code_rc1_oe1,
                  inst_flags, CheckRcOeDecode, IntRcOeConstructor)
 
     # Finally, add to the other outputs
@@ -309,39 +306,69 @@ def format IntSumOp(src1, src2, ca = {{ 0 }}, computeCA = 0,
 
 // Instructions that use source registers Ra and Rb, with the result
 // placed into Rt. Basically multiply and divide instructions. The
-// carry bit is never set, but overflow can be calculated. Division
-// explicitly sets the overflow bit in certain situations and this is
-// dealt with using the 'divSetOV' boolean in decoder.isa. We generate
-// two versions of each instruction to deal with the Rc bit.
-def format IntArithOp(code, computeOV = 0, inst_flags = []) {{
+// carry bit is never set, but overflow can be calculated. In certain
+// situations, the overflow bits have to be set and this is dealt with
+// using the 'setOV' boolean in decoder.isa.
+//
+// In case overflow is to be calculated, we generate four versions of
+// each instruction to deal with different combinations of having the
+// OE bit set or unset and the Rc bit set or unset too. Otherwise, we
+// generate two versions of each instruction to deal with the Rc bit.
+def format IntArithCheckRcOp(code, computeOV = 0, inst_flags = []) {{
 
     # The result is always in Rt, but the source values vary
     dict = {'result':'Rt', 'inputa':'src1', 'inputb':'src2'}
 
     # Deal with setting the overflow flag
     if computeOV:
-        code = 'bool divSetOV = false;\n' + code
-        code += computeDivOVCode % dict + setXERCode
-
-    # Setup the 2 code versions and add code to access XER if necessary
-    code_rc1 = readXERCode + code + computeCR0Code % dict
-    if computeOV:
-        code = readXERCode + code
-
-    # Generate the classes
-    (header_output, decoder_output, decode_block, exec_output) = \
-        GenAluOp(name, Name, 'IntOp', code, inst_flags,
-                 CheckRcDecode, BasicConstructor)
-
-    # Generate the second class
-    (header_output_rc1, decoder_output_rc1, _, exec_output_rc1) = \
-        GenAluOp(name, Name + 'RcSet', 'IntOp', code_rc1, inst_flags,
-                 CheckRcDecode, IntRcConstructor)
-
-    # Finally, add to the other outputs
-    header_output += header_output_rc1
-    decoder_output += decoder_output_rc1
-    exec_output += exec_output_rc1
+        # Setup the 4 code versions; all but the plain one read XER first
+        code  = 'M5_VAR_USED bool setOV = false;\n' + code
+        code_rc1 = readXERCode + code + computeCR0Code % dict
+        code_oe1 = readXERCode + code + setOVCode + setXERCode
+        code_rc1_oe1 = readXERCode + code + setOVCode + setXERCode
+        code_rc1_oe1 += computeCR0Code % dict
+
+        # Generate one class per combination of the Rc and OE bits
+        (header_output, decoder_output, decode_block, exec_output) = \
+            GenAluOp(name, Name, 'IntArithOp', code, inst_flags,
+                     CheckRcOeDecode, BasicConstructor)
+        (header_output_rc1, decoder_output_rc1, _, exec_output_rc1) = \
+            GenAluOp(name, Name + 'RcSet', 'IntArithOp', code_rc1, inst_flags,
+                     CheckRcOeDecode, IntRcConstructor)
+        (header_output_oe1, decoder_output_oe1, _, exec_output_oe1) = \
+            GenAluOp(name, Name + 'OeSet', 'IntArithOp', code_oe1, inst_flags,
+                     CheckRcOeDecode, IntOeConstructor)
+        (header_output_rc1_oe1, decoder_output_rc1_oe1, _,
+         exec_output_rc1_oe1) = \
+            GenAluOp(name, Name + 'RcSetOeSet', 'IntArithOp', code_rc1_oe1,
+                     inst_flags, CheckRcOeDecode, IntRcOeConstructor)
+
+        # Finally, add to the other outputs
+        header_output += \
+            header_output_rc1 + header_output_oe1 + header_output_rc1_oe1
+        decoder_output += \
+            decoder_output_rc1 + decoder_output_oe1 + decoder_output_rc1_oe1
+        exec_output += \
+            exec_output_rc1 + exec_output_oe1 + exec_output_rc1_oe1
+
+    else:
+        # Setup the 2 code versions; only the Rc variant reads XER
+        code_rc1 = readXERCode + code + computeCR0Code % dict
+
+        # Generate the first class (plain variant, code used as given)
+        (header_output, decoder_output, decode_block, exec_output) = \
+            GenAluOp(name, Name, 'IntArithOp', code, inst_flags,
+                     CheckRcDecode, BasicConstructor)
+
+        # Generate the second class (Rc variant, with the CR0 code appended)
+        (header_output_rc1, decoder_output_rc1, _, exec_output_rc1) = \
+            GenAluOp(name, Name + 'RcSet', 'IntArithOp', code_rc1, inst_flags,
+                     CheckRcDecode, IntRcConstructor)
+
+        # Finally, add to the other outputs
+        header_output += header_output_rc1
+        decoder_output += decoder_output_rc1
+        exec_output += exec_output_rc1
 }};