arch-power: Refactor arithmetic instructions

author Sandipan Das <sandipan@linux.ibm.com>

Sat, 6 Feb 2021 11:47:28 +0000 (17:17 +0530)

committer Sandipan Das <sandipan@linux.ibm.com>

Mon, 15 Feb 2021 08:32:38 +0000 (14:02 +0530)
author Sandipan Das <sandipan@linux.ibm.com>
Sat, 6 Feb 2021 11:47:28 +0000 (17:17 +0530)
committer Sandipan Das <sandipan@linux.ibm.com>
Mon, 15 Feb 2021 08:32:38 +0000 (14:02 +0530)
diff --git a/src/arch/power/insts/integer.hh b/src/arch/power/insts/integer.hh

index d81f98d87dfbdbb67994fc9819b2aca2eb721038..9efda43378f7a22341991b2ae96ebe36279d6b06 100644 (file)
--- a/src/arch/power/insts/integer.hh
+++ b/src/arch/power/insts/integer.hh
@@ -141,6 +141,39 @@ class IntImmOp : public IntOp
  };
  
  
+/**
+ * Base class for integer arithmetic operations; adds no state to IntOp.
+ */
+class IntArithOp : public IntOp
+{
+  protected:
+
+    /// Protected constructor; forwards all three arguments to IntOp.
+    IntArithOp(const char *mnem, MachInst _machInst, OpClass __opClass)
+      : IntOp(mnem, _machInst, __opClass)
+    {
+    }
+};
+
+
+/**
+ * Integer arithmetic operations that take a signed immediate operand.
+ */
+class IntImmArithOp : public IntArithOp
+{
+  protected:
+    /// Immediate operand: machInst.si cast to int16_t, stored as int32_t.
+    int32_t simm;
+
+    /// Forwards to IntArithOp, then derives simm from the si field.
+    IntImmArithOp(const char *mnem, MachInst _machInst, OpClass __opClass)
+      : IntArithOp(mnem, _machInst, __opClass),
+        simm((int16_t)machInst.si)
+    {
+    }
+};
+
+
  /**
   * Class for integer operations with a shift.
   */
diff --git a/src/arch/power/isa/decoder.isa b/src/arch/power/isa/decoder.isa

index e27fd927c48fcd64ef8386a305a04b9831b04a55..f9fe68a9aa6a1b36750e83b160a90f14f205e581 100644 (file)
--- a/src/arch/power/isa/decoder.isa
+++ b/src/arch/power/isa/decoder.isa
@@ -172,26 +172,34 @@ decode PO default Unknown::unknown() {
      }
  
      format IntImmArithCheckRaOp {
-        14: addi({{ Rt = Ra + imm; }},
-                 {{ Rt = imm }});
-
-        15: addis({{ Rt = Ra + (imm << 16); }},
-                  {{ Rt = imm << 16; }});
+        14: addi({{ Rt = Ra + simm; }},
+                 {{ Rt = simm }});
+        15: addis({{ Rt = Ra + (simm << 16); }},
+                  {{ Rt = simm << 16; }});
      }
  
      format IntImmArithOp {
-        12: addic({{ uint32_t src = Ra; Rt = src + imm; }},
-                  [computeCA]);
-
-        13: addic_({{ uint32_t src = Ra; Rt = src + imm; }},
-                   [computeCA, computeCR0]);
-
-        8: subfic({{ int32_t src = ~Ra; Rt = src + imm + 1; }},
-                  [computeCA]);
+        12: addic({{
+            uint64_t src = Ra;
+            Rt = src + simm;
+        }},
+        true);
+
+        13: addic_({{
+            uint64_t src = Ra;
+            Rt = src + simm;
+        }},
+        true, true);
+
+        8: subfic({{
+            uint64_t src = ~Ra;
+            Rt = src + simm + 1;
+        }},
+        true);
  
          7: mulli({{
              int32_t src = Ra_sw;
-            int64_t prod = src * imm;
+            int64_t prod = src * simm;
              Rt = (uint32_t)prod;
          }});
      }
@@ -508,11 +516,11 @@ decode PO default Unknown::unknown() {
                  104: neg({{ ~Ra }}, {{ 1 }});
                  138: adde({{ Ra }}, {{ Rb }}, {{ xer.ca }},
                            true);
-                234: addme({{ Ra }}, {{ (uint32_t)-1 }}, {{ xer.ca }},
+                234: addme({{ Ra }}, {{ -1ULL }}, {{ xer.ca }},
                             true);
                  136: subfe({{ ~Ra }}, {{ Rb }}, {{ xer.ca }},
                             true);
-                232: subfme({{ ~Ra }}, {{ (uint32_t)-1 }}, {{ xer.ca }},
+                232: subfme({{ ~Ra }}, {{ -1ULL }}, {{ xer.ca }},
                              true);
                  202: addze({{ Ra }}, {{ xer.ca }},
                             computeCA = true);
@@ -522,21 +530,22 @@ decode PO default Unknown::unknown() {
  
              // Arithmetic instructions all use source registers Ra and Rb,
              // with destination register Rt.
-            format IntArithOp {
+            format IntArithCheckRcOp {
                  75: mulhw({{
                      int64_t prod = Ra_sd * Rb_sd;
                      Rt = prod >> 32;
                  }});
+
                  11: mulhwu({{
                      uint64_t prod = Ra_ud * Rb_ud;
                      Rt = prod >> 32;
                  }});
-                235: mullw({{ int64_t prod = Ra_sd * Rb_sd; Rt = prod; }});
-                747: mullwo({{
-                    int64_t src1 = Ra_sd;
-                    int64_t src2 = Rb;
-                    int64_t prod = src1 * src2;
-                    Rt = prod;
+
+                235: mullw({{
+                    // Word multiply: only the low 32 bits of each source count
+                    int64_t prod = (int64_t)Ra_sw * Rb_sw; Rt = prod;
+                    // Overflow iff the product does not fit in 32 signed bits
+                    setOV = (prod != (int32_t)prod);
                  }},
                  true);
  
@@ -548,18 +557,7 @@ decode PO default Unknown::unknown() {
                          Rt = src1 / src2;
                      } else {
                          Rt = 0;
-                    }
-                }});
-
-                1003: divwo({{
-                    int32_t src1 = Ra_sw;
-                    int32_t src2 = Rb_sw;
-                    if ((src1 != 0x80000000 || src2 != 0xffffffff)
-                        && src2 != 0) {
-                        Rt = src1 / src2;
-                    } else {
-                        Rt = 0;
-                        divSetOV = true;
+                        setOV = true;
                      }
                  }},
                  true);
@@ -571,18 +569,8 @@ decode PO default Unknown::unknown() {
                          Rt = src1 / src2;
                      } else {
                          Rt = 0;
+                        setOV = true;
                      }
-                }});
-
-                971: divwuo({{
-                  uint32_t src1 = Ra_sw;
-                  uint32_t src2 = Rb_sw;
-                  if (src2 != 0) {
-                      Rt = src1 / src2;
-                  } else {
-                      Rt = 0;
-                      divSetOV = true;
-                  }
                  }},
                  true);
              }
diff --git a/src/arch/power/isa/formats/integer.isa b/src/arch/power/isa/formats/integer.isa

index 50badce53142d059ae63492d4704767ae180054a..01ea9ba203e8a1a0d59a846839d84c96dc00dc83 100644 (file)
--- a/src/arch/power/isa/formats/integer.isa
+++ b/src/arch/power/isa/formats/integer.isa
@@ -98,17 +98,12 @@ computeOVCode = '''
      }
  '''
  
-computeDivOVCode = '''
-    if (divSetOV) {
+setOVCode = '''
+    if (setOV) {
          xer.ov = 1;
          xer.so = 1;
      } else {
-        if (findOverflow(32, %(result)s, %(inputa)s, %(inputb)s)) {
-            xer.ov = 1;
-            xer.so = 1;
-        } else {
-            xer.ov = 0;
-        }
+        xer.ov = 0;
      }
  '''
  
@@ -136,21 +131,23 @@ def format IntImmOp(code, inst_flags = []) {{
  // value in source register Ra, hence the use of src to hold the actual
  // value. The control flags include the use of code to compute the
  // carry bit or the CR0 code.
-def format IntImmArithOp(code, ctrl_flags = [], inst_flags = []) {{
+def format IntImmArithOp(code, computeCA = 0, computeCR0 = 0,
+                         inst_flags = []) {{
+    # computeCA / computeCR0: truthy to append the carry / CR0 code below
+    # Names substituted into the code templates through the '%' operator
+    dict = {'result':'Rt', 'inputa':'src', 'inputb':'simm'}
  
-    # Set up the dictionary and deal with control flags
-    dict = {'result':'Rt', 'inputa':'src', 'inputb':'imm'}
-    if ctrl_flags:
+    # readXERCode is appended once if either flag is set; carry precedes CR0
+    if computeCA or computeCR0:
          code += readXERCode
-        for val in ctrl_flags:
-            if val == 'computeCA':
-                code += computeCACode % dict + setXERCode
-            elif val == 'computeCR0':
-                code += computeCR0Code % dict
+    if computeCA:
+        code += computeCACode % dict + setXERCode
+    if computeCR0:
+        code += computeCR0Code % dict
  
      # Generate the class
      (header_output, decoder_output, decode_block, exec_output) = \
-        GenAluOp(name, Name, 'IntImmOp', code, inst_flags, BasicDecode,
+        GenAluOp(name, Name, 'IntImmArithOp', code, inst_flags, BasicDecode,
                   BasicConstructor)
  }};
  
@@ -163,12 +160,12 @@ def format IntImmArithCheckRaOp(code, code_ra0, inst_flags = []) {{
  
      # First the version where Ra is non-zero
      (header_output, decoder_output, decode_block, exec_output) = \
-        GenAluOp(name, Name, 'IntImmOp', code, inst_flags,
+        GenAluOp(name, Name, 'IntImmArithOp', code, inst_flags,
                   CheckRaDecode, BasicConstructor)
  
      # Now another version where Ra == 0
      (header_output_ra0, decoder_output_ra0, _, exec_output_ra0) = \
-        GenAluOp(name, Name + 'RaZero', 'IntImmOp', code_ra0, inst_flags,
+        GenAluOp(name, Name + 'RaZero', 'IntImmArithOp', code_ra0, inst_flags,
                   CheckRaDecode, BasicConstructor)
  
      # Finally, add to the other outputs
@@ -264,9 +261,9 @@ def format IntSumOp(src1, src2, ca = {{ 0 }}, computeCA = 0,
      dict = {'result':'Rt', 'inputa':'src1', 'inputb':'src2'}
  
      # Add code to set up variables and do the sum
-    code  = 'uint32_t src1 = ' + src1 + ';\n'
-    code += 'uint32_t src2 = ' + src2 + ';\n'
-    code += 'uint32_t ca = ' + ca + ';\n'
+    code  = 'uint64_t src1 = ' + src1 + ';\n'
+    code += 'uint64_t src2 = ' + src2 + ';\n'
+    code += 'uint64_t ca = ' + ca + ';\n'
      code += 'Rt = src1 + src2 + ca;\n'
  
      # Add code for calculating the carry, if needed
@@ -284,16 +281,16 @@ def format IntSumOp(src1, src2, ca = {{ 0 }}, computeCA = 0,
  
      # Generate the classes
      (header_output, decoder_output, decode_block, exec_output) = \
-        GenAluOp(name, Name, 'IntOp', code, inst_flags,
+        GenAluOp(name, Name, 'IntArithOp', code, inst_flags,
                   CheckRcOeDecode, BasicConstructor)
      (header_output_rc1, decoder_output_rc1, _, exec_output_rc1) = \
-        GenAluOp(name, Name + 'RcSet', 'IntOp', code_rc1, inst_flags,
+        GenAluOp(name, Name + 'RcSet', 'IntArithOp', code_rc1, inst_flags,
                   CheckRcOeDecode, IntRcConstructor)
      (header_output_oe1, decoder_output_oe1, _, exec_output_oe1) = \
-        GenAluOp(name, Name + 'OeSet', 'IntOp', code_oe1, inst_flags,
+        GenAluOp(name, Name + 'OeSet', 'IntArithOp', code_oe1, inst_flags,
                   CheckRcOeDecode, IntOeConstructor)
      (header_output_rc1_oe1, decoder_output_rc1_oe1, _, exec_output_rc1_oe1) = \
-        GenAluOp(name, Name + 'RcSetOeSet', 'IntOp', code_rc1_oe1,
+        GenAluOp(name, Name + 'RcSetOeSet', 'IntArithOp', code_rc1_oe1,
                   inst_flags, CheckRcOeDecode, IntRcOeConstructor)
  
      # Finally, add to the other outputs
@@ -309,39 +306,69 @@ def format IntSumOp(src1, src2, ca = {{ 0 }}, computeCA = 0,
  
  // Instructions that use source registers Ra and Rb, with the result
  // placed into Rt. Basically multiply and divide instructions. The
-// carry bit is never set, but overflow can be calculated. Division
-// explicitly sets the overflow bit in certain situations and this is
-// dealt with using the 'divSetOV' boolean in decoder.isa. We generate
-// two versions of each instruction to deal with the Rc bit.
-def format IntArithOp(code, computeOV = 0, inst_flags = []) {{
+// carry bit is never set, but overflow can be calculated. In certain
+// situations, the overflow bits have to be set and this is dealt with
+// using the 'setOV' boolean in decoder.isa.
+//
+// In case overflow is to be calculated, we generate four versions of
+// each instruction to deal with different combinations of having the
+// OE bit set or unset and the Rc bit set or unset too. Otherwise, we
+// generate two versions of each instruction to deal with the Rc bit.
+def format IntArithCheckRcOp(code, computeOV = 0, inst_flags = []) {{
  
      # The result is always in Rt, but the source values vary
      dict = {'result':'Rt', 'inputa':'src1', 'inputb':'src2'}
  
      # Deal with setting the overflow flag
      if computeOV:
-        code = 'bool divSetOV = false;\n' + code
-        code += computeDivOVCode % dict + setXERCode
-
-    # Setup the 2 code versions and add code to access XER if necessary
-    code_rc1 = readXERCode + code + computeCR0Code % dict
-    if computeOV:
-        code = readXERCode + code
-
-    # Generate the classes
-    (header_output, decoder_output, decode_block, exec_output) = \
-        GenAluOp(name, Name, 'IntOp', code, inst_flags,
-                 CheckRcDecode, BasicConstructor)
-
-    # Generate the second class
-    (header_output_rc1, decoder_output_rc1, _, exec_output_rc1) = \
-        GenAluOp(name, Name + 'RcSet', 'IntOp', code_rc1, inst_flags,
-                 CheckRcDecode, IntRcConstructor)
-
-    # Finally, add to the other outputs
-    header_output += header_output_rc1
-    decoder_output += decoder_output_rc1
-    exec_output += exec_output_rc1
+        # setOV is read only by the OE variants (presumably why M5_VAR_USED)
+        code  = 'M5_VAR_USED bool setOV = false;\n' + code
+        code_rc1 = readXERCode + code + computeCR0Code % dict
+        code_oe1 = readXERCode + code + setOVCode + setXERCode
+        code_rc1_oe1 = readXERCode + code + setOVCode + setXERCode
+        code_rc1_oe1 += computeCR0Code % dict
+
+        # Generate one class per Rc/OE combination (all use CheckRcOeDecode)
+        (header_output, decoder_output, decode_block, exec_output) = \
+            GenAluOp(name, Name, 'IntArithOp', code, inst_flags,
+                     CheckRcOeDecode, BasicConstructor)
+        (header_output_rc1, decoder_output_rc1, _, exec_output_rc1) = \
+            GenAluOp(name, Name + 'RcSet', 'IntArithOp', code_rc1, inst_flags,
+                     CheckRcOeDecode, IntRcConstructor)
+        (header_output_oe1, decoder_output_oe1, _, exec_output_oe1) = \
+            GenAluOp(name, Name + 'OeSet', 'IntArithOp', code_oe1, inst_flags,
+                     CheckRcOeDecode, IntOeConstructor)
+        (header_output_rc1_oe1, decoder_output_rc1_oe1, _,
+         exec_output_rc1_oe1) = \
+            GenAluOp(name, Name + 'RcSetOeSet', 'IntArithOp', code_rc1_oe1,
+                     inst_flags, CheckRcOeDecode, IntRcOeConstructor)
+
+        # Merge the three extra variants into the base class outputs
+        header_output += \
+            header_output_rc1 + header_output_oe1 + header_output_rc1_oe1
+        decoder_output += \
+            decoder_output_rc1 + decoder_output_oe1 + decoder_output_rc1_oe1
+        exec_output += \
+            exec_output_rc1 + exec_output_oe1 + exec_output_rc1_oe1
+
+    else:
+        # Rc variant: readXERCode, then the code, then computeCR0Code
+        code_rc1 = readXERCode + code + computeCR0Code % dict
+
+        # Base class: the instruction code as given, nothing prepended
+        (header_output, decoder_output, decode_block, exec_output) = \
+            GenAluOp(name, Name, 'IntArithOp', code, inst_flags,
+                     CheckRcDecode, BasicConstructor)
+
+        # 'RcSet' class, built from code_rc1
+        (header_output_rc1, decoder_output_rc1, _, exec_output_rc1) = \
+            GenAluOp(name, Name + 'RcSet', 'IntArithOp', code_rc1, inst_flags,
+                     CheckRcDecode, IntRcConstructor)
+
+        # Finally, add to the other outputs
+        header_output += header_output_rc1
+        decoder_output += decoder_output_rc1
+        exec_output += exec_output_rc1
  }};
author	Sandipan Das <sandipan@linux.ibm.com>
	Sat, 6 Feb 2021 11:47:28 +0000 (17:17 +0530)
committer	Sandipan Das <sandipan@linux.ibm.com>
	Mon, 15 Feb 2021 08:32:38 +0000 (14:02 +0530)
src/arch/power/insts/integer.hh		patch \| blob \| history
src/arch/power/isa/decoder.isa		patch \| blob \| history
src/arch/power/isa/formats/integer.isa		patch \| blob \| history