arch-power: Fix fixed-point arithmetic multiply and divide instructions

author Sandipan Das <sandipan@linux.vnet.ibm.com>

Thu, 7 Jun 2018 05:24:51 +0000 (10:54 +0530)

committer Luke Kenneth Casson Leighton <lkcl@lkcl.net>

Sun, 24 Jan 2021 03:16:49 +0000 (03:16 +0000)
author Sandipan Das <sandipan@linux.vnet.ibm.com>
Thu, 7 Jun 2018 05:24:51 +0000 (10:54 +0530)
committer Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Sun, 24 Jan 2021 03:16:49 +0000 (03:16 +0000)
diff --git a/src/arch/power/isa/decoder.isa b/src/arch/power/isa/decoder.isa

index 358d0287b12ca0d80d856bdd0352cd35b72dd972..17be9d5441b94ba561eb4ff32447d9c29f705f6d 100644 (file)
--- a/src/arch/power/isa/decoder.isa
+++ b/src/arch/power/isa/decoder.isa
@@ -214,9 +214,8 @@ decode PO default Unknown::unknown() {
          true);
  
          7: mulli({{
-            int32_t src = Ra_sw;
-            int64_t prod = src * simm;
-            Rt = (uint32_t)prod;
+            int64_t res = Ra_sd * simm;
+            Rt = res;
          }});
      }
  
@@ -562,65 +561,47 @@ decode PO default Unknown::unknown() {
              // with destination register Rt.
              format IntArithOp {
                  75: mulhw({{
-                    int64_t prod = Ra_sd * Rb_sd;
-                    Rt = prod >> 32;
+                    uint64_t res = (int64_t)Ra_sw * Rb_sw;
+                    res = res >> 32;
+                    Rt = res;
                  }});
+
                  11: mulhwu({{
-                    uint64_t prod = Ra_ud * Rb_ud;
-                    Rt = prod >> 32;
+                    uint64_t res = (uint64_t)Ra_uw * Rb_uw;
+                    res = res >> 32;
+                    Rt = res;
                  }});
-                235: mullw({{ int64_t prod = Ra_sd * Rb_sd; Rt = prod; }});
-                747: mullwo({{
-                    int64_t src1 = Ra_sd;
-                    int64_t src2 = Rb;
-                    int64_t prod = src1 * src2;
-                    Rt = prod;
+
+                235: mullw({{
+                    int64_t res = (int64_t)Ra_sw * Rb_sw;
+                    if (res != (int32_t)res) {
+                        setOV = true;
+                    }
+                    Rt = res;
                  }},
                  true);
  
                  491: divw({{
                      int32_t src1 = Ra_sw;
                      int32_t src2 = Rb_sw;
-                    if ((src1 != 0x80000000 || src2 != 0xffffffff)
-                        && src2 != 0) {
-                        Rt = src1 / src2;
-                    } else {
-                        Rt = 0;
-                    }
-                }});
-
-                1003: divwo({{
-                    int32_t src1 = Ra_sw;
-                    int32_t src2 = Rb_sw;
-                    if ((src1 != 0x80000000 || src2 != 0xffffffff)
-                        && src2 != 0) {
-                        Rt = src1 / src2;
+                    if ((src1 != INT32_MIN || src2 != -1) && src2 != 0) {
+                        Rt = (uint32_t)(src1 / src2);
                      } else {
                          Rt = 0;
-                        divSetOV = true;
+                        setOV = true;
                      }
                  }},
                  true);
  
                  459: divwu({{
-                    uint32_t src1 = Ra_sw;
-                    uint32_t src2 = Rb_sw;
+                    uint32_t src1 = Ra_uw;
+                    uint32_t src2 = Rb_uw;
                      if (src2 != 0) {
                          Rt = src1 / src2;
                      } else {
                          Rt = 0;
+                        setOV = true;
                      }
-                }});
-
-                971: divwuo({{
-                  uint32_t src1 = Ra_sw;
-                  uint32_t src2 = Rb_sw;
-                  if (src2 != 0) {
-                      Rt = src1 / src2;
-                  } else {
-                      Rt = 0;
-                      divSetOV = true;
-                  }
                  }},
                  true);
              }
diff --git a/src/arch/power/isa/formats/integer.isa b/src/arch/power/isa/formats/integer.isa

index 1a79f15ec82e64759dd24f61e4b6fd6bb4f3601e..928c3d3c84309a9b34307934e869188d818c08c3 100644 (file)
--- a/src/arch/power/isa/formats/integer.isa
+++ b/src/arch/power/isa/formats/integer.isa
@@ -104,17 +104,24 @@ computeOVCode = '''
      }
  '''
  
-computeDivOVCode = '''
-    if (divSetOV) {
+setCACode = '''
+    if (setCA) {
+        xer.ca = 1;
+        xer.ca32 = 1;
+    } else {
+        xer.ca = 0;
+        xer.ca32 = 0;
+    }
+'''
+
+setOVCode = '''
+    if (setOV) {
          xer.ov = 1;
+        xer.ov32 = 1;
          xer.so = 1;
      } else {
-        if (findOverflow(32, %(result)s, %(inputa)s, %(inputb)s)) {
-            xer.ov = 1;
-            xer.so = 1;
-        } else {
-            xer.ov = 0;
-        }
+        xer.ov = 0;
+        xer.ov32 = 0;
      }
  '''
  
@@ -317,10 +324,14 @@ def format IntSumOp(src1, src2, ca = {{ 0 }}, computeCA = 0,
  
  // Instructions that use source registers Ra and Rb, with the result
  // placed into Rt. Basically multiply and divide instructions. The
-// carry bit is never set, but overflow can be calculated. Division
-// explicitly sets the overflow bit in certain situations and this is
-// dealt with using the 'divSetOV' boolean in decoder.isa. We generate
-// two versions of each instruction to deal with the Rc bit.
+// carry bit is never set, but overflow can be calculated. In certain
+// situations, the overflow bits have to be set explicitly, and this is
+// dealt with using the 'setOV' boolean in decoder.isa.
+//
+// When overflow is to be calculated, we generate four versions of
+// each instruction, one for every combination of the OE bit and the
+// Rc bit being set or unset. Otherwise, we generate two versions of
+// each instruction to deal with the Rc bit only.
  def format IntArithOp(code, computeOV = 0, inst_flags = []) {{
  
      # The result is always in Rt, but the source values vary
@@ -328,28 +339,54 @@ def format IntArithOp(code, computeOV = 0, inst_flags = []) {{
  
      # Deal with setting the overflow flag
      if computeOV:
-        code = 'bool divSetOV = false;\n' + code
-        code += computeDivOVCode % dict + setXERCode
-
-    # Setup the 2 code versions and add code to access XER if necessary
-    code_rc1 = readXERCode + code + computeCR0Code % dict
-    if computeOV:
-        code = readXERCode + code
-
-    # Generate the classes
-    (header_output, decoder_output, decode_block, exec_output) = \
-        GenAluOp(name, Name, 'IntOp', code, inst_flags,
-                 CheckRcDecode, BasicConstructor)
-
-    # Generate the second class
-    (header_output_rc1, decoder_output_rc1, _, exec_output_rc1) = \
-        GenAluOp(name, Name + 'RcSet', 'IntOp', code_rc1, inst_flags,
-                 CheckRcDecode, IntRcConstructor)
-
-    # Finally, add to the other outputs
-    header_output += header_output_rc1
-    decoder_output += decoder_output_rc1
-    exec_output += exec_output_rc1
+        # Set up the 4 code versions and add code to access XER if necessary
+        code  = 'bool setOV M5_VAR_USED = false;\n' + code
+        code_rc1 = readXERCode + code + computeCR0Code % dict
+        code_oe1 = readXERCode + code + setOVCode + setXERCode
+        code_rc1_oe1 = readXERCode + code + setOVCode + setXERCode
+        code_rc1_oe1 += computeCR0Code % dict
+
+        # Generate the classes
+        (header_output, decoder_output, decode_block, exec_output) = \
+            GenAluOp(name, Name, 'IntArithOp', code, inst_flags,
+                     CheckRcOeDecode, BasicConstructor)
+        (header_output_rc1, decoder_output_rc1, _, exec_output_rc1) = \
+            GenAluOp(name, Name + 'RcSet', 'IntArithOp', code_rc1, inst_flags,
+                     CheckRcOeDecode, IntRcConstructor)
+        (header_output_oe1, decoder_output_oe1, _, exec_output_oe1) = \
+            GenAluOp(name, Name + 'OeSet', 'IntArithOp', code_oe1, inst_flags,
+                     CheckRcOeDecode, IntOeConstructor)
+        (header_output_rc1_oe1, decoder_output_rc1_oe1, _,
+         exec_output_rc1_oe1) = \
+            GenAluOp(name, Name + 'RcSetOeSet', 'IntArithOp', code_rc1_oe1,
+                     inst_flags, CheckRcOeDecode, IntRcOeConstructor)
+
+        # Finally, add to the other outputs
+        header_output += \
+            header_output_rc1 + header_output_oe1 + header_output_rc1_oe1
+        decoder_output += \
+            decoder_output_rc1 + decoder_output_oe1 + decoder_output_rc1_oe1
+        exec_output += \
+            exec_output_rc1 + exec_output_oe1 + exec_output_rc1_oe1
+
+    else:
+        # Set up the 2 code versions and add code to access XER if necessary
+        code_rc1 = readXERCode + code + computeCR0Code % dict
+
+        # Generate the first class
+        (header_output, decoder_output, decode_block, exec_output) = \
+            GenAluOp(name, Name, 'IntArithOp', code, inst_flags,
+                     CheckRcDecode, BasicConstructor)
+
+        # Generate the second class
+        (header_output_rc1, decoder_output_rc1, _, exec_output_rc1) = \
+            GenAluOp(name, Name + 'RcSet', 'IntArithOp', code_rc1, inst_flags,
+                     CheckRcDecode, IntRcConstructor)
+
+        # Finally, add to the other outputs
+        header_output += header_output_rc1
+        decoder_output += decoder_output_rc1
+        exec_output += exec_output_rc1
  }};
author	Sandipan Das <sandipan@linux.vnet.ibm.com>
	Thu, 7 Jun 2018 05:24:51 +0000 (10:54 +0530)
committer	Luke Kenneth Casson Leighton <lkcl@lkcl.net>
	Sun, 24 Jan 2021 03:16:49 +0000 (03:16 +0000)
src/arch/power/isa/decoder.isa		patch \| blob \| history
src/arch/power/isa/formats/integer.isa		patch \| blob \| history