__builtin_<add/sub>_overflow issues on AArch64 (redux)

author Richard Earnshaw <rearnsha@gcc.gnu.org>

Wed, 16 Jan 2019 15:18:05 +0000 (15:18 +0000)

committer Richard Earnshaw <rearnsha@gcc.gnu.org>

Wed, 16 Jan 2019 15:18:05 +0000 (15:18 +0000)
author Richard Earnshaw <rearnsha@gcc.gnu.org>
Wed, 16 Jan 2019 15:18:05 +0000 (15:18 +0000)
committer Richard Earnshaw <rearnsha@gcc.gnu.org>
Wed, 16 Jan 2019 15:18:05 +0000 (15:18 +0000)
diff --git a/gcc/config/aarch64/aarch64-modes.def b/gcc/config/aarch64/aarch64-modes.def

index 5fe5ef05b4a4b92fabc33554c007243852a3706f..14c1a43fe8b10e22f07f15c87ccad2783a003177 100644 (file)
--- a/gcc/config/aarch64/aarch64-modes.def
+++ b/gcc/config/aarch64/aarch64-modes.def
@@ -18,12 +18,25 @@
     along with GCC; see the file COPYING3.  If not see
     <http://www.gnu.org/licenses/>.  */
  
+/* Important note about Carry generation in AArch64.
+
+   Unlike some architectures, the C flag generated by a subtract
+   operation, or a simple compare operation is set to 1 if the result
+   does not overflow in an unsigned sense.  That is, if there is no
+   borrow needed from a higher word.  That means that overflow from
+   addition will set C, but overflow from a subtraction will clear C.
+   We use CC_Cmode to represent detection of overflow from addition as
+   CCmode is used for 'normal' compare (subtraction) operations.  For
+   ADC, the representation becomes more complex still, since we cannot
+   use the normal idiom of comparing the result to one of the input
+   operands; instead we use CC_ADCmode to represent this case.  */
  CC_MODE (CCFP);
  CC_MODE (CCFPE);
  CC_MODE (CC_SWP);
  CC_MODE (CC_NZ);    /* Only N and Z bits of condition flags are valid.  */
  CC_MODE (CC_Z);     /* Only Z bit of condition flags is valid.  */
-CC_MODE (CC_C);     /* Only C bit of condition flags is valid.  */
+CC_MODE (CC_C);     /* C represents unsigned overflow of a simple addition.  */
+CC_MODE (CC_ADC);   /* Unsigned overflow from an ADC (add with carry).  */
  CC_MODE (CC_V);     /* Only V bit of condition flags is valid.  */
  
  /* Half-precision floating point for __fp16.  */
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c

index fd60bddb1e1cbcb3dd46c319ccd182c7b9d1cd41..da48c0969dc31d96a3c1b639babd65622929e321 100644 (file)
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -7089,9 +7089,12 @@ aarch64_emit_call_insn (rtx pat)
  machine_mode
  aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
  {
+  machine_mode mode_x = GET_MODE (x);
+  rtx_code code_x = GET_CODE (x);
+
    /* All floating point compares return CCFP if it is an equality
       comparison, and CCFPE otherwise.  */
-  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
+  if (GET_MODE_CLASS (mode_x) == MODE_FLOAT)
      {
        switch (code)
         {
@@ -7122,55 +7125,65 @@ aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
       using the TST instruction with the appropriate bitmask.  */
    if (y == const0_rtx && REG_P (x)
        && (code == EQ || code == NE)
-      && (GET_MODE (x) == HImode || GET_MODE (x) == QImode))
+      && (mode_x == HImode || mode_x == QImode))
      return CC_NZmode;
  
    /* Similarly, comparisons of zero_extends from shorter modes can
       be performed using an ANDS with an immediate mask.  */
-  if (y == const0_rtx && GET_CODE (x) == ZERO_EXTEND
-      && (GET_MODE (x) == SImode || GET_MODE (x) == DImode)
+  if (y == const0_rtx && code_x == ZERO_EXTEND
+      && (mode_x == SImode || mode_x == DImode)
        && (GET_MODE (XEXP (x, 0)) == HImode || GET_MODE (XEXP (x, 0)) == QImode)
        && (code == EQ || code == NE))
      return CC_NZmode;
  
-  if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
+  if ((mode_x == SImode || mode_x == DImode)
        && y == const0_rtx
        && (code == EQ || code == NE || code == LT || code == GE)
-      && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
-         || GET_CODE (x) == NEG
-         || (GET_CODE (x) == ZERO_EXTRACT && CONST_INT_P (XEXP (x, 1))
+      && (code_x == PLUS || code_x == MINUS || code_x == AND
+         || code_x == NEG
+         || (code_x == ZERO_EXTRACT && CONST_INT_P (XEXP (x, 1))
               && CONST_INT_P (XEXP (x, 2)))))
      return CC_NZmode;
  
    /* A compare with a shifted operand.  Because of canonicalization,
       the comparison will have to be swapped when we emit the assembly
       code.  */
-  if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
+  if ((mode_x == SImode || mode_x == DImode)
        && (REG_P (y) || GET_CODE (y) == SUBREG || y == const0_rtx)
-      && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
-         || GET_CODE (x) == LSHIFTRT
-         || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
+      && (code_x == ASHIFT || code_x == ASHIFTRT
+         || code_x == LSHIFTRT
+         || code_x == ZERO_EXTEND || code_x == SIGN_EXTEND))
      return CC_SWPmode;
  
    /* Similarly for a negated operand, but we can only do this for
       equalities.  */
-  if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
+  if ((mode_x == SImode || mode_x == DImode)
        && (REG_P (y) || GET_CODE (y) == SUBREG)
        && (code == EQ || code == NE)
-      && GET_CODE (x) == NEG)
+      && code_x == NEG)
      return CC_Zmode;
  
-  /* A test for unsigned overflow.  */
-  if ((GET_MODE (x) == DImode || GET_MODE (x) == TImode)
-      && code == NE
-      && GET_CODE (x) == PLUS
-      && GET_CODE (y) == ZERO_EXTEND)
+  /* A test for unsigned overflow from an addition.  */
+  if ((mode_x == DImode || mode_x == TImode)
+      && (code == LTU || code == GEU)
+      && code_x == PLUS
+      && rtx_equal_p (XEXP (x, 0), y))
      return CC_Cmode;
  
+  /* A test for unsigned overflow from an add with carry.  */
+  if ((mode_x == DImode || mode_x == TImode)
+      && (code == LTU || code == GEU)
+      && code_x == PLUS
+      && CONST_SCALAR_INT_P (y)
+      && (rtx_mode_t (y, mode_x)
+         == (wi::shwi (1, mode_x)
+             << (GET_MODE_BITSIZE (mode_x).to_constant () / 2))))
+    return CC_ADCmode;
+
    /* A test for signed overflow.  */
-  if ((GET_MODE (x) == DImode || GET_MODE (x) == TImode)
+  if ((mode_x == DImode || mode_x == TImode)
        && code == NE
-      && GET_CODE (x) == PLUS
+      && code_x == PLUS
        && GET_CODE (y) == SIGN_EXTEND)
      return CC_Vmode;
  
@@ -7274,8 +7287,17 @@ aarch64_get_condition_code_1 (machine_mode mode, enum rtx_code comp_code)
      case E_CC_Cmode:
        switch (comp_code)
         {
-       case NE: return AARCH64_CS;
-       case EQ: return AARCH64_CC;
+       case LTU: return AARCH64_CS;
+       case GEU: return AARCH64_CC;
+       default: return -1;
+       }
+      break;
+
+    case E_CC_ADCmode:
+      switch (comp_code)
+       {
+       case GEU: return AARCH64_CS;
+       case LTU: return AARCH64_CC;
         default: return -1;
         }
        break;
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md

index 513aec1872a7a5d29233d0bcb0bd1331d306eaaf..e65936a2cb92ef8453b62cef268c90190a67185c 100644 (file)
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1871,7 +1871,7 @@
    ""
  {
    emit_insn (gen_add<mode>3_compareC (operands[0], operands[1], operands[2]));
-  aarch64_gen_unlikely_cbranch (NE, CC_Cmode, operands[3]);
+  aarch64_gen_unlikely_cbranch (LTU, CC_Cmode, operands[3]);
  
    DONE;
  })
@@ -1973,7 +1973,7 @@
    emit_move_insn (gen_lowpart (DImode, operands[0]), low_dest);
    emit_move_insn (gen_highpart (DImode, operands[0]), high_dest);
  
-  aarch64_gen_unlikely_cbranch (NE, CC_Cmode, operands[3]);
+  aarch64_gen_unlikely_cbranch (GEU, CC_ADCmode, operands[3]);
    DONE;
   })
  
@@ -2010,69 +2010,36 @@
    [(set_attr "type" "alus_sreg,alus_imm,alus_imm")]
  )
  
-(define_insn "*add<mode>3_compareC_cconly_imm"
-  [(set (reg:CC_C CC_REGNUM)
-       (ne:CC_C
-         (plus:<DWI>
-           (zero_extend:<DWI> (match_operand:GPI 0 "register_operand" "r,r"))
-           (match_operand:<DWI> 2 "const_scalar_int_operand" ""))
-         (zero_extend:<DWI>
-           (plus:GPI
-             (match_dup 0)
-             (match_operand:GPI 1 "aarch64_plus_immediate" "I,J")))))]
-  "aarch64_zero_extend_const_eq (<DWI>mode, operands[2],
-                                <MODE>mode, operands[1])"
-  "@
-  cmn\\t%<w>0, %1
-  cmp\\t%<w>0, #%n1"
-  [(set_attr "type" "alus_imm")]
-)
-
  (define_insn "*add<mode>3_compareC_cconly"
    [(set (reg:CC_C CC_REGNUM)
-       (ne:CC_C
-         (plus:<DWI>
-           (zero_extend:<DWI> (match_operand:GPI 0 "register_operand" "r"))
-           (zero_extend:<DWI> (match_operand:GPI 1 "register_operand" "r")))
-         (zero_extend:<DWI> (plus:GPI (match_dup 0) (match_dup 1)))))]
+       (compare:CC_C
+         (plus:GPI
+           (match_operand:GPI 0 "register_operand" "r,r,r")
+           (match_operand:GPI 1 "aarch64_plus_operand" "r,I,J"))
+         (match_dup 0)))]
    ""
-  "cmn\\t%<w>0, %<w>1"
-  [(set_attr "type" "alus_sreg")]
-)
-
-(define_insn "*add<mode>3_compareC_imm"
-  [(set (reg:CC_C CC_REGNUM)
-       (ne:CC_C
-         (plus:<DWI>
-           (zero_extend:<DWI> (match_operand:GPI 1 "register_operand" "r,r"))
-           (match_operand:<DWI> 3 "const_scalar_int_operand" ""))
-         (zero_extend:<DWI>
-           (plus:GPI
-             (match_dup 1)
-             (match_operand:GPI 2 "aarch64_plus_immediate" "I,J")))))
-   (set (match_operand:GPI 0 "register_operand" "=r,r")
-       (plus:GPI (match_dup 1) (match_dup 2)))]
-  "aarch64_zero_extend_const_eq (<DWI>mode, operands[3],
-                                 <MODE>mode, operands[2])"
    "@
-  adds\\t%<w>0, %<w>1, %2
-  subs\\t%<w>0, %<w>1, #%n2"
-  [(set_attr "type" "alus_imm")]
+  cmn\\t%<w>0, %<w>1
+  cmn\\t%<w>0, %1
+  cmp\\t%<w>0, #%n1"
+  [(set_attr "type" "alus_sreg,alus_imm,alus_imm")]
  )
  
  (define_insn "add<mode>3_compareC"
    [(set (reg:CC_C CC_REGNUM)
         (compare:CC_C
-         (plus:<DWI>
-           (zero_extend:<DWI> (match_operand:GPI 1 "register_operand" "r"))
-           (zero_extend:<DWI> (match_operand:GPI 2 "register_operand" "r")))
-         (zero_extend:<DWI>
-           (plus:GPI (match_dup 1) (match_dup 2)))))
-   (set (match_operand:GPI 0 "register_operand" "=r")
+         (plus:GPI
+           (match_operand:GPI 1 "register_operand" "r,r,r")
+           (match_operand:GPI 2 "aarch64_plus_operand" "r,I,J"))
+         (match_dup 1)))
+   (set (match_operand:GPI 0 "register_operand" "=r,r,r")
         (plus:GPI (match_dup 1) (match_dup 2)))]
    ""
-  "adds\\t%<w>0, %<w>1, %<w>2"
-  [(set_attr "type" "alus_sreg")]
+  "@
+  adds\\t%<w>0, %<w>1, %<w>2
+  adds\\t%<w>0, %<w>1, %2
+  subs\\t%<w>0, %<w>1, #%n2"
+  [(set_attr "type" "alus_sreg,alus_imm,alus_imm")]
  )
  
  (define_insn "*add<mode>3_compareV_cconly_imm"
@@ -2470,7 +2437,7 @@
    [(set (match_operand:GPI 0 "register_operand")
         (plus:GPI
           (plus:GPI
-           (ne:GPI (reg:CC_C CC_REGNUM) (const_int 0))
+           (ltu:GPI (reg:CC_C CC_REGNUM) (const_int 0))
             (match_operand:GPI 1 "aarch64_reg_or_zero"))
           (match_operand:GPI 2 "aarch64_reg_or_zero")))]
     ""
@@ -2509,7 +2476,7 @@
  (define_expand "add<mode>3_carryinC"
    [(parallel
       [(set (match_dup 3)
-          (compare:CC_C
+          (compare:CC_ADC
              (plus:<DWI>
                (plus:<DWI>
                  (match_dup 4)
@@ -2517,57 +2484,54 @@
                    (match_operand:GPI 1 "register_operand" "")))
                (zero_extend:<DWI>
                  (match_operand:GPI 2 "register_operand" "")))
-          (zero_extend:<DWI>
-            (plus:GPI
-              (plus:GPI (match_dup 5) (match_dup 1))
-              (match_dup 2)))))
+            (match_dup 6)))
        (set (match_operand:GPI 0 "register_operand")
            (plus:GPI
              (plus:GPI (match_dup 5) (match_dup 1))
              (match_dup 2)))])]
     ""
  {
-  operands[3] = gen_rtx_REG (CC_Cmode, CC_REGNUM);
-  operands[4] = gen_rtx_NE (<DWI>mode, operands[3], const0_rtx);
-  operands[5] = gen_rtx_NE (<MODE>mode, operands[3], const0_rtx);
+  operands[3] = gen_rtx_REG (CC_ADCmode, CC_REGNUM);
+  rtx ccin = gen_rtx_REG (CC_Cmode, CC_REGNUM);
+  operands[4] = gen_rtx_LTU (<DWI>mode, ccin, const0_rtx);
+  operands[5] = gen_rtx_LTU (<MODE>mode, ccin, const0_rtx);
+  operands[6] = immed_wide_int_const (wi::shwi (1, <DWI>mode)
+                                     << GET_MODE_BITSIZE (<MODE>mode),
+                                     TImode);
  })
  
  (define_insn "*add<mode>3_carryinC_zero"
-  [(set (reg:CC_C CC_REGNUM)
-       (compare:CC_C
+  [(set (reg:CC_ADC CC_REGNUM)
+       (compare:CC_ADC
           (plus:<DWI>
             (match_operand:<DWI> 2 "aarch64_carry_operation" "")
             (zero_extend:<DWI> (match_operand:GPI 1 "register_operand" "r")))
-         (zero_extend:<DWI>
-           (plus:GPI
-             (match_operand:GPI 3 "aarch64_carry_operation" "")
-             (match_dup 1)))))
+         (match_operand 4 "const_scalar_int_operand" "")))
     (set (match_operand:GPI 0 "register_operand" "=r")
-       (plus:GPI (match_dup 3) (match_dup 1)))]
-   ""
+       (plus:GPI (match_operand:GPI 3 "aarch64_carry_operation" "")
+                 (match_dup 1)))]
+  "rtx_mode_t (operands[4], <DWI>mode)
+   == (wi::shwi (1, <DWI>mode) << (unsigned) GET_MODE_BITSIZE (<MODE>mode))"
     "adcs\\t%<w>0, %<w>1, <w>zr"
    [(set_attr "type" "adc_reg")]
  )
  
  (define_insn "*add<mode>3_carryinC"
-  [(set (reg:CC_C CC_REGNUM)
-       (compare:CC_C
+  [(set (reg:CC_ADC CC_REGNUM)
+       (compare:CC_ADC
           (plus:<DWI>
             (plus:<DWI>
               (match_operand:<DWI> 3 "aarch64_carry_operation" "")
               (zero_extend:<DWI> (match_operand:GPI 1 "register_operand" "r")))
             (zero_extend:<DWI> (match_operand:GPI 2 "register_operand" "r")))
-         (zero_extend:<DWI>
-           (plus:GPI
-             (plus:GPI
-               (match_operand:GPI 4 "aarch64_carry_operation" "")
-               (match_dup 1))
-             (match_dup 2)))))
+         (match_operand 5 "const_scalar_int_operand" "")))
     (set (match_operand:GPI 0 "register_operand" "=r")
         (plus:GPI
-         (plus:GPI (match_dup 4) (match_dup 1))
+         (plus:GPI (match_operand:GPI 4 "aarch64_carry_operation" "")
+                   (match_dup 1))
           (match_dup 2)))]
-   ""
+  "rtx_mode_t (operands[5], <DWI>mode)
+   == (wi::shwi (1, <DWI>mode) << (unsigned) GET_MODE_BITSIZE (<MODE>mode))"
     "adcs\\t%<w>0, %<w>1, %<w>2"
    [(set_attr "type" "adc_reg")]
  )
@@ -2594,8 +2558,8 @@
     ""
  {
    rtx cc = gen_rtx_REG (CC_Cmode, CC_REGNUM);
-  operands[3] = gen_rtx_NE (<DWI>mode, cc, const0_rtx);
-  operands[4] = gen_rtx_NE (<MODE>mode, cc, const0_rtx);
+  operands[3] = gen_rtx_LTU (<DWI>mode, cc, const0_rtx);
+  operands[4] = gen_rtx_LTU (<MODE>mode, cc, const0_rtx);
  })
  
  (define_insn "*add<mode>3_carryinV_zero"
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md

index 9103b8284d60f7271930158466bf509a9b81cdaa..855cf7b52f840a81e0bbd2b31fd59fc860060626 100644 (file)
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -347,23 +347,37 @@
    (match_code "eq,ne"))
  
  (define_special_predicate "aarch64_carry_operation"
-  (match_code "ne,geu")
+  (match_code "ltu,geu")
  {
    if (XEXP (op, 1) != const0_rtx)
      return false;
-  machine_mode ccmode = (GET_CODE (op) == NE ? CC_Cmode : CCmode);
    rtx op0 = XEXP (op, 0);
-  return REG_P (op0) && REGNO (op0) == CC_REGNUM && GET_MODE (op0) == ccmode;
+  if (!REG_P (op0) || REGNO (op0) != CC_REGNUM)
+    return false;
+  machine_mode ccmode = GET_MODE (op0);
+  if (ccmode == CC_Cmode)
+    return GET_CODE (op) == LTU;
+  if (ccmode == CC_ADCmode || ccmode == CCmode)
+    return GET_CODE (op) == GEU;
+  return false;
  })
  
+; borrow is essentially the inverse of carry since the sense of the C flag
+; is inverted during subtraction.  See the note in aarch64-modes.def.
  (define_special_predicate "aarch64_borrow_operation"
-  (match_code "eq,ltu")
+  (match_code "geu,ltu")
  {
    if (XEXP (op, 1) != const0_rtx)
      return false;
-  machine_mode ccmode = (GET_CODE (op) == EQ ? CC_Cmode : CCmode);
    rtx op0 = XEXP (op, 0);
-  return REG_P (op0) && REGNO (op0) == CC_REGNUM && GET_MODE (op0) == ccmode;
+  if (!REG_P (op0) || REGNO (op0) != CC_REGNUM)
+    return false;
+  machine_mode ccmode = GET_MODE (op0);
+  if (ccmode == CC_Cmode)
+    return GET_CODE (op) == GEU;
+  if (ccmode == CC_ADCmode || ccmode == CCmode)
+    return GET_CODE (op) == LTU;
+  return false;
  })
  
  ;; True if the operand is memory reference suitable for a load/store exclusive.
author	Richard Earnshaw <rearnsha@gcc.gnu.org>
	Wed, 16 Jan 2019 15:18:05 +0000 (15:18 +0000)
committer	Richard Earnshaw <rearnsha@gcc.gnu.org>
	Wed, 16 Jan 2019 15:18:05 +0000 (15:18 +0000)
gcc/config/aarch64/aarch64-modes.def		patch \| blob \| history
gcc/config/aarch64/aarch64.c		patch \| blob \| history
gcc/config/aarch64/aarch64.md		patch \| blob \| history
gcc/config/aarch64/predicates.md		patch \| blob \| history