From f7343f20c56b54e11d965d34e5f88be7ceaf095b Mon Sep 17 00:00:00 2001 From: Richard Earnshaw Date: Wed, 16 Jan 2019 15:18:05 +0000 Subject: [PATCH] __builtin__overflow issues on AArch64 (redux) Further investigation showed that my previous patch for this issue was still incomplete. The problem stemmed from what I suspect was a misunderstanding of the way overflow is calculated on aarch64 when values are subtracted (and hence in comparisons). In this case, unlike addition, the carry flag is /cleared/ if there is overflow (technically, underflow) and set when that does not happen. This patch clears up this issue by using CCmode for all subtractive operations (this can fully describe the normal overflow conditions without anything particularly fancy); clears up the way we express normal unsigned overflow using CC_Cmode (the result of a sum is less than one of the operands) and adds a new mode, CC_ADCmode to handle expressing overflow of an add-with-carry operation, where the standard idiom is no longer sufficient to describe the overflow condition. PR target/86891 * config/aarch64/aarch64-modes.def: Add comment about how the carry bit is set by add and compare. (CC_ADC): New CC_MODE. * config/aarch64/aarch64.c (aarch64_select_cc_mode): Use variables to cache the code and mode of X. Adjust the shape of a CC_Cmode comparison. Add detection for CC_ADCmode. (aarch64_get_condition_code_1): Update code support for CC_Cmode. Add CC_ADCmode. * config/aarch64/aarch64.md (uaddv4): Use LTU with CCmode. (uaddvti4): Comparison result is in CC_ADCmode and the condition is GEU. (add3_compareC_cconly_imm): Delete. Merge into... (add3_compareC_cconly): ... this. Restructure the comparison to eliminate the need for zero-extending the operands. (add3_compareC_imm): Delete. Merge into ... (add3_compareC): ... this. Restructure the comparison to eliminate the need for zero-extending the operands. (add3_carryin): Use LTU for the overflow detection. 
(add3_carryinC): Use CC_ADCmode for the result of the carry out. Reexpress comparison for overflow. (add3_carryinC_zero): Update for change to add3_carryinC. (add3_carryinC): Likewise. (add3_carryinV): Use LTU for carry between partials. * config/aarch64/predicates.md (aarch64_carry_operation): Update handling of CC_Cmode and add CC_ADCmode. (aarch64_borrow_operation): Likewise. From-SVN: r267971 --- gcc/config/aarch64/aarch64-modes.def | 15 +++- gcc/config/aarch64/aarch64.c | 68 +++++++++----- gcc/config/aarch64/aarch64.md | 130 ++++++++++----------------- gcc/config/aarch64/predicates.md | 26 ++++-- 4 files changed, 126 insertions(+), 113 deletions(-) diff --git a/gcc/config/aarch64/aarch64-modes.def b/gcc/config/aarch64/aarch64-modes.def index 5fe5ef05b4a..14c1a43fe8b 100644 --- a/gcc/config/aarch64/aarch64-modes.def +++ b/gcc/config/aarch64/aarch64-modes.def @@ -18,12 +18,25 @@ along with GCC; see the file COPYING3. If not see . */ +/* Important note about Carry generation in AArch64. + + Unlike some architectures, the C flag generated by a subtract + operation, or a simple compare operation is set to 1 if the result + does not overflow in an unsigned sense. That is, if there is no + borrow needed from a higher word. That means that overflow from + addition will set C, but overflow from a subtraction will clear C. + We use CC_Cmode to represent detection of overflow from addition as + CCmode is used for 'normal' compare (subtraction) operations. For + ADC, the representation becomes more complex still, since we cannot + use the normal idiom of comparing the result to one of the input + operands; instead we use CC_ADCmode to represent this case. */ CC_MODE (CCFP); CC_MODE (CCFPE); CC_MODE (CC_SWP); CC_MODE (CC_NZ); /* Only N and Z bits of condition flags are valid. */ CC_MODE (CC_Z); /* Only Z bit of condition flags is valid. */ -CC_MODE (CC_C); /* Only C bit of condition flags is valid. */ +CC_MODE (CC_C); /* C represents unsigned overflow of a simple addition. 
*/ +CC_MODE (CC_ADC); /* Unsigned overflow from an ADC (add with carry). */ CC_MODE (CC_V); /* Only V bit of condition flags is valid. */ /* Half-precision floating point for __fp16. */ diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index fd60bddb1e1..da48c0969dc 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -7089,9 +7089,12 @@ aarch64_emit_call_insn (rtx pat) machine_mode aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y) { + machine_mode mode_x = GET_MODE (x); + rtx_code code_x = GET_CODE (x); + /* All floating point compares return CCFP if it is an equality comparison, and CCFPE otherwise. */ - if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT) + if (GET_MODE_CLASS (mode_x) == MODE_FLOAT) { switch (code) { @@ -7122,55 +7125,65 @@ aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y) using the TST instruction with the appropriate bitmask. */ if (y == const0_rtx && REG_P (x) && (code == EQ || code == NE) - && (GET_MODE (x) == HImode || GET_MODE (x) == QImode)) + && (mode_x == HImode || mode_x == QImode)) return CC_NZmode; /* Similarly, comparisons of zero_extends from shorter modes can be performed using an ANDS with an immediate mask. 
*/ - if (y == const0_rtx && GET_CODE (x) == ZERO_EXTEND - && (GET_MODE (x) == SImode || GET_MODE (x) == DImode) + if (y == const0_rtx && code_x == ZERO_EXTEND + && (mode_x == SImode || mode_x == DImode) && (GET_MODE (XEXP (x, 0)) == HImode || GET_MODE (XEXP (x, 0)) == QImode) && (code == EQ || code == NE)) return CC_NZmode; - if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode) + if ((mode_x == SImode || mode_x == DImode) && y == const0_rtx && (code == EQ || code == NE || code == LT || code == GE) - && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND - || GET_CODE (x) == NEG - || (GET_CODE (x) == ZERO_EXTRACT && CONST_INT_P (XEXP (x, 1)) + && (code_x == PLUS || code_x == MINUS || code_x == AND + || code_x == NEG + || (code_x == ZERO_EXTRACT && CONST_INT_P (XEXP (x, 1)) && CONST_INT_P (XEXP (x, 2))))) return CC_NZmode; /* A compare with a shifted operand. Because of canonicalization, the comparison will have to be swapped when we emit the assembly code. */ - if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode) + if ((mode_x == SImode || mode_x == DImode) && (REG_P (y) || GET_CODE (y) == SUBREG || y == const0_rtx) - && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT - || GET_CODE (x) == LSHIFTRT - || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)) + && (code_x == ASHIFT || code_x == ASHIFTRT + || code_x == LSHIFTRT + || code_x == ZERO_EXTEND || code_x == SIGN_EXTEND)) return CC_SWPmode; /* Similarly for a negated operand, but we can only do this for equalities. */ - if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode) + if ((mode_x == SImode || mode_x == DImode) && (REG_P (y) || GET_CODE (y) == SUBREG) && (code == EQ || code == NE) - && GET_CODE (x) == NEG) + && code_x == NEG) return CC_Zmode; - /* A test for unsigned overflow. */ - if ((GET_MODE (x) == DImode || GET_MODE (x) == TImode) - && code == NE - && GET_CODE (x) == PLUS - && GET_CODE (y) == ZERO_EXTEND) + /* A test for unsigned overflow from an addition. 
*/ + if ((mode_x == DImode || mode_x == TImode) + && (code == LTU || code == GEU) + && code_x == PLUS + && rtx_equal_p (XEXP (x, 0), y)) return CC_Cmode; + /* A test for unsigned overflow from an add with carry. */ + if ((mode_x == DImode || mode_x == TImode) + && (code == LTU || code == GEU) + && code_x == PLUS + && CONST_SCALAR_INT_P (y) + && (rtx_mode_t (y, mode_x) + == (wi::shwi (1, mode_x) + << (GET_MODE_BITSIZE (mode_x).to_constant () / 2)))) + return CC_ADCmode; + /* A test for signed overflow. */ - if ((GET_MODE (x) == DImode || GET_MODE (x) == TImode) + if ((mode_x == DImode || mode_x == TImode) && code == NE - && GET_CODE (x) == PLUS + && code_x == PLUS && GET_CODE (y) == SIGN_EXTEND) return CC_Vmode; @@ -7274,8 +7287,17 @@ aarch64_get_condition_code_1 (machine_mode mode, enum rtx_code comp_code) case E_CC_Cmode: switch (comp_code) { - case NE: return AARCH64_CS; - case EQ: return AARCH64_CC; + case LTU: return AARCH64_CS; + case GEU: return AARCH64_CC; + default: return -1; + } + break; + + case E_CC_ADCmode: + switch (comp_code) + { + case GEU: return AARCH64_CS; + case LTU: return AARCH64_CC; default: return -1; } break; diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 513aec1872a..e65936a2cb9 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -1871,7 +1871,7 @@ "" { emit_insn (gen_add3_compareC (operands[0], operands[1], operands[2])); - aarch64_gen_unlikely_cbranch (NE, CC_Cmode, operands[3]); + aarch64_gen_unlikely_cbranch (LTU, CC_Cmode, operands[3]); DONE; }) @@ -1973,7 +1973,7 @@ emit_move_insn (gen_lowpart (DImode, operands[0]), low_dest); emit_move_insn (gen_highpart (DImode, operands[0]), high_dest); - aarch64_gen_unlikely_cbranch (NE, CC_Cmode, operands[3]); + aarch64_gen_unlikely_cbranch (GEU, CC_ADCmode, operands[3]); DONE; }) @@ -2010,69 +2010,36 @@ [(set_attr "type" "alus_sreg,alus_imm,alus_imm")] ) -(define_insn "*add3_compareC_cconly_imm" - [(set (reg:CC_C CC_REGNUM) - 
(ne:CC_C - (plus: - (zero_extend: (match_operand:GPI 0 "register_operand" "r,r")) - (match_operand: 2 "const_scalar_int_operand" "")) - (zero_extend: - (plus:GPI - (match_dup 0) - (match_operand:GPI 1 "aarch64_plus_immediate" "I,J")))))] - "aarch64_zero_extend_const_eq (mode, operands[2], - mode, operands[1])" - "@ - cmn\\t%0, %1 - cmp\\t%0, #%n1" - [(set_attr "type" "alus_imm")] -) - (define_insn "*add3_compareC_cconly" [(set (reg:CC_C CC_REGNUM) - (ne:CC_C - (plus: - (zero_extend: (match_operand:GPI 0 "register_operand" "r")) - (zero_extend: (match_operand:GPI 1 "register_operand" "r"))) - (zero_extend: (plus:GPI (match_dup 0) (match_dup 1)))))] + (compare:CC_C + (plus:GPI + (match_operand:GPI 0 "register_operand" "r,r,r") + (match_operand:GPI 1 "aarch64_plus_operand" "r,I,J")) + (match_dup 0)))] "" - "cmn\\t%0, %1" - [(set_attr "type" "alus_sreg")] -) - -(define_insn "*add3_compareC_imm" - [(set (reg:CC_C CC_REGNUM) - (ne:CC_C - (plus: - (zero_extend: (match_operand:GPI 1 "register_operand" "r,r")) - (match_operand: 3 "const_scalar_int_operand" "")) - (zero_extend: - (plus:GPI - (match_dup 1) - (match_operand:GPI 2 "aarch64_plus_immediate" "I,J"))))) - (set (match_operand:GPI 0 "register_operand" "=r,r") - (plus:GPI (match_dup 1) (match_dup 2)))] - "aarch64_zero_extend_const_eq (mode, operands[3], - mode, operands[2])" "@ - adds\\t%0, %1, %2 - subs\\t%0, %1, #%n2" - [(set_attr "type" "alus_imm")] + cmn\\t%0, %1 + cmn\\t%0, %1 + cmp\\t%0, #%n1" + [(set_attr "type" "alus_sreg,alus_imm,alus_imm")] ) (define_insn "add3_compareC" [(set (reg:CC_C CC_REGNUM) (compare:CC_C - (plus: - (zero_extend: (match_operand:GPI 1 "register_operand" "r")) - (zero_extend: (match_operand:GPI 2 "register_operand" "r"))) - (zero_extend: - (plus:GPI (match_dup 1) (match_dup 2))))) - (set (match_operand:GPI 0 "register_operand" "=r") + (plus:GPI + (match_operand:GPI 1 "register_operand" "r,r,r") + (match_operand:GPI 2 "aarch64_plus_operand" "r,I,J")) + (match_dup 1))) + (set 
(match_operand:GPI 0 "register_operand" "=r,r,r") (plus:GPI (match_dup 1) (match_dup 2)))] "" - "adds\\t%0, %1, %2" - [(set_attr "type" "alus_sreg")] + "@ + adds\\t%0, %1, %2 + adds\\t%0, %1, %2 + subs\\t%0, %1, #%n2" + [(set_attr "type" "alus_sreg,alus_imm,alus_imm")] ) (define_insn "*add3_compareV_cconly_imm" @@ -2470,7 +2437,7 @@ [(set (match_operand:GPI 0 "register_operand") (plus:GPI (plus:GPI - (ne:GPI (reg:CC_C CC_REGNUM) (const_int 0)) + (ltu:GPI (reg:CC_C CC_REGNUM) (const_int 0)) (match_operand:GPI 1 "aarch64_reg_or_zero")) (match_operand:GPI 2 "aarch64_reg_or_zero")))] "" @@ -2509,7 +2476,7 @@ (define_expand "add3_carryinC" [(parallel [(set (match_dup 3) - (compare:CC_C + (compare:CC_ADC (plus: (plus: (match_dup 4) @@ -2517,57 +2484,54 @@ (match_operand:GPI 1 "register_operand" ""))) (zero_extend: (match_operand:GPI 2 "register_operand" ""))) - (zero_extend: - (plus:GPI - (plus:GPI (match_dup 5) (match_dup 1)) - (match_dup 2))))) + (match_dup 6))) (set (match_operand:GPI 0 "register_operand") (plus:GPI (plus:GPI (match_dup 5) (match_dup 1)) (match_dup 2)))])] "" { - operands[3] = gen_rtx_REG (CC_Cmode, CC_REGNUM); - operands[4] = gen_rtx_NE (mode, operands[3], const0_rtx); - operands[5] = gen_rtx_NE (mode, operands[3], const0_rtx); + operands[3] = gen_rtx_REG (CC_ADCmode, CC_REGNUM); + rtx ccin = gen_rtx_REG (CC_Cmode, CC_REGNUM); + operands[4] = gen_rtx_LTU (mode, ccin, const0_rtx); + operands[5] = gen_rtx_LTU (mode, ccin, const0_rtx); + operands[6] = immed_wide_int_const (wi::shwi (1, mode) + << GET_MODE_BITSIZE (mode), + TImode); }) (define_insn "*add3_carryinC_zero" - [(set (reg:CC_C CC_REGNUM) - (compare:CC_C + [(set (reg:CC_ADC CC_REGNUM) + (compare:CC_ADC (plus: (match_operand: 2 "aarch64_carry_operation" "") (zero_extend: (match_operand:GPI 1 "register_operand" "r"))) - (zero_extend: - (plus:GPI - (match_operand:GPI 3 "aarch64_carry_operation" "") - (match_dup 1))))) + (match_operand 4 "const_scalar_int_operand" ""))) (set (match_operand:GPI 0 
"register_operand" "=r") - (plus:GPI (match_dup 3) (match_dup 1)))] - "" + (plus:GPI (match_operand:GPI 3 "aarch64_carry_operation" "") + (match_dup 1)))] + "rtx_mode_t (operands[4], mode) + == (wi::shwi (1, mode) << (unsigned) GET_MODE_BITSIZE (mode))" "adcs\\t%0, %1, zr" [(set_attr "type" "adc_reg")] ) (define_insn "*add3_carryinC" - [(set (reg:CC_C CC_REGNUM) - (compare:CC_C + [(set (reg:CC_ADC CC_REGNUM) + (compare:CC_ADC (plus: (plus: (match_operand: 3 "aarch64_carry_operation" "") (zero_extend: (match_operand:GPI 1 "register_operand" "r"))) (zero_extend: (match_operand:GPI 2 "register_operand" "r"))) - (zero_extend: - (plus:GPI - (plus:GPI - (match_operand:GPI 4 "aarch64_carry_operation" "") - (match_dup 1)) - (match_dup 2))))) + (match_operand 5 "const_scalar_int_operand" ""))) (set (match_operand:GPI 0 "register_operand" "=r") (plus:GPI - (plus:GPI (match_dup 4) (match_dup 1)) + (plus:GPI (match_operand:GPI 4 "aarch64_carry_operation" "") + (match_dup 1)) (match_dup 2)))] - "" + "rtx_mode_t (operands[5], mode) + == (wi::shwi (1, mode) << (unsigned) GET_MODE_BITSIZE (mode))" "adcs\\t%0, %1, %2" [(set_attr "type" "adc_reg")] ) @@ -2594,8 +2558,8 @@ "" { rtx cc = gen_rtx_REG (CC_Cmode, CC_REGNUM); - operands[3] = gen_rtx_NE (mode, cc, const0_rtx); - operands[4] = gen_rtx_NE (mode, cc, const0_rtx); + operands[3] = gen_rtx_LTU (mode, cc, const0_rtx); + operands[4] = gen_rtx_LTU (mode, cc, const0_rtx); }) (define_insn "*add3_carryinV_zero" diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md index 9103b8284d6..855cf7b52f8 100644 --- a/gcc/config/aarch64/predicates.md +++ b/gcc/config/aarch64/predicates.md @@ -347,23 +347,37 @@ (match_code "eq,ne")) (define_special_predicate "aarch64_carry_operation" - (match_code "ne,geu") + (match_code "ltu,geu") { if (XEXP (op, 1) != const0_rtx) return false; - machine_mode ccmode = (GET_CODE (op) == NE ? 
CC_Cmode : CCmode); rtx op0 = XEXP (op, 0); - return REG_P (op0) && REGNO (op0) == CC_REGNUM && GET_MODE (op0) == ccmode; + if (!REG_P (op0) || REGNO (op0) != CC_REGNUM) + return false; + machine_mode ccmode = GET_MODE (op0); + if (ccmode == CC_Cmode) + return GET_CODE (op) == LTU; + if (ccmode == CC_ADCmode || ccmode == CCmode) + return GET_CODE (op) == GEU; + return false; }) +; borrow is essentially the inverse of carry since the sense of the C flag +; is inverted during subtraction. See the note in aarch64-modes.def. (define_special_predicate "aarch64_borrow_operation" - (match_code "eq,ltu") + (match_code "geu,ltu") { if (XEXP (op, 1) != const0_rtx) return false; - machine_mode ccmode = (GET_CODE (op) == EQ ? CC_Cmode : CCmode); rtx op0 = XEXP (op, 0); - return REG_P (op0) && REGNO (op0) == CC_REGNUM && GET_MODE (op0) == ccmode; + if (!REG_P (op0) || REGNO (op0) != CC_REGNUM) + return false; + machine_mode ccmode = GET_MODE (op0); + if (ccmode == CC_Cmode) + return GET_CODE (op) == GEU; + if (ccmode == CC_ADCmode || ccmode == CCmode) + return GET_CODE (op) == LTU; + return false; }) ;; True if the operand is memory reference suitable for a load/store exclusive. -- 2.30.2