From 6a0d3939018de736da03cb54a86fa2395b5bc464 Mon Sep 17 00:00:00 2001
From: Steve Ellcey
Date: Wed, 10 Apr 2019 20:28:19 +0000
Subject: [PATCH] re PR rtl-optimization/87763 (aarch64 target testcases fail after r265398)

2019-04-10  Steve Ellcey

	PR rtl-optimization/87763
	* config/aarch64/aarch64-protos.h (aarch64_masks_and_shift_for_bfi_p):
	New prototype.
	* config/aarch64/aarch64.c (aarch64_masks_and_shift_for_bfi_p):
	New function.
	* config/aarch64/aarch64.md (*aarch64_bfi<GPI:mode>5_shift):
	New instruction.
	(*aarch64_bfi<GPI:mode>5_shift_alt): Ditto.
	(*aarch64_bfi<GPI:mode>4_noand): Ditto.
	(*aarch64_bfi<GPI:mode>4_noand_alt): Ditto.
	(*aarch64_bfi<GPI:mode>4_noshift): Ditto.

From-SVN: r270266
---
 gcc/ChangeLog                       | 14 +++++
 gcc/config/aarch64/aarch64-protos.h |  3 +
 gcc/config/aarch64/aarch64.c        | 29 ++++++++++
 gcc/config/aarch64/aarch64.md       | 88 +++++++++++++++++++++++++++++
 4 files changed, 134 insertions(+)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 0219ddb8b02..a9db4b424d4 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,17 @@
+2019-04-10  Steve Ellcey
+
+	PR rtl-optimization/87763
+	* config/aarch64/aarch64-protos.h (aarch64_masks_and_shift_for_bfi_p):
+	New prototype.
+	* config/aarch64/aarch64.c (aarch64_masks_and_shift_for_bfi_p):
+	New function.
+	* config/aarch64/aarch64.md (*aarch64_bfi<GPI:mode>5_shift):
+	New instruction.
+	(*aarch64_bfi<GPI:mode>5_shift_alt): Ditto.
+	(*aarch64_bfi<GPI:mode>4_noand): Ditto.
+	(*aarch64_bfi<GPI:mode>4_noand_alt): Ditto.
+	(*aarch64_bfi<GPI:mode>4_noshift): Ditto.
+
 2019-04-10  Jonathan Wakely
 
 	* doc/invoke.texi (Optimize Options): Change "Nevertheless" to
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index b035e35f33b..b6c0d0a8eb6 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -429,6 +429,9 @@ bool aarch64_label_mentioned_p (rtx);
 void aarch64_declare_function_name (FILE *, const char*, tree);
 bool aarch64_legitimate_pic_operand_p (rtx);
 bool aarch64_mask_and_shift_for_ubfiz_p (scalar_int_mode, rtx, rtx);
+bool aarch64_masks_and_shift_for_bfi_p (scalar_int_mode, unsigned HOST_WIDE_INT,
+					unsigned HOST_WIDE_INT,
+					unsigned HOST_WIDE_INT);
 bool aarch64_zero_extend_const_eq (machine_mode, rtx, machine_mode, rtx);
 bool aarch64_move_imm (HOST_WIDE_INT, machine_mode);
 opt_machine_mode aarch64_sve_pred_mode (unsigned int);
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 95e5b039865..9be75485ce3 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -9336,6 +9336,35 @@ aarch64_mask_and_shift_for_ubfiz_p (scalar_int_mode mode, rtx mask,
 	  & ((HOST_WIDE_INT_1U << INTVAL (shft_amnt)) - 1)) == 0;
 }
 
+/* Return true if the masks and a shift amount from an RTX of the form
+   ((x & MASK1) | ((y << SHIFT_AMNT) & MASK2)) are valid to combine into
+   a BFI instruction of mode MODE.  See *aarch64_bfi patterns.  */
+
+bool
+aarch64_masks_and_shift_for_bfi_p (scalar_int_mode mode,
+				   unsigned HOST_WIDE_INT mask1,
+				   unsigned HOST_WIDE_INT shft_amnt,
+				   unsigned HOST_WIDE_INT mask2)
+{
+  unsigned HOST_WIDE_INT t;
+
+  /* Verify that there is no overlap in what bits are set in the two masks.  */
+  if (mask1 != ~mask2)
+    return false;
+
+  /* Verify that mask2 is not all zeros or ones.  */
+  if (mask2 == 0 || mask2 == HOST_WIDE_INT_M1U)
+    return false;
+
+  /* The shift amount should always be less than the mode size.  */
+  gcc_assert (shft_amnt < GET_MODE_BITSIZE (mode));
+
+  /* Verify that the mask being shifted is contiguous and would be in the
+     least significant bits after shifting by shft_amnt.  */
+  t = mask2 + (HOST_WIDE_INT_1U << shft_amnt);
+  return (t == (t & -t));
+}
+
 /* Calculate the cost of calculating X, storing it in *COST.  Result
    is true if the total cost of the operation has now been calculated.  */
 static bool
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index ab8786a933e..e0df975a032 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -5491,6 +5491,94 @@
   [(set_attr "type" "bfm")]
 )
 
+;; Match a bfi instruction where the shift of OP3 means that we are
+;; actually copying the least significant bits of OP3 into OP0 by way
+;; of the AND masks and the IOR instruction.  A similar instruction
+;; with the two parts of the IOR swapped around was never triggered
+;; in a bootstrap build and test of GCC so it was not included.
+
+(define_insn "*aarch64_bfi<GPI:mode>5_shift"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+        (ior:GPI (and:GPI (match_operand:GPI 1 "register_operand" "0")
+                          (match_operand:GPI 2 "const_int_operand" "n"))
+                 (and:GPI (ashift:GPI
+                           (match_operand:GPI 3 "register_operand" "r")
+                           (match_operand:GPI 4 "aarch64_simd_shift_imm_<mode>" "n"))
+                          (match_operand:GPI 5 "const_int_operand" "n"))))]
+  "aarch64_masks_and_shift_for_bfi_p (<MODE>mode, UINTVAL (operands[2]),
+				      UINTVAL (operands[4]),
+				      UINTVAL(operands[5]))"
+  "bfi\t%<GPI:w>0, %<GPI:w>3, %4, %P5"
+  [(set_attr "type" "bfm")]
+)
+
+(define_insn "*aarch64_bfi<GPI:mode>5_shift_alt"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+        (ior:GPI (and:GPI (ashift:GPI
+                           (match_operand:GPI 1 "register_operand" "r")
+                           (match_operand:GPI 2 "aarch64_simd_shift_imm_<mode>" "n"))
+                          (match_operand:GPI 3 "const_int_operand" "n"))
+                 (and:GPI (match_operand:GPI 4 "register_operand" "0")
+                          (match_operand:GPI 5 "const_int_operand" "n"))))]
+  "aarch64_masks_and_shift_for_bfi_p (<MODE>mode, UINTVAL (operands[5]),
+				      UINTVAL (operands[2]),
+				      UINTVAL(operands[3]))"
+  "bfi\t%<GPI:w>0, %<GPI:w>1, %2, %P3"
+  [(set_attr "type" "bfm")]
+)
+
+;; Like *aarch64_bfi<GPI:mode>5_shift but with no and of the ashift because
+;; the shift is large enough to remove the need for an AND instruction.
+
+(define_insn "*aarch64_bfi<GPI:mode>4_noand"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+        (ior:GPI (and:GPI (match_operand:GPI 1 "register_operand" "0")
+                          (match_operand:GPI 2 "const_int_operand" "n"))
+                 (ashift:GPI
+                  (match_operand:GPI 3 "register_operand" "r")
+                  (match_operand:GPI 4 "aarch64_simd_shift_imm_<mode>" "n"))))]
+  "aarch64_masks_and_shift_for_bfi_p (<MODE>mode, UINTVAL (operands[2]),
+				      UINTVAL (operands[4]),
+				      HOST_WIDE_INT_M1U << UINTVAL (operands[4]) )"
+{
+  operands[5] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode) - UINTVAL (operands[4]));
+  return "bfi\t%<GPI:w>0, %<GPI:w>3, %4, %5";
+}
+  [(set_attr "type" "bfm")]
+)
+
+(define_insn "*aarch64_bfi<GPI:mode>4_noand_alt"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+        (ior:GPI (ashift:GPI
+                  (match_operand:GPI 1 "register_operand" "r")
+                  (match_operand:GPI 2 "aarch64_simd_shift_imm_<mode>" "n"))
+                 (and:GPI (match_operand:GPI 3 "register_operand" "0")
+                          (match_operand:GPI 4 "const_int_operand" "n"))))]
+  "aarch64_masks_and_shift_for_bfi_p (<MODE>mode, UINTVAL (operands[4]),
+				      UINTVAL (operands[2]),
+				      HOST_WIDE_INT_M1U << UINTVAL (operands[2]) )"
+{
+  operands[5] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode) - UINTVAL (operands[2]));
+  return "bfi\t%<GPI:w>0, %<GPI:w>1, %2, %5";
+}
+  [(set_attr "type" "bfm")]
+)
+
+;; Like *aarch64_bfi<GPI:mode>5_shift but with no shifting, we are just
+;; copying the least significant bits of OP3 to OP0.
+
+(define_insn "*aarch64_bfi<GPI:mode>4_noshift"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+        (ior:GPI (and:GPI (match_operand:GPI 1 "register_operand" "0")
+                          (match_operand:GPI 2 "const_int_operand" "n"))
+                 (and:GPI (match_operand:GPI 3 "register_operand" "r")
+                          (match_operand:GPI 4 "const_int_operand" "n"))))]
+  "aarch64_masks_and_shift_for_bfi_p (<MODE>mode, UINTVAL (operands[2]), 0,
+				      UINTVAL (operands[4]))"
+  "bfi\t%<GPI:w>0, %<GPI:w>3, 0, %P4"
+  [(set_attr "type" "bfm")]
+)
+
 (define_insn "*extr_insv_lower_reg<mode>"
   [(set (zero_extract:GPI (match_operand:GPI 0 "register_operand" "+r")
			  (match_operand 1 "const_int_operand" "n")
-- 
2.30.2
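
For context, a short C sketch of the kind of bitfield insertion these patterns target.  It is illustrative only; the function name and mask values are not taken from the patch or the GCC testsuite.  With mask2 = 0x0000ff00, mask1 = ~mask2 and a shift of 8, aarch64_masks_and_shift_for_bfi_p holds, so the *aarch64_bfi<GPI:mode>5_shift pattern should let combine collapse the IOR-of-ANDs form into a single BFI rather than separate lsl/and/orr instructions.

/* Illustrative example, not part of the patch.  Expected aarch64 output is
   roughly "bfi w0, w1, 8, 8" followed by "ret"; the %P operand modifier used
   by the pattern prints the popcount of the mask (here 8), i.e. the width of
   the inserted field.  */
unsigned int
insert_byte (unsigned int dst, unsigned int src)
{
  return (dst & ~0x0000ff00u) | ((src << 8) & 0x0000ff00u);
}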