From 5c7c6c5fc13503b2b71aee34d57370ae03531809 Mon Sep 17 00:00:00 2001 From: Wilco Dijkstra Date: Thu, 22 Aug 2019 15:06:37 +0000 Subject: [PATCH] [ARM] Cleanup DImode shifts Like the logical operations, expand all shifts early rather than only sometimes. The Neon shift expansions are never emitted (not even with -fneon-for-64bits), so they are not useful. So all the late expansions and Neon shift patterns can be removed, and shifts are more optimized as a result. Since some extend patterns use Neon DImode shifts, remove the Neon extend variants and related splits. A simple example now generates the same efficient code after this patch with -mfpu=neon and -mfpu=vfp (previously just the fact of having Neon enabled resulted inefficient code for no reason). unsigned long long f(unsigned long long x, unsigned long long y) { return x & (y >> 33); } Before: strd r4, r5, [sp, #-8]! lsr r4, r3, #1 mov r5, #0 and r1, r1, r5 and r0, r0, r4 ldrd r4, r5, [sp] add sp, sp, #8 bx lr After: and r0, r0, r3, lsr #1 mov r1, #0 bx lr Bootstrap and regress OK on arm-none-linux-gnueabihf --with-cpu=cortex-a57 gcc/ * config/arm/iterators.md (qhs_extenddi_cstr): Update. (qhs_extenddi_cstr): Likewise. * config/arm/arm.md (ashldi3): Always expand early. (ashlsi3): Likewise. (ashrsi3): Likewise. (zero_extenddi2): Remove Neon variants. (extenddi2): Likewise. * config/arm/neon.md (ashldi3_neon_noclobber): Remove. (signed_shift_di3_neon): Likewise. (unsigned_shift_di3_neon): Likewise. (ashrdi3_neon_imm_noclobber): Likewise. (lshrdi3_neon_imm_noclobber): Likewise. (di3_neon): Likewise. (split extend): Remove DI extend split patterns. gcc/testsuite/ * gcc.target/arm/neon-extend-1.c: Remove test. * gcc.target/arm/neon-extend-2.c: Remove test. From-SVN: r274824 --- gcc/ChangeLog | 17 ++ gcc/config/arm/arm.md | 126 ++-------- gcc/config/arm/iterators.md | 4 +- gcc/config/arm/neon.md | 229 ------------------- gcc/testsuite/ChangeLog | 5 + gcc/testsuite/gcc.target/arm/neon-extend-1.c | 13 -- gcc/testsuite/gcc.target/arm/neon-extend-2.c | 13 -- 7 files changed, 48 insertions(+), 359 deletions(-) delete mode 100644 gcc/testsuite/gcc.target/arm/neon-extend-1.c delete mode 100644 gcc/testsuite/gcc.target/arm/neon-extend-2.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index e6057c41824..6c1335e918a 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,20 @@ +2019-08-22 Wilco Dijkstra + + * config/arm/iterators.md (qhs_extenddi_cstr): Update. + (qhs_extenddi_cstr): Likewise. + * config/arm/arm.md (ashldi3): Always expand early. + (ashlsi3): Likewise. + (ashrsi3): Likewise. + (zero_extenddi2): Remove Neon variants. + (extenddi2): Likewise. + * config/arm/neon.md (ashldi3_neon_noclobber): Remove. + (signed_shift_di3_neon): Likewise. + (unsigned_shift_di3_neon): Likewise. + (ashrdi3_neon_imm_noclobber): Likewise. + (lshrdi3_neon_imm_noclobber): Likewise. + (di3_neon): Likewise. + (split extend): Remove DI extend split patterns. + 2019-08-22 Wilco Dijkstra * config/arm/arm.md (split and/eor/ior): Remove Neon check. diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md index eb5838d24a8..4f6e3aa1661 100644 --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -3621,44 +3621,14 @@ (define_expand "ashldi3" [(set (match_operand:DI 0 "s_register_operand") (ashift:DI (match_operand:DI 1 "s_register_operand") - (match_operand:SI 2 "general_operand")))] + (match_operand:SI 2 "reg_or_int_operand")))] "TARGET_32BIT" " - if (TARGET_NEON) - { - /* Delay the decision whether to use NEON or core-regs until - register allocation. */ - emit_insn (gen_ashldi3_neon (operands[0], operands[1], operands[2])); - DONE; - } - else - { - /* Only the NEON case can handle in-memory shift counts. */ - if (!reg_or_int_operand (operands[2], SImode)) - operands[2] = force_reg (SImode, operands[2]); - } - - if (!CONST_INT_P (operands[2]) && TARGET_REALLY_IWMMXT) - ; /* No special preparation statements; expand pattern as above. */ - else - { - rtx scratch1, scratch2; - - /* Ideally we should use iwmmxt here if we could know that operands[1] - ends up already living in an iwmmxt register. Otherwise it's - cheaper to have the alternate code being generated than moving - values to iwmmxt regs and back. */ - - /* Expand operation using core-registers. - 'FAIL' would achieve the same thing, but this is a bit smarter. */ - scratch1 = gen_reg_rtx (SImode); - scratch2 = gen_reg_rtx (SImode); - arm_emit_coreregs_64bit_shift (ASHIFT, operands[0], operands[1], - operands[2], scratch1, scratch2); - DONE; - } - " -) + arm_emit_coreregs_64bit_shift (ASHIFT, operands[0], operands[1], + operands[2], gen_reg_rtx (SImode), + gen_reg_rtx (SImode)); + DONE; +") (define_expand "ashlsi3" [(set (match_operand:SI 0 "s_register_operand") @@ -3681,35 +3651,11 @@ (match_operand:SI 2 "reg_or_int_operand")))] "TARGET_32BIT" " - if (TARGET_NEON) - { - /* Delay the decision whether to use NEON or core-regs until - register allocation. */ - emit_insn (gen_ashrdi3_neon (operands[0], operands[1], operands[2])); - DONE; - } - - if (!CONST_INT_P (operands[2]) && TARGET_REALLY_IWMMXT) - ; /* No special preparation statements; expand pattern as above. */ - else - { - rtx scratch1, scratch2; - - /* Ideally we should use iwmmxt here if we could know that operands[1] - ends up already living in an iwmmxt register. Otherwise it's - cheaper to have the alternate code being generated than moving - values to iwmmxt regs and back. */ - - /* Expand operation using core-registers. - 'FAIL' would achieve the same thing, but this is a bit smarter. */ - scratch1 = gen_reg_rtx (SImode); - scratch2 = gen_reg_rtx (SImode); - arm_emit_coreregs_64bit_shift (ASHIFTRT, operands[0], operands[1], - operands[2], scratch1, scratch2); - DONE; - } - " -) + arm_emit_coreregs_64bit_shift (ASHIFTRT, operands[0], operands[1], + operands[2], gen_reg_rtx (SImode), + gen_reg_rtx (SImode)); + DONE; +") (define_expand "ashrsi3" [(set (match_operand:SI 0 "s_register_operand") @@ -3729,35 +3675,11 @@ (match_operand:SI 2 "reg_or_int_operand")))] "TARGET_32BIT" " - if (TARGET_NEON) - { - /* Delay the decision whether to use NEON or core-regs until - register allocation. */ - emit_insn (gen_lshrdi3_neon (operands[0], operands[1], operands[2])); - DONE; - } - - if (!CONST_INT_P (operands[2]) && TARGET_REALLY_IWMMXT) - ; /* No special preparation statements; expand pattern as above. */ - else - { - rtx scratch1, scratch2; - - /* Ideally we should use iwmmxt here if we could know that operands[1] - ends up already living in an iwmmxt register. Otherwise it's - cheaper to have the alternate code being generated than moving - values to iwmmxt regs and back. */ - - /* Expand operation using core-registers. - 'FAIL' would achieve the same thing, but this is a bit smarter. */ - scratch1 = gen_reg_rtx (SImode); - scratch2 = gen_reg_rtx (SImode); - arm_emit_coreregs_64bit_shift (LSHIFTRT, operands[0], operands[1], - operands[2], scratch1, scratch2); - DONE; - } - " -) + arm_emit_coreregs_64bit_shift (LSHIFTRT, operands[0], operands[1], + operands[2], gen_reg_rtx (SImode), + gen_reg_rtx (SImode)); + DONE; +") (define_expand "lshrsi3" [(set (match_operand:SI 0 "s_register_operand") @@ -4782,30 +4704,30 @@ ;; Zero and sign extension instructions. (define_insn "zero_extenddi2" - [(set (match_operand:DI 0 "s_register_operand" "=w,r,?r,w") + [(set (match_operand:DI 0 "s_register_operand" "=r,?r") (zero_extend:DI (match_operand:QHSI 1 "" "")))] "TARGET_32BIT " "#" - [(set_attr "length" "8,4,8,8") - (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits") + [(set_attr "length" "4,8") + (set_attr "arch" "*,*") (set_attr "ce_count" "2") (set_attr "predicable" "yes") - (set_attr "type" "multiple,mov_reg,multiple,multiple")] + (set_attr "type" "mov_reg,multiple")] ) (define_insn "extenddi2" - [(set (match_operand:DI 0 "s_register_operand" "=w,r,?r,?r,w") + [(set (match_operand:DI 0 "s_register_operand" "=r,?r,?r") (sign_extend:DI (match_operand:QHSI 1 "" "")))] "TARGET_32BIT " "#" - [(set_attr "length" "8,4,8,8,8") + [(set_attr "length" "4,8,8") (set_attr "ce_count" "2") (set_attr "shift" "1") (set_attr "predicable" "yes") - (set_attr "arch" "neon_for_64bits,*,a,t,avoid_neon_for_64bits") - (set_attr "type" "multiple,mov_reg,multiple,multiple,multiple")] + (set_attr "arch" "*,a,t") + (set_attr "type" "mov_reg,multiple,multiple")] ) ;; Splits for all extensions to DImode diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md index eca16636ade..fa6f0c0529d 100644 --- a/gcc/config/arm/iterators.md +++ b/gcc/config/arm/iterators.md @@ -741,8 +741,8 @@ (define_mode_attr qhs_extenddi_op [(SI "s_register_operand") (HI "nonimmediate_operand") (QI "arm_reg_or_extendqisi_mem_op")]) -(define_mode_attr qhs_extenddi_cstr [(SI "r,0,r,r,r") (HI "r,0,rm,rm,r") (QI "r,0,rUq,rm,r")]) -(define_mode_attr qhs_zextenddi_cstr [(SI "r,0,r,r") (HI "r,0,rm,r") (QI "r,0,rm,r")]) +(define_mode_attr qhs_extenddi_cstr [(SI "0,r,r") (HI "0,rm,rm") (QI "0,rUq,rm")]) +(define_mode_attr qhs_zextenddi_cstr [(SI "0,r") (HI "0,rm") (QI "0,rm")]) ;; Mode attributes used for fixed-point support. (define_mode_attr qaddsub_suf [(V4UQQ "8") (V2UHQ "16") (UQQ "8") (UHQ "16") diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index ef73c77abee..757f2c0f537 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -1135,173 +1135,6 @@ [(set_attr "type" "neon_load1_1reg,neon_from_gp")] ) -(define_insn "ashldi3_neon_noclobber" - [(set (match_operand:DI 0 "s_register_operand" "=w,w") - (ashift:DI (match_operand:DI 1 "s_register_operand" " w,w") - (match_operand:DI 2 "reg_or_int_operand" " i,w")))] - "TARGET_NEON && reload_completed - && (!CONST_INT_P (operands[2]) - || (INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) < 64))" - "@ - vshl.u64\t%P0, %P1, %2 - vshl.u64\t%P0, %P1, %P2" - [(set_attr "type" "neon_shift_imm, neon_shift_reg")] -) - -(define_insn_and_split "ashldi3_neon" - [(set (match_operand:DI 0 "s_register_operand" "= w, w, &r, r, &r, ?w,?w") - (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0, r, 0w, w") - (match_operand:SI 2 "general_operand" "rUm, i, r, i, i,rUm, i"))) - (clobber (match_scratch:SI 3 "= X, X, &r, X, X, X, X")) - (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X, X")) - (clobber (match_scratch:DI 5 "=&w, X, X, X, X, &w, X")) - (clobber (reg:CC_C CC_REGNUM))] - "TARGET_NEON" - "#" - "TARGET_NEON && reload_completed" - [(const_int 0)] - " - { - if (IS_VFP_REGNUM (REGNO (operands[0]))) - { - if (CONST_INT_P (operands[2])) - { - if (INTVAL (operands[2]) < 1) - { - emit_insn (gen_movdi (operands[0], operands[1])); - DONE; - } - else if (INTVAL (operands[2]) > 63) - operands[2] = gen_rtx_CONST_INT (VOIDmode, 63); - } - else - { - emit_insn (gen_neon_load_count (operands[5], operands[2])); - operands[2] = operands[5]; - } - - /* Ditch the unnecessary clobbers. */ - emit_insn (gen_ashldi3_neon_noclobber (operands[0], operands[1], - operands[2])); - } - else - { - /* The shift expanders support either full overlap or no overlap. */ - gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1]) - || REGNO (operands[0]) == REGNO (operands[1])); - - arm_emit_coreregs_64bit_shift (ASHIFT, operands[0], operands[1], - operands[2], operands[3], operands[4]); - } - DONE; - }" - [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits") - (set_attr "opt" "*,*,speed,speed,speed,*,*") - (set_attr "type" "multiple")] -) - -; The shift amount needs to be negated for right-shifts -(define_insn "signed_shift_di3_neon" - [(set (match_operand:DI 0 "s_register_operand" "=w") - (unspec:DI [(match_operand:DI 1 "s_register_operand" " w") - (match_operand:DI 2 "s_register_operand" " w")] - UNSPEC_ASHIFT_SIGNED))] - "TARGET_NEON && reload_completed" - "vshl.s64\t%P0, %P1, %P2" - [(set_attr "type" "neon_shift_reg")] -) - -; The shift amount needs to be negated for right-shifts -(define_insn "unsigned_shift_di3_neon" - [(set (match_operand:DI 0 "s_register_operand" "=w") - (unspec:DI [(match_operand:DI 1 "s_register_operand" " w") - (match_operand:DI 2 "s_register_operand" " w")] - UNSPEC_ASHIFT_UNSIGNED))] - "TARGET_NEON && reload_completed" - "vshl.u64\t%P0, %P1, %P2" - [(set_attr "type" "neon_shift_reg")] -) - -(define_insn "ashrdi3_neon_imm_noclobber" - [(set (match_operand:DI 0 "s_register_operand" "=w") - (ashiftrt:DI (match_operand:DI 1 "s_register_operand" " w") - (match_operand:DI 2 "const_int_operand" " i")))] - "TARGET_NEON && reload_completed - && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64" - "vshr.s64\t%P0, %P1, %2" - [(set_attr "type" "neon_shift_imm")] -) - -(define_insn "lshrdi3_neon_imm_noclobber" - [(set (match_operand:DI 0 "s_register_operand" "=w") - (lshiftrt:DI (match_operand:DI 1 "s_register_operand" " w") - (match_operand:DI 2 "const_int_operand" " i")))] - "TARGET_NEON && reload_completed - && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64" - "vshr.u64\t%P0, %P1, %2" - [(set_attr "type" "neon_shift_imm")] -) - -;; ashrdi3_neon -;; lshrdi3_neon -(define_insn_and_split "di3_neon" - [(set (match_operand:DI 0 "s_register_operand" "= w, w, &r, r, &r,?w,?w") - (RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0, r,0w, w") - (match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, i, r, i"))) - (clobber (match_scratch:SI 3 "=2r, X, &r, X, X,2r, X")) - (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X, X")) - (clobber (match_scratch:DI 5 "=&w, X, X, X, X,&w, X")) - (clobber (reg:CC CC_REGNUM))] - "TARGET_NEON" - "#" - "TARGET_NEON && reload_completed" - [(const_int 0)] - " - { - if (IS_VFP_REGNUM (REGNO (operands[0]))) - { - if (CONST_INT_P (operands[2])) - { - if (INTVAL (operands[2]) < 1) - { - emit_insn (gen_movdi (operands[0], operands[1])); - DONE; - } - else if (INTVAL (operands[2]) > 64) - operands[2] = gen_rtx_CONST_INT (VOIDmode, 64); - - /* Ditch the unnecessary clobbers. */ - emit_insn (gen_di3_neon_imm_noclobber (operands[0], - operands[1], - operands[2])); - } - else - { - /* We must use a negative left-shift. */ - emit_insn (gen_negsi2 (operands[3], operands[2])); - emit_insn (gen_neon_load_count (operands[5], operands[3])); - emit_insn (gen__shift_di3_neon (operands[0], operands[1], - operands[5])); - } - } - else - { - /* The shift expanders support either full overlap or no overlap. */ - gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1]) - || REGNO (operands[0]) == REGNO (operands[1])); - - /* This clobbers CC (ASHIFTRT by register only). */ - arm_emit_coreregs_64bit_shift (, operands[0], operands[1], - operands[2], operands[3], operands[4]); - } - - DONE; - }" - [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits") - (set_attr "opt" "*,*,speed,speed,speed,*,*") - (set_attr "type" "multiple")] -) - ;; Widening operations (define_expand "widen_ssum3" @@ -6792,65 +6625,3 @@ if (BYTES_BIG_ENDIAN) "vabd. %0, %1, %2" [(set_attr "type" "neon_fp_abd_s")] ) - -;; Copy from core-to-neon regs, then extend, not vice-versa - -(define_split - [(set (match_operand:DI 0 "s_register_operand" "") - (sign_extend:DI (match_operand:SI 1 "s_register_operand" "")))] - "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))" - [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1))) - (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 32)))] - { - operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0])); - }) - -(define_split - [(set (match_operand:DI 0 "s_register_operand" "") - (sign_extend:DI (match_operand:HI 1 "s_register_operand" "")))] - "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))" - [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1))) - (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 48)))] - { - operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0])); - }) - -(define_split - [(set (match_operand:DI 0 "s_register_operand" "") - (sign_extend:DI (match_operand:QI 1 "s_register_operand" "")))] - "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))" - [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1))) - (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 56)))] - { - operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0])); - }) - -(define_split - [(set (match_operand:DI 0 "s_register_operand" "") - (zero_extend:DI (match_operand:SI 1 "s_register_operand" "")))] - "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))" - [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1))) - (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 32)))] - { - operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0])); - }) - -(define_split - [(set (match_operand:DI 0 "s_register_operand" "") - (zero_extend:DI (match_operand:HI 1 "s_register_operand" "")))] - "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))" - [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1))) - (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 48)))] - { - operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0])); - }) - -(define_split - [(set (match_operand:DI 0 "s_register_operand" "") - (zero_extend:DI (match_operand:QI 1 "s_register_operand" "")))] - "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))" - [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1))) - (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 56)))] - { - operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0])); - }) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 03fd8328577..a3890674c86 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2019-08-22 Wilco Dijkstra + + * gcc.target/arm/neon-extend-1.c: Remove test. + * gcc.target/arm/neon-extend-2.c: Remove test. + 2019-08-22 Sylvia Taylor * gcc.target/aarch64/advsimd-intrinsics/vld1x4.c: New test. diff --git a/gcc/testsuite/gcc.target/arm/neon-extend-1.c b/gcc/testsuite/gcc.target/arm/neon-extend-1.c deleted file mode 100644 index cfe83ce1bde..00000000000 --- a/gcc/testsuite/gcc.target/arm/neon-extend-1.c +++ /dev/null @@ -1,13 +0,0 @@ -/* { dg-require-effective-target arm_neon_hw } */ -/* { dg-options "-O2" } */ -/* { dg-add-options arm_neon } */ - -void -f (unsigned int a) -{ - unsigned long long b = a; - asm volatile ("@ extended to %0" : : "w" (b)); -} - -/* { dg-final { scan-assembler "vdup.32" } } */ -/* { dg-final { scan-assembler "vshr.u64" } } */ diff --git a/gcc/testsuite/gcc.target/arm/neon-extend-2.c b/gcc/testsuite/gcc.target/arm/neon-extend-2.c deleted file mode 100644 index 1c5a17e4278..00000000000 --- a/gcc/testsuite/gcc.target/arm/neon-extend-2.c +++ /dev/null @@ -1,13 +0,0 @@ -/* { dg-require-effective-target arm_neon_hw } */ -/* { dg-options "-O2" } */ -/* { dg-add-options arm_neon } */ - -void -f (int a) -{ - long long b = a; - asm volatile ("@ extended to %0" : : "w" (b)); -} - -/* { dg-final { scan-assembler "vdup.32" } } */ -/* { dg-final { scan-assembler "vshr.s64" } } */ -- 2.30.2