From: Dennis Zhang Date: Fri, 23 Oct 2020 07:53:53 +0000 (+0100) Subject: arm: Auto-vectorization for MVE: vsub X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=98161c248c88f873bbffba23664c540f551d89d5;p=gcc.git arm: Auto-vectorization for MVE: vsub This patch enables MVE vsub instructions for auto-vectorization. The sub<mode>3 in vec-common.md is modified to use new mode macros to include MVE extension for vectorization. MVE vsub insns in mve.md are modified to use 'minus' instead of unspec expression to support sub<mode>3. Use VDQ instead of VALL to cover all supported modes. The redundant sub<mode>3 insns in neon.md are then removed. gcc/ChangeLog: 2020-10-23 Dennis Zhang * config/arm/mve.md (mve_vsubq<mode>): New entry for vsub instruction using expression 'minus'. (mve_vsubq_f<mode>): Use minus instead of VSUBQ_F unspec. * config/arm/neon.md (sub<mode>3, sub<mode>3_fp16): Removed. (neon_vsub<mode>): Use gen_sub<mode>3 instead of gen_sub<mode>3_fp16. * config/arm/vec-common.md (sub<mode>3): Use the new mode macros ARM_HAVE_<MODE>_ARITH. Use iterator VDQ instead of VALL. gcc/testsuite/ChangeLog: * gcc.target/arm/simd/mve-vsub_1.c: New test. --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index c00f7589c7e..8bc8f3e3761 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,13 @@ +2020-10-23 Dennis Zhang + + * config/arm/mve.md (mve_vsubq<mode>): New entry for vsub instruction + using expression 'minus'. + (mve_vsubq_f<mode>): Use minus instead of VSUBQ_F unspec. + * config/arm/neon.md (sub<mode>3, sub<mode>3_fp16): Removed. + (neon_vsub<mode>): Use gen_sub<mode>3 instead of gen_sub<mode>3_fp16. + * config/arm/vec-common.md (sub<mode>3): Use the new mode macros + ARM_HAVE_<MODE>_ARITH. Use iterator VDQ instead of VALL. 
+ 2020-10-22 Alan Modra * config/rs6000/rs6000.c (rs6000_emit_xxspltidp_v2df): Delete diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md index 47c34b9919b..ecbaaa91501 100644 --- a/gcc/config/arm/mve.md +++ b/gcc/config/arm/mve.md @@ -1957,6 +1957,17 @@ [(set_attr "type" "mve_move") ]) +(define_insn "mve_vsubq" + [ + (set (match_operand:MVE_2 0 "s_register_operand" "=w") + (minus:MVE_2 (match_operand:MVE_2 1 "s_register_operand" "w") + (match_operand:MVE_2 2 "s_register_operand" "w"))) + ] + "TARGET_HAVE_MVE" + "vsub.i%#\t%q0, %q1, %q2" + [(set_attr "type" "mve_move") +]) + ;; ;; [vabdq_f]) ;; @@ -2860,9 +2871,8 @@ (define_insn "mve_vsubq_f" [ (set (match_operand:MVE_0 0 "s_register_operand" "=w") - (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w") - (match_operand:MVE_0 2 "s_register_operand" "w")] - VSUBQ_F)) + (minus:MVE_0 (match_operand:MVE_0 1 "s_register_operand" "w") + (match_operand:MVE_0 2 "s_register_operand" "w"))) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" "vsub.f%#\t%q0, %q1, %q2" diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index e459b9ac8ef..2d767698378 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -521,28 +521,6 @@ (const_string "neon_sub")))] ) -(define_insn "sub3" - [(set - (match_operand:VH 0 "s_register_operand" "=w") - (minus:VH - (match_operand:VH 1 "s_register_operand" "w") - (match_operand:VH 2 "s_register_operand" "w")))] - "ARM_HAVE_NEON__ARITH" - "vsub.\t%0, %1, %2" - [(set_attr "type" "neon_sub")] -) - -(define_insn "sub3_fp16" - [(set - (match_operand:VH 0 "s_register_operand" "=w") - (minus:VH - (match_operand:VH 1 "s_register_operand" "w") - (match_operand:VH 2 "s_register_operand" "w")))] - "TARGET_NEON_FP16INST" - "vsub.\t%0, %1, %2" - [(set_attr "type" "neon_sub")] -) - (define_insn "*mul3_neon" [(set (match_operand:VDQW 0 "s_register_operand" "=w") (mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w") @@ -1637,7 +1615,7 @@ (match_operand:VH 2 "s_register_operand")] 
"TARGET_NEON_FP16INST" { - emit_insn (gen_sub3_fp16 (operands[0], operands[1], operands[2])); + emit_insn (gen_sub3 (operands[0], operands[1], operands[2])); DONE; }) diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md index 0f117d6b4ca..250e503876a 100644 --- a/gcc/config/arm/vec-common.md +++ b/gcc/config/arm/vec-common.md @@ -87,18 +87,12 @@ "ARM_HAVE__ARITH" ) -;; Vector arithmetic. Expanders are blank, then unnamed insns implement -;; patterns separately for IWMMXT and Neon. - (define_expand "sub3" - [(set (match_operand:VALL 0 "s_register_operand") - (minus:VALL (match_operand:VALL 1 "s_register_operand") - (match_operand:VALL 2 "s_register_operand")))] - "(TARGET_NEON && ((mode != V2SFmode && mode != V4SFmode) - || flag_unsafe_math_optimizations)) - || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))" -{ -}) + [(set (match_operand:VDQ 0 "s_register_operand") + (minus:VDQ (match_operand:VDQ 1 "s_register_operand") + (match_operand:VDQ 2 "s_register_operand")))] + "ARM_HAVE__ARITH" +) (define_expand "mul3" [(set (match_operand:VDQWH 0 "s_register_operand") diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index b9bb76183fb..d7c7c5f2630 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2020-10-23 Dennis Zhang + + * gcc.target/arm/simd/mve-vsub_1.c: New test. 
+ 2020-10-22 Alan Modra * gcc.target/powerpc/vec-splati-runnable.c: Don't abort on diff --git a/gcc/testsuite/gcc.target/arm/simd/mve-vsub_1.c b/gcc/testsuite/gcc.target/arm/simd/mve-vsub_1.c new file mode 100644 index 00000000000..cb3ef3a14e0 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/mve-vsub_1.c @@ -0,0 +1,65 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg -additional-options "-O3 -funsafe-math-optimizations" } */ +/* { dg-additional-options "-O3" } */ + +#include <stdint.h> + +void test_vsub_i32 (int32_t * dest, int32_t * a, int32_t * b) { + int i; + for (i=0; i<4; i++) { + dest[i] = a[i] - b[i]; + } +} + +void test_vsub_i32_u (uint32_t * dest, uint32_t * a, uint32_t * b) { + int i; + for (i=0; i<4; i++) { + dest[i] = a[i] - b[i]; + } +} + +/* { dg-final { scan-assembler-times {vsub\.i32\tq[0-9]+, q[0-9]+, q[0-9]+} 2 } } */ + +void test_vsub_i16 (int16_t * dest, int16_t * a, int16_t * b) { + int i; + for (i=0; i<8; i++) { + dest[i] = a[i] - b[i]; + } +} + +void test_vsub_i16_u (uint16_t * dest, uint16_t * a, uint16_t * b) { + int i; + for (i=0; i<8; i++) { + dest[i] = a[i] - b[i]; + } +} + +/* { dg-final { scan-assembler-times {vsub\.i16\tq[0-9]+, q[0-9]+, q[0-9]+} 2 } } */ + +void test_vsub_i8 (int8_t * dest, int8_t * a, int8_t * b) { + int i; + for (i=0; i<16; i++) { + dest[i] = a[i] - b[i]; + } +} + +void test_vsub_i8_u (uint8_t * dest, uint8_t * a, uint8_t * b) { + int i; + for (i=0; i<16; i++) { + dest[i] = a[i] - b[i]; + } +} + +/* { dg-final { scan-assembler-times {vsub\.i8\tq[0-9]+, q[0-9]+, q[0-9]+} 2 } } */ + +void test_vsub_f32 (float * dest, float * a, float * b) { + int i; + for (i=0; i<4; i++) { + dest[i] = a[i] - b[i]; + } +} + +/* { dg-final { scan-assembler-times {vsub\.f32\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ +