From 95e10b8aa1066dbd5c433e613652674b0636fcd1 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Tue, 29 Sep 2020 11:41:26 +0100 Subject: [PATCH] arm: Add new vector mode macros The AArch32 port now has three vector extensions: iwMMXt, Neon and MVE. We already have some named expanders that are shared by all three, and soon we'll need more. One way of handling this would be to use define_mode_iterators that specify the condition for each mode. For example, (V16QI "TARGET_NEON || TARGET_HAVE_MVE") (V8QI "TARGET_NEON || TARGET_REALLY_IWMXXT") ... (V2SF "TARGET_NEON && flag_unsafe_math_optimizations") etc. However, we'll need several mode iterators, and it would be repetitive to specify the mode condition every time. This patch therefore introduces per-mode macros that say whether we can perform general arithmetic on the mode. Initially there are two sets of macros: ARM_HAVE_NEON__ARITH true if Neon can handle general arithmetic on ARM_HAVE__ARITH true if any vector extension can handle general arithmetic on The macro definitions themselves are undeniably ugly, but hopefully they're justified by the simplifications they allow. The patch converts the addition patterns to use this scheme. Previously there were three copies of the V8HF and V4HF addition patterns for Neon: (1) *add3_neon, which provided plus:VnHF even without TARGET_NEON_FP16INST. This was probably harmless since all the named patterns had an appropriate guard, but it is possible that something could have tried to generate the plus directly, such as by using a REG_EQUAL note to generate a new pattern. (2) addv8hf3_neon and addv4hf3, which had the correct TARGET_NEON_FP16INST target condition, but unnecessarily required flag_unsafe_math_optimizations. Unlike VnSF operations, VnHF operations do not force flush to zero. (3) add3_fp16, which provided plus:VnHF with the correct conditions (TARGET_NEON_FP16INST, with no flag_unsafe_math_optimizations test). The patch in essence renames add3_fp16 to *add3_neon (part of *add3_neon) and removes the other two patterns. gcc/ * config/arm/arm.h (ARM_HAVE_NEON_V8QI_ARITH, ARM_HAVE_NEON_V4HI_ARITH) (ARM_HAVE_NEON_V2SI_ARITH, ARM_HAVE_NEON_V16QI_ARITH): New macros. (ARM_HAVE_NEON_V8HI_ARITH, ARM_HAVE_NEON_V4SI_ARITH): Likewise. (ARM_HAVE_NEON_V2DI_ARITH, ARM_HAVE_NEON_V4HF_ARITH): Likewise. (ARM_HAVE_NEON_V8HF_ARITH, ARM_HAVE_NEON_V2SF_ARITH): Likewise. (ARM_HAVE_NEON_V4SF_ARITH, ARM_HAVE_V8QI_ARITH, ARM_HAVE_V4HI_ARITH) (ARM_HAVE_V2SI_ARITH, ARM_HAVE_V16QI_ARITH, ARM_HAVE_V8HI_ARITH) (ARM_HAVE_V4SI_ARITH, ARM_HAVE_V2DI_ARITH, ARM_HAVE_V4HF_ARITH) (ARM_HAVE_V2SF_ARITH, ARM_HAVE_V8HF_ARITH, ARM_HAVE_V4SF_ARITH): Likewise. * config/arm/iterators.md (VNIM, VNINOTM): Delete. * config/arm/vec-common.md (add3, addv8hf3) (add3): Replace with... (add3): ...this new expander. * config/arm/neon.md (*add3_neon): Use the new ARM_HAVE_NEON__ARITH macros as the C condition. (addv8hf3_neon, addv4hf3, add3_fp16): Delete in favor of the above. (neon_vadd): Use gen_add3 instead of gen_add3_fp16. gcc/testsuite/ * gcc.target/arm/armv8_2-fp16-arith-2.c: Expect FP16 vectorization even without -ffast-math. --- gcc/config/arm/arm.h | 41 ++++++++++++++++ gcc/config/arm/iterators.md | 8 ---- gcc/config/arm/neon.md | 47 +------------------ gcc/config/arm/vec-common.md | 42 ++--------------- .../gcc.target/arm/armv8_2-fp16-arith-2.c | 20 +++++--- 5 files changed, 61 insertions(+), 97 deletions(-) diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h index f4d3676c5bc..4a63d33c70d 100644 --- a/gcc/config/arm/arm.h +++ b/gcc/config/arm/arm.h @@ -1110,6 +1110,47 @@ extern const int arm_arch_cde_coproc_bits[]; #define VALID_MVE_STRUCT_MODE(MODE) \ ((MODE) == TImode || (MODE) == OImode || (MODE) == XImode) +/* The conditions under which vector modes are supported for general + arithmetic using Neon. */ + +#define ARM_HAVE_NEON_V8QI_ARITH TARGET_NEON +#define ARM_HAVE_NEON_V4HI_ARITH TARGET_NEON +#define ARM_HAVE_NEON_V2SI_ARITH TARGET_NEON + +#define ARM_HAVE_NEON_V16QI_ARITH TARGET_NEON +#define ARM_HAVE_NEON_V8HI_ARITH TARGET_NEON +#define ARM_HAVE_NEON_V4SI_ARITH TARGET_NEON +#define ARM_HAVE_NEON_V2DI_ARITH TARGET_NEON + +/* HF operations have their own flush-to-zero control (FPSCR.FZ16). */ +#define ARM_HAVE_NEON_V4HF_ARITH TARGET_NEON_FP16INST +#define ARM_HAVE_NEON_V8HF_ARITH TARGET_NEON_FP16INST + +/* SF operations always flush to zero, regardless of FPSCR.FZ, so we can + only use them for general arithmetic when -funsafe-math-optimizations + is in effect. */ +#define ARM_HAVE_NEON_V2SF_ARITH \ + (TARGET_NEON && flag_unsafe_math_optimizations) +#define ARM_HAVE_NEON_V4SF_ARITH ARM_HAVE_NEON_V2SF_ARITH + +/* The conditions under which vector modes are supported for general + arithmetic by any vector extension. */ + +#define ARM_HAVE_V8QI_ARITH (ARM_HAVE_NEON_V8QI_ARITH || TARGET_REALLY_IWMMXT) +#define ARM_HAVE_V4HI_ARITH (ARM_HAVE_NEON_V4HI_ARITH || TARGET_REALLY_IWMMXT) +#define ARM_HAVE_V2SI_ARITH (ARM_HAVE_NEON_V2SI_ARITH || TARGET_REALLY_IWMMXT) + +#define ARM_HAVE_V16QI_ARITH (ARM_HAVE_NEON_V16QI_ARITH || TARGET_HAVE_MVE) +#define ARM_HAVE_V8HI_ARITH (ARM_HAVE_NEON_V8HI_ARITH || TARGET_HAVE_MVE) +#define ARM_HAVE_V4SI_ARITH (ARM_HAVE_NEON_V4SI_ARITH || TARGET_HAVE_MVE) +#define ARM_HAVE_V2DI_ARITH ARM_HAVE_NEON_V2DI_ARITH + +#define ARM_HAVE_V4HF_ARITH ARM_HAVE_NEON_V4HF_ARITH +#define ARM_HAVE_V2SF_ARITH ARM_HAVE_NEON_V2SF_ARITH + +#define ARM_HAVE_V8HF_ARITH (ARM_HAVE_NEON_V8HF_ARITH || TARGET_HAVE_MVE_FLOAT) +#define ARM_HAVE_V4SF_ARITH (ARM_HAVE_NEON_V4SF_ARITH || TARGET_HAVE_MVE_FLOAT) + /* The register numbers in sequence, for passing to arm_gen_load_multiple. */ extern int arm_regs_in_sequence[]; diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md index 0bc9eba0722..c70e3bc2731 100644 --- a/gcc/config/arm/iterators.md +++ b/gcc/config/arm/iterators.md @@ -66,14 +66,6 @@ ;; Integer and float modes supported by Neon and IWMMXT. (define_mode_iterator VALL [V2DI V2SI V4HI V8QI V2SF V4SI V8HI V16QI V4SF]) -;; Integer and float modes supported by Neon, IWMMXT and MVE, used by -;; arithmetic epxand patterns. -(define_mode_iterator VNIM [V16QI V8HI V4SI V4SF]) - -;; Integer and float modes supported by Neon and IWMMXT but not MVE, used by -;; arithmetic epxand patterns. -(define_mode_iterator VNINOTM [V2SI V4HI V8QI V2SF V2DI]) - ;; Integer and float modes supported by Neon, IWMMXT and MVE. (define_mode_iterator VNIM1 [V16QI V8HI V4SI V4SF V2DI]) diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index 3e7b51d8ab6..96bf277f501 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -501,7 +501,7 @@ [(set (match_operand:VDQ 0 "s_register_operand" "=w") (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w") (match_operand:VDQ 2 "s_register_operand" "w")))] - "TARGET_NEON && (! || flag_unsafe_math_optimizations)" + "ARM_HAVE_NEON__ARITH" "vadd.\t%0, %1, %2" [(set (attr "type") (if_then_else (match_test "") @@ -509,49 +509,6 @@ (const_string "neon_add")))] ) -;; As with SFmode, full support for HFmode vector arithmetic is only available -;; when flag-unsafe-math-optimizations is enabled. - -;; Add pattern with modes V8HF and V4HF is split into separate patterns to add -;; support for standard pattern addv8hf3 in MVE. Following pattern is called -;; from "addv8hf3" standard pattern inside vec-common.md file. - -(define_insn "addv8hf3_neon" - [(set - (match_operand:V8HF 0 "s_register_operand" "=w") - (plus:V8HF - (match_operand:V8HF 1 "s_register_operand" "w") - (match_operand:V8HF 2 "s_register_operand" "w")))] - "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations" - "vadd.f16\t%0, %1, %2" - [(set_attr "type" "neon_fp_addsub_s_q")] -) - -(define_insn "addv4hf3" - [(set - (match_operand:V4HF 0 "s_register_operand" "=w") - (plus:V4HF - (match_operand:V4HF 1 "s_register_operand" "w") - (match_operand:V4HF 2 "s_register_operand" "w")))] - "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations" - "vadd.f16\t%0, %1, %2" - [(set_attr "type" "neon_fp_addsub_s_q")] -) - -(define_insn "add3_fp16" - [(set - (match_operand:VH 0 "s_register_operand" "=w") - (plus:VH - (match_operand:VH 1 "s_register_operand" "w") - (match_operand:VH 2 "s_register_operand" "w")))] - "TARGET_NEON_FP16INST" - "vadd.\t%0, %1, %2" - [(set (attr "type") - (if_then_else (match_test "") - (const_string "neon_fp_addsub_s") - (const_string "neon_add")))] -) - (define_insn "*sub3_neon" [(set (match_operand:VDQ 0 "s_register_operand" "=w") (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w") @@ -1837,7 +1794,7 @@ (match_operand:VH 2 "s_register_operand")] "TARGET_NEON_FP16INST" { - emit_insn (gen_add3_fp16 (operands[0], operands[1], operands[2])); + emit_insn (gen_add3 (operands[0], operands[1], operands[2])); DONE; }) diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md index b7e3619caf4..c3c86c46355 100644 --- a/gcc/config/arm/vec-common.md +++ b/gcc/config/arm/vec-common.md @@ -81,43 +81,11 @@ ;; patterns separately for Neon, IWMMXT and MVE. (define_expand "add3" - [(set (match_operand:VNIM 0 "s_register_operand") - (plus:VNIM (match_operand:VNIM 1 "s_register_operand") - (match_operand:VNIM 2 "s_register_operand")))] - "(TARGET_NEON && ((mode != V2SFmode && mode != V4SFmode) - || flag_unsafe_math_optimizations)) - || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode)) - || (TARGET_HAVE_MVE && VALID_MVE_SI_MODE(mode)) - || (TARGET_HAVE_MVE_FLOAT && VALID_MVE_SF_MODE(mode))" -{ -}) - -;; Vector arithmetic. Expanders are blank, then unnamed insns implement -;; patterns separately for Neon and MVE. - -(define_expand "addv8hf3" - [(set (match_operand:V8HF 0 "s_register_operand") - (plus:V8HF (match_operand:V8HF 1 "s_register_operand") - (match_operand:V8HF 2 "s_register_operand")))] - "(TARGET_HAVE_MVE_FLOAT && VALID_MVE_SF_MODE(V8HFmode)) - || (TARGET_NEON_FP16INST && flag_unsafe_math_optimizations)" -{ - if (TARGET_NEON_FP16INST && flag_unsafe_math_optimizations) - emit_insn (gen_addv8hf3_neon (operands[0], operands[1], operands[2])); -}) - -;; Vector arithmetic. Expanders are blank, then unnamed insns implement -;; patterns separately for Neon and IWMMXT. - -(define_expand "add3" - [(set (match_operand:VNINOTM 0 "s_register_operand") - (plus:VNINOTM (match_operand:VNINOTM 1 "s_register_operand") - (match_operand:VNINOTM 2 "s_register_operand")))] - "(TARGET_NEON && ((mode != V2SFmode && mode != V4SFmode) - || flag_unsafe_math_optimizations)) - || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))" -{ -}) + [(set (match_operand:VDQ 0 "s_register_operand") + (plus:VDQ (match_operand:VDQ 1 "s_register_operand") + (match_operand:VDQ 2 "s_register_operand")))] + "ARM_HAVE__ARITH" +) ;; Vector arithmetic. Expanders are blank, then unnamed insns implement ;; patterns separately for IWMMXT and Neon. diff --git a/gcc/testsuite/gcc.target/arm/armv8_2-fp16-arith-2.c b/gcc/testsuite/gcc.target/arm/armv8_2-fp16-arith-2.c index 24d0528540d..81bad225a1f 100644 --- a/gcc/testsuite/gcc.target/arm/armv8_2-fp16-arith-2.c +++ b/gcc/testsuite/gcc.target/arm/armv8_2-fp16-arith-2.c @@ -89,17 +89,23 @@ TEST_CMP (greaterthanqual, >=, int16x8_t, float16x8_t) /* { dg-final { scan-assembler-times {vneg\.f16\ts[0-9]+, s[0-9]+} 1 } } */ /* { dg-final { scan-assembler-times {vneg\.f16\td[0-9]+, d[0-9]+} 1 } } */ /* { dg-final { scan-assembler-times {vneg\.f16\tq[0-9]+, q[0-9]+} 1 } } */ +/* { dg-final { scan-assembler-times {vabs\.f16\ts[0-9]+, s[0-9]+} 2 } } */ + +/* { dg-final { scan-assembler-times {vadd\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 1 } } */ +/* { dg-final { scan-assembler-times {vadd\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } } */ +/* { dg-final { scan-assembler-times {vadd\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ + +/* { dg-final { scan-assembler-times {vsub\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 1 } } */ +/* { dg-final { scan-assembler-times {vsub\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } } */ +/* { dg-final { scan-assembler-times {vsub\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ + +/* { dg-final { scan-assembler-times {vmul\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 1 } } */ +/* { dg-final { scan-assembler-times {vmul\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } } */ +/* { dg-final { scan-assembler-times {vmul\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ -/* { dg-final { scan-assembler-times {vadd\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 13 } } */ -/* { dg-final { scan-assembler-times {vsub\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 13 } } */ -/* { dg-final { scan-assembler-times {vmul\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 13 } } */ /* { dg-final { scan-assembler-times {vdiv\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 13 } } */ /* { dg-final { scan-assembler-times {vcmp\.f32\ts[0-9]+, s[0-9]+} 26 } } */ - /* { dg-final { scan-assembler-times {vcmpe\.f32\ts[0-9]+, s[0-9]+} 52 } } */ -/* { dg-final { scan-assembler-times {vcmpe\.f32\ts[0-9]+, #0} 2 } } */ - -/* { dg-final { scan-assembler-not {vabs\.f16} } } */ /* { dg-final { scan-assembler-not {vadd\.f32} } } */ /* { dg-final { scan-assembler-not {vsub\.f32} } } */ -- 2.30.2