arm: Make more use of the new mode macros
authorRichard Sandiford <richard.sandiford@arm.com>
Fri, 2 Oct 2020 10:53:05 +0000 (11:53 +0100)
committerRichard Sandiford <richard.sandiford@arm.com>
Fri, 2 Oct 2020 10:53:05 +0000 (11:53 +0100)
As Christophe pointed out, my r11-3522 patch didn't in fact fix
all of the armv8_2-fp16-arith-2.c failures introduced by allowing
FP16 vectorisation without -funsafe-math-optimizations.  I must have
only tested the final patch on my usual arm-linux-gnueabihf bootstrap,
which it turns out treats the test as unsupported.

The focus of the original patch was to use mode macros for
patterns that are shared between Advanced SIMD, iwMMXt and MVE.
This patch uses the mode macros for general neon.md patterns too.

gcc/
* config/arm/neon.md (*sub<VDQ:mode>3_neon): Use the new mode macros
for the insn condition.
(sub<VH:mode>3, *mul<VDQW:mode>3_neon): Likewise.
(mul<VDQW:mode>3add<VDQW:mode>_neon): Likewise.
(mul<VH:mode>3add<VH:mode>_neon): Likewise.
(mul<VDQW:mode>3neg<VDQW:mode>add<VDQW:mode>_neon): Likewise.
(fma<VCVTF:mode>4, fma<VH:mode>4, *fmsub<VCVTF:mode>4): Likewise.
(quad_halves_<code>v4sf, reduc_plus_scal_<VD:mode>): Likewise.
(reduc_plus_scal_<VQ:mode>, reduc_smin_scal_<VD:mode>): Likewise.
(reduc_smin_scal_<VQ:mode>, reduc_smax_scal_<VD:mode>): Likewise.
(reduc_smax_scal_<VQ:mode>, mul<VH:mode>3): Likewise.
(neon_vabd<VF:mode>_2, neon_vabd<VF:mode>_3): Likewise.
(fma<VH:mode>4_intrinsic): Delete.
(neon_vadd<VCVTF:mode>): Use the new mode macros to decide which
form of instruction to generate.
(neon_vmla<VDQW:mode>, neon_vmls<VDQW:mode>): Likewise.
(neon_vsub<VCVTF:mode>): Likewise.
(neon_vfma<VH:mode>): Generate the main fma<mode>4 form instead
of using fma<mode>4_intrinsic.

gcc/testsuite/
* gcc.target/arm/armv8_2-fp16-arith-2.c (float16_t): Use _Float16_t
rather than __fp16.
(float16x4_t, float16x4_t): Likewise.
(fp16_abs): Use __builtin_fabsf16.

gcc/config/arm/neon.md
gcc/testsuite/gcc.target/arm/armv8_2-fp16-arith-2.c

index 58832cbf484a706bce9d98d13f7fe1e33fd47a23..85e424e6cf4ffa807e2cbf52a5f7a0ece4ec027a 100644 (file)
   [(set (match_operand:VDQ 0 "s_register_operand" "=w")
         (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
                    (match_operand:VDQ 2 "s_register_operand" "w")))]
-  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+  "ARM_HAVE_NEON_<MODE>_ARITH"
   "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
   [(set (attr "type")
       (if_then_else (match_test "<Is_float_mode>")
    (minus:VH
     (match_operand:VH 1 "s_register_operand" "w")
     (match_operand:VH 2 "s_register_operand" "w")))]
- "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
+ "ARM_HAVE_NEON_<MODE>_ARITH"
  "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_sub<q>")]
 )
   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
         (mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
                    (match_operand:VDQW 2 "s_register_operand" "w")))]
-  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+  "ARM_HAVE_NEON_<MODE>_ARITH"
   "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
   [(set (attr "type")
       (if_then_else (match_test "<Is_float_mode>")
         (plus:VDQW (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
                             (match_operand:VDQW 3 "s_register_operand" "w"))
                  (match_operand:VDQW 1 "s_register_operand" "0")))]
-  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+  "ARM_HAVE_NEON_<MODE>_ARITH"
   "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
   [(set (attr "type")
       (if_then_else (match_test "<Is_float_mode>")
        (plus:VH (mult:VH (match_operand:VH 2 "s_register_operand" "w")
                          (match_operand:VH 3 "s_register_operand" "w"))
                  (match_operand:VH 1 "s_register_operand" "0")))]
-  "TARGET_NEON_FP16INST && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+  "ARM_HAVE_NEON_<MODE>_ARITH"
   "vmla.f16\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
   [(set_attr "type" "neon_fp_mla_s<q>")]
 )
         (minus:VDQW (match_operand:VDQW 1 "s_register_operand" "0")
                     (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
                                (match_operand:VDQW 3 "s_register_operand" "w"))))]
-  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+  "ARM_HAVE_NEON_<MODE>_ARITH"
   "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
   [(set (attr "type")
       (if_then_else (match_test "<Is_float_mode>")
         (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
                 (match_operand:VCVTF 2 "register_operand" "w")
                 (match_operand:VCVTF 3 "register_operand" "0")))]
-  "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
+  "ARM_HAVE_NEON_<MODE>_ARITH && TARGET_FMA"
   "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
   [(set_attr "type" "neon_fp_mla_s<q>")]
 )
     (match_operand:VH 1 "register_operand" "w")
     (match_operand:VH 2 "register_operand" "w")
     (match_operand:VH 3 "register_operand" "0")))]
- "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
- "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
- [(set_attr "type" "neon_fp_mla_s<q>")]
-)
-
-(define_insn "fma<VH:mode>4_intrinsic"
- [(set (match_operand:VH 0 "register_operand" "=w")
-   (fma:VH
-    (match_operand:VH 1 "register_operand" "w")
-    (match_operand:VH 2 "register_operand" "w")
-    (match_operand:VH 3 "register_operand" "0")))]
- "TARGET_NEON_FP16INST"
+ "ARM_HAVE_NEON_<MODE>_ARITH"
  "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_mla_s<q>")]
 )
         (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
                   (match_operand:VCVTF 2 "register_operand" "w")
                   (match_operand:VCVTF 3 "register_operand" "0")))]
-  "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
+  "ARM_HAVE_NEON_<MODE>_ARITH && TARGET_FMA"
   "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
   [(set_attr "type" "neon_fp_mla_s<q>")]
 )
                            (parallel [(const_int 0) (const_int 1)]))
           (vec_select:V2SF (match_dup 1)
                            (parallel [(const_int 2) (const_int 3)]))))]
-  "TARGET_NEON && flag_unsafe_math_optimizations"
+  "ARM_HAVE_NEON_V4SF_ARITH"
   "<VQH_mnem>.f32\t%P0, %e1, %f1"
   [(set_attr "vqh_mnem" "<VQH_mnem>")
    (set_attr "type" "neon_fp_reduc_<VQH_type>_s_q")]
 (define_expand "reduc_plus_scal_<mode>"
   [(match_operand:<V_elem> 0 "nonimmediate_operand")
    (match_operand:VD 1 "s_register_operand")]
-  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+  "ARM_HAVE_NEON_<MODE>_ARITH"
 {
   rtx vec = gen_reg_rtx (<MODE>mode);
   neon_pairwise_reduce (vec, operands[1], <MODE>mode,
 (define_expand "reduc_plus_scal_<mode>"
   [(match_operand:<V_elem> 0 "nonimmediate_operand")
    (match_operand:VQ 1 "s_register_operand")]
-  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
-   && !BYTES_BIG_ENDIAN"
+  "ARM_HAVE_NEON_<MODE>_ARITH && !BYTES_BIG_ENDIAN"
 {
   rtx step1 = gen_reg_rtx (<V_HALF>mode);
 
 (define_expand "reduc_smin_scal_<mode>"
   [(match_operand:<V_elem> 0 "nonimmediate_operand")
    (match_operand:VD 1 "s_register_operand")]
-  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+  "ARM_HAVE_NEON_<MODE>_ARITH"
 {
   rtx vec = gen_reg_rtx (<MODE>mode);
 
 (define_expand "reduc_smin_scal_<mode>"
   [(match_operand:<V_elem> 0 "nonimmediate_operand")
    (match_operand:VQ 1 "s_register_operand")]
-  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
-   && !BYTES_BIG_ENDIAN"
+  "ARM_HAVE_NEON_<MODE>_ARITH && !BYTES_BIG_ENDIAN"
 {
   rtx step1 = gen_reg_rtx (<V_HALF>mode);
 
 (define_expand "reduc_smax_scal_<mode>"
   [(match_operand:<V_elem> 0 "nonimmediate_operand")
    (match_operand:VD 1 "s_register_operand")]
-  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+  "ARM_HAVE_NEON_<MODE>_ARITH"
 {
   rtx vec = gen_reg_rtx (<MODE>mode);
   neon_pairwise_reduce (vec, operands[1], <MODE>mode,
 (define_expand "reduc_smax_scal_<mode>"
   [(match_operand:<V_elem> 0 "nonimmediate_operand")
    (match_operand:VQ 1 "s_register_operand")]
-  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
-   && !BYTES_BIG_ENDIAN"
+  "ARM_HAVE_NEON_<MODE>_ARITH && !BYTES_BIG_ENDIAN"
 {
   rtx step1 = gen_reg_rtx (<V_HALF>mode);
 
    (match_operand:VCVTF 2 "s_register_operand")]
   "TARGET_NEON"
 {
-  if (!<Is_float_mode> || flag_unsafe_math_optimizations)
+  if (ARM_HAVE_NEON_<MODE>_ARITH)
     emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
   else
     emit_insn (gen_neon_vadd<mode>_unspec (operands[0], operands[1],
    (mult:VH
     (match_operand:VH 1 "s_register_operand" "w")
     (match_operand:VH 2 "s_register_operand" "w")))]
-  "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
+  "ARM_HAVE_NEON_<MODE>_ARITH"
   "vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_mul_<VH_elem_ch><q>")]
 )
    (match_operand:VDQW 3 "s_register_operand")]
   "TARGET_NEON"
 {
-  if (!<Is_float_mode> || flag_unsafe_math_optimizations)
+  if (ARM_HAVE_NEON_<MODE>_ARITH)
     emit_insn (gen_mul<mode>3add<mode>_neon (operands[0], operands[1],
                                             operands[2], operands[3]));
   else
    (match_operand:VH 3 "s_register_operand")]
   "TARGET_NEON_FP16INST"
 {
-  emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
-                                      operands[1]));
+  emit_insn (gen_fma<mode>4 (operands[0], operands[2], operands[3],
+                            operands[1]));
   DONE;
 })
 
    (match_operand:VDQW 3 "s_register_operand")]
   "TARGET_NEON"
 {
-  if (!<Is_float_mode> || flag_unsafe_math_optimizations)
+  if (ARM_HAVE_NEON_<MODE>_ARITH)
     emit_insn (gen_mul<mode>3neg<mode>add<mode>_neon (operands[0],
                 operands[1], operands[2], operands[3]));
   else
    (match_operand:VCVTF 2 "s_register_operand")]
   "TARGET_NEON"
 {
-  if (!<Is_float_mode> || flag_unsafe_math_optimizations)
+  if (ARM_HAVE_NEON_<MODE>_ARITH)
     emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2]));
   else
     emit_insn (gen_neon_vsub<mode>_unspec (operands[0], operands[1],
@@ -6462,7 +6448,7 @@ if (BYTES_BIG_ENDIAN)
  [(set (match_operand:VF 0 "s_register_operand" "=w")
        (abs:VF (minus:VF (match_operand:VF 1 "s_register_operand" "w")
                         (match_operand:VF 2 "s_register_operand" "w"))))]
- "TARGET_NEON && flag_unsafe_math_optimizations"
+ "ARM_HAVE_NEON_<MODE>_ARITH"
  "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_abd_s<q>")]
 )
@@ -6472,7 +6458,7 @@ if (BYTES_BIG_ENDIAN)
        (abs:VF (unspec:VF [(match_operand:VF 1 "s_register_operand" "w")
                            (match_operand:VF 2 "s_register_operand" "w")]
                UNSPEC_VSUB)))]
- "TARGET_NEON && flag_unsafe_math_optimizations"
+ "ARM_HAVE_NEON_<MODE>_ARITH"
  "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
  [(set_attr "type" "neon_fp_abd_s<q>")]
 )
index 81bad225a1f72a4ff722b5390dd9c970ea28b288..f94109c43965184926f8da34b9cac1f05c1aa210 100644 (file)
@@ -6,9 +6,9 @@
 /* Test instructions generated for half-precision arithmetic without
    unsafe-math-optimizations.  */
 
-typedef __fp16 float16_t;
-typedef __simd64_float16_t float16x4_t;
-typedef __simd128_float16_t float16x8_t;
+typedef _Float16 float16_t;
+typedef _Float16 float16x4_t __attribute__ ((vector_size (8)));
+typedef _Float16 float16x8_t __attribute__ ((vector_size (16)));
 
 typedef short int16x4_t __attribute__ ((vector_size (8)));
 typedef short int int16x8_t  __attribute__ ((vector_size (16)));
@@ -16,7 +16,7 @@ typedef short int int16x8_t  __attribute__ ((vector_size (16)));
 float16_t
 fp16_abs (float16_t a)
 {
-  return (a < 0) ? -a : a;
+  return __builtin_fabsf16 (a);
 }
 
 #define TEST_UNOP(NAME, OPERATOR, TY)          \