From: Tamar Christina <tamar.christina@arm.com>
Date: Tue, 2 Aug 2016 09:25:19 +0000 (+0000)
Subject: [PATCH AArch64] Add more AArch64 NEON intrinsics
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=1efafef383b156074d4bd5ed35f656a509c7bf7a;p=gcc.git

[PATCH AArch64] Add more AArch64 NEON intrinsics

Add vmaxnm_f64, vminnm_f64, vmax_f64, vmin_f64.

Committed on behalf of Tamar Christina <tamar.christina@arm.com> .

gcc/

	* config/aarch64/aarch64-simd-builtins.def
	(__builtin_aarch64_fmindf): Change BUILTIN_VDQF to BUILTIN_VDQF_DF.
	(__builtin_aarch64_fmaxdf): Likewise.
	(__builtin_aarch64_smin_nandf): Likewise.
	(__builtin_aarch64_smax_nandf): Likewise.
	* config/aarch64/aarch64-simd.md (<fmaxmin><mode>3): Remove.
	* config/aarch64/aarch64.md (<fmaxmin><mode>3): Rename to...
	(<fmaxmin><mode>3): ...this.
	* config/aarch64/arm_neon.h (vmaxnm_f64): New.
	(vminnm_f64): Likewise.
	(vmin_f64): Likewise.
	(vmax_f64): Likewise.
	* config/aarch64/iterators.md (FMAXMIN): Merge with...
	(FMAXMIN_UNS): ...this.
	(fmaxmin): Merged with
	(fmaxmin_op): ...this...
	(maxmin_uns_op): ...in to this.

gcc/testsuite/

	* gcc.target/aarch64/vminmaxnm.c: New.
	* gcc.target/aarch64/simd/vminmaxnm_1.c (main): Added float64x1_t
	tests.

From-SVN: r238977
---

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index fa109062803..f2c81b077fb 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,23 @@
+2016-08-02  Tamar Christina  <tamar.christina@arm.com>
+
+	* config/aarch64/aarch64-simd-builtins.def
+	(__builtin_aarch64_fmindf): Change BUILTIN_VDQF to BUILTIN_VDQF_DF.
+	(__builtin_aarch64_fmaxdf): Likewise.
+	(__builtin_aarch64_smin_nandf): Likewise.
+	(__builtin_aarch64_smax_nandf): Likewise.
+	* config/aarch64/aarch64-simd.md (<fmaxmin><mode>3): Remove.
+	* config/aarch64/aarch64.md (<fmaxmin><mode>3): Rename to...
+	(<fmaxmin><mode>3): ...this.
+	* config/aarch64/arm_neon.h (vmaxnm_f64): New.
+	(vminnm_f64): Likewise.
+	(vmin_f64): Likewise.
+	(vmax_f64): Likewise.
+	* config/aarch64/iterators.md (FMAXMIN): Merge with...
+	(FMAXMIN_UNS): ...this.
+	(fmaxmin): Merged with
+	(fmaxmin_op): ...this...
+	(maxmin_uns_op): ...in to this.
+
 2016-08-01  Michael Meissner  <meissner@linux.vnet.ibm.com>
 
 	* config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin):
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index c7fe08bb21a..e1154b4b278 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -241,19 +241,19 @@
   BUILTIN_VHSDF (UNOP, reduc_smax_nan_scal_, 10)
   BUILTIN_VHSDF (UNOP, reduc_smin_nan_scal_, 10)
 
-  /* Implemented by <maxmin><mode>3.
+  /* Implemented by <maxmin_uns><mode>3.
      smax variants map to fmaxnm,
      smax_nan variants map to fmax.  */
   BUILTIN_VDQ_BHSI (BINOP, smax, 3)
   BUILTIN_VDQ_BHSI (BINOP, smin, 3)
   BUILTIN_VDQ_BHSI (BINOP, umax, 3)
   BUILTIN_VDQ_BHSI (BINOP, umin, 3)
-  BUILTIN_VHSDF (BINOP, smax_nan, 3)
-  BUILTIN_VHSDF (BINOP, smin_nan, 3)
+  BUILTIN_VHSDF_DF (BINOP, smax_nan, 3)
+  BUILTIN_VHSDF_DF (BINOP, smin_nan, 3)
 
-  /* Implemented by <fmaxmin><mode>3.  */
-  BUILTIN_VHSDF (BINOP, fmax, 3)
-  BUILTIN_VHSDF (BINOP, fmin, 3)
+  /* Implemented by <maxmin_uns><mode>3.  */
+  BUILTIN_VHSDF_HSDF (BINOP, fmax, 3)
+  BUILTIN_VHSDF_HSDF (BINOP, fmin, 3)
 
   /* Implemented by aarch64_<maxmin_uns>p<mode>.  */
   BUILTIN_VDQ_BHSI (BINOP, smaxp, 0)
@@ -549,8 +549,4 @@
   BUILTIN_GPI (UNOP, fix_truncdf, 2)
   BUILTIN_GPI_I16 (UNOPUS, fixuns_trunchf, 2)
   BUILTIN_GPI (UNOPUS, fixuns_truncsf, 2)
-  BUILTIN_GPI (UNOPUS, fixuns_truncdf, 2)
-
-  /* Implemented by <fmaxmin><mode>3.  */
-  VAR1 (BINOP, fmax, 3, hf)
-  VAR1 (BINOP, fmin, 3, hf)
+  BUILTIN_GPI (UNOPUS, fixuns_truncdf, 2)
\ No newline at end of file
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 0bf3ac8a875..f2575a0f300 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -2038,6 +2038,9 @@
   [(set_attr "type" "neon_fp_minmax_<stype><q>")]
 )
 
+;; Vector forms for fmax, fmin, fmaxnm, fminnm.
+;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
+;; which implement the IEEE fmax ()/fmin () functions.
 (define_insn "<maxmin_uns><mode>3"
   [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
@@ -2048,17 +2051,6 @@
   [(set_attr "type" "neon_fp_minmax_<stype><q>")]
 )
 
-;; Auto-vectorized forms for the IEEE-754 fmax()/fmin() functions
-(define_insn "<fmaxmin><mode>3"
-  [(set (match_operand:VHSDF 0 "register_operand" "=w")
-	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
-		       (match_operand:VHSDF 2 "register_operand" "w")]
-		       FMAXMIN))]
-  "TARGET_SIMD"
-  "<fmaxmin_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
-  [(set_attr "type" "neon_fp_minmax_<stype><q>")]
-)
-
 ;; 'across lanes' add.
 
 (define_expand "reduc_plus_scal_<mode>"
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 9e87a0d532e..f15dd8d8672 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -4841,14 +4841,16 @@
   [(set_attr "type" "f_minmax<s>")]
 )
 
-;; Scalar forms for the IEEE-754 fmax()/fmin() functions
-(define_insn "<fmaxmin><mode>3"
+;; Scalar forms for fmax, fmin, fmaxnm, fminnm.
+;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
+;; which implement the IEEE fmax ()/fmin () functions.
+(define_insn "<maxmin_uns><mode>3"
   [(set (match_operand:GPF_F16 0 "register_operand" "=w")
 	(unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")
 		     (match_operand:GPF_F16 2 "register_operand" "w")]
-		     FMAXMIN))]
+		     FMAXMIN_UNS))]
   "TARGET_FLOAT"
-  "<fmaxmin_op>\\t%<s>0, %<s>1, %<s>2"
+  "<maxmin_uns_op>\\t%<s>0, %<s>1, %<s>2"
   [(set_attr "type" "f_minmax<stype>")]
 )
 
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index ab3a00c9ec7..fcdc977d631 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -17201,6 +17201,14 @@ vmax_f32 (float32x2_t __a, float32x2_t __b)
   return __builtin_aarch64_smax_nanv2sf (__a, __b);
 }
 
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vmax_f64 (float64x1_t __a, float64x1_t __b)
+{
+    return (float64x1_t)
+      { __builtin_aarch64_smax_nandf (vget_lane_f64 (__a, 0),
+				      vget_lane_f64 (__b, 0)) };
+}
+
 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
 vmax_s8 (int8x8_t __a, int8x8_t __b)
 {
@@ -17692,6 +17700,14 @@ vmaxnm_f32 (float32x2_t __a, float32x2_t __b)
   return __builtin_aarch64_fmaxv2sf (__a, __b);
 }
 
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vmaxnm_f64 (float64x1_t __a, float64x1_t __b)
+{
+  return (float64x1_t)
+    { __builtin_aarch64_fmaxdf (vget_lane_f64 (__a, 0),
+				vget_lane_f64 (__b, 0)) };
+}
+
 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
 vmaxnmq_f32 (float32x4_t __a, float32x4_t __b)
 {
@@ -17824,6 +17840,14 @@ vmin_f32 (float32x2_t __a, float32x2_t __b)
   return __builtin_aarch64_smin_nanv2sf (__a, __b);
 }
 
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vmin_f64 (float64x1_t __a, float64x1_t __b)
+{
+    return (float64x1_t)
+	  { __builtin_aarch64_smin_nandf (vget_lane_f64 (__a, 0),
+					  vget_lane_f64 (__b, 0)) };
+}
+
 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
 vmin_s8 (int8x8_t __a, int8x8_t __b)
 {
@@ -17922,6 +17946,14 @@ vminnm_f32 (float32x2_t __a, float32x2_t __b)
   return __builtin_aarch64_fminv2sf (__a, __b);
 }
 
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vminnm_f64 (float64x1_t __a, float64x1_t __b)
+{
+  return (float64x1_t)
+    { __builtin_aarch64_fmind (vget_lane_f64 (__a, 0),
+				vget_lane_f64 (__b, 0)) };
+}
+
 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
 vminnmq_f32 (float32x4_t __a, float32x4_t __b)
 {
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 5e8b0ad9cee..187057f2da0 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -1016,9 +1016,8 @@
 (define_int_iterator ADDSUBHN2 [UNSPEC_ADDHN2 UNSPEC_RADDHN2
 			        UNSPEC_SUBHN2 UNSPEC_RSUBHN2])
 
-(define_int_iterator FMAXMIN_UNS [UNSPEC_FMAX UNSPEC_FMIN])
-
-(define_int_iterator FMAXMIN [UNSPEC_FMAXNM UNSPEC_FMINNM])
+(define_int_iterator FMAXMIN_UNS [UNSPEC_FMAX UNSPEC_FMIN
+				  UNSPEC_FMAXNM UNSPEC_FMINNM])
 
 (define_int_iterator VQDMULH [UNSPEC_SQDMULH UNSPEC_SQRDMULH])
 
@@ -1102,7 +1101,9 @@
 			      (UNSPEC_FMAXV "smax_nan")
 			      (UNSPEC_FMIN "smin_nan")
 			      (UNSPEC_FMINNMV "smin")
-			      (UNSPEC_FMINV "smin_nan")])
+			      (UNSPEC_FMINV "smin_nan")
+			      (UNSPEC_FMAXNM "fmax")
+			      (UNSPEC_FMINNM "fmin")])
 
 (define_int_attr  maxmin_uns_op [(UNSPEC_UMAXV "umax")
 				 (UNSPEC_UMINV "umin")
@@ -1113,13 +1114,9 @@
 				 (UNSPEC_FMAXV "fmax")
 				 (UNSPEC_FMIN "fmin")
 				 (UNSPEC_FMINNMV "fminnm")
-				 (UNSPEC_FMINV "fmin")])
-
-(define_int_attr fmaxmin [(UNSPEC_FMAXNM "fmax")
-			  (UNSPEC_FMINNM "fmin")])
-
-(define_int_attr fmaxmin_op [(UNSPEC_FMAXNM "fmaxnm")
-			     (UNSPEC_FMINNM "fminnm")])
+				 (UNSPEC_FMINV "fmin")
+				 (UNSPEC_FMAXNM "fmaxnm")
+				 (UNSPEC_FMINNM "fminnm")])
 
 (define_int_attr sur [(UNSPEC_SHADD "s") (UNSPEC_UHADD "u")
 		      (UNSPEC_SRHADD "sr") (UNSPEC_URHADD "ur")
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 3eb9df1a412..f5bd074cf18 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,9 @@
+2016-08-02  Tamar Christina  <tamar.christina@arm.com>
+
+	* gcc.target/aarch64/vminmaxnm.c: New.
+	* gcc.target/aarch64/simd/vminmaxnm_1.c (main): Add float64x1_t
+	tests.
+
 2016-08-01  Michael Meissner  <meissner@linux.vnet.ibm.com>
 
 	* gcc.target/powerpc/vec-extract-5.c: New tests to test
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vminmaxnm_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vminmaxnm_1.c
index 96608ebb283..192bad9879b 100644
--- a/gcc/testsuite/gcc.target/aarch64/simd/vminmaxnm_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/simd/vminmaxnm_1.c
@@ -1,4 +1,4 @@
-/* Test the `v[min|max]nm{q}_f*' AArch64 SIMD intrinsic.  */
+/* Test the `v[min|max]{nm}{q}_f*' AArch64 SIMD intrinsic.  */
 
 /* { dg-do run } */
 /* { dg-options "-O2" } */
@@ -18,6 +18,7 @@ extern void abort ();
 int
 main (int argc, char **argv)
 {
+  /* v{min|max}nm_f32 normal.  */
   float32x2_t f32x2_input1 = vdup_n_f32 (-1.0);
   float32x2_t f32x2_input2 = vdup_n_f32 (0.0);
   float32x2_t f32x2_exp_minnm  = vdup_n_f32 (-1.0);
@@ -28,6 +29,7 @@ main (int argc, char **argv)
   CHECK (uint32_t, 2, f32x2_ret_minnm, f32x2_exp_minnm);
   CHECK (uint32_t, 2, f32x2_ret_maxnm, f32x2_exp_maxnm);
 
+  /* v{min|max}nm_f32 NaN.  */
   f32x2_input1 = vdup_n_f32 (__builtin_nanf (""));
   f32x2_input2 = vdup_n_f32 (1.0);
   f32x2_exp_minnm  = vdup_n_f32 (1.0);
@@ -38,6 +40,7 @@ main (int argc, char **argv)
   CHECK (uint32_t, 2, f32x2_ret_minnm, f32x2_exp_minnm);
   CHECK (uint32_t, 2, f32x2_ret_maxnm, f32x2_exp_maxnm);
 
+  /* v{min|max}nmq_f32 normal.  */
   float32x4_t f32x4_input1 = vdupq_n_f32 (-1024.0);
   float32x4_t f32x4_input2 = vdupq_n_f32 (77.0);
   float32x4_t f32x4_exp_minnm  = vdupq_n_f32 (-1024.0);
@@ -48,6 +51,7 @@ main (int argc, char **argv)
   CHECK (uint32_t, 4, f32x4_ret_minnm, f32x4_exp_minnm);
   CHECK (uint32_t, 4, f32x4_ret_maxnm, f32x4_exp_maxnm);
 
+  /* v{min|max}nmq_f32 NaN.  */
   f32x4_input1 = vdupq_n_f32 (-__builtin_nanf (""));
   f32x4_input2 = vdupq_n_f32 (-1.0);
   f32x4_exp_minnm  = vdupq_n_f32 (-1.0);
@@ -58,16 +62,57 @@ main (int argc, char **argv)
   CHECK (uint32_t, 4, f32x4_ret_minnm, f32x4_exp_minnm);
   CHECK (uint32_t, 4, f32x4_ret_maxnm, f32x4_exp_maxnm);
 
+  /* v{min|max}nm_f64 normal.  */
+  float64x1_t f64x1_input1 = vdup_n_f64 (1.23);
+  float64x1_t f64x1_input2 = vdup_n_f64 (4.56);
+  float64x1_t f64x1_exp_minnm  = vdup_n_f64 (1.23);
+  float64x1_t f64x1_exp_maxnm  = vdup_n_f64 (4.56);
+  float64x1_t f64x1_ret_minnm  = vminnm_f64 (f64x1_input1, f64x1_input2);
+  float64x1_t f64x1_ret_maxnm  = vmaxnm_f64 (f64x1_input1, f64x1_input2);
+  CHECK (uint64_t, 1, f64x1_ret_minnm, f64x1_exp_minnm);
+  CHECK (uint64_t, 1, f64x1_ret_maxnm, f64x1_exp_maxnm);
+
+  /* v{min|max}_f64 normal.  */
+  float64x1_t f64x1_exp_min  = vdup_n_f64 (1.23);
+  float64x1_t f64x1_exp_max  = vdup_n_f64 (4.56);
+  float64x1_t f64x1_ret_min  = vmin_f64 (f64x1_input1, f64x1_input2);
+  float64x1_t f64x1_ret_max  = vmax_f64 (f64x1_input1, f64x1_input2);
+  CHECK (uint64_t, 1, f64x1_ret_min, f64x1_exp_min);
+  CHECK (uint64_t, 1, f64x1_ret_max, f64x1_exp_max);
+
+  /* v{min|max}nmq_f64 normal.  */
   float64x2_t f64x2_input1 = vdupq_n_f64 (1.23);
   float64x2_t f64x2_input2 = vdupq_n_f64 (4.56);
   float64x2_t f64x2_exp_minnm  = vdupq_n_f64 (1.23);
   float64x2_t f64x2_exp_maxnm  = vdupq_n_f64 (4.56);
   float64x2_t f64x2_ret_minnm  = vminnmq_f64 (f64x2_input1, f64x2_input2);
   float64x2_t f64x2_ret_maxnm  = vmaxnmq_f64 (f64x2_input1, f64x2_input2);
-
   CHECK (uint64_t, 2, f64x2_ret_minnm, f64x2_exp_minnm);
   CHECK (uint64_t, 2, f64x2_ret_maxnm, f64x2_exp_maxnm);
 
+  /* v{min|max}nm_f64 NaN.  */
+  f64x1_input1 = vdup_n_f64 (-__builtin_nanf (""));
+  f64x1_input2 = vdup_n_f64 (1.0);
+  f64x1_exp_minnm  = vdup_n_f64 (1.0);
+  f64x1_exp_maxnm  = vdup_n_f64 (1.0);
+  f64x1_ret_minnm  = vminnm_f64 (f64x1_input1, f64x1_input2);
+  f64x1_ret_maxnm  = vmaxnm_f64 (f64x1_input1, f64x1_input2);
+
+  CHECK (uint64_t, 1, f64x1_ret_minnm, f64x1_exp_minnm);
+  CHECK (uint64_t, 1, f64x1_ret_maxnm, f64x1_exp_maxnm);
+
+  /* v{min|max}_f64 NaN.  */
+  f64x1_input1 = vdup_n_f64 (-__builtin_nanf (""));
+  f64x1_input2 = vdup_n_f64 (1.0);
+  f64x1_exp_minnm  = vdup_n_f64 (-__builtin_nanf (""));
+  f64x1_exp_maxnm  = vdup_n_f64 (-__builtin_nanf (""));
+  f64x1_ret_minnm  = vmin_f64 (f64x1_input1, f64x1_input2);
+  f64x1_ret_maxnm  = vmax_f64 (f64x1_input1, f64x1_input2);
+
+  CHECK (uint64_t, 1, f64x1_ret_minnm, f64x1_exp_minnm);
+  CHECK (uint64_t, 1, f64x1_ret_maxnm, f64x1_exp_maxnm);
+
+  /* v{min|max}nmq_f64 NaN.  */
   f64x2_input1 = vdupq_n_f64 (-__builtin_nan (""));
   f64x2_input2 = vdupq_n_f64 (1.0);
   f64x2_exp_minnm  = vdupq_n_f64 (1.0);
diff --git a/gcc/testsuite/gcc.target/aarch64/vminmaxnm.c b/gcc/testsuite/gcc.target/aarch64/vminmaxnm.c
new file mode 100644
index 00000000000..bdaa5649971
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vminmaxnm.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include "arm_neon.h"
+
+/* For each of these intrinsics, we map directly to an unspec in RTL.
+   We're just using the argument directly and returning the result, so we
+   can precisely specify the exact instruction pattern and register
+   allocations we expect.  */
+
+float64x1_t
+test_vmaxnm_f64 (float64x1_t a, float64x1_t b)
+{
+  /* { dg-final { scan-assembler-times "fmaxnm\td0, d0, d1" 1 } } */
+  return vmaxnm_f64 (a, b);
+}
+
+float64x1_t
+test_vminnm_f64 (float64x1_t a, float64x1_t b)
+{
+  /* { dg-final { scan-assembler-times "fminnm\td0, d0, d1" 1 } } */
+  return vminnm_f64 (a, b);
+}
+
+float64x1_t
+test_vmax_f64 (float64x1_t a, float64x1_t b)
+{
+  /* { dg-final { scan-assembler-times "fmax\td0, d0, d1" 1 } } */
+  return vmax_f64 (a, b);
+}
+
+float64x1_t
+test_vmin_f64 (float64x1_t a, float64x1_t b)
+{
+  /* { dg-final { scan-assembler-times "fmin\td0, d0, d1" 1 } } */
+  return vmin_f64 (a, b);
+}
\ No newline at end of file