[2/2] Vectorise lroundf, lfloorf, lceilf using the new ARMv8-A vcvt* instructions.
author Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Tue, 2 Sep 2014 16:00:01 +0000 (16:00 +0000)
committer Kyrylo Tkachov <ktkachov@gcc.gnu.org>
Tue, 2 Sep 2014 16:00:01 +0000 (16:00 +0000)
PR target/62275
* config/arm/neon.md
(neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode>
<v_cmp_result>): New pattern.
* config/arm/iterators.md (NEON_VCVT): New int iterator.
* config/arm/arm_neon_builtins.def (vcvtav2sf, vcvtav4sf, vcvtauv2sf,
vcvtauv4sf, vcvtpv2sf, vcvtpv4sf, vcvtpuv2sf, vcvtpuv4sf, vcvtmv2sf,
vcvtmv4sf, vcvtmuv2sf, vcvtmuv4sf): New builtin definitions.
* config/arm/arm.c (arm_builtin_vectorized_function): Handle
BUILT_IN_LROUNDF, BUILT_IN_LFLOORF, BUILT_IN_LCEILF.

PR target/62275
* gcc.target/arm/vect-lceilf_1.c: New test.
* gcc.target/arm/vect-lfloorf_1.c: Likewise.
* gcc.target/arm/vect-lroundf_1.c: Likewise.

From-SVN: r214826

gcc/ChangeLog
gcc/config/arm/arm.c
gcc/config/arm/arm_neon_builtins.def
gcc/config/arm/iterators.md
gcc/config/arm/neon.md
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/arm/vect-lceilf_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/arm/vect-lfloorf_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/arm/vect-lroundf_1.c [new file with mode: 0644]

index b46d009219db87eff25718e2d2cf4c5fc0bfb6cf..7bfbd5a310863d0ea7afb3d5ddc2acb1eb05dede 100644 (file)
@@ -1,3 +1,16 @@
+2014-09-02  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+       PR target/62275
+       * config/arm/neon.md
+       (neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode>
+       <v_cmp_result>): New pattern.
+       * config/arm/iterators.md (NEON_VCVT): New int iterator.
+       * config/arm/arm_neon_builtins.def (vcvtav2sf, vcvtav4sf, vcvtauv2sf,
+       vcvtauv4sf, vcvtpv2sf, vcvtpv4sf, vcvtpuv2sf, vcvtpuv4sf, vcvtmv2sf,
+       vcvtmv4sf, vcvtmuv2sf, vcvtmuv4sf): New builtin definitions.
+       * config/arm/arm.c (arm_builtin_vectorized_function): Handle
+       BUILT_IN_LROUNDF, BUILT_IN_LFLOORF, BUILT_IN_LCEILF.
+
 2014-09-02  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
 
        PR target/62275
index d8bfda3aa98365b3d562566f4db2273e2d737cbe..ba677abd5be31037745c5d50983808571edb476f 100644 (file)
@@ -29946,6 +29946,7 @@ arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
 {
   enum machine_mode in_mode, out_mode;
   int in_n, out_n;
+  bool out_unsigned_p = TYPE_UNSIGNED (type_out);
 
   if (TREE_CODE (type_out) != VECTOR_TYPE
       || TREE_CODE (type_in) != VECTOR_TYPE)
@@ -29991,6 +29992,36 @@ arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
             return ARM_FIND_VRINT_VARIANT (vrintz);
           case BUILT_IN_ROUNDF:
             return ARM_FIND_VRINT_VARIANT (vrinta);
+#undef ARM_CHECK_BUILTIN_MODE_1
+#define ARM_CHECK_BUILTIN_MODE_1(C) \
+  (out_mode == SImode && out_n == C \
+   && in_mode == SFmode && in_n == C)
+
+#define ARM_FIND_VCVT_VARIANT(N) \
+  (ARM_CHECK_BUILTIN_MODE (2) \
+   ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sfv2si, false) \
+   : (ARM_CHECK_BUILTIN_MODE (4) \
+     ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sfv4si, false) \
+     : NULL_TREE))
+
+#define ARM_FIND_VCVTU_VARIANT(N) \
+  (ARM_CHECK_BUILTIN_MODE (2) \
+   ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##uv2sfv2si, false) \
+   : (ARM_CHECK_BUILTIN_MODE (4) \
+     ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##uv4sfv4si, false) \
+     : NULL_TREE))
+          case BUILT_IN_LROUNDF:
+            return out_unsigned_p
+                     ? ARM_FIND_VCVTU_VARIANT (vcvta)
+                     : ARM_FIND_VCVT_VARIANT (vcvta);
+          case BUILT_IN_LCEILF:
+            return out_unsigned_p
+                     ? ARM_FIND_VCVTU_VARIANT (vcvtp)
+                     : ARM_FIND_VCVT_VARIANT (vcvtp);
+          case BUILT_IN_LFLOORF:
+            return out_unsigned_p
+                     ? ARM_FIND_VCVTU_VARIANT (vcvtm)
+                     : ARM_FIND_VCVT_VARIANT (vcvtm);
 #undef ARM_CHECK_BUILTIN_MODE
 #define ARM_CHECK_BUILTIN_MODE(C, N) \
   (out_mode == N##Imode && out_n == C \
@@ -30021,9 +30052,12 @@ arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
     }
   return NULL_TREE;
 }
+#undef ARM_FIND_VCVT_VARIANT
+#undef ARM_FIND_VCVTU_VARIANT
 #undef ARM_CHECK_BUILTIN_MODE
 #undef ARM_FIND_VRINT_VARIANT
 
+
 /* The AAPCS sets the maximum alignment of a vector to 64 bits.  */
 static HOST_WIDE_INT
 arm_vector_alignment (const_tree type)
index f4531f36e7aa0bff4d33bb78eb68a480bf389d47..efe5bda965afc1cb1b7583d34db4f4218027ed19 100644 (file)
@@ -141,6 +141,18 @@ VAR2 (RINT, vrintp, v2sf, v4sf),
 VAR2 (RINT, vrintm, v2sf, v4sf),
 VAR2 (RINT, vrintz, v2sf, v4sf),
 VAR2 (RINT, vrintx, v2sf, v4sf),
+VAR1 (RINT, vcvtav2sf, v2si),
+VAR1 (RINT, vcvtav4sf, v4si),
+VAR1 (RINT, vcvtauv2sf, v2si),
+VAR1 (RINT, vcvtauv4sf, v4si),
+VAR1 (RINT, vcvtpv2sf, v2si),
+VAR1 (RINT, vcvtpv4sf, v4si),
+VAR1 (RINT, vcvtpuv2sf, v2si),
+VAR1 (RINT, vcvtpuv4sf, v4si),
+VAR1 (RINT, vcvtmv2sf, v2si),
+VAR1 (RINT, vcvtmv4sf, v4si),
+VAR1 (RINT, vcvtmuv2sf, v2si),
+VAR1 (RINT, vcvtmuv4sf, v4si),
 VAR1 (VTBL, vtbl1, v8qi),
 VAR1 (VTBL, vtbl2, v8qi),
 VAR1 (VTBL, vtbl3, v8qi),
index f7e0e1483c9aad1442b832e277ca3ca4279e995a..021372a107a6deb9cf56abe092805b38ae22be05 100644 (file)
 (define_int_iterator NEON_VRINT [UNSPEC_NVRINTP UNSPEC_NVRINTZ UNSPEC_NVRINTM
                               UNSPEC_NVRINTX UNSPEC_NVRINTA UNSPEC_NVRINTN])
 
+;; The subset of the NEON_VRINT unspecs (P, M and A) that the
+;; neon_vcvt<...> float-to-integer pattern in neon.md iterates over.
+(define_int_iterator NEON_VCVT [UNSPEC_NVRINTP UNSPEC_NVRINTM UNSPEC_NVRINTA])
+
 (define_int_iterator CRC [UNSPEC_CRC32B UNSPEC_CRC32H UNSPEC_CRC32W
                           UNSPEC_CRC32CB UNSPEC_CRC32CH UNSPEC_CRC32CW])
 
index dc364eeb64e8cbb3b9a2eb5725338110ea3e8a7a..354a105ee951866d112eb3a8507495b1ac1b7a56 100644 (file)
   [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")]
 )
 
+;; Convert a VCVTF vector of floats to signed or unsigned 32-bit integers
+;; (FIXUORS), using the rounding variant selected by the NEON_VCVT iterator.
+;; Only enabled when both NEON and the ARMv8 FPU are available.
+(define_insn "neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode><v_cmp_result>"
+  [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
+       (FIXUORS:<V_cmp_result> (unspec:VCVTF
+                              [(match_operand:VCVTF 1 "register_operand" "w")]
+                              NEON_VCVT)))]
+  "TARGET_NEON && TARGET_FPU_ARMV8"
+  "vcvt<nvrint_variant>.<su>32.f32\\t%<V_reg>0, %<V_reg>1"
+  [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")
+   (set_attr "predicable" "no")]
+)
+
 (define_insn "ior<mode>3"
   [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
        (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
index cc340df2d6d85793f8967188a6e1ed3f74771045..97e73f83672cc46b971667c0c3530cf88b27e7d1 100644 (file)
@@ -1,3 +1,10 @@
+2014-09-02  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+       PR target/62275
+       * gcc.target/arm/vect-lceilf_1.c: New test.
+       * gcc.target/arm/vect-lfloorf_1.c: Likewise.
+       * gcc.target/arm/vect-lroundf_1.c: Likewise.
+
 2014-09-02  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
 
        PR target/62275
diff --git a/gcc/testsuite/gcc.target/arm/vect-lceilf_1.c b/gcc/testsuite/gcc.target/arm/vect-lceilf_1.c
new file mode 100644 (file)
index 0000000..75705ae
--- /dev/null
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_neon_ok } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -fdump-tree-vect-all" } */
+/* { dg-add-options arm_v8_neon } */
+
+#define N 32
+
+void
+foo (int *output, float *input)
+{
+  int i = 0;
+  /* Vectorizable: the lceilf calls should map to the vcvtp builtins.  */
+  for (i = 0; i < N; i++)
+    output[i] = __builtin_lceilf (input[i]);
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.target/arm/vect-lfloorf_1.c b/gcc/testsuite/gcc.target/arm/vect-lfloorf_1.c
new file mode 100644 (file)
index 0000000..298d54e
--- /dev/null
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_neon_ok } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -fdump-tree-vect-all" } */
+/* { dg-add-options arm_v8_neon } */
+
+#define N 32
+
+void
+foo (int *output, float *input)
+{
+  int i = 0;
+  /* Vectorizable: the lfloorf calls should map to the vcvtm builtins.  */
+  for (i = 0; i < N; i++)
+    output[i] = __builtin_lfloorf (input[i]);
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.target/arm/vect-lroundf_1.c b/gcc/testsuite/gcc.target/arm/vect-lroundf_1.c
new file mode 100644 (file)
index 0000000..6443821
--- /dev/null
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_neon_ok } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -fdump-tree-vect-all" } */
+/* { dg-add-options arm_v8_neon } */
+
+#define N 32
+
+void
+foo (int *output, float *input)
+{
+  int i = 0;
+  /* Vectorizable: the lroundf calls should map to the vcvta builtins.  */
+  for (i = 0; i < N; i++)
+    output[i] = __builtin_lroundf (input[i]);
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */