[ARM] Enable auto-vectorization for copysignf
author: Jiong Wang <jiong.wang@arm.com>
Tue, 9 Sep 2014 12:29:36 +0000 (12:29 +0000)
committer: Jiong Wang <jiwang@gcc.gnu.org>
Tue, 9 Sep 2014 12:29:36 +0000 (12:29 +0000)
  gcc/
    * config/arm/arm.c (NEON_COPYSIGNF): New enum.
    (arm_init_neon_builtins): Support NEON_COPYSIGNF.
    (arm_builtin_vectorized_function): Likewise.
    * config/arm/arm_neon_builtins.def: New macro for copysignf.
    * config/arm/neon.md (neon_copysignf<mode>): New pattern for vector
    copysignf.

  gcc/testsuite/
    * gcc.target/arm/vect-copysignf.c: New testcase.

From-SVN: r215067

gcc/ChangeLog
gcc/config/arm/arm.c
gcc/config/arm/arm_neon_builtins.def
gcc/config/arm/neon.md
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/arm/vect-copysignf.c [new file with mode: 0644]

index ad2092b770f3897ad1b43798eb119dab237fb6a4..68d9cc2dc2ec278114250b0902090de36019e33d 100644 (file)
@@ -1,3 +1,11 @@
+2014-09-09  Jiong Wang  <jiong.wang@arm.com>
+
+       * config/arm/arm.c (NEON_COPYSIGNF): New enum.
+       (arm_init_neon_builtins): Support NEON_COPYSIGNF.
+       (arm_builtin_vectorized_function): Likewise.
+       * config/arm/arm_neon_builtins.def: New macro for copysignf.
+       * config/arm/neon.md (neon_copysignf<mode>): New pattern for vector copysignf.
+
 2014-09-09  Richard Sandiford  <rdsandiford@googlemail.com>
 
        * bb-reorder.h (default_target_bb_reorder): Remove redundant GTY.
index d75ca4293ed21568fc69d554192d7b5cb648ab37..f0e622d0a7837e9c4fb315ce6f2adf537288e14c 100644 (file)
@@ -23289,6 +23289,7 @@ typedef enum {
   NEON_SETLANE,
   NEON_CREATE,
   NEON_RINT,
+  NEON_COPYSIGNF,
   NEON_DUP,
   NEON_DUPLANE,
   NEON_COMBINE,
@@ -24283,6 +24284,22 @@ arm_init_neon_builtins (void)
            ftype = build_function_type_list (eltype, eltype, NULL);
            break;
        }
+       case NEON_COPYSIGNF:
+         {
+           tree eltype = NULL_TREE;
+           switch (insn_data[d->code].operand[1].mode)
+             {
+             case V2SFmode:
+               eltype = V2SF_type_node;
+               break;
+             case V4SFmode:
+               eltype = V4SF_type_node;
+               break;
+             default: gcc_unreachable ();
+             }
+           ftype = build_function_type_list (eltype, eltype, NULL);
+           break;
+         }
        default:
          gcc_unreachable ();
        }
@@ -25486,6 +25503,7 @@ arm_expand_neon_builtin (int fcode, tree exp, rtx target)
       return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
         NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
 
+    case NEON_COPYSIGNF:
     case NEON_COMBINE:
     case NEON_VTBL:
       return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
@@ -30063,27 +30081,34 @@ arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
                      : ARM_FIND_VCVT_VARIANT (vcvtm);
 #undef ARM_CHECK_BUILTIN_MODE
 #define ARM_CHECK_BUILTIN_MODE(C, N) \
-  (out_mode == N##Imode && out_n == C \
-   && in_mode == N##Imode && in_n == C)
+  (out_mode == N##mode && out_n == C \
+   && in_mode == N##mode && in_n == C)
           case BUILT_IN_BSWAP16:
-            if (ARM_CHECK_BUILTIN_MODE (4, H))
+            if (ARM_CHECK_BUILTIN_MODE (4, HI))
               return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv4hi, false);
-            else if (ARM_CHECK_BUILTIN_MODE (8, H))
+            else if (ARM_CHECK_BUILTIN_MODE (8, HI))
               return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv8hi, false);
             else
               return NULL_TREE;
           case BUILT_IN_BSWAP32:
-            if (ARM_CHECK_BUILTIN_MODE (2, S))
+            if (ARM_CHECK_BUILTIN_MODE (2, SI))
               return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv2si, false);
-            else if (ARM_CHECK_BUILTIN_MODE (4, S))
+            else if (ARM_CHECK_BUILTIN_MODE (4, SI))
               return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv4si, false);
             else
               return NULL_TREE;
           case BUILT_IN_BSWAP64:
-            if (ARM_CHECK_BUILTIN_MODE (2, D))
+            if (ARM_CHECK_BUILTIN_MODE (2, DI))
               return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv2di, false);
             else
               return NULL_TREE;
+         case BUILT_IN_COPYSIGNF:
+           if (ARM_CHECK_BUILTIN_MODE (2, SF))
+              return arm_builtin_decl (ARM_BUILTIN_NEON_copysignfv2sf, false);
+           else if (ARM_CHECK_BUILTIN_MODE (4, SF))
+              return arm_builtin_decl (ARM_BUILTIN_NEON_copysignfv4sf, false);
+           else
+             return NULL_TREE;
 
           default:
             return NULL_TREE;
index efe5bda965afc1cb1b7583d34db4f4218027ed19..229caca6a8056c8ba2bbd38032d1aac46db6934f 100644 (file)
@@ -135,6 +135,7 @@ VAR1 (FLOAT_WIDEN, vcvtv4sf, v4hf),
 VAR1 (FLOAT_NARROW, vcvtv4hf, v4sf),
 VAR10 (SELECT, vbsl,
         v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
+VAR2 (COPYSIGNF, copysignf, v2sf, v4sf),
 VAR2 (RINT, vrintn, v2sf, v4sf),
 VAR2 (RINT, vrinta, v2sf, v4sf),
 VAR2 (RINT, vrintp, v2sf, v4sf),
index 354a105ee951866d112eb3a8507495b1ac1b7a56..38daf35ed845b9a60027fc3ef8108bb12af2df7b 100644 (file)
   DONE;
 })
 
+;; Vector copysignf.  Operand 0 receives operand 1 with the sign bit of each
+;; element replaced by the sign bit of the matching element of operand 2.
+;; This is done with a bitwise select (neon_vbsl<mode>) under a mask that has
+;; only the sign bit of every element set.
+(define_expand "neon_copysignf<mode>"
+  [(match_operand:VCVTF 0 "register_operand")
+   (match_operand:VCVTF 1 "register_operand")
+   (match_operand:VCVTF 2 "register_operand")]
+  "TARGET_NEON"
+  "{
+     rtx v_bitmask_cast;
+     rtx v_bitmask = gen_reg_rtx (<VCVTF:V_cmp_result>mode);
+     int i, n_elt = GET_MODE_NUNITS (<MODE>mode);
+     rtvec v = rtvec_alloc (n_elt);
+
+     /* Create bitmask for vector select: only the sign bit of each element
+        is set.  Use gen_int_mode rather than GEN_INT, because a CONST_INT
+        must be sign-extended from the precision of its mode and
+        GEN_INT (0x80000000) is not a canonical SImode constant when
+        HOST_WIDE_INT is wider than 32 bits.  */
+     for (i = 0; i < n_elt; ++i)
+       RTVEC_ELT (v, i) = gen_int_mode (0x80000000, SImode);
+
+     emit_move_insn (v_bitmask,
+                    gen_rtx_CONST_VECTOR (<VCVTF:V_cmp_result>mode, v));
+     /* Start from the sign source; the select below then keeps only its
+        sign bits and takes every other bit from operand 1.  */
+     emit_move_insn (operands[0], operands[2]);
+     /* The mask was built in the integer vector mode; view it in the float
+        vector mode expected by the vbsl pattern.  */
+     v_bitmask_cast = simplify_gen_subreg (<MODE>mode, v_bitmask,
+                                          <VCVTF:V_cmp_result>mode, 0);
+     emit_insn (gen_neon_vbsl<mode> (operands[0], v_bitmask_cast, operands[0],
+                                    operands[1]));
+
+     DONE;
+  }"
+)
+
 (define_insn "neon_vqneg<mode>"
   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
        (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
index 7214b7ee1c737c20df2d9e077cfa01916121ddd8..927a7126a748e39bd1616239f3ed607bd879c43c 100644 (file)
@@ -1,3 +1,7 @@
+2014-09-09  Jiong Wang  <jiong.wang@arm.com>
+
+       * gcc.target/arm/vect-copysignf.c: New testcase.
+
 2014-09-09  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
 
        * gcc.target/arm/vfp-1.c: Updated expected assembly.
diff --git a/gcc/testsuite/gcc.target/arm/vect-copysignf.c b/gcc/testsuite/gcc.target/arm/vect-copysignf.c
new file mode 100644 (file)
index 0000000..b35dd1f
--- /dev/null
@@ -0,0 +1,36 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details" } */
+/* { dg-add-options "arm_neon" } */
+
+extern void abort ();
+
+/* Number of elements in each test array.  */
+#define N 16
+/* First argument of copysignf in main: the first eight values are negative
+   and the last eight positive.  */
+float a[N] = {-0.1f, -3.2f, -6.3f, -9.4f,
+             -12.5f, -15.6f, -18.7f, -21.8f,
+             24.9f, 27.1f, 30.2f, 33.3f,
+             36.4f, 39.5f, 42.6f, 45.7f};
+/* Second argument of copysignf in main: signs are mixed so that, paired with
+   a[], all four negative/positive combinations occur.  */
+float b[N] = {-1.2f, 3.4f, -5.6f, 7.8f,
+             -9.0f, 1.0f, -2.0f, 3.0f,
+             -4.0f, -5.0f, 6.0f, 7.0f,
+             -8.0f, -9.0f, 10.0f, 11.0f};
+/* Results written by the first loop in main.  */
+float r[N];
+
+/* Fill r[] with __builtin_copysignf (a[i], b[i]) in a simple counted loop,
+   then evaluate the same expression again element by element and abort on
+   any mismatch.  Returns 0 when every element agrees.  */
+int
+main (void)
+{
+  int i;
+
+  /* Loop under test: the dg-final directive below expects the vect dump to
+     report exactly one vectorized loop (presumably this one).  */
+  for (i = 0; i < N; i++)
+    r[i] = __builtin_copysignf (a[i], b[i]);
+
+  /* Check results.  The reference value comes from the same builtin, so this
+     detects a disagreement between the two loops rather than comparing
+     against independently computed expected values.  */
+  for (i = 0; i < N; i++)
+    if (r[i] != __builtin_copysignf (a[i], b[i]))
+      abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */