From 97f518b3fe714bb026f8070aa475e86ddbcca509 Mon Sep 17 00:00:00 2001 From: Jiong Wang Date: Tue, 9 Sep 2014 12:29:36 +0000 Subject: [PATCH] [ARM] Enable auto-vectorization for copysignf gcc/ * config/arm/arm.c (NEON_COPYSIGNF): New enum. (arm_init_neon_builtins): Support NEON_COPYSIGNF. (arm_builtin_vectorized_function): Likewise. * config/arm/arm_neon_builtins.def: New macro for copysignf. * config/arm/neon.md (neon_copysignf): New pattern for vector copysignf. gcc/testsuite/ * gcc.target/arm/vect-copysignf.c: New testcase. From-SVN: r215067 --- gcc/ChangeLog | 8 ++++ gcc/config/arm/arm.c | 39 +++++++++++++++---- gcc/config/arm/arm_neon_builtins.def | 1 + gcc/config/arm/neon.md | 27 +++++++++++++ gcc/testsuite/ChangeLog | 4 ++ gcc/testsuite/gcc.target/arm/vect-copysignf.c | 36 +++++++++++++++++ 6 files changed, 108 insertions(+), 7 deletions(-) create mode 100644 gcc/testsuite/gcc.target/arm/vect-copysignf.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index ad2092b770f..68d9cc2dc2e 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,11 @@ +2014-09-09 Jiong Wang + + * config/arm/arm.c (NEON_COPYSIGNF): New enum. + (arm_init_neon_builtins): Support NEON_COPYSIGNF. + (arm_builtin_vectorized_function): Likewise. + * config/arm/arm_neon_builtins.def: New macro for copysignf. + * config/arm/neon.md (neon_copysignf): New pattern for vector copysignf. + 2014-09-09 Richard Sandiford * bb-reorder.h (default_target_bb_reorder): Remove redundant GTY. diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index d75ca4293ed..f0e622d0a78 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -23289,6 +23289,7 @@ typedef enum { NEON_SETLANE, NEON_CREATE, NEON_RINT, + NEON_COPYSIGNF, NEON_DUP, NEON_DUPLANE, NEON_COMBINE, @@ -24283,6 +24284,22 @@ arm_init_neon_builtins (void) ftype = build_function_type_list (eltype, eltype, NULL); break; } + case NEON_COPYSIGNF: + { + tree eltype = NULL_TREE; + switch (insn_data[d->code].operand[1].mode) + { + case V2SFmode: + eltype = V2SF_type_node; + break; + case V4SFmode: + eltype = V4SF_type_node; + break; + default: gcc_unreachable (); + } + ftype = build_function_type_list (eltype, eltype, NULL); + break; + } default: gcc_unreachable (); } @@ -25486,6 +25503,7 @@ arm_expand_neon_builtin (int fcode, tree exp, rtx target) return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP); + case NEON_COPYSIGNF: case NEON_COMBINE: case NEON_VTBL: return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode, @@ -30063,27 +30081,34 @@ arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in) : ARM_FIND_VCVT_VARIANT (vcvtm); #undef ARM_CHECK_BUILTIN_MODE #define ARM_CHECK_BUILTIN_MODE(C, N) \ - (out_mode == N##Imode && out_n == C \ - && in_mode == N##Imode && in_n == C) + (out_mode == N##mode && out_n == C \ + && in_mode == N##mode && in_n == C) case BUILT_IN_BSWAP16: - if (ARM_CHECK_BUILTIN_MODE (4, H)) + if (ARM_CHECK_BUILTIN_MODE (4, HI)) return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv4hi, false); - else if (ARM_CHECK_BUILTIN_MODE (8, H)) + else if (ARM_CHECK_BUILTIN_MODE (8, HI)) return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv8hi, false); else return NULL_TREE; case BUILT_IN_BSWAP32: - if (ARM_CHECK_BUILTIN_MODE (2, S)) + if (ARM_CHECK_BUILTIN_MODE (2, SI)) return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv2si, false); - else if (ARM_CHECK_BUILTIN_MODE (4, S)) + else if (ARM_CHECK_BUILTIN_MODE (4, SI)) return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv4si, false); else return NULL_TREE; case BUILT_IN_BSWAP64: - if (ARM_CHECK_BUILTIN_MODE (2, D)) + if (ARM_CHECK_BUILTIN_MODE (2, DI)) return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv2di, false); else return NULL_TREE; + case BUILT_IN_COPYSIGNF: + if (ARM_CHECK_BUILTIN_MODE (2, SF)) + return arm_builtin_decl (ARM_BUILTIN_NEON_copysignfv2sf, false); + else if (ARM_CHECK_BUILTIN_MODE (4, SF)) + return arm_builtin_decl (ARM_BUILTIN_NEON_copysignfv4sf, false); + else + return NULL_TREE; default: return NULL_TREE; diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon_builtins.def index efe5bda965a..229caca6a80 100644 --- a/gcc/config/arm/arm_neon_builtins.def +++ b/gcc/config/arm/arm_neon_builtins.def @@ -135,6 +135,7 @@ VAR1 (FLOAT_WIDEN, vcvtv4sf, v4hf), VAR1 (FLOAT_NARROW, vcvtv4hf, v4sf), VAR10 (SELECT, vbsl, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), +VAR2 (COPYSIGNF, copysignf, v2sf, v4sf), VAR2 (RINT, vrintn, v2sf, v4sf), VAR2 (RINT, vrinta, v2sf, v4sf), VAR2 (RINT, vrintp, v2sf, v4sf), diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index 354a105ee95..38daf35ed84 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -2580,6 +2580,33 @@ DONE; }) +(define_expand "neon_copysignf" + [(match_operand:VCVTF 0 "register_operand") + (match_operand:VCVTF 1 "register_operand") + (match_operand:VCVTF 2 "register_operand")] + "TARGET_NEON" + "{ + rtx v_bitmask_cast; + rtx v_bitmask = gen_reg_rtx (mode); + int i, n_elt = GET_MODE_NUNITS (mode); + rtvec v = rtvec_alloc (n_elt); + + /* Create bitmask for vector select. */ + for (i = 0; i < n_elt; ++i) + RTVEC_ELT (v, i) = GEN_INT (0x80000000); + + emit_move_insn (v_bitmask, + gen_rtx_CONST_VECTOR (mode, v)); + emit_move_insn (operands[0], operands[2]); + v_bitmask_cast = simplify_gen_subreg (mode, v_bitmask, + mode, 0); + emit_insn (gen_neon_vbsl (operands[0], v_bitmask_cast, operands[0], + operands[1])); + + DONE; + }" +) + (define_insn "neon_vqneg" [(set (match_operand:VDQIW 0 "s_register_operand" "=w") (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w") diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 7214b7ee1c7..927a7126a74 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2014-09-09 Jiong Wang + + * gcc.target/arm/vect-copysignf.c: New testcase. + 2014-09-09 Kyrylo Tkachov * gcc.target/arm/vfp-1.c: Updated expected assembly. diff --git a/gcc/testsuite/gcc.target/arm/vect-copysignf.c b/gcc/testsuite/gcc.target/arm/vect-copysignf.c new file mode 100644 index 00000000000..b35dd1f1853 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/vect-copysignf.c @@ -0,0 +1,36 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_hw } */ +/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details" } */ +/* { dg-add-options "arm_neon" } */ + +extern void abort (); + +#define N 16 +float a[N] = {-0.1f, -3.2f, -6.3f, -9.4f, + -12.5f, -15.6f, -18.7f, -21.8f, + 24.9f, 27.1f, 30.2f, 33.3f, + 36.4f, 39.5f, 42.6f, 45.7f}; +float b[N] = {-1.2f, 3.4f, -5.6f, 7.8f, + -9.0f, 1.0f, -2.0f, 3.0f, + -4.0f, -5.0f, 6.0f, 7.0f, + -8.0f, -9.0f, 10.0f, 11.0f}; +float r[N]; + +int +main (void) +{ + int i; + + for (i = 0; i < N; i++) + r[i] = __builtin_copysignf (a[i], b[i]); + + /* check results: */ + for (i = 0; i < N; i++) + if (r[i] != __builtin_copysignf (a[i], b[i])) + abort (); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ -- 2.30.2