From 6c9c7b735c04bbcf4b840634d484441e764a7175 Mon Sep 17 00:00:00 2001 From: Alejandro Martinez Date: Wed, 9 Jan 2019 13:57:06 +0000 Subject: [PATCH] [Aarch64][SVE] Add copysign and xorsign support This patch adds support for copysign and xorsign builtins to SVE. With the new expands, they can be vectorized using bitwise logical operations. I tested this patch on an aarch64 machine by bootstrapping the compiler and running the checks. 2019-01-09 Alejandro Martinez * config/aarch64/aarch64-sve.md (copysign<mode>3): New define_expand. (xorsign<mode>3): Likewise. 2019-01-09 Alejandro Martinez * gcc.target/aarch64/sve/copysign_1.c: New test for SVE vectorized copysign. * gcc.target/aarch64/sve/copysign_1_run.c: Likewise. * gcc.target/aarch64/sve/xorsign_1.c: New test for SVE vectorized xorsign. * gcc.target/aarch64/sve/xorsign_1_run.c: Likewise. From-SVN: r267764 --- gcc/ChangeLog | 5 ++ gcc/config/aarch64/aarch64-sve.md | 54 ++++++++++++++ gcc/testsuite/ChangeLog | 9 +++ .../gcc.target/aarch64/sve/copysign_1.c | 41 +++++++++++ .../gcc.target/aarch64/sve/copysign_1_run.c | 72 +++++++++++++++++++ .../gcc.target/aarch64/sve/xorsign_1.c | 37 ++++++++++ .../gcc.target/aarch64/sve/xorsign_1_run.c | 72 +++++++++++++++++++ 7 files changed, 290 insertions(+) create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/copysign_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/copysign_1_run.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/xorsign_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/xorsign_1_run.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index b6550d9a807..6cd2690ae80 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,8 @@ +2019-01-09 Alejandro Martinez + + * config/aarch64/aarch64-sve.md (copysign<mode>3): New define_expand. + (xorsign<mode>3): Likewise. 
+ 2019-01-09 Jakub Jelinek PR middle-end/88758 diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index a21406cb08e..5bb3422a716 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -3074,3 +3074,57 @@ insr\t%0.<Vetype>, %<vwcore>2 insr\t%0.<Vetype>, %<Vetype>2" ) + +(define_expand "copysign<mode>3" + [(match_operand:SVE_F 0 "register_operand") + (match_operand:SVE_F 1 "register_operand") + (match_operand:SVE_F 2 "register_operand")] + "TARGET_SVE" + { + rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode); + rtx mant = gen_reg_rtx (<V_INT_EQUIV>mode); + rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode); + int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1; + + rtx arg1 = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode); + rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode); + + emit_insn (gen_and<v_int_equiv>3 + (sign, arg2, + aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode, + HOST_WIDE_INT_M1U + << bits))); + emit_insn (gen_and<v_int_equiv>3 + (mant, arg1, + aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode, + ~(HOST_WIDE_INT_M1U + << bits)))); + emit_insn (gen_ior<v_int_equiv>3 (int_res, sign, mant)); + emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res)); + DONE; + } +) + +(define_expand "xorsign<mode>3" + [(match_operand:SVE_F 0 "register_operand") + (match_operand:SVE_F 1 "register_operand") + (match_operand:SVE_F 2 "register_operand")] + "TARGET_SVE" + { + rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode); + rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode); + int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1; + + rtx arg1 = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode); + rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode); + + emit_insn (gen_and<v_int_equiv>3 + (sign, arg2, + aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode, + HOST_WIDE_INT_M1U + << bits))); + emit_insn (gen_xor<v_int_equiv>3 (int_res, arg1, sign)); + emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res)); + DONE; + } +) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 658f396c26f..5909ee26747 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,12 @@ +2019-01-09 Alejandro Martinez + + * 
gcc.target/aarch64/sve/copysign_1.c: New test for SVE vectorized + copysign. + * gcc.target/aarch64/sve/copysign_1_run.c: Likewise. + * gcc.target/aarch64/sve/xorsign_1.c: New test for SVE vectorized + xorsign. + * gcc.target/aarch64/sve/xorsign_1_run.c: Likewise. + 2019-01-09 Jakub Jelinek PR rtl-optimization/88331 diff --git a/gcc/testsuite/gcc.target/aarch64/sve/copysign_1.c b/gcc/testsuite/gcc.target/aarch64/sve/copysign_1.c new file mode 100644 index 00000000000..ca33cec6d7d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/copysign_1.c @@ -0,0 +1,41 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -ffast-math -fdump-tree-vect-details --save-temps" } */ + +void +copysign_half (_Float16 * restrict a, _Float16 * restrict b, + _Float16 * restrict r, int n) +{ + for (int i = 0; i < n; i++) + { + r[i] = __builtin_copysignf16 (a[i], b[i]); + } +} + +void +copysign_float (float *restrict a, float *restrict b, float *restrict r, + int n) +{ + for (int i = 0; i < n; i++) + { + r[i] = __builtin_copysignf (a[i], b[i]); + } +} + +void +copysign_double (double *restrict a, double *restrict b, double *restrict r, + int n) +{ + for (int i = 0; i < n; i++) + { + r[i] = __builtin_copysign (a[i], b[i]); + } +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 3 "vect" } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.h, z[0-9]+\.h, #0x8000\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.h, z[0-9]+\.h, #0x7fff\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x80000000\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x7fffffff\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d, z[0-9]+\.d, #0x8000000000000000\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d, z[0-9]+\.d, #0x7fffffffffffffff\n} 1 } } */ +/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */ 
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/copysign_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/copysign_1_run.c new file mode 100644 index 00000000000..c3d0bfa53cf --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/copysign_1_run.c @@ -0,0 +1,72 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */ + +#include "copysign_1.c" + +extern void abort (); + +#define N 16 +_Float16 ah[N] = { -0.1f16, -3.2f16, -6.3f16, -9.4f16, + -12.5f16, -15.6f16, -18.7f16, -21.8f16, + 24.9f16, 27.1f16, 30.2f16, 33.3f16, + 36.4f16, 39.5f16, 42.6f16, 45.7f16 +}; + +_Float16 bh[N] = { -1.2f16, 3.4f16, -5.6f16, 7.8f16, + -9.0f16, 1.0f16, -2.0f16, 3.0f16, + -4.0f16, -5.0f16, 6.0f16, 7.0f16, + -8.0f16, -9.0f16, 10.0f16, 11.0f16 +}; + +_Float16 rh[N]; + +float a[N] = { -0.1f, -3.2f, -6.3f, -9.4f, + -12.5f, -15.6f, -18.7f, -21.8f, + 24.9f, 27.1f, 30.2f, 33.3f, + 36.4f, 39.5f, 42.6f, 45.7f +}; + +float b[N] = { -1.2f, 3.4f, -5.6f, 7.8f, + -9.0f, 1.0f, -2.0f, 3.0f, + -4.0f, -5.0f, 6.0f, 7.0f, + -8.0f, -9.0f, 10.0f, 11.0f +}; + +float r[N]; + +double ad[N] = { -0.1d, -3.2d, -6.3d, -9.4d, + -12.5d, -15.6d, -18.7d, -21.8d, + 24.9d, 27.1d, 30.2d, 33.3d, + 36.4d, 39.5d, 42.6d, 45.7d +}; + +double bd[N] = { -1.2d, 3.4d, -5.6d, 7.8d, + -9.0d, 1.0d, -2.0d, 3.0d, + -4.0d, -5.0d, 6.0d, 7.0d, + -8.0d, -9.0d, 10.0d, 11.0d +}; + +double rd[N]; + +int +main (void) +{ + int i; + + copysign_half (ah, bh, rh, N); + for (i = 0; i < N; i++) + if (rh[i] != __builtin_copysignf16 (ah[i], bh[i])) + abort (); + + copysign_float (a, b, r, N); + for (i = 0; i < N; i++) + if (r[i] != __builtin_copysignf (a[i], b[i])) + abort (); + + copysign_double (ad, bd, rd, N); + for (i = 0; i < N; i++) + if (rd[i] != __builtin_copysign (ad[i], bd[i])) + abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/xorsign_1.c b/gcc/testsuite/gcc.target/aarch64/sve/xorsign_1.c new file mode 100644 index 00000000000..5c4ad0222a8 --- /dev/null +++ 
b/gcc/testsuite/gcc.target/aarch64/sve/xorsign_1.c @@ -0,0 +1,37 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -ffast-math -fdump-tree-vect-details --save-temps" } */ + +void +xorsign_half (_Float16 * restrict a, _Float16 * restrict b, + _Float16 * restrict r, int n) +{ + for (int i = 0; i < n; i++) + { + r[i] = a[i] * __builtin_copysignf16 (1.0f16, b[i]); + } +} + +void +xorsign_float (float *restrict a, float *restrict b, float *restrict r, int n) +{ + for (int i = 0; i < n; i++) + { + r[i] = a[i] * __builtin_copysignf (1.0f, b[i]); + } +} + +void +xorsign_double (double *restrict a, double *restrict b, double *restrict r, + int n) +{ + for (int i = 0; i < n; i++) + { + r[i] = a[i] * __builtin_copysign (1.0d, b[i]); + } +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 3 "vect" } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.h, z[0-9]+\.h, #0x8000\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, z[0-9]+\.s, #0x80000000\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d, z[0-9]+\.d, #0x8000000000000000\n} 1 } } */ +/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/xorsign_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/xorsign_1_run.c new file mode 100644 index 00000000000..de8ff83700c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/xorsign_1_run.c @@ -0,0 +1,72 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */ + +#include "xorsign_1.c" + +extern void abort (); + +#define N 16 +_Float16 ah[N] = { -0.1f16, -3.2f16, -6.3f16, -9.4f16, + -12.5f16, -15.6f16, -18.7f16, -21.8f16, + 24.9f16, 27.1f16, 30.2f16, 33.3f16, + 36.4f16, 39.5f16, 42.6f16, 45.7f16 +}; + +_Float16 bh[N] = { -1.2f16, 3.4f16, -5.6f16, 7.8f16, + -9.0f16, 1.0f16, -2.0f16, 3.0f16, + -4.0f16, -5.0f16, 6.0f16, 7.0f16, + -8.0f16, -9.0f16, 10.0f16, 
11.0f16 +}; + +_Float16 rh[N]; + +float a[N] = { -0.1f, -3.2f, -6.3f, -9.4f, + -12.5f, -15.6f, -18.7f, -21.8f, + 24.9f, 27.1f, 30.2f, 33.3f, + 36.4f, 39.5f, 42.6f, 45.7f +}; + +float b[N] = { -1.2f, 3.4f, -5.6f, 7.8f, + -9.0f, 1.0f, -2.0f, 3.0f, + -4.0f, -5.0f, 6.0f, 7.0f, + -8.0f, -9.0f, 10.0f, 11.0f +}; + +float r[N]; + +double ad[N] = { -0.1d, -3.2d, -6.3d, -9.4d, + -12.5d, -15.6d, -18.7d, -21.8d, + 24.9d, 27.1d, 30.2d, 33.3d, + 36.4d, 39.5d, 42.6d, 45.7d +}; + +double bd[N] = { -1.2d, 3.4d, -5.6d, 7.8d, + -9.0d, 1.0d, -2.0d, 3.0d, + -4.0d, -5.0d, 6.0d, 7.0d, + -8.0d, -9.0d, 10.0d, 11.0d +}; + +double rd[N]; + +int +main (void) +{ + int i; + + xorsign_half (ah, bh, rh, N); + for (i = 0; i < N; i++) + if (rh[i] != ah[i] * __builtin_copysignf16 (1.0f16, bh[i])) + abort (); + + xorsign_float (a, b, r, N); + for (i = 0; i < N; i++) + if (r[i] != a[i] * __builtin_copysignf (1.0f, b[i])) + abort (); + + xorsign_double (ad, bd, rd, N); + for (i = 0; i < N; i++) + if (rd[i] != ad[i] * __builtin_copysign (1.0d, bd[i])) + abort (); + + return 0; +} -- 2.30.2