From 062468db046f5229421b027886958fa824af09df Mon Sep 17 00:00:00 2001
From: Robin Dapp
Date: Fri, 8 Feb 2019 14:25:48 +0000
Subject: [PATCH] S/390: Implement vector copysign.

This patch implements the vector copysign operation using vector select
and a signbit mask.

gcc/ChangeLog:

2019-02-08  Robin Dapp

	* config/s390/vector.md: Implement vector copysign.

gcc/testsuite/ChangeLog:

2019-02-08  Robin Dapp

	* gcc.target/s390/vector/vec-copysign-execute.c: New test.
	* gcc.target/s390/vector/vec-copysign.c: New test.

From-SVN: r268697
---
 gcc/ChangeLog                                 |  4 +
 gcc/config/s390/vector.md                     | 25 +++++++
 gcc/testsuite/ChangeLog                       |  5 ++
 .../s390/vector/vec-copysign-execute.c        | 74 +++++++++++++++++++
 .../gcc.target/s390/vector/vec-copysign.c     | 38 ++++++++++
 5 files changed, 146 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/vec-copysign-execute.c
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/vec-copysign.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index f451e15ceed..7bdcc07def1 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,7 @@
+2019-02-08  Robin Dapp
+
+	* config/s390/vector.md: Implement vector copysign.
+
 2019-02-08  H.J. Lu
 
 	* expr.c (expand_constructor): Correct indentations.
diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index c9ffab4c8c2..820372eca29 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -1362,6 +1362,31 @@
   operands[4] = CONST0_RTX (V2DImode);
 })
 
+; Vector copysign, implemented using vector select.  operands[3] holds the
+; sign bit of every element and operands[4] is zero, so the select takes
+; operand 1 where the mask is clear and operand 2 (the sign) where it is set.
+(define_expand "copysign<mode>3"
+  [(set (match_operand:VFT 0 "register_operand" "")
+        (if_then_else:VFT
+         (eq (match_dup 3)
+             (match_dup 4))
+         (match_operand:VFT 1 "register_operand" "")
+         (match_operand:VFT 2 "register_operand" "")))]
+  "TARGET_VX"
+{
+  int sz = GET_MODE_BITSIZE (GET_MODE_INNER (<MODE>mode));
+  int prec = GET_MODE_PRECISION (GET_MODE_INNER (<MODE>mode));
+  /* Not "1l << (sz - 1)": that overflows host long once sz reaches its width.  */
+  wide_int mask_val = wi::set_bit_in_zero (sz - 1, prec);
+  /* NOTE(review): CONST_INT elements in a float vector mode - confirm valid.  */
+  int nunits = GET_MODE_NUNITS (<MODE>mode);
+  rtvec v = rtvec_alloc (nunits);
+  for (int i = 0; i < nunits; i++)
+    RTVEC_ELT (v, i) = GEN_INT (mask_val.to_shwi ());
+  /* NOTE(review): to_shwi () yields one HWI; check elements wider than it.  */
+  operands[3] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
+  operands[4] = CONST0_RTX (<MODE>mode);
+})
 
 ;;
 ;; Integer compares
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index eeb10000bf5..5cbb14f5486 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2019-02-08  Robin Dapp
+
+	* gcc.target/s390/vector/vec-copysign-execute.c: New test.
+	* gcc.target/s390/vector/vec-copysign.c: New test.
+
 2019-02-08  Richard Biener
 
 	PR tree-optimization/89247
diff --git a/gcc/testsuite/gcc.target/s390/vector/vec-copysign-execute.c b/gcc/testsuite/gcc.target/s390/vector/vec-copysign-execute.c
new file mode 100644
index 00000000000..a8d675d3a72
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/vec-copysign-execute.c
@@ -0,0 +1,74 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-vectorize -mzarch -march=z13" } */
+/* Run-time check: copysign{,f} loops must match a non-vectorized rerun.  */
+#include <math.h>
+#include <assert.h>
+
+#define N 20
+
+double a[N] = {-0.1, -3.2, -6.3, -9.4, -12.5, -15.6, -18.7, -21.8, 24.9,
+  27.1, 30.2, 33.3, 36.4, 39.5, 42.6, nan("123"), __DBL_MIN__ / 2.0,
+  -nan ("1"), __DBL_MAX__ * 2.0, -__DBL_MAX__ * 1e199};
+double b[N] = {-1.2, 3.4, -5.6, 7.8, -9.0, 1.0, -2.0, 3.0, -4.0, -5.0, 6.0,
+  7.0, -8.0, -9.0, 10.0, -11.0, -1., 0., -0., 1.3};
+double r[N], r2[N];
+
+/* Loop built with -ftree-vectorize; check () redoes it without.  */
+void
+foo (void)
+{
+  for (int i = 0; i < N; i++)
+    r[i] = copysign (a[i], b[i]);
+}
+
+__attribute__((optimize("no-tree-vectorize")))
+void
+check (void)
+{
+  for (int i = 0; i < N; i++)
+    {
+      r2[i] = copysign (a[i], b[i]);
+      /* -0.0 == 0.0, so "==" alone cannot see a wrong sign on zero.  */
+      assert ((r[i] == r2[i] || (isnan (r[i]) && isnan (r2[i])))
+              && !signbit (r[i]) == !signbit (r2[i]));
+    }
+}
+
+float af[N] = {-0.1, -3.2, -6.3, -9.4, -12.5, -15.6, -18.7, -21.8, 24.9,
+  27.1, 30.2, 33.3, 36.4, 39.5, 42.6, nan("123"), __DBL_MIN__ / 2.0,
+  -nan ("1"), __DBL_MAX__ * 2.0, -__DBL_MAX__ * 1e199};
+float bf[N] = {-1.2, 3.4, -5.6, 7.8, -9.0, 1.0, -2.0, 3.0, -4.0, -5.0, 6.0,
+  7.0, -8.0, -9.0, 10.0, -11.0, -1., 0., -0., 1.3};
+float rf[N], rf2[N];
+
+/* NOTE(review): foof is built for z14 yet always called; confirm z13 runs.  */
+__attribute__ ((__target__ ("arch=z14")))
+void
+foof (void)
+{
+  for (int i = 0; i < N; i++)
+    rf[i] = copysignf (af[i], bf[i]);
+}
+
+__attribute__((optimize("no-tree-vectorize")))
+void
+checkf (void)
+{
+  for (int i = 0; i < N; i++)
+    {
+      rf2[i] = copysignf (af[i], bf[i]);
+      /* -0.0 == 0.0, so "==" alone cannot see a wrong sign on zero.  */
+      assert ((rf[i] == rf2[i] || (isnan (rf[i]) && isnan (rf2[i])))
+              && !signbit (rf[i]) == !signbit (rf2[i]));
+    }
+}
+
+int main()
+{
+  foo ();
+  check ();
+
+  foof ();
+  checkf ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/s390/vector/vec-copysign.c b/gcc/testsuite/gcc.target/s390/vector/vec-copysign.c
new file mode 100644
index 00000000000..64c6970c23e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/vec-copysign.c
@@ -0,0 +1,38 @@
+/* { dg-do compile { target { s390*-*-* } } } */
+/* { dg-options "-O2 -ftree-vectorize -mzarch" } */
+/* { dg-final { scan-assembler-times "vgmg" 1 } } */
+/* { dg-final { scan-assembler-times "vgmf" 1 } } */
+/* { dg-final { scan-assembler-times "vsel" 2 } } */
+/* The two loops together must yield one vgmg, one vgmf and two vsel.  */
+#include <math.h>
+
+#define N 20
+
+double a[N] = {-0.1, -3.2, -6.3, -9.4, -12.5, -15.6, -18.7, -21.8, 24.9,
+  27.1, 30.2, 33.3, 36.4, 39.5, 42.6, nan ("123"), __DBL_MIN__ / 2.0,
+  -nan ("1"), __DBL_MAX__ * 2.0, -__DBL_MAX__ * 1e199};
+double b[N] = {-1.2, 3.4, -5.6, 7.8, -9.0, 1.0, -2.0, 3.0, -4.0, -5.0, 6.0,
+  7.0, -8.0, -9.0, 10.0, -11.0, -1., 0., -0., 1.3};
+double r[N];
+float af[N] = {-0.1, -3.2, -6.3, -9.4, -12.5, -15.6, -18.7, -21.8, 24.9,
+  27.1, 30.2, 33.3, 36.4, 39.5, 42.6, nan ("123"), __DBL_MIN__ / 2.0,
+  -nan ("1"), __DBL_MAX__ * 2.0, -__DBL_MAX__ * 1e199};
+float bf[N] = {-1.2, 3.4, -5.6, 7.8, -9.0, 1.0, -2.0, 3.0, -4.0, -5.0, 6.0,
+  7.0, -8.0, -9.0, 10.0, -11.0, -1., 0., -0., 1.3};
+float rf[N];
+
+__attribute__ ((__target__ ("arch=z13")))
+void
+foo (void)
+{
+  for (int i = 0; i < N; i++)
+    r[i] = copysign (a[i], b[i]);
+}
+
+__attribute__ ((__target__ ("arch=z14")))
+void
+foof (void)
+{
+  for (int i = 0; i < N; i++)
+    rf[i] = copysignf (af[i], bf[i]);
+}
-- 
2.30.2