From: Richard Sandiford Date: Thu, 24 May 2018 12:34:18 +0000 (+0000) Subject: Use canonicalize_math_after_vectorization_p for FMA folds X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=c453ccc2335bf4267a154c9385eb50a8c45235a1;p=gcc.git Use canonicalize_math_after_vectorization_p for FMA folds The folds in r260348 kicked in before vectorisation, which hurts for two reasons: (1) the current suboptimal handling of nothrow meant that we could drop the flag early and so prevent if-conversion; (2) some architectures provide more scalar forms than vector forms (true for Advanced SIMD). (1) is a bug in itself that needs to be fixed eventually, but delaying the folds is still needed for (2). 2018-05-24 Richard Sandiford gcc/ * match.pd: Delay FMA folds until after vectorization. gcc/testsuite/ * gcc.dg/vect/vect-fma-1.c: New test. From-SVN: r260639 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 9c7a3868f23..1500fc098cc 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,7 @@ +2018-05-24 Richard Sandiford + + * match.pd: Delay FMA folds until after vectorization. 
+ 2018-05-24 Andre Vieira PR target/83009 diff --git a/gcc/match.pd b/gcc/match.pd index 2f4c5e6a14b..50f4c882e5e 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -4703,59 +4703,60 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) wi::to_wide (@ipos) + isize)) (BIT_FIELD_REF @0 @rsize @rpos))))) -(for fmas (FMA) +(if (canonicalize_math_after_vectorization_p ()) + (for fmas (FMA) + (simplify + (fmas:c (negate @0) @1 @2) + (IFN_FNMA @0 @1 @2)) + (simplify + (fmas @0 @1 (negate @2)) + (IFN_FMS @0 @1 @2)) + (simplify + (fmas:c (negate @0) @1 (negate @2)) + (IFN_FNMS @0 @1 @2)) + (simplify + (negate (fmas@3 @0 @1 @2)) + (if (single_use (@3)) + (IFN_FNMS @0 @1 @2)))) + (simplify - (fmas:c (negate @0) @1 @2) + (IFN_FMS:c (negate @0) @1 @2) + (IFN_FNMS @0 @1 @2)) + (simplify + (IFN_FMS @0 @1 (negate @2)) + (IFN_FMA @0 @1 @2)) + (simplify + (IFN_FMS:c (negate @0) @1 (negate @2)) (IFN_FNMA @0 @1 @2)) (simplify - (fmas @0 @1 (negate @2)) - (IFN_FMS @0 @1 @2)) + (negate (IFN_FMS@3 @0 @1 @2)) + (if (single_use (@3)) + (IFN_FNMA @0 @1 @2))) + + (simplify + (IFN_FNMA:c (negate @0) @1 @2) + (IFN_FMA @0 @1 @2)) (simplify - (fmas:c (negate @0) @1 (negate @2)) + (IFN_FNMA @0 @1 (negate @2)) (IFN_FNMS @0 @1 @2)) (simplify - (negate (fmas@3 @0 @1 @2)) + (IFN_FNMA:c (negate @0) @1 (negate @2)) + (IFN_FMS @0 @1 @2)) + (simplify + (negate (IFN_FNMA@3 @0 @1 @2)) (if (single_use (@3)) - (IFN_FNMS @0 @1 @2)))) + (IFN_FMS @0 @1 @2))) -(simplify - (IFN_FMS:c (negate @0) @1 @2) - (IFN_FNMS @0 @1 @2)) -(simplify - (IFN_FMS @0 @1 (negate @2)) - (IFN_FMA @0 @1 @2)) -(simplify - (IFN_FMS:c (negate @0) @1 (negate @2)) - (IFN_FNMA @0 @1 @2)) -(simplify - (negate (IFN_FMS@3 @0 @1 @2)) + (simplify + (IFN_FNMS:c (negate @0) @1 @2) + (IFN_FMS @0 @1 @2)) + (simplify + (IFN_FNMS @0 @1 (negate @2)) + (IFN_FNMA @0 @1 @2)) + (simplify + (IFN_FNMS:c (negate @0) @1 (negate @2)) + (IFN_FMA @0 @1 @2)) + (simplify + (negate (IFN_FNMS@3 @0 @1 @2)) (if (single_use (@3)) - (IFN_FNMA @0 @1 @2))) - -(simplify - (IFN_FNMA:c 
(negate @0) @1 @2) - (IFN_FMA @0 @1 @2)) -(simplify - (IFN_FNMA @0 @1 (negate @2)) - (IFN_FNMS @0 @1 @2)) -(simplify - (IFN_FNMA:c (negate @0) @1 (negate @2)) - (IFN_FMS @0 @1 @2)) -(simplify - (negate (IFN_FNMA@3 @0 @1 @2)) - (if (single_use (@3)) - (IFN_FMS @0 @1 @2))) - -(simplify - (IFN_FNMS:c (negate @0) @1 @2) - (IFN_FMS @0 @1 @2)) -(simplify - (IFN_FNMS @0 @1 (negate @2)) - (IFN_FNMA @0 @1 @2)) -(simplify - (IFN_FNMS:c (negate @0) @1 (negate @2)) - (IFN_FMA @0 @1 @2)) -(simplify - (negate (IFN_FNMS@3 @0 @1 @2)) - (if (single_use (@3)) - (IFN_FMA @0 @1 @2))) + (IFN_FMA @0 @1 @2)))) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index ee9779210cb..a04a3278adf 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2018-05-24 Richard Sandiford + + * gcc.dg/vect/vect-fma-1.c: New test. + 2018-05-24 Rainer Orth * gcc.dg/tree-prof/update-loopch.c: Fix dumpfile name in diff --git a/gcc/testsuite/gcc.dg/vect/vect-fma-1.c b/gcc/testsuite/gcc.dg/vect/vect-fma-1.c new file mode 100644 index 00000000000..6b6b4f726e9 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-fma-1.c @@ -0,0 +1,58 @@ +/* { dg-require-effective-target scalar_all_fma } */ + +#include "tree-vect.h" + +#define N (VECTOR_BITS * 11 / 64 + 3) + +#define DEF(INV) \ + void __attribute__ ((noipa)) \ + f_##INV (double *restrict a, double *restrict b, \ + double *restrict c, double *restrict d) \ + { \ + for (int i = 0; i < N; ++i) \ + { \ + double mb = (INV & 1 ? -b[i] : b[i]); \ + double mc = c[i]; \ + double md = (INV & 2 ? -d[i] : d[i]); \ + double fma = __builtin_fma (mb, mc, md); \ + a[i] = (INV & 4 ? -fma : fma); \ + } \ + } + +#define TEST(INV) \ + { \ + f_##INV (a, b, c, d); \ + for (int i = 0; i < N; ++i) \ + { \ + double mb = (INV & 1 ? -b[i] : b[i]); \ + double mc = c[i]; \ + double md = (INV & 2 ? -d[i] : d[i]); \ + double fma = __builtin_fma (mb, mc, md); \ + double expected = (INV & 4 ? 
-fma : fma); \ + if (a[i] != expected) \ + __builtin_abort (); \ + asm volatile ("" ::: "memory"); \ + } \ + } + +#define FOR_EACH_INV(T) \ + T (0) T (1) T (2) T (3) T (4) T (5) T (6) T (7) + +FOR_EACH_INV (DEF) + +int +main (void) +{ + double a[N], b[N], c[N], d[N]; + for (int i = 0; i < N; ++i) + { + b[i] = i % 17; + c[i] = i % 9 + 11; + d[i] = i % 13 + 14; + asm volatile ("" ::: "memory"); + } + FOR_EACH_INV (TEST) + return 0; +} + +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 8 "vect" { target vect_double } } } */