re PR middle-end/84309 (Wrong-code with -ffast-math)
authorJakub Jelinek <jakub@redhat.com>
Tue, 13 Feb 2018 08:34:42 +0000 (09:34 +0100)
committerJakub Jelinek <jakub@gcc.gnu.org>
Tue, 13 Feb 2018 08:34:42 +0000 (09:34 +0100)
PR middle-end/84309
* match.pd (pow(C,x) -> exp(log(C)*x)): Optimize instead into
exp2(log2(C)*x) if C is a power of 2 and c99 runtime is available.
* generic-match-head.c (canonicalize_math_after_vectorization_p): New
inline function.
* gimple-match-head.c (canonicalize_math_after_vectorization_p): New
inline function.
* omp-simd-clone.h: New file.
* omp-simd-clone.c: Include omp-simd-clone.h.
(expand_simd_clones): No longer static.
* tree-vect-patterns.c: Include fold-const-call.h, attribs.h,
cgraph.h and omp-simd-clone.h.
(vect_recog_pow_pattern): Optimize pow(C,x) to exp(log(C)*x).
(vect_recog_widen_shift_pattern): Formatting fix.
(vect_pattern_recog_1): Don't check optab for calls.

* gcc.dg/pr84309.c: New test.
* gcc.target/i386/pr84309.c: New test.

From-SVN: r257617

gcc/ChangeLog
gcc/generic-match-head.c
gcc/gimple-match-head.c
gcc/match.pd
gcc/omp-simd-clone.c
gcc/omp-simd-clone.h [new file with mode: 0644]
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.dg/pr84309.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pr84309.c [new file with mode: 0644]
gcc/tree-vect-patterns.c

index 5ca6058ce52551d64721684cdc368ac99fbebb3b..cd4a1c5327b00401e181b0f24caf74f77ad7ec96 100644 (file)
@@ -1,5 +1,21 @@
 2018-02-13  Jakub Jelinek  <jakub@redhat.com>
 
+       PR middle-end/84309
+       * match.pd (pow(C,x) -> exp(log(C)*x)): Optimize instead into
+       exp2(log2(C)*x) if C is a power of 2 and c99 runtime is available.
+       * generic-match-head.c (canonicalize_math_after_vectorization_p): New
+       inline function.
+       * gimple-match-head.c (canonicalize_math_after_vectorization_p): New
+       inline function.
+       * omp-simd-clone.h: New file.
+       * omp-simd-clone.c: Include omp-simd-clone.h.
+       (expand_simd_clones): No longer static.
+       * tree-vect-patterns.c: Include fold-const-call.h, attribs.h,
+       cgraph.h and omp-simd-clone.h.
+       (vect_recog_pow_pattern): Optimize pow(C,x) to exp(log(C)*x).
+       (vect_recog_widen_shift_pattern): Formatting fix.
+       (vect_pattern_recog_1): Don't check optab for calls.
+
        PR target/84336
        * config/i386/sse.md (<avx512>_vpermi2var<mode>3_mask): Force
        operands[2] into a REG before using gen_lowpart on it.
index b79f70e5af34b65524eddee7fc25f2b319ae8b4d..f7b6b1f23c58d92fda6f6b9f36da7b35778e028f 100644 (file)
@@ -68,3 +68,12 @@ canonicalize_math_p ()
 {
   return true;
 }
+
+/* Return true if math operations that are beneficial only after
+   vectorization should be canonicalized.  */
+
+static inline bool
+canonicalize_math_after_vectorization_p ()
+{
+  return false;
+}
index 25fa667798d1d3b22e54b72bc9c8edae0ac5762f..172ef0a6909eb23c48ef95aacf07867ed68f2a74 100644 (file)
@@ -831,3 +831,12 @@ canonicalize_math_p ()
 {
   return !cfun || (cfun->curr_properties & PROP_gimple_opt_math) == 0;
 }
+
+/* Return true if math operations that are beneficial only after
+   vectorization should be canonicalized.  */
+
+static inline bool
+canonicalize_math_after_vectorization_p ()
+{
+  return !cfun || (cfun->curr_properties & PROP_gimple_lvec) != 0;
+}
index 8631153696dbe6d6ef240dd4280130124cb255dc..833effa9b67119f7c829a7a7ea452afb356dcc09 100644 (file)
@@ -3992,15 +3992,36 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
    (logs (pows @0 @1))
    (mult @1 (logs @0))))
 
- /* pow(C,x) -> exp(log(C)*x) if C > 0.  */
+ /* pow(C,x) -> exp(log(C)*x) if C > 0,
+    or if C is a positive power of 2,
+    pow(C,x) -> exp2(log2(C)*x).  */
  (for pows (POW)
       exps (EXP)
       logs (LOG)
+      exp2s (EXP2)
+      log2s (LOG2)
   (simplify
    (pows REAL_CST@0 @1)
-    (if (real_compare (GT_EXPR, TREE_REAL_CST_PTR (@0), &dconst0)
-        && real_isfinite (TREE_REAL_CST_PTR (@0)))
-     (exps (mult (logs @0) @1)))))
+   (if (real_compare (GT_EXPR, TREE_REAL_CST_PTR (@0), &dconst0)
+       && real_isfinite (TREE_REAL_CST_PTR (@0)))
+    (with {
+       const REAL_VALUE_TYPE *const value = TREE_REAL_CST_PTR (@0);
+       bool use_exp2 = false;
+       if (targetm.libc_has_function (function_c99_misc)
+          && value->cl == rvc_normal)
+        {
+          REAL_VALUE_TYPE frac_rvt = *value;
+          SET_REAL_EXP (&frac_rvt, 1);
+          if (real_equal (&frac_rvt, &dconst1))
+            use_exp2 = true;
+        }
+     }
+     (if (!use_exp2)
+      (exps (mult (logs @0) @1))
+      /* As libmvec doesn't have a vectorized exp2, defer optimizing
+        this until after vectorization.  */
+      (if (canonicalize_math_after_vectorization_p ())
+       (exp2s (mult (log2s @0) @1))))))))
 
  (for sqrts (SQRT)
       cbrts (CBRT)
index b7737a258248fbe20967bc1e2d7f961592ef93a0..56832ebf22d17af73b6c25e61eec7c71b83a86bc 100644 (file)
@@ -50,6 +50,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "varasm.h"
 #include "stringpool.h"
 #include "attribs.h"
+#include "omp-simd-clone.h"
 
 /* Return the number of elements in vector type VECTYPE, which is associated
    with a SIMD clone.  At present these always have a constant length.  */
@@ -1568,7 +1569,7 @@ simd_clone_adjust (struct cgraph_node *node)
 /* If the function in NODE is tagged as an elemental SIMD function,
    create the appropriate SIMD clones.  */
 
-static void
+void
 expand_simd_clones (struct cgraph_node *node)
 {
   tree attr = lookup_attribute ("omp declare simd",
diff --git a/gcc/omp-simd-clone.h b/gcc/omp-simd-clone.h
new file mode 100644 (file)
index 0000000..c4833e2
--- /dev/null
@@ -0,0 +1,26 @@
+/* OMP constructs' SIMD clone supporting code.
+
+   Copyright (C) 2005-2018 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#ifndef GCC_OMP_SIMD_CLONE_H
+#define GCC_OMP_SIMD_CLONE_H
+
+extern void expand_simd_clones (struct cgraph_node *);
+
+#endif /* GCC_OMP_SIMD_CLONE_H */
index c1926e7c2000cec3214389272b29bdaf61d51eff..aafa6d3466f5a22ca6cab2cc551abb783c25a431 100644 (file)
@@ -1,5 +1,9 @@
 2018-02-13  Jakub Jelinek  <jakub@redhat.com>
 
+       PR middle-end/84309
+       * gcc.dg/pr84309.c: New test.
+       * gcc.target/i386/pr84309.c: New test.
+
        PR target/84336
        * gcc.target/i386/pr84336.c: New test.
 
diff --git a/gcc/testsuite/gcc.dg/pr84309.c b/gcc/testsuite/gcc.dg/pr84309.c
new file mode 100644 (file)
index 0000000..6fe774e
--- /dev/null
@@ -0,0 +1,14 @@
+/* PR middle-end/84309 */
+/* { dg-do run { target c99_runtime } } */
+/* { dg-options "-O2 -ffast-math" } */
+
+int
+main ()
+{
+  unsigned long a = 1024;
+  unsigned long b = 16 * 1024;
+  unsigned long c = __builtin_pow (2, (__builtin_log2 (a) + __builtin_log2 (b)) / 2);
+  if (c != 4096)
+    __builtin_abort ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr84309.c b/gcc/testsuite/gcc.target/i386/pr84309.c
new file mode 100644 (file)
index 0000000..d1dd6ce
--- /dev/null
@@ -0,0 +1,16 @@
+/* PR middle-end/84309 */
+/* { dg-do compile } */
+/* { dg-options "-Ofast -mavx" } */
+
+double pow (double, double) __attribute__((simd));
+double exp (double) __attribute__((simd));
+extern double a[1024], b[1024];
+
+void
+foo (void)
+{
+  for (int i = 0; i < 1024; ++i)
+    a[i] = pow (2.0, b[i]);
+}
+
+/* { dg-final { scan-assembler "_ZGVcN4v_exp" } } */
index 1279352125df827c0c4bdb2cbff495e92e2b7f54..25a2efb21f8ca062bb0763fdfa7ffcbbfef0ffed 100644 (file)
@@ -41,6 +41,10 @@ along with GCC; see the file COPYING3.  If not see
 #include "builtins.h"
 #include "internal-fn.h"
 #include "case-cfn-macros.h"
+#include "fold-const-call.h"
+#include "attribs.h"
+#include "cgraph.h"
+#include "omp-simd-clone.h"
 
 /* Pattern recognition functions  */
 static gimple *vect_recog_widen_sum_pattern (vec<gimple *> *, tree *,
@@ -1049,7 +1053,7 @@ vect_recog_pow_pattern (vec<gimple *> *stmts, tree *type_in,
                        tree *type_out)
 {
   gimple *last_stmt = (*stmts)[0];
-  tree base, exp = NULL;
+  tree base, exp;
   gimple *stmt;
   tree var;
 
@@ -1060,17 +1064,77 @@ vect_recog_pow_pattern (vec<gimple *> *stmts, tree *type_in,
     {
     CASE_CFN_POW:
     CASE_CFN_POWI:
-      base = gimple_call_arg (last_stmt, 0);
-      exp = gimple_call_arg (last_stmt, 1);
-      if (TREE_CODE (exp) != REAL_CST
-         && TREE_CODE (exp) != INTEGER_CST)
-        return NULL;
       break;
 
     default:
       return NULL;
     }
 
+  base = gimple_call_arg (last_stmt, 0);
+  exp = gimple_call_arg (last_stmt, 1);
+  if (TREE_CODE (exp) != REAL_CST
+      && TREE_CODE (exp) != INTEGER_CST)
+    {
+      if (flag_unsafe_math_optimizations
+         && TREE_CODE (base) == REAL_CST
+         && !gimple_call_internal_p (last_stmt))
+       {
+         combined_fn log_cfn;
+         built_in_function exp_bfn;
+         switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
+           {
+           case BUILT_IN_POW:
+             log_cfn = CFN_BUILT_IN_LOG;
+             exp_bfn = BUILT_IN_EXP;
+             break;
+           case BUILT_IN_POWF:
+             log_cfn = CFN_BUILT_IN_LOGF;
+             exp_bfn = BUILT_IN_EXPF;
+             break;
+           case BUILT_IN_POWL:
+             log_cfn = CFN_BUILT_IN_LOGL;
+             exp_bfn = BUILT_IN_EXPL;
+             break;
+           default:
+             return NULL;
+           }
+         tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
+         tree exp_decl = builtin_decl_implicit (exp_bfn);
+         /* Optimize pow (C, x) as exp (log (C) * x).  Normally match.pd
+            does that, but if C is a power of 2, we want to use
+            exp2 (log2 (C) * x) in the non-vectorized version, but for
+            vectorization we don't have vectorized exp2.  */
+         if (logc
+             && TREE_CODE (logc) == REAL_CST
+             && exp_decl
+             && lookup_attribute ("omp declare simd",
+                                  DECL_ATTRIBUTES (exp_decl)))
+           {
+             cgraph_node *node = cgraph_node::get_create (exp_decl);
+             if (node->simd_clones == NULL)
+               {
+                 if (node->definition)
+                   return NULL;
+                 expand_simd_clones (node);
+                 if (node->simd_clones == NULL)
+                   return NULL;
+               }
+             stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
+             tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
+             gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
+             new_pattern_def_seq (stmt_vinfo, g);
+             *type_in = TREE_TYPE (base);
+             *type_out = NULL_TREE;
+             tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
+             g = gimple_build_call (exp_decl, 1, def);
+             gimple_call_set_lhs (g, res);
+             return g;
+           }
+       }
+
+      return NULL;
+    }
+
   /* We now have a pow or powi builtin function call with a constant
      exponent.  */
 
@@ -1744,8 +1808,8 @@ vect_recog_widen_shift_pattern (vec<gimple *> *stmts,
 
   /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
   var = vect_recog_temp_ssa_var (type, NULL);
-  pattern_stmt =
-    gimple_build_assign (var, WIDEN_LSHIFT_EXPR, oprnd0, oprnd1);
+  pattern_stmt
+    = gimple_build_assign (var, WIDEN_LSHIFT_EXPR, oprnd0, oprnd1);
   if (wstmt)
     {
       stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
@@ -4439,10 +4503,6 @@ vect_pattern_recog_1 (vect_recog_func *recog_func,
     }
   else
     {
-      machine_mode vec_mode;
-      enum insn_code icode;
-      optab optab;
-
       /* Check target support  */
       type_in = get_vectype_for_scalar_type (type_in);
       if (!type_in)
@@ -4456,19 +4516,18 @@ vect_pattern_recog_1 (vect_recog_func *recog_func,
       pattern_vectype = type_out;
 
       if (is_gimple_assign (pattern_stmt))
-       code = gimple_assign_rhs_code (pattern_stmt);
-      else
-        {
-         gcc_assert (is_gimple_call (pattern_stmt));
-         code = CALL_EXPR;
+       {
+         enum insn_code icode;
+         code = gimple_assign_rhs_code (pattern_stmt);
+         optab optab = optab_for_tree_code (code, type_in, optab_default);
+         machine_mode vec_mode = TYPE_MODE (type_in);
+         if (!optab
+             || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
+             || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
+           return false;
        }
-
-      optab = optab_for_tree_code (code, type_in, optab_default);
-      vec_mode = TYPE_MODE (type_in);
-      if (!optab
-          || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
-          || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
-       return false;
+      else
+       gcc_assert (is_gimple_call (pattern_stmt));
     }
 
   /* Found a vectorizable pattern.  */