From 848bb6fc0e502345536b25e1a110eb7f01eccbc1 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Tue, 13 Feb 2018 09:34:42 +0100 Subject: [PATCH] re PR middle-end/84309 (Wrong-code with -ffast-math) PR middle-end/84309 * match.pd (pow(C,x) -> exp(log(C)*x)): Optimize instead into exp2(log2(C)*x) if C is a power of 2 and c99 runtime is available. * generic-match-head.c (canonicalize_math_after_vectorization_p): New inline function. * gimple-match-head.c (canonicalize_math_after_vectorization_p): New inline function. * omp-simd-clone.h: New file. * omp-simd-clone.c: Include omp-simd-clone.h. (expand_simd_clones): No longer static. * tree-vect-patterns.c: Include fold-const-call.h, attribs.h, cgraph.h and omp-simd-clone.h. (vect_recog_pow_pattern): Optimize pow(C,x) to exp(log(C)*x). (vect_recog_widen_shift_pattern): Formatting fix. (vect_pattern_recog_1): Don't check optab for calls. * gcc.dg/pr84309.c: New test. * gcc.target/i386/pr84309.c: New test. From-SVN: r257617 --- gcc/ChangeLog | 16 ++++ gcc/generic-match-head.c | 9 ++ gcc/gimple-match-head.c | 9 ++ gcc/match.pd | 29 ++++++- gcc/omp-simd-clone.c | 3 +- gcc/omp-simd-clone.h | 26 ++++++ gcc/testsuite/ChangeLog | 4 + gcc/testsuite/gcc.dg/pr84309.c | 14 ++++ gcc/testsuite/gcc.target/i386/pr84309.c | 16 ++++ gcc/tree-vect-patterns.c | 107 ++++++++++++++++++------ 10 files changed, 204 insertions(+), 29 deletions(-) create mode 100644 gcc/omp-simd-clone.h create mode 100644 gcc/testsuite/gcc.dg/pr84309.c create mode 100644 gcc/testsuite/gcc.target/i386/pr84309.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 5ca6058ce52..cd4a1c5327b 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,21 @@ 2018-02-13 Jakub Jelinek + PR middle-end/84309 + * match.pd (pow(C,x) -> exp(log(C)*x)): Optimize instead into + exp2(log2(C)*x) if C is a power of 2 and c99 runtime is available. + * generic-match-head.c (canonicalize_math_after_vectorization_p): New + inline function. 
+ * gimple-match-head.c (canonicalize_math_after_vectorization_p): New + inline function. + * omp-simd-clone.h: New file. + * omp-simd-clone.c: Include omp-simd-clone.h. + (expand_simd_clones): No longer static. + * tree-vect-patterns.c: Include fold-const-call.h, attribs.h, + cgraph.h and omp-simd-clone.h. + (vect_recog_pow_pattern): Optimize pow(C,x) to exp(log(C)*x). + (vect_recog_widen_shift_pattern): Formatting fix. + (vect_pattern_recog_1): Don't check optab for calls. + PR target/84336 * config/i386/sse.md (_vpermi2var3_mask): Force operands[2] into a REG before using gen_lowpart on it. diff --git a/gcc/generic-match-head.c b/gcc/generic-match-head.c index b79f70e5af3..f7b6b1f23c5 100644 --- a/gcc/generic-match-head.c +++ b/gcc/generic-match-head.c @@ -68,3 +68,12 @@ canonicalize_math_p () { return true; } + +/* Return true if math operations that are beneficial only after + vectorization should be canonicalized. */ + +static inline bool +canonicalize_math_after_vectorization_p () +{ + return false; +} diff --git a/gcc/gimple-match-head.c b/gcc/gimple-match-head.c index 25fa667798d..172ef0a6909 100644 --- a/gcc/gimple-match-head.c +++ b/gcc/gimple-match-head.c @@ -831,3 +831,12 @@ canonicalize_math_p () { return !cfun || (cfun->curr_properties & PROP_gimple_opt_math) == 0; } + +/* Return true if math operations that are beneficial only after + vectorization should be canonicalized. */ + +static inline bool +canonicalize_math_after_vectorization_p () +{ + return !cfun || (cfun->curr_properties & PROP_gimple_lvec) != 0; +} diff --git a/gcc/match.pd b/gcc/match.pd index 8631153696d..833effa9b67 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3992,15 +3992,36 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (logs (pows @0 @1)) (mult @1 (logs @0)))) - /* pow(C,x) -> exp(log(C)*x) if C > 0. */ + /* pow(C,x) -> exp(log(C)*x) if C > 0, + or if C is a positive power of 2, + pow(C,x) -> exp2(log2(C)*x). 
*/ (for pows (POW) exps (EXP) logs (LOG) + exp2s (EXP2) + log2s (LOG2) (simplify (pows REAL_CST@0 @1) - (if (real_compare (GT_EXPR, TREE_REAL_CST_PTR (@0), &dconst0) - && real_isfinite (TREE_REAL_CST_PTR (@0))) - (exps (mult (logs @0) @1))))) + (if (real_compare (GT_EXPR, TREE_REAL_CST_PTR (@0), &dconst0) + && real_isfinite (TREE_REAL_CST_PTR (@0))) + (with { + const REAL_VALUE_TYPE *const value = TREE_REAL_CST_PTR (@0); + bool use_exp2 = false; + if (targetm.libc_has_function (function_c99_misc) + && value->cl == rvc_normal) + { + REAL_VALUE_TYPE frac_rvt = *value; + SET_REAL_EXP (&frac_rvt, 1); + if (real_equal (&frac_rvt, &dconst1)) + use_exp2 = true; + } + } + (if (!use_exp2) + (exps (mult (logs @0) @1)) + /* As libmvec doesn't have a vectorized exp2, defer optimizing + this until after vectorization. */ + (if (canonicalize_math_after_vectorization_p ()) + (exp2s (mult (log2s @0) @1)))))))) (for sqrts (SQRT) cbrts (CBRT) diff --git a/gcc/omp-simd-clone.c b/gcc/omp-simd-clone.c index b7737a25824..56832ebf22d 100644 --- a/gcc/omp-simd-clone.c +++ b/gcc/omp-simd-clone.c @@ -50,6 +50,7 @@ along with GCC; see the file COPYING3. If not see #include "varasm.h" #include "stringpool.h" #include "attribs.h" +#include "omp-simd-clone.h" /* Return the number of elements in vector type VECTYPE, which is associated with a SIMD clone. At present these always have a constant length. */ @@ -1568,7 +1569,7 @@ simd_clone_adjust (struct cgraph_node *node) /* If the function in NODE is tagged as an elemental SIMD function, create the appropriate SIMD clones. */ -static void +void expand_simd_clones (struct cgraph_node *node) { tree attr = lookup_attribute ("omp declare simd", diff --git a/gcc/omp-simd-clone.h b/gcc/omp-simd-clone.h new file mode 100644 index 00000000000..c4833e21cfa --- /dev/null +++ b/gcc/omp-simd-clone.h @@ -0,0 +1,26 @@ +/* OMP constructs' SIMD clone supporting code. + + Copyright (C) 2005-2018 Free Software Foundation, Inc. + +This file is part of GCC. 
+ +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#ifndef GCC_OMP_SIMD_CLONE_H +#define GCC_OMP_SIMD_CLONE_H + +extern void expand_simd_clones (struct cgraph_node *); + +#endif /* GCC_OMP_SIMD_CLONE_H */ diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index c1926e7c200..aafa6d3466f 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,5 +1,9 @@ 2018-02-13 Jakub Jelinek + PR middle-end/84309 + * gcc.dg/pr84309.c: New test. + * gcc.target/i386/pr84309.c: New test. + PR target/84336 * gcc.target/i386/pr84336.c: New test. 
diff --git a/gcc/testsuite/gcc.dg/pr84309.c b/gcc/testsuite/gcc.dg/pr84309.c new file mode 100644 index 00000000000..6fe774e57cf --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr84309.c @@ -0,0 +1,14 @@ +/* PR middle-end/84309 */ +/* { dg-do run { target c99_runtime } } */ +/* { dg-options "-O2 -ffast-math" } */ + +int +main () +{ + unsigned long a = 1024; + unsigned long b = 16 * 1024; + unsigned long c = __builtin_pow (2, (__builtin_log2 (a) + __builtin_log2 (b)) / 2); + if (c != 4096) + __builtin_abort (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/i386/pr84309.c b/gcc/testsuite/gcc.target/i386/pr84309.c new file mode 100644 index 00000000000..d1dd6cef815 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr84309.c @@ -0,0 +1,16 @@ +/* PR middle-end/84309 */ +/* { dg-do compile } */ +/* { dg-options "-Ofast -mavx" } */ + +double pow (double, double) __attribute__((simd)); +double exp (double) __attribute__((simd)); +extern double a[1024], b[1024]; + +void +foo (void) +{ + for (int i = 0; i < 1024; ++i) + a[i] = pow (2.0, b[i]); +} + +/* { dg-final { scan-assembler "_ZGVcN4v_exp" } } */ diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c index 1279352125d..25a2efb21f8 100644 --- a/gcc/tree-vect-patterns.c +++ b/gcc/tree-vect-patterns.c @@ -41,6 +41,10 @@ along with GCC; see the file COPYING3. 
If not see #include "builtins.h" #include "internal-fn.h" #include "case-cfn-macros.h" +#include "fold-const-call.h" +#include "attribs.h" +#include "cgraph.h" +#include "omp-simd-clone.h" /* Pattern recognition functions */ static gimple *vect_recog_widen_sum_pattern (vec *, tree *, @@ -1049,7 +1053,7 @@ vect_recog_pow_pattern (vec *stmts, tree *type_in, tree *type_out) { gimple *last_stmt = (*stmts)[0]; - tree base, exp = NULL; + tree base, exp; gimple *stmt; tree var; @@ -1060,17 +1064,77 @@ vect_recog_pow_pattern (vec *stmts, tree *type_in, { CASE_CFN_POW: CASE_CFN_POWI: - base = gimple_call_arg (last_stmt, 0); - exp = gimple_call_arg (last_stmt, 1); - if (TREE_CODE (exp) != REAL_CST - && TREE_CODE (exp) != INTEGER_CST) - return NULL; break; default: return NULL; } + base = gimple_call_arg (last_stmt, 0); + exp = gimple_call_arg (last_stmt, 1); + if (TREE_CODE (exp) != REAL_CST + && TREE_CODE (exp) != INTEGER_CST) + { + if (flag_unsafe_math_optimizations + && TREE_CODE (base) == REAL_CST + && !gimple_call_internal_p (last_stmt)) + { + combined_fn log_cfn; + built_in_function exp_bfn; + switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt))) + { + case BUILT_IN_POW: + log_cfn = CFN_BUILT_IN_LOG; + exp_bfn = BUILT_IN_EXP; + break; + case BUILT_IN_POWF: + log_cfn = CFN_BUILT_IN_LOGF; + exp_bfn = BUILT_IN_EXPF; + break; + case BUILT_IN_POWL: + log_cfn = CFN_BUILT_IN_LOGL; + exp_bfn = BUILT_IN_EXPL; + break; + default: + return NULL; + } + tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base); + tree exp_decl = builtin_decl_implicit (exp_bfn); + /* Optimize pow (C, x) as exp (log (C) * x). Normally match.pd + does that, but if C is a power of 2, we want to use + exp2 (log2 (C) * x) in the non-vectorized version, but for + vectorization we don't have vectorized exp2. 
*/ + if (logc + && TREE_CODE (logc) == REAL_CST + && exp_decl + && lookup_attribute ("omp declare simd", + DECL_ATTRIBUTES (exp_decl))) + { + cgraph_node *node = cgraph_node::get_create (exp_decl); + if (node->simd_clones == NULL) + { + if (node->definition) + return NULL; + expand_simd_clones (node); + if (node->simd_clones == NULL) + return NULL; + } + stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); + tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL); + gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc); + new_pattern_def_seq (stmt_vinfo, g); + *type_in = TREE_TYPE (base); + *type_out = NULL_TREE; + tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL); + g = gimple_build_call (exp_decl, 1, def); + gimple_call_set_lhs (g, res); + return g; + } + } + + return NULL; + } + /* We now have a pow or powi builtin function call with a constant exponent. */ @@ -1744,8 +1808,8 @@ vect_recog_widen_shift_pattern (vec *stmts, /* Pattern supported. Create a stmt to be used to replace the pattern. 
*/ var = vect_recog_temp_ssa_var (type, NULL); - pattern_stmt = - gimple_build_assign (var, WIDEN_LSHIFT_EXPR, oprnd0, oprnd1); + pattern_stmt + = gimple_build_assign (var, WIDEN_LSHIFT_EXPR, oprnd0, oprnd1); if (wstmt) { stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); @@ -4439,10 +4503,6 @@ vect_pattern_recog_1 (vect_recog_func *recog_func, } else { - machine_mode vec_mode; - enum insn_code icode; - optab optab; - /* Check target support */ type_in = get_vectype_for_scalar_type (type_in); if (!type_in) @@ -4456,19 +4516,18 @@ vect_pattern_recog_1 (vect_recog_func *recog_func, pattern_vectype = type_out; if (is_gimple_assign (pattern_stmt)) - code = gimple_assign_rhs_code (pattern_stmt); - else - { - gcc_assert (is_gimple_call (pattern_stmt)); - code = CALL_EXPR; + { + enum insn_code icode; + code = gimple_assign_rhs_code (pattern_stmt); + optab optab = optab_for_tree_code (code, type_in, optab_default); + machine_mode vec_mode = TYPE_MODE (type_in); + if (!optab + || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing + || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out))) + return false; } - - optab = optab_for_tree_code (code, type_in, optab_default); - vec_mode = TYPE_MODE (type_in); - if (!optab - || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing - || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out))) - return false; + else + gcc_assert (is_gimple_call (pattern_stmt)); } /* Found a vectorizable pattern. */ -- 2.30.2