From: Bill Schmidt Date: Fri, 20 Apr 2012 14:19:13 +0000 (+0000) Subject: re PR rtl-optimization/44214 (Compiler does not optimize vector divide with -frecipro... X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=add6207a867887b9795bb9ad0f445a43b6db38dd;p=gcc.git re PR rtl-optimization/44214 (Compiler does not optimize vector divide with -freciprocal-math (or -ffast-math)) gcc: 2012-04-20 Bill Schmidt PR rtl-optimization/44214 * fold-const.c (exact_inverse): New function. (fold_binary_loc): Fold vector and complex division by constant into multiply by reciprocal with flag_reciprocal_math; fold vector division by constant into multiply by reciprocal with exact inverse. gcc/testsuite: 2012-04-20 Bill Schmidt PR rtl-optimization/44214 * gcc.dg/pr44214-1.c: New test. * gcc.dg/pr44214-2.c: Likewise. * gcc.dg/pr44214-3.c: Likewise. From-SVN: r186625 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 557968c3a35..0a3eda63c1a 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,11 @@ +2012-04-20 Bill Schmidt + + PR rtl-optimization/44214 + * fold-const.c (exact_inverse): New function. + (fold_binary_loc): Fold vector and complex division by constant into + multiply by reciprocal with flag_reciprocal_math; fold vector division + by constant into multiply by reciprocal with exact inverse. + 2012-04-20 Jan Hubicka * lto-symtab.c (lto_cgraph_replace_node): Merge needed instead of force flags. diff --git a/gcc/fold-const.c b/gcc/fold-const.c index 8aceb733e7d..fa75fdbec36 100644 --- a/gcc/fold-const.c +++ b/gcc/fold-const.c @@ -9693,6 +9693,48 @@ fold_addr_of_array_ref_difference (location_t loc, tree type, return NULL_TREE; } +/* If the real or vector real constant CST of type TYPE has an exact + inverse, return it, else return NULL. 
*/ + +static tree +exact_inverse (tree type, tree cst) +{ + REAL_VALUE_TYPE r; + tree unit_type, *elts; + enum machine_mode mode; + unsigned vec_nelts, i; + + switch (TREE_CODE (cst)) + { + case REAL_CST: + r = TREE_REAL_CST (cst); + + if (exact_real_inverse (TYPE_MODE (type), &r)) + return build_real (type, r); + + return NULL_TREE; + + case VECTOR_CST: + vec_nelts = VECTOR_CST_NELTS (cst); + elts = XALLOCAVEC (tree, vec_nelts); + unit_type = TREE_TYPE (type); + mode = TYPE_MODE (unit_type); + + for (i = 0; i < vec_nelts; i++) + { + r = TREE_REAL_CST (VECTOR_CST_ELT (cst, i)); + if (!exact_real_inverse (mode, &r)) + return NULL_TREE; + elts[i] = build_real (unit_type, r); + } + + return build_vector (type, elts); + + default: + return NULL_TREE; + } +} + /* Fold a binary expression of code CODE and type TYPE with operands OP0 and OP1. LOC is the location of the resulting expression. Return the folded expression if folding is successful. Otherwise, @@ -11734,23 +11776,24 @@ fold_binary_loc (location_t loc, so only do this if -freciprocal-math. We can actually always safely do it if ARG1 is a power of two, but it's hard to tell if it is or not in a portable manner. */ - if (TREE_CODE (arg1) == REAL_CST) + if (optimize + && (TREE_CODE (arg1) == REAL_CST + || (TREE_CODE (arg1) == COMPLEX_CST + && COMPLEX_FLOAT_TYPE_P (TREE_TYPE (arg1))) + || (TREE_CODE (arg1) == VECTOR_CST + && VECTOR_FLOAT_TYPE_P (TREE_TYPE (arg1))))) { if (flag_reciprocal_math - && 0 != (tem = const_binop (code, build_real (type, dconst1), - arg1))) + && 0 != (tem = const_binop (code, build_one_cst (type), arg1))) return fold_build2_loc (loc, MULT_EXPR, type, arg0, tem); - /* Find the reciprocal if optimizing and the result is exact. */ - if (optimize) + /* Find the reciprocal if optimizing and the result is exact. + TODO: Complex reciprocal not implemented. 
*/ + if (TREE_CODE (arg1) != COMPLEX_CST) { - REAL_VALUE_TYPE r; - r = TREE_REAL_CST (arg1); - if (exact_real_inverse (TYPE_MODE(TREE_TYPE(arg0)), &r)) - { - tem = build_real (type, r); - return fold_build2_loc (loc, MULT_EXPR, type, - fold_convert_loc (loc, type, arg0), tem); - } + tree inverse = exact_inverse (TREE_TYPE (arg0), arg1); + + if (inverse) + return fold_build2_loc (loc, MULT_EXPR, type, arg0, inverse); } } /* Convert A/B/C to A/(B*C). */ diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 1cfffd8e7f7..e8d4f0dd545 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,10 @@ +2012-04-20 Bill Schmidt + + PR rtl-optimization/44214 + * gcc.dg/pr44214-1.c: New test. + * gcc.dg/pr44214-2.c: Likewise. + * gcc.dg/pr44214-3.c: Likewise. + 2012-04-20 Richard Guenther * g++.dg/torture/20120420-1.C: New testcase. diff --git a/gcc/testsuite/gcc.dg/pr44214-1.c b/gcc/testsuite/gcc.dg/pr44214-1.c new file mode 100644 index 00000000000..292ce57c30e --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr44214-1.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -freciprocal-math -fdump-tree-ccp1" } */ + +typedef double v2df __attribute__ ((vector_size (16))); + +void do_div (v2df *a, v2df *b) +{ + *a = *b / (v2df) { 2.0, 3.0 }; +} + +/* Constant folding should multiply *b by the reciprocals of the + vector elements. The fold does not take place for generic + vectors until the first CCP pass. The string " * " occurs 3 + times: one multiply and two indirect parameters. 
*/ + +/* { dg-final { scan-tree-dump-times " \\\* " 3 "ccp1" } } */ +/* { dg-final { scan-tree-dump-times " / " 0 "ccp1" } } */ +/* { dg-final { cleanup-tree-dump "ccp1" } } */ diff --git a/gcc/testsuite/gcc.dg/pr44214-2.c b/gcc/testsuite/gcc.dg/pr44214-2.c new file mode 100644 index 00000000000..7e8581a2bb3 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr44214-2.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -freciprocal-math -fdump-tree-original" } */ + +void do_div (_Complex double *a, _Complex double *b) +{ + *a = *b / (4.0 - 5.0fi); +} + +/* Constant folding should multiply *b by the reciprocal of 4 - 5i + = 4/41 + (5/41)i. */ + +/* { dg-final { scan-tree-dump-times " \\\* " 1 "original" } } */ +/* { dg-final { scan-tree-dump-times " / " 0 "original" } } */ +/* { dg-final { cleanup-tree-dump "original" } } */ diff --git a/gcc/testsuite/gcc.dg/pr44214-3.c b/gcc/testsuite/gcc.dg/pr44214-3.c new file mode 100644 index 00000000000..46d5ee8c78e --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr44214-3.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-ccp1" } */ + +typedef double v2df __attribute__ ((vector_size (16))); + +void do_div (v2df *a, v2df *b) +{ + *a = *b / (v2df) { 2.0, 2.0 }; +} + +/* Since 2.0 has an exact reciprocal, constant folding should multiply *b + by the reciprocals of the vector elements. As a result there should be + one vector multiply and zero divides in the optimized code. The fold + does not take place for generic vectors until the first CCP pass. The + string " * " occurs 3 times: one multiply and two indirect parameters. */ + +/* { dg-final { scan-tree-dump-times " \\\* " 3 "ccp1" } } */ +/* { dg-final { scan-tree-dump-times " / " 0 "ccp1" } } */ +/* { dg-final { cleanup-tree-dump "ccp1" } } */