From: Thomas Koenig Date: Sun, 2 Jun 2019 15:18:22 +0000 (+0000) Subject: re PR fortran/90539 (481.wrf slowdown by 25% on Intel Kaby with -Ofast -march=native... X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=5d9c602d8374bd4330b53ae4dc6a2534199cc397;p=gcc.git re PR fortran/90539 (481.wrf slowdown by 25% on Intel Kaby with -Ofast -march=native starting with r271377) 2019-06-02 Thomas Koenig PR fortran/90539 * trans-expr.c (gfc_conv_subref_array_arg): If the size of the expression can be determined to be one, treat it as contiguous. Set likelihood of presence of an actual argument according to PRED_FORTRAN_ABSENT_DUMMY and likelihood of being contiguous according to PRED_FORTRAN_CONTIGUOUS. 2019-06-02 Thomas Koenig PR fortran/90539 * predict.def (PRED_FORTRAN_CONTIGUOUS): New predictor. 2019-06-02 Thomas Koenig PR fortran/90539 * gfortran.dg/internal_pack_24.f90: New test. From-SVN: r271844 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 20bdc2bec37..0d4ba896ea4 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,8 @@ +2019-06-02 Thomas Koenig + + PR fortran/90539 + * predict.def (PRED_FORTRAN_CONTIGUOUS): New predictor. + 2019-06-01 Martin Sebor PR middle-end/90694 diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog index 16cc7e9cfaf..1c3f8bc5948 100644 --- a/gcc/fortran/ChangeLog +++ b/gcc/fortran/ChangeLog @@ -1,6 +1,15 @@ +2019-06-02 Thomas Koenig + + PR fortran/90539 + * trans-expr.c (gfc_conv_subref_array_arg): If the size of the + expression can be determined to be one, treat it as contiguous. + Set likelihood of presence of an actual argument according to + PRED_FORTRAN_ABSENT_DUMMY and likelihood of being contiguous + according to PRED_FORTRAN_CONTIGUOUS. + 2019-05-30 Thomas Koenig - * gfc-internals.texi (Translating to GENERIC): New chapter. + * gfc-internals.texi (Translating to GENERIC): New chapter. 
2019-05-30 Marek Polacek diff --git a/gcc/fortran/trans-expr.c b/gcc/fortran/trans-expr.c index 5183029a666..d23520fdbaa 100644 --- a/gcc/fortran/trans-expr.c +++ b/gcc/fortran/trans-expr.c @@ -4922,15 +4922,35 @@ class_array_fcn: gfc_se cont_se, array_se; stmtblock_t if_block, else_block; tree if_stmt, else_stmt; + mpz_t size; + bool size_set; cont_var = gfc_create_var (boolean_type_node, "contiguous"); - /* cont_var = is_contiguous (expr); . */ - gfc_init_se (&cont_se, parmse); - gfc_conv_is_contiguous_expr (&cont_se, expr); - gfc_add_block_to_block (&se->pre, &(&cont_se)->pre); - gfc_add_modify (&se->pre, cont_var, cont_se.expr); - gfc_add_block_to_block (&se->pre, &(&cont_se)->post); + /* If the size is known to be one at compile-time, set + cont_var to true unconditionally. This may look + inelegant, but we're only doing this during + optimization, so the statements will be optimized away, + and this saves complexity here. */ + + size_set = gfc_array_size (expr, &size); + if (size_set && mpz_cmp_ui (size, 1) == 0) + { + gfc_add_modify (&se->pre, cont_var, + build_one_cst (boolean_type_node)); + } + else + { + /* cont_var = is_contiguous (expr); . */ + gfc_init_se (&cont_se, parmse); + gfc_conv_is_contiguous_expr (&cont_se, expr); + gfc_add_block_to_block (&se->pre, &(&cont_se)->pre); + gfc_add_modify (&se->pre, cont_var, cont_se.expr); + gfc_add_block_to_block (&se->pre, &(&cont_se)->post); + } + + if (size_set) + mpz_clear (size); /* arrayse->expr = descriptor of a. */ gfc_init_se (&array_se, se); @@ -4953,7 +4973,9 @@ class_array_fcn: /* And put the above into an if statement. 
*/ pre_stmts = fold_build3_loc (input_location, COND_EXPR, void_type_node, - cont_var, if_stmt, else_stmt); + gfc_likely (cont_var, + PRED_FORTRAN_CONTIGUOUS), + if_stmt, else_stmt); } else { @@ -4976,11 +4998,11 @@ class_array_fcn: gfc_add_modify (&else_block, pointer, build_int_cst (type, 0)); else_stmt = gfc_finish_block (&else_block); - tmp = fold_build3_loc (input_location, COND_EXPR, void_type_node, present_var, + tmp = fold_build3_loc (input_location, COND_EXPR, void_type_node, + gfc_likely (present_var, + PRED_FORTRAN_ABSENT_DUMMY), pre_stmts, else_stmt); gfc_add_expr_to_block (&se->pre, tmp); - - } else gfc_add_expr_to_block (&se->pre, pre_stmts); @@ -4995,9 +5017,16 @@ class_array_fcn: tmp = fold_build2_loc (input_location, EQ_EXPR, boolean_type_node, cont_var, build_zero_cst (boolean_type_node)); + tmp = gfc_unlikely (tmp, PRED_FORTRAN_CONTIGUOUS); + if (pass_optional) - post_cond = fold_build2_loc (input_location, TRUTH_ANDIF_EXPR, - boolean_type_node, present_var, tmp); + { + tree present_likely = gfc_likely (present_var, + PRED_FORTRAN_ABSENT_DUMMY); + post_cond = fold_build2_loc (input_location, TRUTH_ANDIF_EXPR, + boolean_type_node, present_likely, + tmp); + } else post_cond = tmp; } diff --git a/gcc/predict.def b/gcc/predict.def index 53b39ab0e3a..24c1385943e 100644 --- a/gcc/predict.def +++ b/gcc/predict.def @@ -229,3 +229,10 @@ DEF_PREDICTOR (PRED_FORTRAN_ABSENT_DUMMY, "Fortran absent dummy", \ to be very likely. */ DEF_PREDICTOR (PRED_FORTRAN_LOOP_PREHEADER, "Fortran loop preheader", \ HITRATE (99), 0) + +/* Fortran assumed size arrays can be non-contiguous, so they need + to be repacked. */ + +DEF_PREDICTOR (PRED_FORTRAN_CONTIGUOUS, "Fortran contiguous", \ + HITRATE (75), 0) + diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index bf406ae88f4..6535ca09f32 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2019-06-02 Thomas Koenig + + PR fortran/90539 + * gfortran.dg/internal_pack_24.f90: New test. 
+ 2019-06-01 Iain Sandoe PR target/90698 diff --git a/gcc/testsuite/gfortran.dg/internal_pack_24.f90 b/gcc/testsuite/gfortran.dg/internal_pack_24.f90 new file mode 100644 index 00000000000..cc2443edcc0 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/internal_pack_24.f90 @@ -0,0 +1,39 @@ +! { dg-do run } +! { dg-additional-options "-O -fdump-tree-optimized" } +module y + implicit none +contains + subroutine foo(a,b,c,d,e,f) + real, dimension(1), intent(inout) :: a, b, c, d, e, f + if (any([a,b,c,d,e,f] /= [1,2,3,4,5,6])) stop 1 + a = -a + b = -b + c = -c + d = -d + e = -e + f = -f + end subroutine foo +end module y +module x + use y + implicit none +contains + subroutine bar(a) + real, dimension(:) :: a + integer :: n1, n3, n5 + n1 = 1 + n3 = 3 + n5 = 5 + call foo(a(n1:n1), a(n1+1:n1+1), a(n3:n3), a(n3+1:n3+1), a(n5:n5), a(n5+1:n5+1)) + end subroutine bar +end module x + +program main + use x + real, dimension(6) :: a,b + b = [1,2,3,4,5,6] + a = b + call bar(a) + if (any(a /= -b)) stop 2 +end program main +! { dg-final { scan-tree-dump-not "contiguous" "optimized" } }