From: Thomas Koenig Date: Wed, 29 May 2019 20:30:45 +0000 (+0000) Subject: re PR fortran/90539 (481.wrf slowdown by 25% on Intel Kaby with -Ofast -march=native... X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=1585b483236dc2e9a9460a11c14cf3b32a967a84;p=gcc.git re PR fortran/90539 (481.wrf slowdown by 25% on Intel Kaby with -Ofast -march=native starting with r271377) 2019-05-29 Thomas Koenig PR fortran/90539 * gfortran.h (gfc_has_dimen_vector_ref): Add prototype. * trans.h (gfc_conv_subref_array_arg): Add argument check_contiguous. (gfc_conv_is_contiguous_expr): Add prototype. * frontend-passes.c (has_dimen_vector_ref): Remove prototype, rename to (gfc_has_dimen_vector_ref): New function name. (matmul_temp_args): Use gfc_has_dimen_vector_ref. (inline_matmul_assign): Likewise. * trans-array.c (gfc_conv_array_parameter): Also check for absence of a vector subscript before calling gfc_conv_subref_array_arg. Pass additional argument to gfc_conv_subref_array_arg. * trans-expr.c (gfc_conv_subref_array_arg): Add argument check_contiguous. If that is true, check if the argument is contiguous and do not repack in that case. * trans-intrinsic.c (gfc_conv_intrinsic_is_contiguous): Split away most of the work into, and call (gfc_conv_intrinsic_is_coniguous_expr): New function. 2019-05-29 Thomas Koenig PR fortran/90539 * gfortran.dg/internal_pack_21.f90: Adjust scan patterns. * gfortran.dg/internal_pack_22.f90: New test. * gfortran.dg/internal_pack_23.f90: New test. From-SVN: r271751 --- diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog index 07b485b5a50..4c126b7fa7a 100644 --- a/gcc/fortran/ChangeLog +++ b/gcc/fortran/ChangeLog @@ -1,3 +1,24 @@ +2019-05-29 Thomas Koenig + + PR fortran/90539 + * gfortran.h (gfc_has_dimen_vector_ref): Add prototype. + * trans.h (gfc_conv_subref_array_arg): Add argument check_contiguous. + (gfc_conv_is_contiguous_expr): Add prototype. + * frontend-passes.c (has_dimen_vector_ref): Remove prototype, + rename to + (gfc_has_dimen_vector_ref): New function name. + (matmul_temp_args): Use gfc_has_dimen_vector_ref. + (inline_matmul_assign): Likewise. + * trans-array.c (gfc_conv_array_parameter): Also check for absence + of a vector subscript before calling gfc_conv_subref_array_arg. + Pass additional argument to gfc_conv_subref_array_arg. + * trans-expr.c (gfc_conv_subref_array_arg): Add argument + check_contiguous. If that is true, check if the argument + is contiguous and do not repack in that case. + * trans-intrinsic.c (gfc_conv_intrinsic_is_contiguous): Split + away most of the work into, and call + (gfc_conv_intrinsic_is_coniguous_expr): New function. + 2019-05-29 Jakub Jelinek PR fortran/90329 diff --git a/gcc/fortran/frontend-passes.c b/gcc/fortran/frontend-passes.c index d4264dafa6f..87df5048b71 100644 --- a/gcc/fortran/frontend-passes.c +++ b/gcc/fortran/frontend-passes.c @@ -54,7 +54,6 @@ static gfc_code * create_do_loop (gfc_expr *, gfc_expr *, gfc_expr *, static gfc_expr* check_conjg_transpose_variable (gfc_expr *, bool *, bool *); static int call_external_blas (gfc_code **, int *, void *); -static bool has_dimen_vector_ref (gfc_expr *); static int matmul_temp_args (gfc_code **, int *,void *data); static int index_interchange (gfc_code **, int*, void *); @@ -2868,7 +2867,7 @@ matmul_temp_args (gfc_code **c, int *walk_subtrees ATTRIBUTE_UNUSED, { if (matrix_a->expr_type == EXPR_VARIABLE && (gfc_check_dependency (matrix_a, expr1, true) - || has_dimen_vector_ref (matrix_a))) + || gfc_has_dimen_vector_ref (matrix_a))) a_tmp = true; } else @@ -2881,7 +2880,7 @@ matmul_temp_args (gfc_code **c, int *walk_subtrees ATTRIBUTE_UNUSED, { if (matrix_b->expr_type == EXPR_VARIABLE && (gfc_check_dependency (matrix_b, expr1, true) - || has_dimen_vector_ref (matrix_b))) + || gfc_has_dimen_vector_ref (matrix_b))) b_tmp = true; } else @@ -3681,8 +3680,8 @@ scalarized_expr (gfc_expr *e_in, gfc_expr **index, int count_index) /* Helper function to check for a dimen vector as subscript. */ -static bool -has_dimen_vector_ref (gfc_expr *e) +bool +gfc_has_dimen_vector_ref (gfc_expr *e) { gfc_array_ref *ar; int i; @@ -3838,8 +3837,8 @@ inline_matmul_assign (gfc_code **c, int *walk_subtrees, if (matrix_b == NULL) return 0; - if (has_dimen_vector_ref (expr1) || has_dimen_vector_ref (matrix_a) - || has_dimen_vector_ref (matrix_b)) + if (gfc_has_dimen_vector_ref (expr1) || gfc_has_dimen_vector_ref (matrix_a) + || gfc_has_dimen_vector_ref (matrix_b)) return 0; /* We do not handle data dependencies yet. */ diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h index 54987ac878b..798297bd724 100644 --- a/gcc/fortran/gfortran.h +++ b/gcc/fortran/gfortran.h @@ -3535,6 +3535,7 @@ typedef int (*walk_expr_fn_t) (gfc_expr **, int *, void *); int gfc_dummy_code_callback (gfc_code **, int *, void *); int gfc_expr_walker (gfc_expr **, walk_expr_fn_t, void *); int gfc_code_walker (gfc_code **, walk_code_fn_t, walk_expr_fn_t, void *); +bool gfc_has_dimen_vector_ref (gfc_expr *e); /* simplify.c */ diff --git a/gcc/fortran/trans-array.c b/gcc/fortran/trans-array.c index 9c96d897f41..56d534d0444 100644 --- a/gcc/fortran/trans-array.c +++ b/gcc/fortran/trans-array.c @@ -8139,12 +8139,12 @@ gfc_conv_array_parameter (gfc_se * se, gfc_expr * expr, bool g77, optimizers. */ if (g77 && optimize && !optimize_size && expr->expr_type == EXPR_VARIABLE - && !is_pointer (expr) && (fsym == NULL - || fsym->ts.type != BT_ASSUMED)) + && !is_pointer (expr) && ! gfc_has_dimen_vector_ref (expr) + && (fsym == NULL || fsym->ts.type != BT_ASSUMED)) { gfc_conv_subref_array_arg (se, expr, g77, fsym ? fsym->attr.intent : INTENT_INOUT, - false, fsym, proc_name, sym); + false, fsym, proc_name, sym, true); return; } diff --git a/gcc/fortran/trans-expr.c b/gcc/fortran/trans-expr.c index b7a8456c021..5183029a666 100644 --- a/gcc/fortran/trans-expr.c +++ b/gcc/fortran/trans-expr.c @@ -4579,7 +4579,7 @@ void gfc_conv_subref_array_arg (gfc_se *se, gfc_expr * expr, int g77, sym_intent intent, bool formal_ptr, const gfc_symbol *fsym, const char *proc_name, - gfc_symbol *sym) + gfc_symbol *sym, bool check_contiguous) { gfc_se lse; gfc_se rse; @@ -4602,7 +4602,7 @@ gfc_conv_subref_array_arg (gfc_se *se, gfc_expr * expr, int g77, pass_optional = fsym && fsym->attr.optional && sym && sym->attr.optional; - if (pass_optional) + if (pass_optional || check_contiguous) { gfc_init_se (&work_se, NULL); parmse = &work_se; @@ -4880,50 +4880,136 @@ class_array_fcn: else parmse->expr = gfc_build_addr_expr (NULL_TREE, parmse->expr); - if (pass_optional) + /* Basically make this into + + if (present) + { + if (contiguous) + { + pointer = a; + } + else + { + parmse->pre(); + pointer = parmse->expr; + } + } + else + pointer = NULL; + + foo (pointer); + if (present && !contiguous) + se->post(); + + */ + + if (pass_optional || check_contiguous) { - tree present; tree type; stmtblock_t else_block; tree pre_stmts, post_stmts; tree pointer; tree else_stmt; + tree present_var = NULL_TREE; + tree cont_var = NULL_TREE; + tree post_cond; - /* Make this into + type = TREE_TYPE (parmse->expr); + pointer = gfc_create_var (type, "arg_ptr"); + + if (check_contiguous) + { + gfc_se cont_se, array_se; + stmtblock_t if_block, else_block; + tree if_stmt, else_stmt; + + cont_var = gfc_create_var (boolean_type_node, "contiguous"); + + /* cont_var = is_contiguous (expr); . */ + gfc_init_se (&cont_se, parmse); + gfc_conv_is_contiguous_expr (&cont_se, expr); + gfc_add_block_to_block (&se->pre, &(&cont_se)->pre); + gfc_add_modify (&se->pre, cont_var, cont_se.expr); + gfc_add_block_to_block (&se->pre, &(&cont_se)->post); + + /* arrayse->expr = descriptor of a. */ + gfc_init_se (&array_se, se); + gfc_conv_expr_descriptor (&array_se, expr); + gfc_add_block_to_block (&se->pre, &(&array_se)->pre); + gfc_add_block_to_block (&se->pre, &(&array_se)->post); + + /* if_stmt = { pointer = &a[0]; } . */ + gfc_init_block (&if_block); + tmp = gfc_conv_array_data (array_se.expr); + tmp = fold_convert (type, tmp); + gfc_add_modify (&if_block, pointer, tmp); + if_stmt = gfc_finish_block (&if_block); + + /* else_stmt = { parmse->pre(); pointer = parmse->expr; } . */ + gfc_init_block (&else_block); + gfc_add_block_to_block (&else_block, &parmse->pre); + gfc_add_modify (&else_block, pointer, parmse->expr); + else_stmt = gfc_finish_block (&else_block); + + /* And put the above into an if statement. */ + pre_stmts = fold_build3_loc (input_location, COND_EXPR, void_type_node, + cont_var, if_stmt, else_stmt); + } + else + { + /* pointer = pramse->expr; . */ + gfc_add_modify (&parmse->pre, pointer, parmse->expr); + pre_stmts = gfc_finish_block (&parmse->pre); + } - if (present (a)) - { - parmse->pre; - optional = parse->expr; - } - else - optional = NULL; - call foo (optional); - if (present (a)) - parmse->post; + if (pass_optional) + { + present_var = gfc_create_var (boolean_type_node, "present"); - */ + /* present_var = present(sym); . */ + tmp = gfc_conv_expr_present (sym); + tmp = fold_convert (boolean_type_node, tmp); + gfc_add_modify (&se->pre, present_var, tmp); - type = TREE_TYPE (parmse->expr); - pointer = gfc_create_var (type, "optional"); - tmp = gfc_conv_expr_present (sym); - present = gfc_evaluate_now (tmp, &se->pre); - gfc_add_modify (&parmse->pre, pointer, parmse->expr); - pre_stmts = gfc_finish_block (&parmse->pre); - - gfc_init_block (&else_block); - gfc_add_modify (&else_block, pointer, build_int_cst (type, 0)); - else_stmt = gfc_finish_block (&else_block); - - tmp = fold_build3_loc (input_location, COND_EXPR, void_type_node, present, - pre_stmts, else_stmt); - gfc_add_expr_to_block (&se->pre, tmp); + /* else_stmt = { pointer = NULL; } . */ + gfc_init_block (&else_block); + gfc_add_modify (&else_block, pointer, build_int_cst (type, 0)); + else_stmt = gfc_finish_block (&else_block); + + tmp = fold_build3_loc (input_location, COND_EXPR, void_type_node, present_var, + pre_stmts, else_stmt); + gfc_add_expr_to_block (&se->pre, tmp); + + + } + else + gfc_add_expr_to_block (&se->pre, pre_stmts); post_stmts = gfc_finish_block (&parmse->post); - tmp = fold_build3_loc (input_location, COND_EXPR, void_type_node, present, + + /* Put together the post stuff, plus the optional + deallocation. */ + if (check_contiguous) + { + /* !cont_var. */ + tmp = fold_build2_loc (input_location, EQ_EXPR, boolean_type_node, + cont_var, + build_zero_cst (boolean_type_node)); + if (pass_optional) + post_cond = fold_build2_loc (input_location, TRUTH_ANDIF_EXPR, + boolean_type_node, present_var, tmp); + else + post_cond = tmp; + } + else + { + gcc_assert (pass_optional); + post_cond = present_var; + } + + tmp = fold_build3_loc (input_location, COND_EXPR, void_type_node, post_cond, post_stmts, build_empty_stmt (input_location)); gfc_add_expr_to_block (&se->post, tmp); - se->expr = pointer; } diff --git a/gcc/fortran/trans-intrinsic.c b/gcc/fortran/trans-intrinsic.c index e0a4c6709de..f6edd685212 100644 --- a/gcc/fortran/trans-intrinsic.c +++ b/gcc/fortran/trans-intrinsic.c @@ -2832,6 +2832,17 @@ static void gfc_conv_intrinsic_is_contiguous (gfc_se * se, gfc_expr * expr) { gfc_expr *arg; + arg = expr->value.function.actual->expr; + gfc_conv_is_contiguous_expr (se, arg); + se->expr = fold_convert (gfc_typenode_for_spec (&expr->ts), se->expr); +} + +/* This function does the work for gfc_conv_intrinsic_is_contiguous, + plus it can be called directly. */ + +void +gfc_conv_is_contiguous_expr (gfc_se *se, gfc_expr *arg) +{ gfc_ss *ss; gfc_se argse; tree desc, tmp, stride, extent, cond; @@ -2839,8 +2850,6 @@ gfc_conv_intrinsic_is_contiguous (gfc_se * se, gfc_expr * expr) tree fncall0; gfc_array_spec *as; - arg = expr->value.function.actual->expr; - if (arg->ts.type == BT_CLASS) gfc_add_class_array_ref (arg); @@ -2878,7 +2887,7 @@ gfc_conv_intrinsic_is_contiguous (gfc_se * se, gfc_expr * expr) cond = fold_build2_loc (input_location, EQ_EXPR, boolean_type_node, stride, build_int_cst (TREE_TYPE (stride), 1)); - for (i = 0; i < expr->value.function.actual->expr->rank - 1; i++) + for (i = 0; i < arg->rank - 1; i++) { tmp = gfc_conv_descriptor_lbound_get (desc, gfc_rank_cst[i]); extent = gfc_conv_descriptor_ubound_get (desc, gfc_rank_cst[i]); @@ -2896,7 +2905,7 @@ gfc_conv_intrinsic_is_contiguous (gfc_se * se, gfc_expr * expr) cond = fold_build2_loc (input_location, TRUTH_AND_EXPR, boolean_type_node, cond, tmp); } - se->expr = convert (gfc_typenode_for_spec (&expr->ts), cond); + se->expr = cond; } } diff --git a/gcc/fortran/trans.h b/gcc/fortran/trans.h index e0118abaf18..0305d331ff7 100644 --- a/gcc/fortran/trans.h +++ b/gcc/fortran/trans.h @@ -535,7 +535,10 @@ int gfc_conv_procedure_call (gfc_se *, gfc_symbol *, gfc_actual_arglist *, void gfc_conv_subref_array_arg (gfc_se *, gfc_expr *, int, sym_intent, bool, const gfc_symbol *fsym = NULL, const char *proc_name = NULL, - gfc_symbol *sym = NULL); + gfc_symbol *sym = NULL, + bool check_contiguous = false); + +void gfc_conv_is_contiguous_expr (gfc_se *, gfc_expr *); /* Generate code for a scalar assignment. */ tree gfc_trans_scalar_assign (gfc_se *, gfc_se *, gfc_typespec, bool, bool, diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index bb500f07e8c..a0e8b7bc492 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,10 @@ +2019-05-29 Thomas Koenig + + PR fortran/90539 + * gfortran.dg/internal_pack_21.f90: Adjust scan patterns. + * gfortran.dg/internal_pack_22.f90: New test. + * gfortran.dg/internal_pack_23.f90: New test. + 2019-05-29 Jan Hubicka * tree-ssa/alias-access-spath-1.c: new testcase. diff --git a/gcc/testsuite/gfortran.dg/internal_pack_21.f90 b/gcc/testsuite/gfortran.dg/internal_pack_21.f90 index d0ce942a9f8..54e43ffa1cb 100644 --- a/gcc/testsuite/gfortran.dg/internal_pack_21.f90 +++ b/gcc/testsuite/gfortran.dg/internal_pack_21.f90 @@ -20,5 +20,5 @@ END MODULE M1 USE M1 CALL S2() END -! { dg-final { scan-tree-dump-times "optional" 4 "original" } } +! { dg-final { scan-tree-dump-times "arg_ptr" 5 "original" } } ! { dg-final { scan-tree-dump-not "_gfortran_internal_unpack" "original" } } diff --git a/gcc/testsuite/gfortran.dg/internal_pack_22.f90 b/gcc/testsuite/gfortran.dg/internal_pack_22.f90 new file mode 100644 index 00000000000..4e9fe59ceab --- /dev/null +++ b/gcc/testsuite/gfortran.dg/internal_pack_22.f90 @@ -0,0 +1,36 @@ +! { dg-do run } +! { dg-additional-options "-fdump-tree-original -O" } +! Check that absent and present dummy arguments work with +! packing when handing them down to an old-fashioned argument. + +module x + implicit none +contains + subroutine foo (a,b) + real, dimension(:), intent(inout), optional :: a, b + if (present(a)) stop 1 + if (.not. present(b)) stop 2 + call bar (a, b) + end subroutine foo + + subroutine bar (a,b) + real, dimension(2), intent(inout), optional :: a, b + real :: tmp + if (present(a)) stop 3 + if (.not. present(b)) stop 4 + tmp = b(2) + b(2) = b(1) + b(1) = tmp + end subroutine bar +end module x + +program main + use x + implicit none + real, dimension(2) :: b + b(1) = 1. + b(2) = 42. + call foo(b=b) + if (b(1) /= 42. .or. b(2) /= 1.) stop 5 +end program main +! { dg-final { scan-tree-dump-not "_gfortran_internal_unpack" "original" } } diff --git a/gcc/testsuite/gfortran.dg/internal_pack_23.f90 b/gcc/testsuite/gfortran.dg/internal_pack_23.f90 new file mode 100644 index 00000000000..8df82c8b36b --- /dev/null +++ b/gcc/testsuite/gfortran.dg/internal_pack_23.f90 @@ -0,0 +1,27 @@ +! { dg-do run } +! PR fortran/90539 - this used to cause an ICE. + +module t2 + implicit none +contains + subroutine foo(a) + real, dimension(*) :: a + if (a(1) /= 1.0 .or. a(2) /= 2.0) stop 1 + end subroutine foo +end module t2 + +module t1 + use t2 + implicit none +contains + subroutine bar(a) + real, dimension(:) :: a + if (a(1) /= 1.0 .or. a(2) /= 2.0) stop 1 + call foo(a) + end subroutine bar +end module t1 + +program main + use t1 + call bar([1.0, 2.0]) +end program main