From b210f45f527eb017810af815bbb97a8f6939385f Mon Sep 17 00:00:00 2001
From: Richard Biener <rguenther@suse.de>
Date: Wed, 7 Feb 2018 15:46:17 +0000
Subject: [PATCH] re PR tree-optimization/84037 (Speed regression of
 polyhedron benchmark since r256644)

2018-02-07  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/84037
	* tree-vectorizer.h (struct _loop_vec_info): Add ivexpr_map member.
	(cse_and_gimplify_to_preheader): Declare.
	(vect_get_place_in_interleaving_chain): Likewise.
	* tree-vect-loop.c (_loop_vec_info::_loop_vec_info): Initialize
	ivexpr_map.
	(_loop_vec_info::~_loop_vec_info): Delete it.
	(cse_and_gimplify_to_preheader): New function.
	* tree-vect-slp.c (vect_get_place_in_interleaving_chain): Export.
	* tree-vect-stmts.c (vectorizable_store): CSE base and steps.
	(vectorizable_load): Likewise.  For grouped stores always base
	the IV on the first element.
	* tree-vect-loop-manip.c (vect_loop_versioning): Unshare versioning
	condition before gimplifying.

From-SVN: r257453
---
 gcc/ChangeLog              | 17 +++++++++++++++
 gcc/tree-vect-loop-manip.c |  3 ++-
 gcc/tree-vect-loop.c       | 29 +++++++++++++++++++++++++
 gcc/tree-vect-slp.c        |  2 +-
 gcc/tree-vect-stmts.c      | 44 ++++++++++++++++++++++----------------
 gcc/tree-vectorizer.h      |  5 +++++
 6 files changed, 80 insertions(+), 20 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 73c144f5f6a..9c4d0e87b87 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,20 @@
+2018-02-07  Richard Biener  <rguenther@suse.de>
+
+	PR tree-optimization/84037
+	* tree-vectorizer.h (struct _loop_vec_info): Add ivexpr_map member.
+	(cse_and_gimplify_to_preheader): Declare.
+	(vect_get_place_in_interleaving_chain): Likewise.
+	* tree-vect-loop.c (_loop_vec_info::_loop_vec_info): Initialize
+	ivexpr_map.
+	(_loop_vec_info::~_loop_vec_info): Delete it.
+	(cse_and_gimplify_to_preheader): New function.
+	* tree-vect-slp.c (vect_get_place_in_interleaving_chain): Export.
+	* tree-vect-stmts.c (vectorizable_store): CSE base and steps.
+	(vectorizable_load): Likewise.  For grouped stores always base
+	the IV on the first element.
+	* tree-vect-loop-manip.c (vect_loop_versioning): Unshare versioning
+	condition before gimplifying.
+
 2018-02-07  Jakub Jelinek  <jakub@redhat.com>
 
 	* tree-eh.c (operation_could_trap_helper_p): Ignore honor_trapv for
diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c
index 53684e57e94..96d40c8c4b3 100644
--- a/gcc/tree-vect-loop-manip.c
+++ b/gcc/tree-vect-loop-manip.c
@@ -3015,7 +3015,8 @@ vect_loop_versioning (loop_vec_info loop_vinfo,
       vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
     }
 
-  cond_expr = force_gimple_operand_1 (cond_expr, &gimplify_stmt_list,
+  cond_expr = force_gimple_operand_1 (unshare_expr (cond_expr),
+				      &gimplify_stmt_list,
 				      is_gimple_condexpr, NULL_TREE);
   gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
 
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index c5301684028..3a511474813 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -1128,6 +1128,7 @@ _loop_vec_info::_loop_vec_info (struct loop *loop_in)
     unaligned_dr (NULL),
     peeling_for_alignment (0),
     ptr_mask (0),
+    ivexpr_map (NULL),
     slp_unrolling_factor (1),
     single_scalar_iteration_cost (0),
     vectorizable (false),
@@ -1251,10 +1252,38 @@ _loop_vec_info::~_loop_vec_info ()
 
   free (bbs);
 
   release_vec_loop_masks (&masks);
+  delete ivexpr_map;
 
   loop->aux = NULL;
 }
 
+/* Return an invariant or register for EXPR and emit necessary
+   computations in the LOOP_VINFO loop preheader.  */
+
+tree
+cse_and_gimplify_to_preheader (loop_vec_info loop_vinfo, tree expr)
+{
+  if (is_gimple_reg (expr)
+      || is_gimple_min_invariant (expr))
+    return expr;
+
+  if (! loop_vinfo->ivexpr_map)
+    loop_vinfo->ivexpr_map = new hash_map<tree_operand_hash, tree>;
+  tree &cached = loop_vinfo->ivexpr_map->get_or_insert (expr);
+  if (! cached)
+    {
+      gimple_seq stmts = NULL;
+      cached = force_gimple_operand (unshare_expr (expr),
+				     &stmts, true, NULL_TREE);
+      if (stmts)
+	{
+	  edge e = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
+	  gsi_insert_seq_on_edge_immediate (e, stmts);
+	}
+    }
+  return cached;
+}
+
 /* Return true if we can use CMP_TYPE as the comparison type to produce
    all masks required to mask LOOP_VINFO.  */
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index ca28632d6ba..c9f0feac76a 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -188,7 +188,7 @@ vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
 /* Find the place of the data-ref in STMT in the interleaving chain that starts
    from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */
 
-static int
+int
 vect_get_place_in_interleaving_chain (gimple *stmt, gimple *first_stmt)
 {
   gimple *next_stmt = first_stmt;
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 0a5c4cbacda..64a728eb129 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -6455,7 +6455,6 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
       tree offvar;
       tree ivstep;
       tree running_off;
-      gimple_seq stmts = NULL;
       tree stride_base, stride_step, alias_off;
       tree vec_oprnd;
       unsigned int g;
@@ -6467,11 +6466,11 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
 
       stride_base
 	= fold_build_pointer_plus
-	    (unshare_expr (DR_BASE_ADDRESS (first_dr)),
+	    (DR_BASE_ADDRESS (first_dr),
 	     size_binop (PLUS_EXPR,
-			 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr))),
+			 convert_to_ptrofftype (DR_OFFSET (first_dr)),
 			 convert_to_ptrofftype (DR_INIT (first_dr))));
-      stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (first_dr)));
+      stride_step = fold_convert (sizetype, DR_STEP (first_dr));
 
       /* For a store with loop-invariant (but other than power-of-2)
	 stride (i.e. not a grouped access) like so:
@@ -6563,15 +6562,15 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
 
       standard_iv_increment_position (loop, &incr_gsi, &insert_after);
 
+      stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
+      ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
       create_iv (stride_base, ivstep, NULL,
		 loop, &incr_gsi, insert_after,
		 &offvar, NULL);
       incr = gsi_stmt (incr_gsi);
       set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
 
-      stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
-      if (stmts)
-	gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
+      stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
 
       prev_stmt_info = NULL;
       alias_off = build_int_cst (ref_type, 0);
@@ -7484,27 +7483,37 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
       tree ivstep;
       tree running_off;
       vec<constructor_elt, va_gc> *v = NULL;
-      gimple_seq stmts = NULL;
       tree stride_base, stride_step, alias_off;
       /* Checked by get_load_store_type.  */
       unsigned int const_nunits = nunits.to_constant ();
+      unsigned HOST_WIDE_INT cst_offset = 0;
 
       gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
       gcc_assert (!nested_in_vect_loop);
 
-      if (slp && grouped_load)
+      if (grouped_load)
	{
	  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
	  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
-	  group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
-	  ref_type = get_group_alias_ptr_type (first_stmt);
	}
       else
	{
	  first_stmt = stmt;
	  first_dr = dr;
+	}
+      if (slp && grouped_load)
+	{
+	  group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
+	  ref_type = get_group_alias_ptr_type (first_stmt);
+	}
+      else
+	{
+	  if (grouped_load)
+	    cst_offset
+	      = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
+		 * vect_get_place_in_interleaving_chain (stmt, first_stmt));
	  group_size = 1;
-	  ref_type = reference_alias_ptr_type (DR_REF (first_dr));
+	  ref_type = reference_alias_ptr_type (DR_REF (dr));
	}
 
       stride_base
@@ -7536,16 +7545,15 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
 
       standard_iv_increment_position (loop, &incr_gsi, &insert_after);
 
-      create_iv (unshare_expr (stride_base), unshare_expr (ivstep), NULL,
+      stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
+      ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
+      create_iv (stride_base, ivstep, NULL,
		 loop, &incr_gsi, insert_after,
		 &offvar, NULL);
       incr = gsi_stmt (incr_gsi);
       set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
 
-      stride_step = force_gimple_operand (unshare_expr (stride_step),
-					  &stmts, true, NULL_TREE);
-      if (stmts)
-	gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
+      stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
 
       prev_stmt_info = NULL;
       running_off = offvar;
@@ -7634,7 +7642,7 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
	  for (i = 0; i < nloads; i++)
	    {
	      tree this_off = build_int_cst (TREE_TYPE (alias_off),
-					     group_el * elsz);
+					     group_el * elsz + cst_offset);
	      new_stmt = gimple_build_assign (make_ssa_name (ltype),
					      build2 (MEM_REF, ltype,
						      running_off, this_off));
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 56e875f20ca..33e6a915ea4 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -440,6 +440,9 @@ typedef struct _loop_vec_info : public vec_info {
   /* Cost vector for a single scalar iteration.  */
   auto_vec<stmt_info_for_cost> scalar_cost_vec;
 
+  /* Map of IV base/step expressions to inserted name in the preheader.  */
+  hash_map<tree_operand_hash, tree> *ivexpr_map;
+
   /* The unrolling factor needed to SLP the loop.  In case of that pure SLP
      is applied to the loop, i.e., no unrolling is needed, this is 1.  */
   poly_uint64 slp_unrolling_factor;
@@ -1544,6 +1547,7 @@ extern int vect_get_known_peeling_cost (loop_vec_info, int, int *,
					stmt_vector_for_cost *,
					stmt_vector_for_cost *,
					stmt_vector_for_cost *);
+extern tree cse_and_gimplify_to_preheader (loop_vec_info, tree);
 
 /* In tree-vect-slp.c.  */
 extern void vect_free_slp_instance (slp_instance);
@@ -1564,6 +1568,7 @@ extern bool can_duplicate_and_interleave_p (unsigned int, machine_mode,
					    tree * = NULL, tree * = NULL);
 extern void duplicate_and_interleave (gimple_seq *, tree, vec<tree>,
				      unsigned int, vec<tree> &);
+extern int vect_get_place_in_interleaving_chain (gimple *, gimple *);
 
 /* In tree-vect-patterns.c.  */
 /* Pattern recognition functions.
-- 
2.30.2
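
The heart of the patch is cse_and_gimplify_to_preheader: stride base and step expressions
that several strided loads/stores would otherwise each re-gimplify are computed once on the
loop preheader edge and afterwards looked up from ivexpr_map.  The standalone C++ sketch
below only mirrors that caching shape, with std::unordered_map standing in for GCC's
hash_map and strings standing in for trees and gimple statements; every name in it is
illustrative, not GCC API.

// Minimal sketch of the "gimplify once into the preheader, reuse everywhere" idea.
#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>

struct loop_info
{
  std::unordered_map<std::string, std::string> ivexpr_map; // expr -> cached name
  std::vector<std::string> preheader_stmts;                // "emitted" computations
};

/* Return a name holding EXPR, emitting its computation into the preheader
   only the first time EXPR is seen (the CSE part of the change).  */
static std::string
cse_to_preheader (loop_info &li, const std::string &expr)
{
  auto it = li.ivexpr_map.find (expr);
  if (it != li.ivexpr_map.end ())
    return it->second;                  // reuse the previously emitted result
  std::string name = "_t" + std::to_string (li.ivexpr_map.size ());
  li.preheader_stmts.push_back (name + " = " + expr);
  li.ivexpr_map.emplace (expr, name);
  return name;
}

int
main ()
{
  loop_info li;
  // Two accesses asking for the same stride expression share one statement.
  std::cout << cse_to_preheader (li, "base + offset") << "\n";
  std::cout << cse_to_preheader (li, "base + offset") << "\n";
  std::cout << "preheader stmts: " << li.preheader_stmts.size () << "\n"; // prints 1
  return 0;
}

Running the sketch prints the same temporary name twice and a preheader statement count of
one, which is the effect the patch aims for: a single IV setup computation shared by all
strided accesses with the same base and step, instead of one per access.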