From 1a4b99c172b8a5485d84e24db16ccbd847a4b1b7 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Thu, 28 May 2015 07:15:57 +0000 Subject: [PATCH] tree-vectorizer.h (struct _slp_instance): Remove body_cost_vec member. 2015-05-28 Richard Biener * tree-vectorizer.h (struct _slp_instance): Remove body_cost_vec member. (SLP_INSTANCE_BODY_COST_VEC): Remove. (vect_update_slp_costs_according_to_vf): Likewise. (vect_slp_analyze_operations): Update prototype. * tree-vect-loop.c (vect_analyze_loop_2): Remove call to vect_update_slp_costs_according_to_vf, adjust. * tree-vect-slp.c (vect_free_slp_instance): Adjust. (vect_analyze_slp_cost_1): Likewise. (vect_analyze_slp_cost): Likewise. Properly deal with widening reduction ops. Commit body costs. (vect_analyze_slp_instance): Adjust. Do not analyze SLP cost for loops from here. (vect_slp_analyze_operations): But do it from here when the vectorization factor is known and stmts are analyzed. (vect_bb_vectorization_profitable_p): Simplify. (vect_slp_analyze_bb_1): Do not compute SLP cost here. (vect_update_slp_costs_according_to_vf): Remove. From-SVN: r223798 --- gcc/ChangeLog | 21 ++++++ gcc/tree-vect-loop.c | 7 +- gcc/tree-vect-slp.c | 149 +++++++++++++++--------------------------- gcc/tree-vectorizer.h | 8 +-- 4 files changed, 78 insertions(+), 107 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 29c26c40661..8742a222141 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,24 @@ +2015-05-28 Richard Biener + + * tree-vectorizer.h (struct _slp_instance): Remove body_cost_vec + member. + (SLP_INSTANCE_BODY_COST_VEC): Remove. + (vect_update_slp_costs_according_to_vf): Likewise. + (vect_slp_analyze_operations): Update prototype. + * tree-vect-loop.c (vect_analyze_loop_2): Remove call to + vect_update_slp_costs_according_to_vf, adjust. + * tree-vect-slp.c (vect_free_slp_instance): Adjust. + (vect_analyze_slp_cost_1): Likewise. + (vect_analyze_slp_cost): Likewise. Properly deal with + widening reduction ops. 
Commit body costs. + (vect_analyze_slp_instance): Adjust. Do not analyze SLP + cost for loops from here. + (vect_slp_analyze_operations): But do it from here when + the vectorization factor is known and stmts are analyzed. + (vect_bb_vectorization_profitable_p): Simplify. + (vect_slp_analyze_bb_1): Do not compute SLP cost here. + (vect_update_slp_costs_according_to_vf): Remove. + 2015-05-27 Magnus Granberg H.J. Lu diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index b93685e8160..8fe4dc6ad30 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -1814,15 +1814,12 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo) /* Update the vectorization factor based on the SLP decision. */ vect_update_vf_for_slp (loop_vinfo); - /* Once VF is set, SLP costs should be updated since the number of - created vector stmts depends on VF. */ - vect_update_slp_costs_according_to_vf (loop_vinfo); - /* Analyze operations in the SLP instances. Note this may remove unsupported SLP instances which makes the above SLP kind detection invalid. */ unsigned old_size = LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length (); - vect_slp_analyze_operations (LOOP_VINFO_SLP_INSTANCES (loop_vinfo)); + vect_slp_analyze_operations (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), + LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)); if (LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length () != old_size) return false; } diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index 214c96c7944..9137144e67a 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -130,7 +130,6 @@ vect_free_slp_instance (slp_instance instance) { vect_free_slp_tree (SLP_INSTANCE_TREE (instance)); SLP_INSTANCE_LOADS (instance).release (); - SLP_INSTANCE_BODY_COST_VEC (instance).release (); free (instance); } @@ -1546,13 +1545,11 @@ vect_find_last_scalar_stmt_in_slp (slp_tree node) /* Compute the cost for the SLP node NODE in the SLP instance INSTANCE. 
*/ static void -vect_analyze_slp_cost_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, - slp_instance instance, slp_tree node, +vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node, stmt_vector_for_cost *prologue_cost_vec, + stmt_vector_for_cost *body_cost_vec, unsigned ncopies_for_cost) { - stmt_vector_for_cost *body_cost_vec = &SLP_INSTANCE_BODY_COST_VEC (instance); - unsigned i; slp_tree child; gimple stmt, s; @@ -1563,9 +1560,8 @@ vect_analyze_slp_cost_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, /* Recurse down the SLP tree. */ FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) if (child) - vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo, - instance, child, prologue_cost_vec, - ncopies_for_cost); + vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec, + body_cost_vec, ncopies_for_cost); /* Look at the first scalar stmt to determine the cost. */ stmt = SLP_TREE_SCALAR_STMTS (node)[0]; @@ -1622,7 +1618,8 @@ vect_analyze_slp_cost_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, enum vect_def_type dt; if (!op || op == lhs) continue; - if (vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, + if (vect_is_simple_use (op, NULL, STMT_VINFO_LOOP_VINFO (stmt_info), + STMT_VINFO_BB_VINFO (stmt_info), &def_stmt, &def, &dt)) { /* Without looking at the actual initializer a vector of @@ -1642,8 +1639,7 @@ vect_analyze_slp_cost_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, /* Compute the cost for the SLP instance INSTANCE. */ static void -vect_analyze_slp_cost (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, - slp_instance instance, unsigned nunits) +vect_analyze_slp_cost (slp_instance instance, void *data) { stmt_vector_for_cost body_cost_vec, prologue_cost_vec; unsigned ncopies_for_cost; @@ -1654,20 +1650,38 @@ vect_analyze_slp_cost (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is GROUP_SIZE / NUNITS otherwise. 
*/ unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance); + slp_tree node = SLP_INSTANCE_TREE (instance); + stmt_vec_info stmt_info = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]); + /* Adjust the group_size by the vectorization factor which is always one + for basic-block vectorization. */ + if (STMT_VINFO_LOOP_VINFO (stmt_info)) + group_size *= LOOP_VINFO_VECT_FACTOR (STMT_VINFO_LOOP_VINFO (stmt_info)); + unsigned nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info)); + /* For reductions look at a reduction operand in case the reduction + operation is widening like DOT_PROD or SAD. */ + if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)) + { + gimple stmt = SLP_TREE_SCALAR_STMTS (node)[0]; + switch (gimple_assign_rhs_code (stmt)) + { + case DOT_PROD_EXPR: + case SAD_EXPR: + nunits = TYPE_VECTOR_SUBPARTS (get_vectype_for_scalar_type + (TREE_TYPE (gimple_assign_rhs1 (stmt)))); + break; + default:; + } + } ncopies_for_cost = least_common_multiple (nunits, group_size) / nunits; prologue_cost_vec.create (10); body_cost_vec.create (10); - SLP_INSTANCE_BODY_COST_VEC (instance) = body_cost_vec; - vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo, - instance, SLP_INSTANCE_TREE (instance), - &prologue_cost_vec, ncopies_for_cost); + vect_analyze_slp_cost_1 (instance, SLP_INSTANCE_TREE (instance), + &prologue_cost_vec, &body_cost_vec, + ncopies_for_cost); /* Record the prologue costs, which were delayed until we were - sure that SLP was successful. Unlike the body costs, we know - the final values now regardless of the loop vectorization factor. */ - void *data = (loop_vinfo ? LOOP_VINFO_TARGET_COST_DATA (loop_vinfo) - : BB_VINFO_TARGET_COST_DATA (bb_vinfo)); + sure that SLP was successful. */ FOR_EACH_VEC_ELT (prologue_cost_vec, i, si) { struct _stmt_vec_info *stmt_info @@ -1676,7 +1690,17 @@ vect_analyze_slp_cost (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, si->misalign, vect_prologue); } + /* Record the instance's instructions in the target cost model. 
*/ + FOR_EACH_VEC_ELT (body_cost_vec, i, si) + { + struct _stmt_vec_info *stmt_info + = si->stmt ? vinfo_for_stmt (si->stmt) : NULL; + (void) add_stmt_cost (data, si->count, si->kind, stmt_info, + si->misalign, vect_body); + } + prologue_cost_vec.release (); + body_cost_vec.release (); } /* Analyze an SLP instance starting from a group of grouped stores. Call @@ -1811,7 +1835,6 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, SLP_INSTANCE_TREE (new_instance) = node; SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size; SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor; - SLP_INSTANCE_BODY_COST_VEC (new_instance) = vNULL; SLP_INSTANCE_LOADS (new_instance) = loads; /* Compute the load permutation. */ @@ -1863,13 +1886,7 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, if (loop_vinfo) - { - /* Compute the costs of this SLP instance. Delay this for BB - vectorization as we don't have vector types computed yet. */ - vect_analyze_slp_cost (loop_vinfo, bb_vinfo, - new_instance, TYPE_VECTOR_SUBPARTS (vectype)); - LOOP_VINFO_SLP_INSTANCES (loop_vinfo).safe_push (new_instance); - } + LOOP_VINFO_SLP_INSTANCES (loop_vinfo).safe_push (new_instance); else BB_VINFO_SLP_INSTANCES (bb_vinfo).safe_push (new_instance); @@ -2237,7 +2254,7 @@ vect_slp_analyze_node_operations (slp_tree node) operations are supported. */ bool -vect_slp_analyze_operations (vec<slp_instance> slp_instances) +vect_slp_analyze_operations (vec<slp_instance> slp_instances, void *data) { slp_instance instance; int i; @@ -2259,7 +2276,11 @@ vect_slp_analyze_operations (vec<slp_instance> slp_instances) slp_instances.ordered_remove (i); } else - i++; + { + /* Compute the costs of the SLP instance. 
*/ + vect_analyze_slp_cost (instance, data); + i++; + } } if (!slp_instances.length ()) @@ -2342,26 +2363,9 @@ vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo) { vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo); slp_instance instance; - int i, j; + int i; unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0; unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0; - void *target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo); - stmt_vec_info stmt_info = NULL; - stmt_vector_for_cost body_cost_vec; - stmt_info_for_cost *ci; - - /* Calculate vector costs. */ - FOR_EACH_VEC_ELT (slp_instances, i, instance) - { - body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance); - - FOR_EACH_VEC_ELT (body_cost_vec, j, ci) - { - stmt_info = ci->stmt ? vinfo_for_stmt (ci->stmt) : NULL; - (void) add_stmt_cost (target_cost_data, ci->count, ci->kind, - stmt_info, ci->misalign, vect_body); - } - } /* Calculate scalar cost. */ FOR_EACH_VEC_ELT (slp_instances, i, instance) @@ -2519,7 +2523,8 @@ vect_slp_analyze_bb_1 (basic_block bb) return NULL; } - if (!vect_slp_analyze_operations (BB_VINFO_SLP_INSTANCES (bb_vinfo))) + if (!vect_slp_analyze_operations (BB_VINFO_SLP_INSTANCES (bb_vinfo), + BB_VINFO_TARGET_COST_DATA (bb_vinfo))) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -2529,15 +2534,6 @@ vect_slp_analyze_bb_1 (basic_block bb) return NULL; } - /* Compute the costs of the SLP instances. */ - FOR_EACH_VEC_ELT (slp_instances, i, instance) - { - gimple stmt = SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (instance))[0]; - tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt)); - vect_analyze_slp_cost (NULL, bb_vinfo, - instance, TYPE_VECTOR_SUBPARTS (vectype)); - } - /* Cost model: check if the vectorization is worthwhile. 
*/ if (!unlimited_cost_model (NULL) && !vect_bb_vectorization_profitable_p (bb_vinfo)) @@ -2616,45 +2612,6 @@ vect_slp_analyze_bb (basic_block bb) } -/* SLP costs are calculated according to SLP instance unrolling factor (i.e., - the number of created vector stmts depends on the unrolling factor). - However, the actual number of vector stmts for every SLP node depends on - VF which is set later in vect_analyze_operations (). Hence, SLP costs - should be updated. In this function we assume that the inside costs - calculated in vect_model_xxx_cost are linear in ncopies. */ - -void -vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo) -{ - unsigned int i, j, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); - vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo); - slp_instance instance; - stmt_vector_for_cost body_cost_vec; - stmt_info_for_cost *si; - void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo); - - if (dump_enabled_p ()) - dump_printf_loc (MSG_NOTE, vect_location, - "=== vect_update_slp_costs_according_to_vf ===\n"); - - FOR_EACH_VEC_ELT (slp_instances, i, instance) - { - /* We assume that costs are linear in ncopies. */ - int ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (instance); - - /* Record the instance's instructions in the target cost model. - This was delayed until here because the count of instructions - isn't known beforehand. */ - body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance); - - FOR_EACH_VEC_ELT (body_cost_vec, j, si) - (void) add_stmt_cost (data, si->count * ncopies, si->kind, - vinfo_for_stmt (si->stmt), si->misalign, - vect_body); - } -} - - /* For constant and loop invariant defs of SLP_NODE this function returns (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts. 
OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index ae795a9f7d5..9976096d476 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -128,9 +128,6 @@ typedef struct _slp_instance { /* The unrolling factor required to vectorized this SLP instance. */ unsigned int unrolling_factor; - /* Vectorization costs associated with SLP instance. */ - stmt_vector_for_cost body_cost_vec; - /* The group of nodes that contain loads of this SLP instance. */ vec<slp_tree> loads; } *slp_instance; @@ -140,7 +137,6 @@ typedef struct _slp_instance { #define SLP_INSTANCE_TREE(S) (S)->root #define SLP_INSTANCE_GROUP_SIZE(S) (S)->group_size #define SLP_INSTANCE_UNROLLING_FACTOR(S) (S)->unrolling_factor -#define SLP_INSTANCE_BODY_COST_VEC(S) (S)->body_cost_vec #define SLP_INSTANCE_LOADS(S) (S)->loads #define SLP_TREE_CHILDREN(S) (S)->children @@ -1114,9 +1110,9 @@ extern void vect_free_slp_instance (slp_instance); extern bool vect_transform_slp_perm_load (slp_tree, vec , gimple_stmt_iterator *, int, slp_instance, bool); -extern bool vect_slp_analyze_operations (vec<slp_instance> slp_instances); +extern bool vect_slp_analyze_operations (vec<slp_instance> slp_instances, + void *); extern bool vect_schedule_slp (loop_vec_info, bb_vec_info); -extern void vect_update_slp_costs_according_to_vf (loop_vec_info); extern bool vect_analyze_slp (loop_vec_info, bb_vec_info, unsigned); extern bool vect_make_slp_decision (loop_vec_info); extern void vect_detect_hybrid_slp (loop_vec_info); -- 2.30.2