From 23847df44028af7bf9b73ce25b38ebb49aeed8c7 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Tue, 9 Apr 2013 14:12:34 +0000 Subject: [PATCH] tree-vect-slp.c (vect_get_and_check_slp_defs): Remove code dealing with cost. 2013-04-09 Richard Biener * tree-vect-slp.c (vect_get_and_check_slp_defs): Remove code dealing with cost. (vect_build_slp_tree): Likewise. (vect_analyze_slp_cost_1, vect_analyze_slp_cost): New functions calculating the cost of a SLP instance. (vect_analyze_slp_instance): Use it from here, after building the SLP tree. From-SVN: r197635 --- gcc/ChangeLog | 10 ++ gcc/tree-vect-slp.c | 221 ++++++++++++++++++++++++++------------------ 2 files changed, 143 insertions(+), 88 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 69cefc06f20..42823116e04 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,13 @@ +2013-04-09 Richard Biener + + * tree-vect-slp.c (vect_get_and_check_slp_defs): Remove code + dealing with cost. + (vect_build_slp_tree): Likewise. + (vect_analyze_slp_cost_1, vect_analyze_slp_cost): New functions + calculating the cost of a SLP instance. + (vect_analyze_slp_instance): Use it from here, after building + the SLP tree. + 2013-04-09 Jakub Jelinek PR middle-end/56883 diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index 34131060ee5..28f86b39550 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -199,11 +199,8 @@ vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt) static bool vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, - slp_tree slp_node, gimple stmt, - int ncopies_for_cost, bool first, - vec *oprnds_info, - stmt_vector_for_cost *prologue_cost_vec, - stmt_vector_for_cost *body_cost_vec) + gimple stmt, bool first, + vec *oprnds_info) { tree oprnd; unsigned int i, number_of_oprnds; @@ -211,8 +208,6 @@ vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, gimple def_stmt; enum vect_def_type dt = vect_uninitialized_def; enum vect_def_type dt_op0 = vect_uninitialized_def; - stmt_vec_info stmt_info = vinfo_for_stmt (stmt); - tree lhs = gimple_get_lhs (stmt); struct loop *loop = NULL; enum tree_code rhs_code; bool different_types = false; @@ -344,22 +339,6 @@ vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, { def_op0 = def; dt_op0 = dt; - /* Analyze costs (for the first stmt of the group only). */ - if (REFERENCE_CLASS_P (lhs)) - /* Store. */ - vect_model_store_cost (stmt_info, ncopies_for_cost, false, - dt, slp_node, prologue_cost_vec, - body_cost_vec); - else - { - enum vect_def_type dts[2]; - dts[0] = dt; - dts[1] = vect_uninitialized_def; - /* Not memory operation (we don't call this function for - loads). */ - vect_model_simple_cost (stmt_info, ncopies_for_cost, dts, - prologue_cost_vec, body_cost_vec); - } } } else @@ -479,13 +458,11 @@ vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, static bool vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, - slp_tree *node, unsigned int group_size, int *outside_cost, - int ncopies_for_cost, unsigned int *max_nunits, + slp_tree *node, unsigned int group_size, + unsigned int *max_nunits, vec *load_permutation, vec *loads, - unsigned int vectorization_factor, bool *loads_permuted, - stmt_vector_for_cost *prologue_cost_vec, - stmt_vector_for_cost *body_cost_vec) + unsigned int vectorization_factor, bool *loads_permuted) { unsigned int i; vec stmts = SLP_TREE_SCALAR_STMTS (*node); @@ -750,11 +727,8 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, if (REFERENCE_CLASS_P (lhs)) { /* Store. */ - if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, - stmt, ncopies_for_cost, - (i == 0), &oprnds_info, - prologue_cost_vec, - body_cost_vec)) + if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, + stmt, (i == 0), &oprnds_info)) { vect_free_oprnd_info (oprnds_info); return false; @@ -863,11 +837,6 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, vect_free_oprnd_info (oprnds_info); return false; } - - /* Analyze costs (for the first stmt in the group). */ - vect_model_load_cost (vinfo_for_stmt (stmt), - ncopies_for_cost, false, *node, - prologue_cost_vec, body_cost_vec); } /* Store the place of this load in the interleaving chain. In @@ -943,10 +912,8 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, } /* Find the def-stmts. */ - if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt, - ncopies_for_cost, (i == 0), - &oprnds_info, prologue_cost_vec, - body_cost_vec)) + if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, stmt, + (i == 0), &oprnds_info)) { vect_free_oprnd_info (oprnds_info); return false; @@ -959,12 +926,7 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, { loads->safe_push (*node); if (permutation) - { - gimple first_stmt = stmts[0]; - *loads_permuted = true; - (void) record_stmt_cost (body_cost_vec, group_size, vec_perm, - vinfo_for_stmt (first_stmt), 0, vect_body); - } + *loads_permuted = true; else { /* We don't check here complex numbers chains, so we set @@ -989,10 +951,8 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, child = vect_create_new_slp_node (oprnd_info->def_stmts); if (!child || !vect_build_slp_tree (loop_vinfo, bb_vinfo, &child, group_size, - outside_cost, ncopies_for_cost, max_nunits, load_permutation, loads, - vectorization_factor, loads_permuted, - prologue_cost_vec, body_cost_vec)) + vectorization_factor, loads_permuted)) { if (child) oprnd_info->def_stmts = vNULL; @@ -1510,6 +1470,122 @@ vect_find_last_store_in_slp_instance (slp_instance instance) return last_store; } +/* Compute the cost for the SLP node NODE in the SLP instance INSTANCE. */ + +static void +vect_analyze_slp_cost_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, + slp_instance instance, slp_tree node, + stmt_vector_for_cost *prologue_cost_vec, + unsigned ncopies_for_cost) +{ + stmt_vector_for_cost *body_cost_vec = &SLP_INSTANCE_BODY_COST_VEC (instance); + + unsigned i; + slp_tree child; + gimple stmt, s; + stmt_vec_info stmt_info; + tree lhs; + unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance); + + /* Recurse down the SLP tree. */ + FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) + vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo, + instance, child, prologue_cost_vec, + ncopies_for_cost); + + /* Look at the first scalar stmt to determine the cost. */ + stmt = SLP_TREE_SCALAR_STMTS (node)[0]; + stmt_info = vinfo_for_stmt (stmt); + if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) + { + if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info))) + vect_model_store_cost (stmt_info, ncopies_for_cost, false, + vect_uninitialized_def, + node, prologue_cost_vec, body_cost_vec); + else + { + int i; + gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info))); + vect_model_load_cost (stmt_info, ncopies_for_cost, false, + node, prologue_cost_vec, body_cost_vec); + /* If the load is permuted record the cost for the permutation. + ??? Loads from multiple chains are let through here only + for a single special case involving complex numbers where + in the end no permutation is necessary. */ + FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, s) + if ((STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo_for_stmt (s)) + == STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info)) + && vect_get_place_in_interleaving_chain + (s, STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info)) != i) + { + record_stmt_cost (body_cost_vec, group_size, vec_perm, + stmt_info, 0, vect_body); + break; + } + } + } + else + record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt, + stmt_info, 0, vect_body); + + /* Scan operands and account for prologue cost of constants/externals. + ??? This over-estimates cost for multiple uses and should be + re-engineered. */ + lhs = gimple_get_lhs (stmt); + for (i = 0; i < gimple_num_ops (stmt); ++i) + { + tree def, op = gimple_op (stmt, i); + gimple def_stmt; + enum vect_def_type dt; + if (!op || op == lhs) + continue; + if (vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, + &def_stmt, &def, &dt) + && (dt == vect_constant_def || dt == vect_external_def)) + record_stmt_cost (prologue_cost_vec, 1, vector_stmt, + stmt_info, 0, vect_prologue); + } +} + +/* Compute the cost for the SLP instance INSTANCE. */ + +static void +vect_analyze_slp_cost (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, + slp_instance instance, unsigned nunits) +{ + stmt_vector_for_cost body_cost_vec, prologue_cost_vec; + unsigned ncopies_for_cost; + stmt_info_for_cost *si; + unsigned i; + + /* Calculate the number of vector stmts to create based on the unrolling + factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is + GROUP_SIZE / NUNITS otherwise. */ + unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance); + ncopies_for_cost = least_common_multiple (nunits, group_size) / nunits; + + prologue_cost_vec.create (10); + body_cost_vec.create (10); + SLP_INSTANCE_BODY_COST_VEC (instance) = body_cost_vec; + vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo, + instance, SLP_INSTANCE_TREE (instance), + &prologue_cost_vec, ncopies_for_cost); + + /* Record the prologue costs, which were delayed until we were + sure that SLP was successful. Unlike the body costs, we know + the final values now regardless of the loop vectorization factor. */ + void *data = (loop_vinfo ? LOOP_VINFO_TARGET_COST_DATA (loop_vinfo) + : BB_VINFO_TARGET_COST_DATA (bb_vinfo)); + FOR_EACH_VEC_ELT (prologue_cost_vec, i, si) + { + struct _stmt_vec_info *stmt_info + = si->stmt ? vinfo_for_stmt (si->stmt) : NULL; + (void) add_stmt_cost (data, si->count, si->kind, stmt_info, + si->misalign, vect_prologue); + } + + prologue_cost_vec.release (); +} /* Analyze an SLP instance starting from a group of grouped stores. Call vect_build_slp_tree to build a tree of packed stmts if possible. @@ -1526,15 +1602,13 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, tree vectype, scalar_type = NULL_TREE; gimple next; unsigned int vectorization_factor = 0; - int outside_cost = 0, ncopies_for_cost, i; + int i; unsigned int max_nunits = 0; vec load_permutation; vec loads; struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)); bool loads_permuted = false; vec scalar_stmts; - stmt_vector_for_cost body_cost_vec, prologue_cost_vec; - stmt_info_for_cost *si; if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))) { @@ -1615,26 +1689,14 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, node = vect_create_new_slp_node (scalar_stmts); - /* Calculate the number of vector stmts to create based on the unrolling - factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is - GROUP_SIZE / NUNITS otherwise. */ - ncopies_for_cost = unrolling_factor * group_size / nunits; - load_permutation.create (group_size * group_size); loads.create (group_size); - prologue_cost_vec.create (10); - body_cost_vec.create (10); /* Build the tree for the SLP instance. */ if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size, - &outside_cost, ncopies_for_cost, &max_nunits, &load_permutation, &loads, - vectorization_factor, &loads_permuted, - &prologue_cost_vec, &body_cost_vec)) + vectorization_factor, &loads_permuted)) { - void *data = (loop_vinfo ? LOOP_VINFO_TARGET_COST_DATA (loop_vinfo) - : BB_VINFO_TARGET_COST_DATA (bb_vinfo)); - /* Calculate the unrolling factor based on the smallest type. */ if (max_nunits > nunits) unrolling_factor = least_common_multiple (max_nunits, group_size) @@ -1647,8 +1709,6 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, "Build SLP failed: unrolling required in basic" " block SLP"); vect_free_slp_tree (node); - body_cost_vec.release (); - prologue_cost_vec.release (); load_permutation.release (); loads.release (); return false; @@ -1659,7 +1719,7 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, SLP_INSTANCE_TREE (new_instance) = node; SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size; SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor; - SLP_INSTANCE_BODY_COST_VEC (new_instance) = body_cost_vec; + SLP_INSTANCE_BODY_COST_VEC (new_instance) = vNULL; SLP_INSTANCE_LOADS (new_instance) = loads; SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL; SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation; @@ -1678,7 +1738,6 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, } vect_free_slp_instance (new_instance); - prologue_cost_vec.release (); return false; } @@ -1688,18 +1747,9 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, else SLP_INSTANCE_LOAD_PERMUTATION (new_instance).release (); - /* Record the prologue costs, which were delayed until we were - sure that SLP was successful. Unlike the body costs, we know - the final values now regardless of the loop vectorization factor. */ - FOR_EACH_VEC_ELT (prologue_cost_vec, i, si) - { - struct _stmt_vec_info *stmt_info - = si->stmt ? vinfo_for_stmt (si->stmt) : NULL; - (void) add_stmt_cost (data, si->count, si->kind, stmt_info, - si->misalign, vect_prologue); - } - - prologue_cost_vec.release (); + /* Compute the costs of this SLP instance. */ + vect_analyze_slp_cost (loop_vinfo, bb_vinfo, + new_instance, TYPE_VECTOR_SUBPARTS (vectype)); if (loop_vinfo) LOOP_VINFO_SLP_INSTANCES (loop_vinfo).safe_push (new_instance); @@ -1711,11 +1761,6 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, return true; } - else - { - body_cost_vec.release (); - prologue_cost_vec.release (); - } /* Failed to SLP. */ /* Free the allocated memory. */ -- 2.30.2