{
vect_free_slp_tree (SLP_INSTANCE_TREE (instance), final_p);
SLP_INSTANCE_LOADS (instance).release ();
+ instance->subgraph_entries.release ();
+ instance->cost_vec.release ();
free (instance);
}
SLP_INSTANCE_LOADS (new_instance) = vNULL;
SLP_INSTANCE_ROOT_STMT (new_instance) = constructor ? stmt_info : NULL;
new_instance->reduc_phis = NULL;
+ new_instance->cost_vec = vNULL;
+ new_instance->subgraph_entries = vNULL;
vect_gather_slp_loads (new_instance, node);
if (dump_enabled_p ())
visited.add (*x);
i++;
- add_stmt_costs (vinfo, vinfo->target_cost_data, &cost_vec);
- cost_vec.release ();
+ /* Remember the SLP graph entry cost for later. */
+ instance->cost_vec = cost_vec;
}
}
if (bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo))
{
hash_set<stmt_vec_info> svisited;
- stmt_vector_for_cost cost_vec;
- cost_vec.create (2);
for (i = 0; vinfo->slp_instances.iterate (i, &instance); ++i)
vect_bb_slp_mark_live_stmts (bb_vinfo, SLP_INSTANCE_TREE (instance),
- instance, &cost_vec, svisited);
- add_stmt_costs (vinfo, vinfo->target_cost_data, &cost_vec);
- cost_vec.release ();
+ instance, &instance->cost_vec, svisited);
}
return !vinfo->slp_instances.is_empty ();
}
+/* Get the SLP instance leader from INSTANCE_LEADER thereby transitively
+ closing the eventual chain. */
+
+static slp_instance
+get_ultimate_leader (slp_instance instance,
+ hash_map<slp_instance, slp_instance> &instance_leader)
+{
+ auto_vec<slp_instance *, 8> chain;
+ slp_instance *tem;
+ while (*(tem = instance_leader.get (instance)) != instance)
+ {
+ chain.safe_push (tem);
+ instance = *tem;
+ }
+ while (!chain.is_empty ())
+ *chain.pop () = instance;
+ return instance;
+}
+
+/* Worker of vect_bb_partition_graph, recurse on NODE. */
+
+static void
+vect_bb_partition_graph_r (bb_vec_info bb_vinfo,
+ slp_instance instance, slp_tree node,
+ hash_map<stmt_vec_info, slp_instance> &stmt_to_instance,
+ hash_map<slp_instance, slp_instance> &instance_leader)
+{
+ stmt_vec_info stmt_info;
+ unsigned i;
+ bool all = true;
+ FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
+ {
+ bool existed_p;
+ slp_instance &stmt_instance
+ = stmt_to_instance.get_or_insert (stmt_info, &existed_p);
+ if (!existed_p)
+ all = false;
+ else if (stmt_instance != instance)
+ {
+ /* If we're running into a previously marked stmt make us the
+ leader of the current ultimate leader. This keeps the
+ leader chain acyclic and works even when the current instance
+ connects two previously independent graph parts. */
+ slp_instance stmt_leader
+ = get_ultimate_leader (stmt_instance, instance_leader);
+ if (stmt_leader != instance)
+ instance_leader.put (stmt_leader, instance);
+ }
+ stmt_instance = instance;
+ }
+ /* If not all stmts had been visited we have to recurse on children. */
+ if (all)
+ return;
+
+ slp_tree child;
+ FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
+ if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
+ vect_bb_partition_graph_r (bb_vinfo, instance, child, stmt_to_instance,
+ instance_leader);
+}
+
+/* Partition the SLP graph into pieces that can be costed independently. */
+
+static void
+vect_bb_partition_graph (bb_vec_info bb_vinfo)
+{
+ DUMP_VECT_SCOPE ("vect_bb_partition_graph");
+
+ /* First walk the SLP graph assigning each involved scalar stmt a
+ corresponding SLP graph entry and upon visiting a previously
+ marked stmt, make the stmts leader the current SLP graph entry. */
+ hash_map<stmt_vec_info, slp_instance> stmt_to_instance;
+ hash_map<slp_instance, slp_instance> instance_leader;
+ slp_instance instance;
+ for (unsigned i = 0; bb_vinfo->slp_instances.iterate (i, &instance); ++i)
+ {
+ instance_leader.put (instance, instance);
+ vect_bb_partition_graph_r (bb_vinfo,
+ instance, SLP_INSTANCE_TREE (instance),
+ stmt_to_instance, instance_leader);
+ }
+
+ /* Then collect entries to each independent subgraph. */
+ for (unsigned i = 0; bb_vinfo->slp_instances.iterate (i, &instance); ++i)
+ {
+ slp_instance leader = get_ultimate_leader (instance, instance_leader);
+ leader->subgraph_entries.safe_push (instance);
+ if (dump_enabled_p ()
+ && leader != instance)
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "instance %p is leader of %p\n",
+ leader, instance);
+ }
+}
/* Compute the scalar cost of the SLP node NODE and its children
and return it. Do not account defs that are marked in LIFE and
}
}
-/* Check if vectorization of the basic block is profitable. */
+/* Check if vectorization of the basic block is profitable for the
+ subgraph denoted by SLP_INSTANCES. */
static bool
-vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
+vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo,
+ vec<slp_instance> slp_instances)
{
- vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
slp_instance instance;
int i;
unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
- /* Calculate scalar cost. */
+ void *vect_target_cost_data = init_cost (NULL);
+
+ /* Calculate scalar cost and sum the cost for the vector stmts
+ previously collected. */
stmt_vector_for_cost scalar_costs;
scalar_costs.create (0);
hash_set<slp_tree> visited;
vect_bb_slp_scalar_cost (bb_vinfo,
SLP_INSTANCE_TREE (instance),
&life, &scalar_costs, visited);
+ add_stmt_costs (bb_vinfo, vect_target_cost_data, &instance->cost_vec);
+ instance->cost_vec.release ();
}
/* Unset visited flag. */
stmt_info_for_cost *si;
FOR_EACH_VEC_ELT (scalar_costs, i, si)
gimple_set_visited (si->stmt_info->stmt, false);
- void *target_cost_data = init_cost (NULL);
- add_stmt_costs (bb_vinfo, target_cost_data, &scalar_costs);
+ void *scalar_target_cost_data = init_cost (NULL);
+ add_stmt_costs (bb_vinfo, scalar_target_cost_data, &scalar_costs);
scalar_costs.release ();
unsigned dummy;
- finish_cost (target_cost_data, &dummy, &scalar_cost, &dummy);
- destroy_cost_data (target_cost_data);
+ finish_cost (scalar_target_cost_data, &dummy, &scalar_cost, &dummy);
+ destroy_cost_data (scalar_target_cost_data);
- /* Complete the target-specific cost calculation. */
- finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
+ /* Complete the target-specific vector cost calculation. */
+ finish_cost (vect_target_cost_data, &vec_prologue_cost,
&vec_inside_cost, &vec_epilogue_cost);
+ destroy_cost_data (vect_target_cost_data);
vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
return true;
}
+/* For each SLP subgraph determine profitability and remove parts not so.
+ Returns true if any profitable to vectorize subgraph remains. */
+
+static bool
+vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
+{
+ slp_instance instance;
+ unsigned i;
+
+ auto_vec<slp_instance> subgraphs (BB_VINFO_SLP_INSTANCES (bb_vinfo).length ());
+ FOR_EACH_VEC_ELT (BB_VINFO_SLP_INSTANCES (bb_vinfo), i, instance)
+ if (!instance->subgraph_entries.is_empty ())
+ subgraphs.quick_push (instance);
+ BB_VINFO_SLP_INSTANCES (bb_vinfo).truncate (0);
+ for (i = 0; i < subgraphs.length ();)
+ {
+ instance = subgraphs[i];
+ if (!vect_bb_vectorization_profitable_p (bb_vinfo,
+ instance->subgraph_entries))
+ {
+ /* ??? We need to think of providing better dump/opt-report
+ locations here. */
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "not vectorized: vectorization is not "
+ "profitable.\n");
+ }
+ slp_instance entry;
+ unsigned j;
+ FOR_EACH_VEC_ELT (instance->subgraph_entries, j, entry)
+ if (entry != instance)
+ vect_free_slp_instance (entry, false);
+ vect_free_slp_instance (instance, false);
+ subgraphs.ordered_remove (i);
+ }
+ else
+ {
+ slp_instance entry;
+ unsigned j;
+ FOR_EACH_VEC_ELT (instance->subgraph_entries, j, entry)
+ BB_VINFO_SLP_INSTANCES (bb_vinfo).safe_push (entry);
+ ++i;
+ }
+ }
+ return !BB_VINFO_SLP_INSTANCES (bb_vinfo).is_empty ();
+}
+
/* Find any vectorizable constructors and add them to the grouped_store
array. */
return false;
}
+ vect_bb_partition_graph (bb_vinfo);
+
/* Cost model: check if the vectorization is worthwhile. */
if (!unlimited_cost_model (NULL)
&& !vect_bb_vectorization_profitable_p (bb_vinfo))