/* Interprocedural constant propagation
Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011
Free Software Foundation, Inc.
- Contributed by Razya Ladelsky <RAZYA@il.ibm.com>
+
+ Contributed by Razya Ladelsky <RAZYA@il.ibm.com> and Martin Jambor
+ <mjambor@suse.cz>
This file is part of GCC.
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-/* Interprocedural constant propagation. The aim of interprocedural constant
- propagation (IPCP) is to find which function's argument has the same
- constant value in each invocation throughout the whole program. For example,
- consider the following program:
-
- int g (int y)
- {
- printf ("value is %d",y);
- }
+/* Interprocedural constant propagation (IPA-CP).
- int f (int x)
- {
- g (x);
- }
+ The goal of this transformation is to
- int h (int y)
- {
- g (y);
- }
+ 1) discover functions which are always invoked with some arguments with the
+ same known constant values and modify the functions so that the
+ subsequent optimizations can take advantage of the knowledge, and
- void main (void)
- {
- f (3);
- h (3);
- }
+ 2) partial specialization - create specialized versions of functions
+ transformed in this way if some parameters are known constants only in
+ certain contexts but the estimated tradeoff between the speedup and the
+ growth in code size is deemed good.
+ The algorithm also propagates types and attempts to perform type based
+ devirtualization. Types are propagated much like constants.
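+
+ As a simple made-up example (not taken from any real test case), in
+
+ static int norm (int v, int scale)
+ {
+ return (v * 100) / scale;
+ }
+
+ int wrapper (int a, int b)
+ {
+ return norm (a, 255) + norm (b, 255);
+ }
+
+ every call passes 255 as scale, so point 1) applies and norm itself can be
+ optimized for scale == 255. If there were an additional call passing an
+ unknown value, only point 2) would apply and a specialized clone for
+ scale == 255 could still be created when the tradeoff looks good.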
- The IPCP algorithm will find that g's formal argument y is always called
- with the value 3.
+ The algorithm basically consists of three stages. In the first, functions
+ are analyzed one at a time and jump functions are constructed for all known
+ call-sites. In the second phase, the pass propagates information from the
+ jump functions across the call graph to reveal what values are available at
+ what
+ call sites, performs estimations of effects of known values on functions and
+ their callees, and finally decides what specialized extra versions should be
+ created. In the third, the special versions materialize and appropriate
+ calls are redirected.
- The algorithm used is based on "Interprocedural Constant Propagation", by
- David Callahan, Keith D Cooper, Ken Kennedy, Linda Torczon, Comp86, pg
- 152-161
+ The algorithm used is to a certain extent based on "Interprocedural Constant
+ Propagation", by David Callahan, Keith D Cooper, Ken Kennedy, Linda Torczon,
+ Comp86, pg 152-161 and "A Methodology for Procedure Cloning" by Keith D
+ Cooper, Mary W. Hall, and Ken Kennedy.
- The optimization is divided into three stages:
First stage - intraprocedural analysis
=======================================
+
This phase computes jump_function and modification flags.
- A jump function for a callsite represents the values passed as an actual
- arguments of a given callsite. There are three types of values:
- Pass through - the caller's formal parameter is passed as an actual argument.
+ A jump function for a call-site represents the values passed as actual
+ arguments of a given call-site. In principle, there are three types of
+ values:
+
+ Pass through - the caller's formal parameter is passed as an actual
+ argument, plus an operation on it can be performed.
Constant - a constant is passed as an actual argument.
Unknown - neither of the above.
- The jump function info, ipa_jump_func, is stored in ipa_edge_args
- structure (defined in ipa_prop.h and pointed to by cgraph_node->aux)
- modified_flags are defined in ipa_node_params structure
- (defined in ipa_prop.h and pointed to by cgraph_edge->aux).
+ All jump function types are described in detail in ipa-prop.h, together with
+ the data structures that represent them and methods of accessing them.
- -ipcp_generate_summary() is the first stage driver.
+ ipcp_generate_summary() is the main function of the first stage.
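+
+ As a hypothetical illustration (function names invented just for this
+ comment), consider the calls in
+
+ static int add_one (int x)
+ {
+ return x + 1;
+ }
+
+ int caller (int n)
+ {
+ return add_one (n) + add_one (4);
+ }
+
+ The call site add_one (n) gets a pass-through jump function for its argument
+ because the caller's own parameter n is passed unchanged, while add_one (4)
+ gets a constant jump function holding the value 4. An argument computed in a
+ way the analysis does not understand would get an unknown jump function.
+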
Second stage - interprocedural analysis
========================================
- This phase does the interprocedural constant propagation.
- It computes lattices for all formal parameters in the program
- and their value that may be:
- TOP - unknown.
- BOTTOM - non constant.
- CONSTANT - constant value.
- Lattice describing a formal parameter p will have a constant value if all
- callsites invoking this function have the same constant value passed to p.
+ This stage is itself divided into two phases. In the first, we propagate
+ known values over the call graph; in the second, we make cloning decisions.
+ The stage uses a different algorithm than the paper by Callahan et al. cited
+ above.
- The lattices are stored in ipcp_lattice which is itself in ipa_node_params
- structure (defined in ipa_prop.h and pointed to by cgraph_edge->aux).
+ First, we traverse the functions topologically from callers to callees and,
+ for each strongly connected component (SCC), we propagate constants
+ according to previously computed jump functions. We also record what known
+ values depend on other known values and estimate local effects. Finally, we
+ propagate cumulative information about these effects from dependent values
+ to those on which they depend.
- -ipcp_iterate_stage() is the second stage driver.
+ Second, we again traverse the call graph in the same topological order and
+ make clones for functions which we know are called with the same values in
+ all contexts and decide about extra specialized clones of functions just for
+ some contexts - these decisions are based on both local estimates and
+ cumulative estimates propagated from callees.
- Third phase - transformation of function code
+ ipcp_propagate_stage() and ipcp_decision_stage() together constitute the
+ second stage.
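+
+ Continuing the made-up example used earlier in this comment, the propagated
+ lattice of add_one's parameter would end up holding both the constant 4 and
+ a variable component (because n is not known), dumped roughly as
+
+ param [0]: VARIABLE, 4
+
+ so add_one cannot be specialized for all contexts, but the decision phase may
+ still create an extra clone for the value 4 if that looks profitable.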
+
+ Third phase - materialization of clones, call statement updates.
============================================
- Propagates the constant-valued formals into the function.
- For each function whose parameters are constants, we create its clone.
-
- Then we process the clone in two ways:
- 1. We insert an assignment statement 'parameter = const' at the beginning
- of the cloned function.
- 2. For read-only parameters that do not live in memory, we replace all their
- uses with the constant.
-
- We also need to modify some callsites to call the cloned functions instead
- of the original ones. For a callsite passing an argument found to be a
- constant by IPCP, there are two different cases to handle:
- 1. A constant is passed as an argument. In this case the callsite in the
- should be redirected to call the cloned callee.
- 2. A parameter (of the caller) passed as an argument (pass through
- argument). In such cases both the caller and the callee have clones and
- only the callsite in the cloned caller is redirected to call to the
- cloned callee.
-
- This update is done in two steps: First all cloned functions are created
- during a traversal of the call graph, during which all callsites are
- redirected to call the cloned function. Then the callsites are traversed
- and many calls redirected back to fit the description above.
-
- -ipcp_insert_stage() is the third phase driver.
-
-
- This pass also performs devirtualization - turns virtual calls into direct
- ones if it can prove that all invocations of the function call the same
- callee. This is achieved by building a list of all base types (actually,
- their BINFOs) that individual parameters can have in an iterative matter
- just like propagating scalar constants and then examining whether virtual
- calls which take a parameter as their object fold to the same target for all
- these types. If we cannot enumerate all types or there is a type which does
- not have any BINFO associated with it, cannot_devirtualize of the associated
- parameter descriptor is set which is an equivalent of BOTTOM lattice value
- in standard IPA constant propagation.
-*/
+
+ This stage is currently performed by call graph code (mainly in cgraphunit.c
+ and tree-inline.c) according to instructions inserted into the call graph by
+ the second stage. */
#include "config.h"
#include "system.h"
#include "fibheap.h"
#include "params.h"
#include "ipa-inline.h"
+#include "ipa-utils.h"
-/* Number of functions identified as candidates for cloning. When not cloning
- we can simplify iterate stage not forcing it to go through the decision
- on what is profitable and what not. */
-static int n_cloning_candidates;
+struct ipcp_value;
-/* Maximal count found in program. */
-static gcov_type max_count;
+/* Describes a particular source for an IPA-CP value. */
-/* Cgraph nodes that has been completely replaced by cloning during iterate
- * stage and will be removed after ipcp is finished. */
-static bitmap dead_nodes;
+struct ipcp_value_source
+{
+ /* The incoming edge that brought the value. */
+ struct cgraph_edge *cs;
+ /* If the jump function that resulted in this value was a pass-through or an
+ ancestor, this is the ipcp_value of the caller from which the described
+ value has been derived. Otherwise it is NULL. */
+ struct ipcp_value *val;
+ /* Next pointer in a linked list of sources of a value. */
+ struct ipcp_value_source *next;
+ /* If the jump function that resulted in this value was a pass-through or an
+ ancestor, this is the index of the parameter of the caller the jump
+ function references. */
+ int index;
+};
-static void ipcp_print_profile_data (FILE *);
-static void ipcp_function_scale_print (FILE *);
+/* Describes one particular value stored in struct ipcp_lattice. */
-/* Get the original node field of ipa_node_params associated with node NODE. */
-static inline struct cgraph_node *
-ipcp_get_orig_node (struct cgraph_node *node)
+struct ipcp_value
{
- return IPA_NODE_REF (node)->ipcp_orig_node;
-}
+ /* The actual value for the given parameter. This is either an IPA invariant
+ or a TREE_BINFO describing a type that can be used for
+ devirtualization. */
+ tree value;
+ /* The list of sources from which this value originates. */
+ struct ipcp_value_source *sources;
+ /* Next pointers in a linked list of all values in a lattice. */
+ struct ipcp_value *next;
+ /* Next pointers in a linked list of values in a strongly connected component
+ of values. */
+ struct ipcp_value *scc_next;
+ /* Next pointers in a linked list of SCCs of values sorted topologically
+ according to their sources. */
+ struct ipcp_value *topo_next;
+ /* A specialized node created for this value, NULL if none has been (so far)
+ created. */
+ struct cgraph_node *spec_node;
+ /* Depth first search number and low link for topological sorting of
+ values. */
+ int dfs, low_link;
+ /* Time benefit and size cost that specializing the function for this value
+ would bring about in this function alone. */
+ int local_time_benefit, local_size_cost;
+ /* Time benefit and size cost that specializing the function for this value
+ can bring about in its callees (transitively). */
+ int prop_time_benefit, prop_size_cost;
+ /* True if this value is currently on the topo-sort stack. */
+ bool on_stack;
+};
-/* Return true if NODE describes a cloned/versioned function. */
-static inline bool
-ipcp_node_is_clone (struct cgraph_node *node)
-{
- return (ipcp_get_orig_node (node) != NULL);
-}
+/* Allocation pools for values and their sources in ipa-cp. */
-/* Create ipa_node_params and its data structures for NEW_NODE. Set ORIG_NODE
- as the ipcp_orig_node field in ipa_node_params. */
-static void
-ipcp_init_cloned_node (struct cgraph_node *orig_node,
- struct cgraph_node *new_node)
-{
- gcc_checking_assert (ipa_node_params_vector
- && (VEC_length (ipa_node_params_t,
- ipa_node_params_vector)
- > (unsigned) cgraph_max_uid));
- gcc_checking_assert (IPA_NODE_REF (new_node)->params);
- IPA_NODE_REF (new_node)->ipcp_orig_node = orig_node;
-}
+alloc_pool ipcp_values_pool;
+alloc_pool ipcp_sources_pool;
-/* Return scale for NODE. */
-static inline gcov_type
-ipcp_get_node_scale (struct cgraph_node *node)
+/* Lattice describing potential values of a formal parameter of a function and
+ some of their other properties. TOP is represented by a lattice with zero
+ values and with contains_variable and bottom flags cleared. BOTTOM is
+ represented by a lattice with the bottom flag set. In that case, values and
+ contains_variable flag should be disregarded. */
+
+struct ipcp_lattice
{
- return IPA_NODE_REF (node)->count_scale;
-}
+ /* The list of known values and types in this lattice. Note that values are
+ not deallocated if a lattice is set to bottom because there may be value
+ sources referencing them. */
+ struct ipcp_value *values;
+ /* Number of known values and types in this lattice. */
+ int values_count;
+ /* The lattice contains a variable component (in addition to values). */
+ bool contains_variable;
+ /* The value of the lattice is bottom (i.e. variable and unusable for any
+ propagation). */
+ bool bottom;
+ /* There is a virtual call based on this parameter. */
+ bool virt_call;
+};
-/* Set COUNT as scale for NODE. */
-static inline void
-ipcp_set_node_scale (struct cgraph_node *node, gcov_type count)
+/* Maximal count found in program. */
+
+static gcov_type max_count;
+
+/* Original overall size of the program and the maximal size it is allowed to
+ grow to because of specialized clones. */
+
+static long overall_size, max_new_size;
+
+/* Head of the linked list of topologically sorted values. */
+
+static struct ipcp_value *values_topo;
+
+/* Return the lattice corresponding to the Ith formal parameter of the function
+ described by INFO. */
+static inline struct ipcp_lattice *
+ipa_get_lattice (struct ipa_node_params *info, int i)
{
- IPA_NODE_REF (node)->count_scale = count;
+ gcc_assert (i >= 0 && i < ipa_get_param_count (info));
+ gcc_checking_assert (!info->ipcp_orig_node);
+ gcc_checking_assert (info->lattices);
+ return &(info->lattices[i]);
}
-/* Return whether LAT is a constant lattice. */
+/* Return whether LAT is a lattice with a single constant and without an
+ undefined value. */
+
static inline bool
-ipcp_lat_is_const (struct ipcp_lattice *lat)
+ipa_lat_is_single_const (struct ipcp_lattice *lat)
{
- if (lat->type == IPA_CONST_VALUE)
- return true;
- else
+ if (lat->bottom
+ || lat->contains_variable
+ || lat->values_count != 1)
return false;
+ else
+ return true;
}
-/* Return whether LAT is a constant lattice that ipa-cp can actually insert
- into the code (i.e. constants excluding member pointers and pointers). */
-static inline bool
-ipcp_lat_is_insertable (struct ipcp_lattice *lat)
-{
- return lat->type == IPA_CONST_VALUE;
-}
+/* Return true iff the CS is an edge within a strongly connected component as
+ computed by ipa_reduced_postorder. */
-/* Return true if LAT1 and LAT2 are equal. */
static inline bool
-ipcp_lats_are_equal (struct ipcp_lattice *lat1, struct ipcp_lattice *lat2)
+edge_within_scc (struct cgraph_edge *cs)
{
- gcc_assert (ipcp_lat_is_const (lat1) && ipcp_lat_is_const (lat2));
- if (lat1->type != lat2->type)
- return false;
-
- if (TREE_CODE (lat1->constant) == ADDR_EXPR
- && TREE_CODE (lat2->constant) == ADDR_EXPR
- && TREE_CODE (TREE_OPERAND (lat1->constant, 0)) == CONST_DECL
- && TREE_CODE (TREE_OPERAND (lat2->constant, 0)) == CONST_DECL)
- return operand_equal_p (DECL_INITIAL (TREE_OPERAND (lat1->constant, 0)),
- DECL_INITIAL (TREE_OPERAND (lat2->constant, 0)), 0);
- else
- return operand_equal_p (lat1->constant, lat2->constant, 0);
+ struct ipa_dfs_info *caller_dfs = (struct ipa_dfs_info *) cs->caller->aux;
+ struct ipa_dfs_info *callee_dfs;
+ struct cgraph_node *callee = cgraph_function_node (cs->callee, NULL);
+
+ callee_dfs = (struct ipa_dfs_info *) callee->aux;
+ return (caller_dfs
+ && callee_dfs
+ && caller_dfs->scc_no == callee_dfs->scc_no);
}
-/* Compute Meet arithmetics:
- Meet (IPA_BOTTOM, x) = IPA_BOTTOM
- Meet (IPA_TOP,x) = x
- Meet (const_a,const_b) = IPA_BOTTOM, if const_a != const_b.
- MEET (const_a,const_b) = const_a, if const_a == const_b.*/
+/* Print V which is extracted from a value in a lattice to F. */
+
static void
-ipa_lattice_meet (struct ipcp_lattice *res, struct ipcp_lattice *lat1,
- struct ipcp_lattice *lat2)
+print_ipcp_constant_value (FILE * f, tree v)
{
- if (lat1->type == IPA_BOTTOM || lat2->type == IPA_BOTTOM)
+ if (TREE_CODE (v) == TREE_BINFO)
{
- res->type = IPA_BOTTOM;
- return;
+ fprintf (f, "BINFO ");
+ print_generic_expr (f, BINFO_TYPE (v), 0);
}
- if (lat1->type == IPA_TOP)
+ else if (TREE_CODE (v) == ADDR_EXPR
+ && TREE_CODE (TREE_OPERAND (v, 0)) == CONST_DECL)
{
- res->type = lat2->type;
- res->constant = lat2->constant;
- return;
+ fprintf (f, "& ");
+ print_generic_expr (f, DECL_INITIAL (TREE_OPERAND (v, 0)), 0);
}
- if (lat2->type == IPA_TOP)
- {
- res->type = lat1->type;
- res->constant = lat1->constant;
- return;
- }
- if (!ipcp_lats_are_equal (lat1, lat2))
- {
- res->type = IPA_BOTTOM;
- return;
- }
- res->type = lat1->type;
- res->constant = lat1->constant;
-}
-
-/* True when OLD_LAT and NEW_LAT values are not the same. */
-
-static bool
-ipcp_lattice_changed (struct ipcp_lattice *old_lat,
- struct ipcp_lattice *new_lat)
-{
- if (old_lat->type == new_lat->type)
- {
- if (!ipcp_lat_is_const (old_lat))
- return false;
- if (ipcp_lats_are_equal (old_lat, new_lat))
- return false;
- }
- return true;
+ else
+ print_generic_expr (f, v, 0);
}
/* Print all ipcp_lattices of all functions to F. */
+
static void
-ipcp_print_all_lattices (FILE * f)
+print_all_lattices (FILE * f, bool dump_sources, bool dump_benefits)
{
struct cgraph_node *node;
int i, count;
- fprintf (f, "\nLattice:\n");
- for (node = cgraph_nodes; node; node = node->next)
+ fprintf (f, "\nLattices:\n");
+ FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
{
struct ipa_node_params *info;
- if (!node->analyzed)
- continue;
info = IPA_NODE_REF (node);
- fprintf (f, " Node: %s:\n", cgraph_node_name (node));
+ fprintf (f, " Node: %s/%i:\n", cgraph_node_name (node), node->uid);
count = ipa_get_param_count (info);
for (i = 0; i < count; i++)
{
struct ipcp_lattice *lat = ipa_get_lattice (info, i);
+ struct ipcp_value *val;
+ bool prev = false;
fprintf (f, " param [%d]: ", i);
- if (lat->type == IPA_CONST_VALUE)
+ if (lat->bottom)
+ {
+ fprintf (f, "BOTTOM\n");
+ continue;
+ }
+
+ if (!lat->values_count && !lat->contains_variable)
+ {
+ fprintf (f, "TOP\n");
+ continue;
+ }
+
+ if (lat->contains_variable)
+ {
+ fprintf (f, "VARIABLE");
+ prev = true;
+ if (dump_benefits)
+ fprintf (f, "\n");
+ }
+
+ for (val = lat->values; val; val = val->next)
{
- tree cst = lat->constant;
- fprintf (f, "type is CONST ");
- print_generic_expr (f, cst, 0);
- if (TREE_CODE (cst) == ADDR_EXPR
- && TREE_CODE (TREE_OPERAND (cst, 0)) == CONST_DECL)
+ if (dump_benefits && prev)
+ fprintf (f, " ");
+ else if (!dump_benefits && prev)
+ fprintf (f, ", ");
+ else
+ prev = true;
+
+ print_ipcp_constant_value (f, val->value);
+
+ if (dump_sources)
{
- fprintf (f, " -> ");
- print_generic_expr (f, DECL_INITIAL (TREE_OPERAND (cst, 0)),
- 0);
+ struct ipcp_value_source *s;
+
+ fprintf (f, " [from:");
+ for (s = val->sources; s; s = s->next)
+ fprintf (f, " %i(%i)", s->cs->caller->uid,s->cs->frequency);
+ fprintf (f, "]");
}
+
+ if (dump_benefits)
+ fprintf (f, " [loc_time: %i, loc_size: %i, "
+ "prop_time: %i, prop_size: %i]\n",
+ val->local_time_benefit, val->local_size_cost,
+ val->prop_time_benefit, val->prop_size_cost);
}
- else if (lat->type == IPA_TOP)
- fprintf (f, "type is TOP");
- else
- fprintf (f, "type is BOTTOM");
- if (ipa_param_cannot_devirtualize_p (info, i))
- fprintf (f, " - cannot_devirtualize set\n");
- else if (ipa_param_types_vec_empty (info, i))
- fprintf (f, " - type list empty\n");
- else
+ if (!dump_benefits)
fprintf (f, "\n");
}
}
}
-/* Return true if ipcp algorithms would allow cloning NODE. */
+/* Determine whether it is at all technically possible to create clones of NODE
+ and store this information in the ipa_node_params structure associated
+ with NODE. */
-static bool
-ipcp_versionable_function_p (struct cgraph_node *node)
+static void
+determine_versionability (struct cgraph_node *node)
{
struct cgraph_edge *edge;
-
- /* We always version the actual function and redirect through the aliases. */
- if (node->alias)
- return false;
-
- /* We don't know how to clone thunks. */
- if (node->thunk.thunk_p)
- return false;
+ const char *reason = NULL;
/* There are a number of generic reasons functions cannot be versioned. We
also cannot remove parameters if there are type attributes such as fnspec
present. */
- if (!inline_summary (node)->versionable
- || TYPE_ATTRIBUTES (TREE_TYPE (node->decl)))
- return false;
+ if (node->alias || node->thunk.thunk_p)
+ reason = "alias or thunk";
+ else if (!inline_summary (node)->versionable)
+ reason = "inliner claims it is so";
+ else if (TYPE_ATTRIBUTES (TREE_TYPE (node->decl)))
+ reason = "there are type attributes";
+ else if (cgraph_function_body_availability (node) <= AVAIL_OVERWRITABLE)
+ reason = "insufficient body availability";
+ else
+ /* Removing arguments doesn't work if the function takes varargs
+ or uses __builtin_apply_args.
+ FIXME: handle this together with can_change_signature flag. */
+ for (edge = node->callees; edge; edge = edge->next_callee)
+ {
+ tree t = edge->callee->decl;
+ if (DECL_BUILT_IN_CLASS (t) == BUILT_IN_NORMAL
+ && (DECL_FUNCTION_CODE (t) == BUILT_IN_APPLY_ARGS
+ || DECL_FUNCTION_CODE (t) == BUILT_IN_VA_START))
+ {
+ reason = "prohibitive builtins called";
+ break;
+ };
+ }
- /* Removing arguments doesn't work if the function takes varargs
- or use __builtin_apply_args.
- FIXME: handle this together with can_change_signature flag. */
- for (edge = node->callees; edge; edge = edge->next_callee)
- {
- tree t = edge->callee->decl;
- if (DECL_BUILT_IN_CLASS (t) == BUILT_IN_NORMAL
- && (DECL_FUNCTION_CODE (t) == BUILT_IN_APPLY_ARGS
- || DECL_FUNCTION_CODE (t) == BUILT_IN_VA_START))
- return false;
- }
+ if (reason && dump_file && !node->alias && !node->thunk.thunk_p)
+ fprintf (dump_file, "Function %s/%i is not versionable, reason: %s.\n",
+ cgraph_node_name (node), node->uid, reason);
- return true;
+ IPA_NODE_REF (node)->node_versionable = (reason == NULL);
}
-/* Return true if this NODE is viable candidate for cloning. */
+/* Return true if it is at all technically possible to create clones of a
+ NODE. */
+
static bool
-ipcp_cloning_candidate_p (struct cgraph_node *node)
+ipcp_versionable_function_p (struct cgraph_node *node)
{
- int n_calls = 0;
- int n_hot_calls = 0;
- gcov_type direct_call_sum = 0;
- struct cgraph_edge *e;
+ return IPA_NODE_REF (node)->node_versionable;
+}
- /* We look through aliases, so we clone the aliased function instead. */
- if (node->alias)
- return false;
+/* Structure holding accumulated information about callers of a node. */
- /* We never clone functions that are not visible from outside.
- FIXME: in future we should clone such functions when they are called with
- different constants, but current ipcp implementation is not good on this.
- */
- if (cgraph_only_called_directly_p (node) || !node->analyzed)
- return false;
+struct caller_statistics
+{
+ gcov_type count_sum;
+ int n_calls, n_hot_calls, freq_sum;
+};
- /* When function address is taken, we are pretty sure it will be called in hidden way. */
- if (node->address_taken)
- {
- if (dump_file)
- fprintf (dump_file, "Not considering %s for cloning; address is taken.\n",
- cgraph_node_name (node));
- return false;
- }
+/* Initialize fields of STAT to zeroes. */
- if (cgraph_function_body_availability (node) <= AVAIL_OVERWRITABLE)
- {
- if (dump_file)
- fprintf (dump_file, "Not considering %s for cloning; body is overwritable.\n",
- cgraph_node_name (node));
- return false;
- }
- if (!ipcp_versionable_function_p (node))
- {
- if (dump_file)
- fprintf (dump_file, "Not considering %s for cloning; body is not versionable.\n",
- cgraph_node_name (node));
- return false;
- }
- for (e = node->callers; e; e = e->next_caller)
- {
- direct_call_sum += e->count;
- n_calls ++;
- if (cgraph_maybe_hot_edge_p (e))
- n_hot_calls ++;
- }
+static inline void
+init_caller_stats (struct caller_statistics *stats)
+{
+ stats->count_sum = 0;
+ stats->n_calls = 0;
+ stats->n_hot_calls = 0;
+ stats->freq_sum = 0;
+}
+
+/* Worker callback of cgraph_for_node_and_aliases accumulating statistics of
+ non-thunk incoming edges to NODE. */
+
+static bool
+gather_caller_stats (struct cgraph_node *node, void *data)
+{
+ struct caller_statistics *stats = (struct caller_statistics *) data;
+ struct cgraph_edge *cs;
+
+ for (cs = node->callers; cs; cs = cs->next_caller)
+ if (cs->caller->thunk.thunk_p)
+ cgraph_for_node_and_aliases (cs->caller, gather_caller_stats,
+ stats, false);
+ else
+ {
+ stats->count_sum += cs->count;
+ stats->freq_sum += cs->frequency;
+ stats->n_calls++;
+ if (cgraph_maybe_hot_edge_p (cs))
+ stats->n_hot_calls ++;
+ }
+ return false;
+
+}
+
+/* Return true if this NODE is viable candidate for cloning. */
+
+static bool
+ipcp_cloning_candidate_p (struct cgraph_node *node)
+{
+ struct caller_statistics stats;
+
+ gcc_checking_assert (cgraph_function_with_gimple_body_p (node));
- if (!n_calls)
+ if (!flag_ipa_cp_clone)
{
if (dump_file)
- fprintf (dump_file, "Not considering %s for cloning; no direct calls.\n",
+ fprintf (dump_file, "Not considering %s for cloning; "
+ "-fipa-cp-clone disabled.\n",
cgraph_node_name (node));
return false;
}
- if (inline_summary (node)->self_size < n_calls)
- {
- if (dump_file)
- fprintf (dump_file, "Considering %s for cloning; code would shrink.\n",
- cgraph_node_name (node));
- return true;
- }
- if (!flag_ipa_cp_clone)
+ if (!optimize_function_for_speed_p (DECL_STRUCT_FUNCTION (node->decl)))
{
if (dump_file)
- fprintf (dump_file, "Not considering %s for cloning; -fipa-cp-clone disabled.\n",
+ fprintf (dump_file, "Not considering %s for cloning; "
+ "optimizing it for size.\n",
cgraph_node_name (node));
return false;
}
- if (!optimize_function_for_speed_p (DECL_STRUCT_FUNCTION (node->decl)))
+ init_caller_stats (&stats);
+ cgraph_for_node_and_aliases (node, gather_caller_stats, &stats, false);
+
+ if (inline_summary (node)->self_size < stats.n_calls)
{
if (dump_file)
- fprintf (dump_file, "Not considering %s for cloning; optimizing it for size.\n",
+ fprintf (dump_file, "Considering %s for cloning; code might shrink.\n",
cgraph_node_name (node));
- return false;
+ return true;
}
  /* When profile is available and the function is hot, propagate into it even
     if the calls seem cold otherwise; constant propagation can improve the
     function's speed significantly. */
if (max_count)
{
- if (direct_call_sum > node->count * 90 / 100)
+ if (stats.count_sum > node->count * 90 / 100)
{
if (dump_file)
- fprintf (dump_file, "Considering %s for cloning; usually called directly.\n",
+ fprintf (dump_file, "Considering %s for cloning; "
+ "usually called directly.\n",
cgraph_node_name (node));
return true;
}
}
- if (!n_hot_calls)
+ if (!stats.n_hot_calls)
{
if (dump_file)
fprintf (dump_file, "Not considering %s for cloning; no hot calls.\n",
return true;
}
-/* Mark parameter with index I of function described by INFO as unsuitable for
- devirtualization. Return true if it has already been marked so. */
+/* Arrays representing a topological ordering of call graph nodes and a stack
+ of nodes used during constant propagation. */
-static bool
-ipa_set_param_cannot_devirtualize (struct ipa_node_params *info, int i)
+struct topo_info
{
- bool ret = info->params[i].cannot_devirtualize;
- info->params[i].cannot_devirtualize = true;
- if (info->params[i].types)
- VEC_free (tree, heap, info->params[i].types);
- return ret;
+ struct cgraph_node **order;
+ struct cgraph_node **stack;
+ int nnodes, stack_top;
+};
+
+/* Allocate the arrays in TOPO and topologically sort the nodes into order. */
+
+static void
+build_toporder_info (struct topo_info *topo)
+{
+ topo->order = XCNEWVEC (struct cgraph_node *, cgraph_n_nodes);
+ topo->stack = XCNEWVEC (struct cgraph_node *, cgraph_n_nodes);
+ topo->stack_top = 0;
+ topo->nnodes = ipa_reduced_postorder (topo->order, true, true, NULL);
}
-/* Initialize ipcp_lattices array. The lattices corresponding to supported
- types (integers, real types and Fortran constants defined as const_decls)
- are initialized to IPA_TOP, the rest of them to IPA_BOTTOM. */
+/* Free information about strongly connected components and the arrays in
+ TOPO. */
+
static void
-ipcp_initialize_node_lattices (struct cgraph_node *node)
+free_toporder_info (struct topo_info *topo)
+{
+ ipa_free_postorder_info ();
+ free (topo->order);
+ free (topo->stack);
+}
+
+/* Add NODE to the stack in TOPO, unless it is already there. */
+
+static inline void
+push_node_to_stack (struct topo_info *topo, struct cgraph_node *node)
{
- int i;
struct ipa_node_params *info = IPA_NODE_REF (node);
- enum ipa_lattice_type type;
+ if (info->node_enqueued)
+ return;
+ info->node_enqueued = 1;
+ topo->stack[topo->stack_top++] = node;
+}
- if (ipa_is_called_with_var_arguments (info))
- type = IPA_BOTTOM;
- /* We don't know how to clone thunks even when they are local. */
- else if (node->local.local
- && !node->thunk.thunk_p)
- type = IPA_TOP;
- /* When cloning is allowed, we can assume that externally visible functions
- are not called. We will compensate this by cloning later. */
- else if (ipcp_cloning_candidate_p (node))
- type = IPA_TOP, n_cloning_candidates ++;
- else
- type = IPA_BOTTOM;
+/* Pop a node from the stack in TOPO and return it or return NULL if the stack
+ is empty. */
- for (i = 0; i < ipa_get_param_count (info) ; i++)
+static struct cgraph_node *
+pop_node_from_stack (struct topo_info *topo)
+{
+ if (topo->stack_top)
{
- ipa_get_lattice (info, i)->type = type;
- if (type == IPA_BOTTOM)
- ipa_set_param_cannot_devirtualize (info, i);
+ struct cgraph_node *node;
+ topo->stack_top--;
+ node = topo->stack[topo->stack_top];
+ IPA_NODE_REF (node)->node_enqueued = 0;
+ return node;
}
+ else
+ return NULL;
}
-/* Build a constant tree with type TREE_TYPE and value according to LAT.
- Return the tree, or, if it is not possible to convert such value
- to TREE_TYPE, NULL. */
-static tree
-build_const_val (struct ipcp_lattice *lat, tree tree_type)
+/* Set lattice LAT to bottom and return true if it previously was not set as
+ such. */
+
+static inline bool
+set_lattice_to_bottom (struct ipcp_lattice *lat)
{
- tree val;
+ bool ret = !lat->bottom;
+ lat->bottom = true;
+ return ret;
+}
- gcc_assert (ipcp_lat_is_const (lat));
- val = lat->constant;
+/* Mark lattice as containing an unknown value and return true if it previously
+ was not marked as such. */
- if (!useless_type_conversion_p (tree_type, TREE_TYPE (val)))
- {
- if (fold_convertible_p (tree_type, val))
- return fold_build1 (NOP_EXPR, tree_type, val);
- else if (TYPE_SIZE (tree_type) == TYPE_SIZE (TREE_TYPE (val)))
- return fold_build1 (VIEW_CONVERT_EXPR, tree_type, val);
- else
- return NULL;
- }
- return val;
+static inline bool
+set_lattice_contains_variable (struct ipcp_lattice *lat)
+{
+ bool ret = !lat->contains_variable;
+ lat->contains_variable = true;
+ return ret;
}
-/* Compute the proper scale for NODE. It is the ratio between the number of
- direct calls (represented on the incoming cgraph_edges) and sum of all
- invocations of NODE (represented as count in cgraph_node).
+/* Initialize ipcp_lattices. */
- FIXME: This code is wrong. Since the callers can be also clones and
- the clones are not scaled yet, the sums gets unrealistically high.
- To properly compute the counts, we would need to do propagation across
- callgraph (as external call to A might imply call to non-cloned B
- if A's clone calls cloned B). */
static void
-ipcp_compute_node_scale (struct cgraph_node *node)
+initialize_node_lattices (struct cgraph_node *node)
{
- gcov_type sum;
- struct cgraph_edge *cs;
+ struct ipa_node_params *info = IPA_NODE_REF (node);
+ struct cgraph_edge *ie;
+ bool disable = false, variable = false;
+ int i;
- sum = 0;
- /* Compute sum of all counts of callers. */
- for (cs = node->callers; cs != NULL; cs = cs->next_caller)
- sum += cs->count;
- /* Work around the unrealistically high sum problem. We just don't want
- the non-cloned body to have negative or very low frequency. Since
- majority of execution time will be spent in clones anyway, this should
- give good enough profile. */
- if (sum > node->count * 9 / 10)
- sum = node->count * 9 / 10;
- if (node->count == 0)
- ipcp_set_node_scale (node, 0);
- else
- ipcp_set_node_scale (node, sum * REG_BR_PROB_BASE / node->count);
-}
+ gcc_checking_assert (cgraph_function_with_gimple_body_p (node));
+ if (ipa_is_called_with_var_arguments (info))
+ disable = true;
+ else if (!node->local.local)
+ {
+ /* When cloning is allowed, we can assume that externally visible
+ functions are not called. We will compensate this by cloning
+ later. */
+ if (ipcp_versionable_function_p (node)
+ && ipcp_cloning_candidate_p (node))
+ variable = true;
+ else
+ disable = true;
+ }
-/* Return true if there are some formal parameters whose value is IPA_TOP (in
- the whole compilation unit). Change their values to IPA_BOTTOM, since they
- most probably get their values from outside of this compilation unit. */
-static bool
-ipcp_change_tops_to_bottom (void)
-{
- int i, count;
- struct cgraph_node *node;
- bool prop_again;
+ if (disable || variable)
+ {
+ for (i = 0; i < ipa_get_param_count (info) ; i++)
+ {
+ struct ipcp_lattice *lat = ipa_get_lattice (info, i);
+ if (disable)
+ set_lattice_to_bottom (lat);
+ else
+ set_lattice_contains_variable (lat);
+ }
+ if (dump_file && (dump_flags & TDF_DETAILS)
+ && node->alias && node->thunk.thunk_p)
+ fprintf (dump_file, "Marking all lattices of %s/%i as %s\n",
+ cgraph_node_name (node), node->uid,
+ disable ? "BOTTOM" : "VARIABLE");
+ }
- prop_again = false;
- for (node = cgraph_nodes; node; node = node->next)
- if (!node->alias)
+ for (ie = node->indirect_calls; ie; ie = ie->next_callee)
+ if (ie->indirect_info->polymorphic)
{
- struct ipa_node_params *info = IPA_NODE_REF (node);
- count = ipa_get_param_count (info);
- for (i = 0; i < count; i++)
- {
- struct ipcp_lattice *lat = ipa_get_lattice (info, i);
- if (lat->type == IPA_TOP)
- {
- prop_again = true;
- if (dump_file)
- {
- fprintf (dump_file, "Forcing param ");
- print_generic_expr (dump_file, ipa_get_param (info, i), 0);
- fprintf (dump_file, " of node %s to bottom.\n",
- cgraph_node_name (node));
- }
- lat->type = IPA_BOTTOM;
- }
- if (!ipa_param_cannot_devirtualize_p (info, i)
- && ipa_param_types_vec_empty (info, i))
- {
- prop_again = true;
- ipa_set_param_cannot_devirtualize (info, i);
- if (dump_file)
- {
- fprintf (dump_file, "Marking param ");
- print_generic_expr (dump_file, ipa_get_param (info, i), 0);
- fprintf (dump_file, " of node %s as unusable for "
- "devirtualization.\n",
- cgraph_node_name (node));
- }
- }
- }
+ gcc_checking_assert (ie->indirect_info->param_index >= 0);
+ ipa_get_lattice (info, ie->indirect_info->param_index)->virt_call = 1;
}
- return prop_again;
}
-/* Insert BINFO to the list of known types of parameter number I of the
- function described by CALLEE_INFO. Return true iff the type information
- associated with the callee parameter changed in any way. */
+/* Return the result of a (possibly arithmetic) pass through jump function
+ JFUNC on the constant value INPUT. Return NULL_TREE if the result cannot be
+ determined or is not considered an interprocedural invariant. */
-static bool
-ipcp_add_param_type (struct ipa_node_params *callee_info, int i, tree binfo)
+static tree
+ipa_get_jf_pass_through_result (struct ipa_jump_func *jfunc, tree input)
{
- int j, count;
+ tree restype, res;
- if (ipa_param_cannot_devirtualize_p (callee_info, i))
- return false;
+ gcc_checking_assert (is_gimple_ip_invariant (input));
+ if (jfunc->value.pass_through.operation == NOP_EXPR)
+ return input;
- if (callee_info->params[i].types)
- {
- count = VEC_length (tree, callee_info->params[i].types);
- for (j = 0; j < count; j++)
- if (VEC_index (tree, callee_info->params[i].types, j) == binfo)
- return false;
- }
+ if (TREE_CODE_CLASS (jfunc->value.pass_through.operation)
+ == tcc_comparison)
+ restype = boolean_type_node;
+ else
+ restype = TREE_TYPE (input);
+ res = fold_binary (jfunc->value.pass_through.operation, restype,
+ input, jfunc->value.pass_through.operand);
- if (VEC_length (tree, callee_info->params[i].types)
- == (unsigned) PARAM_VALUE (PARAM_DEVIRT_TYPE_LIST_SIZE))
- return !ipa_set_param_cannot_devirtualize (callee_info, i);
+ if (res && !is_gimple_ip_invariant (res))
+ return NULL_TREE;
- VEC_safe_push (tree, heap, callee_info->params[i].types, binfo);
- return true;
+ return res;
}
-/* Copy known types information for parameter number CALLEE_IDX of CALLEE_INFO
- from a parameter of CALLER_INFO as described by JF. Return true iff the
- type information changed in any way. JF must be a pass-through or an
- ancestor jump function. */
+/* Return the result of an ancestor jump function JFUNC on the constant value
+ INPUT. Return NULL_TREE if that cannot be determined. */
-static bool
-ipcp_copy_types (struct ipa_node_params *caller_info,
- struct ipa_node_params *callee_info,
- int callee_idx, struct ipa_jump_func *jf)
+static tree
+ipa_get_jf_ancestor_result (struct ipa_jump_func *jfunc, tree input)
{
- int caller_idx, j, count;
- bool res;
-
- if (ipa_param_cannot_devirtualize_p (callee_info, callee_idx))
- return false;
-
- if (jf->type == IPA_JF_PASS_THROUGH)
+ if (TREE_CODE (input) == ADDR_EXPR)
{
- if (jf->value.pass_through.operation != NOP_EXPR)
- {
- ipa_set_param_cannot_devirtualize (callee_info, callee_idx);
- return true;
- }
- caller_idx = jf->value.pass_through.formal_id;
+ tree t = TREE_OPERAND (input, 0);
+ t = build_ref_for_offset (EXPR_LOCATION (t), t,
+ jfunc->value.ancestor.offset,
+ jfunc->value.ancestor.type, NULL, false);
+ return build_fold_addr_expr (t);
}
else
- caller_idx = jf->value.ancestor.formal_id;
+ return NULL_TREE;
+}
- if (ipa_param_cannot_devirtualize_p (caller_info, caller_idx))
+/* Determine whether JFUNC evaluates to a known value (that is either a
+ constant or a binfo) and if so, return it. Otherwise return NULL. INFO
+ describes the caller node so that pass-through jump functions can be
+ evaluated. */
+
+static tree
+ipa_value_from_jfunc (struct ipa_node_params *info, struct ipa_jump_func *jfunc)
+{
+ if (jfunc->type == IPA_JF_CONST)
+ return jfunc->value.constant;
+ else if (jfunc->type == IPA_JF_KNOWN_TYPE)
+ return jfunc->value.base_binfo;
+ else if (jfunc->type == IPA_JF_PASS_THROUGH
+ || jfunc->type == IPA_JF_ANCESTOR)
{
- ipa_set_param_cannot_devirtualize (callee_info, callee_idx);
- return true;
- }
+ tree input;
+ int idx;
- if (!caller_info->params[caller_idx].types)
- return false;
+ if (jfunc->type == IPA_JF_PASS_THROUGH)
+ idx = jfunc->value.pass_through.formal_id;
+ else
+ idx = jfunc->value.ancestor.formal_id;
- res = false;
- count = VEC_length (tree, caller_info->params[caller_idx].types);
- for (j = 0; j < count; j++)
- {
- tree binfo = VEC_index (tree, caller_info->params[caller_idx].types, j);
- if (jf->type == IPA_JF_ANCESTOR)
+ if (info->ipcp_orig_node)
+ input = VEC_index (tree, info->known_vals, idx);
+ else
{
- binfo = get_binfo_at_offset (binfo, jf->value.ancestor.offset,
- jf->value.ancestor.type);
- if (!binfo)
+ struct ipcp_lattice *lat;
+
+ if (!info->lattices)
{
- ipa_set_param_cannot_devirtualize (callee_info, callee_idx);
- return true;
+ gcc_checking_assert (!flag_ipa_cp);
+ return NULL_TREE;
}
+ lat = ipa_get_lattice (info, idx);
+ if (!ipa_lat_is_single_const (lat))
+ return NULL_TREE;
+ input = lat->values->value;
+ }
+
+ if (!input)
+ return NULL_TREE;
+
+ if (jfunc->type == IPA_JF_PASS_THROUGH)
+ {
+ if (jfunc->value.pass_through.operation == NOP_EXPR)
+ return input;
+ else if (TREE_CODE (input) == TREE_BINFO)
+ return NULL_TREE;
+ else
+ return ipa_get_jf_pass_through_result (jfunc, input);
+ }
+ else
+ {
+ if (TREE_CODE (input) == TREE_BINFO)
+ return get_binfo_at_offset (input, jfunc->value.ancestor.offset,
+ jfunc->value.ancestor.type);
+ else
+ return ipa_get_jf_ancestor_result (jfunc, input);
}
- res |= ipcp_add_param_type (callee_info, callee_idx, binfo);
}
- return res;
+ else
+ return NULL_TREE;
}
-/* Propagate type information for parameter of CALLEE_INFO number I as
- described by JF. CALLER_INFO describes the caller. Return true iff the
- type information changed in any way. */
+/* Determine whether JFUNC evaluates to a constant and if so, return it.
+ Otherwise return NULL. INFO describes the caller node so that pass-through
+ jump functions can be evaluated. */
-static bool
-ipcp_propagate_types (struct ipa_node_params *caller_info,
- struct ipa_node_params *callee_info,
- struct ipa_jump_func *jf, int i)
+tree
+ipa_cst_from_jfunc (struct ipa_node_params *info, struct ipa_jump_func *jfunc)
{
- switch (jf->type)
- {
- case IPA_JF_UNKNOWN:
- case IPA_JF_CONST_MEMBER_PTR:
- case IPA_JF_CONST:
- break;
+ tree res = ipa_value_from_jfunc (info, jfunc);
- case IPA_JF_KNOWN_TYPE:
- return ipcp_add_param_type (callee_info, i, jf->value.base_binfo);
+ if (res && TREE_CODE (res) == TREE_BINFO)
+ return NULL_TREE;
+ else
+ return res;
+}
- case IPA_JF_PASS_THROUGH:
- case IPA_JF_ANCESTOR:
- return ipcp_copy_types (caller_info, callee_info, i, jf);
- }
- /* If we reach this we cannot use this parameter for devirtualization. */
- return !ipa_set_param_cannot_devirtualize (callee_info, i);
-}
+/* If checking is enabled, verify that no lattice is in the TOP state, i.e. not
+ bottom, not containing a variable component and without any known value at
+ the same time. */
-/* Interprocedural analysis. The algorithm propagates constants from the
- caller's parameters to the callee's arguments. */
-static void
-ipcp_propagate_stage (void)
+DEBUG_FUNCTION void
+ipcp_verify_propagated_values (void)
{
- int i;
- struct ipcp_lattice inc_lat = { IPA_BOTTOM, NULL };
- struct ipcp_lattice new_lat = { IPA_BOTTOM, NULL };
- struct ipcp_lattice *dest_lat;
- struct cgraph_edge *cs;
- struct ipa_jump_func *jump_func;
- struct ipa_func_list *wl;
- int count;
-
- ipa_check_create_node_params ();
- ipa_check_create_edge_args ();
+ struct cgraph_node *node;
- /* Initialize worklist to contain all functions. */
- wl = ipa_init_func_list ();
- while (wl)
+ FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
{
- struct cgraph_node *node = ipa_pop_func_from_list (&wl);
struct ipa_node_params *info = IPA_NODE_REF (node);
+ int i, count = ipa_get_param_count (info);
- for (cs = node->callees; cs; cs = cs->next_callee)
+ for (i = 0; i < count; i++)
{
- struct cgraph_node *callee = cgraph_function_or_thunk_node (cs->callee, NULL);
- struct ipa_node_params *callee_info = IPA_NODE_REF (callee);
- struct ipa_edge_args *args = IPA_EDGE_REF (cs);
-
- if (ipa_is_called_with_var_arguments (callee_info)
- || !cs->callee->analyzed
- || ipa_is_called_with_var_arguments (callee_info))
- continue;
+ struct ipcp_lattice *lat = ipa_get_lattice (info, i);
- count = ipa_get_cs_argument_count (args);
- for (i = 0; i < count; i++)
+ if (!lat->bottom
+ && !lat->contains_variable
+ && lat->values_count == 0)
{
- jump_func = ipa_get_ith_jump_func (args, i);
- ipa_lattice_from_jfunc (info, &inc_lat, jump_func);
- dest_lat = ipa_get_lattice (callee_info, i);
- ipa_lattice_meet (&new_lat, &inc_lat, dest_lat);
- if (ipcp_lattice_changed (&new_lat, dest_lat))
+ if (dump_file)
{
- dest_lat->type = new_lat.type;
- dest_lat->constant = new_lat.constant;
- ipa_push_func_to_list (&wl, callee);
+ fprintf (dump_file, "\nIPA lattices after constant "
+ "propagation:\n");
+ print_all_lattices (dump_file, true, false);
}
- if (ipcp_propagate_types (info, callee_info, jump_func, i))
- ipa_push_func_to_list (&wl, callee);
+ gcc_unreachable ();
}
}
}
}
-/* Call the constant propagation algorithm and re-call it if necessary
- (if there are undetermined values left). */
+/* Return true iff X and Y should be considered equal values by IPA-CP. */
+
+static bool
+values_equal_for_ipcp_p (tree x, tree y)
+{
+ gcc_checking_assert (x != NULL_TREE && y != NULL_TREE);
+
+ if (x == y)
+ return true;
+
+ if (TREE_CODE (x) == TREE_BINFO || TREE_CODE (y) == TREE_BINFO)
+ return false;
+
+ if (TREE_CODE (x) == ADDR_EXPR
+ && TREE_CODE (y) == ADDR_EXPR
+ && TREE_CODE (TREE_OPERAND (x, 0)) == CONST_DECL
+ && TREE_CODE (TREE_OPERAND (y, 0)) == CONST_DECL)
+ return operand_equal_p (DECL_INITIAL (TREE_OPERAND (x, 0)),
+ DECL_INITIAL (TREE_OPERAND (y, 0)), 0);
+ else
+ return operand_equal_p (x, y, 0);
+}
+
+/* Add a new value source to VAL, marking that a value comes from edge CS and
+ (if the underlying jump function is a pass-through or an ancestor one) from
+ a caller value SRC_VAL of a caller parameter described by SRC_INDEX. */
+
static void
-ipcp_iterate_stage (void)
+add_value_source (struct ipcp_value *val, struct cgraph_edge *cs,
+ struct ipcp_value *src_val, int src_idx)
{
- struct cgraph_node *node;
- n_cloning_candidates = 0;
+ struct ipcp_value_source *src;
- if (dump_file)
- fprintf (dump_file, "\nIPA iterate stage:\n\n");
+ src = (struct ipcp_value_source *) pool_alloc (ipcp_sources_pool);
+ src->cs = cs;
+ src->val = src_val;
+ src->index = src_idx;
- if (in_lto_p)
- ipa_update_after_lto_read ();
+ src->next = val->sources;
+ val->sources = src;
+}
+
+
+/* Try to add NEWVAL to LAT, potentially creating a new struct ipcp_value for
+ it. CS, SRC_VAL and SRC_INDEX are meant for add_value_source and have the
+ same meaning. */
+
+static bool
+add_value_to_lattice (struct ipcp_lattice *lat, tree newval,
+ struct cgraph_edge *cs, struct ipcp_value *src_val,
+ int src_idx)
+{
+ struct ipcp_value *val;
+
+ if (lat->bottom)
+ return false;
+
+
+ for (val = lat->values; val; val = val->next)
+ if (values_equal_for_ipcp_p (val->value, newval))
+ {
+ if (edge_within_scc (cs))
+ {
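+ /* When propagating within an SCC, the same edge may be processed several
+ times before a fixpoint is reached, so record it as a source of this
+ value only once. */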
+ struct ipcp_value_source *s;
+ for (s = val->sources; s ; s = s->next)
+ if (s->cs == cs)
+ break;
+ if (s)
+ return false;
+ }
+
+ add_value_source (val, cs, src_val, src_idx);
+ return false;
+ }
+
+ if (lat->values_count == PARAM_VALUE (PARAM_IPA_CP_VALUE_LIST_SIZE))
+ {
+ /* We can only free sources, not the values themselves, because sources
+ of other values in this SCC might point to them. */
+ for (val = lat->values; val; val = val->next)
+ {
+ while (val->sources)
+ {
+ struct ipcp_value_source *src = val->sources;
+ val->sources = src->next;
+ pool_free (ipcp_sources_pool, src);
+ }
+ }
+
+ lat->values = NULL;
+ return set_lattice_to_bottom (lat);
+ }
+
+ lat->values_count++;
+ val = (struct ipcp_value *) pool_alloc (ipcp_values_pool);
+ memset (val, 0, sizeof (*val));
+
+ add_value_source (val, cs, src_val, src_idx);
+ val->value = newval;
+ val->next = lat->values;
+ lat->values = val;
+ return true;
+}
- for (node = cgraph_nodes; node; node = node->next)
- if (!node->alias)
+/* Propagate values through a pass-through jump function JFUNC associated with
+ edge CS, taking values from SRC_LAT and putting them into DEST_LAT. SRC_IDX
+ is the index of the source parameter. */
+
+static bool
+propagate_vals_accross_pass_through (struct cgraph_edge *cs,
+ struct ipa_jump_func *jfunc,
+ struct ipcp_lattice *src_lat,
+ struct ipcp_lattice *dest_lat,
+ int src_idx)
+{
+ struct ipcp_value *src_val;
+ bool ret = false;
+
+ if (jfunc->value.pass_through.operation == NOP_EXPR)
+ for (src_val = src_lat->values; src_val; src_val = src_val->next)
+ ret |= add_value_to_lattice (dest_lat, src_val->value, cs,
+ src_val, src_idx);
+ /* Do not create new values when propagating within an SCC because if there
+ are arithmetic functions with circular dependencies, there is an infinite
+ number of them and we would just make lattices bottom. */
+ else if (edge_within_scc (cs))
+ ret = set_lattice_contains_variable (dest_lat);
+ else
+ for (src_val = src_lat->values; src_val; src_val = src_val->next)
{
- ipcp_initialize_node_lattices (node);
- ipcp_compute_node_scale (node);
+ tree cstval = src_val->value;
+
+ if (TREE_CODE (cstval) == TREE_BINFO)
+ {
+ ret |= set_lattice_contains_variable (dest_lat);
+ continue;
+ }
+ cstval = ipa_get_jf_pass_through_result (jfunc, cstval);
+
+ if (cstval)
+ ret |= add_value_to_lattice (dest_lat, cstval, cs, src_val, src_idx);
+ else
+ ret |= set_lattice_contains_variable (dest_lat);
}
+
+ return ret;
+}
+
+/* Propagate values through an ancestor jump function JFUNC associated with
+ edge CS, taking values from SRC_LAT and putting them into DEST_LAT. SRC_IDX
+ is the index of the source parameter. */
+
+static bool
+propagate_vals_accross_ancestor (struct cgraph_edge *cs,
+ struct ipa_jump_func *jfunc,
+ struct ipcp_lattice *src_lat,
+ struct ipcp_lattice *dest_lat,
+ int src_idx)
+{
+ struct ipcp_value *src_val;
+ bool ret = false;
+
+ if (edge_within_scc (cs))
+ return set_lattice_contains_variable (dest_lat);
+
+ for (src_val = src_lat->values; src_val; src_val = src_val->next)
+ {
+ tree t = src_val->value;
+
+ if (TREE_CODE (t) == TREE_BINFO)
+ t = get_binfo_at_offset (t, jfunc->value.ancestor.offset,
+ jfunc->value.ancestor.type);
+ else
+ t = ipa_get_jf_ancestor_result (jfunc, t);
+
+ if (t)
+ ret |= add_value_to_lattice (dest_lat, t, cs, src_val, src_idx);
+ else
+ ret |= set_lattice_contains_variable (dest_lat);
+ }
+
+ return ret;
+}
+
+/* Propagate values across jump function JFUNC that is associated with edge CS
+ and put the values into DEST_LAT. */
+
+static bool
+propagate_accross_jump_function (struct cgraph_edge *cs,
+ struct ipa_jump_func *jfunc,
+ struct ipcp_lattice *dest_lat)
+{
+ if (dest_lat->bottom)
+ return false;
+
+ if (jfunc->type == IPA_JF_CONST
+ || jfunc->type == IPA_JF_KNOWN_TYPE)
+ {
+ tree val;
+
+ if (jfunc->type == IPA_JF_KNOWN_TYPE)
+ val = jfunc->value.base_binfo;
+ else
+ val = jfunc->value.constant;
+ return add_value_to_lattice (dest_lat, val, cs, NULL, 0);
+ }
+ else if (jfunc->type == IPA_JF_PASS_THROUGH
+ || jfunc->type == IPA_JF_ANCESTOR)
+ {
+ struct ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
+ struct ipcp_lattice *src_lat;
+ int src_idx;
+ bool ret;
+
+ if (jfunc->type == IPA_JF_PASS_THROUGH)
+ src_idx = jfunc->value.pass_through.formal_id;
+ else
+ src_idx = jfunc->value.ancestor.formal_id;
+
+ src_lat = ipa_get_lattice (caller_info, src_idx);
+ if (src_lat->bottom)
+ return set_lattice_contains_variable (dest_lat);
+
+ /* If we would need to clone the caller and cannot, do not propagate. */
+ if (!ipcp_versionable_function_p (cs->caller)
+ && (src_lat->contains_variable
+ || (src_lat->values_count > 1)))
+ return set_lattice_contains_variable (dest_lat);
+
+ if (jfunc->type == IPA_JF_PASS_THROUGH)
+ ret = propagate_vals_accross_pass_through (cs, jfunc, src_lat,
+ dest_lat, src_idx);
+ else
+ ret = propagate_vals_accross_ancestor (cs, jfunc, src_lat, dest_lat,
+ src_idx);
+
+ if (src_lat->contains_variable)
+ ret |= set_lattice_contains_variable (dest_lat);
+
+ return ret;
+ }
+
+ /* TODO: We currently do not handle member method pointers in IPA-CP (we only
+ use them for indirect inlining); we should propagate them too. */
+ return set_lattice_contains_variable (dest_lat);
+}
+
+/* Propagate constants from the caller to the callee of CS. Return true if any
+ lattice of the callee has changed. */
+
+static bool
+propagate_constants_accross_call (struct cgraph_edge *cs)
+{
+ struct ipa_node_params *callee_info;
+ enum availability availability;
+ struct cgraph_node *callee, *alias_or_thunk;
+ struct ipa_edge_args *args;
+ bool ret = false;
+ int i, count;
+
+ callee = cgraph_function_node (cs->callee, &availability);
+ if (!callee->analyzed)
+ return false;
+ gcc_checking_assert (cgraph_function_with_gimple_body_p (callee));
+ callee_info = IPA_NODE_REF (callee);
+ if (ipa_is_called_with_var_arguments (callee_info))
+ return false;
+
+ args = IPA_EDGE_REF (cs);
+ count = ipa_get_cs_argument_count (args);
+
+ /* If this call goes through a thunk we must not propagate to the first (0th)
+ parameter. However, we might need to uncover a thunk from below a series
+ of aliases first. */
+ alias_or_thunk = cs->callee;
+ while (alias_or_thunk->alias)
+ alias_or_thunk = cgraph_alias_aliased_node (alias_or_thunk);
+ if (alias_or_thunk->thunk.thunk_p)
+ {
+ ret |= set_lattice_contains_variable (ipa_get_lattice (callee_info, 0));
+ i = 1;
+ }
+ else
+ i = 0;
+
+ for (; i < count; i++)
+ {
+ struct ipa_jump_func *jump_func = ipa_get_ith_jump_func (args, i);
+ struct ipcp_lattice *dest_lat = ipa_get_lattice (callee_info, i);
+
+ if (availability == AVAIL_OVERWRITABLE)
+ ret |= set_lattice_contains_variable (dest_lat);
+ else
+ ret |= propagate_accross_jump_function (cs, jump_func, dest_lat);
+ }
+ return ret;
+}
+
+/* If an indirect edge IE can be turned into a direct one based on KNOWN_VALS
+ (which can contain both constants and binfos) or KNOWN_BINFOS (which can be
+ NULL) return the destination. If a simple thunk delta must be applied too,
+ store it to DELTA. */
+
+static tree
+get_indirect_edge_target (struct cgraph_edge *ie, tree *delta,
+ VEC (tree, heap) *known_vals,
+ VEC (tree, heap) *known_binfos)
+{
+ int param_index = ie->indirect_info->param_index;
+ HOST_WIDE_INT token, anc_offset;
+ tree otr_type;
+ tree t;
+
+ if (param_index == -1)
+ return NULL_TREE;
+
+ if (!ie->indirect_info->polymorphic)
+ {
+ tree t = VEC_index (tree, known_vals, param_index);
+ if (t &&
+ TREE_CODE (t) == ADDR_EXPR
+ && TREE_CODE (TREE_OPERAND (t, 0)) == FUNCTION_DECL)
+ {
+ *delta = NULL_TREE;
+ return TREE_OPERAND (t, 0);
+ }
+ else
+ return NULL_TREE;
+ }
+
+ token = ie->indirect_info->otr_token;
+ anc_offset = ie->indirect_info->anc_offset;
+ otr_type = ie->indirect_info->otr_type;
+
+ t = VEC_index (tree, known_vals, param_index);
+ if (!t && known_binfos)
+ t = VEC_index (tree, known_binfos, param_index);
+ if (!t)
+ return NULL_TREE;
+
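+ /* T is either a BINFO describing a known type of the object or a constant
+ (such as the address of a variable with a known vtable) from which a BINFO
+ can sometimes be extracted. In both cases, walk down to the ancestor at
+ ANC_OFFSET of type OTR_TYPE and look the method up in its virtual table. */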
+ if (TREE_CODE (t) != TREE_BINFO)
+ {
+ tree binfo;
+ binfo = gimple_extract_devirt_binfo_from_cst (t);
+ if (!binfo)
+ return NULL_TREE;
+ binfo = get_binfo_at_offset (binfo, anc_offset, otr_type);
+ if (!binfo)
+ return NULL_TREE;
+ return gimple_get_virt_method_for_binfo (token, binfo, delta);
+ }
+ else
+ {
+ tree binfo;
+
+ binfo = get_binfo_at_offset (t, anc_offset, otr_type);
+ if (!binfo)
+ return NULL_TREE;
+ return gimple_get_virt_method_for_binfo (token, binfo, delta);
+ }
+}
+
+/* Calculate devirtualization time bonus for NODE, assuming we know KNOWN_CSTS
+ and KNOWN_BINFOS. */
+
+static int
+devirtualization_time_bonus (struct cgraph_node *node,
+ VEC (tree, heap) *known_csts,
+ VEC (tree, heap) *known_binfos)
+{
+ struct cgraph_edge *ie;
+ int res = 0;
+
+ for (ie = node->indirect_calls; ie; ie = ie->next_callee)
+ {
+ struct cgraph_node *callee;
+ struct inline_summary *isummary;
+ tree delta, target;
+
+ target = get_indirect_edge_target (ie, &delta, known_csts, known_binfos);
+ if (!target)
+ continue;
+
+ /* Only bare minimum benefit for clearly un-inlineable targets. */
+ res += 1;
+ callee = cgraph_get_node (target);
+ if (!callee || !callee->analyzed)
+ continue;
+ isummary = inline_summary (callee);
+ if (!isummary->inlinable)
+ continue;
+
+ /* FIXME: The values below need re-considering and perhaps also
+ integrating into the cost metrics, at least in some very basic way. */
+ if (isummary->size <= MAX_INLINE_INSNS_AUTO / 4)
+ res += 31;
+ else if (isummary->size <= MAX_INLINE_INSNS_AUTO / 2)
+ res += 15;
+ else if (isummary->size <= MAX_INLINE_INSNS_AUTO
+ || DECL_DECLARED_INLINE_P (callee->decl))
+ res += 7;
+ }
+
+ return res;
+}
+
+/* Return true if cloning NODE is a good idea, given the estimated TIME_BENEFIT
+ and SIZE_COST, the sum of frequencies of incoming edges to the potential new
+ clone in FREQ_SUM and their profile counts in COUNT_SUM. */
+
+static bool
+good_cloning_opportunity_p (struct cgraph_node *node, int time_benefit,
+ int freq_sum, gcov_type count_sum, int size_cost)
+{
+ if (time_benefit == 0
+ || !flag_ipa_cp_clone
+ || !optimize_function_for_speed_p (DECL_STRUCT_FUNCTION (node->decl)))
+ return false;
+
+ gcc_checking_assert (size_cost >= 0);
+
+ /* FIXME: These decisions need tuning. */
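+
+ /* The evaluation relates the estimated speedup to how dominant the incoming
+ edges of the potential clone are (profile counts relative to max_count, or
+ frequencies when no profile is available) and to the size cost. As a purely
+ illustrative calculation, a time_benefit of 20, a count_sum equal to half of
+ max_count (factor 500) and a size_cost of 10 yield an evaluation of
+ (20 * 500) / 10 = 1000, which is then compared against
+ PARAM_IPA_CP_EVAL_THRESHOLD. */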
+ if (max_count)
+ {
+ int evaluation, factor = (count_sum * 1000) / max_count;
+
+ evaluation = (time_benefit * factor) / size_cost;
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, " good_cloning_opportunity_p (time: %i, "
+ "size: %i, count_sum: " HOST_WIDE_INT_PRINT_DEC
+ ") -> evaluation: %i, threshold: %i\n",
+ time_benefit, size_cost, (HOST_WIDE_INT) count_sum,
+ evaluation, PARAM_VALUE (PARAM_IPA_CP_EVAL_THRESHOLD));
+
+ return evaluation >= PARAM_VALUE (PARAM_IPA_CP_EVAL_THRESHOLD);
+ }
+ else
+ {
+ int evaluation = (time_benefit * freq_sum) / size_cost;
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, " good_cloning_opportunity_p (time: %i, "
+ "size: %i, freq_sum: %i) -> evaluation: %i, threshold: %i\n",
+ time_benefit, size_cost, freq_sum, evaluation,
+ PARAM_VALUE (PARAM_IPA_CP_EVAL_THRESHOLD));
+
+ return evaluation >= PARAM_VALUE (PARAM_IPA_CP_EVAL_THRESHOLD);
+ }
+}
+
+
+/* Allocate KNOWN_CSTS and KNOWN_BINFOS and populate them with values of
+ parameters that are known independent of the context. INFO describes the
+ function. If REMOVABLE_PARAMS_COST is non-NULL, the movement cost of all
+ removable parameters will be stored in it. */
+
+static bool
+gather_context_independent_values (struct ipa_node_params *info,
+ VEC (tree, heap) **known_csts,
+ VEC (tree, heap) **known_binfos,
+ int *removable_params_cost)
+{
+ int i, count = ipa_get_param_count (info);
+ bool ret = false;
+
+ *known_csts = NULL;
+ *known_binfos = NULL;
+ VEC_safe_grow_cleared (tree, heap, *known_csts, count);
+ VEC_safe_grow_cleared (tree, heap, *known_binfos, count);
+
+ if (removable_params_cost)
+ *removable_params_cost = 0;
+
+ for (i = 0; i < count ; i++)
+ {
+ struct ipcp_lattice *lat = ipa_get_lattice (info, i);
+
+ if (ipa_lat_is_single_const (lat))
+ {
+ struct ipcp_value *val = lat->values;
+ if (TREE_CODE (val->value) != TREE_BINFO)
+ {
+ VEC_replace (tree, *known_csts, i, val->value);
+ if (removable_params_cost)
+ *removable_params_cost
+ += estimate_move_cost (TREE_TYPE (val->value));
+ ret = true;
+ }
+ else if (lat->virt_call)
+ {
+ VEC_replace (tree, *known_binfos, i, val->value);
+ ret = true;
+ }
+ else if (removable_params_cost
+ && !ipa_is_param_used (info, i))
+ *removable_params_cost
+ += estimate_move_cost (TREE_TYPE (ipa_get_param (info, i)));
+ }
+ else if (removable_params_cost
+ && !ipa_is_param_used (info, i))
+ *removable_params_cost
+ += estimate_move_cost (TREE_TYPE (ipa_get_param (info, i)));
+ }
+
+ return ret;
+}
+
+/* Iterate over known values of parameters of NODE and estimate the local
+ effects they have in terms of time and size. */
+
+static void
+estimate_local_effects (struct cgraph_node *node)
+{
+ struct ipa_node_params *info = IPA_NODE_REF (node);
+ int i, count = ipa_get_param_count (info);
+ VEC (tree, heap) *known_csts, *known_binfos;
+ bool always_const;
+ int base_time = inline_summary (node)->time;
+ int removable_params_cost;
+
+ if (!count || !ipcp_versionable_function_p (node))
+ return;
+
if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "\nEstimating effects for %s/%i, base_time: %i.\n",
+ cgraph_node_name (node), node->uid, base_time);
+
+ always_const = gather_context_independent_values (info, &known_csts,
+ &known_binfos,
+ &removable_params_cost);
+ if (always_const)
{
- ipcp_print_all_lattices (dump_file);
- ipcp_function_scale_print (dump_file);
+ struct caller_statistics stats;
+ int time, size;
+
+ init_caller_stats (&stats);
+ cgraph_for_node_and_aliases (node, gather_caller_stats, &stats, false);
+ estimate_ipcp_clone_size_and_time (node, known_csts, &size, &time);
+ time -= devirtualization_time_bonus (node, known_csts, known_binfos);
+ time -= removable_params_cost;
+ size -= stats.n_calls * removable_params_cost;
+
+ if (dump_file)
+ fprintf (dump_file, " - context independent values, size: %i, "
+ "time_benefit: %i\n", size, base_time - time);
+
+ if (size <= 0
+ || cgraph_will_be_removed_from_program_if_no_direct_calls (node))
+ {
+ info->clone_for_all_contexts = true;
+ base_time = time;
+
+ if (dump_file)
+ fprintf (dump_file, " Decided to specialize for all "
+ "known contexts, code not going to grow.\n");
+ }
+ else if (good_cloning_opportunity_p (node, base_time - time,
+ stats.freq_sum, stats.count_sum,
+ size))
+ {
+ if (size + overall_size <= max_new_size)
+ {
+ info->clone_for_all_contexts = true;
+ base_time = time;
+ overall_size += size;
+
+ if (dump_file)
+ fprintf (dump_file, " Decided to specialize for all "
+ "known contexts, growth deemed beneficial.\n");
+ }
+ else if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, " Not cloning for all contexts because "
+ "max_new_size would be reached with %li.\n",
+ size + overall_size);
+ }
}
- ipcp_propagate_stage ();
- if (ipcp_change_tops_to_bottom ())
- /* Some lattices have changed from IPA_TOP to IPA_BOTTOM.
- This change should be propagated. */
+ for (i = 0; i < count ; i++)
{
- gcc_assert (n_cloning_candidates);
- ipcp_propagate_stage ();
+ struct ipcp_lattice *lat = ipa_get_lattice (info, i);
+ struct ipcp_value *val;
+ int emc;
+
+ if (lat->bottom
+ || !lat->values
+ || VEC_index (tree, known_csts, i)
+ || VEC_index (tree, known_binfos, i))
+ continue;
+
+ for (val = lat->values; val; val = val->next)
+ {
+ int time, size, time_benefit;
+
+ if (TREE_CODE (val->value) != TREE_BINFO)
+ {
+ VEC_replace (tree, known_csts, i, val->value);
+ VEC_replace (tree, known_binfos, i, NULL_TREE);
+ emc = estimate_move_cost (TREE_TYPE (val->value));
+ }
+ else if (lat->virt_call)
+ {
+ VEC_replace (tree, known_csts, i, NULL_TREE);
+ VEC_replace (tree, known_binfos, i, val->value);
+ emc = 0;
+ }
+ else
+ continue;
+
+ estimate_ipcp_clone_size_and_time (node, known_csts, &size, &time);
+ time_benefit = base_time - time
+ + devirtualization_time_bonus (node, known_csts, known_binfos)
+ + removable_params_cost + emc;
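+
+ /* For illustration (hypothetical numbers): with base_time 100, an
+ estimated specialized time of 70, no devirtualization bonus, zero
+ removable_params_cost and a move cost of 2 for the constant, the local
+ time benefit computed above is 100 - 70 + 0 + 0 + 2 = 32. */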
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, " - estimates for value ");
+ print_ipcp_constant_value (dump_file, val->value);
+ fprintf (dump_file, " for parameter ");
+ print_generic_expr (dump_file, ipa_get_param (info, i), 0);
+ fprintf (dump_file, ": time_benefit: %i, size: %i\n",
+ time_benefit, size);
+ }
+
+ val->local_time_benefit = time_benefit;
+ val->local_size_cost = size;
+ }
}
- if (dump_file)
+
+ VEC_free (tree, heap, known_csts);
+ VEC_free (tree, heap, known_binfos);
+}
+
+
+/* Add value CUR_VAL and all yet-unsorted values it is dependent on to the
+ topological sort of values. */
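+
+/* (Implementation note: this is essentially Tarjan's strongly connected
+ components algorithm run over the graph of value dependencies; the values of
+ one SCC are chained through their scc_next fields and the SCCs themselves
+ are linked into values_topo through topo_next.) */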
+
+static void
+add_val_to_toposort (struct ipcp_value *cur_val)
+{
+ static int dfs_counter = 0;
+ static struct ipcp_value *stack;
+ struct ipcp_value_source *src;
+
+ if (cur_val->dfs)
+ return;
+
+ dfs_counter++;
+ cur_val->dfs = dfs_counter;
+ cur_val->low_link = dfs_counter;
+
+ cur_val->topo_next = stack;
+ stack = cur_val;
+ cur_val->on_stack = true;
+
+ for (src = cur_val->sources; src; src = src->next)
+ if (src->val)
+ {
+ if (src->val->dfs == 0)
+ {
+ add_val_to_toposort (src->val);
+ if (src->val->low_link < cur_val->low_link)
+ cur_val->low_link = src->val->low_link;
+ }
+ else if (src->val->on_stack
+ && src->val->dfs < cur_val->low_link)
+ cur_val->low_link = src->val->dfs;
+ }
+
+ if (cur_val->dfs == cur_val->low_link)
{
- fprintf (dump_file, "\nIPA lattices after propagation:\n");
- ipcp_print_all_lattices (dump_file);
- if (dump_flags & TDF_DETAILS)
- ipcp_print_profile_data (dump_file);
+ struct ipcp_value *v, *scc_list = NULL;
+
+ do
+ {
+ v = stack;
+ stack = v->topo_next;
+ v->on_stack = false;
+
+ v->scc_next = scc_list;
+ scc_list = v;
+ }
+ while (v != cur_val);
+
+ cur_val->topo_next = values_topo;
+ values_topo = cur_val;
}
}
-/* Check conditions to forbid constant insertion to function described by
- NODE. */
-static inline bool
-ipcp_node_modifiable_p (struct cgraph_node *node)
+/* Add all values in lattices associated with NODE to the topological sort if
+ they are not there yet. */
+
+static void
+add_all_node_vals_to_toposort (struct cgraph_node *node)
{
- /* Once we will be able to do in-place replacement, we can be more
- lax here. */
- return ipcp_versionable_function_p (node);
+ struct ipa_node_params *info = IPA_NODE_REF (node);
+ int i, count = ipa_get_param_count (info);
+
+ for (i = 0; i < count ; i++)
+ {
+ struct ipcp_lattice *lat = ipa_get_lattice (info, i);
+ struct ipcp_value *val;
+
+ if (lat->bottom || !lat->values)
+ continue;
+ for (val = lat->values; val; val = val->next)
+ add_val_to_toposort (val);
+ }
}
-/* Print count scale data structures. */
+/* One pass of constant propagation along the call graph edges, from callers
+ to callees (requires topological ordering in TOPO), iterating over
+ strongly connected components. */
+
static void
-ipcp_function_scale_print (FILE * f)
+propagate_constants_topo (struct topo_info *topo)
{
- struct cgraph_node *node;
+ int i;
- for (node = cgraph_nodes; node; node = node->next)
+ for (i = topo->nnodes - 1; i >= 0; i--)
{
- if (!node->analyzed)
+ struct cgraph_node *v, *node = topo->order[i];
+ struct ipa_dfs_info *node_dfs_info;
+
+ if (!cgraph_function_with_gimple_body_p (node))
continue;
- fprintf (f, "printing scale for %s: ", cgraph_node_name (node));
- fprintf (f, "value is " HOST_WIDE_INT_PRINT_DEC
- " \n", (HOST_WIDE_INT) ipcp_get_node_scale (node));
+
+ node_dfs_info = (struct ipa_dfs_info *) node->aux;
+ /* First, iteratively propagate within the strongly connected component
+ until all lattices stabilize. */
+ v = node_dfs_info->next_cycle;
+ while (v)
+ {
+ push_node_to_stack (topo, v);
+ v = ((struct ipa_dfs_info *) v->aux)->next_cycle;
+ }
+
+ v = node;
+ while (v)
+ {
+ struct cgraph_edge *cs;
+
+ for (cs = v->callees; cs; cs = cs->next_callee)
+ if (edge_within_scc (cs)
+ && propagate_constants_accross_call (cs))
+ push_node_to_stack (topo, cs->callee);
+ v = pop_node_from_stack (topo);
+ }
+
+ /* Afterwards, propagate along edges leading out of the SCC, calculate
+ the local effects of the discovered constants and add all values to
+ their topological sort. */
+ v = node;
+ while (v)
+ {
+ struct cgraph_edge *cs;
+
+ estimate_local_effects (v);
+ add_all_node_vals_to_toposort (v);
+ for (cs = v->callees; cs; cs = cs->next_callee)
+ if (!edge_within_scc (cs))
+ propagate_constants_accross_call (cs);
+
+ v = ((struct ipa_dfs_info *) v->aux)->next_cycle;
+ }
}
}
-/* Print counts of all cgraph nodes. */
+/* Propagate the estimated effects of individual values along the topological
+ order, from the dependent values to those they depend on. */
+
static void
-ipcp_print_func_profile_counts (FILE * f)
+propagate_effects (void)
{
- struct cgraph_node *node;
+ struct ipcp_value *base;
- for (node = cgraph_nodes; node; node = node->next)
+ for (base = values_topo; base; base = base->topo_next)
{
- fprintf (f, "function %s: ", cgraph_node_name (node));
- fprintf (f, "count is " HOST_WIDE_INT_PRINT_DEC
- " \n", (HOST_WIDE_INT) node->count);
+ struct ipcp_value_source *src;
+ struct ipcp_value *val;
+ int time = 0, size = 0;
+
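+ /* First sum the local and already propagated time benefits and size costs
+ over the whole SCC of interdependent values (chained through scc_next),
+ then add those totals to every value that this SCC depends on and that is
+ reached through a potentially hot edge. */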
+ for (val = base; val; val = val->scc_next)
+ {
+ time += val->local_time_benefit + val->prop_time_benefit;
+ size += val->local_size_cost + val->prop_size_cost;
+ }
+
+ for (val = base; val; val = val->scc_next)
+ for (src = val->sources; src; src = src->next)
+ if (src->val
+ && cgraph_maybe_hot_edge_p (src->cs))
+ {
+ src->val->prop_time_benefit += time;
+ src->val->prop_size_cost += size;
+ }
}
}
-/* Print counts of all cgraph edges. */
+
+/* Propagate constants, binfos and their effects from the summaries
+ interprocedurally. */
+
static void
-ipcp_print_call_profile_counts (FILE * f)
+ipcp_propagate_stage (struct topo_info *topo)
{
struct cgraph_node *node;
- struct cgraph_edge *cs;
- for (node = cgraph_nodes; node; node = node->next)
+ if (dump_file)
+ fprintf (dump_file, "\n Propagating constants:\n\n");
+
+ if (in_lto_p)
+ ipa_update_after_lto_read ();
+
+ FOR_EACH_DEFINED_FUNCTION (node)
+ {
+ struct ipa_node_params *info = IPA_NODE_REF (node);
+
+ determine_versionability (node);
+ if (cgraph_function_with_gimple_body_p (node))
+ {
+ info->lattices = XCNEWVEC (struct ipcp_lattice,
+ ipa_get_param_count (info));
+ initialize_node_lattices (node);
+ }
+ if (node->count > max_count)
+ max_count = node->count;
+ overall_size += inline_summary (node)->self_size;
+ }
+
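+ /* The unit may grow by at most PARAM_IPCP_UNIT_GROWTH percent of its
+ current size (or of PARAM_LARGE_UNIT_INSNS if that is larger). For
+ illustration, with an overall_size of 20000 above the large-unit limit and
+ an assumed growth parameter of 10%, max_new_size becomes
+ 20000 + 2000 + 1 = 22001. */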
+ max_new_size = overall_size;
+ if (max_new_size < PARAM_VALUE (PARAM_LARGE_UNIT_INSNS))
+ max_new_size = PARAM_VALUE (PARAM_LARGE_UNIT_INSNS);
+ max_new_size += max_new_size * PARAM_VALUE (PARAM_IPCP_UNIT_GROWTH) / 100 + 1;
+
+ if (dump_file)
+ fprintf (dump_file, "\noverall_size: %li, max_new_size: %li\n",
+ overall_size, max_new_size);
+
+ propagate_constants_topo (topo);
+#ifdef ENABLE_CHECKING
+ ipcp_verify_propagated_values ();
+#endif
+ propagate_effects ();
+
+ if (dump_file)
+ {
+ fprintf (dump_file, "\nIPA lattices after all propagation:\n");
+ print_all_lattices (dump_file, (dump_flags & TDF_DETAILS), true);
+ }
+}
+
+/* Discover which indirect outgoing edges of NODE, a new clone with known
+ constants in KNOWN_VALS, have become direct and make them so. */
+
+static void
+ipcp_discover_new_direct_edges (struct cgraph_node *node,
+ VEC (tree, heap) *known_vals)
+{
+ struct cgraph_edge *ie, *next_ie;
+
+ for (ie = node->indirect_calls; ie; ie = next_ie)
+ {
+ tree delta, target;
+
+ next_ie = ie->next_callee;
+ target = get_indirect_edge_target (ie, &delta, known_vals, NULL);
+ if (target)
+ ipa_make_edge_direct_to_target (ie, target, delta);
+ }
+}
+
+/* Vector of pointers which form linked lists of clones of an original cgraph
+ edge, indexed by edge uid. */
+
+static VEC (cgraph_edge_p, heap) *next_edge_clone;
+
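+/* Make sure that next_edge_clone is big enough to be indexed by the uid of
+ any currently existing call graph edge. */
+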
+static inline void
+grow_next_edge_clone_vector (void)
+{
+ if (VEC_length (cgraph_edge_p, next_edge_clone)
+ <= (unsigned) cgraph_edge_max_uid)
+ VEC_safe_grow_cleared (cgraph_edge_p, heap, next_edge_clone,
+ cgraph_edge_max_uid + 1);
+}
+
+/* Edge duplication hook to grow the appropriate linked list in
+ next_edge_clone. */
+
+static void
+ipcp_edge_duplication_hook (struct cgraph_edge *src, struct cgraph_edge *dst,
+ __attribute__((unused)) void *data)
+{
+ grow_next_edge_clone_vector ();
+ VEC_replace (cgraph_edge_p, next_edge_clone, dst->uid,
+ VEC_index (cgraph_edge_p, next_edge_clone, src->uid));
+ VEC_replace (cgraph_edge_p, next_edge_clone, src->uid, dst);
+}
+
+/* Get the next clone in the linked list of clones of an edge. */
+
+static inline struct cgraph_edge *
+get_next_cgraph_edge_clone (struct cgraph_edge *cs)
+{
+ return VEC_index (cgraph_edge_p, next_edge_clone, cs->uid);
+}
+
+/* Return true if edge CS does bring about the value described by SRC. */
+
+static bool
+cgraph_edge_brings_value_p (struct cgraph_edge *cs,
+ struct ipcp_value_source *src)
+{
+ struct ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
+
+ if (IPA_NODE_REF (cs->callee)->ipcp_orig_node
+ || caller_info->node_dead)
+ return false;
+ if (!src->val)
+ return true;
+
+ if (caller_info->ipcp_orig_node)
+ {
+ tree t = VEC_index (tree, caller_info->known_vals, src->index);
+ return (t != NULL_TREE
+ && values_equal_for_ipcp_p (src->val->value, t));
+ }
+ else
{
- for (cs = node->callees; cs; cs = cs->next_callee)
+ struct ipcp_lattice *lat = ipa_get_lattice (caller_info, src->index);
+ if (ipa_lat_is_single_const (lat)
+ && values_equal_for_ipcp_p (src->val->value, lat->values->value))
+ return true;
+ else
+ return false;
+ }
+}
+
+/* Given VAL, iterate over all its sources and if they still hold, add their
+ edge frequencies, profile counts and number into *FREQ_SUM, *COUNT_SUM and
+ *CALLER_COUNT respectively. Return true if at least one of those edges is
+ considered hot. */
+
+static bool
+get_info_about_necessary_edges (struct ipcp_value *val, int *freq_sum,
+ gcov_type *count_sum, int *caller_count)
+{
+ struct ipcp_value_source *src;
+ int freq = 0, count = 0;
+ gcov_type cnt = 0;
+ bool hot = false;
+
+ for (src = val->sources; src; src = src->next)
+ {
+ struct cgraph_edge *cs = src->cs;
+ while (cs)
{
- fprintf (f, "%s -> %s ", cgraph_node_name (cs->caller),
- cgraph_node_name (cs->callee));
- fprintf (f, "count is " HOST_WIDE_INT_PRINT_DEC " \n",
- (HOST_WIDE_INT) cs->count);
+ if (cgraph_edge_brings_value_p (cs, src))
+ {
+ count++;
+ freq += cs->frequency;
+ cnt += cs->count;
+ hot |= cgraph_maybe_hot_edge_p (cs);
+ }
+ cs = get_next_cgraph_edge_clone (cs);
}
}
+
+ *freq_sum = freq;
+ *count_sum = cnt;
+ *caller_count = count;
+ return hot;
}
-/* Print profile info for all functions. */
-static void
-ipcp_print_profile_data (FILE * f)
+/* Return a vector of incoming edges that do bring value VAL. It is assumed
+ their number is known and equal to CALLER_COUNT. */
+
+static VEC (cgraph_edge_p,heap) *
+gather_edges_for_value (struct ipcp_value *val, int caller_count)
{
- fprintf (f, "\nNODE COUNTS :\n");
- ipcp_print_func_profile_counts (f);
- fprintf (f, "\nCS COUNTS stage:\n");
- ipcp_print_call_profile_counts (f);
+ struct ipcp_value_source *src;
+ VEC (cgraph_edge_p,heap) *ret;
+
+ ret = VEC_alloc (cgraph_edge_p, heap, caller_count);
+ for (src = val->sources; src; src = src->next)
+ {
+ struct cgraph_edge *cs = src->cs;
+ while (cs)
+ {
+ if (cgraph_edge_brings_value_p (cs, src))
+ VEC_quick_push (cgraph_edge_p, ret, cs);
+ cs = get_next_cgraph_edge_clone (cs);
+ }
+ }
+
+ return ret;
}
-/* Build and initialize ipa_replace_map struct according to LAT. This struct is
- processed by versioning, which operates according to the flags set.
- PARM_TREE is the formal parameter found to be constant. LAT represents the
- constant. */
+/* Construct a replacement map for a known VALUE of a formal parameter PARM.
+ Return it, or NULL if it cannot be created. */
+
static struct ipa_replace_map *
-ipcp_create_replace_map (tree parm_tree, struct ipcp_lattice *lat)
+get_replacement_map (tree value, tree parm)
{
+ tree req_type = TREE_TYPE (parm);
struct ipa_replace_map *replace_map;
- tree const_val;
- const_val = build_const_val (lat, TREE_TYPE (parm_tree));
- if (const_val == NULL_TREE)
+ if (!useless_type_conversion_p (req_type, TREE_TYPE (value)))
{
- if (dump_file)
+ if (fold_convertible_p (req_type, value))
+ value = fold_build1 (NOP_EXPR, req_type, value);
+ else if (TYPE_SIZE (req_type) == TYPE_SIZE (TREE_TYPE (value)))
+ value = fold_build1 (VIEW_CONVERT_EXPR, req_type, value);
+ else
{
- fprintf (dump_file, " const ");
- print_generic_expr (dump_file, lat->constant, 0);
- fprintf (dump_file, " can't be converted to param ");
- print_generic_expr (dump_file, parm_tree, 0);
- fprintf (dump_file, "\n");
+ if (dump_file)
+ {
+ fprintf (dump_file, " const ");
+ print_generic_expr (dump_file, value, 0);
+ fprintf (dump_file, " can't be converted to param ");
+ print_generic_expr (dump_file, parm, 0);
+ fprintf (dump_file, "\n");
+ }
+ return NULL;
}
- return NULL;
}
+
replace_map = ggc_alloc_ipa_replace_map ();
if (dump_file)
{
- fprintf (dump_file, " replacing param ");
- print_generic_expr (dump_file, parm_tree, 0);
+ fprintf (dump_file, " replacing param ");
+ print_generic_expr (dump_file, parm, 0);
fprintf (dump_file, " with const ");
- print_generic_expr (dump_file, const_val, 0);
+ print_generic_expr (dump_file, value, 0);
fprintf (dump_file, "\n");
}
- replace_map->old_tree = parm_tree;
- replace_map->new_tree = const_val;
+ replace_map->old_tree = parm;
+ replace_map->new_tree = value;
replace_map->replace_p = true;
replace_map->ref_p = false;
return replace_map;
}
-/* Return true if this callsite should be redirected to the original callee
- (instead of the cloned one). */
-static bool
-ipcp_need_redirect_p (struct cgraph_edge *cs)
-{
- struct ipa_node_params *orig_callee_info;
- int i, count;
- struct cgraph_node *node = cgraph_function_or_thunk_node (cs->callee, NULL);
- struct cgraph_node *orig;
-
- if (!n_cloning_candidates)
- return false;
-
- /* We can't redirect anything in thunks, yet. */
- if (cs->caller->thunk.thunk_p)
- return true;
-
- if ((orig = ipcp_get_orig_node (node)) != NULL)
- node = orig;
- if (ipcp_get_orig_node (cs->caller))
- return false;
-
- orig_callee_info = IPA_NODE_REF (node);
- count = ipa_get_param_count (orig_callee_info);
- for (i = 0; i < count; i++)
- {
- struct ipcp_lattice *lat = ipa_get_lattice (orig_callee_info, i);
- struct ipa_jump_func *jump_func;
-
- jump_func = ipa_get_ith_jump_func (IPA_EDGE_REF (cs), i);
- if ((ipcp_lat_is_const (lat)
- && jump_func->type != IPA_JF_CONST)
- || (!ipa_param_cannot_devirtualize_p (orig_callee_info, i)
- && !ipa_param_types_vec_empty (orig_callee_info, i)
- && jump_func->type != IPA_JF_CONST
- && jump_func->type != IPA_JF_KNOWN_TYPE))
- return true;
- }
-
- return false;
-}
+/* Dump the new profile counts of ORIG_NODE, NEW_NODE and their callee
+ edges. */
-/* Fix the callsites and the call graph after function cloning was done. */
static void
-ipcp_update_callgraph (void)
+dump_profile_updates (struct cgraph_node *orig_node,
+ struct cgraph_node *new_node)
{
- struct cgraph_node *node;
+ struct cgraph_edge *cs;
- for (node = cgraph_nodes; node; node = node->next)
- if (node->analyzed && ipcp_node_is_clone (node))
- {
- bitmap args_to_skip = NULL;
- struct cgraph_node *orig_node = ipcp_get_orig_node (node);
- struct ipa_node_params *info = IPA_NODE_REF (orig_node);
- int i, count = ipa_get_param_count (info);
- struct cgraph_edge *cs, *next;
+ fprintf (dump_file, " setting count of the specialized node to "
+ HOST_WIDE_INT_PRINT_DEC "\n", (HOST_WIDE_INT) new_node->count);
+ for (cs = new_node->callees; cs ; cs = cs->next_callee)
+ fprintf (dump_file, " edge to %s has count "
+ HOST_WIDE_INT_PRINT_DEC "\n",
+ cgraph_node_name (cs->callee), (HOST_WIDE_INT) cs->count);
+
+ fprintf (dump_file, " setting count of the original node to "
+ HOST_WIDE_INT_PRINT_DEC "\n", (HOST_WIDE_INT) orig_node->count);
+ for (cs = orig_node->callees; cs ; cs = cs->next_callee)
+ fprintf (dump_file, " edge to %s is left with "
+ HOST_WIDE_INT_PRINT_DEC "\n",
+ cgraph_node_name (cs->callee), (HOST_WIDE_INT) cs->count);
+}
- if (node->local.can_change_signature)
- {
- args_to_skip = BITMAP_ALLOC (NULL);
- for (i = 0; i < count; i++)
- {
- struct ipcp_lattice *lat = ipa_get_lattice (info, i);
-
- /* We can proactively remove obviously unused arguments. */
- if (!ipa_is_param_used (info, i))
- {
- bitmap_set_bit (args_to_skip, i);
- continue;
- }
- if (lat->type == IPA_CONST_VALUE)
- bitmap_set_bit (args_to_skip, i);
- }
- }
- for (cs = node->callers; cs; cs = next)
- {
- next = cs->next_caller;
- if (!ipcp_node_is_clone (cs->caller) && ipcp_need_redirect_p (cs))
- {
- if (dump_file)
- fprintf (dump_file, "Redirecting edge %s/%i -> %s/%i "
- "back to %s/%i.",
- cgraph_node_name (cs->caller), cs->caller->uid,
- cgraph_node_name (cs->callee), cs->callee->uid,
- cgraph_node_name (orig_node), orig_node->uid);
- cgraph_redirect_edge_callee (cs, orig_node);
- }
- }
- }
-}
+/* After a specialized version NEW_NODE of ORIG_NODE has been created, update
+ the profile information of both nodes to reflect the split. */
-/* Update profiling info for versioned functions and the functions they were
- versioned from. */
static void
-ipcp_update_profiling (void)
+update_profiling_info (struct cgraph_node *orig_node,
+ struct cgraph_node *new_node)
{
- struct cgraph_node *node, *orig_node;
- gcov_type scale, scale_complement;
struct cgraph_edge *cs;
+ struct caller_statistics stats;
+ gcov_type new_sum, orig_sum;
+ gcov_type remainder, orig_node_count = orig_node->count;
+
+ if (orig_node_count == 0)
+ return;
- for (node = cgraph_nodes; node; node = node->next)
+ init_caller_stats (&stats);
+ cgraph_for_node_and_aliases (orig_node, gather_caller_stats, &stats, false);
+ orig_sum = stats.count_sum;
+ init_caller_stats (&stats);
+ cgraph_for_node_and_aliases (new_node, gather_caller_stats, &stats, false);
+ new_sum = stats.count_sum;
+
+ if (orig_node_count < orig_sum + new_sum)
{
- if (ipcp_node_is_clone (node))
- {
- orig_node = ipcp_get_orig_node (node);
- scale = ipcp_get_node_scale (orig_node);
- node->count = orig_node->count * scale / REG_BR_PROB_BASE;
- scale_complement = REG_BR_PROB_BASE - scale;
-
- gcc_assert (scale_complement >= 0);
- orig_node->count =
- orig_node->count * scale_complement / REG_BR_PROB_BASE;
- for (cs = node->callees; cs; cs = cs->next_callee)
- cs->count = cs->count * scale / REG_BR_PROB_BASE;
- for (cs = orig_node->callees; cs; cs = cs->next_callee)
- cs->count = cs->count * scale_complement / REG_BR_PROB_BASE;
- }
+ if (dump_file)
+ fprintf (dump_file, " Problem: node %s/%i has too low count "
+ HOST_WIDE_INT_PRINT_DEC " while the sum of incoming "
+ "counts is " HOST_WIDE_INT_PRINT_DEC "\n",
+ cgraph_node_name (orig_node), orig_node->uid,
+ (HOST_WIDE_INT) orig_node_count,
+ (HOST_WIDE_INT) (orig_sum + new_sum));
+
+ orig_node_count = (orig_sum + new_sum) * 12 / 10;
+ if (dump_file)
+ fprintf (dump_file, " proceeding by pretending it was "
+ HOST_WIDE_INT_PRINT_DEC "\n",
+ (HOST_WIDE_INT) orig_node_count);
}
+
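+ /* Split the count of the original node between the original and the new
+ node. For illustration (hypothetical counts): if orig_node executed 1000
+ times and the callers redirected to the clone account for 600 of those,
+ the clone gets a count of 600, the original keeps 400, and the callee edge
+ counts are scaled by 600/1000 and 400/1000 respectively. */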
+ new_node->count = new_sum;
+ remainder = orig_node_count - new_sum;
+ orig_node->count = remainder;
+
+ for (cs = new_node->callees; cs ; cs = cs->next_callee)
+ if (cs->frequency)
+ cs->count = cs->count * new_sum / orig_node_count;
+ else
+ cs->count = 0;
+
+ for (cs = orig_node->callees; cs ; cs = cs->next_callee)
+ cs->count = cs->count * remainder / orig_node_count;
+
+ if (dump_file)
+ dump_profile_updates (orig_node, new_node);
}
-/* If NODE was cloned, how much would program grow? */
-static long
-ipcp_estimate_growth (struct cgraph_node *node)
+/* Update the respective profile of specialized NEW_NODE and the original
+ ORIG_NODE after additional edges with cumulative count sum REDIRECTED_SUM
+ have been redirected to the specialized version. */
+
+static void
+update_specialized_profile (struct cgraph_node *new_node,
+ struct cgraph_node *orig_node,
+ gcov_type redirected_sum)
{
struct cgraph_edge *cs;
- int redirectable_node_callers = 0;
- int removable_args = 0;
- bool need_original
- = !cgraph_will_be_removed_from_program_if_no_direct_calls (node);
- VEC (tree, heap) *known_vals = NULL;
- struct ipa_node_params *info;
- int i, count;
- int growth;
+ gcov_type new_node_count, orig_node_count = orig_node->count;
- for (cs = node->callers; cs != NULL; cs = cs->next_caller)
- if (cs->caller == node || !ipcp_need_redirect_p (cs))
- redirectable_node_callers++;
- else
- need_original = true;
+ if (dump_file)
+ fprintf (dump_file, " the sum of counts of redirected edges is "
+ HOST_WIDE_INT_PRINT_DEC "\n", (HOST_WIDE_INT) redirected_sum);
+ if (orig_node_count == 0)
+ return;
- /* If we will be able to fully replace original node, we never increase
- program size. */
- if (!need_original)
- return 0;
+ gcc_assert (orig_node_count >= redirected_sum);
- info = IPA_NODE_REF (node);
- count = ipa_get_param_count (info);
- VEC_safe_grow_cleared (tree, heap, known_vals, count);
- if (node->local.can_change_signature)
- for (i = 0; i < count; i++)
- {
- struct ipcp_lattice *lat = ipa_get_lattice (info, i);
+ new_node_count = new_node->count;
+ new_node->count += redirected_sum;
+ orig_node->count -= redirected_sum;
- /* We can proactively remove obviously unused arguments. */
- if (!ipa_is_param_used (info, i))
- removable_args++;
+ for (cs = new_node->callees; cs ; cs = cs->next_callee)
+ if (cs->frequency)
+ cs->count += cs->count * redirected_sum / new_node_count;
+ else
+ cs->count = 0;
- if (lat->type == IPA_CONST_VALUE)
- {
- removable_args++;
- VEC_replace (tree, known_vals, i, lat->constant);
- }
- }
+ for (cs = orig_node->callees; cs ; cs = cs->next_callee)
+ {
+ gcov_type dec = cs->count * redirected_sum / orig_node_count;
+ if (dec < cs->count)
+ cs->count -= dec;
+ else
+ cs->count = 0;
+ }
- /* We make just very simple estimate of savings for removal of operand from
- call site. Precise cost is difficult to get, as our size metric counts
- constants and moves as free. Generally we are looking for cases that
- small function is called very many times. */
- estimate_ipcp_clone_size_and_time (node, known_vals, &growth, NULL);
- VEC_free (tree, heap, known_vals);
- growth = growth
- - removable_args * redirectable_node_callers;
- if (growth < 0)
- return 0;
- return growth;
+ if (dump_file)
+ dump_profile_updates (orig_node, new_node);
}
+/* Create a specialized version of NODE with known constants and types of
+ parameters in KNOWN_VALS and redirect all edges in CALLERS to it. */
-/* Estimate cost of cloning NODE. */
-static long
-ipcp_estimate_cloning_cost (struct cgraph_node *node)
+static struct cgraph_node *
+create_specialized_node (struct cgraph_node *node,
+ VEC (tree, heap) *known_vals,
+ VEC (cgraph_edge_p,heap) *callers)
{
- int freq_sum = 1;
- gcov_type count_sum = 1;
- struct cgraph_edge *e;
- int cost;
+ struct ipa_node_params *new_info, *info = IPA_NODE_REF (node);
+ VEC (ipa_replace_map_p,gc)* replace_trees = NULL;
+ struct cgraph_node *new_node;
+ int i, count = ipa_get_param_count (info);
+ bitmap args_to_skip;
- cost = ipcp_estimate_growth (node) * 1000;
- if (!cost)
+ gcc_assert (!info->ipcp_orig_node);
+
+ if (node->local.can_change_signature)
{
- if (dump_file)
- fprintf (dump_file, "Versioning of %s will save code size\n",
- cgraph_node_name (node));
- return 0;
+ args_to_skip = BITMAP_GGC_ALLOC ();
+ for (i = 0; i < count; i++)
+ {
+ tree t = VEC_index (tree, known_vals, i);
+
+ if ((t && TREE_CODE (t) != TREE_BINFO)
+ || !ipa_is_param_used (info, i))
+ bitmap_set_bit (args_to_skip, i);
+ }
+ }
+ else
+ args_to_skip = NULL;
+
+ for (i = 0; i < count ; i++)
+ {
+ tree t = VEC_index (tree, known_vals, i);
+ if (t && TREE_CODE (t) != TREE_BINFO)
+ {
+ struct ipa_replace_map *replace_map;
+
+ replace_map = get_replacement_map (t, ipa_get_param (info, i));
+ if (replace_map)
+ VEC_safe_push (ipa_replace_map_p, gc, replace_trees, replace_map);
+ }
}
- for (e = node->callers; e; e = e->next_caller)
- if (!bitmap_bit_p (dead_nodes, e->caller->uid)
- && !ipcp_need_redirect_p (e))
- {
- count_sum += e->count;
- freq_sum += e->frequency + 1;
- }
+ new_node = cgraph_create_virtual_clone (node, callers, replace_trees,
+ args_to_skip, "constprop");
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, " the new node is %s/%i.\n",
+ cgraph_node_name (new_node), new_node->uid);
+ gcc_checking_assert (ipa_node_params_vector
+ && (VEC_length (ipa_node_params_t,
+ ipa_node_params_vector)
+ > (unsigned) cgraph_max_uid));
+ update_profiling_info (node, new_node);
+ new_info = IPA_NODE_REF (new_node);
+ new_info->ipcp_orig_node = node;
+ new_info->known_vals = known_vals;
- if (max_count)
- cost /= count_sum * 1000 / max_count + 1;
- else
- cost /= freq_sum * 1000 / REG_BR_PROB_BASE + 1;
- if (dump_file)
- fprintf (dump_file, "Cost of versioning %s is %i, (size: %i, freq: %i)\n",
- cgraph_node_name (node), cost, inline_summary (node)->self_size,
- freq_sum);
- return cost + 1;
+ ipcp_discover_new_direct_edges (new_node, known_vals);
+
+ VEC_free (cgraph_edge_p, heap, callers);
+ return new_node;
}
-/* Walk indirect calls of NODE and if any polymorphic can be turned into a
- direct one now, do so. */
+/* Given a NODE and a subset of its CALLERS, try to populate blank slots in
+ KNOWN_VALS with constants and types that are also known for all of the
+ CALLERS. */
static void
-ipcp_process_devirtualization_opportunities (struct cgraph_node *node)
+find_more_values_for_callers_subset (struct cgraph_node *node,
+ VEC (tree, heap) *known_vals,
+ VEC (cgraph_edge_p,heap) *callers)
{
struct ipa_node_params *info = IPA_NODE_REF (node);
- struct cgraph_edge *ie, *next_ie;
+ int i, count = ipa_get_param_count (info);
- for (ie = node->indirect_calls; ie; ie = next_ie)
+ for (i = 0; i < count ; i++)
{
- int param_index;
- HOST_WIDE_INT token, anc_offset;
- tree target, delta, otr_type;
- struct ipcp_lattice *lat;
+ struct cgraph_edge *cs;
+ tree newval = NULL_TREE;
+ int j;
- next_ie = ie->next_callee;
- if (!ie->indirect_info->polymorphic)
- continue;
- param_index = ie->indirect_info->param_index;
- if (param_index == -1)
+ if (ipa_get_lattice (info, i)->bottom
+ || VEC_index (tree, known_vals, i))
continue;
- lat = ipa_get_lattice (info, param_index);
- token = ie->indirect_info->otr_token;
- anc_offset = ie->indirect_info->anc_offset;
- otr_type = ie->indirect_info->otr_type;
- target = NULL_TREE;
- if (lat->type == IPA_CONST_VALUE)
- {
- tree binfo = gimple_extract_devirt_binfo_from_cst (lat->constant);
- if (!binfo)
- continue;
- binfo = get_binfo_at_offset (binfo, anc_offset, otr_type);
- if (!binfo)
- continue;
- target = gimple_get_virt_method_for_binfo (token, binfo, &delta);
- }
- else
+ FOR_EACH_VEC_ELT (cgraph_edge_p, callers, j, cs)
{
- int types_count, j;
+ struct ipa_jump_func *jump_func;
+ tree t;
- if (ipa_param_cannot_devirtualize_p (info, param_index)
- || ipa_param_types_vec_empty (info, param_index))
- continue;
+ jump_func = ipa_get_ith_jump_func (IPA_EDGE_REF (cs), i);
- types_count = VEC_length (tree, info->params[param_index].types);
- for (j = 0; j < types_count; j++)
+ t = ipa_value_from_jfunc (IPA_NODE_REF (cs->caller), jump_func);
+ if (!t
+ || (newval
+ && !values_equal_for_ipcp_p (t, newval)))
{
- tree binfo = VEC_index (tree, info->params[param_index].types, j);
- tree d, t;
-
- binfo = get_binfo_at_offset (binfo, anc_offset, otr_type);
- if (!binfo)
- {
- target = NULL_TREE;
- break;
- }
-
- t = gimple_get_virt_method_for_binfo (token, binfo, &d);
- if (!t)
- {
- target = NULL_TREE;
- break;
- }
- else if (!target)
- {
- target = t;
- delta = d;
- }
- else if (target != t || !tree_int_cst_equal (delta, d))
- {
- target = NULL_TREE;
- break;
- }
+ newval = NULL_TREE;
+ break;
}
+ else
+ newval = t;
}
- if (target)
- ipa_make_edge_direct_to_target (ie, target, delta);
- }
-}
-
-/* Return number of live constant parameters. */
-static int
-ipcp_const_param_count (struct cgraph_node *node)
-{
- int const_param = 0;
- struct ipa_node_params *info = IPA_NODE_REF (node);
- int count = ipa_get_param_count (info);
- int i;
+ if (newval)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, " adding an extra known value ");
+ print_ipcp_constant_value (dump_file, newval);
+ fprintf (dump_file, " for parameter ");
+ print_generic_expr (dump_file, ipa_get_param (info, i), 0);
+ fprintf (dump_file, "\n");
+ }
- for (i = 0; i < count; i++)
- {
- struct ipcp_lattice *lat = ipa_get_lattice (info, i);
- if ((ipcp_lat_is_insertable (lat)
- /* Do not count obviously unused arguments. */
- && ipa_is_param_used (info, i))
- || (!ipa_param_cannot_devirtualize_p (info, i)
- && !ipa_param_types_vec_empty (info, i)))
- const_param++;
+ VEC_replace (tree, known_vals, i, newval);
+ }
}
- return const_param;
}
-/* Given that a formal parameter of NODE given by INDEX is known to be constant
- CST, try to find any indirect edges that can be made direct and make them
- so. Note that INDEX is the number the parameter at the time of analyzing
- parameter uses and parameter removals should not be considered for it. (In
- fact, the parameter itself has just been removed.) */
+/* Given an original NODE and a VAL for which we have already created a
+ specialized clone, look whether there are incoming edges that still lead
+ into the old node but now also bring the requested value and also conform to
+ all other criteria such that they can be redirected to the specialized node.
+ This function can therefore redirect the final edge in an SCC. */
static void
-ipcp_discover_new_direct_edges (struct cgraph_node *node, int index, tree cst)
+perhaps_add_new_callers (struct cgraph_node *node, struct ipcp_value *val)
{
- struct cgraph_edge *ie, *next_ie;
+ struct ipa_node_params *dest_info = IPA_NODE_REF (val->spec_node);
+ struct ipcp_value_source *src;
+ int count = ipa_get_param_count (dest_info);
+ gcov_type redirected_sum = 0;
- for (ie = node->indirect_calls; ie; ie = next_ie)
+ for (src = val->sources; src; src = src->next)
{
- struct cgraph_indirect_call_info *ici = ie->indirect_info;
+ struct cgraph_edge *cs = src->cs;
+ while (cs)
+ {
+ enum availability availability;
+ bool insufficient = false;
- next_ie = ie->next_callee;
- if (ici->param_index != index
- || ici->polymorphic)
- continue;
+ if (cgraph_function_node (cs->callee, &availability) == node
+ && availability > AVAIL_OVERWRITABLE
+ && cgraph_edge_brings_value_p (cs, src))
+ {
+ struct ipa_node_params *caller_info;
+ struct ipa_edge_args *args;
+ int i;
+
+ caller_info = IPA_NODE_REF (cs->caller);
+ args = IPA_EDGE_REF (cs);
+ for (i = 0; i < count; i++)
+ {
+ struct ipa_jump_func *jump_func;
+ tree val, t;
+
+ val = VEC_index (tree, dest_info->known_vals, i);
+ if (!val)
+ continue;
- ipa_make_edge_direct_to_target (ie, cst, NULL_TREE);
+ jump_func = ipa_get_ith_jump_func (args, i);
+ t = ipa_value_from_jfunc (caller_info, jump_func);
+ if (!t || !values_equal_for_ipcp_p (val, t))
+ {
+ insufficient = true;
+ break;
+ }
+ }
+
+ if (!insufficient)
+ {
+ if (dump_file)
+ fprintf (dump_file, " - adding an extra caller %s/%i"
+ " of %s/%i\n",
+ cgraph_node_name (cs->caller), cs->caller->uid,
+ cgraph_node_name (val->spec_node),
+ val->spec_node->uid);
+
+ cgraph_redirect_edge_callee (cs, val->spec_node);
+ redirected_sum += cs->count;
+ }
+ }
+ cs = get_next_cgraph_edge_clone (cs);
+ }
}
+
+ if (redirected_sum)
+ update_specialized_profile (val->spec_node, node, redirected_sum);
}
-/* Propagate the constant parameters found by ipcp_iterate_stage()
- to the function's code. */
+/* Copy all non-NULL entries of KNOWN_BINFOS into the corresponding slots of
+ KNOWN_VALS. */
+
static void
-ipcp_insert_stage (void)
+move_binfos_to_values (VEC (tree, heap) *known_vals,
+ VEC (tree, heap) *known_binfos)
{
- struct cgraph_node *node, *node1 = NULL;
+ tree t;
int i;
- VEC (cgraph_edge_p, heap) * redirect_callers;
- VEC (ipa_replace_map_p,gc)* replace_trees;
- int count;
- tree parm_tree;
- struct ipa_replace_map *replace_param;
- fibheap_t heap;
- long overall_size = 0, new_size = 0;
- long max_new_size;
- ipa_check_create_node_params ();
- ipa_check_create_edge_args ();
- if (dump_file)
- fprintf (dump_file, "\nIPA insert stage:\n\n");
-
- dead_nodes = BITMAP_ALLOC (NULL);
+ for (i = 0; VEC_iterate (tree, known_binfos, i, t); i++)
+ if (t)
+ VEC_replace (tree, known_vals, i, t);
+}
- FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
- {
- if (node->count > max_count)
- max_count = node->count;
- overall_size += inline_summary (node)->self_size;
- }
- max_new_size = overall_size;
- if (max_new_size < PARAM_VALUE (PARAM_LARGE_UNIT_INSNS))
- max_new_size = PARAM_VALUE (PARAM_LARGE_UNIT_INSNS);
- max_new_size = max_new_size * PARAM_VALUE (PARAM_IPCP_UNIT_GROWTH) / 100 + 1;
+/* Decide whether and what specialized clones of NODE should be created. */
- /* First collect all functions we proved to have constant arguments to
- heap. */
- heap = fibheap_new ();
- for (node = cgraph_nodes; node; node = node->next)
- {
- struct ipa_node_params *info;
- /* Propagation of the constant is forbidden in certain conditions. */
- if (!node->analyzed || !ipcp_node_modifiable_p (node))
- continue;
- info = IPA_NODE_REF (node);
- if (ipa_is_called_with_var_arguments (info))
- continue;
- if (ipcp_const_param_count (node))
- node->aux = fibheap_insert (heap, ipcp_estimate_cloning_cost (node),
- node);
- }
-
- /* Now clone in priority order until code size growth limits are met or
- heap is emptied. */
- while (!fibheap_empty (heap))
- {
- struct ipa_node_params *info;
- int growth = 0;
- bitmap args_to_skip;
- struct cgraph_edge *cs;
+static bool
+decide_whether_version_node (struct cgraph_node *node)
+{
+ struct ipa_node_params *info = IPA_NODE_REF (node);
+ int i, count = ipa_get_param_count (info);
+ VEC (tree, heap) *known_csts, *known_binfos;
+ bool ret = false;
- node = (struct cgraph_node *)fibheap_extract_min (heap);
- node->aux = NULL;
- if (dump_file)
- fprintf (dump_file, "considering function %s\n",
- cgraph_node_name (node));
+ if (count == 0)
+ return false;
- growth = ipcp_estimate_growth (node);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "\nEvaluating opportunities for %s/%i.\n",
+ cgraph_node_name (node), node->uid);
- if (new_size + growth > max_new_size)
- break;
- if (growth
- && cgraph_optimize_for_size_p (node))
- {
- if (dump_file)
- fprintf (dump_file, "Not versioning, cold code would grow");
- continue;
- }
+ gather_context_independent_values (info, &known_csts, &known_binfos,
+ NULL);
- info = IPA_NODE_REF (node);
- count = ipa_get_param_count (info);
+ for (i = 0; i < count ; i++)
+ {
+ struct ipcp_lattice *lat = ipa_get_lattice (info, i);
+ struct ipcp_value *val;
- replace_trees = VEC_alloc (ipa_replace_map_p, gc, 1);
+ if (lat->bottom
+ || VEC_index (tree, known_csts, i)
+ || VEC_index (tree, known_binfos, i))
+ continue;
- if (node->local.can_change_signature)
- args_to_skip = BITMAP_GGC_ALLOC ();
- else
- args_to_skip = NULL;
- for (i = 0; i < count; i++)
+ for (val = lat->values; val; val = val->next)
{
- struct ipcp_lattice *lat = ipa_get_lattice (info, i);
- parm_tree = ipa_get_param (info, i);
+ int freq_sum, caller_count;
+ gcov_type count_sum;
+ VEC (cgraph_edge_p, heap) *callers;
+ VEC (tree, heap) *kv;
- /* We can proactively remove obviously unused arguments. */
- if (!ipa_is_param_used (info, i))
+ if (val->spec_node)
{
- if (args_to_skip)
- bitmap_set_bit (args_to_skip, i);
+ perhaps_add_new_callers (node, val);
continue;
}
+ else if (val->local_size_cost + overall_size > max_new_size)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, " Ignoring candidate value because "
+ "max_new_size would be reached with %li.\n",
+ val->local_size_cost + overall_size);
+ continue;
+ }
+ else if (!get_info_about_necessary_edges (val, &freq_sum, &count_sum,
+ &caller_count))
+ continue;
- if (lat->type == IPA_CONST_VALUE)
+ if (dump_file && (dump_flags & TDF_DETAILS))
{
- replace_param =
- ipcp_create_replace_map (parm_tree, lat);
- if (replace_param == NULL)
- break;
- VEC_safe_push (ipa_replace_map_p, gc, replace_trees, replace_param);
- if (args_to_skip)
- bitmap_set_bit (args_to_skip, i);
+ fprintf (dump_file, " - considering value ");
+ print_ipcp_constant_value (dump_file, val->value);
+ fprintf (dump_file, " for parameter ");
+ print_generic_expr (dump_file, ipa_get_param (info, i), 0);
+ fprintf (dump_file, " (caller_count: %i)\n", caller_count);
}
- }
- if (i < count)
- {
+
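+ /* A value is evaluated twice, first counting only its local effects and
+ then with the effects propagated from values that depend on it added;
+ passing either check is enough to attempt cloning. */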
+ if (!good_cloning_opportunity_p (node, val->local_time_benefit,
+ freq_sum, count_sum,
+ val->local_size_cost)
+ && !good_cloning_opportunity_p (node,
+ val->local_time_benefit
+ + val->prop_time_benefit,
+ freq_sum, count_sum,
+ val->local_size_cost
+ + val->prop_size_cost))
+ continue;
+
if (dump_file)
- fprintf (dump_file, "Not versioning, some parameters couldn't be replaced");
- continue;
+ fprintf (dump_file, " Creating a specialized node of %s/%i.\n",
+ cgraph_node_name (node), node->uid);
+
+ callers = gather_edges_for_value (val, caller_count);
+ kv = VEC_copy (tree, heap, known_csts);
+ move_binfos_to_values (kv, known_binfos);
+ VEC_replace (tree, kv, i, val->value);
+ find_more_values_for_callers_subset (node, kv, callers);
+ val->spec_node = create_specialized_node (node, kv, callers);
+ overall_size += val->local_size_cost;
+ info = IPA_NODE_REF (node);
+
+ /* TODO: If for some lattice there is only one other known value
+ left, make a special node for it too. */
+ ret = true;
+
+ VEC_replace (tree, kv, i, val->value);
}
+ }
- new_size += growth;
+ if (info->clone_for_all_contexts)
+ {
+ VEC (cgraph_edge_p, heap) *callers;
- /* Look if original function becomes dead after cloning. */
- for (cs = node->callers; cs != NULL; cs = cs->next_caller)
- if (cs->caller == node || ipcp_need_redirect_p (cs))
- break;
- if (!cs && cgraph_will_be_removed_from_program_if_no_direct_calls (node))
- bitmap_set_bit (dead_nodes, node->uid);
+ if (dump_file)
+ fprintf (dump_file, " - Creating a specialized node of %s/%i "
+ "for all known contexts.\n", cgraph_node_name (node),
+ node->uid);
- redirect_callers = collect_callers_of_node (node);
+ callers = collect_callers_of_node (node);
+ move_binfos_to_values (known_csts, known_binfos);
+ create_specialized_node (node, known_csts, callers);
+ info = IPA_NODE_REF (node);
+ info->clone_for_all_contexts = false;
+ ret = true;
+ }
+ else
+ VEC_free (tree, heap, known_csts);
- /* Redirecting all the callers of the node to the
- new versioned node. */
- node1 =
- cgraph_create_virtual_clone (node, redirect_callers, replace_trees,
- args_to_skip, "constprop");
- args_to_skip = NULL;
- VEC_free (cgraph_edge_p, heap, redirect_callers);
- replace_trees = NULL;
+ VEC_free (tree, heap, known_binfos);
+ return ret;
+}
- if (node1 == NULL)
- continue;
- ipcp_process_devirtualization_opportunities (node1);
+/* Transitively mark all callees of NODE within the same SCC as not dead. */
- if (dump_file)
- fprintf (dump_file, "versioned function %s with growth %i, overall %i\n",
- cgraph_node_name (node), (int)growth, (int)new_size);
- ipcp_init_cloned_node (node, node1);
+static void
+spread_undeadness (struct cgraph_node *node)
+{
+ struct cgraph_edge *cs;
- info = IPA_NODE_REF (node);
- for (i = 0; i < count; i++)
- {
- struct ipcp_lattice *lat = ipa_get_lattice (info, i);
- if (lat->type == IPA_CONST_VALUE)
- ipcp_discover_new_direct_edges (node1, i, lat->constant);
- }
+ for (cs = node->callees; cs; cs = cs->next_callee)
+ if (edge_within_scc (cs))
+ {
+ struct cgraph_node *callee;
+ struct ipa_node_params *info;
- if (dump_file)
- dump_function_to_file (node1->decl, dump_file, dump_flags);
+ callee = cgraph_function_node (cs->callee, NULL);
+ info = IPA_NODE_REF (callee);
- for (cs = node->callees; cs; cs = cs->next_callee)
- {
- struct cgraph_node *callee = cgraph_function_or_thunk_node (cs->callee, NULL);
- if (callee->aux)
- {
- fibheap_delete_node (heap, (fibnode_t) callee->aux);
- callee->aux = fibheap_insert (heap,
- ipcp_estimate_cloning_cost (callee),
- callee);
- }
- }
+ if (info->node_dead)
+ {
+ info->node_dead = 0;
+ spread_undeadness (callee);
+ }
+ }
+}
+
+/* Return true if NODE has a caller from outside of its SCC that is not
+ dead. Worker callback for cgraph_for_node_and_aliases. */
+
+static bool
+has_undead_caller_from_outside_scc_p (struct cgraph_node *node,
+ void *data ATTRIBUTE_UNUSED)
+{
+ struct cgraph_edge *cs;
+
+ for (cs = node->callers; cs; cs = cs->next_caller)
+ if (cs->caller->thunk.thunk_p
+ && cgraph_for_node_and_aliases (cs->caller,
+ has_undead_caller_from_outside_scc_p,
+ NULL, true))
+ return true;
+ else if (!edge_within_scc (cs)
+ && !IPA_NODE_REF (cs->caller)->node_dead)
+ return true;
+ return false;
+}
+
+
+/* Identify nodes within the same SCC as NODE which are no longer needed
+ because of new clones and will be removed as unreachable. */
+
+static void
+identify_dead_nodes (struct cgraph_node *node)
+{
+ struct cgraph_node *v;
+ for (v = node; v ; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
+ if (cgraph_will_be_removed_from_program_if_no_direct_calls (v)
+ && !cgraph_for_node_and_aliases (v,
+ has_undead_caller_from_outside_scc_p,
+ NULL, true))
+ IPA_NODE_REF (v)->node_dead = 1;
+
+ for (v = node; v ; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
+ if (!IPA_NODE_REF (v)->node_dead)
+ spread_undeadness (v);
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ for (v = node; v ; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
+ if (IPA_NODE_REF (v)->node_dead)
+ fprintf (dump_file, " Marking node as dead: %s/%i.\n",
+ cgraph_node_name (v), v->uid);
}
+}
+
+/* The decision stage. Iterate over the topological order of call graph nodes
+ TOPO and make specialized clones if deemed beneficial. */
+
+static void
+ipcp_decision_stage (struct topo_info *topo)
+{
+ int i;
+
+ if (dump_file)
+ fprintf (dump_file, "\nIPA decision stage:\n\n");
- while (!fibheap_empty (heap))
+ for (i = topo->nnodes - 1; i >= 0; i--)
{
- if (dump_file)
- fprintf (dump_file, "skipping function %s\n",
- cgraph_node_name (node));
- node = (struct cgraph_node *) fibheap_extract_min (heap);
- node->aux = NULL;
+ struct cgraph_node *node = topo->order[i];
+ bool change = false, iterate = true;
+
+ while (iterate)
+ {
+ struct cgraph_node *v;
+ iterate = false;
+ for (v = node; v ; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
+ if (cgraph_function_with_gimple_body_p (v)
+ && ipcp_versionable_function_p (v))
+ iterate |= decide_whether_version_node (v);
+
+ change |= iterate;
+ }
+ if (change)
+ identify_dead_nodes (node);
}
- fibheap_delete (heap);
- BITMAP_FREE (dead_nodes);
- ipcp_update_callgraph ();
- ipcp_update_profiling ();
}
/* The IPCP driver. */
+
static unsigned int
ipcp_driver (void)
{
+ struct cgraph_2edge_hook_list *edge_duplication_hook_holder;
+ struct topo_info topo;
+
cgraph_remove_unreachable_nodes (true,dump_file);
+ ipa_check_create_node_params ();
+ ipa_check_create_edge_args ();
+ grow_next_edge_clone_vector ();
+ edge_duplication_hook_holder =
+ cgraph_add_edge_duplication_hook (&ipcp_edge_duplication_hook, NULL);
+ ipcp_values_pool = create_alloc_pool ("IPA-CP values",
+ sizeof (struct ipcp_value), 32);
+ ipcp_sources_pool = create_alloc_pool ("IPA-CP value sources",
+ sizeof (struct ipcp_value_source), 64);
if (dump_file)
{
fprintf (dump_file, "\nIPA structures before propagation:\n");
ipa_print_all_params (dump_file);
ipa_print_all_jump_functions (dump_file);
}
- ipa_check_create_node_params ();
- ipa_check_create_edge_args ();
- /* 2. Do the interprocedural propagation. */
- ipcp_iterate_stage ();
- /* 3. Insert the constants found to the functions. */
- ipcp_insert_stage ();
- if (dump_file && (dump_flags & TDF_DETAILS))
- {
- fprintf (dump_file, "\nProfiling info after insert stage:\n");
- ipcp_print_profile_data (dump_file);
- }
+
+ /* Topological sort. */
+ build_toporder_info (&topo);
+ /* Do the interprocedural propagation. */
+ ipcp_propagate_stage (&topo);
+ /* Decide what constant propagation and cloning should be performed. */
+ ipcp_decision_stage (&topo);
+
/* Free all IPCP structures. */
+ free_toporder_info (&topo);
+ VEC_free (cgraph_edge_p, heap, next_edge_clone);
+ cgraph_remove_edge_duplication_hook (edge_duplication_hook_holder);
ipa_free_all_structures_after_ipa_cp ();
if (dump_file)
fprintf (dump_file, "\nIPA constant propagation end\n");
}
/* Write ipcp summary for nodes in SET. */
+
static void
ipcp_write_summary (cgraph_node_set set,
varpool_node_set vset ATTRIBUTE_UNUSED)
}
/* Read ipcp summary. */
+
static void
ipcp_read_summary (void)
{
}
/* Gate for IPCP optimization. */
+
static bool
cgraph_gate_cp (void)
{