#include "timevar.h"
#include "flags.h"
#include "bitmap.h"
+#include "obstack.h"
+#include "target.h"
-/* Maximum number of fields that a structure should have to be scalarized.
- FIXME This limit has been arbitrarily set to 5. Experiment to find a
- sensible setting. */
-#define MAX_NFIELDS_FOR_SRA 5
+/* The objective of this pass is to replace a non-addressable aggregate with a
+ set of independent variables. Most of the time, all of these variables
+ will be scalars. But a secondary objective is to break up larger
+ aggregates into smaller aggregates. In the process we may find that some
+ bits of the larger aggregate can be deleted as unreferenced.
-/* Codes indicating how to copy one structure into another. */
-enum sra_copy_mode { SCALAR_SCALAR, FIELD_SCALAR, SCALAR_FIELD };
+ This substitution is done globally. More localized substitutions would
+ be the purview of a load-store motion pass.
+
+ The optimization proceeds in phases:
+
+ (1) Identify variables that have types that are candidates for
+ decomposition.
+
+ (2) Scan the function looking for the ways these variables are used.
+ In particular we're interested in the number of times a variable
+ (or member) is needed as a complete unit, and the number of times
+ a variable (or member) is copied.
+
+ (3) Based on the usage profile, instantiate substitution variables.
+
+ (4) Scan the function making replacements.
+*/
-/* Local functions. */
-static inline bool can_be_scalarized_p (tree);
-static tree create_scalar_copies (tree lhs, tree rhs, enum sra_copy_mode mode);
-static inline void scalarize_component_ref (tree, tree *tp);
-static void scalarize_structures (void);
-static void scalarize_stmt (block_stmt_iterator *);
-static void scalarize_modify_expr (block_stmt_iterator *);
-static void scalarize_call_expr (block_stmt_iterator *);
-static void scalarize_asm_expr (block_stmt_iterator *);
-static void scalarize_return_expr (block_stmt_iterator *);
/* The set of aggregate variables that are candidates for scalarization. */
static bitmap sra_candidates;
/* Set of scalarizable PARM_DECLs that need copy-in operations at the
   beginning of the function. */
static bitmap needs_copy_in;
-/* This structure holds the mapping between and element of an aggregate
- and the scalar replacement variable. */
+/* Sets of bit pairs that cache type decomposition and instantiation. */
+static bitmap sra_type_decomp_cache;
+static bitmap sra_type_inst_cache;
+
+/* One of these structures is created for each candidate aggregate
+ and each (accessed) member of such an aggregate. */
struct sra_elt
{
- enum tree_code kind;
- tree base;
- tree field;
- tree replace;
-};
-
-static htab_t sra_map;
+ /* A tree of the elements. Used when we want to traverse everything. */
+ struct sra_elt *parent;
+ struct sra_elt *children;
+ struct sra_elt *sibling;
-static hashval_t
-sra_elt_hash (const void *x)
-{
- const struct sra_elt *e = x;
- hashval_t h = (size_t) e->base * e->kind;
- if (e->kind == COMPONENT_REF)
- h ^= (size_t) e->field;
- return h;
-}
+ /* If this element is a root, then this is the VAR_DECL. If this is
+ a sub-element, this is some token used to identify the reference.
+ In the case of COMPONENT_REF, this is the FIELD_DECL. In the case
+ of an ARRAY_REF, this is the (constant) index. In the case of a
+ complex number, this is a zero or one. */
+ tree element;
-static int
-sra_elt_eq (const void *x, const void *y)
-{
- const struct sra_elt *a = x;
- const struct sra_elt *b = y;
+ /* The type of the element. */
+ tree type;
- if (a->kind != b->kind)
- return false;
- if (a->base != b->base)
- return false;
- if (a->kind == COMPONENT_REF)
- if (a->field != b->field)
- return false;
+ /* A VAR_DECL, for any sub-element we've decided to replace. */
+ tree replacement;
- return true;
-}
+ /* The number of times the element is referenced as a whole. I.e.
+ given "a.b.c", this would be incremented for C, but not for A or B. */
+ unsigned int n_uses;
-/* Mark all the variables in V_MAY_DEF operands for STMT for renaming.
- This becomes necessary when we modify all of a non-scalar. */
+ /* The number of times the element is copied to or from another
+ scalarizable element. */
+ unsigned int n_copies;
-static void
-mark_all_v_may_defs (tree stmt)
-{
- v_may_def_optype v_may_defs;
- size_t i, n;
+ /* True if TYPE is scalar. */
+ bool is_scalar;
- get_stmt_operands (stmt);
- v_may_defs = V_MAY_DEF_OPS (stmt_ann (stmt));
- n = NUM_V_MAY_DEFS (v_may_defs);
+ /* True if we saw something about this element that prevents scalarization,
+ such as non-constant indexing. */
+ bool cannot_scalarize;
- for (i = 0; i < n; i++)
- {
- tree sym = V_MAY_DEF_RESULT (v_may_defs, i);
- bitmap_set_bit (vars_to_rename, var_ann (sym)->uid);
- }
-}
+ /* True if we've decided that structure-to-structure assignment
+ should happen via memcpy and not per-element. */
+ bool use_block_copy;
-/* Mark all the variables in V_MUST_DEF operands for STMT for renaming.
- This becomes necessary when we modify all of a non-scalar. */
+ /* A flag for use with/after random access traversals. */
+ bool visited;
+};
-static void
-mark_all_v_must_defs (tree stmt)
-{
- v_must_def_optype v_must_defs;
- size_t i, n;
+/* Random access to the child of a parent is performed by hashing.
+ This prevents quadratic behaviour, and allows SRA to function
+ reasonably on larger records. */
+static htab_t sra_map;
- get_stmt_operands (stmt);
- v_must_defs = V_MUST_DEF_OPS (stmt_ann (stmt));
- n = NUM_V_MUST_DEFS (v_must_defs);
+/* All structures are allocated out of the following obstack. */
+static struct obstack sra_obstack;
- for (i = 0; i < n; i++)
- {
- tree sym = V_MUST_DEF_OP (v_must_defs, i);
- bitmap_set_bit (vars_to_rename, var_ann (sym)->uid);
- }
-}
+/* Debugging functions. */
+static void dump_sra_elt_name (FILE *, struct sra_elt *);
+extern void debug_sra_elt_name (struct sra_elt *);
+\f
/* Return true if DECL is an SRA candidate. */
static bool
return DECL_P (decl) && bitmap_bit_p (sra_candidates, var_ann (decl)->uid);
}
-/* Return true if EXP is of the form <ref decl>, where REF is one of the
- field access references we handle and DECL is an SRA candidate. */
+/* Return true if TYPE is a scalar type. */
static bool
-is_sra_candidate_ref (tree exp)
+is_sra_scalar_type (tree type)
{
- switch (TREE_CODE (exp))
- {
- case COMPONENT_REF:
- case REALPART_EXPR:
- case IMAGPART_EXPR:
- return is_sra_candidate_decl (TREE_OPERAND (exp, 0));
-
- default:
- break;
- }
-
- return false;
+ enum tree_code code = TREE_CODE (type);
+ return (code == INTEGER_TYPE || code == REAL_TYPE || code == VECTOR_TYPE
+ || code == ENUMERAL_TYPE || code == BOOLEAN_TYPE
+ || code == CHAR_TYPE || code == POINTER_TYPE || code == OFFSET_TYPE
+ || code == REFERENCE_TYPE);
}
-/* Return true if EXP is of the form <ref decl>, where REF is a nest of
- references handled by handle_components_p and DECL is an SRA candidate.
- *VAR_P is set to DECL. */
+/* Return true if TYPE can be decomposed into a set of independent variables.
+
+ Note that this doesn't imply that all elements of TYPE can be
+ instantiated, just that if we decide to break up the type into
+ separate pieces that it can be done. */
static bool
-is_sra_candidate_complex_ref (tree exp, tree *var_p)
+type_can_be_decomposed_p (tree type)
{
- tree orig_exp = exp;
-
- while (TREE_CODE (exp) == REALPART_EXPR || TREE_CODE (exp) == IMAGPART_EXPR
- || handled_component_p (exp))
- exp = TREE_OPERAND (exp, 0);
+ unsigned int cache = TYPE_UID (TYPE_MAIN_VARIANT (type)) * 2;
+ tree t;
- if (orig_exp != exp && is_sra_candidate_decl (exp))
- {
- *var_p = exp;
- return true;
- }
-
- return false;
-}
-
-/* Return the scalar in SRA_MAP[VAR_IX][FIELD_IX]. If none exists, create
- a new scalar with type TYPE. */
+ /* Avoid searching the same type twice. */
+ if (bitmap_bit_p (sra_type_decomp_cache, cache+0))
+ return true;
+ if (bitmap_bit_p (sra_type_decomp_cache, cache+1))
+ return false;
-static tree
-lookup_scalar (struct sra_elt *key, tree type)
-{
- struct sra_elt **slot, *res;
+ /* The type must have a definite non-zero size. */
+ if (TYPE_SIZE (type) == NULL || integer_zerop (TYPE_SIZE (type)))
+ goto fail;
- slot = (struct sra_elt **) htab_find_slot (sra_map, key, INSERT);
- res = *slot;
- if (!res)
+ /* The type must be a non-union aggregate. */
+ switch (TREE_CODE (type))
{
- res = xmalloc (sizeof (*res));
- *slot = res;
- *res = *key;
- res->replace = make_rename_temp (type, "SR");
+ case RECORD_TYPE:
+ {
+ bool saw_one_field = false;
- if (DECL_NAME (key->base) && !DECL_IGNORED_P (key->base))
- {
- char *name = NULL;
- switch (key->kind)
- {
- case COMPONENT_REF:
- if (!DECL_NAME (key->field))
- break;
- name = concat (IDENTIFIER_POINTER (DECL_NAME (key->base)),
- "$",
- IDENTIFIER_POINTER (DECL_NAME (key->field)),
- NULL);
- break;
- case REALPART_EXPR:
- name = concat (IDENTIFIER_POINTER (DECL_NAME (key->base)),
- "$real", NULL);
- break;
- case IMAGPART_EXPR:
- name = concat (IDENTIFIER_POINTER (DECL_NAME (key->base)),
- "$imag", NULL);
- break;
- default:
- abort ();
- }
- if (name)
+ for (t = TYPE_FIELDS (type); t ; t = TREE_CHAIN (t))
+ if (TREE_CODE (t) == FIELD_DECL)
{
- DECL_NAME (res->replace) = get_identifier (name);
- free (name);
- }
- }
-
- DECL_SOURCE_LOCATION (res->replace) = DECL_SOURCE_LOCATION (key->base);
- TREE_NO_WARNING (res->replace) = TREE_NO_WARNING (key->base);
- DECL_ARTIFICIAL (res->replace) = DECL_ARTIFICIAL (key->base);
- }
-
- return res->replace;
-}
-
-
-/* Given a structure reference VAR.FIELD, return a scalar representing it.
- If no scalar is found, a new one is created and added to the SRA_MAP
- matrix. */
-
-static tree
-get_scalar_for_field (tree var, tree field)
-{
- struct sra_elt key;
-
-#ifdef ENABLE_CHECKING
- /* Validate that FIELD actually exists in VAR's type. */
- {
- tree f;
- for (f = TYPE_FIELDS (TREE_TYPE (var)); f ; f = TREE_CHAIN (f))
- if (f == field)
- goto found;
- abort ();
- found:;
- }
-#endif
+ /* Reject incorrectly represented bit fields. */
+ if (DECL_BIT_FIELD (t)
+ && (tree_low_cst (DECL_SIZE (t), 1)
+ != TYPE_PRECISION (TREE_TYPE (t))))
+ goto fail;
- key.kind = COMPONENT_REF;
- key.base = var;
- key.field = field;
+ saw_one_field = true;
+ }
- return lookup_scalar (&key, TREE_TYPE (field));
-}
+ /* Record types must have at least one field. */
+ if (!saw_one_field)
+ goto fail;
+ }
+ break;
+ case ARRAY_TYPE:
+ /* Array types must have a fixed lower and upper bound. */
+ t = TYPE_DOMAIN (type);
+ if (t == NULL)
+ goto fail;
+ if (TYPE_MIN_VALUE (t) == NULL || !TREE_CONSTANT (TYPE_MIN_VALUE (t)))
+ goto fail;
+ if (TYPE_MAX_VALUE (t) == NULL || !TREE_CONSTANT (TYPE_MAX_VALUE (t)))
+ goto fail;
+ break;
-/* Similarly for the parts of a complex type. */
+ case COMPLEX_TYPE:
+ break;
-static tree
-get_scalar_for_complex_part (tree var, enum tree_code part)
-{
- struct sra_elt key;
+ default:
+ goto fail;
+ }
- key.kind = part;
- key.base = var;
+ bitmap_set_bit (sra_type_decomp_cache, cache+0);
+ return true;
- return lookup_scalar (&key, TREE_TYPE (TREE_TYPE (var)));
+ fail:
+ bitmap_set_bit (sra_type_decomp_cache, cache+1);
+ return false;
}
-/* Return true if the fields of VAR can be replaced by scalar temporaries.
- This only happens if VAR is not call-clobbered and it contains less
- than MAX_NFIELDS_FOR_SRA scalar fields. */
+/* Return true if DECL can be decomposed into a set of independent
+ (though not necessarily scalar) variables. */
-static inline bool
-can_be_scalarized_p (tree var)
+static bool
+decl_can_be_decomposed_p (tree var)
{
- tree field, type;
- int nfields;
+ /* Early out for scalars. */
+ if (is_sra_scalar_type (TREE_TYPE (var)))
+ return false;
+ /* The variable must not be aliased. */
if (!is_gimple_non_addressable (var))
{
if (dump_file && (dump_flags & TDF_DETAILS))
return false;
}
+ /* The variable must not be volatile. */
if (TREE_THIS_VOLATILE (var))
{
if (dump_file && (dump_flags & TDF_DETAILS))
return false;
}
- /* Any COMPLEX_TYPE that has reached this point can be scalarized. */
- if (TREE_CODE (TREE_TYPE (var)) == COMPLEX_TYPE)
+ /* We must be able to decompose the variable's type. */
+ if (!type_can_be_decomposed_p (TREE_TYPE (var)))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "Cannot scalarize variable ");
+ print_generic_expr (dump_file, var, dump_flags);
+ fprintf (dump_file, " because its type cannot be decomposed\n");
+ }
+ return false;
+ }
+
+ return true;
+}
+
+/* Return true if TYPE can be *completely* decomposed into scalars. */
+
+static bool
+type_can_instantiate_all_elements (tree type)
+{
+ if (is_sra_scalar_type (type))
return true;
+ if (!type_can_be_decomposed_p (type))
+ return false;
- type = TREE_TYPE (var);
- nfields = 0;
- for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
+ switch (TREE_CODE (type))
{
- if (TREE_CODE (field) != FIELD_DECL)
- continue;
+ case RECORD_TYPE:
+ {
+ unsigned int cache = TYPE_UID (TYPE_MAIN_VARIANT (type)) * 2;
+ tree f;
- /* FIXME: We really should recurse down the type hierarchy and
- scalarize the fields at the leaves. */
- if (AGGREGATE_TYPE_P (TREE_TYPE (field)))
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
- {
- fprintf (dump_file, "Cannot scalarize variable ");
- print_generic_expr (dump_file, var, dump_flags);
- fprintf (dump_file,
- " because it contains an aggregate type field, ");
- print_generic_expr (dump_file, field, dump_flags);
- fprintf (dump_file, "\n");
- }
+ if (bitmap_bit_p (sra_type_inst_cache, cache+0))
+ return true;
+ if (bitmap_bit_p (sra_type_inst_cache, cache+1))
return false;
- }
- /* FIXME: Similarly. Indeed, considering that we treat complex
- as an aggregate, this is exactly the same problem.
- Structures with __complex__ fields are tested in the libstdc++
- testsuite: 26_numerics/complex_inserters_extractors.cc. */
- if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
+ for (f = TYPE_FIELDS (type); f ; f = TREE_CHAIN (f))
+ if (TREE_CODE (f) == FIELD_DECL)
{
- fprintf (dump_file, "Cannot scalarize variable ");
- print_generic_expr (dump_file, var, dump_flags);
- fprintf (dump_file,
- " because it contains a __complex__ field, ");
- print_generic_expr (dump_file, field, dump_flags);
- fprintf (dump_file, "\n");
+ if (!type_can_instantiate_all_elements (TREE_TYPE (f)))
+ {
+ bitmap_set_bit (sra_type_inst_cache, cache+1);
+ return false;
+ }
}
- return false;
- }
- /* FIXME. We don't scalarize structures with bit fields yet. To
- support this, we should make sure that all the fields fit in one
- word and modify every operation done on the scalarized bit fields
- to mask them properly. */
- if (DECL_BIT_FIELD (field))
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
- {
- fprintf (dump_file, "Cannot scalarize variable ");
- print_generic_expr (dump_file, var, dump_flags);
- fprintf (dump_file,
- " because it contains a bit-field, ");
- print_generic_expr (dump_file, field, dump_flags);
- fprintf (dump_file, "\n");
- }
- return false;
- }
+ bitmap_set_bit (sra_type_inst_cache, cache+0);
+ return true;
+ }
- nfields++;
- if (nfields > MAX_NFIELDS_FOR_SRA)
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
- {
- fprintf (dump_file, "Cannot scalarize variable ");
- print_generic_expr (dump_file, var, dump_flags);
- fprintf (dump_file,
- " because it contains more than %d fields\n",
- MAX_NFIELDS_FOR_SRA);
- }
- return false;
- }
- }
+ case ARRAY_TYPE:
+ return type_can_instantiate_all_elements (TREE_TYPE (type));
- /* If the structure had no FIELD_DECLs, then don't bother
- scalarizing it. */
- return nfields > 0;
-}
+ case COMPLEX_TYPE:
+ return true;
+ default:
+ abort ();
+ }
+}
-/* Replace the COMPONENT_REF, REALPART_EXPR or IMAGPART_EXPR pointed-to by
- TP inside STMT with the corresponding scalar replacement from SRA_MAP. */
+/* Test whether ELT or some sub-element cannot be scalarized. */
-static inline void
-scalarize_component_ref (tree stmt, tree *tp)
+static bool
+can_completely_scalarize_p (struct sra_elt *elt)
{
- tree t = *tp, obj = TREE_OPERAND (t, 0);
+ struct sra_elt *c;
+
+ if (elt->cannot_scalarize)
+ return false;
+
+ for (c = elt->children; c ; c = c->sibling)
+ if (!can_completely_scalarize_p (c))
+ return false;
+
+ return true;
+}
- /* When scalarizing a function argument, we will need to insert copy-in
- operations from the original PARM_DECLs. Note that these copy-in
- operations may end up being dead, but we won't know until we rename
- the new variables into SSA. */
- if (TREE_CODE (obj) == PARM_DECL)
- bitmap_set_bit (needs_copy_in, var_ann (obj)->uid);
+\f
+/* A simplified tree hashing algorithm that only handles the types of
+ trees we expect to find in sra_elt->element. */
+static hashval_t
+sra_hash_tree (tree t)
+{
switch (TREE_CODE (t))
{
- case COMPONENT_REF:
- t = get_scalar_for_field (obj, TREE_OPERAND (t, 1));
- break;
- case REALPART_EXPR:
- case IMAGPART_EXPR:
- t = get_scalar_for_complex_part (obj, TREE_CODE (t));
- break;
+ case VAR_DECL:
+ case PARM_DECL:
+ case RESULT_DECL:
+ case FIELD_DECL:
+ return DECL_UID (t);
+ case INTEGER_CST:
+ return TREE_INT_CST_LOW (t) ^ TREE_INT_CST_HIGH (t);
default:
abort ();
}
-
- *tp = t;
- modify_stmt (stmt);
}
+/* Hash function for type SRA_ELT. */
-/* Scalarize the structure assignment for the statement pointed by SI_P. */
-
-static void
-scalarize_structure_assignment (block_stmt_iterator *si_p)
+static hashval_t
+sra_elt_hash (const void *x)
{
- var_ann_t lhs_ann, rhs_ann;
- tree lhs, rhs, list, orig_stmt;
- bool lhs_can, rhs_can;
-
- orig_stmt = bsi_stmt (*si_p);
- lhs = TREE_OPERAND (orig_stmt, 0);
- rhs = TREE_OPERAND (orig_stmt, 1);
- list = NULL_TREE;
+ const struct sra_elt *e = x;
+ const struct sra_elt *p;
+ hashval_t h;
-#if defined ENABLE_CHECKING
- if (TREE_CODE (orig_stmt) != MODIFY_EXPR)
- abort ();
-#endif
+ h = sra_hash_tree (e->element);
- /* Remove all type casts from RHS. This may seem heavy handed but
- it's actually safe and it is necessary in the presence of C++
- reinterpret_cast<> where structure assignments of different
- structures will be present in the IL. This was the case of PR
- 13347 (http://gcc.gnu.org/bugzilla/show_bug.cgi?id=13347) which
- had something like this:
-
- struct A f;
- struct B g;
- f = (struct A)g;
-
- Both 'f' and 'g' were scalarizable, but the presence of the type
- cast was causing SRA to not replace the RHS of the assignment
- with g's scalar replacements. Furthermore, the fact that this
- assignment reached this point without causing syntax errors means
- that the type cast is safe and that a field-by-field assignment
- from 'g' into 'f' is the right thing to do. */
- STRIP_NOPS (rhs);
-
- lhs_ann = DECL_P (lhs) ? var_ann (lhs) : NULL;
- rhs_ann = DECL_P (rhs) ? var_ann (rhs) : NULL;
-
-#if defined ENABLE_CHECKING
- /* Two different variables should not have the same UID. */
- if (lhs_ann
- && rhs_ann
- && lhs != rhs
- && lhs_ann->uid == rhs_ann->uid)
- abort ();
-#endif
+ /* Take into account everything back up the chain. Given that chain
+ lengths are rarely very long, this should be acceptable. If we
+ truly identify this as a performance problem, it should work to
+ hash the pointer value "e->parent". */
+ for (p = e->parent; p ; p = p->parent)
+ h = (h * 65521) ^ sra_hash_tree (p->element);
- lhs_can = lhs_ann && bitmap_bit_p (sra_candidates, lhs_ann->uid);
- rhs_can = rhs_ann && bitmap_bit_p (sra_candidates, rhs_ann->uid);
+ return h;
+}
+
+/* Equality function for type SRA_ELT. */
- /* Both LHS and RHS are scalarizable. */
- if (lhs_can && rhs_can)
- list = create_scalar_copies (lhs, rhs, SCALAR_SCALAR);
+static int
+sra_elt_eq (const void *x, const void *y)
+{
+ const struct sra_elt *a = x;
+ const struct sra_elt *b = y;
- /* Only RHS is scalarizable. */
- else if (rhs_can)
- list = create_scalar_copies (lhs, rhs, FIELD_SCALAR);
+ if (a->parent != b->parent)
+ return false;
- /* Only LHS is scalarizable. */
- else if (lhs_can)
- list = create_scalar_copies (lhs, rhs, SCALAR_FIELD);
+ /* All the field/decl stuff is unique. */
+ if (a->element == b->element)
+ return true;
- /* If neither side is scalarizable, do nothing else. */
+ /* The only thing left is integer equality. */
+ if (TREE_CODE (a->element) == INTEGER_CST
+ && TREE_CODE (b->element) == INTEGER_CST)
+ return tree_int_cst_equal (a->element, b->element);
else
- return;
-
- /* Set line number information for our replacements. */
- if (EXPR_HAS_LOCATION (orig_stmt))
- annotate_all_with_locus (&list, EXPR_LOCATION (orig_stmt));
-
- /* Replace the existing statement with the newly created list of
- scalarized copies. When replacing the original statement, the first
- copy replaces it and the remaining copies are inserted either after
- the first copy or on the outgoing edges of the original statement's
- block. */
- {
- tree_stmt_iterator tsi = tsi_start (list);
- bsi_replace (si_p, tsi_stmt (tsi), true);
- tsi_delink (&tsi);
- if (stmt_ends_bb_p (orig_stmt))
- insert_edge_copies (list, bb_for_stmt (orig_stmt));
- else
- bsi_insert_after (si_p, list, BSI_CONTINUE_LINKING);
- }
+ return false;
}
+/* Create or return the SRA_ELT structure for CHILD in PARENT. PARENT
+ may be null, in which case CHILD must be a DECL. */
-/* Traverse all the referenced variables in the program looking for
- structures that could be replaced with scalars. */
-
-static bool
-find_candidates_for_sra (void)
+static struct sra_elt *
+lookup_element (struct sra_elt *parent, tree child, tree type,
+ enum insert_option insert)
{
- size_t i;
- bool any_set = false;
+ struct sra_elt dummy;
+ struct sra_elt **slot;
+ struct sra_elt *elt;
- for (i = 0; i < num_referenced_vars; i++)
+ dummy.parent = parent;
+ dummy.element = child;
+
+ slot = (struct sra_elt **) htab_find_slot (sra_map, &dummy, insert);
+ if (!slot && insert == NO_INSERT)
+ return NULL;
+
+ elt = *slot;
+ if (!elt && insert == INSERT)
{
- tree var = referenced_var (i);
+ *slot = elt = obstack_alloc (&sra_obstack, sizeof (*elt));
+ memset (elt, 0, sizeof (*elt));
+
+ elt->parent = parent;
+ elt->element = child;
+ elt->type = type;
+ elt->is_scalar = is_sra_scalar_type (type);
+
+ if (parent)
+ {
+ elt->sibling = parent->children;
+ parent->children = elt;
+ }
- if ((TREE_CODE (TREE_TYPE (var)) == RECORD_TYPE
- || TREE_CODE (TREE_TYPE (var)) == COMPLEX_TYPE)
- && can_be_scalarized_p (var))
+ /* If this is a parameter, then if we want to scalarize, we have
+ one copy from the true function parameter. Count it now. */
+ if (TREE_CODE (child) == PARM_DECL)
{
- bitmap_set_bit (sra_candidates, var_ann (var)->uid);
- any_set = true;
+ elt->n_copies = 1;
+ bitmap_set_bit (needs_copy_in, var_ann (child)->uid);
}
}
- return any_set;
+ return elt;
}
+/* Return true if the ARRAY_REF in EXPR is a constant, in bounds access. */
-/* Insert STMT on all the outgoing edges out of BB. Note that if BB
- has more than one edge, STMT will be replicated for each edge. Also,
- abnormal edges will be ignored. */
-
-void
-insert_edge_copies (tree stmt, basic_block bb)
+static bool
+is_valid_const_index (tree expr)
{
- edge e;
- bool first_copy;
+ tree dom, t, index = TREE_OPERAND (expr, 1);
- first_copy = true;
- for (e = bb->succ; e; e = e->succ_next)
- {
- /* We don't need to insert copies on abnormal edges. The
- value of the scalar replacement is not guaranteed to
- be valid through an abnormal edge. */
- if (!(e->flags & EDGE_ABNORMAL))
- {
- if (first_copy)
- {
- bsi_insert_on_edge (e, stmt);
- first_copy = false;
- }
- else
- bsi_insert_on_edge (e, lhd_unsave_expr_now (stmt));
- }
- }
-}
+ if (TREE_CODE (index) != INTEGER_CST)
+ return false;
+ /* Watch out for stupid user tricks, indexing outside the array.
-/* Append a new assignment statement to TSI. */
+ Careful, we're not called only on scalarizable types, so do not
+ assume constant array bounds. We needn't do anything with such
+ cases, since they'll be referring to objects that we should have
+ already rejected for scalarization, so returning false is fine. */
-static tree
-csc_assign (tree_stmt_iterator *tsi, tree lhs, tree rhs)
+ dom = TYPE_DOMAIN (TREE_TYPE (TREE_OPERAND (expr, 0)));
+ if (dom == NULL)
+ return false;
+
+ t = TYPE_MIN_VALUE (dom);
+ if (!t || TREE_CODE (t) != INTEGER_CST)
+ return false;
+ if (tree_int_cst_lt (index, t))
+ return false;
+
+ t = TYPE_MAX_VALUE (dom);
+ if (!t || TREE_CODE (t) != INTEGER_CST)
+ return false;
+ if (tree_int_cst_lt (t, index))
+ return false;
+
+ return true;
+}
+
+/* Create or return the SRA_ELT structure for EXPR if the expression
+ refers to a scalarizable variable. */
+
+static struct sra_elt *
+maybe_lookup_element_for_expr (tree expr)
{
- tree stmt = build (MODIFY_EXPR, TREE_TYPE (lhs), lhs, rhs);
- modify_stmt (stmt);
- tsi_link_after (tsi, stmt, TSI_NEW_STMT);
- return stmt;
+ struct sra_elt *elt;
+ tree child;
+
+ switch (TREE_CODE (expr))
+ {
+ case VAR_DECL:
+ case PARM_DECL:
+ case RESULT_DECL:
+ if (is_sra_candidate_decl (expr))
+ return lookup_element (NULL, expr, TREE_TYPE (expr), INSERT);
+ return NULL;
+
+ case ARRAY_REF:
+ /* We can't scalarize variable array indices. */
+ if (is_valid_const_index (expr))
+ child = TREE_OPERAND (expr, 1);
+ else
+ return NULL;
+ break;
+
+ case COMPONENT_REF:
+ /* Don't look through unions. */
+ if (TREE_CODE (TREE_TYPE (TREE_OPERAND (expr, 0))) != RECORD_TYPE)
+ return NULL;
+ child = TREE_OPERAND (expr, 1);
+ break;
+
+ case REALPART_EXPR:
+ child = integer_zero_node;
+ break;
+ case IMAGPART_EXPR:
+ child = integer_one_node;
+ break;
+
+ default:
+ return NULL;
+ }
+
+ elt = maybe_lookup_element_for_expr (TREE_OPERAND (expr, 0));
+ if (elt)
+ return lookup_element (elt, child, TREE_TYPE (expr), INSERT);
+ return NULL;
+}
+
+\f
+/* Functions to walk just enough of the tree to see all scalarizable
+ references, and categorize them. */
+
+/* A set of callbacks for phases 2 and 4. They'll be invoked for the
+ various kinds of references seen. In all cases, *BSI is an iterator
+ pointing to the statement being processed. */
+struct sra_walk_fns
+{
+ /* Invoked when ELT is required as a unit. Note that ELT might refer to
+ a leaf node, in which case this is a simple scalar reference. *EXPR_P
+ points to the location of the expression. IS_OUTPUT is true if this
+ is a left-hand-side reference. */
+ void (*use) (struct sra_elt *elt, tree *expr_p,
+ block_stmt_iterator *bsi, bool is_output);
+
+ /* Invoked when we have a copy between two scalarizable references. */
+ void (*copy) (struct sra_elt *lhs_elt, struct sra_elt *rhs_elt,
+ block_stmt_iterator *bsi);
+
+ /* Invoked when ELT is initialized from a constant. VALUE may be NULL,
+ in which case it should be treated as an empty CONSTRUCTOR. */
+ void (*init) (struct sra_elt *elt, tree value, block_stmt_iterator *bsi);
+
+ /* Invoked when we have a copy between one scalarizable reference ELT
+ and one non-scalarizable reference OTHER. IS_OUTPUT is true if ELT
+ is on the left-hand side. */
+ void (*ldst) (struct sra_elt *elt, tree other,
+ block_stmt_iterator *bsi, bool is_output);
+
+ /* True during phase 2, false during phase 4. */
+ /* ??? This is a hack. */
+ bool initial_scan;
+};
+
+#ifdef ENABLE_CHECKING
+/* Invoked via walk_tree, if *TP contains a candidate decl, return it. */
+
+static tree
+sra_find_candidate_decl (tree *tp, int *walk_subtrees,
+ void *data ATTRIBUTE_UNUSED)
+{
+ tree t = *tp;
+ enum tree_code code = TREE_CODE (t);
+
+ if (code == VAR_DECL || code == PARM_DECL || code == RESULT_DECL)
+ {
+ *walk_subtrees = 0;
+ if (is_sra_candidate_decl (t))
+ return t;
+ }
+ else if (TYPE_P (t))
+ *walk_subtrees = 0;
+
+ return NULL;
+}
+#endif
+
+/* Walk most expressions looking for a scalarizable aggregate.
+ If we find one, invoke FNS->USE. */
+
+static void
+sra_walk_expr (tree *expr_p, block_stmt_iterator *bsi, bool is_output,
+ const struct sra_walk_fns *fns)
+{
+ tree expr = *expr_p;
+ tree inner = expr;
+
+ /* We're looking to collect a reference expression between EXPR and INNER,
+ such that INNER is a scalarizable decl and all other nodes through EXPR
+ are references that we can scalarize. If we come across something that
+ we can't scalarize, we reset EXPR. This has the effect of making it
+ appear that we're referring to the larger expression as a whole. */
+
+ while (1)
+ switch (TREE_CODE (inner))
+ {
+ case VAR_DECL:
+ case PARM_DECL:
+ case RESULT_DECL:
+ /* If there is a scalarizable decl at the bottom, then process it. */
+ if (is_sra_candidate_decl (inner))
+ {
+ struct sra_elt *elt = maybe_lookup_element_for_expr (expr);
+ fns->use (elt, expr_p, bsi, is_output);
+ }
+ return;
+
+ case ARRAY_REF:
+ /* Non-constant index means any member may be accessed. Prevent the
+ expression from being scalarized. If we were to treat this as a
+ reference to the whole array, we can wind up with a single dynamic
+ index reference inside a loop being overridden by several constant
+ index references during loop setup. It's possible that this could
+ be avoided by using dynamic usage counts based on BB trip counts
+ (based on loop analysis or profiling), but that hardly seems worth
+ the effort. */
+ /* ??? Hack. Figure out how to push this into the scan routines
+ without duplicating too much code. */
+ if (!is_valid_const_index (inner))
+ {
+ if (fns->initial_scan)
+ {
+ struct sra_elt *elt
+ = maybe_lookup_element_for_expr (TREE_OPERAND (inner, 0));
+ if (elt)
+ elt->cannot_scalarize = true;
+ }
+ return;
+ }
+ /* ??? Are we assured that non-constant bounds and stride will have
+ the same value everywhere? I don't think Fortran will... */
+ if (TREE_OPERAND (inner, 2) || TREE_OPERAND (inner, 3))
+ goto use_all;
+ inner = TREE_OPERAND (inner, 0);
+ break;
+
+ case COMPONENT_REF:
+ /* A reference to a union member constitutes a reference to the
+ entire union. */
+ if (TREE_CODE (TREE_TYPE (TREE_OPERAND (inner, 0))) != RECORD_TYPE)
+ goto use_all;
+ /* ??? See above re non-constant stride. */
+ if (TREE_OPERAND (inner, 2))
+ goto use_all;
+ inner = TREE_OPERAND (inner, 0);
+ break;
+
+ case REALPART_EXPR:
+ case IMAGPART_EXPR:
+ inner = TREE_OPERAND (inner, 0);
+ break;
+
+ case BIT_FIELD_REF:
+ /* A bit field reference (access to *multiple* fields simultaneously)
+ is not currently scalarized. Consider this an access to the
+ complete outer element, to which walk_tree will bring us next. */
+ goto use_all;
+
+ case ARRAY_RANGE_REF:
+ /* Similarly, a subrange reference is used to modify indexing. Which
+ means that the canonical element names that we have won't work. */
+ goto use_all;
+
+ case VIEW_CONVERT_EXPR:
+ case NOP_EXPR:
+ /* Similarly, a view/nop explicitly wants to look at an object in a
+ type other than the one we've scalarized. */
+ goto use_all;
+
+ use_all:
+ expr_p = &TREE_OPERAND (inner, 0);
+ inner = expr = *expr_p;
+ break;
+
+ default:
+#ifdef ENABLE_CHECKING
+ /* Validate that we're not missing any references. */
+ if (walk_tree (&inner, sra_find_candidate_decl, NULL, NULL))
+ abort ();
+#endif
+ return;
+ }
+}
+
+/* Walk a TREE_LIST of values looking for scalarizable aggregates.
+ If we find one, invoke FNS->USE. */
+
+static void
+sra_walk_tree_list (tree list, block_stmt_iterator *bsi, bool is_output,
+ const struct sra_walk_fns *fns)
+{
+ tree op;
+ for (op = list; op ; op = TREE_CHAIN (op))
+ sra_walk_expr (&TREE_VALUE (op), bsi, is_output, fns);
+}
+
+/* Walk the arguments of a CALL_EXPR looking for scalarizable aggregates.
+ If we find one, invoke FNS->USE. */
+
+static void
+sra_walk_call_expr (tree expr, block_stmt_iterator *bsi,
+ const struct sra_walk_fns *fns)
+{
+ sra_walk_tree_list (TREE_OPERAND (expr, 1), bsi, false, fns);
+}
+
+/* Walk the inputs and outputs of an ASM_EXPR looking for scalarizable
+ aggregates. If we find one, invoke FNS->USE. */
+
+static void
+sra_walk_asm_expr (tree expr, block_stmt_iterator *bsi,
+ const struct sra_walk_fns *fns)
+{
+ sra_walk_tree_list (ASM_INPUTS (expr), bsi, false, fns);
+ sra_walk_tree_list (ASM_OUTPUTS (expr), bsi, true, fns);
+}
+
+/* Walk a MODIFY_EXPR and categorize the assignment appropriately.
+ For each side of the assignment that involves a scalarizable element,
+ exactly one of FNS->COPY, FNS->INIT, FNS->LDST or FNS->USE is invoked;
+ otherwise the subexpression is walked for embedded references. */
+
+static void
+sra_walk_modify_expr (tree expr, block_stmt_iterator *bsi,
+ const struct sra_walk_fns *fns)
+{
+ struct sra_elt *lhs_elt, *rhs_elt;
+ tree lhs, rhs;
+
+ lhs = TREE_OPERAND (expr, 0);
+ rhs = TREE_OPERAND (expr, 1);
+ lhs_elt = maybe_lookup_element_for_expr (lhs);
+ rhs_elt = maybe_lookup_element_for_expr (rhs);
+
+ /* If both sides are scalarizable, this is a COPY operation. */
+ if (lhs_elt && rhs_elt)
+ {
+ fns->copy (lhs_elt, rhs_elt, bsi);
+ return;
+ }
+
+ if (lhs_elt)
+ {
+ /* If this is an assignment from a constant, or constructor, then
+ we have access to all of the elements individually. Invoke INIT. */
+ if (TREE_CODE (rhs) == COMPLEX_EXPR
+ || TREE_CODE (rhs) == COMPLEX_CST
+ || TREE_CODE (rhs) == CONSTRUCTOR)
+ fns->init (lhs_elt, rhs, bsi);
+
+ /* If this is an assignment from read-only memory, treat this as if
+ we'd been passed the constructor directly. Invoke INIT. */
+ else if (TREE_CODE (rhs) == VAR_DECL
+ && TREE_STATIC (rhs)
+ && TREE_READONLY (rhs)
+ && targetm.binds_local_p (rhs))
+ {
+ if (DECL_INITIAL (rhs) != error_mark_node)
+ fns->init (lhs_elt, DECL_INITIAL (rhs), bsi);
+ }
+
+ /* If this is a copy from a non-scalarizable lvalue, invoke LDST.
+ The lvalue requirement prevents us from trying to directly scalarize
+ the result of a function call. Which would result in trying to call
+ the function multiple times, and other evil things. */
+ else if (!lhs_elt->is_scalar && is_gimple_addr_expr_arg (rhs))
+ fns->ldst (lhs_elt, rhs, bsi, true);
+
+ /* Otherwise we're being used in some context that requires the
+ aggregate to be seen as a whole. Invoke USE. */
+ else
+ fns->use (lhs_elt, &TREE_OPERAND (expr, 0), bsi, true);
+ }
+ else
+ {
+ /* LHS_ELT being null only means that the LHS as a whole is not a
+ scalarizable reference. There may be occurrences of scalarizable
+ variables within, which implies a USE. */
+ sra_walk_expr (&TREE_OPERAND (expr, 0), bsi, true, fns);
+ }
+
+ /* Likewise for the right-hand side. The only difference here is that
+ we don't have to handle constants, and the RHS may be a call. */
+ if (rhs_elt)
+ {
+ if (!rhs_elt->is_scalar)
+ fns->ldst (rhs_elt, lhs, bsi, false);
+ else
+ fns->use (rhs_elt, &TREE_OPERAND (expr, 1), bsi, false);
+ }
+ else if (TREE_CODE (rhs) == CALL_EXPR)
+ sra_walk_call_expr (rhs, bsi, fns);
+ else
+ sra_walk_expr (&TREE_OPERAND (expr, 1), bsi, false, fns);
+}
+
+/* Entry point to the walk functions. Search the entire function,
+ invoking the callbacks in FNS on each of the references to
+ scalarizable variables. */
+
+static void
+sra_walk_function (const struct sra_walk_fns *fns)
+{
+ basic_block bb;
+ block_stmt_iterator si;
+
+ /* ??? Phase 4 could derive some benefit to walking the function in
+ dominator tree order. */
+
+ FOR_EACH_BB (bb)
+ for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
+ {
+ tree stmt, t;
+ stmt_ann_t ann;
+
+ stmt = bsi_stmt (si);
+ ann = stmt_ann (stmt);
+
+ /* If the statement has no virtual operands, then it doesn't
+ make any structure references that we care about. */
+ if (NUM_V_MAY_DEFS (V_MAY_DEF_OPS (ann)) == 0
+ && NUM_VUSES (VUSE_OPS (ann)) == 0
+ && NUM_V_MUST_DEFS (V_MUST_DEF_OPS (ann)) == 0)
+ continue;
+
+ switch (TREE_CODE (stmt))
+ {
+ case RETURN_EXPR:
+ /* If we have "return <retval>" then the return value is
+ already exposed for our pleasure. Walk it as a USE to
+ force all the components back in place for the return.
+
+ If we have an embedded assignment, then <retval> is of
+ a type that gets returned in registers in this ABI, and
+ we do not wish to extend their lifetimes. Treat this
+ as a USE of the variable on the RHS of this assignment. */
+
+ /* NOTE(review): this assumes <retval> is non-null; a bare
+ "return" should carry no virtual operands and thus be
+ filtered out above -- confirm. */
+ t = TREE_OPERAND (stmt, 0);
+ if (TREE_CODE (t) == MODIFY_EXPR)
+ sra_walk_expr (&TREE_OPERAND (t, 1), &si, false, fns);
+ else
+ sra_walk_expr (&TREE_OPERAND (stmt, 0), &si, false, fns);
+ break;
+
+ case MODIFY_EXPR:
+ sra_walk_modify_expr (stmt, &si, fns);
+ break;
+ case CALL_EXPR:
+ sra_walk_call_expr (stmt, &si, fns);
+ break;
+ case ASM_EXPR:
+ sra_walk_asm_expr (stmt, &si, fns);
+ break;
+
+ default:
+ break;
+ }
+ }
+}
+\f
+/* Phase One: Scan all referenced variables in the program looking for
+ structures that could be decomposed.  Set the corresponding bit in
+ SRA_CANDIDATES for each candidate found; return true if any were. */
+
+static bool
+find_candidates_for_sra (void)
+{
+ size_t i;
+ bool any_set = false;
+
+ for (i = 0; i < num_referenced_vars; i++)
+ {
+ tree var = referenced_var (i);
+ if (decl_can_be_decomposed_p (var))
+ {
+ bitmap_set_bit (sra_candidates, var_ann (var)->uid);
+ any_set = true;
+ }
+ }
+
+ return any_set;
+}
+
+\f
+/* Phase Two: Scan all references to scalarizable variables. Count the
+ number of times they are used or copied respectively. */
+
+/* Callbacks to fill in SRA_WALK_FNS. Everything but USE is
+ considered a copy, because we can decompose the reference such that
+ the sub-elements needn't be contiguous. */
+
+/* Record one use of ELT as a complete unit. */
+static void
+scan_use (struct sra_elt *elt, tree *expr_p ATTRIBUTE_UNUSED,
+ block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
+ bool is_output ATTRIBUTE_UNUSED)
+{
+ elt->n_uses += 1;
+}
+
+/* Record one copy between two scalarizable elements; both sides count. */
+static void
+scan_copy (struct sra_elt *lhs_elt, struct sra_elt *rhs_elt,
+ block_stmt_iterator *bsi ATTRIBUTE_UNUSED)
+{
+ lhs_elt->n_copies += 1;
+ rhs_elt->n_copies += 1;
+}
+
+/* Record initialization from a constant or constructor as a copy. */
+static void
+scan_init (struct sra_elt *lhs_elt, tree rhs ATTRIBUTE_UNUSED,
+ block_stmt_iterator *bsi ATTRIBUTE_UNUSED)
+{
+ lhs_elt->n_copies += 1;
+}
+
+/* Record a load from / store to a non-scalarizable reference as a copy. */
+static void
+scan_ldst (struct sra_elt *elt, tree other ATTRIBUTE_UNUSED,
+ block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
+ bool is_output ATTRIBUTE_UNUSED)
+{
+ elt->n_copies += 1;
+}
+
+/* Dump to DUMP_FILE the use/copy counts collected during the scanning
+ phase for ELT, recursing into all of its children. */
+
+static void
+scan_dump (struct sra_elt *elt)
+{
+ struct sra_elt *c;
+
+ dump_sra_elt_name (dump_file, elt);
+ fprintf (dump_file, ": n_uses=%u n_copies=%u\n", elt->n_uses, elt->n_copies);
+
+ for (c = elt->children; c ; c = c->sibling)
+ scan_dump (c);
+}
+
+/* Entry point to phase 2. Scan the entire function, building up
+ scalarization data structures, recording copies and uses. */
+
+static void
+scan_function (void)
+{
+ /* NOTE(review): the trailing TRUE initializes the final field of
+ struct sra_walk_fns (declared earlier in this file) -- check its
+ meaning there. */
+ static const struct sra_walk_fns fns = {
+ scan_use, scan_copy, scan_init, scan_ldst, true
+ };
+
+ sra_walk_function (&fns);
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ size_t i;
+
+ fputs ("\nScan results:\n", dump_file);
+ EXECUTE_IF_SET_IN_BITMAP (sra_candidates, 0, i,
+ {
+ tree var = referenced_var (i);
+ struct sra_elt *elt = lookup_element (NULL, var, NULL, NO_INSERT);
+ if (elt)
+ scan_dump (elt);
+ });
+ fputc ('\n', dump_file);
+ }
+}
+\f
+/* Phase Three: Make decisions about which variables to scalarize, if any.
+ All elements to be scalarized have replacement variables made for them. */
+
+/* A subroutine of build_element_name. Recursively build the element
+ name on the obstack, separating components with '$'. */
+
+static void
+build_element_name_1 (struct sra_elt *elt)
+{
+ tree t;
+ char buffer[32];
+
+ if (elt->parent)
+ {
+ build_element_name_1 (elt->parent);
+ obstack_1grow (&sra_obstack, '$');
+
+ /* The two halves of a complex number are named $real and $imag. */
+ if (TREE_CODE (elt->parent->type) == COMPLEX_TYPE)
+ {
+ if (elt->element == integer_zero_node)
+ obstack_grow (&sra_obstack, "real", 4);
+ else
+ obstack_grow (&sra_obstack, "imag", 4);
+ return;
+ }
+ }
+
+ t = elt->element;
+ /* An INTEGER_CST element here names an array index; the complex
+ real/imag case was already handled above. */
+ if (TREE_CODE (t) == INTEGER_CST)
+ {
+ /* ??? Eh. Don't bother doing double-wide printing. */
+ sprintf (buffer, HOST_WIDE_INT_PRINT_DEC, TREE_INT_CST_LOW (t));
+ obstack_grow (&sra_obstack, buffer, strlen (buffer));
+ }
+ else
+ {
+ tree name = DECL_NAME (t);
+ if (name)
+ obstack_grow (&sra_obstack, IDENTIFIER_POINTER (name),
+ IDENTIFIER_LENGTH (name));
+ else
+ {
+ /* Anonymous decls are named by their UID. */
+ sprintf (buffer, "D%u", DECL_UID (t));
+ obstack_grow (&sra_obstack, buffer, strlen (buffer));
+ }
+ }
+}
+
+/* Construct a pretty variable name for an element's replacement variable.
+ The name is built on the obstack.  The returned string lives on
+ SRA_OBSTACK; the caller is responsible for releasing it with
+ obstack_free when done. */
+
+static char *
+build_element_name (struct sra_elt *elt)
+{
+ build_element_name_1 (elt);
+ obstack_1grow (&sra_obstack, '\0');
+ return obstack_finish (&sra_obstack);
+}
+
+/* Instantiate an element as an independent variable: create a rename
+ temporary (prefix "SR") for ELT and record it as ELT's replacement,
+ copying debug-relevant attributes from the outermost base decl. */
+
+static void
+instantiate_element (struct sra_elt *elt)
+{
+ struct sra_elt *base_elt;
+ tree var, base;
+
+ /* Find the root variable this element ultimately belongs to. */
+ for (base_elt = elt; base_elt->parent; base_elt = base_elt->parent)
+ continue;
+ base = base_elt->element;
+
+ elt->replacement = var = make_rename_temp (elt->type, "SR");
+ DECL_SOURCE_LOCATION (var) = DECL_SOURCE_LOCATION (base);
+ TREE_NO_WARNING (var) = TREE_NO_WARNING (base);
+ DECL_ARTIFICIAL (var) = DECL_ARTIFICIAL (base);
+
+ /* Give the replacement a human-readable name when the base has one. */
+ if (DECL_NAME (base) && !DECL_IGNORED_P (base))
+ {
+ char *pretty_name = build_element_name (elt);
+ DECL_NAME (var) = get_identifier (pretty_name);
+ obstack_free (&sra_obstack, pretty_name);
+ }
+
+ if (dump_file)
+ {
+ fputs (" ", dump_file);
+ dump_sra_elt_name (dump_file, elt);
+ fputs (" -> ", dump_file);
+ print_generic_expr (dump_file, var, dump_flags);
+ fputc ('\n', dump_file);
+ }
+}
+
+/* Make one pass across an element tree deciding whether or not it's
+ profitable to instantiate individual leaf scalars.
+
+ PARENT_USES and PARENT_COPIES are the sum of the N_USES and N_COPIES
+ fields all the way up the tree.  A scalar leaf is instantiated when
+ its own uses and copies outweigh the whole-aggregate uses above it. */
+
+static void
+decide_instantiation_1 (struct sra_elt *elt, unsigned int parent_uses,
+ unsigned int parent_copies)
+{
+ if (dump_file && !elt->parent)
+ {
+ fputs ("Initial instantiation for ", dump_file);
+ dump_sra_elt_name (dump_file, elt);
+ fputc ('\n', dump_file);
+ }
+
+ if (elt->cannot_scalarize)
+ return;
+
+ if (elt->is_scalar)
+ {
+ /* The decision is simple: instantiate if we're used more frequently
+ than the parent needs to be seen as a complete unit. */
+ if (elt->n_uses + elt->n_copies + parent_copies > parent_uses)
+ instantiate_element (elt);
+ }
+ else
+ {
+ struct sra_elt *c;
+ unsigned int this_uses = elt->n_uses + parent_uses;
+ unsigned int this_copies = elt->n_copies + parent_copies;
+
+ for (c = elt->children; c ; c = c->sibling)
+ decide_instantiation_1 (c, this_uses, this_copies);
+ }
+}
+
+/* Compute the size and number of all instantiated elements below ELT.
+ We will only care about this if the size of the complete structure
+ fits in a HOST_WIDE_INT, so we don't have to worry about overflow.
+ Byte sizes accumulate into *SIZEP; the return value is the number of
+ instantiated leaves. */
+
+static unsigned int
+sum_instantiated_sizes (struct sra_elt *elt, unsigned HOST_WIDE_INT *sizep)
+{
+ if (elt->replacement)
+ {
+ *sizep += TREE_INT_CST_LOW (TYPE_SIZE_UNIT (elt->type));
+ return 1;
+ }
+ else
+ {
+ struct sra_elt *c;
+ unsigned int count = 0;
+
+ for (c = elt->children; c ; c = c->sibling)
+ count += sum_instantiated_sizes (c, sizep);
+
+ return count;
+ }
+}
+
+/* Instantiate fields in ELT->TYPE that are not currently present as
+ children of ELT. */
+
+static void instantiate_missing_elements (struct sra_elt *elt);
+
+/* Helper: ensure the child named CHILD (of type TYPE) exists under ELT,
+ and that it and everything beneath it is instantiated. */
+static void
+instantiate_missing_elements_1 (struct sra_elt *elt, tree child, tree type)
+{
+ struct sra_elt *sub = lookup_element (elt, child, type, INSERT);
+ if (sub->is_scalar)
+ {
+ if (sub->replacement == NULL)
+ instantiate_element (sub);
+ }
+ else
+ instantiate_missing_elements (sub);
+}
+
+/* Walk ELT->TYPE and create (or complete) a child element for every
+ field, array element, or complex part, so that the aggregate can be
+ copied element by element. */
+static void
+instantiate_missing_elements (struct sra_elt *elt)
+{
+ tree type = elt->type;
+
+ switch (TREE_CODE (type))
+ {
+ case RECORD_TYPE:
+ {
+ tree f;
+ for (f = TYPE_FIELDS (type); f ; f = TREE_CHAIN (f))
+ if (TREE_CODE (f) == FIELD_DECL)
+ instantiate_missing_elements_1 (elt, f, TREE_TYPE (f));
+ break;
+ }
+
+ case ARRAY_TYPE:
+ {
+ tree i, max, subtype;
+
+ /* NOTE(review): assumes a non-empty array domain with constant
+ bounds; variable-sized types should have been forced to block
+ copy by the host_integerp check in decide_block_copy -- confirm. */
+ i = TYPE_MIN_VALUE (TYPE_DOMAIN (type));
+ max = TYPE_MAX_VALUE (TYPE_DOMAIN (type));
+ subtype = TREE_TYPE (type);
+
+ while (1)
+ {
+ instantiate_missing_elements_1 (elt, i, subtype);
+ if (tree_int_cst_equal (i, max))
+ break;
+ i = int_const_binop (PLUS_EXPR, i, integer_one_node, true);
+ }
+
+ break;
+ }
+
+ case COMPLEX_TYPE:
+ /* Real part is element 0, imaginary part is element 1. */
+ type = TREE_TYPE (type);
+ instantiate_missing_elements_1 (elt, integer_zero_node, type);
+ instantiate_missing_elements_1 (elt, integer_one_node, type);
+ break;
+
+ default:
+ abort ();
+ }
+}
+
+/* Make one pass across an element tree deciding whether to perform block
+ or element copies. If we decide on element copies, instantiate all
+ elements. Return true if there are any instantiated sub-elements. */
+
+static bool
+decide_block_copy (struct sra_elt *elt)
+{
+ struct sra_elt *c;
+ bool any_inst;
+
+ /* If scalarization is disabled, respect it. */
+ if (elt->cannot_scalarize)
+ {
+ elt->use_block_copy = 1;
+
+ if (dump_file)
+ {
+ fputs ("Scalarization disabled for ", dump_file);
+ dump_sra_elt_name (dump_file, elt);
+ fputc ('\n', dump_file);
+ }
+
+ return false;
+ }
+
+ /* Don't decide if we've no uses. */
+ if (elt->n_uses == 0 && elt->n_copies == 0)
+ ;
+
+ else if (!elt->is_scalar)
+ {
+ tree size_tree = TYPE_SIZE_UNIT (elt->type);
+ bool use_block_copy = true;
+
+ /* Don't bother trying to figure out the rest if the structure is
+ so large we can't do easy arithmetic. This also forces block
+ copies for variable sized structures. */
+ if (host_integerp (size_tree, 1))
+ {
+ unsigned HOST_WIDE_INT full_size, inst_size = 0;
+ unsigned int inst_count;
+
+ full_size = tree_low_cst (size_tree, 1);
+
+ /* ??? What to do here. If there are two fields, and we've only
+ instantiated one, then instantiating the other is clearly a win.
+ If there are a large number of fields then the size of the copy
+ is much more of a factor. */
+
+ /* If the structure is small, and we've made copies, go ahead
+ and instantiate, hoping that the copies will go away. */
+ if (full_size <= (unsigned) MOVE_RATIO * UNITS_PER_WORD
+ && elt->n_copies > elt->n_uses)
+ use_block_copy = false;
+ else
+ {
+ /* Otherwise use element copies only when at least three
+ quarters of the aggregate (by size) has already been
+ instantiated. */
+ inst_count = sum_instantiated_sizes (elt, &inst_size);
+
+ if (inst_size * 4 >= full_size * 3)
+ use_block_copy = false;
+ }
+
+ /* In order to avoid block copy, we have to be able to instantiate
+ all elements of the type. See if this is possible. */
+ if (!use_block_copy
+ && (!can_completely_scalarize_p (elt)
+ || !type_can_instantiate_all_elements (elt->type)))
+ use_block_copy = true;
+ }
+ elt->use_block_copy = use_block_copy;
+
+ if (dump_file)
+ {
+ fprintf (dump_file, "Using %s for ",
+ use_block_copy ? "block-copy" : "element-copy");
+ dump_sra_elt_name (dump_file, elt);
+ fputc ('\n', dump_file);
+ }
+
+ if (!use_block_copy)
+ {
+ instantiate_missing_elements (elt);
+ return true;
+ }
+ }
+
+ any_inst = elt->replacement != NULL;
+
+ for (c = elt->children; c ; c = c->sibling)
+ any_inst |= decide_block_copy (c);
+
+ return any_inst;
+}
+
+/* Entry point to phase 3. Instantiate scalar replacement variables.
+ Candidates for which nothing was instantiated are removed from
+ SRA_CANDIDATES and NEEDS_COPY_IN. */
+
+static void
+decide_instantiations (void)
+{
+ unsigned int i;
+ bool cleared_any;
+ struct bitmap_head_def done_head;
+
+ /* We cannot clear bits from a bitmap we're iterating over,
+ so save up all the bits to clear until the end. */
+ bitmap_initialize (&done_head, 1);
+ cleared_any = false;
+
+ EXECUTE_IF_SET_IN_BITMAP (sra_candidates, 0, i,
+ {
+ tree var = referenced_var (i);
+ struct sra_elt *elt = lookup_element (NULL, var, NULL, NO_INSERT);
+ if (elt)
+ {
+ decide_instantiation_1 (elt, 0, 0);
+ if (!decide_block_copy (elt))
+ elt = NULL;
+ }
+ if (!elt)
+ {
+ bitmap_set_bit (&done_head, i);
+ cleared_any = true;
+ }
+ });
+
+ if (cleared_any)
+ {
+ bitmap_operation (sra_candidates, sra_candidates, &done_head,
+ BITMAP_AND_COMPL);
+ bitmap_operation (needs_copy_in, needs_copy_in, &done_head,
+ BITMAP_AND_COMPL);
+ }
+ bitmap_clear (&done_head);
+
+ if (dump_file)
+ fputc ('\n', dump_file);
+}
+
+\f
+/* Phase Four: Update the function to match the replacements created. */
+
+/* Mark all the variables in V_MAY_DEF or V_MUST_DEF operands for STMT for
+ renaming. This becomes necessary when we modify all of a non-scalar. */
+
+static void
+mark_all_v_defs (tree stmt)
+{
+ v_may_def_optype v_may_defs;
+ v_must_def_optype v_must_defs;
+ size_t i, n;
+
+ get_stmt_operands (stmt);
+
+ v_may_defs = V_MAY_DEF_OPS (stmt_ann (stmt));
+ n = NUM_V_MAY_DEFS (v_may_defs);
+ for (i = 0; i < n; i++)
+ {
+ tree sym = V_MAY_DEF_RESULT (v_may_defs, i);
+ /* Operands may be SSA names; mark the underlying variable. */
+ if (TREE_CODE (sym) == SSA_NAME)
+ sym = SSA_NAME_VAR (sym);
+ bitmap_set_bit (vars_to_rename, var_ann (sym)->uid);
+ }
+
+ v_must_defs = V_MUST_DEF_OPS (stmt_ann (stmt));
+ n = NUM_V_MUST_DEFS (v_must_defs);
+ for (i = 0; i < n; i++)
+ {
+ tree sym = V_MUST_DEF_OP (v_must_defs, i);
+ if (TREE_CODE (sym) == SSA_NAME)
+ sym = SSA_NAME_VAR (sym);
+ bitmap_set_bit (vars_to_rename, var_ann (sym)->uid);
+ }
 }
-/* A subroutine of create_scalar_copies. Construct a COMPONENT_REF
- expression for BASE referencing FIELD. INDEX is the field index. */
+/* Build a single level component reference to ELT rooted at BASE. */
static tree
-csc_build_component_ref (tree base, tree field)
+generate_one_element_ref (struct sra_elt *elt, tree base)
{
- switch (TREE_CODE (base))
+ switch (TREE_CODE (TREE_TYPE (base)))
{
- case CONSTRUCTOR:
- /* Only appears on RHS. The only remaining CONSTRUCTORS for
- record types that should remain are empty, and imply that
- the entire structure should be zeroed. */
- if (CONSTRUCTOR_ELTS (base))
- abort ();
- return fold_convert (TREE_TYPE (field), integer_zero_node);
+ case RECORD_TYPE:
+ return build (COMPONENT_REF, elt->type, base, elt->element, NULL);
- default:
- /* Avoid sharing BASE when building the different COMPONENT_REFs.
- We let the first field have the original version. */
- if (field != TYPE_FIELDS (TREE_TYPE (base)))
- base = unshare_expr (base);
- break;
+ case ARRAY_TYPE:
+ return build (ARRAY_REF, elt->type, base, elt->element, NULL, NULL);
- case VAR_DECL:
- case PARM_DECL:
- /* Special case for the above -- decls are always shared. */
- break;
- }
+ case COMPLEX_TYPE:
+ if (elt->element == integer_zero_node)
+ return build (REALPART_EXPR, elt->type, base);
+ else
+ return build (IMAGPART_EXPR, elt->type, base);
- return build (COMPONENT_REF, TREE_TYPE (field), base, field, NULL_TREE);
+ default:
+ abort ();
+ }
}
-/* Similarly for REALPART_EXPR and IMAGPART_EXPR for complex types. */
+/* Build a full component reference to ELT rooted at its native variable. */
static tree
-csc_build_complex_part (tree base, enum tree_code part)
+generate_element_ref (struct sra_elt *elt)
+{
+ if (elt->parent)
+ return generate_one_element_ref (elt, generate_element_ref (elt->parent));
+ else
+ return elt->element;
+}
+
+/* Generate a set of assignment statements in *LIST_P to copy all
+ instantiated elements under ELT to or from the equivalent structure
+ rooted at EXPR. COPY_OUT controls the direction of the copy, with
+ true meaning to copy out of EXPR into ELT. */
+
+static void
+generate_copy_inout (struct sra_elt *elt, bool copy_out, tree expr,
+ tree *list_p)
{
- switch (TREE_CODE (base))
+ struct sra_elt *c;
+ tree t;
+
+ if (elt->replacement)
{
- case COMPLEX_CST:
- if (part == REALPART_EXPR)
- return TREE_REALPART (base);
+ if (copy_out)
+ t = build (MODIFY_EXPR, void_type_node, elt->replacement, expr);
else
- return TREE_IMAGPART (base);
+ t = build (MODIFY_EXPR, void_type_node, expr, elt->replacement);
+ append_to_statement_list (t, list_p);
+ }
+ else
+ {
+ for (c = elt->children; c ; c = c->sibling)
+ {
+ t = generate_one_element_ref (c, unshare_expr (expr));
+ generate_copy_inout (c, copy_out, t, list_p);
+ }
+ }
+}
- case COMPLEX_EXPR:
- if (part == REALPART_EXPR)
- return TREE_OPERAND (base, 0);
- else
- return TREE_OPERAND (base, 1);
+/* Generate a set of assignment statements in *LIST_P to copy all instantiated
+ elements under SRC to their counterparts under DST. There must be a 1-1
+ correspondence of instantiated elements. */
- default:
- /* Avoid sharing BASE when building the different references.
- We let the real part have the original version. */
- if (part != REALPART_EXPR)
- base = unshare_expr (base);
- break;
+static void
+generate_element_copy (struct sra_elt *dst, struct sra_elt *src, tree *list_p)
+{
+ struct sra_elt *dc, *sc;
- case VAR_DECL:
- case PARM_DECL:
- /* Special case for the above -- decls are always shared. */
- break;
+ for (dc = dst->children; dc ; dc = dc->sibling)
+ {
+ sc = lookup_element (src, dc->element, NULL, NO_INSERT);
+ if (sc == NULL)
+ abort ();
+ generate_element_copy (dc, sc, list_p);
}
- return build1 (part, TREE_TYPE (TREE_TYPE (base)), base);
-}
-
-/* Create and return a list of assignments to perform a scalarized
- structure assignment 'LHS = RHS'. Both LHS and RHS are assumed to be
- of an aggregate or complex type. Three types of copies may be specified:
+ if (dst->replacement)
+ {
+ tree t;
- SCALAR_SCALAR will emit assignments for all the scalar temporaries
- corresponding to the fields of LHS and RHS.
+ if (src->replacement == NULL)
+ abort ();
- FIELD_SCALAR will emit assignments from the scalar replacements of
- RHS into each of the fields of the LHS.
+ t = build (MODIFY_EXPR, void_type_node, dst->replacement,
+ src->replacement);
+ append_to_statement_list (t, list_p);
+ }
+}
- SCALAR_FIELD will emit assignments from each field of the RHS into
- the scalar replacements of the LHS. */
+/* Generate a set of assignment statements in *LIST_P to zero all instantiated
+ elements under ELT. In addition, do not assign to elements that have been
+ marked VISITED but do reset the visited flag; this allows easy coordination
+ with generate_element_init. */
-static tree
-create_scalar_copies (tree lhs, tree rhs, enum sra_copy_mode mode)
+static void
+generate_element_zero (struct sra_elt *elt, tree *list_p)
{
- tree type, list;
- tree_stmt_iterator tsi;
-
-#if defined ENABLE_CHECKING
- /* Sanity checking. Check that we are not trying to scalarize a
- non-decl. */
- if (!DECL_P (lhs) && (mode == SCALAR_FIELD || mode == SCALAR_SCALAR))
- abort ();
- if (!DECL_P (rhs) && (mode == FIELD_SCALAR || mode == SCALAR_SCALAR))
- abort ();
-#endif
+ struct sra_elt *c;
- type = TREE_TYPE (lhs);
- list = alloc_stmt_list ();
- tsi = tsi_start (list);
+ for (c = elt->children; c ; c = c->sibling)
+ generate_element_zero (c, list_p);
- /* VA_ARG_EXPRs have side effects, so we need to copy it first to a
- temporary before scalarizing. FIXME: This should disappear once
- VA_ARG_EXPRs are properly lowered. */
- if (TREE_CODE (rhs) == VA_ARG_EXPR)
+ if (elt->visited)
+ elt->visited = false;
+ else if (elt->replacement)
{
- tree stmt, tmp;
-
- /* Add TMP = VA_ARG_EXPR <> */
- tmp = make_rename_temp (TREE_TYPE (rhs), NULL);
- stmt = csc_assign (&tsi, tmp, rhs);
+ tree t;
- /* Mark all the variables in VDEF operands for renaming, because
- the VA_ARG_EXPR will now be in a different statement. */
- mark_all_v_may_defs (stmt);
- mark_all_v_must_defs (stmt);
+ if (elt->is_scalar)
+ t = fold_convert (elt->type, integer_zero_node);
+ else
+ /* We generated a replacement for a non-scalar? */
+ abort ();
- /* Set RHS to be the new temporary TMP. */
- rhs = tmp;
+ t = build (MODIFY_EXPR, void_type_node, elt->replacement, t);
+ append_to_statement_list (t, list_p);
}
+}
- /* When making *_SCALAR copies from PARM_DECLs, we will need to insert
- copy-in operations from the original PARM_DECLs. Note that these
- copy-in operations may end up being dead, but we won't know until
- we rename the new variables into SSA. */
- if ((mode == SCALAR_SCALAR || mode == FIELD_SCALAR)
- && TREE_CODE (rhs) == PARM_DECL)
- bitmap_set_bit (needs_copy_in, var_ann (rhs)->uid);
+/* Generate a set of assignment statements in *LIST_P to set all instantiated
+ elements under ELT with the contents of the initializer INIT. In addition,
+ mark all assigned elements VISITED; this allows easy coordination with
+ generate_element_zero. */
- /* Now create scalar copies for each individual field according to MODE. */
- if (TREE_CODE (type) == COMPLEX_TYPE)
- {
- /* Create scalar copies of both the real and imaginary parts. */
- tree real_lhs, real_rhs, imag_lhs, imag_rhs;
+static void
+generate_element_init (struct sra_elt *elt, tree init, tree *list_p)
+{
+ enum tree_code init_code = TREE_CODE (init);
+ struct sra_elt *sub;
+ tree t;
- if (mode == SCALAR_FIELD)
- {
- real_rhs = csc_build_complex_part (rhs, REALPART_EXPR);
- imag_rhs = csc_build_complex_part (rhs, IMAGPART_EXPR);
- }
- else
+ if (elt->is_scalar)
+ {
+ if (elt->replacement)
{
- real_rhs = get_scalar_for_complex_part (rhs, REALPART_EXPR);
- imag_rhs = get_scalar_for_complex_part (rhs, IMAGPART_EXPR);
+ t = build (MODIFY_EXPR, void_type_node, elt->replacement, init);
+ append_to_statement_list (t, list_p);
+ elt->visited = true;
}
+ return;
+ }
- if (mode == FIELD_SCALAR)
+ switch (init_code)
+ {
+ case COMPLEX_CST:
+ case COMPLEX_EXPR:
+ for (sub = elt->children; sub ; sub = sub->sibling)
{
- /* In this case we do not need to create but one statement,
- since we can create a new complex value whole. */
-
- if (TREE_CONSTANT (real_rhs) && TREE_CONSTANT (imag_rhs))
- rhs = build_complex (type, real_rhs, imag_rhs);
+ if (sub->element == integer_zero_node)
+ t = (init_code == COMPLEX_EXPR
+ ? TREE_OPERAND (init, 0) : TREE_REALPART (init));
else
- rhs = build (COMPLEX_EXPR, type, real_rhs, imag_rhs);
- csc_assign (&tsi, lhs, rhs);
- }
- else
- {
- real_lhs = get_scalar_for_complex_part (lhs, REALPART_EXPR);
- imag_lhs = get_scalar_for_complex_part (lhs, IMAGPART_EXPR);
-
- csc_assign (&tsi, real_lhs, real_rhs);
- csc_assign (&tsi, imag_lhs, imag_rhs);
+ t = (init_code == COMPLEX_EXPR
+ ? TREE_OPERAND (init, 1) : TREE_IMAGPART (init));
+ generate_element_init (sub, t, list_p);
}
- }
- else
- {
- tree lf, rf;
-
- /* ??? C++ generates copies between different pointer-to-member
- structures of different types. To combat this, we must track
- the field of both the left and right structures, so that we
- index the variables with fields of their own type. */
+ break;
- for (lf = TYPE_FIELDS (type), rf = TYPE_FIELDS (TREE_TYPE (rhs));
- lf;
- lf = TREE_CHAIN (lf), rf = TREE_CHAIN (rf))
+ case CONSTRUCTOR:
+ for (t = CONSTRUCTOR_ELTS (init); t ; t = TREE_CHAIN (t))
{
- tree lhs_var, rhs_var;
-
- /* Only copy FIELD_DECLs. */
- if (TREE_CODE (lf) != FIELD_DECL)
+ sub = lookup_element (elt, TREE_PURPOSE (t), NULL, NO_INSERT);
+ if (sub == NULL)
continue;
+ generate_element_init (sub, TREE_VALUE (t), list_p);
+ }
+ break;
- if (mode == FIELD_SCALAR)
- lhs_var = csc_build_component_ref (lhs, lf);
- else
- lhs_var = get_scalar_for_field (lhs, lf);
+ default:
+ abort ();
+ }
+}
- if (mode == SCALAR_FIELD)
- rhs_var = csc_build_component_ref (rhs, rf);
- else
- rhs_var = get_scalar_for_field (rhs, rf);
+/* Insert STMT on all the outgoing edges out of BB. Note that if BB
+ has more than one edge, STMT will be replicated for each edge. Also,
+ abnormal edges will be ignored. */
- csc_assign (&tsi, lhs_var, rhs_var);
- }
- }
+void
+insert_edge_copies (tree stmt, basic_block bb)
+{
+ edge e;
+ bool first_copy;
- /* All the scalar copies just created will either create new definitions
- or remove existing definitions of LHS, so we need to mark it for
- renaming. */
- if (TREE_SIDE_EFFECTS (list))
+ first_copy = true;
+ for (e = bb->succ; e; e = e->succ_next)
{
- if (mode == SCALAR_FIELD || mode == SCALAR_SCALAR)
- {
- /* If the LHS has been scalarized, mark it for renaming. */
- bitmap_set_bit (vars_to_rename, var_ann (lhs)->uid);
- }
- else if (mode == FIELD_SCALAR)
+ /* We don't need to insert copies on abnormal edges. The
+ value of the scalar replacement is not guaranteed to
+ be valid through an abnormal edge. */
+ if (!(e->flags & EDGE_ABNORMAL))
{
- /* Otherwise, mark all the symbols in the VDEFs for the last
- scalarized statement just created. Since all the statements
- introduce the same VDEFs, we only need to check the last one. */
- mark_all_v_may_defs (tsi_stmt (tsi));
- mark_all_v_must_defs (tsi_stmt (tsi));
+ if (first_copy)
+ {
+ bsi_insert_on_edge (e, stmt);
+ first_copy = false;
+ }
+ else
+ bsi_insert_on_edge (e, lhd_unsave_expr_now (stmt));
}
- else
- abort ();
}
-
- return list;
}
-/* A helper function that creates the copies, updates line info,
- and emits the code either before or after BSI. */
+/* Helper function to insert LIST before BSI, and set up line number info. */
static void
-emit_scalar_copies (block_stmt_iterator *bsi, tree lhs, tree rhs,
- enum sra_copy_mode mode)
+sra_insert_before (block_stmt_iterator *bsi, tree list)
{
- tree list = create_scalar_copies (lhs, rhs, mode);
tree stmt = bsi_stmt (*bsi);
if (EXPR_HAS_LOCATION (stmt))
annotate_all_with_locus (&list, EXPR_LOCATION (stmt));
-
bsi_insert_before (bsi, list, BSI_SAME_STMT);
}
-/* Traverse all the statements in the function replacing references to
- scalarizable structures with their corresponding scalar temporaries. */
+/* Similarly, but insert after BSI. Handles insertion onto edges as well. */
static void
-scalarize_structures (void)
+sra_insert_after (block_stmt_iterator *bsi, tree list)
{
- basic_block bb;
- block_stmt_iterator si;
- size_t i;
-
- FOR_EACH_BB (bb)
- for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
- {
- tree stmt;
- stmt_ann_t ann;
-
- stmt = bsi_stmt (si);
- ann = stmt_ann (stmt);
-
- /* If the statement has no virtual operands, then it doesn't make
- structure references that we care about. */
- if (NUM_V_MAY_DEFS (V_MAY_DEF_OPS (ann)) == 0
- && NUM_VUSES (VUSE_OPS (ann)) == 0
- && NUM_V_MUST_DEFS (V_MUST_DEF_OPS (ann)) == 0)
- continue;
-
- /* Structure references may only appear in certain statements. */
- if (TREE_CODE (stmt) != MODIFY_EXPR
- && TREE_CODE (stmt) != CALL_EXPR
- && TREE_CODE (stmt) != RETURN_EXPR
- && TREE_CODE (stmt) != ASM_EXPR)
- continue;
-
- scalarize_stmt (&si);
- }
+ tree stmt = bsi_stmt (*bsi);
- /* Initialize the scalar replacements for every structure that is a
- function argument. */
- EXECUTE_IF_SET_IN_BITMAP (needs_copy_in, 0, i,
- {
- tree var = referenced_var (i);
- tree list = create_scalar_copies (var, var, SCALAR_FIELD);
- bsi_insert_on_edge (ENTRY_BLOCK_PTR->succ, list);
- });
+ if (EXPR_HAS_LOCATION (stmt))
+ annotate_all_with_locus (&list, EXPR_LOCATION (stmt));
- /* Commit edge insertions. */
- bsi_commit_edge_inserts (NULL);
+ if (stmt_ends_bb_p (stmt))
+ insert_edge_copies (list, bsi->bb);
+ else
+ bsi_insert_after (bsi, list, BSI_CONTINUE_LINKING);
}
-
-/* Scalarize structure references in the statement pointed by SI_P. */
+/* Similarly, but replace the statement at BSI. */
static void
-scalarize_stmt (block_stmt_iterator *si_p)
+sra_replace (block_stmt_iterator *bsi, tree list)
{
- tree stmt = bsi_stmt (*si_p);
-
- /* Handle assignments. */
- if (TREE_CODE (stmt) == MODIFY_EXPR
- && TREE_CODE (TREE_OPERAND (stmt, 1)) != CALL_EXPR)
- scalarize_modify_expr (si_p);
-
- /* Handle RETURN_EXPR. */
- else if (TREE_CODE (stmt) == RETURN_EXPR)
- scalarize_return_expr (si_p);
-
- /* Handle function calls (note that this must be handled after
- MODIFY_EXPR and RETURN_EXPR because a function call can appear in
- both). */
- else if (get_call_expr_in (stmt) != NULL_TREE)
- scalarize_call_expr (si_p);
-
- /* Handle ASM_EXPRs. */
- else if (TREE_CODE (stmt) == ASM_EXPR)
- scalarize_asm_expr (si_p);
+ sra_insert_before (bsi, list);
+ bsi_remove (bsi);
+ if (bsi_end_p (*bsi))
+ *bsi = bsi_last (bsi->bb);
+ else
+ bsi_prev (bsi);
}
-
-/* Helper for scalarize_stmt to handle assignments. */
+/* Scalarize a USE. To recap, this is either a simple reference to ELT,
+ if ELT is scalar, or some occurrence of ELT that requires a complete
+ aggregate. IS_OUTPUT is true if ELT is being modified. */
static void
-scalarize_modify_expr (block_stmt_iterator *si_p)
+scalarize_use (struct sra_elt *elt, tree *expr_p, block_stmt_iterator *bsi,
+ bool is_output)
{
- tree stmt = bsi_stmt (*si_p);
- tree lhs = TREE_OPERAND (stmt, 0);
- tree rhs = TREE_OPERAND (stmt, 1);
- tree var = NULL_TREE;
+ tree list = NULL, stmt = bsi_stmt (*bsi);
- /* Found AGGREGATE.FIELD = ... */
- if (is_sra_candidate_ref (lhs))
+ if (elt->replacement)
{
- tree sym;
- v_may_def_optype v_may_defs;
-
- scalarize_component_ref (stmt, &TREE_OPERAND (stmt, 0));
-
- /* Mark the LHS to be renamed, as we have just removed the previous
- V_MAY_DEF for AGGREGATE. The statement should have exactly one
- V_MAY_DEF for variable AGGREGATE. */
- v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
- if (NUM_V_MAY_DEFS (v_may_defs) != 1)
- abort ();
- sym = SSA_NAME_VAR (V_MAY_DEF_RESULT (v_may_defs, 0));
- bitmap_set_bit (vars_to_rename, var_ann (sym)->uid);
+ /* If we have a replacement, then updating the reference is as
+ simple as modifying the existing statement in place. */
+ if (is_output)
+ mark_all_v_defs (stmt);
+ *expr_p = elt->replacement;
+ modify_stmt (stmt);
}
-
- /* Found ... = AGGREGATE.FIELD */
- else if (is_sra_candidate_ref (rhs))
- scalarize_component_ref (stmt, &TREE_OPERAND (stmt, 1));
-
- /* Found a complex reference nesting involving a candidate decl. This
- should only occur if the above condition is false if a BIT_FIELD_REF or
- VIEW_CONVERT_EXPR is involved. This is similar to a CALL_EXPR, if the
- operand of the BIT_FIELD_REF is a scalarizable structure, we need to
- copy from its scalar replacements before doing the bitfield operation.
-
- FIXME: BIT_FIELD_REFs are often generated by fold-const.c. This is
- not always desirable because they obfuscate the original predicates,
- limiting what the tree optimizers may do. For instance, in
- testsuite/g++.dg/opt/nrv4.C the use of SRA allows the optimizers to
- optimize function main() to 'return 0;'. However, the folder
- generates a BIT_FIELD_REF operation for one of the comparisons,
- preventing the optimizers from removing all the redundant
- operations. */
- else if (is_sra_candidate_complex_ref (rhs, &var))
+ else
{
- emit_scalar_copies (si_p, var, var, FIELD_SCALAR);
-
- /* If the LHS of the assignment is also a scalarizable structure, insert
- copies into the scalar replacements after the call. */
- if (is_sra_candidate_decl (lhs))
+ /* Otherwise we need some copies. If ELT is being read, then we want
+ to store all (modified) sub-elements back into the structure before
+ the reference takes place. If ELT is being written, then we want to
+ load the changed values back into our shadow variables. */
+ /* ??? We don't check modified for reads, we just always write all of
+ the values. We should be able to record the SSA number of the VOP
+ for which the values were last read. If that number matches the
+ SSA number of the VOP in the current statement, then we needn't
+ emit an assignment. This would also eliminate double writes when
+ a structure is passed as more than one argument to a function call.
+ This optimization would be most effective if sra_walk_function
+ processed the blocks in dominator order. */
+
+ generate_copy_inout (elt, is_output, generate_element_ref (elt), &list);
+ if (list == NULL)
+ return;
+ if (is_output)
{
- tree list = create_scalar_copies (lhs, lhs, SCALAR_FIELD);
- if (EXPR_HAS_LOCATION (stmt))
- annotate_all_with_locus (&list, EXPR_LOCATION (stmt));
- if (stmt_ends_bb_p (stmt))
- insert_edge_copies (list, bb_for_stmt (stmt));
- else
- bsi_insert_after (si_p, list, BSI_NEW_STMT);
+ mark_all_v_defs (expr_first (list));
+ sra_insert_after (bsi, list);
}
+ else
+ sra_insert_before (bsi, list);
}
-
- /* Found AGGREGATE = ... or ... = AGGREGATE */
- else if (DECL_P (lhs) || DECL_P (rhs))
- scalarize_structure_assignment (si_p);
}
+/* Scalarize a COPY. To recap, this is an assignment statement between
+ two scalarizable references, LHS_ELT and RHS_ELT. */
-/* Scalarize structure references in LIST. Use DONE to avoid duplicates. */
-
-static inline void
-scalarize_tree_list (tree list, block_stmt_iterator *si_p, bitmap done)
+static void
+scalarize_copy (struct sra_elt *lhs_elt, struct sra_elt *rhs_elt,
+ block_stmt_iterator *bsi)
{
- tree op;
+ tree list, stmt;
- for (op = list; op; op = TREE_CHAIN (op))
+ if (lhs_elt->replacement && rhs_elt->replacement)
{
- tree arg = TREE_VALUE (op);
+ /* If we have two scalar operands, modify the existing statement. */
+ stmt = bsi_stmt (*bsi);
- if (is_sra_candidate_decl (arg))
- {
- int index = var_ann (arg)->uid;
- if (!bitmap_bit_p (done, index))
- {
- emit_scalar_copies (si_p, arg, arg, FIELD_SCALAR);
- bitmap_set_bit (done, index);
- }
- }
- else if (is_sra_candidate_ref (arg))
+#ifdef ENABLE_CHECKING
+ /* See the commentary in sra_walk_function concerning
+ RETURN_EXPR, and why we should never see one here. */
+ if (TREE_CODE (stmt) != MODIFY_EXPR)
+ abort ();
+#endif
+
+ TREE_OPERAND (stmt, 0) = lhs_elt->replacement;
+ TREE_OPERAND (stmt, 1) = rhs_elt->replacement;
+ modify_stmt (stmt);
+ }
+ else if (lhs_elt->use_block_copy || rhs_elt->use_block_copy)
+ {
+ /* If either side requires a block copy, then sync the RHS back
+ to the original structure, leave the original assignment
+ statement (which will perform the block copy), then load the
+ LHS values out of its now-updated original structure. */
+ /* ??? Could perform a modified pair-wise element copy. That
+ would at least allow those elements that are instantiated in
+ both structures to be optimized well. */
+
+ list = NULL;
+ generate_copy_inout (rhs_elt, false,
+ generate_element_ref (rhs_elt), &list);
+ if (list)
{
- tree stmt = bsi_stmt (*si_p);
- scalarize_component_ref (stmt, &TREE_VALUE (op));
+ mark_all_v_defs (expr_first (list));
+ sra_insert_before (bsi, list);
}
+
+ list = NULL;
+ generate_copy_inout (lhs_elt, true,
+ generate_element_ref (lhs_elt), &list);
+ if (list)
+ sra_insert_after (bsi, list);
}
-}
+ else
+ {
+ /* Otherwise both sides must be fully instantiated. In which
+ case perform pair-wise element assignments and replace the
+ original block copy statement. */
+
+ stmt = bsi_stmt (*bsi);
+ mark_all_v_defs (stmt);
+ list = NULL;
+ generate_element_copy (lhs_elt, rhs_elt, &list);
+ if (list == NULL)
+ abort ();
+ sra_replace (bsi, list);
+ }
+}
-/* Helper for scalarize_stmt to handle function calls. */
+/* Scalarize an INIT. To recap, this is an assignment to a scalarizable
+ reference from some form of constructor: CONSTRUCTOR, COMPLEX_CST or
+ COMPLEX_EXPR. If RHS is NULL, it should be treated as an empty
+ CONSTRUCTOR. */
static void
-scalarize_call_expr (block_stmt_iterator *si_p)
+scalarize_init (struct sra_elt *lhs_elt, tree rhs, block_stmt_iterator *bsi)
{
- tree stmt = bsi_stmt (*si_p);
- tree call = (TREE_CODE (stmt) == MODIFY_EXPR) ? TREE_OPERAND (stmt, 1) : stmt;
- struct bitmap_head_def done_head;
+ tree list = NULL;
- /* First scalarize the arguments. Order is important, because the copy
- operations for the arguments need to go before the call.
- Scalarization of the return value needs to go after the call. */
- bitmap_initialize (&done_head, 1);
- scalarize_tree_list (TREE_OPERAND (call, 1), si_p, &done_head);
- bitmap_clear (&done_head);
+ /* Generate initialization statements for all members extant in the RHS. */
+ if (rhs)
+ generate_element_init (lhs_elt, rhs, &list);
- /* Scalarize the return value, if any. */
- if (TREE_CODE (stmt) == MODIFY_EXPR)
- {
- tree var = get_base_address (TREE_OPERAND (stmt, 0));
+ /* CONSTRUCTOR is defined such that any member not mentioned is assigned
+ a zero value. Initialize the rest of the instantiated elements. */
+ generate_element_zero (lhs_elt, &list);
+ if (list == NULL)
+ return;
- /* If the LHS of the assignment is a scalarizable structure, insert
- copies into the scalar replacements after the call. */
- if (is_sra_candidate_decl (var))
- {
- tree list = create_scalar_copies (var, var, SCALAR_FIELD);
- if (EXPR_HAS_LOCATION (stmt))
- annotate_all_with_locus (&list, EXPR_LOCATION (stmt));
- if (stmt_ends_bb_p (stmt))
- insert_edge_copies (list, bb_for_stmt (stmt));
- else
- bsi_insert_after (si_p, list, BSI_NEW_STMT);
- }
+ if (lhs_elt->use_block_copy)
+ {
+ /* Since LHS is not fully instantiated, we must leave the structure
+ assignment in place. Treating this case differently from a USE
+ exposes constants to later optimizations. */
+ mark_all_v_defs (expr_first (list));
+ sra_insert_after (bsi, list);
+ }
+ else
+ {
+ /* The LHS is fully instantiated. The list of initializations
+ replaces the original structure assignment. */
+ mark_all_v_defs (bsi_stmt (*bsi));
+ sra_replace (bsi, list);
}
}
+/* A subroutine of scalarize_ldst called via walk_tree. Set TREE_THIS_NOTRAP
+ on all INDIRECT_REFs. */
-/* Helper for scalarize_stmt to handle ASM_EXPRs. */
-
-static void
-scalarize_asm_expr (block_stmt_iterator *si_p)
+static tree
+mark_notrap (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
{
- tree stmt = bsi_stmt (*si_p);
- struct bitmap_head_def done_head;
+ tree t = *tp;
- bitmap_initialize (&done_head, 1);
- scalarize_tree_list (ASM_INPUTS (stmt), si_p, &done_head);
- scalarize_tree_list (ASM_OUTPUTS (stmt), si_p, &done_head);
- bitmap_clear (&done_head);
+ if (TREE_CODE (t) == INDIRECT_REF)
+ {
+ TREE_THIS_NOTRAP (t) = 1;
+ *walk_subtrees = 0;
+ }
+ else if (DECL_P (t) || TYPE_P (t))
+ *walk_subtrees = 0;
- /* ??? Process outputs after the asm. */
+ return NULL;
}
-
-/* Helper for scalarize_stmt to handle return expressions. */
+/* Scalarize a LDST. To recap, this is an assignment between one scalarizable
+ reference ELT and one non-scalarizable reference OTHER. IS_OUTPUT is true
+ if ELT is on the left-hand side. */
static void
-scalarize_return_expr (block_stmt_iterator *si_p)
+scalarize_ldst (struct sra_elt *elt, tree other,
+ block_stmt_iterator *bsi, bool is_output)
{
- tree stmt = bsi_stmt (*si_p);
- tree op = TREE_OPERAND (stmt, 0);
-
- if (op == NULL_TREE)
- return;
+ /* Shouldn't have gotten called for a scalar. */
+ if (elt->replacement)
+ abort ();
- /* Handle a bare RESULT_DECL. This will handle for types needed
- constructors, or possibly after NRV type optimizations. */
- if (is_sra_candidate_decl (op))
- emit_scalar_copies (si_p, op, op, FIELD_SCALAR);
- else if (TREE_CODE (op) == MODIFY_EXPR)
+ if (elt->use_block_copy)
+ {
+ /* Since ELT is not fully instantiated, we have to leave the
+ block copy in place. Treat this as a USE. */
+ scalarize_use (elt, NULL, bsi, is_output);
+ }
+ else
{
- tree *rhs_p = &TREE_OPERAND (op, 1);
- tree rhs = *rhs_p;
+ /* The interesting case is when ELT is fully instantiated. In this
+ case we can have each element stored/loaded directly to/from the
+ corresponding slot in OTHER. This avoids a block copy. */
- /* Handle 'return STRUCTURE;' */
- if (is_sra_candidate_decl (rhs))
- emit_scalar_copies (si_p, rhs, rhs, FIELD_SCALAR);
+ tree list = NULL, stmt = bsi_stmt (*bsi);
- /* Handle 'return STRUCTURE.FIELD;' */
- else if (is_sra_candidate_ref (rhs))
- scalarize_component_ref (stmt, rhs_p);
+ mark_all_v_defs (stmt);
+ generate_copy_inout (elt, is_output, other, &list);
+ if (list == NULL)
+ abort ();
- /* Handle 'return CALL_EXPR;' */
- else if (TREE_CODE (rhs) == CALL_EXPR)
+ /* Preserve EH semantics. */
+ if (stmt_ends_bb_p (stmt))
{
- struct bitmap_head_def done_head;
- bitmap_initialize (&done_head, 1);
- scalarize_tree_list (TREE_OPERAND (rhs, 1), si_p, &done_head);
- bitmap_clear (&done_head);
+ tree_stmt_iterator tsi;
+ tree first;
+
+ /* Extract the first statement from LIST. */
+ tsi = tsi_start (list);
+ first = tsi_stmt (tsi);
+ tsi_delink (&tsi);
+
+ /* Replace the old statement with this new representative. */
+ bsi_replace (bsi, first, true);
+
+ if (!tsi_end_p (tsi))
+ {
+ /* If any reference would trap, then they all would. And more
+ to the point, the first would. Therefore none of the rest
+ will trap since the first didn't. Indicate this by
+ iterating over the remaining statements and set
+ TREE_THIS_NOTRAP in all INDIRECT_REFs. */
+ do
+ {
+ walk_tree (tsi_stmt_ptr (tsi), mark_notrap, NULL, NULL);
+ tsi_next (&tsi);
+ }
+ while (!tsi_end_p (tsi));
+
+ insert_edge_copies (list, bsi->bb);
+ }
}
+ else
+ sra_replace (bsi, list);
}
}
+/* Generate initializations for all scalarizable parameters. */
-/* Debugging dump for the scalar replacement map. */
-
-static int
-dump_sra_map_trav (void **slot, void *data)
+static void
+scalarize_parms (void)
{
- struct sra_elt *e = *slot;
- FILE *f = data;
+ tree list = NULL;
+ size_t i;
- switch (e->kind)
- {
- case REALPART_EXPR:
- fputs ("__real__ ", f);
- print_generic_expr (dump_file, e->base, dump_flags);
- fprintf (f, " -> %s\n", get_name (e->replace));
- break;
- case IMAGPART_EXPR:
- fputs ("__imag__ ", f);
- print_generic_expr (dump_file, e->base, dump_flags);
- fprintf (f, " -> %s\n", get_name (e->replace));
- break;
- case COMPONENT_REF:
- print_generic_expr (dump_file, e->base, dump_flags);
- fprintf (f, ".%s -> %s\n", get_name (e->field), get_name (e->replace));
- break;
- default:
- abort ();
- }
+ EXECUTE_IF_SET_IN_BITMAP (needs_copy_in, 0, i,
+ {
+ tree var = referenced_var (i);
+ struct sra_elt *elt = lookup_element (NULL, var, NULL, NO_INSERT);
+ generate_copy_inout (elt, true, var, &list);
+ });
- return 1;
+ if (list)
+ insert_edge_copies (list, ENTRY_BLOCK_PTR);
}
+/* Entry point to phase 4. Update the function to match replacements. */
+
static void
-dump_sra_map (FILE *f)
+scalarize_function (void)
{
- fputs ("Scalar replacements:\n", f);
- htab_traverse_noresize (sra_map, dump_sra_map_trav, f);
- fputs ("\n\n", f);
+ static const struct sra_walk_fns fns = {
+ scalarize_use, scalarize_copy, scalarize_init, scalarize_ldst, false
+ };
+
+ sra_walk_function (&fns);
+ scalarize_parms ();
+ bsi_commit_edge_inserts (NULL);
}
-/* Main entry point to Scalar Replacement of Aggregates (SRA). This pass
- re-writes non-aliased structure references into scalar temporaries. The
- goal is to expose some/all structures to the scalar optimizers.
+\f
+/* Debug helper function. Print ELT in a nice human-readable format. */
- Scalarization proceeds in two main phases. First, every structure
- referenced in the program that complies with can_be_scalarized_p is
- marked for scalarization (find_candidates_for_sra).
-
- Second, a mapping between structure fields and scalar temporaries so
- that every time a particular field of a particular structure is
- referenced in the code, we replace it with its corresponding scalar
- temporary (scalarize_structures).
+static void
+dump_sra_elt_name (FILE *f, struct sra_elt *elt)
+{
+ if (elt->parent && TREE_CODE (elt->parent->type) == COMPLEX_TYPE)
+ {
+ fputs (elt->element == integer_zero_node ? "__real__ " : "__imag__ ", f);
+ dump_sra_elt_name (f, elt->parent);
+ }
+ else
+ {
+ if (elt->parent)
+ dump_sra_elt_name (f, elt->parent);
+ if (DECL_P (elt->element))
+ {
+ if (TREE_CODE (elt->element) == FIELD_DECL)
+ fputc ('.', f);
+ print_generic_expr (f, elt->element, dump_flags);
+ }
+ else
+ fprintf (f, "[" HOST_WIDE_INT_PRINT_DEC "]",
+ TREE_INT_CST_LOW (elt->element));
+ }
+}
- TODO
+/* Likewise, but callable from the debugger. */
+
+void
+debug_sra_elt_name (struct sra_elt *elt)
+{
+ dump_sra_elt_name (stderr, elt);
+ fputc ('\n', stderr);
+}
- 1- Scalarize COMPLEX_TYPEs
- 2- Scalarize ARRAY_REFs that are always referenced with a
- constant index.
- 3- Timings to determine when scalarization is not profitable.
- 4- Determine what's a good value for MAX_NFIELDS_FOR_SRA. */
+/* Main entry point. */
static void
tree_sra (void)
{
/* Initialize local variables. */
+ gcc_obstack_init (&sra_obstack);
sra_candidates = BITMAP_XMALLOC ();
- sra_map = NULL;
- needs_copy_in = NULL;
+ needs_copy_in = BITMAP_XMALLOC ();
+ sra_type_decomp_cache = BITMAP_XMALLOC ();
+ sra_type_inst_cache = BITMAP_XMALLOC ();
+ sra_map = htab_create (101, sra_elt_hash, sra_elt_eq, NULL);
- /* Find structures to be scalarized. */
- if (!find_candidates_for_sra ())
+ /* Scan. If we find anything, instantiate and scalarize. */
+ if (find_candidates_for_sra ())
{
- BITMAP_XFREE (sra_candidates);
- return;
+ scan_function ();
+ decide_instantiations ();
+ scalarize_function ();
}
- /* If we found any, re-write structure references with their
- corresponding scalar replacement. */
- sra_map = htab_create (101, sra_elt_hash, sra_elt_eq, free);
- needs_copy_in = BITMAP_XMALLOC ();
-
- scalarize_structures ();
-
- if (dump_file)
- dump_sra_map (dump_file);
-
/* Free allocated memory. */
htab_delete (sra_map);
sra_map = NULL;
- BITMAP_XFREE (needs_copy_in);
BITMAP_XFREE (sra_candidates);
+ BITMAP_XFREE (needs_copy_in);
+ BITMAP_XFREE (sra_type_decomp_cache);
+ BITMAP_XFREE (sra_type_inst_cache);
+ obstack_free (&sra_obstack, NULL);
}
static bool