From: Richard Henderson Date: Tue, 29 Jun 2004 16:25:28 +0000 (-0700) Subject: tree-sra.c: Rewrite from scratch. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=97e73bd25031f3c7ea03a29c469fbbd4b6db6f8e;p=gcc.git tree-sra.c: Rewrite from scratch. * tree-sra.c: Rewrite from scratch. Handle nested aggregates. * gcc.dg/tree-ssa/20040430-1.c: Expect zero if's. From-SVN: r83858 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 1696238c5e6..a87da45d891 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,7 @@ +2004-06-29 Richard Henderson + + * tree-sra.c: Rewrite from scratch. Handle nested aggregates. + 2004-06-29 Nathan Sidwell * vec.h (VEC_T_safe_push, VEC_T_safe_insert): Tweak for when diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 1bc26719b39..e60bb1e5a53 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2004-06-29 Richard Henderson + + * gcc.dg/tree-ssa/20040430-1.c: Expect zero if's. + 2004-06-29 Paul Brook * g++.old-deja/g++.abi/arraynew.C: Handle ARM EABI cookies. diff --git a/gcc/testsuite/gcc.dg/tree-ssa/20040430-1.c b/gcc/testsuite/gcc.dg/tree-ssa/20040430-1.c index 73ee8da85a6..4fbd950f3dd 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/20040430-1.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/20040430-1.c @@ -1,7 +1,7 @@ /* PR middle-end/14470. Similar to gcc.c-torture/execute/20040313-1.c, but with a compile time test to - make sure the second if() is removed. We should actually get rid - of the first if() too, but we're not that smart yet. */ + make sure the second if() is removed. */ +/* Update: We now remove both ifs. Whee. */ /* { dg-do run } */ /* { dg-options "-O2 -fdump-tree-optimized" } */ @@ -22,4 +22,4 @@ int main() return 0; } -/* { dg-final { scan-tree-dump-times "if " 1 "optimized"} } */ +/* { dg-final { scan-tree-dump-times "if " 0 "optimized"} } */ diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c index 24b2d187c96..f0d6da92372 100644 --- a/gcc/tree-sra.c +++ b/gcc/tree-sra.c @@ -44,26 +44,34 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include "timevar.h" #include "flags.h" #include "bitmap.h" +#include "obstack.h" +#include "target.h" -/* Maximum number of fields that a structure should have to be scalarized. - FIXME This limit has been arbitrarily set to 5. Experiment to find a - sensible setting. */ -#define MAX_NFIELDS_FOR_SRA 5 +/* This object of this pass is to replace a non-addressable aggregate with a + set of independent variables. Most of the time, all of these variables + will be scalars. But a secondary objective is to break up larger + aggregates into smaller aggregates. In the process we may find that some + bits of the larger aggregate can be deleted as unreferenced. -/* Codes indicating how to copy one structure into another. */ -enum sra_copy_mode { SCALAR_SCALAR, FIELD_SCALAR, SCALAR_FIELD }; + This substitution is done globally. More localized substitutions would + be the purvey of a load-store motion pass. + + The optimization proceeds in phases: + + (1) Identify variables that have types that are candidates for + decomposition. + + (2) Scan the function looking for the ways these variables are used. + In particular we're interested in the number of times a variable + (or member) is needed as a complete unit, and the number of times + a variable (or member) is copied. + + (3) Based on the usage profile, instantiate substitution variables. + + (4) Scan the function making replacements. +*/ -/* Local functions. 
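To make the stated objective concrete, here is a small hypothetical example
(type and variable names are invented for illustration).  Given source such
as

    struct inner { int i; int j; };
    struct outer { struct inner in; int k; };

    int f (struct outer o)
    {
      struct outer tmp = o;
      return tmp.in.i + tmp.k;
    }

the rewritten pass can decompose the nested aggregate `tmp' (and the
incoming parameter `o') into independent scalar replacements, one per
accessed leaf, so the aggregate copy and the member loads become plain
scalar assignments that later passes can propagate and eliminate.  The old
pass gave up as soon as a field was itself an aggregate; the rewrite as a
whole is what lets the testsuite change above expect zero remaining
if () statements.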
*/ -static inline bool can_be_scalarized_p (tree); -static tree create_scalar_copies (tree lhs, tree rhs, enum sra_copy_mode mode); -static inline void scalarize_component_ref (tree, tree *tp); -static void scalarize_structures (void); -static void scalarize_stmt (block_stmt_iterator *); -static void scalarize_modify_expr (block_stmt_iterator *); -static void scalarize_call_expr (block_stmt_iterator *); -static void scalarize_asm_expr (block_stmt_iterator *); -static void scalarize_return_expr (block_stmt_iterator *); /* The set of aggregate variables that are candidates for scalarization. */ static bitmap sra_candidates; @@ -72,85 +80,68 @@ static bitmap sra_candidates; beginning of the function. */ static bitmap needs_copy_in; -/* This structure holds the mapping between and element of an aggregate - and the scalar replacement variable. */ +/* Sets of bit pairs that cache type decomposition and instantiation. */ +static bitmap sra_type_decomp_cache; +static bitmap sra_type_inst_cache; + +/* One of these structures is created for each candidate aggregate + and each (accessed) member of such an aggregate. */ struct sra_elt { - enum tree_code kind; - tree base; - tree field; - tree replace; -}; - -static htab_t sra_map; + /* A tree of the elements. Used when we want to traverse everything. */ + struct sra_elt *parent; + struct sra_elt *children; + struct sra_elt *sibling; -static hashval_t -sra_elt_hash (const void *x) -{ - const struct sra_elt *e = x; - hashval_t h = (size_t) e->base * e->kind; - if (e->kind == COMPONENT_REF) - h ^= (size_t) e->field; - return h; -} + /* If this element is a root, then this is the VAR_DECL. If this is + a sub-element, this is some token used to identify the reference. + In the case of COMPONENT_REF, this is the FIELD_DECL. In the case + of an ARRAY_REF, this is the (constant) index. In the case of a + complex number, this is a zero or one. */ + tree element; -static int -sra_elt_eq (const void *x, const void *y) -{ - const struct sra_elt *a = x; - const struct sra_elt *b = y; + /* The type of the element. */ + tree type; - if (a->kind != b->kind) - return false; - if (a->base != b->base) - return false; - if (a->kind == COMPONENT_REF) - if (a->field != b->field) - return false; + /* A VAR_DECL, for any sub-element we've decided to replace. */ + tree replacement; - return true; -} + /* The number of times the element is referenced as a whole. I.e. + given "a.b.c", this would be incremented for C, but not for A or B. */ + unsigned int n_uses; -/* Mark all the variables in V_MAY_DEF operands for STMT for renaming. - This becomes necessary when we modify all of a non-scalar. */ + /* The number of times the element is copied to or from another + scalarizable element. */ + unsigned int n_copies; -static void -mark_all_v_may_defs (tree stmt) -{ - v_may_def_optype v_may_defs; - size_t i, n; + /* True if TYPE is scalar. */ + bool is_scalar; - get_stmt_operands (stmt); - v_may_defs = V_MAY_DEF_OPS (stmt_ann (stmt)); - n = NUM_V_MAY_DEFS (v_may_defs); + /* True if we saw something about this element that prevents scalarization, + such as non-constant indexing. */ + bool cannot_scalarize; - for (i = 0; i < n; i++) - { - tree sym = V_MAY_DEF_RESULT (v_may_defs, i); - bitmap_set_bit (vars_to_rename, var_ann (sym)->uid); - } -} + /* True if we've decided that structure-to-structure assignment + should happen via memcpy and not per-element. */ + bool use_block_copy; -/* Mark all the variables in V_MUST_DEF operands for STMT for renaming. 
- This becomes necessary when we modify all of a non-scalar. */ + /* A flag for use with/after random access traversals. */ + bool visited; +}; -static void -mark_all_v_must_defs (tree stmt) -{ - v_must_def_optype v_must_defs; - size_t i, n; +/* Random access to the child of a parent is performed by hashing. + This prevents quadratic behaviour, and allows SRA to function + reasonably on larger records. */ +static htab_t sra_map; - get_stmt_operands (stmt); - v_must_defs = V_MUST_DEF_OPS (stmt_ann (stmt)); - n = NUM_V_MUST_DEFS (v_must_defs); +/* All structures are allocated out of the following obstack. */ +static struct obstack sra_obstack; - for (i = 0; i < n; i++) - { - tree sym = V_MUST_DEF_OP (v_must_defs, i); - bitmap_set_bit (vars_to_rename, var_ann (sym)->uid); - } -} +/* Debugging functions. */ +static void dump_sra_elt_name (FILE *, struct sra_elt *); +extern void debug_sra_elt_name (struct sra_elt *); + /* Return true if DECL is an SRA candidate. */ static bool @@ -159,157 +150,102 @@ is_sra_candidate_decl (tree decl) return DECL_P (decl) && bitmap_bit_p (sra_candidates, var_ann (decl)->uid); } -/* Return true if EXP is of the form , where REF is one of the - field access references we handle and DECL is an SRA candidate. */ +/* Return true if TYPE is a scalar type. */ static bool -is_sra_candidate_ref (tree exp) +is_sra_scalar_type (tree type) { - switch (TREE_CODE (exp)) - { - case COMPONENT_REF: - case REALPART_EXPR: - case IMAGPART_EXPR: - return is_sra_candidate_decl (TREE_OPERAND (exp, 0)); - - default: - break; - } - - return false; + enum tree_code code = TREE_CODE (type); + return (code == INTEGER_TYPE || code == REAL_TYPE || code == VECTOR_TYPE + || code == ENUMERAL_TYPE || code == BOOLEAN_TYPE + || code == CHAR_TYPE || code == POINTER_TYPE || code == OFFSET_TYPE + || code == REFERENCE_TYPE); } -/* Return true if EXP is of the form , where REF is a nest of - references handled by handle_components_p and DECL is an SRA candidate. - *VAR_P is set to DECL. */ +/* Return true if TYPE can be decomposed into a set of independent variables. + + Note that this doesn't imply that all elements of TYPE can be + instantiated, just that if we decide to break up the type into + separate pieces that it can be done. */ static bool -is_sra_candidate_complex_ref (tree exp, tree *var_p) +type_can_be_decomposed_p (tree type) { - tree orig_exp = exp; - - while (TREE_CODE (exp) == REALPART_EXPR || TREE_CODE (exp) == IMAGPART_EXPR - || handled_component_p (exp)) - exp = TREE_OPERAND (exp, 0); + unsigned int cache = TYPE_UID (TYPE_MAIN_VARIANT (type)) * 2; + tree t; - if (orig_exp != exp && is_sra_candidate_decl (exp)) - { - *var_p = exp; - return true; - } - - return false; -} - -/* Return the scalar in SRA_MAP[VAR_IX][FIELD_IX]. If none exists, create - a new scalar with type TYPE. */ + /* Avoid searching the same type twice. */ + if (bitmap_bit_p (sra_type_decomp_cache, cache+0)) + return true; + if (bitmap_bit_p (sra_type_decomp_cache, cache+1)) + return false; -static tree -lookup_scalar (struct sra_elt *key, tree type) -{ - struct sra_elt **slot, *res; + /* The type must have a definite non-zero size. */ + if (TYPE_SIZE (type) == NULL || integer_zerop (TYPE_SIZE (type))) + goto fail; - slot = (struct sra_elt **) htab_find_slot (sra_map, key, INSERT); - res = *slot; - if (!res) + /* The type must be a non-union aggregate. 
*/ + switch (TREE_CODE (type)) { - res = xmalloc (sizeof (*res)); - *slot = res; - *res = *key; - res->replace = make_rename_temp (type, "SR"); + case RECORD_TYPE: + { + bool saw_one_field = false; - if (DECL_NAME (key->base) && !DECL_IGNORED_P (key->base)) - { - char *name = NULL; - switch (key->kind) - { - case COMPONENT_REF: - if (!DECL_NAME (key->field)) - break; - name = concat (IDENTIFIER_POINTER (DECL_NAME (key->base)), - "$", - IDENTIFIER_POINTER (DECL_NAME (key->field)), - NULL); - break; - case REALPART_EXPR: - name = concat (IDENTIFIER_POINTER (DECL_NAME (key->base)), - "$real", NULL); - break; - case IMAGPART_EXPR: - name = concat (IDENTIFIER_POINTER (DECL_NAME (key->base)), - "$imag", NULL); - break; - default: - abort (); - } - if (name) + for (t = TYPE_FIELDS (type); t ; t = TREE_CHAIN (t)) + if (TREE_CODE (t) == FIELD_DECL) { - DECL_NAME (res->replace) = get_identifier (name); - free (name); - } - } - - DECL_SOURCE_LOCATION (res->replace) = DECL_SOURCE_LOCATION (key->base); - TREE_NO_WARNING (res->replace) = TREE_NO_WARNING (key->base); - DECL_ARTIFICIAL (res->replace) = DECL_ARTIFICIAL (key->base); - } - - return res->replace; -} - - -/* Given a structure reference VAR.FIELD, return a scalar representing it. - If no scalar is found, a new one is created and added to the SRA_MAP - matrix. */ - -static tree -get_scalar_for_field (tree var, tree field) -{ - struct sra_elt key; - -#ifdef ENABLE_CHECKING - /* Validate that FIELD actually exists in VAR's type. */ - { - tree f; - for (f = TYPE_FIELDS (TREE_TYPE (var)); f ; f = TREE_CHAIN (f)) - if (f == field) - goto found; - abort (); - found:; - } -#endif + /* Reject incorrectly represented bit fields. */ + if (DECL_BIT_FIELD (t) + && (tree_low_cst (DECL_SIZE (t), 1) + != TYPE_PRECISION (TREE_TYPE (t)))) + goto fail; - key.kind = COMPONENT_REF; - key.base = var; - key.field = field; + saw_one_field = true; + } - return lookup_scalar (&key, TREE_TYPE (field)); -} + /* Record types must have at least one field. */ + if (!saw_one_field) + goto fail; + } + break; + case ARRAY_TYPE: + /* Array types must have a fixed lower and upper bound. */ + t = TYPE_DOMAIN (type); + if (t == NULL) + goto fail; + if (TYPE_MIN_VALUE (t) == NULL || !TREE_CONSTANT (TYPE_MIN_VALUE (t))) + goto fail; + if (TYPE_MAX_VALUE (t) == NULL || !TREE_CONSTANT (TYPE_MAX_VALUE (t))) + goto fail; + break; -/* Similarly for the parts of a complex type. */ + case COMPLEX_TYPE: + break; -static tree -get_scalar_for_complex_part (tree var, enum tree_code part) -{ - struct sra_elt key; + default: + goto fail; + } - key.kind = part; - key.base = var; + bitmap_set_bit (sra_type_decomp_cache, cache+0); + return true; - return lookup_scalar (&key, TREE_TYPE (TREE_TYPE (var))); + fail: + bitmap_set_bit (sra_type_decomp_cache, cache+1); + return false; } -/* Return true if the fields of VAR can be replaced by scalar temporaries. - This only happens if VAR is not call-clobbered and it contains less - than MAX_NFIELDS_FOR_SRA scalar fields. */ +/* Return true if DECL can be decomposed into a set of independent + (though not necessarily scalar) variables. */ -static inline bool -can_be_scalarized_p (tree var) +static bool +decl_can_be_decomposed_p (tree var) { - tree field, type; - int nfields; + /* Early out for scalars. */ + if (is_sra_scalar_type (TREE_TYPE (var))) + return false; + /* The variable must not be aliased. 
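A few hypothetical declarations illustrate which types pass
type_can_be_decomposed_p (examples invented for illustration):

    struct s { int a; double b; };    decomposable: a record with FIELD_DECLs
    double v[4];                      decomposable: constant lower and upper bound
    _Complex float z;                 decomposable: complex values always qualify
    union u { int i; float f; };      rejected: unions fall through to the default case
    int vla[n];                       rejected: bounds must be compile-time constants
    struct e { };                     rejected: zero size and no FIELD_DECL

A record is likewise rejected if it contains a bit-field whose DECL_SIZE
does not match the precision of its type, and either answer is cached per
main type variant in sra_type_decomp_cache so the walk happens at most once
per type.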
*/ if (!is_gimple_non_addressable (var)) { if (dump_file && (dump_flags & TDF_DETAILS)) @@ -321,6 +257,7 @@ can_be_scalarized_p (tree var) return false; } + /* The variable must not be volatile. */ if (TREE_THIS_VOLATILE (var)) { if (dump_file && (dump_flags & TDF_DETAILS)) @@ -332,879 +269,1701 @@ can_be_scalarized_p (tree var) return false; } - /* Any COMPLEX_TYPE that has reached this point can be scalarized. */ - if (TREE_CODE (TREE_TYPE (var)) == COMPLEX_TYPE) + /* We must be able to decompose the variable's type. */ + if (!type_can_be_decomposed_p (TREE_TYPE (var))) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Cannot scalarize variable "); + print_generic_expr (dump_file, var, dump_flags); + fprintf (dump_file, " because its type cannot be decomposed\n"); + } + return false; + } + + return true; +} + +/* Return true if TYPE can be *completely* decomposed into scalars. */ + +static bool +type_can_instantiate_all_elements (tree type) +{ + if (is_sra_scalar_type (type)) return true; + if (!type_can_be_decomposed_p (type)) + return false; - type = TREE_TYPE (var); - nfields = 0; - for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) + switch (TREE_CODE (type)) { - if (TREE_CODE (field) != FIELD_DECL) - continue; + case RECORD_TYPE: + { + unsigned int cache = TYPE_UID (TYPE_MAIN_VARIANT (type)) * 2; + tree f; - /* FIXME: We really should recurse down the type hierarchy and - scalarize the fields at the leaves. */ - if (AGGREGATE_TYPE_P (TREE_TYPE (field))) - { - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "Cannot scalarize variable "); - print_generic_expr (dump_file, var, dump_flags); - fprintf (dump_file, - " because it contains an aggregate type field, "); - print_generic_expr (dump_file, field, dump_flags); - fprintf (dump_file, "\n"); - } + if (bitmap_bit_p (sra_type_inst_cache, cache+0)) + return true; + if (bitmap_bit_p (sra_type_inst_cache, cache+1)) return false; - } - /* FIXME: Similarly. Indeed, considering that we treat complex - as an aggregate, this is exactly the same problem. - Structures with __complex__ fields are tested in the libstdc++ - testsuite: 26_numerics/complex_inserters_extractors.cc. */ - if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE) - { - if (dump_file && (dump_flags & TDF_DETAILS)) + for (f = TYPE_FIELDS (type); f ; f = TREE_CHAIN (f)) + if (TREE_CODE (f) == FIELD_DECL) { - fprintf (dump_file, "Cannot scalarize variable "); - print_generic_expr (dump_file, var, dump_flags); - fprintf (dump_file, - " because it contains a __complex__ field, "); - print_generic_expr (dump_file, field, dump_flags); - fprintf (dump_file, "\n"); + if (!type_can_instantiate_all_elements (TREE_TYPE (f))) + { + bitmap_set_bit (sra_type_inst_cache, cache+1); + return false; + } } - return false; - } - /* FIXME. We don't scalarize structures with bit fields yet. To - support this, we should make sure that all the fields fit in one - word and modify every operation done on the scalarized bit fields - to mask them properly. 
*/ - if (DECL_BIT_FIELD (field)) - { - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "Cannot scalarize variable "); - print_generic_expr (dump_file, var, dump_flags); - fprintf (dump_file, - " because it contains a bit-field, "); - print_generic_expr (dump_file, field, dump_flags); - fprintf (dump_file, "\n"); - } - return false; - } + bitmap_set_bit (sra_type_inst_cache, cache+0); + return true; + } - nfields++; - if (nfields > MAX_NFIELDS_FOR_SRA) - { - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "Cannot scalarize variable "); - print_generic_expr (dump_file, var, dump_flags); - fprintf (dump_file, - " because it contains more than %d fields\n", - MAX_NFIELDS_FOR_SRA); - } - return false; - } - } + case ARRAY_TYPE: + return type_can_instantiate_all_elements (TREE_TYPE (type)); - /* If the structure had no FIELD_DECLs, then don't bother - scalarizing it. */ - return nfields > 0; -} + case COMPLEX_TYPE: + return true; + default: + abort (); + } +} -/* Replace the COMPONENT_REF, REALPART_EXPR or IMAGPART_EXPR pointed-to by - TP inside STMT with the corresponding scalar replacement from SRA_MAP. */ +/* Test whether ELT or some sub-element cannot be scalarized. */ -static inline void -scalarize_component_ref (tree stmt, tree *tp) +static bool +can_completely_scalarize_p (struct sra_elt *elt) { - tree t = *tp, obj = TREE_OPERAND (t, 0); + struct sra_elt *c; + + if (elt->cannot_scalarize) + return false; + + for (c = elt->children; c ; c = c->sibling) + if (!can_completely_scalarize_p (c)) + return false; + + return true; +} - /* When scalarizing a function argument, we will need to insert copy-in - operations from the original PARM_DECLs. Note that these copy-in - operations may end up being dead, but we won't know until we rename - the new variables into SSA. */ - if (TREE_CODE (obj) == PARM_DECL) - bitmap_set_bit (needs_copy_in, var_ann (obj)->uid); + +/* A simplified tree hashing algorithm that only handles the types of + trees we expect to find in sra_elt->element. */ +static hashval_t +sra_hash_tree (tree t) +{ switch (TREE_CODE (t)) { - case COMPONENT_REF: - t = get_scalar_for_field (obj, TREE_OPERAND (t, 1)); - break; - case REALPART_EXPR: - case IMAGPART_EXPR: - t = get_scalar_for_complex_part (obj, TREE_CODE (t)); - break; + case VAR_DECL: + case PARM_DECL: + case RESULT_DECL: + case FIELD_DECL: + return DECL_UID (t); + case INTEGER_CST: + return TREE_INT_CST_LOW (t) ^ TREE_INT_CST_HIGH (t); default: abort (); } - - *tp = t; - modify_stmt (stmt); } +/* Hash function for type SRA_PAIR. */ -/* Scalarize the structure assignment for the statement pointed by SI_P. */ - -static void -scalarize_structure_assignment (block_stmt_iterator *si_p) +static hashval_t +sra_elt_hash (const void *x) { - var_ann_t lhs_ann, rhs_ann; - tree lhs, rhs, list, orig_stmt; - bool lhs_can, rhs_can; - - orig_stmt = bsi_stmt (*si_p); - lhs = TREE_OPERAND (orig_stmt, 0); - rhs = TREE_OPERAND (orig_stmt, 1); - list = NULL_TREE; + const struct sra_elt *e = x; + const struct sra_elt *p; + hashval_t h; -#if defined ENABLE_CHECKING - if (TREE_CODE (orig_stmt) != MODIFY_EXPR) - abort (); -#endif + h = sra_hash_tree (e->element); - /* Remove all type casts from RHS. This may seem heavy handed but - it's actually safe and it is necessary in the presence of C++ - reinterpret_cast<> where structure assignments of different - structures will be present in the IL. 
This was the case of PR - 13347 (http://gcc.gnu.org/bugzilla/show_bug.cgi?id=13347) which - had something like this: - - struct A f; - struct B g; - f = (struct A)g; - - Both 'f' and 'g' were scalarizable, but the presence of the type - cast was causing SRA to not replace the RHS of the assignment - with g's scalar replacements. Furthermore, the fact that this - assignment reached this point without causing syntax errors means - that the type cast is safe and that a field-by-field assignment - from 'g' into 'f' is the right thing to do. */ - STRIP_NOPS (rhs); - - lhs_ann = DECL_P (lhs) ? var_ann (lhs) : NULL; - rhs_ann = DECL_P (rhs) ? var_ann (rhs) : NULL; - -#if defined ENABLE_CHECKING - /* Two different variables should not have the same UID. */ - if (lhs_ann - && rhs_ann - && lhs != rhs - && lhs_ann->uid == rhs_ann->uid) - abort (); -#endif + /* Take into account everything back up the chain. Given that chain + lengths are rarely very long, this should be acceptable. If we + truely identify this as a performance problem, it should work to + hash the pointer value "e->parent". */ + for (p = e->parent; p ; p = p->parent) + h = (h * 65521) ^ sra_hash_tree (p->element); - lhs_can = lhs_ann && bitmap_bit_p (sra_candidates, lhs_ann->uid); - rhs_can = rhs_ann && bitmap_bit_p (sra_candidates, rhs_ann->uid); + return h; +} + +/* Equality function for type SRA_PAIR. */ - /* Both LHS and RHS are scalarizable. */ - if (lhs_can && rhs_can) - list = create_scalar_copies (lhs, rhs, SCALAR_SCALAR); +static int +sra_elt_eq (const void *x, const void *y) +{ + const struct sra_elt *a = x; + const struct sra_elt *b = y; - /* Only RHS is scalarizable. */ - else if (rhs_can) - list = create_scalar_copies (lhs, rhs, FIELD_SCALAR); + if (a->parent != b->parent) + return false; - /* Only LHS is scalarizable. */ - else if (lhs_can) - list = create_scalar_copies (lhs, rhs, SCALAR_FIELD); + /* All the field/decl stuff is unique. */ + if (a->element == b->element) + return true; - /* If neither side is scalarizable, do nothing else. */ + /* The only thing left is integer equality. */ + if (TREE_CODE (a->element) == INTEGER_CST + && TREE_CODE (b->element) == INTEGER_CST) + return tree_int_cst_equal (a->element, b->element); else - return; - - /* Set line number information for our replacements. */ - if (EXPR_HAS_LOCATION (orig_stmt)) - annotate_all_with_locus (&list, EXPR_LOCATION (orig_stmt)); - - /* Replace the existing statement with the newly created list of - scalarized copies. When replacing the original statement, the first - copy replaces it and the remaining copies are inserted either after - the first copy or on the outgoing edges of the original statement's - block. */ - { - tree_stmt_iterator tsi = tsi_start (list); - bsi_replace (si_p, tsi_stmt (tsi), true); - tsi_delink (&tsi); - if (stmt_ends_bb_p (orig_stmt)) - insert_edge_copies (list, bb_for_stmt (orig_stmt)); - else - bsi_insert_after (si_p, list, BSI_CONTINUE_LINKING); - } + return false; } +/* Create or return the SRA_ELT structure for CHILD in PARENT. PARENT + may be null, in which case CHILD must be a DECL. */ -/* Traverse all the referenced variables in the program looking for - structures that could be replaced with scalars. 
*/ - -static bool -find_candidates_for_sra (void) +static struct sra_elt * +lookup_element (struct sra_elt *parent, tree child, tree type, + enum insert_option insert) { - size_t i; - bool any_set = false; + struct sra_elt dummy; + struct sra_elt **slot; + struct sra_elt *elt; - for (i = 0; i < num_referenced_vars; i++) + dummy.parent = parent; + dummy.element = child; + + slot = (struct sra_elt **) htab_find_slot (sra_map, &dummy, insert); + if (!slot && insert == NO_INSERT) + return NULL; + + elt = *slot; + if (!elt && insert == INSERT) { - tree var = referenced_var (i); + *slot = elt = obstack_alloc (&sra_obstack, sizeof (*elt)); + memset (elt, 0, sizeof (*elt)); + + elt->parent = parent; + elt->element = child; + elt->type = type; + elt->is_scalar = is_sra_scalar_type (type); + + if (parent) + { + elt->sibling = parent->children; + parent->children = elt; + } - if ((TREE_CODE (TREE_TYPE (var)) == RECORD_TYPE - || TREE_CODE (TREE_TYPE (var)) == COMPLEX_TYPE) - && can_be_scalarized_p (var)) + /* If this is a parameter, then if we want to scalarize, we have + one copy from the true function parameter. Count it now. */ + if (TREE_CODE (child) == PARM_DECL) { - bitmap_set_bit (sra_candidates, var_ann (var)->uid); - any_set = true; + elt->n_copies = 1; + bitmap_set_bit (needs_copy_in, var_ann (child)->uid); } } - return any_set; + return elt; } +/* Return true if the ARRAY_REF in EXPR is a constant, in bounds access. */ -/* Insert STMT on all the outgoing edges out of BB. Note that if BB - has more than one edge, STMT will be replicated for each edge. Also, - abnormal edges will be ignored. */ - -void -insert_edge_copies (tree stmt, basic_block bb) +static bool +is_valid_const_index (tree expr) { - edge e; - bool first_copy; + tree dom, t, index = TREE_OPERAND (expr, 1); - first_copy = true; - for (e = bb->succ; e; e = e->succ_next) - { - /* We don't need to insert copies on abnormal edges. The - value of the scalar replacement is not guaranteed to - be valid through an abnormal edge. */ - if (!(e->flags & EDGE_ABNORMAL)) - { - if (first_copy) - { - bsi_insert_on_edge (e, stmt); - first_copy = false; - } - else - bsi_insert_on_edge (e, lhd_unsave_expr_now (stmt)); - } - } -} + if (TREE_CODE (index) != INTEGER_CST) + return false; + /* Watch out for stupid user tricks, indexing outside the array. -/* Append a new assignment statement to TSI. */ + Careful, we're not called only on scalarizable types, so do not + assume constant array bounds. We needn't do anything with such + cases, since they'll be referring to objects that we should have + already rejected for scalarization, so returning false is fine. */ -static tree -csc_assign (tree_stmt_iterator *tsi, tree lhs, tree rhs) + dom = TYPE_DOMAIN (TREE_TYPE (TREE_OPERAND (expr, 0))); + if (dom == NULL) + return false; + + t = TYPE_MIN_VALUE (dom); + if (!t || TREE_CODE (t) != INTEGER_CST) + return false; + if (tree_int_cst_lt (index, t)) + return false; + + t = TYPE_MAX_VALUE (dom); + if (!t || TREE_CODE (t) != INTEGER_CST) + return false; + if (tree_int_cst_lt (t, index)) + return false; + + return true; +} + +/* Create or return the SRA_ELT structure for EXPR if the expression + refers to a scalarizable variable. 
*/ + +static struct sra_elt * +maybe_lookup_element_for_expr (tree expr) { - tree stmt = build (MODIFY_EXPR, TREE_TYPE (lhs), lhs, rhs); - modify_stmt (stmt); - tsi_link_after (tsi, stmt, TSI_NEW_STMT); - return stmt; + struct sra_elt *elt; + tree child; + + switch (TREE_CODE (expr)) + { + case VAR_DECL: + case PARM_DECL: + case RESULT_DECL: + if (is_sra_candidate_decl (expr)) + return lookup_element (NULL, expr, TREE_TYPE (expr), INSERT); + return NULL; + + case ARRAY_REF: + /* We can't scalarize variable array indicies. */ + if (is_valid_const_index (expr)) + child = TREE_OPERAND (expr, 1); + else + return NULL; + break; + + case COMPONENT_REF: + /* Don't look through unions. */ + if (TREE_CODE (TREE_TYPE (TREE_OPERAND (expr, 0))) != RECORD_TYPE) + return NULL; + child = TREE_OPERAND (expr, 1); + break; + + case REALPART_EXPR: + child = integer_zero_node; + break; + case IMAGPART_EXPR: + child = integer_one_node; + break; + + default: + return NULL; + } + + elt = maybe_lookup_element_for_expr (TREE_OPERAND (expr, 0)); + if (elt) + return lookup_element (elt, child, TREE_TYPE (expr), INSERT); + return NULL; +} + + +/* Functions to walk just enough of the tree to see all scalarizable + references, and categorize them. */ + +/* A set of callbacks for phases 2 and 4. They'll be invoked for the + various kinds of references seen. In all cases, *BSI is an iterator + pointing to the statement being processed. */ +struct sra_walk_fns +{ + /* Invoked when ELT is required as a unit. Note that ELT might refer to + a leaf node, in which case this is a simple scalar reference. *EXPR_P + points to the location of the expression. IS_OUTPUT is true if this + is a left-hand-side reference. */ + void (*use) (struct sra_elt *elt, tree *expr_p, + block_stmt_iterator *bsi, bool is_output); + + /* Invoked when we have a copy between two scalarizable references. */ + void (*copy) (struct sra_elt *lhs_elt, struct sra_elt *rhs_elt, + block_stmt_iterator *bsi); + + /* Invoked when ELT is initialized from a constant. VALUE may be NULL, + in which case it should be treated as an empty CONSTRUCTOR. */ + void (*init) (struct sra_elt *elt, tree value, block_stmt_iterator *bsi); + + /* Invoked when we have a copy between one scalarizable reference ELT + and one non-scalarizable reference OTHER. IS_OUTPUT is true if ELT + is on the left-hand side. */ + void (*ldst) (struct sra_elt *elt, tree other, + block_stmt_iterator *bsi, bool is_output); + + /* True during phase 2, false during phase 4. */ + /* ??? This is a hack. */ + bool initial_scan; +}; + +#ifdef ENABLE_CHECKING +/* Invoked via walk_tree, if *TP contains an candidate decl, return it. */ + +static tree +sra_find_candidate_decl (tree *tp, int *walk_subtrees, + void *data ATTRIBUTE_UNUSED) +{ + tree t = *tp; + enum tree_code code = TREE_CODE (t); + + if (code == VAR_DECL || code == PARM_DECL || code == RESULT_DECL) + { + *walk_subtrees = 0; + if (is_sra_candidate_decl (t)) + return t; + } + else if (TYPE_P (t)) + *walk_subtrees = 0; + + return NULL; +} +#endif + +/* Walk most expressions looking for a scalarizable aggregate. + If we find one, invoke FNS->USE. */ + +static void +sra_walk_expr (tree *expr_p, block_stmt_iterator *bsi, bool is_output, + const struct sra_walk_fns *fns) +{ + tree expr = *expr_p; + tree inner = expr; + + /* We're looking to collect a reference expression between EXPR and INNER, + such that INNER is a scalarizable decl and all other nodes through EXPR + are references that we can scalarize. 
If we come across something that + we can't scalarize, we reset EXPR. This has the effect of making it + appear that we're referring to the larger expression as a whole. */ + + while (1) + switch (TREE_CODE (inner)) + { + case VAR_DECL: + case PARM_DECL: + case RESULT_DECL: + /* If there is a scalarizable decl at the bottom, then process it. */ + if (is_sra_candidate_decl (inner)) + { + struct sra_elt *elt = maybe_lookup_element_for_expr (expr); + fns->use (elt, expr_p, bsi, is_output); + } + return; + + case ARRAY_REF: + /* Non-constant index means any member may be accessed. Prevent the + expression from being scalarized. If we were to treat this as a + reference to the whole array, we can wind up with a single dynamic + index reference inside a loop being overridden by several constant + index references during loop setup. It's possible that this could + be avoided by using dynamic usage counts based on BB trip counts + (based on loop analysis or profiling), but that hardly seems worth + the effort. */ + /* ??? Hack. Figure out how to push this into the scan routines + without duplicating too much code. */ + if (!is_valid_const_index (inner)) + { + if (fns->initial_scan) + { + struct sra_elt *elt + = maybe_lookup_element_for_expr (TREE_OPERAND (inner, 0)); + if (elt) + elt->cannot_scalarize = true; + } + return; + } + /* ??? Are we assured that non-constant bounds and stride will have + the same value everywhere? I don't think Fortran will... */ + if (TREE_OPERAND (inner, 2) || TREE_OPERAND (inner, 3)) + goto use_all; + inner = TREE_OPERAND (inner, 0); + break; + + case COMPONENT_REF: + /* A reference to a union member constitutes a reference to the + entire union. */ + if (TREE_CODE (TREE_TYPE (TREE_OPERAND (inner, 0))) != RECORD_TYPE) + goto use_all; + /* ??? See above re non-constant stride. */ + if (TREE_OPERAND (inner, 2)) + goto use_all; + inner = TREE_OPERAND (inner, 0); + break; + + case REALPART_EXPR: + case IMAGPART_EXPR: + inner = TREE_OPERAND (inner, 0); + break; + + case BIT_FIELD_REF: + /* A bit field reference (access to *multiple* fields simultaneously) + is not currently scalarized. Consider this an access to the + complete outer element, to which walk_tree will bring us next. */ + goto use_all; + + case ARRAY_RANGE_REF: + /* Similarly, an subrange reference is used to modify indexing. Which + means that the canonical element names that we have won't work. */ + goto use_all; + + case VIEW_CONVERT_EXPR: + case NOP_EXPR: + /* Similarly, a view/nop explicitly wants to look at an object in a + type other than the one we've scalarized. */ + goto use_all; + + use_all: + expr_p = &TREE_OPERAND (inner, 0); + inner = expr = *expr_p; + break; + + default: +#ifdef ENABLE_CHECKING + /* Validate that we're not missing any references. */ + if (walk_tree (&inner, sra_find_candidate_decl, NULL, NULL)) + abort (); +#endif + return; + } +} + +/* Walk a TREE_LIST of values looking for scalarizable aggregates. + If we find one, invoke FNS->USE. */ + +static void +sra_walk_tree_list (tree list, block_stmt_iterator *bsi, bool is_output, + const struct sra_walk_fns *fns) +{ + tree op; + for (op = list; op ; op = TREE_CHAIN (op)) + sra_walk_expr (&TREE_VALUE (op), bsi, is_output, fns); +} + +/* Walk the arguments of a CALL_EXPR looking for scalarizable aggregates. + If we find one, invoke FNS->USE. 
*/ + +static void +sra_walk_call_expr (tree expr, block_stmt_iterator *bsi, + const struct sra_walk_fns *fns) +{ + sra_walk_tree_list (TREE_OPERAND (expr, 1), bsi, false, fns); +} + +/* Walk the inputs and outputs of an ASM_EXPR looking for scalarizable + aggregates. If we find one, invoke FNS->USE. */ + +static void +sra_walk_asm_expr (tree expr, block_stmt_iterator *bsi, + const struct sra_walk_fns *fns) +{ + sra_walk_tree_list (ASM_INPUTS (expr), bsi, false, fns); + sra_walk_tree_list (ASM_OUTPUTS (expr), bsi, true, fns); +} + +/* Walk a MODIFY_EXPR and categorize the assignment appropriately. */ + +static void +sra_walk_modify_expr (tree expr, block_stmt_iterator *bsi, + const struct sra_walk_fns *fns) +{ + struct sra_elt *lhs_elt, *rhs_elt; + tree lhs, rhs; + + lhs = TREE_OPERAND (expr, 0); + rhs = TREE_OPERAND (expr, 1); + lhs_elt = maybe_lookup_element_for_expr (lhs); + rhs_elt = maybe_lookup_element_for_expr (rhs); + + /* If both sides are scalarizable, this is a COPY operation. */ + if (lhs_elt && rhs_elt) + { + fns->copy (lhs_elt, rhs_elt, bsi); + return; + } + + if (lhs_elt) + { + /* If this is an assignment from a constant, or constructor, then + we have access to all of the elements individually. Invoke INIT. */ + if (TREE_CODE (rhs) == COMPLEX_EXPR + || TREE_CODE (rhs) == COMPLEX_CST + || TREE_CODE (rhs) == CONSTRUCTOR) + fns->init (lhs_elt, rhs, bsi); + + /* If this is an assignment from read-only memory, treat this as if + we'd been passed the constructor directly. Invoke INIT. */ + else if (TREE_CODE (rhs) == VAR_DECL + && TREE_STATIC (rhs) + && TREE_READONLY (rhs) + && targetm.binds_local_p (rhs)) + { + if (DECL_INITIAL (rhs) != error_mark_node) + fns->init (lhs_elt, DECL_INITIAL (rhs), bsi); + } + + /* If this is a copy from a non-scalarizable lvalue, invoke LDST. + The lvalue requirement prevents us from trying to directly scalarize + the result of a function call. Which would result in trying to call + the function multiple times, and other evil things. */ + else if (!lhs_elt->is_scalar && is_gimple_addr_expr_arg (rhs)) + fns->ldst (lhs_elt, rhs, bsi, true); + + /* Otherwise we're being used in some context that requires the + aggregate to be seen as a whole. Invoke USE. */ + else + fns->use (lhs_elt, &TREE_OPERAND (expr, 0), bsi, true); + } + else + { + /* LHS_ELT being null only means that the LHS as a whole is not a + scalarizable reference. There may be occurrences of scalarizable + variables within, which implies a USE. */ + sra_walk_expr (&TREE_OPERAND (expr, 0), bsi, true, fns); + } + + /* Likewise for the right-hand side. The only difference here is that + we don't have to handle constants, and the RHS may be a call. */ + if (rhs_elt) + { + if (!rhs_elt->is_scalar) + fns->ldst (rhs_elt, lhs, bsi, false); + else + fns->use (rhs_elt, &TREE_OPERAND (expr, 1), bsi, false); + } + else if (TREE_CODE (rhs) == CALL_EXPR) + sra_walk_call_expr (rhs, bsi, fns); + else + sra_walk_expr (&TREE_OPERAND (expr, 1), bsi, false, fns); +} + +/* Entry point to the walk functions. Search the entire function, + invoking the callbacks in FNS on each of the references to + scalarizable variables. */ + +static void +sra_walk_function (const struct sra_walk_fns *fns) +{ + basic_block bb; + block_stmt_iterator si; + + /* ??? Phase 4 could derive some benefit to walking the function in + dominator tree order. 
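As a quick reference for how sra_walk_modify_expr dispatches, assume
hypothetical candidates `a' and `b' and a non-candidate `g' (all names
invented for illustration):

    a = b;          both sides scalarizable                  -> fns->copy
    a = { ... };    CONSTRUCTOR, COMPLEX_CST or COMPLEX_EXPR -> fns->init
    a = *p;         aggregate LHS, non-scalarizable lvalue   -> fns->ldst, is_output true
    g = a;          aggregate RHS, non-scalarizable LHS      -> fns->ldst, is_output false
    a.f = x + y;    scalar member written                    -> fns->use,  is_output true
    foo (a);        argument of a call                       -> fns->use via sra_walk_call_expr

During phase two these callbacks are the scan_* counters below; during
phase four they are the rewriting callbacks such as scalarize_use further
down.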
*/ + + FOR_EACH_BB (bb) + for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si)) + { + tree stmt, t; + stmt_ann_t ann; + + stmt = bsi_stmt (si); + ann = stmt_ann (stmt); + + /* If the statement has no virtual operands, then it doesn't + make any structure references that we care about. */ + if (NUM_V_MAY_DEFS (V_MAY_DEF_OPS (ann)) == 0 + && NUM_VUSES (VUSE_OPS (ann)) == 0 + && NUM_V_MUST_DEFS (V_MUST_DEF_OPS (ann)) == 0) + continue; + + switch (TREE_CODE (stmt)) + { + case RETURN_EXPR: + /* If we have "return " then the return value is + already exposed for our pleasure. Walk it as a USE to + force all the components back in place for the return. + + If we have an embedded assignment, then is of + a type that gets returned in registers in this ABI, and + we do not wish to extend their lifetimes. Treat this + as a USE of the variable on the RHS of this assignment. */ + + t = TREE_OPERAND (stmt, 0); + if (TREE_CODE (t) == MODIFY_EXPR) + sra_walk_expr (&TREE_OPERAND (t, 1), &si, false, fns); + else + sra_walk_expr (&TREE_OPERAND (stmt, 0), &si, false, fns); + break; + + case MODIFY_EXPR: + sra_walk_modify_expr (stmt, &si, fns); + break; + case CALL_EXPR: + sra_walk_call_expr (stmt, &si, fns); + break; + case ASM_EXPR: + sra_walk_asm_expr (stmt, &si, fns); + break; + + default: + break; + } + } +} + +/* Phase One: Scan all referenced variables in the program looking for + structures that could be decomposed. */ + +static bool +find_candidates_for_sra (void) +{ + size_t i; + bool any_set = false; + + for (i = 0; i < num_referenced_vars; i++) + { + tree var = referenced_var (i); + if (decl_can_be_decomposed_p (var)) + { + bitmap_set_bit (sra_candidates, var_ann (var)->uid); + any_set = true; + } + } + + return any_set; +} + + +/* Phase Two: Scan all references to scalarizable variables. Count the + number of times they are used or copied respectively. */ + +/* Callbacks to fill in SRA_WALK_FNS. Everything but USE is + considered a copy, because we can decompose the reference such that + the sub-elements needn't be contiguous. */ + +static void +scan_use (struct sra_elt *elt, tree *expr_p ATTRIBUTE_UNUSED, + block_stmt_iterator *bsi ATTRIBUTE_UNUSED, + bool is_output ATTRIBUTE_UNUSED) +{ + elt->n_uses += 1; +} + +static void +scan_copy (struct sra_elt *lhs_elt, struct sra_elt *rhs_elt, + block_stmt_iterator *bsi ATTRIBUTE_UNUSED) +{ + lhs_elt->n_copies += 1; + rhs_elt->n_copies += 1; +} + +static void +scan_init (struct sra_elt *lhs_elt, tree rhs ATTRIBUTE_UNUSED, + block_stmt_iterator *bsi ATTRIBUTE_UNUSED) +{ + lhs_elt->n_copies += 1; +} + +static void +scan_ldst (struct sra_elt *elt, tree other ATTRIBUTE_UNUSED, + block_stmt_iterator *bsi ATTRIBUTE_UNUSED, + bool is_output ATTRIBUTE_UNUSED) +{ + elt->n_copies += 1; +} + +/* Dump the values we collected during the scanning phase. */ + +static void +scan_dump (struct sra_elt *elt) +{ + struct sra_elt *c; + + dump_sra_elt_name (dump_file, elt); + fprintf (dump_file, ": n_uses=%u n_copies=%u\n", elt->n_uses, elt->n_copies); + + for (c = elt->children; c ; c = c->sibling) + scan_dump (c); +} + +/* Entry point to phase 2. Scan the entire function, building up + scalarization data structures, recording copies and uses. 
*/ + +static void +scan_function (void) +{ + static const struct sra_walk_fns fns = { + scan_use, scan_copy, scan_init, scan_ldst, true + }; + + sra_walk_function (&fns); + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + size_t i; + + fputs ("\nScan results:\n", dump_file); + EXECUTE_IF_SET_IN_BITMAP (sra_candidates, 0, i, + { + tree var = referenced_var (i); + struct sra_elt *elt = lookup_element (NULL, var, NULL, NO_INSERT); + if (elt) + scan_dump (elt); + }); + fputc ('\n', dump_file); + } +} + +/* Phase Three: Make decisions about which variables to scalarize, if any. + All elements to be scalarized have replacement variables made for them. */ + +/* A subroutine of build_element_name. Recursively build the element + name on the obstack. */ + +static void +build_element_name_1 (struct sra_elt *elt) +{ + tree t; + char buffer[32]; + + if (elt->parent) + { + build_element_name_1 (elt->parent); + obstack_1grow (&sra_obstack, '$'); + + if (TREE_CODE (elt->parent->type) == COMPLEX_TYPE) + { + if (elt->element == integer_zero_node) + obstack_grow (&sra_obstack, "real", 4); + else + obstack_grow (&sra_obstack, "imag", 4); + return; + } + } + + t = elt->element; + if (TREE_CODE (t) == INTEGER_CST) + { + /* ??? Eh. Don't bother doing double-wide printing. */ + sprintf (buffer, HOST_WIDE_INT_PRINT_DEC, TREE_INT_CST_LOW (t)); + obstack_grow (&sra_obstack, buffer, strlen (buffer)); + } + else + { + tree name = DECL_NAME (t); + if (name) + obstack_grow (&sra_obstack, IDENTIFIER_POINTER (name), + IDENTIFIER_LENGTH (name)); + else + { + sprintf (buffer, "D%u", DECL_UID (t)); + obstack_grow (&sra_obstack, buffer, strlen (buffer)); + } + } +} + +/* Construct a pretty variable name for an element's replacement variable. + The name is built on the obstack. */ + +static char * +build_element_name (struct sra_elt *elt) +{ + build_element_name_1 (elt); + obstack_1grow (&sra_obstack, '\0'); + return obstack_finish (&sra_obstack); +} + +/* Instantiate an element as an independent variable. */ + +static void +instantiate_element (struct sra_elt *elt) +{ + struct sra_elt *base_elt; + tree var, base; + + for (base_elt = elt; base_elt->parent; base_elt = base_elt->parent) + continue; + base = base_elt->element; + + elt->replacement = var = make_rename_temp (elt->type, "SR"); + DECL_SOURCE_LOCATION (var) = DECL_SOURCE_LOCATION (base); + TREE_NO_WARNING (var) = TREE_NO_WARNING (base); + DECL_ARTIFICIAL (var) = DECL_ARTIFICIAL (base); + + if (DECL_NAME (base) && !DECL_IGNORED_P (base)) + { + char *pretty_name = build_element_name (elt); + DECL_NAME (var) = get_identifier (pretty_name); + obstack_free (&sra_obstack, pretty_name); + } + + if (dump_file) + { + fputs (" ", dump_file); + dump_sra_elt_name (dump_file, elt); + fputs (" -> ", dump_file); + print_generic_expr (dump_file, var, dump_flags); + fputc ('\n', dump_file); + } +} + +/* Make one pass across an element tree deciding whether or not it's + profitable to instantiate individual leaf scalars. + + PARENT_USES and PARENT_COPIES are the sum of the N_USES and N_COPIES + fields all the way up the tree. 
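For a sense of the names phase three produces, assume a hypothetical
candidate (invented for illustration)

    struct { struct { int i; } in; _Complex float z; float a[2]; } foo;

Instantiated leaves get replacement temporaries whose DECL_NAMEs read
foo$in$i, foo$z$real, foo$z$imag, foo$a$0 and foo$a$1: '$' separates the
path components, "real"/"imag" stand in for the two halves of a complex
value, and constant array indices are printed in decimal.  A component
without a DECL_NAME contributes a "D<uid>" form instead, and if the base
variable is anonymous or DECL_IGNORED_P the temporary keeps only the
generic "SR" prefix from make_rename_temp.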
*/ + +static void +decide_instantiation_1 (struct sra_elt *elt, unsigned int parent_uses, + unsigned int parent_copies) +{ + if (dump_file && !elt->parent) + { + fputs ("Initial instantiation for ", dump_file); + dump_sra_elt_name (dump_file, elt); + fputc ('\n', dump_file); + } + + if (elt->cannot_scalarize) + return; + + if (elt->is_scalar) + { + /* The decision is simple: instantiate if we're used more frequently + than the parent needs to be seen as a complete unit. */ + if (elt->n_uses + elt->n_copies + parent_copies > parent_uses) + instantiate_element (elt); + } + else + { + struct sra_elt *c; + unsigned int this_uses = elt->n_uses + parent_uses; + unsigned int this_copies = elt->n_copies + parent_copies; + + for (c = elt->children; c ; c = c->sibling) + decide_instantiation_1 (c, this_uses, this_copies); + } +} + +/* Compute the size and number of all instantiated elements below ELT. + We will only care about this if the size of the complete structure + fits in a HOST_WIDE_INT, so we don't have to worry about overflow. */ + +static unsigned int +sum_instantiated_sizes (struct sra_elt *elt, unsigned HOST_WIDE_INT *sizep) +{ + if (elt->replacement) + { + *sizep += TREE_INT_CST_LOW (TYPE_SIZE_UNIT (elt->type)); + return 1; + } + else + { + struct sra_elt *c; + unsigned int count = 0; + + for (c = elt->children; c ; c = c->sibling) + count += sum_instantiated_sizes (c, sizep); + + return count; + } +} + +/* Instantiate fields in ELT->TYPE that are not currently present as + children of ELT. */ + +static void instantiate_missing_elements (struct sra_elt *elt); + +static void +instantiate_missing_elements_1 (struct sra_elt *elt, tree child, tree type) +{ + struct sra_elt *sub = lookup_element (elt, child, type, INSERT); + if (sub->is_scalar) + { + if (sub->replacement == NULL) + instantiate_element (sub); + } + else + instantiate_missing_elements (sub); +} + +static void +instantiate_missing_elements (struct sra_elt *elt) +{ + tree type = elt->type; + + switch (TREE_CODE (type)) + { + case RECORD_TYPE: + { + tree f; + for (f = TYPE_FIELDS (type); f ; f = TREE_CHAIN (f)) + if (TREE_CODE (f) == FIELD_DECL) + instantiate_missing_elements_1 (elt, f, TREE_TYPE (f)); + break; + } + + case ARRAY_TYPE: + { + tree i, max, subtype; + + i = TYPE_MIN_VALUE (TYPE_DOMAIN (type)); + max = TYPE_MAX_VALUE (TYPE_DOMAIN (type)); + subtype = TREE_TYPE (type); + + while (1) + { + instantiate_missing_elements_1 (elt, i, subtype); + if (tree_int_cst_equal (i, max)) + break; + i = int_const_binop (PLUS_EXPR, i, integer_one_node, true); + } + + break; + } + + case COMPLEX_TYPE: + type = TREE_TYPE (type); + instantiate_missing_elements_1 (elt, integer_zero_node, type); + instantiate_missing_elements_1 (elt, integer_one_node, type); + break; + + default: + abort (); + } +} + +/* Make one pass across an element tree deciding whether to perform block + or element copies. If we decide on element copies, instantiate all + elements. Return true if there are any instantiated sub-elements. */ + +static bool +decide_block_copy (struct sra_elt *elt) +{ + struct sra_elt *c; + bool any_inst; + + /* If scalarization is disabled, respect it. */ + if (elt->cannot_scalarize) + { + elt->use_block_copy = 1; + + if (dump_file) + { + fputs ("Scalarization disabled for ", dump_file); + dump_sra_elt_name (dump_file, elt); + fputc ('\n', dump_file); + } + + return false; + } + + /* Don't decide if we've no uses. 
*/ + if (elt->n_uses == 0 && elt->n_copies == 0) + ; + + else if (!elt->is_scalar) + { + tree size_tree = TYPE_SIZE_UNIT (elt->type); + bool use_block_copy = true; + + /* Don't bother trying to figure out the rest if the structure is + so large we can't do easy arithmetic. This also forces block + copies for variable sized structures. */ + if (host_integerp (size_tree, 1)) + { + unsigned HOST_WIDE_INT full_size, inst_size = 0; + unsigned int inst_count; + + full_size = tree_low_cst (size_tree, 1); + + /* ??? What to do here. If there are two fields, and we've only + instantiated one, then instantiating the other is clearly a win. + If there are a large number of fields then the size of the copy + is much more of a factor. */ + + /* If the structure is small, and we've made copies, go ahead + and instantiate, hoping that the copies will go away. */ + if (full_size <= (unsigned) MOVE_RATIO * UNITS_PER_WORD + && elt->n_copies > elt->n_uses) + use_block_copy = false; + else + { + inst_count = sum_instantiated_sizes (elt, &inst_size); + + if (inst_size * 4 >= full_size * 3) + use_block_copy = false; + } + + /* In order to avoid block copy, we have to be able to instantiate + all elements of the type. See if this is possible. */ + if (!use_block_copy + && (!can_completely_scalarize_p (elt) + || !type_can_instantiate_all_elements (elt->type))) + use_block_copy = true; + } + elt->use_block_copy = use_block_copy; + + if (dump_file) + { + fprintf (dump_file, "Using %s for ", + use_block_copy ? "block-copy" : "element-copy"); + dump_sra_elt_name (dump_file, elt); + fputc ('\n', dump_file); + } + + if (!use_block_copy) + { + instantiate_missing_elements (elt); + return true; + } + } + + any_inst = elt->replacement != NULL; + + for (c = elt->children; c ; c = c->sibling) + any_inst |= decide_block_copy (c); + + return any_inst; +} + +/* Entry point to phase 3. Instantiate scalar replacement variables. */ + +static void +decide_instantiations (void) +{ + unsigned int i; + bool cleared_any; + struct bitmap_head_def done_head; + + /* We cannot clear bits from a bitmap we're iterating over, + so save up all the bits to clear until the end. */ + bitmap_initialize (&done_head, 1); + cleared_any = false; + + EXECUTE_IF_SET_IN_BITMAP (sra_candidates, 0, i, + { + tree var = referenced_var (i); + struct sra_elt *elt = lookup_element (NULL, var, NULL, NO_INSERT); + if (elt) + { + decide_instantiation_1 (elt, 0, 0); + if (!decide_block_copy (elt)) + elt = NULL; + } + if (!elt) + { + bitmap_set_bit (&done_head, i); + cleared_any = true; + } + }); + + if (cleared_any) + { + bitmap_operation (sra_candidates, sra_candidates, &done_head, + BITMAP_AND_COMPL); + bitmap_operation (needs_copy_in, needs_copy_in, &done_head, + BITMAP_AND_COMPL); + } + bitmap_clear (&done_head); + + if (dump_file) + fputc ('\n', dump_file); +} + + +/* Phase Four: Update the function to match the replacements created. */ + +/* Mark all the variables in V_MAY_DEF or V_MUST_DEF operands for STMT for + renaming. This becomes necessary when we modify all of a non-scalar. 
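To see the numbers at work, take a hypothetical 32-byte structure
(full_size == 32, from TYPE_SIZE_UNIT).  If phase three already
instantiated leaves totalling 28 bytes, then inst_size * 4 == 112 is at
least full_size * 3 == 96, so element-by-element copying is preferred;
with only 16 bytes instantiated, 64 < 96 and the aggregate keeps using
block copies.  A structure no larger than MOVE_RATIO * UNITS_PER_WORD that
is copied more often than it is used as a unit skips the size comparison
and chooses element copies directly.  In either case element copies are
kept only if every element of the type can actually be instantiated.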
*/ + +static void +mark_all_v_defs (tree stmt) +{ + v_may_def_optype v_may_defs; + v_must_def_optype v_must_defs; + size_t i, n; + + get_stmt_operands (stmt); + + v_may_defs = V_MAY_DEF_OPS (stmt_ann (stmt)); + n = NUM_V_MAY_DEFS (v_may_defs); + for (i = 0; i < n; i++) + { + tree sym = V_MAY_DEF_RESULT (v_may_defs, i); + if (TREE_CODE (sym) == SSA_NAME) + sym = SSA_NAME_VAR (sym); + bitmap_set_bit (vars_to_rename, var_ann (sym)->uid); + } + + v_must_defs = V_MUST_DEF_OPS (stmt_ann (stmt)); + n = NUM_V_MUST_DEFS (v_must_defs); + for (i = 0; i < n; i++) + { + tree sym = V_MUST_DEF_OP (v_must_defs, i); + if (TREE_CODE (sym) == SSA_NAME) + sym = SSA_NAME_VAR (sym); + bitmap_set_bit (vars_to_rename, var_ann (sym)->uid); + } } -/* A subroutine of create_scalar_copies. Construct a COMPONENT_REF - expression for BASE referencing FIELD. INDEX is the field index. */ +/* Build a single level component reference to ELT rooted at BASE. */ static tree -csc_build_component_ref (tree base, tree field) +generate_one_element_ref (struct sra_elt *elt, tree base) { - switch (TREE_CODE (base)) + switch (TREE_CODE (TREE_TYPE (base))) { - case CONSTRUCTOR: - /* Only appears on RHS. The only remaining CONSTRUCTORS for - record types that should remain are empty, and imply that - the entire structure should be zeroed. */ - if (CONSTRUCTOR_ELTS (base)) - abort (); - return fold_convert (TREE_TYPE (field), integer_zero_node); + case RECORD_TYPE: + return build (COMPONENT_REF, elt->type, base, elt->element, NULL); - default: - /* Avoid sharing BASE when building the different COMPONENT_REFs. - We let the first field have the original version. */ - if (field != TYPE_FIELDS (TREE_TYPE (base))) - base = unshare_expr (base); - break; + case ARRAY_TYPE: + return build (ARRAY_REF, elt->type, base, elt->element, NULL, NULL); - case VAR_DECL: - case PARM_DECL: - /* Special case for the above -- decls are always shared. */ - break; - } + case COMPLEX_TYPE: + if (elt->element == integer_zero_node) + return build (REALPART_EXPR, elt->type, base); + else + return build (IMAGPART_EXPR, elt->type, base); - return build (COMPONENT_REF, TREE_TYPE (field), base, field, NULL_TREE); + default: + abort (); + } } -/* Similarly for REALPART_EXPR and IMAGPART_EXPR for complex types. */ +/* Build a full component reference to ELT rooted at its native variable. */ static tree -csc_build_complex_part (tree base, enum tree_code part) +generate_element_ref (struct sra_elt *elt) +{ + if (elt->parent) + return generate_one_element_ref (elt, generate_element_ref (elt->parent)); + else + return elt->element; +} + +/* Generate a set of assignment statements in *LIST_P to copy all + instantiated elements under ELT to or from the equivalent structure + rooted at EXPR. COPY_OUT controls the direction of the copy, with + true meaning to copy out of EXPR into ELT. 
*/ + +static void +generate_copy_inout (struct sra_elt *elt, bool copy_out, tree expr, + tree *list_p) { - switch (TREE_CODE (base)) + struct sra_elt *c; + tree t; + + if (elt->replacement) { - case COMPLEX_CST: - if (part == REALPART_EXPR) - return TREE_REALPART (base); + if (copy_out) + t = build (MODIFY_EXPR, void_type_node, elt->replacement, expr); else - return TREE_IMAGPART (base); + t = build (MODIFY_EXPR, void_type_node, expr, elt->replacement); + append_to_statement_list (t, list_p); + } + else + { + for (c = elt->children; c ; c = c->sibling) + { + t = generate_one_element_ref (c, unshare_expr (expr)); + generate_copy_inout (c, copy_out, t, list_p); + } + } +} - case COMPLEX_EXPR: - if (part == REALPART_EXPR) - return TREE_OPERAND (base, 0); - else - return TREE_OPERAND (base, 1); +/* Generate a set of assignment statements in *LIST_P to copy all instantiated + elements under SRC to their counterparts under DST. There must be a 1-1 + correspondence of instantiated elements. */ - default: - /* Avoid sharing BASE when building the different references. - We let the real part have the original version. */ - if (part != REALPART_EXPR) - base = unshare_expr (base); - break; +static void +generate_element_copy (struct sra_elt *dst, struct sra_elt *src, tree *list_p) +{ + struct sra_elt *dc, *sc; - case VAR_DECL: - case PARM_DECL: - /* Special case for the above -- decls are always shared. */ - break; + for (dc = dst->children; dc ; dc = dc->sibling) + { + sc = lookup_element (src, dc->element, NULL, NO_INSERT); + if (sc == NULL) + abort (); + generate_element_copy (dc, sc, list_p); } - return build1 (part, TREE_TYPE (TREE_TYPE (base)), base); -} - -/* Create and return a list of assignments to perform a scalarized - structure assignment 'LHS = RHS'. Both LHS and RHS are assumed to be - of an aggregate or complex type. Three types of copies may be specified: + if (dst->replacement) + { + tree t; - SCALAR_SCALAR will emit assignments for all the scalar temporaries - corresponding to the fields of LHS and RHS. + if (src->replacement == NULL) + abort (); - FIELD_SCALAR will emit assignments from the scalar replacements of - RHS into each of the fields of the LHS. + t = build (MODIFY_EXPR, void_type_node, dst->replacement, + src->replacement); + append_to_statement_list (t, list_p); + } +} - SCALAR_FIELD will emit assignments from each field of the RHS into - the scalar replacements of the LHS. */ +/* Generate a set of assignment statements in *LIST_P to zero all instantiated + elements under ELT. In addition, do not assign to elements that have been + marked VISITED but do reset the visited flag; this allows easy coordination + with generate_element_init. */ -static tree -create_scalar_copies (tree lhs, tree rhs, enum sra_copy_mode mode) +static void +generate_element_zero (struct sra_elt *elt, tree *list_p) { - tree type, list; - tree_stmt_iterator tsi; - -#if defined ENABLE_CHECKING - /* Sanity checking. Check that we are not trying to scalarize a - non-decl. */ - if (!DECL_P (lhs) && (mode == SCALAR_FIELD || mode == SCALAR_SCALAR)) - abort (); - if (!DECL_P (rhs) && (mode == FIELD_SCALAR || mode == SCALAR_SCALAR)) - abort (); -#endif + struct sra_elt *c; - type = TREE_TYPE (lhs); - list = alloc_stmt_list (); - tsi = tsi_start (list); + for (c = elt->children; c ; c = c->sibling) + generate_element_zero (c, list_p); - /* VA_ARG_EXPRs have side effects, so we need to copy it first to a - temporary before scalarizing. FIXME: This should disappear once - VA_ARG_EXPRs are properly lowered. 
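As a sketch of the statements these helpers emit, suppose hypothetical
candidates `d' and `s' of the same two-member type were both fully
instantiated and the assignment d = s was selected for element copy; then
generate_element_copy produces the scalar pair

    d$x = s$x;
    d$y = s$y;

When only one side has replacements, generate_copy_inout instead pairs each
replacement with the matching component reference rooted at the other
operand (for example d$x = s.x, with the direction selected by COPY_OUT),
and generate_element_zero stores zero into every instantiated leaf that an
explicit initializer did not already cover, using the visited flag to
coordinate with generate_element_init.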
*/ - if (TREE_CODE (rhs) == VA_ARG_EXPR) + if (elt->visited) + elt->visited = false; + else if (elt->replacement) { - tree stmt, tmp; - - /* Add TMP = VA_ARG_EXPR <> */ - tmp = make_rename_temp (TREE_TYPE (rhs), NULL); - stmt = csc_assign (&tsi, tmp, rhs); + tree t; - /* Mark all the variables in VDEF operands for renaming, because - the VA_ARG_EXPR will now be in a different statement. */ - mark_all_v_may_defs (stmt); - mark_all_v_must_defs (stmt); + if (elt->is_scalar) + t = fold_convert (elt->type, integer_zero_node); + else + /* We generated a replacement for a non-scalar? */ + abort (); - /* Set RHS to be the new temporary TMP. */ - rhs = tmp; + t = build (MODIFY_EXPR, void_type_node, elt->replacement, t); + append_to_statement_list (t, list_p); } +} - /* When making *_SCALAR copies from PARM_DECLs, we will need to insert - copy-in operations from the original PARM_DECLs. Note that these - copy-in operations may end up being dead, but we won't know until - we rename the new variables into SSA. */ - if ((mode == SCALAR_SCALAR || mode == FIELD_SCALAR) - && TREE_CODE (rhs) == PARM_DECL) - bitmap_set_bit (needs_copy_in, var_ann (rhs)->uid); +/* Generate a set of assignment statements in *LIST_P to set all instantiated + elements under ELT with the contents of the initializer INIT. In addition, + mark all assigned elements VISITED; this allows easy coordination with + generate_element_zero. */ - /* Now create scalar copies for each individual field according to MODE. */ - if (TREE_CODE (type) == COMPLEX_TYPE) - { - /* Create scalar copies of both the real and imaginary parts. */ - tree real_lhs, real_rhs, imag_lhs, imag_rhs; +static void +generate_element_init (struct sra_elt *elt, tree init, tree *list_p) +{ + enum tree_code init_code = TREE_CODE (init); + struct sra_elt *sub; + tree t; - if (mode == SCALAR_FIELD) - { - real_rhs = csc_build_complex_part (rhs, REALPART_EXPR); - imag_rhs = csc_build_complex_part (rhs, IMAGPART_EXPR); - } - else + if (elt->is_scalar) + { + if (elt->replacement) { - real_rhs = get_scalar_for_complex_part (rhs, REALPART_EXPR); - imag_rhs = get_scalar_for_complex_part (rhs, IMAGPART_EXPR); + t = build (MODIFY_EXPR, void_type_node, elt->replacement, init); + append_to_statement_list (t, list_p); + elt->visited = true; } + return; + } - if (mode == FIELD_SCALAR) + switch (init_code) + { + case COMPLEX_CST: + case COMPLEX_EXPR: + for (sub = elt->children; sub ; sub = sub->sibling) { - /* In this case we do not need to create but one statement, - since we can create a new complex value whole. */ - - if (TREE_CONSTANT (real_rhs) && TREE_CONSTANT (imag_rhs)) - rhs = build_complex (type, real_rhs, imag_rhs); + if (sub->element == integer_zero_node) + t = (init_code == COMPLEX_EXPR + ? TREE_OPERAND (init, 0) : TREE_REALPART (init)); else - rhs = build (COMPLEX_EXPR, type, real_rhs, imag_rhs); - csc_assign (&tsi, lhs, rhs); - } - else - { - real_lhs = get_scalar_for_complex_part (lhs, REALPART_EXPR); - imag_lhs = get_scalar_for_complex_part (lhs, IMAGPART_EXPR); - - csc_assign (&tsi, real_lhs, real_rhs); - csc_assign (&tsi, imag_lhs, imag_rhs); + t = (init_code == COMPLEX_EXPR + ? TREE_OPERAND (init, 1) : TREE_IMAGPART (init)); + generate_element_init (sub, t, list_p); } - } - else - { - tree lf, rf; - - /* ??? C++ generates copies between different pointer-to-member - structures of different types. To combat this, we must track - the field of both the left and right structures, so that we - index the variables with fields of their own type. 
*/ + break; - for (lf = TYPE_FIELDS (type), rf = TYPE_FIELDS (TREE_TYPE (rhs)); - lf; - lf = TREE_CHAIN (lf), rf = TREE_CHAIN (rf)) + case CONSTRUCTOR: + for (t = CONSTRUCTOR_ELTS (init); t ; t = TREE_CHAIN (t)) { - tree lhs_var, rhs_var; - - /* Only copy FIELD_DECLs. */ - if (TREE_CODE (lf) != FIELD_DECL) + sub = lookup_element (elt, TREE_PURPOSE (t), NULL, NO_INSERT); + if (sub == NULL) continue; + generate_element_init (sub, TREE_VALUE (t), list_p); + } + break; - if (mode == FIELD_SCALAR) - lhs_var = csc_build_component_ref (lhs, lf); - else - lhs_var = get_scalar_for_field (lhs, lf); + default: + abort (); + } +} - if (mode == SCALAR_FIELD) - rhs_var = csc_build_component_ref (rhs, rf); - else - rhs_var = get_scalar_for_field (rhs, rf); +/* Insert STMT on all the outgoing edges out of BB. Note that if BB + has more than one edge, STMT will be replicated for each edge. Also, + abnormal edges will be ignored. */ - csc_assign (&tsi, lhs_var, rhs_var); - } - } +void +insert_edge_copies (tree stmt, basic_block bb) +{ + edge e; + bool first_copy; - /* All the scalar copies just created will either create new definitions - or remove existing definitions of LHS, so we need to mark it for - renaming. */ - if (TREE_SIDE_EFFECTS (list)) + first_copy = true; + for (e = bb->succ; e; e = e->succ_next) { - if (mode == SCALAR_FIELD || mode == SCALAR_SCALAR) - { - /* If the LHS has been scalarized, mark it for renaming. */ - bitmap_set_bit (vars_to_rename, var_ann (lhs)->uid); - } - else if (mode == FIELD_SCALAR) + /* We don't need to insert copies on abnormal edges. The + value of the scalar replacement is not guaranteed to + be valid through an abnormal edge. */ + if (!(e->flags & EDGE_ABNORMAL)) { - /* Otherwise, mark all the symbols in the VDEFs for the last - scalarized statement just created. Since all the statements - introduce the same VDEFs, we only need to check the last one. */ - mark_all_v_may_defs (tsi_stmt (tsi)); - mark_all_v_must_defs (tsi_stmt (tsi)); + if (first_copy) + { + bsi_insert_on_edge (e, stmt); + first_copy = false; + } + else + bsi_insert_on_edge (e, lhd_unsave_expr_now (stmt)); } - else - abort (); } - - return list; } -/* A helper function that creates the copies, updates line info, - and emits the code either before or after BSI. */ +/* Helper function to insert LIST before BSI, and set up line number info. */ static void -emit_scalar_copies (block_stmt_iterator *bsi, tree lhs, tree rhs, - enum sra_copy_mode mode) +sra_insert_before (block_stmt_iterator *bsi, tree list) { - tree list = create_scalar_copies (lhs, rhs, mode); tree stmt = bsi_stmt (*bsi); if (EXPR_HAS_LOCATION (stmt)) annotate_all_with_locus (&list, EXPR_LOCATION (stmt)); - bsi_insert_before (bsi, list, BSI_SAME_STMT); } -/* Traverse all the statements in the function replacing references to - scalarizable structures with their corresponding scalar temporaries. */ +/* Similarly, but insert after BSI. Handles insertion onto edges as well. */ static void -scalarize_structures (void) +sra_insert_after (block_stmt_iterator *bsi, tree list) { - basic_block bb; - block_stmt_iterator si; - size_t i; - - FOR_EACH_BB (bb) - for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si)) - { - tree stmt; - stmt_ann_t ann; - - stmt = bsi_stmt (si); - ann = stmt_ann (stmt); - - /* If the statement has no virtual operands, then it doesn't make - structure references that we care about. 
*/ - if (NUM_V_MAY_DEFS (V_MAY_DEF_OPS (ann)) == 0 - && NUM_VUSES (VUSE_OPS (ann)) == 0 - && NUM_V_MUST_DEFS (V_MUST_DEF_OPS (ann)) == 0) - continue; - - /* Structure references may only appear in certain statements. */ - if (TREE_CODE (stmt) != MODIFY_EXPR - && TREE_CODE (stmt) != CALL_EXPR - && TREE_CODE (stmt) != RETURN_EXPR - && TREE_CODE (stmt) != ASM_EXPR) - continue; - - scalarize_stmt (&si); - } + tree stmt = bsi_stmt (*bsi); - /* Initialize the scalar replacements for every structure that is a - function argument. */ - EXECUTE_IF_SET_IN_BITMAP (needs_copy_in, 0, i, - { - tree var = referenced_var (i); - tree list = create_scalar_copies (var, var, SCALAR_FIELD); - bsi_insert_on_edge (ENTRY_BLOCK_PTR->succ, list); - }); + if (EXPR_HAS_LOCATION (stmt)) + annotate_all_with_locus (&list, EXPR_LOCATION (stmt)); - /* Commit edge insertions. */ - bsi_commit_edge_inserts (NULL); + if (stmt_ends_bb_p (stmt)) + insert_edge_copies (list, bsi->bb); + else + bsi_insert_after (bsi, list, BSI_CONTINUE_LINKING); } - -/* Scalarize structure references in the statement pointed by SI_P. */ +/* Similarly, but replace the statement at BSI. */ static void -scalarize_stmt (block_stmt_iterator *si_p) +sra_replace (block_stmt_iterator *bsi, tree list) { - tree stmt = bsi_stmt (*si_p); - - /* Handle assignments. */ - if (TREE_CODE (stmt) == MODIFY_EXPR - && TREE_CODE (TREE_OPERAND (stmt, 1)) != CALL_EXPR) - scalarize_modify_expr (si_p); - - /* Handle RETURN_EXPR. */ - else if (TREE_CODE (stmt) == RETURN_EXPR) - scalarize_return_expr (si_p); - - /* Handle function calls (note that this must be handled after - MODIFY_EXPR and RETURN_EXPR because a function call can appear in - both). */ - else if (get_call_expr_in (stmt) != NULL_TREE) - scalarize_call_expr (si_p); - - /* Handle ASM_EXPRs. */ - else if (TREE_CODE (stmt) == ASM_EXPR) - scalarize_asm_expr (si_p); + sra_insert_before (bsi, list); + bsi_remove (bsi); + if (bsi_end_p (*bsi)) + *bsi = bsi_last (bsi->bb); + else + bsi_prev (bsi); } - -/* Helper for scalarize_stmt to handle assignments. */ +/* Scalarize a USE. To recap, this is either a simple reference to ELT, + if elt is scalar, or some ocurrence of ELT that requires a complete + aggregate. IS_OUTPUT is true if ELT is being modified. */ static void -scalarize_modify_expr (block_stmt_iterator *si_p) +scalarize_use (struct sra_elt *elt, tree *expr_p, block_stmt_iterator *bsi, + bool is_output) { - tree stmt = bsi_stmt (*si_p); - tree lhs = TREE_OPERAND (stmt, 0); - tree rhs = TREE_OPERAND (stmt, 1); - tree var = NULL_TREE; + tree list = NULL, stmt = bsi_stmt (*bsi); - /* Found AGGREGATE.FIELD = ... */ - if (is_sra_candidate_ref (lhs)) + if (elt->replacement) { - tree sym; - v_may_def_optype v_may_defs; - - scalarize_component_ref (stmt, &TREE_OPERAND (stmt, 0)); - - /* Mark the LHS to be renamed, as we have just removed the previous - V_MAY_DEF for AGGREGATE. The statement should have exactly one - V_MAY_DEF for variable AGGREGATE. */ - v_may_defs = STMT_V_MAY_DEF_OPS (stmt); - if (NUM_V_MAY_DEFS (v_may_defs) != 1) - abort (); - sym = SSA_NAME_VAR (V_MAY_DEF_RESULT (v_may_defs, 0)); - bitmap_set_bit (vars_to_rename, var_ann (sym)->uid); + /* If we have a replacement, then updating the reference is as + simple as modifying the existing statement in place. */ + if (is_output) + mark_all_v_defs (stmt); + *expr_p = elt->replacement; + modify_stmt (stmt); } - - /* Found ... 
= AGGREGATE.FIELD */ - else if (is_sra_candidate_ref (rhs)) - scalarize_component_ref (stmt, &TREE_OPERAND (stmt, 1)); - - /* Found a complex reference nesting involving a candidate decl. This - should only occur if the above condition is false if a BIT_FIELD_REF or - VIEW_CONVERT_EXPR is involved. This is similar to a CALL_EXPR, if the - operand of the BIT_FIELD_REF is a scalarizable structure, we need to - copy from its scalar replacements before doing the bitfield operation. - - FIXME: BIT_FIELD_REFs are often generated by fold-const.c. This is - not always desirable because they obfuscate the original predicates, - limiting what the tree optimizers may do. For instance, in - testsuite/g++.dg/opt/nrv4.C the use of SRA allows the optimizers to - optimize function main() to 'return 0;'. However, the folder - generates a BIT_FIELD_REF operation for one of the comparisons, - preventing the optimizers from removing all the redundant - operations. */ - else if (is_sra_candidate_complex_ref (rhs, &var)) + else { - emit_scalar_copies (si_p, var, var, FIELD_SCALAR); - - /* If the LHS of the assignment is also a scalarizable structure, insert - copies into the scalar replacements after the call. */ - if (is_sra_candidate_decl (lhs)) + /* Otherwise we need some copies. If ELT is being read, then we want + to store all (modified) sub-elements back into the structure before + the reference takes place. If ELT is being written, then we want to + load the changed values back into our shadow variables. */ + /* ??? We don't check modified for reads, we just always write all of + the values. We should be able to record the SSA number of the VOP + for which the values were last read. If that number matches the + SSA number of the VOP in the current statement, then we needn't + emit an assignment. This would also eliminate double writes when + a structure is passed as more than one argument to a function call. + This optimization would be most effective if sra_walk_function + processed the blocks in dominator order. */ + + generate_copy_inout (elt, is_output, generate_element_ref (elt), &list); + if (list == NULL) + return; + if (is_output) { - tree list = create_scalar_copies (lhs, lhs, SCALAR_FIELD); - if (EXPR_HAS_LOCATION (stmt)) - annotate_all_with_locus (&list, EXPR_LOCATION (stmt)); - if (stmt_ends_bb_p (stmt)) - insert_edge_copies (list, bb_for_stmt (stmt)); - else - bsi_insert_after (si_p, list, BSI_NEW_STMT); + mark_all_v_defs (expr_first (list)); + sra_insert_after (bsi, list); } + else + sra_insert_before (bsi, list); } - - /* Found AGGREGATE = ... or ... = AGGREGATE */ - else if (DECL_P (lhs) || DECL_P (rhs)) - scalarize_structure_assignment (si_p); } +/* Scalarize a COPY. To recap, this is an assignment statement between + two scalarizable references, LHS_ELT and RHS_ELT. */ -/* Scalarize structure references in LIST. Use DONE to avoid duplicates. */ - -static inline void -scalarize_tree_list (tree list, block_stmt_iterator *si_p, bitmap done) +static void +scalarize_copy (struct sra_elt *lhs_elt, struct sra_elt *rhs_elt, + block_stmt_iterator *bsi) { - tree op; + tree list, stmt; - for (op = list; op; op = TREE_CHAIN (op)) + if (lhs_elt->replacement && rhs_elt->replacement) { - tree arg = TREE_VALUE (op); + /* If we have two scalar operands, modify the existing statement. 
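+	 (For example, for "a.x = b.y" where both fields have instantiated
+	 replacements, the two operands of the MODIFY_EXPR are simply
+	 rewritten in place to the replacement VAR_DECLs; no new
+	 statements are needed.)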
*/ + stmt = bsi_stmt (*bsi); - if (is_sra_candidate_decl (arg)) - { - int index = var_ann (arg)->uid; - if (!bitmap_bit_p (done, index)) - { - emit_scalar_copies (si_p, arg, arg, FIELD_SCALAR); - bitmap_set_bit (done, index); - } - } - else if (is_sra_candidate_ref (arg)) +#ifdef ENABLE_CHECKING + /* See the commentary in sra_walk_function concerning + RETURN_EXPR, and why we should never see one here. */ + if (TREE_CODE (stmt) != MODIFY_EXPR) + abort (); +#endif + + TREE_OPERAND (stmt, 0) = lhs_elt->replacement; + TREE_OPERAND (stmt, 1) = rhs_elt->replacement; + modify_stmt (stmt); + } + else if (lhs_elt->use_block_copy || rhs_elt->use_block_copy) + { + /* If either side requires a block copy, then sync the RHS back + to the original structure, leave the original assignment + statement (which will perform the block copy), then load the + LHS values out of its now-updated original structure. */ + /* ??? Could perform a modified pair-wise element copy. That + would at least allow those elements that are instantiated in + both structures to be optimized well. */ + + list = NULL; + generate_copy_inout (rhs_elt, false, + generate_element_ref (rhs_elt), &list); + if (list) { - tree stmt = bsi_stmt (*si_p); - scalarize_component_ref (stmt, &TREE_VALUE (op)); + mark_all_v_defs (expr_first (list)); + sra_insert_before (bsi, list); } + + list = NULL; + generate_copy_inout (lhs_elt, true, + generate_element_ref (lhs_elt), &list); + if (list) + sra_insert_after (bsi, list); } -} + else + { + /* Otherwise both sides must be fully instantiated. In which + case perform pair-wise element assignments and replace the + original block copy statement. */ + + stmt = bsi_stmt (*bsi); + mark_all_v_defs (stmt); + list = NULL; + generate_element_copy (lhs_elt, rhs_elt, &list); + if (list == NULL) + abort (); + sra_replace (bsi, list); + } +} -/* Helper for scalarize_stmt to handle function calls. */ +/* Scalarize an INIT. To recap, this is an assignment to a scalarizable + reference from some form of constructor: CONSTRUCTOR, COMPLEX_CST or + COMPLEX_EXPR. If RHS is NULL, it should be treated as an empty + CONSTRUCTOR. */ static void -scalarize_call_expr (block_stmt_iterator *si_p) +scalarize_init (struct sra_elt *lhs_elt, tree rhs, block_stmt_iterator *bsi) { - tree stmt = bsi_stmt (*si_p); - tree call = (TREE_CODE (stmt) == MODIFY_EXPR) ? TREE_OPERAND (stmt, 1) : stmt; - struct bitmap_head_def done_head; + tree list = NULL; - /* First scalarize the arguments. Order is important, because the copy - operations for the arguments need to go before the call. - Scalarization of the return value needs to go after the call. */ - bitmap_initialize (&done_head, 1); - scalarize_tree_list (TREE_OPERAND (call, 1), si_p, &done_head); - bitmap_clear (&done_head); + /* Generate initialization statements for all members extant in the RHS. */ + if (rhs) + generate_element_init (lhs_elt, rhs, &list); - /* Scalarize the return value, if any. */ - if (TREE_CODE (stmt) == MODIFY_EXPR) - { - tree var = get_base_address (TREE_OPERAND (stmt, 0)); + /* CONSTRUCTOR is defined such that any member not mentioned is assigned + a zero value. Initialize the rest of the instantiated elements. */ + generate_element_zero (lhs_elt, &list); + if (list == NULL) + return; - /* If the LHS of the assignment is a scalarizable structure, insert - copies into the scalar replacements after the call. 
*/ - if (is_sra_candidate_decl (var)) - { - tree list = create_scalar_copies (var, var, SCALAR_FIELD); - if (EXPR_HAS_LOCATION (stmt)) - annotate_all_with_locus (&list, EXPR_LOCATION (stmt)); - if (stmt_ends_bb_p (stmt)) - insert_edge_copies (list, bb_for_stmt (stmt)); - else - bsi_insert_after (si_p, list, BSI_NEW_STMT); - } + if (lhs_elt->use_block_copy) + { + /* Since LHS is not fully instantiated, we must leave the structure + assignment in place. Treating this case differently from a USE + exposes constants to later optimizations. */ + mark_all_v_defs (expr_first (list)); + sra_insert_after (bsi, list); + } + else + { + /* The LHS is fully instantiated. The list of initializations + replaces the original structure assignment. */ + mark_all_v_defs (bsi_stmt (*bsi)); + sra_replace (bsi, list); } } +/* A subroutine of scalarize_ldst called via walk_tree. Set TREE_NO_TRAP + on all INDIRECT_REFs. */ -/* Helper for scalarize_stmt to handle ASM_EXPRs. */ - -static void -scalarize_asm_expr (block_stmt_iterator *si_p) +static tree +mark_notrap (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED) { - tree stmt = bsi_stmt (*si_p); - struct bitmap_head_def done_head; + tree t = *tp; - bitmap_initialize (&done_head, 1); - scalarize_tree_list (ASM_INPUTS (stmt), si_p, &done_head); - scalarize_tree_list (ASM_OUTPUTS (stmt), si_p, &done_head); - bitmap_clear (&done_head); + if (TREE_CODE (t) == INDIRECT_REF) + { + TREE_THIS_NOTRAP (t) = 1; + *walk_subtrees = 0; + } + else if (DECL_P (t) || TYPE_P (t)) + *walk_subtrees = 0; - /* ??? Process outputs after the asm. */ + return NULL; } - -/* Helper for scalarize_stmt to handle return expressions. */ +/* Scalarize a LDST. To recap, this is an assignment between one scalarizable + reference ELT and one non-scalarizable reference OTHER. IS_OUTPUT is true + if ELT is on the left-hand side. */ static void -scalarize_return_expr (block_stmt_iterator *si_p) +scalarize_ldst (struct sra_elt *elt, tree other, + block_stmt_iterator *bsi, bool is_output) { - tree stmt = bsi_stmt (*si_p); - tree op = TREE_OPERAND (stmt, 0); - - if (op == NULL_TREE) - return; + /* Shouldn't have gotten called for a scalar. */ + if (elt->replacement) + abort (); - /* Handle a bare RESULT_DECL. This will handle for types needed - constructors, or possibly after NRV type optimizations. */ - if (is_sra_candidate_decl (op)) - emit_scalar_copies (si_p, op, op, FIELD_SCALAR); - else if (TREE_CODE (op) == MODIFY_EXPR) + if (elt->use_block_copy) + { + /* Since ELT is not fully instantiated, we have to leave the + block copy in place. Treat this as a USE. */ + scalarize_use (elt, NULL, bsi, is_output); + } + else { - tree *rhs_p = &TREE_OPERAND (op, 1); - tree rhs = *rhs_p; + /* The interesting case is when ELT is fully instantiated. In this + case we can have each element stored/loaded directly to/from the + corresponding slot in OTHER. This avoids a block copy. */ - /* Handle 'return STRUCTURE;' */ - if (is_sra_candidate_decl (rhs)) - emit_scalar_copies (si_p, rhs, rhs, FIELD_SCALAR); + tree list = NULL, stmt = bsi_stmt (*bsi); - /* Handle 'return STRUCTURE.FIELD;' */ - else if (is_sra_candidate_ref (rhs)) - scalarize_component_ref (stmt, rhs_p); + mark_all_v_defs (stmt); + generate_copy_inout (elt, is_output, other, &list); + if (list == NULL) + abort (); - /* Handle 'return CALL_EXPR;' */ - else if (TREE_CODE (rhs) == CALL_EXPR) + /* Preserve EH semantics. 
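+	 If the original statement may throw, it must remain at the end of
+	 its basic block: only the first generated load/store replaces it
+	 directly, and the remaining statements are routed onto the
+	 outgoing edges below.  Otherwise the whole list simply replaces
+	 the statement.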
*/ + if (stmt_ends_bb_p (stmt)) { - struct bitmap_head_def done_head; - bitmap_initialize (&done_head, 1); - scalarize_tree_list (TREE_OPERAND (rhs, 1), si_p, &done_head); - bitmap_clear (&done_head); + tree_stmt_iterator tsi; + tree first; + + /* Extract the first statement from LIST. */ + tsi = tsi_start (list); + first = tsi_stmt (tsi); + tsi_delink (&tsi); + + /* Replace the old statement with this new representative. */ + bsi_replace (bsi, first, true); + + if (!tsi_end_p (tsi)) + { + /* If any reference would trap, then they all would. And more + to the point, the first would. Therefore none of the rest + will trap since the first didn't. Indicate this by + iterating over the remaining statements and set + TREE_THIS_NOTRAP in all INDIRECT_REFs. */ + do + { + walk_tree (tsi_stmt_ptr (tsi), mark_notrap, NULL, NULL); + tsi_next (&tsi); + } + while (!tsi_end_p (tsi)); + + insert_edge_copies (list, bsi->bb); + } } + else + sra_replace (bsi, list); } } +/* Generate initializations for all scalarizable parameters. */ -/* Debugging dump for the scalar replacement map. */ - -static int -dump_sra_map_trav (void **slot, void *data) +static void +scalarize_parms (void) { - struct sra_elt *e = *slot; - FILE *f = data; + tree list = NULL; + size_t i; - switch (e->kind) - { - case REALPART_EXPR: - fputs ("__real__ ", f); - print_generic_expr (dump_file, e->base, dump_flags); - fprintf (f, " -> %s\n", get_name (e->replace)); - break; - case IMAGPART_EXPR: - fputs ("__imag__ ", f); - print_generic_expr (dump_file, e->base, dump_flags); - fprintf (f, " -> %s\n", get_name (e->replace)); - break; - case COMPONENT_REF: - print_generic_expr (dump_file, e->base, dump_flags); - fprintf (f, ".%s -> %s\n", get_name (e->field), get_name (e->replace)); - break; - default: - abort (); - } + EXECUTE_IF_SET_IN_BITMAP (needs_copy_in, 0, i, + { + tree var = referenced_var (i); + struct sra_elt *elt = lookup_element (NULL, var, NULL, NO_INSERT); + generate_copy_inout (elt, true, var, &list); + }); - return 1; + if (list) + insert_edge_copies (list, ENTRY_BLOCK_PTR); } +/* Entry point to phase 4. Update the function to match replacements. */ + static void -dump_sra_map (FILE *f) +scalarize_function (void) { - fputs ("Scalar replacements:\n", f); - htab_traverse_noresize (sra_map, dump_sra_map_trav, f); - fputs ("\n\n", f); + static const struct sra_walk_fns fns = { + scalarize_use, scalarize_copy, scalarize_init, scalarize_ldst, false + }; + + sra_walk_function (&fns); + scalarize_parms (); + bsi_commit_edge_inserts (NULL); } -/* Main entry point to Scalar Replacement of Aggregates (SRA). This pass - re-writes non-aliased structure references into scalar temporaries. The - goal is to expose some/all structures to the scalar optimizers. + +/* Debug helper function. Print ELT in a nice human-readable format. */ - Scalarization proceeds in two main phases. First, every structure - referenced in the program that complies with can_be_scalarized_p is - marked for scalarization (find_candidates_for_sra). - - Second, a mapping between structure fields and scalar temporaries so - that every time a particular field of a particular structure is - referenced in the code, we replace it with its corresponding scalar - temporary (scalarize_structures). +static void +dump_sra_elt_name (FILE *f, struct sra_elt *elt) +{ + if (elt->parent && TREE_CODE (elt->parent->type) == COMPLEX_TYPE) + { + fputs (elt->element == integer_zero_node ? 
"__real__ " : "__imag__ ", f); + dump_sra_elt_name (f, elt->parent); + } + else + { + if (elt->parent) + dump_sra_elt_name (f, elt->parent); + if (DECL_P (elt->element)) + { + if (TREE_CODE (elt->element) == FIELD_DECL) + fputc ('.', f); + print_generic_expr (f, elt->element, dump_flags); + } + else + fprintf (f, "[" HOST_WIDE_INT_PRINT_DEC "]", + TREE_INT_CST_LOW (elt->element)); + } +} - TODO +/* Likewise, but callable from the debugger. */ + +void +debug_sra_elt_name (struct sra_elt *elt) +{ + dump_sra_elt_name (stderr, elt); + fputc ('\n', stderr); +} - 1- Scalarize COMPLEX_TYPEs - 2- Scalarize ARRAY_REFs that are always referenced with a - constant index. - 3- Timings to determine when scalarization is not profitable. - 4- Determine what's a good value for MAX_NFIELDS_FOR_SRA. */ +/* Main entry point. */ static void tree_sra (void) { /* Initialize local variables. */ + gcc_obstack_init (&sra_obstack); sra_candidates = BITMAP_XMALLOC (); - sra_map = NULL; - needs_copy_in = NULL; + needs_copy_in = BITMAP_XMALLOC (); + sra_type_decomp_cache = BITMAP_XMALLOC (); + sra_type_inst_cache = BITMAP_XMALLOC (); + sra_map = htab_create (101, sra_elt_hash, sra_elt_eq, NULL); - /* Find structures to be scalarized. */ - if (!find_candidates_for_sra ()) + /* Scan. If we find anything, instantiate and scalarize. */ + if (find_candidates_for_sra ()) { - BITMAP_XFREE (sra_candidates); - return; + scan_function (); + decide_instantiations (); + scalarize_function (); } - /* If we found any, re-write structure references with their - corresponding scalar replacement. */ - sra_map = htab_create (101, sra_elt_hash, sra_elt_eq, free); - needs_copy_in = BITMAP_XMALLOC (); - - scalarize_structures (); - - if (dump_file) - dump_sra_map (dump_file); - /* Free allocated memory. */ htab_delete (sra_map); sra_map = NULL; - BITMAP_XFREE (needs_copy_in); BITMAP_XFREE (sra_candidates); + BITMAP_XFREE (needs_copy_in); + BITMAP_XFREE (sra_type_decomp_cache); + BITMAP_XFREE (sra_type_inst_cache); + obstack_free (&sra_obstack, NULL); } static bool