/* Scalar Replacement of Aggregates (SRA) converts some structure
   references into scalar references, exposing them to the scalar
   optimizers.
   Copyright (C) 2003, 2004, 2005, 2006, 2007
   Free Software Foundation, Inc.
   Contributed by Diego Novillo <dnovillo@redhat.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.  */
27 #include "coretypes.h"
32 /* These RTL headers are needed for basic-block.h. */
35 #include "hard-reg-set.h"
36 #include "basic-block.h"
37 #include "diagnostic.h"
38 #include "langhooks.h"
39 #include "tree-inline.h"
40 #include "tree-flow.h"
41 #include "tree-gimple.h"
42 #include "tree-dump.h"
43 #include "tree-pass.h"
49 /* expr.h is needed for MOVE_RATIO. */
/* The object of this pass is to replace a non-addressable aggregate with a
   set of independent variables.  Most of the time, all of these variables
   will be scalars.  But a secondary objective is to break up larger
   aggregates into smaller aggregates.  In the process we may find that some
   bits of the larger aggregate can be deleted as unreferenced.

   This substitution is done globally.  More localized substitutions would
   be the purvey of a load-store motion pass.

   The optimization proceeds in phases:

     (1) Identify variables that have types that are candidates for
	 decomposition.

     (2) Scan the function looking for the ways these variables are used.
	 In particular we're interested in the number of times a variable
	 (or member) is needed as a complete unit, and the number of times
	 a variable (or member) is copied.

     (3) Based on the usage profile, instantiate substitution variables.

     (4) Scan the function making replacements.  */
79 /* True if this is the "early" pass, before inlining. */
80 static bool early_sra
;
82 /* The set of todo flags to return from tree_sra. */
83 static unsigned int todoflags
;
85 /* The set of aggregate variables that are candidates for scalarization. */
86 static bitmap sra_candidates
;
88 /* Set of scalarizable PARM_DECLs that need copy-in operations at the
89 beginning of the function. */
90 static bitmap needs_copy_in
;
92 /* Sets of bit pairs that cache type decomposition and instantiation. */
93 static bitmap sra_type_decomp_cache
;
94 static bitmap sra_type_inst_cache
;
96 /* One of these structures is created for each candidate aggregate and
97 each (accessed) member or group of members of such an aggregate. */
100 /* A tree of the elements. Used when we want to traverse everything. */
101 struct sra_elt
*parent
;
102 struct sra_elt
*groups
;
103 struct sra_elt
*children
;
104 struct sra_elt
*sibling
;
106 /* If this element is a root, then this is the VAR_DECL. If this is
107 a sub-element, this is some token used to identify the reference.
108 In the case of COMPONENT_REF, this is the FIELD_DECL. In the case
109 of an ARRAY_REF, this is the (constant) index. In the case of an
110 ARRAY_RANGE_REF, this is the (constant) RANGE_EXPR. In the case
111 of a complex number, this is a zero or one. */
114 /* The type of the element. */
117 /* A VAR_DECL, for any sub-element we've decided to replace. */
120 /* The number of times the element is referenced as a whole. I.e.
121 given "a.b.c", this would be incremented for C, but not for A or B. */
124 /* The number of times the element is copied to or from another
125 scalarizable element. */
126 unsigned int n_copies
;
128 /* True if TYPE is scalar. */
131 /* True if this element is a group of members of its parent. */
134 /* True if we saw something about this element that prevents scalarization,
135 such as non-constant indexing. */
136 bool cannot_scalarize
;
138 /* True if we've decided that structure-to-structure assignment
139 should happen via memcpy and not per-element. */
142 /* True if everything under this element has been marked TREE_NO_WARNING. */
145 /* A flag for use with/after random access traversals. */
148 /* True if there is BIT_FIELD_REF on the lhs with a vector. */
151 /* 1 if the element is a field that is part of a block, 2 if the field
152 is the block itself, 0 if it's neither. */
153 char in_bitfld_block
;
/* An element in a group is identified by a RANGE_EXPR token.  */
#define IS_ELEMENT_FOR_GROUP(ELEMENT) (TREE_CODE (ELEMENT) == RANGE_EXPR)

/* Iterate over the actual children of ELT.  For a group element this
   walks only the siblings that fall inside the group's range (via
   next_child_for_group); otherwise it walks the plain child list.  */
#define FOR_EACH_ACTUAL_CHILD(CHILD, ELT)			\
  for ((CHILD) = (ELT)->is_group				\
		 ? next_child_for_group (NULL, (ELT))		\
		 : (ELT)->children;				\
       (CHILD);							\
       (CHILD) = (ELT)->is_group				\
		 ? next_child_for_group ((CHILD), (ELT))	\
		 : (CHILD)->sibling)
167 /* Helper function for above macro. Return next child in group. */
168 static struct sra_elt
*
169 next_child_for_group (struct sra_elt
*child
, struct sra_elt
*group
)
171 gcc_assert (group
->is_group
);
173 /* Find the next child in the parent. */
175 child
= child
->sibling
;
177 child
= group
->parent
->children
;
179 /* Skip siblings that do not belong to the group. */
182 tree g_elt
= group
->element
;
183 if (TREE_CODE (g_elt
) == RANGE_EXPR
)
185 if (!tree_int_cst_lt (child
->element
, TREE_OPERAND (g_elt
, 0))
186 && !tree_int_cst_lt (TREE_OPERAND (g_elt
, 1), child
->element
))
192 child
= child
->sibling
;
198 /* Random access to the child of a parent is performed by hashing.
199 This prevents quadratic behavior, and allows SRA to function
200 reasonably on larger records. */
201 static htab_t sra_map
;
203 /* All structures are allocated out of the following obstack. */
204 static struct obstack sra_obstack
;
206 /* Debugging functions. */
207 static void dump_sra_elt_name (FILE *, struct sra_elt
*);
208 extern void debug_sra_elt_name (struct sra_elt
*);
210 /* Forward declarations. */
211 static tree
generate_element_ref (struct sra_elt
*);
213 /* Return true if DECL is an SRA candidate. */
216 is_sra_candidate_decl (tree decl
)
218 return DECL_P (decl
) && bitmap_bit_p (sra_candidates
, DECL_UID (decl
));
221 /* Return true if TYPE is a scalar type. */
224 is_sra_scalar_type (tree type
)
226 enum tree_code code
= TREE_CODE (type
);
227 return (code
== INTEGER_TYPE
|| code
== REAL_TYPE
|| code
== VECTOR_TYPE
228 || code
== ENUMERAL_TYPE
|| code
== BOOLEAN_TYPE
229 || code
== POINTER_TYPE
|| code
== OFFSET_TYPE
230 || code
== REFERENCE_TYPE
);
233 /* Return true if TYPE can be decomposed into a set of independent variables.
235 Note that this doesn't imply that all elements of TYPE can be
236 instantiated, just that if we decide to break up the type into
237 separate pieces that it can be done. */
240 sra_type_can_be_decomposed_p (tree type
)
242 unsigned int cache
= TYPE_UID (TYPE_MAIN_VARIANT (type
)) * 2;
245 /* Avoid searching the same type twice. */
246 if (bitmap_bit_p (sra_type_decomp_cache
, cache
+0))
248 if (bitmap_bit_p (sra_type_decomp_cache
, cache
+1))
251 /* The type must have a definite nonzero size. */
252 if (TYPE_SIZE (type
) == NULL
|| TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
253 || integer_zerop (TYPE_SIZE (type
)))
256 /* The type must be a non-union aggregate. */
257 switch (TREE_CODE (type
))
261 bool saw_one_field
= false;
263 for (t
= TYPE_FIELDS (type
); t
; t
= TREE_CHAIN (t
))
264 if (TREE_CODE (t
) == FIELD_DECL
)
266 /* Reject incorrectly represented bit fields. */
267 if (DECL_BIT_FIELD (t
)
268 && (tree_low_cst (DECL_SIZE (t
), 1)
269 != TYPE_PRECISION (TREE_TYPE (t
))))
272 saw_one_field
= true;
275 /* Record types must have at least one field. */
282 /* Array types must have a fixed lower and upper bound. */
283 t
= TYPE_DOMAIN (type
);
286 if (TYPE_MIN_VALUE (t
) == NULL
|| !TREE_CONSTANT (TYPE_MIN_VALUE (t
)))
288 if (TYPE_MAX_VALUE (t
) == NULL
|| !TREE_CONSTANT (TYPE_MAX_VALUE (t
)))
299 bitmap_set_bit (sra_type_decomp_cache
, cache
+0);
303 bitmap_set_bit (sra_type_decomp_cache
, cache
+1);
307 /* Return true if DECL can be decomposed into a set of independent
308 (though not necessarily scalar) variables. */
311 decl_can_be_decomposed_p (tree var
)
313 /* Early out for scalars. */
314 if (is_sra_scalar_type (TREE_TYPE (var
)))
317 /* The variable must not be aliased. */
318 if (!is_gimple_non_addressable (var
))
320 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
322 fprintf (dump_file
, "Cannot scalarize variable ");
323 print_generic_expr (dump_file
, var
, dump_flags
);
324 fprintf (dump_file
, " because it must live in memory\n");
329 /* The variable must not be volatile. */
330 if (TREE_THIS_VOLATILE (var
))
332 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
334 fprintf (dump_file
, "Cannot scalarize variable ");
335 print_generic_expr (dump_file
, var
, dump_flags
);
336 fprintf (dump_file
, " because it is declared volatile\n");
341 /* We must be able to decompose the variable's type. */
342 if (!sra_type_can_be_decomposed_p (TREE_TYPE (var
)))
344 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
346 fprintf (dump_file
, "Cannot scalarize variable ");
347 print_generic_expr (dump_file
, var
, dump_flags
);
348 fprintf (dump_file
, " because its type cannot be decomposed\n");
353 /* HACK: if we decompose a va_list_type_node before inlining, then we'll
354 confuse tree-stdarg.c, and we won't be able to figure out which and
355 how many arguments are accessed. This really should be improved in
356 tree-stdarg.c, as the decomposition is truely a win. This could also
357 be fixed if the stdarg pass ran early, but this can't be done until
358 we've aliasing information early too. See PR 30791. */
360 && TYPE_MAIN_VARIANT (TREE_TYPE (var
))
361 == TYPE_MAIN_VARIANT (va_list_type_node
))
367 /* Return true if TYPE can be *completely* decomposed into scalars. */
370 type_can_instantiate_all_elements (tree type
)
372 if (is_sra_scalar_type (type
))
374 if (!sra_type_can_be_decomposed_p (type
))
377 switch (TREE_CODE (type
))
381 unsigned int cache
= TYPE_UID (TYPE_MAIN_VARIANT (type
)) * 2;
384 if (bitmap_bit_p (sra_type_inst_cache
, cache
+0))
386 if (bitmap_bit_p (sra_type_inst_cache
, cache
+1))
389 for (f
= TYPE_FIELDS (type
); f
; f
= TREE_CHAIN (f
))
390 if (TREE_CODE (f
) == FIELD_DECL
)
392 if (!type_can_instantiate_all_elements (TREE_TYPE (f
)))
394 bitmap_set_bit (sra_type_inst_cache
, cache
+1);
399 bitmap_set_bit (sra_type_inst_cache
, cache
+0);
404 return type_can_instantiate_all_elements (TREE_TYPE (type
));
414 /* Test whether ELT or some sub-element cannot be scalarized. */
417 can_completely_scalarize_p (struct sra_elt
*elt
)
421 if (elt
->cannot_scalarize
)
424 for (c
= elt
->children
; c
; c
= c
->sibling
)
425 if (!can_completely_scalarize_p (c
))
428 for (c
= elt
->groups
; c
; c
= c
->sibling
)
429 if (!can_completely_scalarize_p (c
))
436 /* A simplified tree hashing algorithm that only handles the types of
437 trees we expect to find in sra_elt->element. */
440 sra_hash_tree (tree t
)
444 switch (TREE_CODE (t
))
453 h
= TREE_INT_CST_LOW (t
) ^ TREE_INT_CST_HIGH (t
);
457 h
= iterative_hash_expr (TREE_OPERAND (t
, 0), 0);
458 h
= iterative_hash_expr (TREE_OPERAND (t
, 1), h
);
462 /* We can have types that are compatible, but have different member
463 lists, so we can't hash fields by ID. Use offsets instead. */
464 h
= iterative_hash_expr (DECL_FIELD_OFFSET (t
), 0);
465 h
= iterative_hash_expr (DECL_FIELD_BIT_OFFSET (t
), h
);
469 /* Don't take operand 0 into account, that's our parent. */
470 h
= iterative_hash_expr (TREE_OPERAND (t
, 1), 0);
471 h
= iterative_hash_expr (TREE_OPERAND (t
, 2), h
);
481 /* Hash function for type SRA_PAIR. */
484 sra_elt_hash (const void *x
)
486 const struct sra_elt
*e
= x
;
487 const struct sra_elt
*p
;
490 h
= sra_hash_tree (e
->element
);
492 /* Take into account everything except bitfield blocks back up the
493 chain. Given that chain lengths are rarely very long, this
494 should be acceptable. If we truly identify this as a performance
495 problem, it should work to hash the pointer value
497 for (p
= e
->parent
; p
; p
= p
->parent
)
498 if (!p
->in_bitfld_block
)
499 h
= (h
* 65521) ^ sra_hash_tree (p
->element
);
504 /* Equality function for type SRA_PAIR. */
507 sra_elt_eq (const void *x
, const void *y
)
509 const struct sra_elt
*a
= x
;
510 const struct sra_elt
*b
= y
;
512 const struct sra_elt
*ap
= a
->parent
;
513 const struct sra_elt
*bp
= b
->parent
;
516 while (ap
->in_bitfld_block
)
519 while (bp
->in_bitfld_block
)
530 if (TREE_CODE (ae
) != TREE_CODE (be
))
533 switch (TREE_CODE (ae
))
538 /* These are all pointer unique. */
542 /* Integers are not pointer unique, so compare their values. */
543 return tree_int_cst_equal (ae
, be
);
547 tree_int_cst_equal (TREE_OPERAND (ae
, 0), TREE_OPERAND (be
, 0))
548 && tree_int_cst_equal (TREE_OPERAND (ae
, 1), TREE_OPERAND (be
, 1));
551 /* Fields are unique within a record, but not between
552 compatible records. */
553 if (DECL_FIELD_CONTEXT (ae
) == DECL_FIELD_CONTEXT (be
))
555 return fields_compatible_p (ae
, be
);
559 tree_int_cst_equal (TREE_OPERAND (ae
, 1), TREE_OPERAND (be
, 1))
560 && tree_int_cst_equal (TREE_OPERAND (ae
, 2), TREE_OPERAND (be
, 2));
567 /* Create or return the SRA_ELT structure for CHILD in PARENT. PARENT
568 may be null, in which case CHILD must be a DECL. */
570 static struct sra_elt
*
571 lookup_element (struct sra_elt
*parent
, tree child
, tree type
,
572 enum insert_option insert
)
574 struct sra_elt dummy
;
575 struct sra_elt
**slot
;
579 dummy
.parent
= parent
->is_group
? parent
->parent
: parent
;
582 dummy
.element
= child
;
584 slot
= (struct sra_elt
**) htab_find_slot (sra_map
, &dummy
, insert
);
585 if (!slot
&& insert
== NO_INSERT
)
589 if (!elt
&& insert
== INSERT
)
591 *slot
= elt
= obstack_alloc (&sra_obstack
, sizeof (*elt
));
592 memset (elt
, 0, sizeof (*elt
));
594 elt
->parent
= parent
;
595 elt
->element
= child
;
597 elt
->is_scalar
= is_sra_scalar_type (type
);
601 if (IS_ELEMENT_FOR_GROUP (elt
->element
))
603 elt
->is_group
= true;
604 elt
->sibling
= parent
->groups
;
605 parent
->groups
= elt
;
609 elt
->sibling
= parent
->children
;
610 parent
->children
= elt
;
614 /* If this is a parameter, then if we want to scalarize, we have
615 one copy from the true function parameter. Count it now. */
616 if (TREE_CODE (child
) == PARM_DECL
)
619 bitmap_set_bit (needs_copy_in
, DECL_UID (child
));
626 /* Create or return the SRA_ELT structure for EXPR if the expression
627 refers to a scalarizable variable. */
629 static struct sra_elt
*
630 maybe_lookup_element_for_expr (tree expr
)
635 switch (TREE_CODE (expr
))
640 if (is_sra_candidate_decl (expr
))
641 return lookup_element (NULL
, expr
, TREE_TYPE (expr
), INSERT
);
645 /* We can't scalarize variable array indices. */
646 if (in_array_bounds_p (expr
))
647 child
= TREE_OPERAND (expr
, 1);
652 case ARRAY_RANGE_REF
:
653 /* We can't scalarize variable array indices. */
654 if (range_in_array_bounds_p (expr
))
656 tree domain
= TYPE_DOMAIN (TREE_TYPE (expr
));
657 child
= build2 (RANGE_EXPR
, integer_type_node
,
658 TYPE_MIN_VALUE (domain
), TYPE_MAX_VALUE (domain
));
665 /* Don't look through unions. */
666 if (TREE_CODE (TREE_TYPE (TREE_OPERAND (expr
, 0))) != RECORD_TYPE
)
668 child
= TREE_OPERAND (expr
, 1);
672 child
= integer_zero_node
;
675 child
= integer_one_node
;
682 elt
= maybe_lookup_element_for_expr (TREE_OPERAND (expr
, 0));
684 return lookup_element (elt
, child
, TREE_TYPE (expr
), INSERT
);
689 /* Functions to walk just enough of the tree to see all scalarizable
690 references, and categorize them. */
692 /* A set of callbacks for phases 2 and 4. They'll be invoked for the
693 various kinds of references seen. In all cases, *BSI is an iterator
694 pointing to the statement being processed. */
697 /* Invoked when ELT is required as a unit. Note that ELT might refer to
698 a leaf node, in which case this is a simple scalar reference. *EXPR_P
699 points to the location of the expression. IS_OUTPUT is true if this
700 is a left-hand-side reference. */
701 void (*use
) (struct sra_elt
*elt
, tree
*expr_p
,
702 block_stmt_iterator
*bsi
, bool is_output
);
704 /* Invoked when we have a copy between two scalarizable references. */
705 void (*copy
) (struct sra_elt
*lhs_elt
, struct sra_elt
*rhs_elt
,
706 block_stmt_iterator
*bsi
);
708 /* Invoked when ELT is initialized from a constant. VALUE may be NULL,
709 in which case it should be treated as an empty CONSTRUCTOR. */
710 void (*init
) (struct sra_elt
*elt
, tree value
, block_stmt_iterator
*bsi
);
712 /* Invoked when we have a copy between one scalarizable reference ELT
713 and one non-scalarizable reference OTHER without side-effects.
714 IS_OUTPUT is true if ELT is on the left-hand side. */
715 void (*ldst
) (struct sra_elt
*elt
, tree other
,
716 block_stmt_iterator
*bsi
, bool is_output
);
718 /* True during phase 2, false during phase 4. */
719 /* ??? This is a hack. */
#ifdef ENABLE_CHECKING
/* Invoked via walk_tree, if *TP contains a candidate decl, return it.  */

static tree
sra_find_candidate_decl (tree *tp, int *walk_subtrees,
			 void *data ATTRIBUTE_UNUSED)
{
  tree t = *tp;
  enum tree_code code = TREE_CODE (t);

  if (code == VAR_DECL || code == PARM_DECL || code == RESULT_DECL)
    {
      *walk_subtrees = 0;
      if (is_sra_candidate_decl (t))
	return t;
    }
  else if (TYPE_P (t))
    *walk_subtrees = 0;

  return NULL;
}
#endif
746 /* Walk most expressions looking for a scalarizable aggregate.
747 If we find one, invoke FNS->USE. */
750 sra_walk_expr (tree
*expr_p
, block_stmt_iterator
*bsi
, bool is_output
,
751 const struct sra_walk_fns
*fns
)
755 bool disable_scalarization
= false;
757 /* We're looking to collect a reference expression between EXPR and INNER,
758 such that INNER is a scalarizable decl and all other nodes through EXPR
759 are references that we can scalarize. If we come across something that
760 we can't scalarize, we reset EXPR. This has the effect of making it
761 appear that we're referring to the larger expression as a whole. */
764 switch (TREE_CODE (inner
))
769 /* If there is a scalarizable decl at the bottom, then process it. */
770 if (is_sra_candidate_decl (inner
))
772 struct sra_elt
*elt
= maybe_lookup_element_for_expr (expr
);
773 if (disable_scalarization
)
774 elt
->cannot_scalarize
= true;
776 fns
->use (elt
, expr_p
, bsi
, is_output
);
781 /* Non-constant index means any member may be accessed. Prevent the
782 expression from being scalarized. If we were to treat this as a
783 reference to the whole array, we can wind up with a single dynamic
784 index reference inside a loop being overridden by several constant
785 index references during loop setup. It's possible that this could
786 be avoided by using dynamic usage counts based on BB trip counts
787 (based on loop analysis or profiling), but that hardly seems worth
789 /* ??? Hack. Figure out how to push this into the scan routines
790 without duplicating too much code. */
791 if (!in_array_bounds_p (inner
))
793 disable_scalarization
= true;
796 /* ??? Are we assured that non-constant bounds and stride will have
797 the same value everywhere? I don't think Fortran will... */
798 if (TREE_OPERAND (inner
, 2) || TREE_OPERAND (inner
, 3))
800 inner
= TREE_OPERAND (inner
, 0);
803 case ARRAY_RANGE_REF
:
804 if (!range_in_array_bounds_p (inner
))
806 disable_scalarization
= true;
809 /* ??? See above non-constant bounds and stride . */
810 if (TREE_OPERAND (inner
, 2) || TREE_OPERAND (inner
, 3))
812 inner
= TREE_OPERAND (inner
, 0);
816 /* A reference to a union member constitutes a reference to the
818 if (TREE_CODE (TREE_TYPE (TREE_OPERAND (inner
, 0))) != RECORD_TYPE
)
820 /* ??? See above re non-constant stride. */
821 if (TREE_OPERAND (inner
, 2))
823 inner
= TREE_OPERAND (inner
, 0);
828 inner
= TREE_OPERAND (inner
, 0);
832 /* A bit field reference to a specific vector is scalarized but for
833 ones for inputs need to be marked as used on the left hand size so
834 when we scalarize it, we can mark that variable as non renamable. */
836 && TREE_CODE (TREE_TYPE (TREE_OPERAND (inner
, 0))) == VECTOR_TYPE
)
839 = maybe_lookup_element_for_expr (TREE_OPERAND (inner
, 0));
841 elt
->is_vector_lhs
= true;
843 /* A bit field reference (access to *multiple* fields simultaneously)
844 is not currently scalarized. Consider this an access to the
845 complete outer element, to which walk_tree will bring us next. */
849 case VIEW_CONVERT_EXPR
:
851 /* Similarly, a view/nop explicitly wants to look at an object in a
852 type other than the one we've scalarized. */
856 /* This is a transparent wrapper. The entire inner expression really
861 expr_p
= &TREE_OPERAND (inner
, 0);
862 inner
= expr
= *expr_p
;
866 #ifdef ENABLE_CHECKING
867 /* Validate that we're not missing any references. */
868 gcc_assert (!walk_tree (&inner
, sra_find_candidate_decl
, NULL
, NULL
));
874 /* Walk a TREE_LIST of values looking for scalarizable aggregates.
875 If we find one, invoke FNS->USE. */
878 sra_walk_tree_list (tree list
, block_stmt_iterator
*bsi
, bool is_output
,
879 const struct sra_walk_fns
*fns
)
882 for (op
= list
; op
; op
= TREE_CHAIN (op
))
883 sra_walk_expr (&TREE_VALUE (op
), bsi
, is_output
, fns
);
886 /* Walk the arguments of a CALL_EXPR looking for scalarizable aggregates.
887 If we find one, invoke FNS->USE. */
890 sra_walk_call_expr (tree expr
, block_stmt_iterator
*bsi
,
891 const struct sra_walk_fns
*fns
)
894 int nargs
= call_expr_nargs (expr
);
895 for (i
= 0; i
< nargs
; i
++)
896 sra_walk_expr (&CALL_EXPR_ARG (expr
, i
), bsi
, false, fns
);
899 /* Walk the inputs and outputs of an ASM_EXPR looking for scalarizable
900 aggregates. If we find one, invoke FNS->USE. */
903 sra_walk_asm_expr (tree expr
, block_stmt_iterator
*bsi
,
904 const struct sra_walk_fns
*fns
)
906 sra_walk_tree_list (ASM_INPUTS (expr
), bsi
, false, fns
);
907 sra_walk_tree_list (ASM_OUTPUTS (expr
), bsi
, true, fns
);
910 static void sra_replace (block_stmt_iterator
*bsi
, tree list
);
911 static tree
sra_build_elt_assignment (struct sra_elt
*elt
, tree src
);
913 /* Walk a GIMPLE_MODIFY_STMT and categorize the assignment appropriately. */
916 sra_walk_gimple_modify_stmt (tree expr
, block_stmt_iterator
*bsi
,
917 const struct sra_walk_fns
*fns
)
919 struct sra_elt
*lhs_elt
, *rhs_elt
;
922 lhs
= GIMPLE_STMT_OPERAND (expr
, 0);
923 rhs
= GIMPLE_STMT_OPERAND (expr
, 1);
924 lhs_elt
= maybe_lookup_element_for_expr (lhs
);
925 rhs_elt
= maybe_lookup_element_for_expr (rhs
);
927 /* If both sides are scalarizable, this is a COPY operation. */
928 if (lhs_elt
&& rhs_elt
)
930 fns
->copy (lhs_elt
, rhs_elt
, bsi
);
934 /* If the RHS is scalarizable, handle it. There are only two cases. */
937 if (!rhs_elt
->is_scalar
&& !TREE_SIDE_EFFECTS (lhs
))
938 fns
->ldst (rhs_elt
, lhs
, bsi
, false);
940 fns
->use (rhs_elt
, &GIMPLE_STMT_OPERAND (expr
, 1), bsi
, false);
943 /* If it isn't scalarizable, there may be scalarizable variables within, so
944 check for a call or else walk the RHS to see if we need to do any
945 copy-in operations. We need to do it before the LHS is scalarized so
946 that the statements get inserted in the proper place, before any
947 copy-out operations. */
950 tree call
= get_call_expr_in (rhs
);
952 sra_walk_call_expr (call
, bsi
, fns
);
954 sra_walk_expr (&GIMPLE_STMT_OPERAND (expr
, 1), bsi
, false, fns
);
957 /* Likewise, handle the LHS being scalarizable. We have cases similar
958 to those above, but also want to handle RHS being constant. */
961 /* If this is an assignment from a constant, or constructor, then
962 we have access to all of the elements individually. Invoke INIT. */
963 if (TREE_CODE (rhs
) == COMPLEX_EXPR
964 || TREE_CODE (rhs
) == COMPLEX_CST
965 || TREE_CODE (rhs
) == CONSTRUCTOR
)
966 fns
->init (lhs_elt
, rhs
, bsi
);
968 /* If this is an assignment from read-only memory, treat this as if
969 we'd been passed the constructor directly. Invoke INIT. */
970 else if (TREE_CODE (rhs
) == VAR_DECL
972 && TREE_READONLY (rhs
)
973 && targetm
.binds_local_p (rhs
))
974 fns
->init (lhs_elt
, DECL_INITIAL (rhs
), bsi
);
976 /* If this is a copy from a non-scalarizable lvalue, invoke LDST.
977 The lvalue requirement prevents us from trying to directly scalarize
978 the result of a function call. Which would result in trying to call
979 the function multiple times, and other evil things. */
980 else if (!lhs_elt
->is_scalar
981 && !TREE_SIDE_EFFECTS (rhs
) && is_gimple_addressable (rhs
))
982 fns
->ldst (lhs_elt
, rhs
, bsi
, true);
984 /* Otherwise we're being used in some context that requires the
985 aggregate to be seen as a whole. Invoke USE. */
988 fns
->use (lhs_elt
, &GIMPLE_STMT_OPERAND (expr
, 0), bsi
, true);
992 /* Similarly to above, LHS_ELT being null only means that the LHS as a
993 whole is not a scalarizable reference. There may be occurrences of
994 scalarizable variables within, which implies a USE. */
996 sra_walk_expr (&GIMPLE_STMT_OPERAND (expr
, 0), bsi
, true, fns
);
999 /* Entry point to the walk functions. Search the entire function,
1000 invoking the callbacks in FNS on each of the references to
1001 scalarizable variables. */
1004 sra_walk_function (const struct sra_walk_fns
*fns
)
1007 block_stmt_iterator si
, ni
;
1009 /* ??? Phase 4 could derive some benefit to walking the function in
1010 dominator tree order. */
1013 for (si
= bsi_start (bb
); !bsi_end_p (si
); si
= ni
)
1018 stmt
= bsi_stmt (si
);
1019 ann
= stmt_ann (stmt
);
1024 /* If the statement has no virtual operands, then it doesn't
1025 make any structure references that we care about. */
1026 if (gimple_aliases_computed_p (cfun
)
1027 && ZERO_SSA_OPERANDS (stmt
, (SSA_OP_VIRTUAL_DEFS
| SSA_OP_VUSE
)))
1030 switch (TREE_CODE (stmt
))
1033 /* If we have "return <retval>" then the return value is
1034 already exposed for our pleasure. Walk it as a USE to
1035 force all the components back in place for the return.
1037 If we have an embedded assignment, then <retval> is of
1038 a type that gets returned in registers in this ABI, and
1039 we do not wish to extend their lifetimes. Treat this
1040 as a USE of the variable on the RHS of this assignment. */
1042 t
= TREE_OPERAND (stmt
, 0);
1045 else if (TREE_CODE (t
) == GIMPLE_MODIFY_STMT
)
1046 sra_walk_expr (&GIMPLE_STMT_OPERAND (t
, 1), &si
, false, fns
);
1048 sra_walk_expr (&TREE_OPERAND (stmt
, 0), &si
, false, fns
);
1051 case GIMPLE_MODIFY_STMT
:
1052 sra_walk_gimple_modify_stmt (stmt
, &si
, fns
);
1055 sra_walk_call_expr (stmt
, &si
, fns
);
1058 sra_walk_asm_expr (stmt
, &si
, fns
);
1067 /* Phase One: Scan all referenced variables in the program looking for
1068 structures that could be decomposed. */
1071 find_candidates_for_sra (void)
1073 bool any_set
= false;
1075 referenced_var_iterator rvi
;
1077 FOR_EACH_REFERENCED_VAR (var
, rvi
)
1079 if (decl_can_be_decomposed_p (var
))
1081 bitmap_set_bit (sra_candidates
, DECL_UID (var
));
1090 /* Phase Two: Scan all references to scalarizable variables. Count the
1091 number of times they are used or copied respectively. */
1093 /* Callbacks to fill in SRA_WALK_FNS. Everything but USE is
1094 considered a copy, because we can decompose the reference such that
1095 the sub-elements needn't be contiguous. */
1098 scan_use (struct sra_elt
*elt
, tree
*expr_p ATTRIBUTE_UNUSED
,
1099 block_stmt_iterator
*bsi ATTRIBUTE_UNUSED
,
1100 bool is_output ATTRIBUTE_UNUSED
)
1106 scan_copy (struct sra_elt
*lhs_elt
, struct sra_elt
*rhs_elt
,
1107 block_stmt_iterator
*bsi ATTRIBUTE_UNUSED
)
1109 lhs_elt
->n_copies
+= 1;
1110 rhs_elt
->n_copies
+= 1;
1114 scan_init (struct sra_elt
*lhs_elt
, tree rhs ATTRIBUTE_UNUSED
,
1115 block_stmt_iterator
*bsi ATTRIBUTE_UNUSED
)
1117 lhs_elt
->n_copies
+= 1;
1121 scan_ldst (struct sra_elt
*elt
, tree other ATTRIBUTE_UNUSED
,
1122 block_stmt_iterator
*bsi ATTRIBUTE_UNUSED
,
1123 bool is_output ATTRIBUTE_UNUSED
)
1128 /* Dump the values we collected during the scanning phase. */
1131 scan_dump (struct sra_elt
*elt
)
1135 dump_sra_elt_name (dump_file
, elt
);
1136 fprintf (dump_file
, ": n_uses=%u n_copies=%u\n", elt
->n_uses
, elt
->n_copies
);
1138 for (c
= elt
->children
; c
; c
= c
->sibling
)
1141 for (c
= elt
->groups
; c
; c
= c
->sibling
)
1145 /* Entry point to phase 2. Scan the entire function, building up
1146 scalarization data structures, recording copies and uses. */
1149 scan_function (void)
1151 static const struct sra_walk_fns fns
= {
1152 scan_use
, scan_copy
, scan_init
, scan_ldst
, true
1156 sra_walk_function (&fns
);
1158 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1162 fputs ("\nScan results:\n", dump_file
);
1163 EXECUTE_IF_SET_IN_BITMAP (sra_candidates
, 0, i
, bi
)
1165 tree var
= referenced_var (i
);
1166 struct sra_elt
*elt
= lookup_element (NULL
, var
, NULL
, NO_INSERT
);
1170 fputc ('\n', dump_file
);
1174 /* Phase Three: Make decisions about which variables to scalarize, if any.
1175 All elements to be scalarized have replacement variables made for them. */
1177 /* A subroutine of build_element_name. Recursively build the element
1178 name on the obstack. */
1181 build_element_name_1 (struct sra_elt
*elt
)
1188 build_element_name_1 (elt
->parent
);
1189 obstack_1grow (&sra_obstack
, '$');
1191 if (TREE_CODE (elt
->parent
->type
) == COMPLEX_TYPE
)
1193 if (elt
->element
== integer_zero_node
)
1194 obstack_grow (&sra_obstack
, "real", 4);
1196 obstack_grow (&sra_obstack
, "imag", 4);
1202 if (TREE_CODE (t
) == INTEGER_CST
)
1204 /* ??? Eh. Don't bother doing double-wide printing. */
1205 sprintf (buffer
, HOST_WIDE_INT_PRINT_DEC
, TREE_INT_CST_LOW (t
));
1206 obstack_grow (&sra_obstack
, buffer
, strlen (buffer
));
1208 else if (TREE_CODE (t
) == BIT_FIELD_REF
)
1210 sprintf (buffer
, "B" HOST_WIDE_INT_PRINT_DEC
,
1211 tree_low_cst (TREE_OPERAND (t
, 2), 1));
1212 obstack_grow (&sra_obstack
, buffer
, strlen (buffer
));
1213 sprintf (buffer
, "F" HOST_WIDE_INT_PRINT_DEC
,
1214 tree_low_cst (TREE_OPERAND (t
, 1), 1));
1215 obstack_grow (&sra_obstack
, buffer
, strlen (buffer
));
1219 tree name
= DECL_NAME (t
);
1221 obstack_grow (&sra_obstack
, IDENTIFIER_POINTER (name
),
1222 IDENTIFIER_LENGTH (name
));
1225 sprintf (buffer
, "D%u", DECL_UID (t
));
1226 obstack_grow (&sra_obstack
, buffer
, strlen (buffer
));
1231 /* Construct a pretty variable name for an element's replacement variable.
1232 The name is built on the obstack. */
1235 build_element_name (struct sra_elt
*elt
)
1237 build_element_name_1 (elt
);
1238 obstack_1grow (&sra_obstack
, '\0');
1239 return XOBFINISH (&sra_obstack
, char *);
1242 /* Instantiate an element as an independent variable. */
1245 instantiate_element (struct sra_elt
*elt
)
1247 struct sra_elt
*base_elt
;
1249 bool nowarn
= TREE_NO_WARNING (elt
->element
);
1251 for (base_elt
= elt
; base_elt
->parent
; base_elt
= base_elt
->parent
)
1253 nowarn
= base_elt
->parent
->n_uses
1254 || TREE_NO_WARNING (base_elt
->parent
->element
);
1255 base
= base_elt
->element
;
1257 elt
->replacement
= var
= make_rename_temp (elt
->type
, "SR");
1259 /* For vectors, if used on the left hand side with BIT_FIELD_REF,
1260 they are not a gimple register. */
1261 if (TREE_CODE (TREE_TYPE (var
)) == VECTOR_TYPE
&& elt
->is_vector_lhs
)
1262 DECL_GIMPLE_REG_P (var
) = 0;
1264 DECL_SOURCE_LOCATION (var
) = DECL_SOURCE_LOCATION (base
);
1265 DECL_ARTIFICIAL (var
) = 1;
1267 if (TREE_THIS_VOLATILE (elt
->type
))
1269 TREE_THIS_VOLATILE (var
) = 1;
1270 TREE_SIDE_EFFECTS (var
) = 1;
1273 if (DECL_NAME (base
) && !DECL_IGNORED_P (base
))
1275 char *pretty_name
= build_element_name (elt
);
1276 DECL_NAME (var
) = get_identifier (pretty_name
);
1277 obstack_free (&sra_obstack
, pretty_name
);
1279 SET_DECL_DEBUG_EXPR (var
, generate_element_ref (elt
));
1280 DECL_DEBUG_EXPR_IS_FROM (var
) = 1;
1282 DECL_IGNORED_P (var
) = 0;
1283 TREE_NO_WARNING (var
) = nowarn
;
1287 DECL_IGNORED_P (var
) = 1;
1288 /* ??? We can't generate any warning that would be meaningful. */
1289 TREE_NO_WARNING (var
) = 1;
1294 fputs (" ", dump_file
);
1295 dump_sra_elt_name (dump_file
, elt
);
1296 fputs (" -> ", dump_file
);
1297 print_generic_expr (dump_file
, var
, dump_flags
);
1298 fputc ('\n', dump_file
);
1302 /* Make one pass across an element tree deciding whether or not it's
1303 profitable to instantiate individual leaf scalars.
1305 PARENT_USES and PARENT_COPIES are the sum of the N_USES and N_COPIES
1306 fields all the way up the tree. */
1309 decide_instantiation_1 (struct sra_elt
*elt
, unsigned int parent_uses
,
1310 unsigned int parent_copies
)
1312 if (dump_file
&& !elt
->parent
)
1314 fputs ("Initial instantiation for ", dump_file
);
1315 dump_sra_elt_name (dump_file
, elt
);
1316 fputc ('\n', dump_file
);
1319 if (elt
->cannot_scalarize
)
1324 /* The decision is simple: instantiate if we're used more frequently
1325 than the parent needs to be seen as a complete unit. */
1326 if (elt
->n_uses
+ elt
->n_copies
+ parent_copies
> parent_uses
)
1327 instantiate_element (elt
);
1331 struct sra_elt
*c
, *group
;
1332 unsigned int this_uses
= elt
->n_uses
+ parent_uses
;
1333 unsigned int this_copies
= elt
->n_copies
+ parent_copies
;
1335 /* Consider groups of sub-elements as weighing in favour of
1336 instantiation whatever their size. */
1337 for (group
= elt
->groups
; group
; group
= group
->sibling
)
1338 FOR_EACH_ACTUAL_CHILD (c
, group
)
1340 c
->n_uses
+= group
->n_uses
;
1341 c
->n_copies
+= group
->n_copies
;
1344 for (c
= elt
->children
; c
; c
= c
->sibling
)
1345 decide_instantiation_1 (c
, this_uses
, this_copies
);
1349 /* Compute the size and number of all instantiated elements below ELT.
1350 We will only care about this if the size of the complete structure
1351 fits in a HOST_WIDE_INT, so we don't have to worry about overflow. */
1354 sum_instantiated_sizes (struct sra_elt
*elt
, unsigned HOST_WIDE_INT
*sizep
)
1356 if (elt
->replacement
)
1358 *sizep
+= TREE_INT_CST_LOW (TYPE_SIZE_UNIT (elt
->type
));
1364 unsigned int count
= 0;
1366 for (c
= elt
->children
; c
; c
= c
->sibling
)
1367 count
+= sum_instantiated_sizes (c
, sizep
);
1373 /* Instantiate fields in ELT->TYPE that are not currently present as
1376 static void instantiate_missing_elements (struct sra_elt
*elt
);
1378 static struct sra_elt
*
1379 instantiate_missing_elements_1 (struct sra_elt
*elt
, tree child
, tree type
)
1381 struct sra_elt
*sub
= lookup_element (elt
, child
, type
, INSERT
);
1384 if (sub
->replacement
== NULL
)
1385 instantiate_element (sub
);
1388 instantiate_missing_elements (sub
);
1392 /* Obtain the canonical type for field F of ELEMENT. */
1395 canon_type_for_field (tree f
, tree element
)
1397 tree field_type
= TREE_TYPE (f
);
1399 /* canonicalize_component_ref() unwidens some bit-field types (not
1400 marked as DECL_BIT_FIELD in C++), so we must do the same, lest we
1401 may introduce type mismatches. */
1402 if (INTEGRAL_TYPE_P (field_type
)
1403 && DECL_MODE (f
) != TYPE_MODE (field_type
))
1404 field_type
= TREE_TYPE (get_unwidened (build3 (COMPONENT_REF
,
1413 /* Look for adjacent fields of ELT starting at F that we'd like to
1414 scalarize as a single variable. Return the last field of the
1418 try_instantiate_multiple_fields (struct sra_elt
*elt
, tree f
)
1420 unsigned HOST_WIDE_INT align
, oalign
, word
, bit
, size
, alchk
;
1421 enum machine_mode mode
;
1422 tree first
= f
, prev
;
1424 struct sra_elt
*block
;
1426 if (!is_sra_scalar_type (TREE_TYPE (f
))
1427 || !host_integerp (DECL_FIELD_OFFSET (f
), 1)
1428 || !host_integerp (DECL_FIELD_BIT_OFFSET (f
), 1)
1429 || !host_integerp (DECL_SIZE (f
), 1)
1430 || lookup_element (elt
, f
, NULL
, NO_INSERT
))
1433 /* Taking the alignment of elt->element is not enough, since it
1434 might be just an array index or some such. */
1435 for (block
= elt
; block
; block
= block
->parent
)
1436 if (DECL_P (block
->element
))
1438 align
= DECL_ALIGN (block
->element
);
1443 oalign
= DECL_OFFSET_ALIGN (f
);
1444 word
= tree_low_cst (DECL_FIELD_OFFSET (f
), 1);
1445 bit
= tree_low_cst (DECL_FIELD_BIT_OFFSET (f
), 1);
1446 size
= tree_low_cst (DECL_SIZE (f
), 1);
1454 if ((bit
& alchk
) != ((bit
+ size
- 1) & alchk
))
1457 /* Find adjacent fields in the same alignment word. */
1459 for (prev
= f
, f
= TREE_CHAIN (f
);
1460 f
&& TREE_CODE (f
) == FIELD_DECL
1461 && is_sra_scalar_type (TREE_TYPE (f
))
1462 && host_integerp (DECL_FIELD_OFFSET (f
), 1)
1463 && host_integerp (DECL_FIELD_BIT_OFFSET (f
), 1)
1464 && host_integerp (DECL_SIZE (f
), 1)
1465 && (HOST_WIDE_INT
)word
== tree_low_cst (DECL_FIELD_OFFSET (f
), 1)
1466 && !lookup_element (elt
, f
, NULL
, NO_INSERT
);
1467 prev
= f
, f
= TREE_CHAIN (f
))
1469 unsigned HOST_WIDE_INT nbit
, nsize
;
1471 nbit
= tree_low_cst (DECL_FIELD_BIT_OFFSET (f
), 1);
1472 nsize
= tree_low_cst (DECL_SIZE (f
), 1);
1474 if (bit
+ size
== nbit
)
1476 if ((bit
& alchk
) != ((nbit
+ nsize
- 1) & alchk
))
1480 else if (nbit
+ nsize
== bit
)
1482 if ((nbit
& alchk
) != ((bit
+ size
- 1) & alchk
))
1496 gcc_assert ((bit
& alchk
) == ((bit
+ size
- 1) & alchk
));
1498 /* Try to widen the bit range so as to cover padding bits as well. */
1500 if ((bit
& ~alchk
) || size
!= align
)
1502 unsigned HOST_WIDE_INT mbit
= bit
& alchk
;
1503 unsigned HOST_WIDE_INT msize
= align
;
1505 for (f
= TYPE_FIELDS (elt
->type
);
1506 f
; f
= TREE_CHAIN (f
))
1508 unsigned HOST_WIDE_INT fword
, fbit
, fsize
;
1510 /* Skip the fields from first to prev. */
1517 if (!(TREE_CODE (f
) == FIELD_DECL
1518 && host_integerp (DECL_FIELD_OFFSET (f
), 1)
1519 && host_integerp (DECL_FIELD_BIT_OFFSET (f
), 1)))
1522 fword
= tree_low_cst (DECL_FIELD_OFFSET (f
), 1);
1523 /* If we're past the selected word, we're fine. */
1527 fbit
= tree_low_cst (DECL_FIELD_BIT_OFFSET (f
), 1);
1529 if (host_integerp (DECL_SIZE (f
), 1))
1530 fsize
= tree_low_cst (DECL_SIZE (f
), 1);
1532 /* Assume a variable-sized field takes up all space till
1533 the end of the word. ??? Endianness issues? */
1534 fsize
= align
- fbit
;
1538 /* A large field might start at a previous word and
1539 extend into the selected word. Exclude those
1540 bits. ??? Endianness issues? */
1541 HOST_WIDE_INT diff
= fbit
+ fsize
1542 - (HOST_WIDE_INT
)((word
- fword
) * BITS_PER_UNIT
+ mbit
);
1552 gcc_assert (fword
== word
);
1554 /* Non-overlapping, great. */
1555 if (fbit
+ fsize
<= mbit
1556 || mbit
+ msize
<= fbit
)
1561 unsigned HOST_WIDE_INT diff
= fbit
+ fsize
- mbit
;
1565 else if (fbit
> mbit
)
1566 msize
-= (mbit
+ msize
- fbit
);
1576 /* Now we know the bit range we're interested in. Find the smallest
1577 machine mode we can use to access it. */
1579 for (mode
= smallest_mode_for_size (size
, MODE_INT
);
1581 mode
= GET_MODE_WIDER_MODE (mode
))
1583 gcc_assert (mode
!= VOIDmode
);
1585 alchk
= GET_MODE_PRECISION (mode
) - 1;
1588 if ((bit
& alchk
) == ((bit
+ size
- 1) & alchk
))
1592 gcc_assert (~alchk
< align
);
1594 /* Create the field group as a single variable. */
1596 type
= lang_hooks
.types
.type_for_mode (mode
, 1);
1598 var
= build3 (BIT_FIELD_REF
, type
, NULL_TREE
,
1600 bitsize_int (word
* BITS_PER_UNIT
+ bit
));
1601 BIT_FIELD_REF_UNSIGNED (var
) = 1;
1603 block
= instantiate_missing_elements_1 (elt
, var
, type
);
1604 gcc_assert (block
&& block
->is_scalar
);
1606 var
= block
->replacement
;
1608 if (((word
* BITS_PER_UNIT
+ bit
) & ~alchk
)
1609 || (HOST_WIDE_INT
)size
!= tree_low_cst (DECL_SIZE (var
), 1))
1611 block
->replacement
= build3 (BIT_FIELD_REF
,
1612 TREE_TYPE (block
->element
), var
,
1614 bitsize_int ((word
* BITS_PER_UNIT
1616 BIT_FIELD_REF_UNSIGNED (block
->replacement
) = 1;
1617 TREE_NO_WARNING (block
->replacement
) = 1;
1620 block
->in_bitfld_block
= 2;
1622 /* Add the member fields to the group, such that they access
1623 portions of the group variable. */
1625 for (f
= first
; f
!= TREE_CHAIN (prev
); f
= TREE_CHAIN (f
))
1627 tree field_type
= canon_type_for_field (f
, elt
->element
);
1628 struct sra_elt
*fld
= lookup_element (block
, f
, field_type
, INSERT
);
1630 gcc_assert (fld
&& fld
->is_scalar
&& !fld
->replacement
);
1632 fld
->replacement
= build3 (BIT_FIELD_REF
, field_type
, var
,
1635 ((word
* BITS_PER_UNIT
1637 (DECL_FIELD_BIT_OFFSET (f
))))
1639 BIT_FIELD_REF_UNSIGNED (fld
->replacement
) = TYPE_UNSIGNED (field_type
);
1640 TREE_NO_WARNING (block
->replacement
) = 1;
1641 fld
->in_bitfld_block
= 1;
1648 instantiate_missing_elements (struct sra_elt
*elt
)
1650 tree type
= elt
->type
;
1652 switch (TREE_CODE (type
))
1657 for (f
= TYPE_FIELDS (type
); f
; f
= TREE_CHAIN (f
))
1658 if (TREE_CODE (f
) == FIELD_DECL
)
1660 tree last
= try_instantiate_multiple_fields (elt
, f
);
1668 instantiate_missing_elements_1 (elt
, f
,
1669 canon_type_for_field
1677 tree i
, max
, subtype
;
1679 i
= TYPE_MIN_VALUE (TYPE_DOMAIN (type
));
1680 max
= TYPE_MAX_VALUE (TYPE_DOMAIN (type
));
1681 subtype
= TREE_TYPE (type
);
1685 instantiate_missing_elements_1 (elt
, i
, subtype
);
1686 if (tree_int_cst_equal (i
, max
))
1688 i
= int_const_binop (PLUS_EXPR
, i
, integer_one_node
, true);
1695 type
= TREE_TYPE (type
);
1696 instantiate_missing_elements_1 (elt
, integer_zero_node
, type
);
1697 instantiate_missing_elements_1 (elt
, integer_one_node
, type
);
1705 /* Return true if there is only one non aggregate field in the record, TYPE.
1706 Return false otherwise. */
1709 single_scalar_field_in_record_p (tree type
)
1713 if (TREE_CODE (type
) != RECORD_TYPE
)
1716 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
1717 if (TREE_CODE (field
) == FIELD_DECL
)
1721 if (num_fields
== 2)
1724 if (AGGREGATE_TYPE_P (TREE_TYPE (field
)))
1731 /* Make one pass across an element tree deciding whether to perform block
1732 or element copies. If we decide on element copies, instantiate all
1733 elements. Return true if there are any instantiated sub-elements. */
1736 decide_block_copy (struct sra_elt
*elt
)
1741 /* We shouldn't be invoked on groups of sub-elements as they must
1742 behave like their parent as far as block copy is concerned. */
1743 gcc_assert (!elt
->is_group
);
1745 /* If scalarization is disabled, respect it. */
1746 if (elt
->cannot_scalarize
)
1748 elt
->use_block_copy
= 1;
1752 fputs ("Scalarization disabled for ", dump_file
);
1753 dump_sra_elt_name (dump_file
, elt
);
1754 fputc ('\n', dump_file
);
1757 /* Disable scalarization of sub-elements */
1758 for (c
= elt
->children
; c
; c
= c
->sibling
)
1760 c
->cannot_scalarize
= 1;
1761 decide_block_copy (c
);
1764 /* Groups behave like their parent. */
1765 for (c
= elt
->groups
; c
; c
= c
->sibling
)
1767 c
->cannot_scalarize
= 1;
1768 c
->use_block_copy
= 1;
1774 /* Don't decide if we've no uses. */
1775 if (elt
->n_uses
== 0 && elt
->n_copies
== 0)
1778 else if (!elt
->is_scalar
)
1780 tree size_tree
= TYPE_SIZE_UNIT (elt
->type
);
1781 bool use_block_copy
= true;
1783 /* Tradeoffs for COMPLEX types pretty much always make it better
1784 to go ahead and split the components. */
1785 if (TREE_CODE (elt
->type
) == COMPLEX_TYPE
)
1786 use_block_copy
= false;
1788 /* Don't bother trying to figure out the rest if the structure is
1789 so large we can't do easy arithmetic. This also forces block
1790 copies for variable sized structures. */
1791 else if (host_integerp (size_tree
, 1))
1793 unsigned HOST_WIDE_INT full_size
, inst_size
= 0;
1794 unsigned int max_size
, max_count
, inst_count
, full_count
;
1796 /* If the sra-max-structure-size parameter is 0, then the
1797 user has not overridden the parameter and we can choose a
1798 sensible default. */
1799 max_size
= SRA_MAX_STRUCTURE_SIZE
1800 ? SRA_MAX_STRUCTURE_SIZE
1801 : MOVE_RATIO
* UNITS_PER_WORD
;
1802 max_count
= SRA_MAX_STRUCTURE_COUNT
1803 ? SRA_MAX_STRUCTURE_COUNT
1806 full_size
= tree_low_cst (size_tree
, 1);
1807 full_count
= count_type_elements (elt
->type
, false);
1808 inst_count
= sum_instantiated_sizes (elt
, &inst_size
);
1810 /* If there is only one scalar field in the record, don't block copy. */
1811 if (single_scalar_field_in_record_p (elt
->type
))
1812 use_block_copy
= false;
1814 /* ??? What to do here. If there are two fields, and we've only
1815 instantiated one, then instantiating the other is clearly a win.
1816 If there are a large number of fields then the size of the copy
1817 is much more of a factor. */
1819 /* If the structure is small, and we've made copies, go ahead
1820 and instantiate, hoping that the copies will go away. */
1821 if (full_size
<= max_size
1822 && (full_count
- inst_count
) <= max_count
1823 && elt
->n_copies
> elt
->n_uses
)
1824 use_block_copy
= false;
1825 else if (inst_count
* 100 >= full_count
* SRA_FIELD_STRUCTURE_RATIO
1826 && inst_size
* 100 >= full_size
* SRA_FIELD_STRUCTURE_RATIO
)
1827 use_block_copy
= false;
1829 /* In order to avoid block copy, we have to be able to instantiate
1830 all elements of the type. See if this is possible. */
1832 && (!can_completely_scalarize_p (elt
)
1833 || !type_can_instantiate_all_elements (elt
->type
)))
1834 use_block_copy
= true;
1837 elt
->use_block_copy
= use_block_copy
;
1839 /* Groups behave like their parent. */
1840 for (c
= elt
->groups
; c
; c
= c
->sibling
)
1841 c
->use_block_copy
= use_block_copy
;
1845 fprintf (dump_file
, "Using %s for ",
1846 use_block_copy
? "block-copy" : "element-copy");
1847 dump_sra_elt_name (dump_file
, elt
);
1848 fputc ('\n', dump_file
);
1851 if (!use_block_copy
)
1853 instantiate_missing_elements (elt
);
1858 any_inst
= elt
->replacement
!= NULL
;
1860 for (c
= elt
->children
; c
; c
= c
->sibling
)
1861 any_inst
|= decide_block_copy (c
);
1866 /* Entry point to phase 3. Instantiate scalar replacement variables. */
1869 decide_instantiations (void)
1873 bitmap_head done_head
;
1876 /* We cannot clear bits from a bitmap we're iterating over,
1877 so save up all the bits to clear until the end. */
1878 bitmap_initialize (&done_head
, &bitmap_default_obstack
);
1879 cleared_any
= false;
1881 EXECUTE_IF_SET_IN_BITMAP (sra_candidates
, 0, i
, bi
)
1883 tree var
= referenced_var (i
);
1884 struct sra_elt
*elt
= lookup_element (NULL
, var
, NULL
, NO_INSERT
);
1887 decide_instantiation_1 (elt
, 0, 0);
1888 if (!decide_block_copy (elt
))
1893 bitmap_set_bit (&done_head
, i
);
1900 bitmap_and_compl_into (sra_candidates
, &done_head
);
1901 bitmap_and_compl_into (needs_copy_in
, &done_head
);
1903 bitmap_clear (&done_head
);
1905 mark_set_for_renaming (sra_candidates
);
1908 fputc ('\n', dump_file
);
1912 /* Phase Four: Update the function to match the replacements created. */
1914 /* Mark all the variables in VDEF/VUSE operators for STMT for
1915 renaming. This becomes necessary when we modify all of a
1919 mark_all_v_defs_1 (tree stmt
)
1924 update_stmt_if_modified (stmt
);
1926 FOR_EACH_SSA_TREE_OPERAND (sym
, stmt
, iter
, SSA_OP_ALL_VIRTUALS
)
1928 if (TREE_CODE (sym
) == SSA_NAME
)
1929 sym
= SSA_NAME_VAR (sym
);
1930 mark_sym_for_renaming (sym
);
1935 /* Mark all the variables in virtual operands in all the statements in
1936 LIST for renaming. */
1939 mark_all_v_defs (tree list
)
1941 if (TREE_CODE (list
) != STATEMENT_LIST
)
1942 mark_all_v_defs_1 (list
);
1945 tree_stmt_iterator i
;
1946 for (i
= tsi_start (list
); !tsi_end_p (i
); tsi_next (&i
))
1947 mark_all_v_defs_1 (tsi_stmt (i
));
1952 /* Mark every replacement under ELT with TREE_NO_WARNING. */
1955 mark_no_warning (struct sra_elt
*elt
)
1957 if (!elt
->all_no_warning
)
1959 if (elt
->replacement
)
1960 TREE_NO_WARNING (elt
->replacement
) = 1;
1964 FOR_EACH_ACTUAL_CHILD (c
, elt
)
1965 mark_no_warning (c
);
1967 elt
->all_no_warning
= true;
1971 /* Build a single level component reference to ELT rooted at BASE. */
1974 generate_one_element_ref (struct sra_elt
*elt
, tree base
)
1976 switch (TREE_CODE (TREE_TYPE (base
)))
1980 tree field
= elt
->element
;
1982 /* We can't test elt->in_bitfld_blk here because, when this is
1983 called from instantiate_element, we haven't set this field
1985 if (TREE_CODE (field
) == BIT_FIELD_REF
)
1987 tree ret
= copy_node (field
);
1988 TREE_OPERAND (ret
, 0) = base
;
1992 /* Watch out for compatible records with differing field lists. */
1993 if (DECL_FIELD_CONTEXT (field
) != TYPE_MAIN_VARIANT (TREE_TYPE (base
)))
1994 field
= find_compatible_field (TREE_TYPE (base
), field
);
1996 return build3 (COMPONENT_REF
, elt
->type
, base
, field
, NULL
);
2000 if (TREE_CODE (elt
->element
) == RANGE_EXPR
)
2001 return build4 (ARRAY_RANGE_REF
, elt
->type
, base
,
2002 TREE_OPERAND (elt
->element
, 0), NULL
, NULL
);
2004 return build4 (ARRAY_REF
, elt
->type
, base
, elt
->element
, NULL
, NULL
);
2007 if (elt
->element
== integer_zero_node
)
2008 return build1 (REALPART_EXPR
, elt
->type
, base
);
2010 return build1 (IMAGPART_EXPR
, elt
->type
, base
);
2017 /* Build a full component reference to ELT rooted at its native variable. */
2020 generate_element_ref (struct sra_elt
*elt
)
2023 return generate_one_element_ref (elt
, generate_element_ref (elt
->parent
));
2025 return elt
->element
;
2028 /* Create an assignment statement from SRC to DST. */
2031 sra_build_assignment (tree dst
, tree src
)
2033 /* It was hoped that we could perform some type sanity checking
2034 here, but since front-ends can emit accesses of fields in types
2035 different from their nominal types and copy structures containing
2036 them as a whole, we'd have to handle such differences here.
2037 Since such accesses under different types require compatibility
2038 anyway, there's little point in making tests and/or adding
2039 conversions to ensure the types of src and dst are the same.
2040 So we just assume type differences at this point are ok. */
2041 return build_gimple_modify_stmt (dst
, src
);
2044 /* BIT_FIELD_REFs must not be shared. sra_build_elt_assignment()
2045 takes care of assignments, but we must create copies for uses. */
2046 #define REPLDUP(t) (TREE_CODE (t) != BIT_FIELD_REF ? (t) : copy_node (t))
2049 sra_build_elt_assignment (struct sra_elt
*elt
, tree src
)
2051 tree dst
= elt
->replacement
;
2052 tree var
, type
, tmp
, tmp2
, tmp3
;
2054 tree cst
, cst2
, mask
;
2055 tree minshift
, maxshift
;
2057 if (TREE_CODE (dst
) != BIT_FIELD_REF
2058 || !elt
->in_bitfld_block
)
2059 return sra_build_assignment (REPLDUP (dst
), src
);
2061 var
= TREE_OPERAND (dst
, 0);
2063 /* Try to widen the assignment to the entire variable.
2064 We need the source to be a BIT_FIELD_REF as well, such that, for
2065 BIT_FIELD_REF<d,sz,dp> = BIT_FIELD_REF<s,sz,sp>,
2066 if sp >= dp, we can turn it into
2067 d = BIT_FIELD_REF<s,sp+sz,sp-dp>. */
2068 if (elt
->in_bitfld_block
== 2
2069 && TREE_CODE (src
) == BIT_FIELD_REF
2070 && !tree_int_cst_lt (TREE_OPERAND (src
, 2), TREE_OPERAND (dst
, 2)))
2072 src
= fold_build3 (BIT_FIELD_REF
, TREE_TYPE (var
),
2073 TREE_OPERAND (src
, 0),
2074 size_binop (PLUS_EXPR
, TREE_OPERAND (src
, 1),
2075 TREE_OPERAND (dst
, 2)),
2076 size_binop (MINUS_EXPR
, TREE_OPERAND (src
, 2),
2077 TREE_OPERAND (dst
, 2)));
2078 BIT_FIELD_REF_UNSIGNED (src
) = 1;
2080 return sra_build_assignment (var
, src
);
2083 if (!is_gimple_reg (var
))
2084 return sra_build_assignment (REPLDUP (dst
), src
);
2086 list
= alloc_stmt_list ();
2088 cst
= TREE_OPERAND (dst
, 2);
2089 if (WORDS_BIG_ENDIAN
)
2091 cst
= size_binop (MINUS_EXPR
, DECL_SIZE (var
), cst
);
2097 cst2
= size_binop (PLUS_EXPR
, TREE_OPERAND (dst
, 1),
2098 TREE_OPERAND (dst
, 2));
2099 if (WORDS_BIG_ENDIAN
)
2101 cst2
= size_binop (MINUS_EXPR
, DECL_SIZE (var
), cst2
);
2107 type
= TREE_TYPE (var
);
2109 mask
= build_int_cst_wide (type
, 1, 0);
2110 cst
= int_const_binop (LSHIFT_EXPR
, mask
, maxshift
, 1);
2111 cst2
= int_const_binop (LSHIFT_EXPR
, mask
, minshift
, 1);
2112 mask
= int_const_binop (MINUS_EXPR
, cst
, cst2
, 1);
2113 mask
= fold_build1 (BIT_NOT_EXPR
, type
, mask
);
2115 if (!WORDS_BIG_ENDIAN
)
2116 cst2
= TREE_OPERAND (dst
, 2);
2118 tmp
= make_rename_temp (type
, "SR");
2119 stmt
= build_gimple_modify_stmt (tmp
,
2120 fold_build2 (BIT_AND_EXPR
, type
,
2122 append_to_statement_list (stmt
, &list
);
2124 if (is_gimple_reg (src
))
2128 tmp2
= make_rename_temp (TREE_TYPE (src
), "SR");
2129 stmt
= sra_build_assignment (tmp2
, src
);
2130 append_to_statement_list (stmt
, &list
);
2133 if (!TYPE_UNSIGNED (TREE_TYPE (tmp2
))
2134 || TYPE_MAIN_VARIANT (TREE_TYPE (tmp2
)) != TYPE_MAIN_VARIANT (type
))
2136 tmp3
= make_rename_temp (type
, "SR");
2137 tmp2
= fold_build3 (BIT_FIELD_REF
, type
, tmp2
, TREE_OPERAND (dst
, 1),
2139 if (TREE_CODE (tmp2
) == BIT_FIELD_REF
)
2140 BIT_FIELD_REF_UNSIGNED (tmp2
) = 1;
2141 stmt
= sra_build_assignment (tmp3
, tmp2
);
2142 append_to_statement_list (stmt
, &list
);
2146 if (!integer_zerop (minshift
))
2148 tmp3
= make_rename_temp (type
, "SR");
2149 stmt
= build_gimple_modify_stmt (tmp3
,
2150 fold_build2 (LSHIFT_EXPR
, type
,
2152 append_to_statement_list (stmt
, &list
);
2156 stmt
= build_gimple_modify_stmt (var
,
2157 fold_build2 (BIT_IOR_EXPR
, type
,
2159 append_to_statement_list (stmt
, &list
);
2164 /* Generate a set of assignment statements in *LIST_P to copy all
2165 instantiated elements under ELT to or from the equivalent structure
2166 rooted at EXPR. COPY_OUT controls the direction of the copy, with
2167 true meaning to copy out of EXPR into ELT. */
2170 generate_copy_inout (struct sra_elt
*elt
, bool copy_out
, tree expr
,
2176 if (!copy_out
&& TREE_CODE (expr
) == SSA_NAME
2177 && TREE_CODE (TREE_TYPE (expr
)) == COMPLEX_TYPE
)
2181 c
= lookup_element (elt
, integer_zero_node
, NULL
, NO_INSERT
);
2183 c
= lookup_element (elt
, integer_one_node
, NULL
, NO_INSERT
);
2186 t
= build2 (COMPLEX_EXPR
, elt
->type
, r
, i
);
2187 t
= sra_build_assignment (expr
, t
);
2188 SSA_NAME_DEF_STMT (expr
) = t
;
2189 append_to_statement_list (t
, list_p
);
2191 else if (elt
->replacement
)
2194 t
= sra_build_elt_assignment (elt
, expr
);
2196 t
= sra_build_assignment (expr
, REPLDUP (elt
->replacement
));
2197 append_to_statement_list (t
, list_p
);
2201 FOR_EACH_ACTUAL_CHILD (c
, elt
)
2203 t
= generate_one_element_ref (c
, unshare_expr (expr
));
2204 generate_copy_inout (c
, copy_out
, t
, list_p
);
2209 /* Generate a set of assignment statements in *LIST_P to copy all instantiated
2210 elements under SRC to their counterparts under DST. There must be a 1-1
2211 correspondence of instantiated elements. */
2214 generate_element_copy (struct sra_elt
*dst
, struct sra_elt
*src
, tree
*list_p
)
2216 struct sra_elt
*dc
, *sc
;
2218 FOR_EACH_ACTUAL_CHILD (dc
, dst
)
2220 sc
= lookup_element (src
, dc
->element
, NULL
, NO_INSERT
);
2221 if (!sc
&& dc
->in_bitfld_block
== 2)
2223 struct sra_elt
*dcs
;
2225 FOR_EACH_ACTUAL_CHILD (dcs
, dc
)
2227 sc
= lookup_element (src
, dcs
->element
, NULL
, NO_INSERT
);
2229 generate_element_copy (dcs
, sc
, list_p
);
2235 generate_element_copy (dc
, sc
, list_p
);
2238 if (dst
->replacement
)
2242 gcc_assert (src
->replacement
);
2244 t
= sra_build_elt_assignment (dst
, REPLDUP (src
->replacement
));
2245 append_to_statement_list (t
, list_p
);
2249 /* Generate a set of assignment statements in *LIST_P to zero all instantiated
2250 elements under ELT. In addition, do not assign to elements that have been
2251 marked VISITED but do reset the visited flag; this allows easy coordination
2252 with generate_element_init. */
2255 generate_element_zero (struct sra_elt
*elt
, tree
*list_p
)
2261 elt
->visited
= false;
2265 if (!elt
->in_bitfld_block
)
2266 FOR_EACH_ACTUAL_CHILD (c
, elt
)
2267 generate_element_zero (c
, list_p
);
2269 if (elt
->replacement
)
2273 gcc_assert (elt
->is_scalar
);
2274 t
= fold_convert (elt
->type
, integer_zero_node
);
2276 t
= sra_build_elt_assignment (elt
, t
);
2277 append_to_statement_list (t
, list_p
);
2281 /* Generate an assignment VAR = INIT, where INIT may need gimplification.
2282 Add the result to *LIST_P. */
2285 generate_one_element_init (struct sra_elt
*elt
, tree init
, tree
*list_p
)
2287 /* The replacement can be almost arbitrarily complex. Gimplify. */
2288 tree stmt
= sra_build_elt_assignment (elt
, init
);
2289 gimplify_and_add (stmt
, list_p
);
2292 /* Generate a set of assignment statements in *LIST_P to set all instantiated
2293 elements under ELT with the contents of the initializer INIT. In addition,
2294 mark all assigned elements VISITED; this allows easy coordination with
2295 generate_element_zero. Return false if we found a case we couldn't
2299 generate_element_init_1 (struct sra_elt
*elt
, tree init
, tree
*list_p
)
2302 enum tree_code init_code
;
2303 struct sra_elt
*sub
;
2305 unsigned HOST_WIDE_INT idx
;
2306 tree value
, purpose
;
2308 /* We can be passed DECL_INITIAL of a static variable. It might have a
2309 conversion, which we strip off here. */
2310 STRIP_USELESS_TYPE_CONVERSION (init
);
2311 init_code
= TREE_CODE (init
);
2315 if (elt
->replacement
)
2317 generate_one_element_init (elt
, init
, list_p
);
2318 elt
->visited
= true;
2327 FOR_EACH_ACTUAL_CHILD (sub
, elt
)
2329 if (sub
->element
== integer_zero_node
)
2330 t
= (init_code
== COMPLEX_EXPR
2331 ? TREE_OPERAND (init
, 0) : TREE_REALPART (init
));
2333 t
= (init_code
== COMPLEX_EXPR
2334 ? TREE_OPERAND (init
, 1) : TREE_IMAGPART (init
));
2335 result
&= generate_element_init_1 (sub
, t
, list_p
);
2340 FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (init
), idx
, purpose
, value
)
2342 if (TREE_CODE (purpose
) == RANGE_EXPR
)
2344 tree lower
= TREE_OPERAND (purpose
, 0);
2345 tree upper
= TREE_OPERAND (purpose
, 1);
2349 sub
= lookup_element (elt
, lower
, NULL
, NO_INSERT
);
2351 result
&= generate_element_init_1 (sub
, value
, list_p
);
2352 if (tree_int_cst_equal (lower
, upper
))
2354 lower
= int_const_binop (PLUS_EXPR
, lower
,
2355 integer_one_node
, true);
2360 sub
= lookup_element (elt
, purpose
, NULL
, NO_INSERT
);
2362 result
&= generate_element_init_1 (sub
, value
, list_p
);
2368 elt
->visited
= true;
2375 /* A wrapper function for generate_element_init_1 that handles cleanup after
2379 generate_element_init (struct sra_elt
*elt
, tree init
, tree
*list_p
)
2383 push_gimplify_context ();
2384 ret
= generate_element_init_1 (elt
, init
, list_p
);
2385 pop_gimplify_context (NULL
);
2387 /* The replacement can expose previously unreferenced variables. */
2390 tree_stmt_iterator i
;
2392 for (i
= tsi_start (*list_p
); !tsi_end_p (i
); tsi_next (&i
))
2393 find_new_referenced_vars (tsi_stmt_ptr (i
));
2399 /* Insert STMT on all the outgoing edges out of BB. Note that if BB
2400 has more than one edge, STMT will be replicated for each edge. Also,
2401 abnormal edges will be ignored. */
2404 insert_edge_copies (tree stmt
, basic_block bb
)
2411 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2413 /* We don't need to insert copies on abnormal edges. The
2414 value of the scalar replacement is not guaranteed to
2415 be valid through an abnormal edge. */
2416 if (!(e
->flags
& EDGE_ABNORMAL
))
2420 bsi_insert_on_edge (e
, stmt
);
2424 bsi_insert_on_edge (e
, unsave_expr_now (stmt
));
2429 /* Helper function to insert LIST before BSI, and set up line number info. */
2432 sra_insert_before (block_stmt_iterator
*bsi
, tree list
)
2434 tree stmt
= bsi_stmt (*bsi
);
2436 if (EXPR_HAS_LOCATION (stmt
))
2437 annotate_all_with_locus (&list
, EXPR_LOCATION (stmt
));
2438 bsi_insert_before (bsi
, list
, BSI_SAME_STMT
);
2441 /* Similarly, but insert after BSI. Handles insertion onto edges as well. */
2444 sra_insert_after (block_stmt_iterator
*bsi
, tree list
)
2446 tree stmt
= bsi_stmt (*bsi
);
2448 if (EXPR_HAS_LOCATION (stmt
))
2449 annotate_all_with_locus (&list
, EXPR_LOCATION (stmt
));
2451 if (stmt_ends_bb_p (stmt
))
2452 insert_edge_copies (list
, bsi
->bb
);
2454 bsi_insert_after (bsi
, list
, BSI_SAME_STMT
);
2457 /* Similarly, but replace the statement at BSI. */
2460 sra_replace (block_stmt_iterator
*bsi
, tree list
)
2462 sra_insert_before (bsi
, list
);
2463 bsi_remove (bsi
, false);
2464 if (bsi_end_p (*bsi
))
2465 *bsi
= bsi_last (bsi
->bb
);
2470 /* Scalarize a USE. To recap, this is either a simple reference to ELT,
2471 if elt is scalar, or some occurrence of ELT that requires a complete
2472 aggregate. IS_OUTPUT is true if ELT is being modified. */
2475 scalarize_use (struct sra_elt
*elt
, tree
*expr_p
, block_stmt_iterator
*bsi
,
2478 tree list
= NULL
, stmt
= bsi_stmt (*bsi
);
2480 if (elt
->replacement
)
2482 /* If we have a replacement, then updating the reference is as
2483 simple as modifying the existing statement in place. */
2486 if (TREE_CODE (elt
->replacement
) == BIT_FIELD_REF
2487 && is_gimple_reg (TREE_OPERAND (elt
->replacement
, 0))
2488 && TREE_CODE (stmt
) == GIMPLE_MODIFY_STMT
2489 && &GIMPLE_STMT_OPERAND (stmt
, 0) == expr_p
)
2491 tree newstmt
= sra_build_elt_assignment
2492 (elt
, GIMPLE_STMT_OPERAND (stmt
, 1));
2493 if (TREE_CODE (newstmt
) != STATEMENT_LIST
)
2495 tree list
= alloc_stmt_list ();
2496 append_to_statement_list (newstmt
, &list
);
2499 sra_replace (bsi
, newstmt
);
2503 mark_all_v_defs (stmt
);
2505 *expr_p
= REPLDUP (elt
->replacement
);
2510 /* Otherwise we need some copies. If ELT is being read, then we want
2511 to store all (modified) sub-elements back into the structure before
2512 the reference takes place. If ELT is being written, then we want to
2513 load the changed values back into our shadow variables. */
2514 /* ??? We don't check modified for reads, we just always write all of
2515 the values. We should be able to record the SSA number of the VOP
2516 for which the values were last read. If that number matches the
2517 SSA number of the VOP in the current statement, then we needn't
2518 emit an assignment. This would also eliminate double writes when
2519 a structure is passed as more than one argument to a function call.
2520 This optimization would be most effective if sra_walk_function
2521 processed the blocks in dominator order. */
2523 generate_copy_inout (elt
, false, generate_element_ref (elt
), &list
);
2526 mark_all_v_defs (list
);
2527 sra_insert_before (bsi
, list
);
2528 mark_no_warning (elt
);
2534 generate_copy_inout (elt
, true, generate_element_ref (elt
), &list
);
2537 mark_all_v_defs (list
);
2538 sra_insert_after (bsi
, list
);
2544 /* Scalarize a COPY. To recap, this is an assignment statement between
2545 two scalarizable references, LHS_ELT and RHS_ELT. */
2548 scalarize_copy (struct sra_elt
*lhs_elt
, struct sra_elt
*rhs_elt
,
2549 block_stmt_iterator
*bsi
)
2553 if (lhs_elt
->replacement
&& rhs_elt
->replacement
)
2555 /* If we have two scalar operands, modify the existing statement. */
2556 stmt
= bsi_stmt (*bsi
);
2558 /* See the commentary in sra_walk_function concerning
2559 RETURN_EXPR, and why we should never see one here. */
2560 gcc_assert (TREE_CODE (stmt
) == GIMPLE_MODIFY_STMT
);
2562 GIMPLE_STMT_OPERAND (stmt
, 0) = lhs_elt
->replacement
;
2563 GIMPLE_STMT_OPERAND (stmt
, 1) = REPLDUP (rhs_elt
->replacement
);
2566 else if (lhs_elt
->use_block_copy
|| rhs_elt
->use_block_copy
)
2568 /* If either side requires a block copy, then sync the RHS back
2569 to the original structure, leave the original assignment
2570 statement (which will perform the block copy), then load the
2571 LHS values out of its now-updated original structure. */
2572 /* ??? Could perform a modified pair-wise element copy. That
2573 would at least allow those elements that are instantiated in
2574 both structures to be optimized well. */
2577 generate_copy_inout (rhs_elt
, false,
2578 generate_element_ref (rhs_elt
), &list
);
2581 mark_all_v_defs (list
);
2582 sra_insert_before (bsi
, list
);
2586 generate_copy_inout (lhs_elt
, true,
2587 generate_element_ref (lhs_elt
), &list
);
2590 mark_all_v_defs (list
);
2591 sra_insert_after (bsi
, list
);
2596 /* Otherwise both sides must be fully instantiated. In which
2597 case perform pair-wise element assignments and replace the
2598 original block copy statement. */
2600 stmt
= bsi_stmt (*bsi
);
2601 mark_all_v_defs (stmt
);
2604 generate_element_copy (lhs_elt
, rhs_elt
, &list
);
2606 mark_all_v_defs (list
);
2607 sra_replace (bsi
, list
);
2611 /* Scalarize an INIT. To recap, this is an assignment to a scalarizable
2612 reference from some form of constructor: CONSTRUCTOR, COMPLEX_CST or
2613 COMPLEX_EXPR. If RHS is NULL, it should be treated as an empty
2617 scalarize_init (struct sra_elt
*lhs_elt
, tree rhs
, block_stmt_iterator
*bsi
)
2622 /* Generate initialization statements for all members extant in the RHS. */
2625 /* Unshare the expression just in case this is from a decl's initial. */
2626 rhs
= unshare_expr (rhs
);
2627 result
= generate_element_init (lhs_elt
, rhs
, &list
);
2630 /* CONSTRUCTOR is defined such that any member not mentioned is assigned
2631 a zero value. Initialize the rest of the instantiated elements. */
2632 generate_element_zero (lhs_elt
, &list
);
2636 /* If we failed to convert the entire initializer, then we must
2637 leave the structure assignment in place and must load values
2638 from the structure into the slots for which we did not find
2639 constants. The easiest way to do this is to generate a complete
2640 copy-out, and then follow that with the constant assignments
2641 that we were able to build. DCE will clean things up. */
2643 generate_copy_inout (lhs_elt
, true, generate_element_ref (lhs_elt
),
2645 append_to_statement_list (list
, &list0
);
2649 if (lhs_elt
->use_block_copy
|| !result
)
2651 /* Since LHS is not fully instantiated, we must leave the structure
2652 assignment in place. Treating this case differently from a USE
2653 exposes constants to later optimizations. */
2656 mark_all_v_defs (list
);
2657 sra_insert_after (bsi
, list
);
2662 /* The LHS is fully instantiated. The list of initializations
2663 replaces the original structure assignment. */
2665 mark_all_v_defs (bsi_stmt (*bsi
));
2666 mark_all_v_defs (list
);
2667 sra_replace (bsi
, list
);
2671 /* A subroutine of scalarize_ldst called via walk_tree. Set TREE_NO_TRAP
2672 on all INDIRECT_REFs. */
2675 mark_notrap (tree
*tp
, int *walk_subtrees
, void *data ATTRIBUTE_UNUSED
)
2679 if (TREE_CODE (t
) == INDIRECT_REF
)
2681 TREE_THIS_NOTRAP (t
) = 1;
2684 else if (IS_TYPE_OR_DECL_P (t
))
2690 /* Scalarize a LDST. To recap, this is an assignment between one scalarizable
2691 reference ELT and one non-scalarizable reference OTHER. IS_OUTPUT is true
2692 if ELT is on the left-hand side. */
2695 scalarize_ldst (struct sra_elt
*elt
, tree other
,
2696 block_stmt_iterator
*bsi
, bool is_output
)
2698 /* Shouldn't have gotten called for a scalar. */
2699 gcc_assert (!elt
->replacement
);
2701 if (elt
->use_block_copy
)
2703 /* Since ELT is not fully instantiated, we have to leave the
2704 block copy in place. Treat this as a USE. */
2705 scalarize_use (elt
, NULL
, bsi
, is_output
);
2709 /* The interesting case is when ELT is fully instantiated. In this
2710 case we can have each element stored/loaded directly to/from the
2711 corresponding slot in OTHER. This avoids a block copy. */
2713 tree list
= NULL
, stmt
= bsi_stmt (*bsi
);
2715 mark_all_v_defs (stmt
);
2716 generate_copy_inout (elt
, is_output
, other
, &list
);
2718 mark_all_v_defs (list
);
2720 /* Preserve EH semantics. */
2721 if (stmt_ends_bb_p (stmt
))
2723 tree_stmt_iterator tsi
;
2726 /* Extract the first statement from LIST. */
2727 tsi
= tsi_start (list
);
2728 first
= tsi_stmt (tsi
);
2731 /* Replace the old statement with this new representative. */
2732 bsi_replace (bsi
, first
, true);
2734 if (!tsi_end_p (tsi
))
2736 /* If any reference would trap, then they all would. And more
2737 to the point, the first would. Therefore none of the rest
2738 will trap since the first didn't. Indicate this by
2739 iterating over the remaining statements and set
2740 TREE_THIS_NOTRAP in all INDIRECT_REFs. */
2743 walk_tree (tsi_stmt_ptr (tsi
), mark_notrap
, NULL
, NULL
);
2746 while (!tsi_end_p (tsi
));
2748 insert_edge_copies (list
, bsi
->bb
);
2752 sra_replace (bsi
, list
);
2756 /* Generate initializations for all scalarizable parameters. */
2759 scalarize_parms (void)
2765 EXECUTE_IF_SET_IN_BITMAP (needs_copy_in
, 0, i
, bi
)
2767 tree var
= referenced_var (i
);
2768 struct sra_elt
*elt
= lookup_element (NULL
, var
, NULL
, NO_INSERT
);
2769 generate_copy_inout (elt
, true, var
, &list
);
2774 insert_edge_copies (list
, ENTRY_BLOCK_PTR
);
2775 mark_all_v_defs (list
);
2779 /* Entry point to phase 4. Update the function to match replacements. */
2782 scalarize_function (void)
2784 static const struct sra_walk_fns fns
= {
2785 scalarize_use
, scalarize_copy
, scalarize_init
, scalarize_ldst
, false
2788 sra_walk_function (&fns
);
2790 bsi_commit_edge_inserts ();
2794 /* Debug helper function. Print ELT in a nice human-readable format. */
2797 dump_sra_elt_name (FILE *f
, struct sra_elt
*elt
)
2799 if (elt
->parent
&& TREE_CODE (elt
->parent
->type
) == COMPLEX_TYPE
)
2801 fputs (elt
->element
== integer_zero_node
? "__real__ " : "__imag__ ", f
);
2802 dump_sra_elt_name (f
, elt
->parent
);
2807 dump_sra_elt_name (f
, elt
->parent
);
2808 if (DECL_P (elt
->element
))
2810 if (TREE_CODE (elt
->element
) == FIELD_DECL
)
2812 print_generic_expr (f
, elt
->element
, dump_flags
);
2814 else if (TREE_CODE (elt
->element
) == BIT_FIELD_REF
)
2815 fprintf (f
, "$B" HOST_WIDE_INT_PRINT_DEC
"F" HOST_WIDE_INT_PRINT_DEC
,
2816 tree_low_cst (TREE_OPERAND (elt
->element
, 2), 1),
2817 tree_low_cst (TREE_OPERAND (elt
->element
, 1), 1));
2818 else if (TREE_CODE (elt
->element
) == RANGE_EXPR
)
2819 fprintf (f
, "["HOST_WIDE_INT_PRINT_DEC
".."HOST_WIDE_INT_PRINT_DEC
"]",
2820 TREE_INT_CST_LOW (TREE_OPERAND (elt
->element
, 0)),
2821 TREE_INT_CST_LOW (TREE_OPERAND (elt
->element
, 1)));
2823 fprintf (f
, "[" HOST_WIDE_INT_PRINT_DEC
"]",
2824 TREE_INT_CST_LOW (elt
->element
));
/* Likewise, but callable from the debugger.  */

void
debug_sra_elt_name (struct sra_elt *elt)
{
  dump_sra_elt_name (stderr, elt);
  fputc ('\n', stderr);
}
2838 sra_init_cache (void)
2840 if (sra_type_decomp_cache
)
2843 sra_type_decomp_cache
= BITMAP_ALLOC (NULL
);
2844 sra_type_inst_cache
= BITMAP_ALLOC (NULL
);
2847 /* Main entry point. */
2852 /* Initialize local variables. */
2854 gcc_obstack_init (&sra_obstack
);
2855 sra_candidates
= BITMAP_ALLOC (NULL
);
2856 needs_copy_in
= BITMAP_ALLOC (NULL
);
2858 sra_map
= htab_create (101, sra_elt_hash
, sra_elt_eq
, NULL
);
2860 /* Scan. If we find anything, instantiate and scalarize. */
2861 if (find_candidates_for_sra ())
2864 decide_instantiations ();
2865 scalarize_function ();
2868 /* Free allocated memory. */
2869 htab_delete (sra_map
);
2871 BITMAP_FREE (sra_candidates
);
2872 BITMAP_FREE (needs_copy_in
);
2873 BITMAP_FREE (sra_type_decomp_cache
);
2874 BITMAP_FREE (sra_type_inst_cache
);
2875 obstack_free (&sra_obstack
, NULL
);
2880 tree_sra_early (void)
2894 return flag_tree_sra
!= 0;
2897 struct tree_opt_pass pass_sra_early
=
2900 gate_sra
, /* gate */
2901 tree_sra_early
, /* execute */
2904 0, /* static_pass_number */
2905 TV_TREE_SRA
, /* tv_id */
2906 PROP_cfg
| PROP_ssa
, /* properties_required */
2907 0, /* properties_provided */
2908 0, /* properties_destroyed */
2909 0, /* todo_flags_start */
2913 | TODO_verify_ssa
, /* todo_flags_finish */
2917 struct tree_opt_pass pass_sra
=
2920 gate_sra
, /* gate */
2921 tree_sra
, /* execute */
2924 0, /* static_pass_number */
2925 TV_TREE_SRA
, /* tv_id */
2926 PROP_cfg
| PROP_ssa
, /* properties_required */
2927 0, /* properties_provided */
2928 0, /* properties_destroyed */
2929 0, /* todo_flags_start */
2933 | TODO_verify_ssa
, /* todo_flags_finish */