1 /* Loop invariant motion.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2010
3 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 3, or (at your option) any
10 later version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "tree.h"
26 #include "tm_p.h"
27 #include "basic-block.h"
28 #include "gimple-pretty-print.h"
29 #include "tree-flow.h"
30 #include "cfgloop.h"
31 #include "domwalk.h"
32 #include "params.h"
33 #include "tree-pass.h"
34 #include "flags.h"
35 #include "hashtab.h"
36 #include "tree-affine.h"
37 #include "pointer-set.h"
38 #include "tree-ssa-propagate.h"
39
40 /* TODO: Support for predicated code motion. I.e.
41
42 while (1)
43 {
44 if (cond)
45 {
46 a = inv;
47 something;
48 }
49 }
50
51 Where COND and INV are invariants, but evaluating INV may trap or be
52 invalid for some other reason if !COND. This may be transformed to
53
54 if (cond)
55 a = inv;
56 while (1)
57 {
58 if (cond)
59 something;
60 } */
61
62 /* A type for the list of statements that have to be moved in order to be able
63 to hoist an invariant computation. */
64
65 struct depend
66 {
67 gimple stmt;
68 struct depend *next;
69 };
70
71 /* The auxiliary data kept for each statement. */
72
73 struct lim_aux_data
74 {
75 struct loop *max_loop; /* The outermost loop in which the statement
76 is invariant. */
77
78 struct loop *tgt_loop; /* The loop out of which we want to move the
79 invariant. */
80
81 struct loop *always_executed_in;
82 /* The outermost loop for which we are sure
83 the statement is executed if the loop
84 is entered. */
85
86 unsigned cost; /* Cost of the computation performed by the
87 statement. */
88
89 struct depend *depends; /* List of statements that must also be hoisted
90 out of the loop when this statement is
91 hoisted, i.e. those that define the operands
92 of the statement and are inside the
93 MAX_LOOP loop (see the example below). */
94 };
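/* For instance, if the loop body contains

     x_1 = a_2 + b_3;
     y_4 = x_1 * c_5;

   with a_2, b_3 and c_5 invariant, then hoisting the statement computing
   y_4 requires the statement computing x_1 to be hoisted as well, so the
   latter is recorded in the DEPENDS list of the former.  */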
95
96 /* Maps statements to their lim_aux_data. */
97
98 static struct pointer_map_t *lim_aux_data_map;
99
100 /* Description of a memory reference location. */
101
102 typedef struct mem_ref_loc
103 {
104 tree *ref; /* The reference itself. */
105 gimple stmt; /* The statement in which it occurs. */
106 } *mem_ref_loc_p;
107
108
109 /* The list of memory reference locations in a loop. */
110
111 typedef struct mem_ref_locs
112 {
113 vec<mem_ref_loc_p> locs;
114 } *mem_ref_locs_p;
115
116
117 /* Description of a memory reference. */
118
119 typedef struct mem_ref
120 {
121 tree mem; /* The memory itself. */
122 unsigned id; /* ID assigned to the memory reference
123 (its index in memory_accesses.refs_list) */
124 hashval_t hash; /* Its hash value. */
125 bitmap stored; /* The set of loops in which this memory location
126 is stored to. */
127 vec<mem_ref_locs_p> accesses_in_loop;
128 /* The locations of the accesses. Vector
129 indexed by the loop number. */
130
131 /* The following sets are computed on demand. We keep both set and
132 its complement, so that we know whether the information was
133 already computed or not. */
134 bitmap indep_loop; /* The set of loops in which the memory
135 reference is independent, meaning:
136 if it is stored in the loop, this store
137 is independent of all other loads and
138 stores;
139 if it is only loaded, then it is independent
140 of all stores in the loop. */
141 bitmap dep_loop; /* The complement of INDEP_LOOP. */
142
143 bitmap indep_ref; /* The set of memory references that this
144 reference is independent of. */
145 bitmap dep_ref; /* The complement of INDEP_REF. */
146 } *mem_ref_p;
147
148
149
150
151 /* Description of memory accesses in loops. */
152
153 static struct
154 {
155 /* The hash table of memory references accessed in loops. */
156 htab_t refs;
157
158 /* The list of memory references. */
159 vec<mem_ref_p> refs_list;
160
161 /* The set of memory references accessed in each loop. */
162 vec<bitmap> refs_in_loop;
163
164 /* The set of memory references accessed in each loop, including
165 subloops. */
166 vec<bitmap> all_refs_in_loop;
167
168 /* The set of memory references stored in each loop, including
169 subloops. */
170 vec<bitmap> all_refs_stored_in_loop;
171
172 /* Cache for expanding memory addresses. */
173 struct pointer_map_t *ttae_cache;
174 } memory_accesses;
175
176 /* Obstack for the bitmaps in the above data structures. */
177 static bitmap_obstack lim_bitmap_obstack;
178
179 static bool ref_indep_loop_p (struct loop *, mem_ref_p);
180
181 /* Minimum cost of an expensive expression. */
182 #define LIM_EXPENSIVE ((unsigned) PARAM_VALUE (PARAM_LIM_EXPENSIVE))
183
184 /* The outermost loop for which execution of the header guarantees that the
185 block will be executed. */
186 #define ALWAYS_EXECUTED_IN(BB) ((struct loop *) (BB)->aux)
187 #define SET_ALWAYS_EXECUTED_IN(BB, VAL) ((BB)->aux = (void *) (VAL))
188
189 /* Whether the reference was analyzable. */
190 #define MEM_ANALYZABLE(REF) ((REF)->mem != error_mark_node)
191
192 static struct lim_aux_data *
193 init_lim_data (gimple stmt)
194 {
195 void **p = pointer_map_insert (lim_aux_data_map, stmt);
196
197 *p = XCNEW (struct lim_aux_data);
198 return (struct lim_aux_data *) *p;
199 }
200
201 static struct lim_aux_data *
202 get_lim_data (gimple stmt)
203 {
204 void **p = pointer_map_contains (lim_aux_data_map, stmt);
205 if (!p)
206 return NULL;
207
208 return (struct lim_aux_data *) *p;
209 }
210
211 /* Releases the memory occupied by DATA. */
212
213 static void
214 free_lim_aux_data (struct lim_aux_data *data)
215 {
216 struct depend *dep, *next;
217
218 for (dep = data->depends; dep; dep = next)
219 {
220 next = dep->next;
221 free (dep);
222 }
223 free (data);
224 }
225
226 static void
227 clear_lim_data (gimple stmt)
228 {
229 void **p = pointer_map_contains (lim_aux_data_map, stmt);
230 if (!p)
231 return;
232
233 free_lim_aux_data ((struct lim_aux_data *) *p);
234 *p = NULL;
235 }
236
237 /* Calls CBCK for each index in memory reference ADDR_P. There are two
238 kinds of situations handled; in each of these cases, the memory reference
239 and DATA are passed to the callback:
240
241 Access to an array: ARRAY_{RANGE_}REF (base, index). In this case we also
242 pass the pointer to the index to the callback.
243
244 Pointer dereference: INDIRECT_REF (addr). In this case we also pass the
245 pointer to addr to the callback.
246
247 If the callback returns false, the whole search stops and false is returned.
248 Otherwise the function returns true after traversing through the whole
249 reference *ADDR_P. */
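/* For instance, store motion uses this walk to check that all indices of a
   reference may be hoisted, roughly as in

     for_each_index (&ref->mem, may_move_till, loop);

   with MAY_MOVE_TILL (defined below) as the callback and LOOP passed
   through as DATA.  */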
250
251 bool
252 for_each_index (tree *addr_p, bool (*cbck) (tree, tree *, void *), void *data)
253 {
254 tree *nxt, *idx;
255
256 for (; ; addr_p = nxt)
257 {
258 switch (TREE_CODE (*addr_p))
259 {
260 case SSA_NAME:
261 return cbck (*addr_p, addr_p, data);
262
263 case MEM_REF:
264 nxt = &TREE_OPERAND (*addr_p, 0);
265 return cbck (*addr_p, nxt, data);
266
267 case BIT_FIELD_REF:
268 case VIEW_CONVERT_EXPR:
269 case REALPART_EXPR:
270 case IMAGPART_EXPR:
271 nxt = &TREE_OPERAND (*addr_p, 0);
272 break;
273
274 case COMPONENT_REF:
275 /* If the component has varying offset, it behaves like an index
276 as well. */
277 idx = &TREE_OPERAND (*addr_p, 2);
278 if (*idx
279 && !cbck (*addr_p, idx, data))
280 return false;
281
282 nxt = &TREE_OPERAND (*addr_p, 0);
283 break;
284
285 case ARRAY_REF:
286 case ARRAY_RANGE_REF:
287 nxt = &TREE_OPERAND (*addr_p, 0);
288 if (!cbck (*addr_p, &TREE_OPERAND (*addr_p, 1), data))
289 return false;
290 break;
291
292 case VAR_DECL:
293 case PARM_DECL:
294 case CONST_DECL:
295 case STRING_CST:
296 case RESULT_DECL:
297 case VECTOR_CST:
298 case COMPLEX_CST:
299 case INTEGER_CST:
300 case REAL_CST:
301 case FIXED_CST:
302 case CONSTRUCTOR:
303 return true;
304
305 case ADDR_EXPR:
306 gcc_assert (is_gimple_min_invariant (*addr_p));
307 return true;
308
309 case TARGET_MEM_REF:
310 idx = &TMR_BASE (*addr_p);
311 if (*idx
312 && !cbck (*addr_p, idx, data))
313 return false;
314 idx = &TMR_INDEX (*addr_p);
315 if (*idx
316 && !cbck (*addr_p, idx, data))
317 return false;
318 idx = &TMR_INDEX2 (*addr_p);
319 if (*idx
320 && !cbck (*addr_p, idx, data))
321 return false;
322 return true;
323
324 default:
325 gcc_unreachable ();
326 }
327 }
328 }
329
330 /* If it is possible to hoist the statement STMT unconditionally,
331 returns MOVE_POSSIBLE.
332 If it is possible to hoist the statement STMT, but we must avoid executing
333 it if it would not be executed in the original program (e.g.
334 because it may trap), return MOVE_PRESERVE_EXECUTION.
335 Otherwise return MOVE_IMPOSSIBLE. */
336
337 enum move_pos
338 movement_possibility (gimple stmt)
339 {
340 tree lhs;
341 enum move_pos ret = MOVE_POSSIBLE;
342
343 if (flag_unswitch_loops
344 && gimple_code (stmt) == GIMPLE_COND)
345 {
346 /* If we perform unswitching, force the operands of the invariant
347 condition to be moved out of the loop. */
348 return MOVE_POSSIBLE;
349 }
350
351 if (gimple_code (stmt) == GIMPLE_PHI
352 && gimple_phi_num_args (stmt) <= 2
353 && !virtual_operand_p (gimple_phi_result (stmt))
354 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (stmt)))
355 return MOVE_POSSIBLE;
356
357 if (gimple_get_lhs (stmt) == NULL_TREE)
358 return MOVE_IMPOSSIBLE;
359
360 if (gimple_vdef (stmt))
361 return MOVE_IMPOSSIBLE;
362
363 if (stmt_ends_bb_p (stmt)
364 || gimple_has_volatile_ops (stmt)
365 || gimple_has_side_effects (stmt)
366 || stmt_could_throw_p (stmt))
367 return MOVE_IMPOSSIBLE;
368
369 if (is_gimple_call (stmt))
370 {
371 /* While a pure or const call is guaranteed to have no side effects, we
372 cannot move it arbitrarily. Consider code like
373
374 char *s = something ();
375
376 while (1)
377 {
378 if (s)
379 t = strlen (s);
380 else
381 t = 0;
382 }
383
384 Here the strlen call cannot be moved out of the loop, even though
385 s is invariant. In addition to possibly creating a call with
386 invalid arguments, moving out a function call that is not executed
387 may cause performance regressions in case the call is costly and
388 not executed at all. */
389 ret = MOVE_PRESERVE_EXECUTION;
390 lhs = gimple_call_lhs (stmt);
391 }
392 else if (is_gimple_assign (stmt))
393 lhs = gimple_assign_lhs (stmt);
394 else
395 return MOVE_IMPOSSIBLE;
396
397 if (TREE_CODE (lhs) == SSA_NAME
398 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
399 return MOVE_IMPOSSIBLE;
400
401 if (TREE_CODE (lhs) != SSA_NAME
402 || gimple_could_trap_p (stmt))
403 return MOVE_PRESERVE_EXECUTION;
404
405 /* Non local loads in a transaction cannot be hoisted out. Well,
406 unless the load happens on every path out of the loop, but we
407 don't take this into account yet. */
408 if (flag_tm
409 && gimple_in_transaction (stmt)
410 && gimple_assign_single_p (stmt))
411 {
412 tree rhs = gimple_assign_rhs1 (stmt);
413 if (DECL_P (rhs) && is_global_var (rhs))
414 {
415 if (dump_file)
416 {
417 fprintf (dump_file, "Cannot hoist conditional load of ");
418 print_generic_expr (dump_file, rhs, TDF_SLIM);
419 fprintf (dump_file, " because it is in a transaction.\n");
420 }
421 return MOVE_IMPOSSIBLE;
422 }
423 }
424
425 return ret;
426 }
427
428 /* Suppose that operand DEF is used inside LOOP. Returns the outermost
429 loop to which we could move the expression using DEF if it did not have
430 other operands, i.e. the outermost loop enclosing LOOP in which the value
431 of DEF is invariant. */
432
433 static struct loop *
434 outermost_invariant_loop (tree def, struct loop *loop)
435 {
436 gimple def_stmt;
437 basic_block def_bb;
438 struct loop *max_loop;
439 struct lim_aux_data *lim_data;
440
441 if (!def)
442 return superloop_at_depth (loop, 1);
443
444 if (TREE_CODE (def) != SSA_NAME)
445 {
446 gcc_assert (is_gimple_min_invariant (def));
447 return superloop_at_depth (loop, 1);
448 }
449
450 def_stmt = SSA_NAME_DEF_STMT (def);
451 def_bb = gimple_bb (def_stmt);
452 if (!def_bb)
453 return superloop_at_depth (loop, 1);
454
455 max_loop = find_common_loop (loop, def_bb->loop_father);
456
457 lim_data = get_lim_data (def_stmt);
458 if (lim_data != NULL && lim_data->max_loop != NULL)
459 max_loop = find_common_loop (max_loop,
460 loop_outer (lim_data->max_loop));
461 if (max_loop == loop)
462 return NULL;
463 max_loop = superloop_at_depth (loop, loop_depth (max_loop) + 1);
464
465 return max_loop;
466 }
467
468 /* DATA is a structure containing information associated with a statement
469 inside LOOP. DEF is one of the operands of this statement.
470
471 Find the outermost loop enclosing LOOP in which the value of DEF is invariant
472 and record this in the DATA->max_loop field. If DEF itself is defined inside
473 this loop as well (i.e. we need to hoist it out of the loop if we want
474 to hoist the statement represented by DATA), record the statement in which
475 DEF is defined in the DATA->depends list. Additionally if ADD_COST is true,
476 add the cost of the computation of DEF to the DATA->cost.
477
478 If DEF is not invariant in LOOP, return false. Otherwise return true. */
479
480 static bool
481 add_dependency (tree def, struct lim_aux_data *data, struct loop *loop,
482 bool add_cost)
483 {
484 gimple def_stmt = SSA_NAME_DEF_STMT (def);
485 basic_block def_bb = gimple_bb (def_stmt);
486 struct loop *max_loop;
487 struct depend *dep;
488 struct lim_aux_data *def_data;
489
490 if (!def_bb)
491 return true;
492
493 max_loop = outermost_invariant_loop (def, loop);
494 if (!max_loop)
495 return false;
496
497 if (flow_loop_nested_p (data->max_loop, max_loop))
498 data->max_loop = max_loop;
499
500 def_data = get_lim_data (def_stmt);
501 if (!def_data)
502 return true;
503
504 if (add_cost
505 /* Only add the cost if the statement defining DEF is inside LOOP,
506 i.e. if it is likely that by moving the invariants dependent
507 on it, we will be able to avoid creating a new register for
508 it (since it will be only used in these dependent invariants). */
509 && def_bb->loop_father == loop)
510 data->cost += def_data->cost;
511
512 dep = XNEW (struct depend);
513 dep->stmt = def_stmt;
514 dep->next = data->depends;
515 data->depends = dep;
516
517 return true;
518 }
519
520 /* Returns an estimate of the cost of statement STMT. The values here
521 are just ad-hoc constants, similar to costs for inlining. */
522
523 static unsigned
524 stmt_cost (gimple stmt)
525 {
526 /* Always try to create possibilities for unswitching. */
527 if (gimple_code (stmt) == GIMPLE_COND
528 || gimple_code (stmt) == GIMPLE_PHI)
529 return LIM_EXPENSIVE;
530
531 /* We should be hoisting calls if possible. */
532 if (is_gimple_call (stmt))
533 {
534 tree fndecl;
535
536 /* Unless the call is a builtin_constant_p; this always folds to a
537 constant, so moving it is useless. */
538 fndecl = gimple_call_fndecl (stmt);
539 if (fndecl
540 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
541 && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_CONSTANT_P)
542 return 0;
543
544 return LIM_EXPENSIVE;
545 }
546
547 /* Hoisting memory references out should almost surely be a win. */
548 if (gimple_references_memory_p (stmt))
549 return LIM_EXPENSIVE;
550
551 if (gimple_code (stmt) != GIMPLE_ASSIGN)
552 return 1;
553
554 switch (gimple_assign_rhs_code (stmt))
555 {
556 case MULT_EXPR:
557 case WIDEN_MULT_EXPR:
558 case WIDEN_MULT_PLUS_EXPR:
559 case WIDEN_MULT_MINUS_EXPR:
560 case DOT_PROD_EXPR:
561 case FMA_EXPR:
562 case TRUNC_DIV_EXPR:
563 case CEIL_DIV_EXPR:
564 case FLOOR_DIV_EXPR:
565 case ROUND_DIV_EXPR:
566 case EXACT_DIV_EXPR:
567 case CEIL_MOD_EXPR:
568 case FLOOR_MOD_EXPR:
569 case ROUND_MOD_EXPR:
570 case TRUNC_MOD_EXPR:
571 case RDIV_EXPR:
572 /* Division and multiplication are usually expensive. */
573 return LIM_EXPENSIVE;
574
575 case LSHIFT_EXPR:
576 case RSHIFT_EXPR:
577 case WIDEN_LSHIFT_EXPR:
578 case LROTATE_EXPR:
579 case RROTATE_EXPR:
580 /* Shifts and rotates are usually expensive. */
581 return LIM_EXPENSIVE;
582
583 case CONSTRUCTOR:
584 /* Make vector construction cost proportional to the number
585 of elements. */
586 return CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt));
587
588 case SSA_NAME:
589 case PAREN_EXPR:
590 /* Whether or not something is wrapped inside a PAREN_EXPR
591 should not change move cost. Nor should an intermediate
592 unpropagated SSA name copy. */
593 return 0;
594
595 default:
596 return 1;
597 }
598 }
599
600 /* Finds the outermost loop between OUTER and LOOP in which the memory reference
601 REF is independent. If REF is not independent in LOOP, NULL is returned
602 instead. */
603
604 static struct loop *
605 outermost_indep_loop (struct loop *outer, struct loop *loop, mem_ref_p ref)
606 {
607 struct loop *aloop;
608
609 if (bitmap_bit_p (ref->stored, loop->num))
610 return NULL;
611
612 for (aloop = outer;
613 aloop != loop;
614 aloop = superloop_at_depth (loop, loop_depth (aloop) + 1))
615 if (!bitmap_bit_p (ref->stored, aloop->num)
616 && ref_indep_loop_p (aloop, ref))
617 return aloop;
618
619 if (ref_indep_loop_p (loop, ref))
620 return loop;
621 else
622 return NULL;
623 }
624
625 /* If there is a simple load or store to a memory reference in STMT, returns
626 the location of the memory reference, and sets IS_STORE according to whether
627 it is a store or load. Otherwise, returns NULL. */
628
629 static tree *
630 simple_mem_ref_in_stmt (gimple stmt, bool *is_store)
631 {
632 tree *lhs, *rhs;
633
634 /* Recognize SSA_NAME = MEM and MEM = (SSA_NAME | invariant) patterns. */
635 if (!gimple_assign_single_p (stmt))
636 return NULL;
637
638 lhs = gimple_assign_lhs_ptr (stmt);
639 rhs = gimple_assign_rhs1_ptr (stmt);
640
641 if (TREE_CODE (*lhs) == SSA_NAME && gimple_vuse (stmt))
642 {
643 *is_store = false;
644 return rhs;
645 }
646 else if (gimple_vdef (stmt)
647 && (TREE_CODE (*rhs) == SSA_NAME || is_gimple_min_invariant (*rhs)))
648 {
649 *is_store = true;
650 return lhs;
651 }
652 else
653 return NULL;
654 }
655
656 /* Returns the memory reference contained in STMT. */
657
658 static mem_ref_p
659 mem_ref_in_stmt (gimple stmt)
660 {
661 bool store;
662 tree *mem = simple_mem_ref_in_stmt (stmt, &store);
663 hashval_t hash;
664 mem_ref_p ref;
665
666 if (!mem)
667 return NULL;
668 gcc_assert (!store);
669
670 hash = iterative_hash_expr (*mem, 0);
671 ref = (mem_ref_p) htab_find_with_hash (memory_accesses.refs, *mem, hash);
672
673 gcc_assert (ref != NULL);
674 return ref;
675 }
676
677 /* From a controlling predicate in DOM determine the arguments from
678 the PHI node PHI that are chosen if the predicate evaluates to
679 true and false and store them to *TRUE_ARG_P and *FALSE_ARG_P if
680 they are non-NULL. Returns true if the arguments can be determined,
681 else return false. */
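/* For instance, in the diamond

       if (cond_1)   <-- DOM
        /        \
     bb_true   bb_false
        \        /
     x_3 = PHI <x_1 (bb_true), x_2 (bb_false)>   <-- PHI

   x_1 is the argument chosen when COND_1 is true and x_2 the one chosen
   when it is false.  */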
682
683 static bool
684 extract_true_false_args_from_phi (basic_block dom, gimple phi,
685 tree *true_arg_p, tree *false_arg_p)
686 {
687 basic_block bb = gimple_bb (phi);
688 edge true_edge, false_edge, tem;
689 tree arg0 = NULL_TREE, arg1 = NULL_TREE;
690
691 /* We have to verify that one edge into the PHI node is dominated
692 by the true edge of the predicate block and the other edge
693 dominated by the false edge. This ensures that the PHI argument
694 we are going to take is completely determined by the path we
695 take from the predicate block.
696 We can only use BB dominance checks below if the destination of
697 the true/false edges are dominated by their edge, thus only
698 have a single predecessor. */
699 extract_true_false_edges_from_block (dom, &true_edge, &false_edge);
700 tem = EDGE_PRED (bb, 0);
701 if (tem == true_edge
702 || (single_pred_p (true_edge->dest)
703 && (tem->src == true_edge->dest
704 || dominated_by_p (CDI_DOMINATORS,
705 tem->src, true_edge->dest))))
706 arg0 = PHI_ARG_DEF (phi, tem->dest_idx);
707 else if (tem == false_edge
708 || (single_pred_p (false_edge->dest)
709 && (tem->src == false_edge->dest
710 || dominated_by_p (CDI_DOMINATORS,
711 tem->src, false_edge->dest))))
712 arg1 = PHI_ARG_DEF (phi, tem->dest_idx);
713 else
714 return false;
715 tem = EDGE_PRED (bb, 1);
716 if (tem == true_edge
717 || (single_pred_p (true_edge->dest)
718 && (tem->src == true_edge->dest
719 || dominated_by_p (CDI_DOMINATORS,
720 tem->src, true_edge->dest))))
721 arg0 = PHI_ARG_DEF (phi, tem->dest_idx);
722 else if (tem == false_edge
723 || (single_pred_p (false_edge->dest)
724 && (tem->src == false_edge->dest
725 || dominated_by_p (CDI_DOMINATORS,
726 tem->src, false_edge->dest))))
727 arg1 = PHI_ARG_DEF (phi, tem->dest_idx);
728 else
729 return false;
730 if (!arg0 || !arg1)
731 return false;
732
733 if (true_arg_p)
734 *true_arg_p = arg0;
735 if (false_arg_p)
736 *false_arg_p = arg1;
737
738 return true;
739 }
740
741 /* Determine the outermost loop to which it is possible to hoist a statement
742 STMT and store it to LIM_DATA (STMT)->max_loop. To do this we determine
743 the outermost loop in which the value computed by STMT is invariant.
744 If MUST_PRESERVE_EXEC is true, additionally choose such a loop that
745 we preserve whether STMT is executed. It also fills in other related
746 information in LIM_DATA (STMT).
747
748 The function returns false if STMT cannot be hoisted outside of the loop it
749 is defined in, and true otherwise. */
750
751 static bool
752 determine_max_movement (gimple stmt, bool must_preserve_exec)
753 {
754 basic_block bb = gimple_bb (stmt);
755 struct loop *loop = bb->loop_father;
756 struct loop *level;
757 struct lim_aux_data *lim_data = get_lim_data (stmt);
758 tree val;
759 ssa_op_iter iter;
760
761 if (must_preserve_exec)
762 level = ALWAYS_EXECUTED_IN (bb);
763 else
764 level = superloop_at_depth (loop, 1);
765 lim_data->max_loop = level;
766
767 if (gimple_code (stmt) == GIMPLE_PHI)
768 {
769 use_operand_p use_p;
770 unsigned min_cost = UINT_MAX;
771 unsigned total_cost = 0;
772 struct lim_aux_data *def_data;
773
774 /* We will end up promoting dependencies to be unconditionally
775 evaluated. For this reason the PHI cost (and thus the
776 cost we remove from the loop by doing the invariant motion)
777 is that of the cheapest PHI argument dependency chain. */
778 FOR_EACH_PHI_ARG (use_p, stmt, iter, SSA_OP_USE)
779 {
780 val = USE_FROM_PTR (use_p);
781 if (TREE_CODE (val) != SSA_NAME)
782 continue;
783 if (!add_dependency (val, lim_data, loop, false))
784 return false;
785 def_data = get_lim_data (SSA_NAME_DEF_STMT (val));
786 if (def_data)
787 {
788 min_cost = MIN (min_cost, def_data->cost);
789 total_cost += def_data->cost;
790 }
791 }
792
793 lim_data->cost += min_cost;
794
795 if (gimple_phi_num_args (stmt) > 1)
796 {
797 basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
798 gimple cond;
799 if (gsi_end_p (gsi_last_bb (dom)))
800 return false;
801 cond = gsi_stmt (gsi_last_bb (dom));
802 if (gimple_code (cond) != GIMPLE_COND)
803 return false;
804 /* Verify that this is an extended form of a diamond and
805 the PHI arguments are completely controlled by the
806 predicate in DOM. */
807 if (!extract_true_false_args_from_phi (dom, stmt, NULL, NULL))
808 return false;
809
810 /* Fold in dependencies and cost of the condition. */
811 FOR_EACH_SSA_TREE_OPERAND (val, cond, iter, SSA_OP_USE)
812 {
813 if (!add_dependency (val, lim_data, loop, false))
814 return false;
815 def_data = get_lim_data (SSA_NAME_DEF_STMT (val));
816 if (def_data)
817 total_cost += def_data->cost;
818 }
819
820 /* We want to avoid unconditionally executing very expensive
821 operations. As costs for our dependencies cannot be
822 negative, just claim we are not invariant in this case.
823 We also are not sure whether the control-flow inside the
824 loop will vanish. */
825 if (total_cost - min_cost >= 2 * LIM_EXPENSIVE
826 && !(min_cost != 0
827 && total_cost / min_cost <= 2))
828 return false;
829
830 /* Assume that the control-flow in the loop will vanish.
831 ??? We should verify this and not artificially increase
832 the cost if that is not the case. */
833 lim_data->cost += stmt_cost (stmt);
834 }
835
836 return true;
837 }
838 else
839 FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_USE)
840 if (!add_dependency (val, lim_data, loop, true))
841 return false;
842
843 if (gimple_vuse (stmt))
844 {
845 mem_ref_p ref = mem_ref_in_stmt (stmt);
846
847 if (ref)
848 {
849 lim_data->max_loop
850 = outermost_indep_loop (lim_data->max_loop, loop, ref);
851 if (!lim_data->max_loop)
852 return false;
853 }
854 else
855 {
856 if ((val = gimple_vuse (stmt)) != NULL_TREE)
857 {
858 if (!add_dependency (val, lim_data, loop, false))
859 return false;
860 }
861 }
862 }
863
864 lim_data->cost += stmt_cost (stmt);
865
866 return true;
867 }
868
869 /* Suppose that some statement in ORIG_LOOP is hoisted to the loop LEVEL,
870 and that one of the operands of this statement is computed by STMT.
871 Ensure that STMT (together with all the statements that define its
872 operands) is hoisted at least out of the loop LEVEL. */
873
874 static void
875 set_level (gimple stmt, struct loop *orig_loop, struct loop *level)
876 {
877 struct loop *stmt_loop = gimple_bb (stmt)->loop_father;
878 struct depend *dep;
879 struct lim_aux_data *lim_data;
880
881 stmt_loop = find_common_loop (orig_loop, stmt_loop);
882 lim_data = get_lim_data (stmt);
883 if (lim_data != NULL && lim_data->tgt_loop != NULL)
884 stmt_loop = find_common_loop (stmt_loop,
885 loop_outer (lim_data->tgt_loop));
886 if (flow_loop_nested_p (stmt_loop, level))
887 return;
888
889 gcc_assert (level == lim_data->max_loop
890 || flow_loop_nested_p (lim_data->max_loop, level));
891
892 lim_data->tgt_loop = level;
893 for (dep = lim_data->depends; dep; dep = dep->next)
894 set_level (dep->stmt, orig_loop, level);
895 }
896
897 /* Determines the outermost loop from which we want to hoist the statement STMT.
898 For now we choose the outermost possible loop. TODO -- use profiling
899 information to set it more sanely. */
900
901 static void
902 set_profitable_level (gimple stmt)
903 {
904 set_level (stmt, gimple_bb (stmt)->loop_father, get_lim_data (stmt)->max_loop);
905 }
906
907 /* Returns true if STMT is a call that has side effects. */
908
909 static bool
910 nonpure_call_p (gimple stmt)
911 {
912 if (gimple_code (stmt) != GIMPLE_CALL)
913 return false;
914
915 return gimple_has_side_effects (stmt);
916 }
917
918 /* Rewrite a/b to a*(1/b). Return the invariant stmt to process. */
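/* For instance, with the divisor b_3 invariant in the loop,

     a_1 = x_2 / b_3;

   is rewritten to

     reciptmp_4 = 1.0 / b_3;
     a_1 = x_2 * reciptmp_4;

   so that the division can be hoisted out of the loop while only the
   multiplication remains inside it.  */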
919
920 static gimple
921 rewrite_reciprocal (gimple_stmt_iterator *bsi)
922 {
923 gimple stmt, stmt1, stmt2;
924 tree name, lhs, type;
925 tree real_one;
926 gimple_stmt_iterator gsi;
927
928 stmt = gsi_stmt (*bsi);
929 lhs = gimple_assign_lhs (stmt);
930 type = TREE_TYPE (lhs);
931
932 real_one = build_one_cst (type);
933
934 name = make_temp_ssa_name (type, NULL, "reciptmp");
935 stmt1 = gimple_build_assign_with_ops (RDIV_EXPR, name, real_one,
936 gimple_assign_rhs2 (stmt));
937
938 stmt2 = gimple_build_assign_with_ops (MULT_EXPR, lhs, name,
939 gimple_assign_rhs1 (stmt));
940
941 /* Replace division stmt with reciprocal and multiply stmts.
942 The multiply stmt is not invariant, so update iterator
943 and avoid rescanning. */
944 gsi = *bsi;
945 gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
946 gsi_replace (&gsi, stmt2, true);
947
948 /* Continue processing with invariant reciprocal statement. */
949 return stmt1;
950 }
951
952 /* Check if the pattern at *BSI is a bittest of the form
953 (A >> B) & 1 != 0 and in this case rewrite it to A & (1 << B) != 0. */
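/* For instance, with B invariant in the loop,

     tmp_1 = a_2 >> b_3;
     tmp_4 = tmp_1 & 1;
     if (tmp_4 != 0) ...

   is rewritten to

     shifttmp_5 = 1 << b_3;
     shifttmp_6 = a_2 & shifttmp_5;
     if (shifttmp_6 != 0) ...

   so that the statement computing the mask 1 << B can be hoisted out of
   the loop.  */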
954
955 static gimple
956 rewrite_bittest (gimple_stmt_iterator *bsi)
957 {
958 gimple stmt, use_stmt, stmt1, stmt2;
959 tree lhs, name, t, a, b;
960 use_operand_p use;
961
962 stmt = gsi_stmt (*bsi);
963 lhs = gimple_assign_lhs (stmt);
964
965 /* Verify that the single use of lhs is a comparison against zero. */
966 if (TREE_CODE (lhs) != SSA_NAME
967 || !single_imm_use (lhs, &use, &use_stmt)
968 || gimple_code (use_stmt) != GIMPLE_COND)
969 return stmt;
970 if (gimple_cond_lhs (use_stmt) != lhs
971 || (gimple_cond_code (use_stmt) != NE_EXPR
972 && gimple_cond_code (use_stmt) != EQ_EXPR)
973 || !integer_zerop (gimple_cond_rhs (use_stmt)))
974 return stmt;
975
976 /* Get at the operands of the shift. The rhs is TMP1 & 1. */
977 stmt1 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
978 if (gimple_code (stmt1) != GIMPLE_ASSIGN)
979 return stmt;
980
981 /* There is a conversion in between possibly inserted by fold. */
982 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt1)))
983 {
984 t = gimple_assign_rhs1 (stmt1);
985 if (TREE_CODE (t) != SSA_NAME
986 || !has_single_use (t))
987 return stmt;
988 stmt1 = SSA_NAME_DEF_STMT (t);
989 if (gimple_code (stmt1) != GIMPLE_ASSIGN)
990 return stmt;
991 }
992
993 /* Verify that B is loop invariant but A is not. Verify that with
994 all the stmt walking we are still in the same loop. */
995 if (gimple_assign_rhs_code (stmt1) != RSHIFT_EXPR
996 || loop_containing_stmt (stmt1) != loop_containing_stmt (stmt))
997 return stmt;
998
999 a = gimple_assign_rhs1 (stmt1);
1000 b = gimple_assign_rhs2 (stmt1);
1001
1002 if (outermost_invariant_loop (b, loop_containing_stmt (stmt1)) != NULL
1003 && outermost_invariant_loop (a, loop_containing_stmt (stmt1)) == NULL)
1004 {
1005 gimple_stmt_iterator rsi;
1006
1007 /* 1 << B */
1008 t = fold_build2 (LSHIFT_EXPR, TREE_TYPE (a),
1009 build_int_cst (TREE_TYPE (a), 1), b);
1010 name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
1011 stmt1 = gimple_build_assign (name, t);
1012
1013 /* A & (1 << B) */
1014 t = fold_build2 (BIT_AND_EXPR, TREE_TYPE (a), a, name);
1015 name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
1016 stmt2 = gimple_build_assign (name, t);
1017
1018 /* Replace the SSA_NAME we compare against zero. Adjust
1019 the type of zero accordingly. */
1020 SET_USE (use, name);
1021 gimple_cond_set_rhs (use_stmt, build_int_cst_type (TREE_TYPE (name), 0));
1022
1023 /* Don't use gsi_replace here, none of the new assignments sets
1024 the variable originally set in stmt. Move bsi to stmt1, and
1025 then remove the original stmt, so that we get a chance to
1026 retain debug info for it. */
1027 rsi = *bsi;
1028 gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
1029 gsi_insert_before (&rsi, stmt2, GSI_SAME_STMT);
1030 gsi_remove (&rsi, true);
1031
1032 return stmt1;
1033 }
1034
1035 return stmt;
1036 }
1037
1038
1039 /* Determine the outermost loops in which statements in basic block BB are
1040 invariant, and record them in the LIM_DATA associated with the statements.
1041 Callback for walk_dominator_tree. */
1042
1043 static void
1044 determine_invariantness_stmt (struct dom_walk_data *dw_data ATTRIBUTE_UNUSED,
1045 basic_block bb)
1046 {
1047 enum move_pos pos;
1048 gimple_stmt_iterator bsi;
1049 gimple stmt;
1050 bool maybe_never = ALWAYS_EXECUTED_IN (bb) == NULL;
1051 struct loop *outermost = ALWAYS_EXECUTED_IN (bb);
1052 struct lim_aux_data *lim_data;
1053
1054 if (!loop_outer (bb->loop_father))
1055 return;
1056
1057 if (dump_file && (dump_flags & TDF_DETAILS))
1058 fprintf (dump_file, "Basic block %d (loop %d -- depth %d):\n\n",
1059 bb->index, bb->loop_father->num, loop_depth (bb->loop_father));
1060
1061 /* Look at PHI nodes, but only if there are at most two.
1062 ??? We could relax this further by post-processing the inserted
1063 code and transforming adjacent cond-exprs with the same predicate
1064 to control flow again. */
1065 bsi = gsi_start_phis (bb);
1066 if (!gsi_end_p (bsi)
1067 && ((gsi_next (&bsi), gsi_end_p (bsi))
1068 || (gsi_next (&bsi), gsi_end_p (bsi))))
1069 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1070 {
1071 stmt = gsi_stmt (bsi);
1072
1073 pos = movement_possibility (stmt);
1074 if (pos == MOVE_IMPOSSIBLE)
1075 continue;
1076
1077 lim_data = init_lim_data (stmt);
1078 lim_data->always_executed_in = outermost;
1079
1080 if (!determine_max_movement (stmt, false))
1081 {
1082 lim_data->max_loop = NULL;
1083 continue;
1084 }
1085
1086 if (dump_file && (dump_flags & TDF_DETAILS))
1087 {
1088 print_gimple_stmt (dump_file, stmt, 2, 0);
1089 fprintf (dump_file, " invariant up to level %d, cost %d.\n\n",
1090 loop_depth (lim_data->max_loop),
1091 lim_data->cost);
1092 }
1093
1094 if (lim_data->cost >= LIM_EXPENSIVE)
1095 set_profitable_level (stmt);
1096 }
1097
1098 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1099 {
1100 stmt = gsi_stmt (bsi);
1101
1102 pos = movement_possibility (stmt);
1103 if (pos == MOVE_IMPOSSIBLE)
1104 {
1105 if (nonpure_call_p (stmt))
1106 {
1107 maybe_never = true;
1108 outermost = NULL;
1109 }
1110 /* Make sure to note always_executed_in for stores to make
1111 store-motion work. */
1112 else if (stmt_makes_single_store (stmt))
1113 {
1114 struct lim_aux_data *lim_data = init_lim_data (stmt);
1115 lim_data->always_executed_in = outermost;
1116 }
1117 continue;
1118 }
1119
1120 if (is_gimple_assign (stmt)
1121 && (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
1122 == GIMPLE_BINARY_RHS))
1123 {
1124 tree op0 = gimple_assign_rhs1 (stmt);
1125 tree op1 = gimple_assign_rhs2 (stmt);
1126 struct loop *ol1 = outermost_invariant_loop (op1,
1127 loop_containing_stmt (stmt));
1128
1129 /* If divisor is invariant, convert a/b to a*(1/b), allowing reciprocal
1130 to be hoisted out of loop, saving expensive divide. */
1131 if (pos == MOVE_POSSIBLE
1132 && gimple_assign_rhs_code (stmt) == RDIV_EXPR
1133 && flag_unsafe_math_optimizations
1134 && !flag_trapping_math
1135 && ol1 != NULL
1136 && outermost_invariant_loop (op0, ol1) == NULL)
1137 stmt = rewrite_reciprocal (&bsi);
1138
1139 /* If the shift count is invariant, convert (A >> B) & 1 to
1140 A & (1 << B) allowing the bit mask to be hoisted out of the loop
1141 saving an expensive shift. */
1142 if (pos == MOVE_POSSIBLE
1143 && gimple_assign_rhs_code (stmt) == BIT_AND_EXPR
1144 && integer_onep (op1)
1145 && TREE_CODE (op0) == SSA_NAME
1146 && has_single_use (op0))
1147 stmt = rewrite_bittest (&bsi);
1148 }
1149
1150 lim_data = init_lim_data (stmt);
1151 lim_data->always_executed_in = outermost;
1152
1153 if (maybe_never && pos == MOVE_PRESERVE_EXECUTION)
1154 continue;
1155
1156 if (!determine_max_movement (stmt, pos == MOVE_PRESERVE_EXECUTION))
1157 {
1158 lim_data->max_loop = NULL;
1159 continue;
1160 }
1161
1162 if (dump_file && (dump_flags & TDF_DETAILS))
1163 {
1164 print_gimple_stmt (dump_file, stmt, 2, 0);
1165 fprintf (dump_file, " invariant up to level %d, cost %d.\n\n",
1166 loop_depth (lim_data->max_loop),
1167 lim_data->cost);
1168 }
1169
1170 if (lim_data->cost >= LIM_EXPENSIVE)
1171 set_profitable_level (stmt);
1172 }
1173 }
1174
1175 /* For each statement, determine the outermost loop in which it is invariant,
1176 the statements on whose motion it depends, and the cost of the computation.
1177 This information is stored in the LIM_DATA structure associated with
1178 each statement. */
1179
1180 static void
1181 determine_invariantness (void)
1182 {
1183 struct dom_walk_data walk_data;
1184
1185 memset (&walk_data, 0, sizeof (struct dom_walk_data));
1186 walk_data.dom_direction = CDI_DOMINATORS;
1187 walk_data.before_dom_children = determine_invariantness_stmt;
1188
1189 init_walk_dominator_tree (&walk_data);
1190 walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);
1191 fini_walk_dominator_tree (&walk_data);
1192 }
1193
1194 /* Hoist the statements in basic block BB out of the loops prescribed by
1195 data stored in LIM_DATA structures associated with each statement. Callback
1196 for walk_dominator_tree. */
1197
1198 static void
1199 move_computations_stmt (struct dom_walk_data *dw_data,
1200 basic_block bb)
1201 {
1202 struct loop *level;
1203 gimple_stmt_iterator bsi;
1204 gimple stmt;
1205 unsigned cost = 0;
1206 struct lim_aux_data *lim_data;
1207
1208 if (!loop_outer (bb->loop_father))
1209 return;
1210
1211 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); )
1212 {
1213 gimple new_stmt;
1214 stmt = gsi_stmt (bsi);
1215
1216 lim_data = get_lim_data (stmt);
1217 if (lim_data == NULL)
1218 {
1219 gsi_next (&bsi);
1220 continue;
1221 }
1222
1223 cost = lim_data->cost;
1224 level = lim_data->tgt_loop;
1225 clear_lim_data (stmt);
1226
1227 if (!level)
1228 {
1229 gsi_next (&bsi);
1230 continue;
1231 }
1232
1233 if (dump_file && (dump_flags & TDF_DETAILS))
1234 {
1235 fprintf (dump_file, "Moving PHI node\n");
1236 print_gimple_stmt (dump_file, stmt, 0, 0);
1237 fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
1238 cost, level->num);
1239 }
1240
1241 if (gimple_phi_num_args (stmt) == 1)
1242 {
1243 tree arg = PHI_ARG_DEF (stmt, 0);
1244 new_stmt = gimple_build_assign_with_ops (TREE_CODE (arg),
1245 gimple_phi_result (stmt),
1246 arg, NULL_TREE);
1247 SSA_NAME_DEF_STMT (gimple_phi_result (stmt)) = new_stmt;
1248 }
1249 else
1250 {
1251 basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
1252 gimple cond = gsi_stmt (gsi_last_bb (dom));
1253 tree arg0 = NULL_TREE, arg1 = NULL_TREE, t;
1254 /* Get the PHI arguments corresponding to the true and false
1255 edges of COND. */
1256 extract_true_false_args_from_phi (dom, stmt, &arg0, &arg1);
1257 gcc_assert (arg0 && arg1);
1258 t = build2 (gimple_cond_code (cond), boolean_type_node,
1259 gimple_cond_lhs (cond), gimple_cond_rhs (cond));
1260 new_stmt = gimple_build_assign_with_ops (COND_EXPR,
1261 gimple_phi_result (stmt),
1262 t, arg0, arg1);
1263 SSA_NAME_DEF_STMT (gimple_phi_result (stmt)) = new_stmt;
1264 *((unsigned int *)(dw_data->global_data)) |= TODO_cleanup_cfg;
1265 }
1266 gsi_insert_on_edge (loop_preheader_edge (level), new_stmt);
1267 remove_phi_node (&bsi, false);
1268 }
1269
1270 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); )
1271 {
1272 edge e;
1273
1274 stmt = gsi_stmt (bsi);
1275
1276 lim_data = get_lim_data (stmt);
1277 if (lim_data == NULL)
1278 {
1279 gsi_next (&bsi);
1280 continue;
1281 }
1282
1283 cost = lim_data->cost;
1284 level = lim_data->tgt_loop;
1285 clear_lim_data (stmt);
1286
1287 if (!level)
1288 {
1289 gsi_next (&bsi);
1290 continue;
1291 }
1292
1293 /* We do not really want to move conditionals out of the loop; we just
1294 placed them here to force their operands to be moved if necessary. */
1295 if (gimple_code (stmt) == GIMPLE_COND)
1296 continue;
1297
1298 if (dump_file && (dump_flags & TDF_DETAILS))
1299 {
1300 fprintf (dump_file, "Moving statement\n");
1301 print_gimple_stmt (dump_file, stmt, 0, 0);
1302 fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
1303 cost, level->num);
1304 }
1305
1306 e = loop_preheader_edge (level);
1307 gcc_assert (!gimple_vdef (stmt));
1308 if (gimple_vuse (stmt))
1309 {
1310 /* The new VUSE is the one from the virtual PHI in the loop
1311 header or the one already present. */
1312 gimple_stmt_iterator gsi2;
1313 for (gsi2 = gsi_start_phis (e->dest);
1314 !gsi_end_p (gsi2); gsi_next (&gsi2))
1315 {
1316 gimple phi = gsi_stmt (gsi2);
1317 if (virtual_operand_p (gimple_phi_result (phi)))
1318 {
1319 gimple_set_vuse (stmt, PHI_ARG_DEF_FROM_EDGE (phi, e));
1320 break;
1321 }
1322 }
1323 }
1324 gsi_remove (&bsi, false);
1325 gsi_insert_on_edge (e, stmt);
1326 }
1327 }
1328
1329 /* Hoist the statements out of the loops prescribed by data stored in
1330 LIM_DATA structures associated with each statement. */
1331
1332 static unsigned int
1333 move_computations (void)
1334 {
1335 struct dom_walk_data walk_data;
1336 unsigned int todo = 0;
1337
1338 memset (&walk_data, 0, sizeof (struct dom_walk_data));
1339 walk_data.global_data = &todo;
1340 walk_data.dom_direction = CDI_DOMINATORS;
1341 walk_data.before_dom_children = move_computations_stmt;
1342
1343 init_walk_dominator_tree (&walk_data);
1344 walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);
1345 fini_walk_dominator_tree (&walk_data);
1346
1347 gsi_commit_edge_inserts ();
1348 if (need_ssa_update_p (cfun))
1349 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1350
1351 return todo;
1352 }
1353
1354 /* Checks whether the statement defining variable *INDEX can be hoisted
1355 out of the loop passed in DATA. Callback for for_each_index. */
1356
1357 static bool
1358 may_move_till (tree ref, tree *index, void *data)
1359 {
1360 struct loop *loop = (struct loop *) data, *max_loop;
1361
1362 /* If REF is an array reference, check also that the step and the lower
1363 bound are invariant in LOOP. */
1364 if (TREE_CODE (ref) == ARRAY_REF)
1365 {
1366 tree step = TREE_OPERAND (ref, 3);
1367 tree lbound = TREE_OPERAND (ref, 2);
1368
1369 max_loop = outermost_invariant_loop (step, loop);
1370 if (!max_loop)
1371 return false;
1372
1373 max_loop = outermost_invariant_loop (lbound, loop);
1374 if (!max_loop)
1375 return false;
1376 }
1377
1378 max_loop = outermost_invariant_loop (*index, loop);
1379 if (!max_loop)
1380 return false;
1381
1382 return true;
1383 }
1384
1385 /* If OP is an SSA name, force the statement that defines it to be
1386 moved out of LOOP. ORIG_LOOP is the loop in which OP is used. */
1387
1388 static void
1389 force_move_till_op (tree op, struct loop *orig_loop, struct loop *loop)
1390 {
1391 gimple stmt;
1392
1393 if (!op
1394 || is_gimple_min_invariant (op))
1395 return;
1396
1397 gcc_assert (TREE_CODE (op) == SSA_NAME);
1398
1399 stmt = SSA_NAME_DEF_STMT (op);
1400 if (gimple_nop_p (stmt))
1401 return;
1402
1403 set_level (stmt, orig_loop, loop);
1404 }
1405
1406 /* Forces statements defining invariants in REF (and *INDEX) to be moved out of
1407 LOOP. The reference REF is used in the loop ORIG_LOOP. Callback for
1408 for_each_index. */
1409
1410 struct fmt_data
1411 {
1412 struct loop *loop;
1413 struct loop *orig_loop;
1414 };
1415
1416 static bool
1417 force_move_till (tree ref, tree *index, void *data)
1418 {
1419 struct fmt_data *fmt_data = (struct fmt_data *) data;
1420
1421 if (TREE_CODE (ref) == ARRAY_REF)
1422 {
1423 tree step = TREE_OPERAND (ref, 3);
1424 tree lbound = TREE_OPERAND (ref, 2);
1425
1426 force_move_till_op (step, fmt_data->orig_loop, fmt_data->loop);
1427 force_move_till_op (lbound, fmt_data->orig_loop, fmt_data->loop);
1428 }
1429
1430 force_move_till_op (*index, fmt_data->orig_loop, fmt_data->loop);
1431
1432 return true;
1433 }
1434
1435 /* A hash function for struct mem_ref object OBJ. */
1436
1437 static hashval_t
1438 memref_hash (const void *obj)
1439 {
1440 const struct mem_ref *const mem = (const struct mem_ref *) obj;
1441
1442 return mem->hash;
1443 }
1444
1445 /* An equality function for struct mem_ref object OBJ1 with
1446 memory reference OBJ2. */
1447
1448 static int
1449 memref_eq (const void *obj1, const void *obj2)
1450 {
1451 const struct mem_ref *const mem1 = (const struct mem_ref *) obj1;
1452
1453 return operand_equal_p (mem1->mem, (const_tree) obj2, 0);
1454 }
1455
1456 /* Releases list of memory reference locations ACCS. */
1457
1458 static void
1459 free_mem_ref_locs (mem_ref_locs_p accs)
1460 {
1461 unsigned i;
1462 mem_ref_loc_p loc;
1463
1464 if (!accs)
1465 return;
1466
1467 FOR_EACH_VEC_ELT (accs->locs, i, loc)
1468 free (loc);
1469 accs->locs.release ();
1470 free (accs);
1471 }
1472
1473 /* A function to free the mem_ref object OBJ. */
1474
1475 static void
1476 memref_free (struct mem_ref *mem)
1477 {
1478 unsigned i;
1479 mem_ref_locs_p accs;
1480
1481 FOR_EACH_VEC_ELT (mem->accesses_in_loop, i, accs)
1482 free_mem_ref_locs (accs);
1483 mem->accesses_in_loop.release ();
1484
1485 free (mem);
1486 }
1487
1488 /* Allocates and returns a memory reference description for MEM whose hash
1489 value is HASH and id is ID. */
1490
1491 static mem_ref_p
1492 mem_ref_alloc (tree mem, unsigned hash, unsigned id)
1493 {
1494 mem_ref_p ref = XNEW (struct mem_ref);
1495 ref->mem = mem;
1496 ref->id = id;
1497 ref->hash = hash;
1498 ref->stored = BITMAP_ALLOC (&lim_bitmap_obstack);
1499 ref->indep_loop = BITMAP_ALLOC (&lim_bitmap_obstack);
1500 ref->dep_loop = BITMAP_ALLOC (&lim_bitmap_obstack);
1501 ref->indep_ref = BITMAP_ALLOC (&lim_bitmap_obstack);
1502 ref->dep_ref = BITMAP_ALLOC (&lim_bitmap_obstack);
1503 ref->accesses_in_loop.create (0);
1504
1505 return ref;
1506 }
1507
1508 /* Allocates and returns the new list of locations. */
1509
1510 static mem_ref_locs_p
1511 mem_ref_locs_alloc (void)
1512 {
1513 mem_ref_locs_p accs = XNEW (struct mem_ref_locs);
1514 accs->locs.create (0);
1515 return accs;
1516 }
1517
1518 /* Records memory reference location *LOC in LOOP to the memory reference
1519 description REF. The reference occurs in statement STMT. */
1520
1521 static void
1522 record_mem_ref_loc (mem_ref_p ref, struct loop *loop, gimple stmt, tree *loc)
1523 {
1524 mem_ref_loc_p aref = XNEW (struct mem_ref_loc);
1525 mem_ref_locs_p accs;
1526 bitmap ril = memory_accesses.refs_in_loop[loop->num];
1527
1528 if (ref->accesses_in_loop.length ()
1529 <= (unsigned) loop->num)
1530 ref->accesses_in_loop.safe_grow_cleared (loop->num + 1);
1531 accs = ref->accesses_in_loop[loop->num];
1532 if (!accs)
1533 {
1534 accs = mem_ref_locs_alloc ();
1535 ref->accesses_in_loop[loop->num] = accs;
1536 }
1537
1538 aref->stmt = stmt;
1539 aref->ref = loc;
1540
1541 accs->locs.safe_push (aref);
1542 bitmap_set_bit (ril, ref->id);
1543 }
1544
1545 /* Marks reference REF as stored in LOOP. */
1546
1547 static void
1548 mark_ref_stored (mem_ref_p ref, struct loop *loop)
1549 {
1550 for (;
1551 loop != current_loops->tree_root
1552 && !bitmap_bit_p (ref->stored, loop->num);
1553 loop = loop_outer (loop))
1554 bitmap_set_bit (ref->stored, loop->num);
1555 }
1556
1557 /* Gathers memory references in statement STMT in LOOP, storing the
1558 information about them in the memory_accesses structure. Marks
1559 the vops accessed through unrecognized statements there as
1560 well. */
1561
1562 static void
1563 gather_mem_refs_stmt (struct loop *loop, gimple stmt)
1564 {
1565 tree *mem = NULL;
1566 hashval_t hash;
1567 PTR *slot;
1568 mem_ref_p ref;
1569 bool is_stored;
1570 unsigned id;
1571
1572 if (!gimple_vuse (stmt))
1573 return;
1574
1575 mem = simple_mem_ref_in_stmt (stmt, &is_stored);
1576 if (!mem)
1577 {
1578 id = memory_accesses.refs_list.length ();
1579 ref = mem_ref_alloc (error_mark_node, 0, id);
1580 memory_accesses.refs_list.safe_push (ref);
1581 if (dump_file && (dump_flags & TDF_DETAILS))
1582 {
1583 fprintf (dump_file, "Unanalyzed memory reference %u: ", id);
1584 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1585 }
1586 if (gimple_vdef (stmt))
1587 mark_ref_stored (ref, loop);
1588 record_mem_ref_loc (ref, loop, stmt, mem);
1589 return;
1590 }
1591
1592 hash = iterative_hash_expr (*mem, 0);
1593 slot = htab_find_slot_with_hash (memory_accesses.refs, *mem, hash, INSERT);
1594
1595 if (*slot)
1596 {
1597 ref = (mem_ref_p) *slot;
1598 id = ref->id;
1599 }
1600 else
1601 {
1602 id = memory_accesses.refs_list.length ();
1603 ref = mem_ref_alloc (*mem, hash, id);
1604 memory_accesses.refs_list.safe_push (ref);
1605 *slot = ref;
1606
1607 if (dump_file && (dump_flags & TDF_DETAILS))
1608 {
1609 fprintf (dump_file, "Memory reference %u: ", id);
1610 print_generic_expr (dump_file, ref->mem, TDF_SLIM);
1611 fprintf (dump_file, "\n");
1612 }
1613 }
1614
1615 if (is_stored)
1616 mark_ref_stored (ref, loop);
1617
1618 record_mem_ref_loc (ref, loop, stmt, mem);
1619 return;
1620 }
1621
1622 /* Gathers memory references in loops. */
1623
1624 static void
1625 gather_mem_refs_in_loops (void)
1626 {
1627 gimple_stmt_iterator bsi;
1628 basic_block bb;
1629 struct loop *loop;
1630 loop_iterator li;
1631 bitmap lrefs, alrefs, alrefso;
1632
1633 FOR_EACH_BB (bb)
1634 {
1635 loop = bb->loop_father;
1636 if (loop == current_loops->tree_root)
1637 continue;
1638
1639 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1640 gather_mem_refs_stmt (loop, gsi_stmt (bsi));
1641 }
1642
1643 /* Propagate the information about accessed memory references up
1644 the loop hierarchy. */
1645 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
1646 {
1647 lrefs = memory_accesses.refs_in_loop[loop->num];
1648 alrefs = memory_accesses.all_refs_in_loop[loop->num];
1649 bitmap_ior_into (alrefs, lrefs);
1650
1651 if (loop_outer (loop) == current_loops->tree_root)
1652 continue;
1653
1654 alrefso = memory_accesses.all_refs_in_loop[loop_outer (loop)->num];
1655 bitmap_ior_into (alrefso, alrefs);
1656 }
1657 }
1658
1659 /* Create a mapping from virtual operands to references that touch them
1660 in LOOP. */
1661
1662 static void
1663 create_vop_ref_mapping_loop (struct loop *loop)
1664 {
1665 bitmap refs = memory_accesses.refs_in_loop[loop->num];
1666 struct loop *sloop;
1667 bitmap_iterator bi;
1668 unsigned i;
1669 mem_ref_p ref;
1670
1671 EXECUTE_IF_SET_IN_BITMAP (refs, 0, i, bi)
1672 {
1673 ref = memory_accesses.refs_list[i];
1674 for (sloop = loop; sloop != current_loops->tree_root;
1675 sloop = loop_outer (sloop))
1676 if (bitmap_bit_p (ref->stored, loop->num))
1677 {
1678 bitmap refs_stored
1679 = memory_accesses.all_refs_stored_in_loop[sloop->num];
1680 bitmap_set_bit (refs_stored, ref->id);
1681 }
1682 }
1683 }
1684
1685 /* For each non-clobbered virtual operand and each loop, record the memory
1686 references in this loop that touch the operand. */
1687
1688 static void
1689 create_vop_ref_mapping (void)
1690 {
1691 loop_iterator li;
1692 struct loop *loop;
1693
1694 FOR_EACH_LOOP (li, loop, 0)
1695 {
1696 create_vop_ref_mapping_loop (loop);
1697 }
1698 }
1699
1700 /* Gathers information about memory accesses in the loops. */
1701
1702 static void
1703 analyze_memory_references (void)
1704 {
1705 unsigned i;
1706 bitmap empty;
1707
1708 memory_accesses.refs = htab_create (100, memref_hash, memref_eq, NULL);
1709 memory_accesses.refs_list.create (0);
1710 memory_accesses.refs_in_loop.create (number_of_loops ());
1711 memory_accesses.all_refs_in_loop.create (number_of_loops ());
1712 memory_accesses.all_refs_stored_in_loop.create (number_of_loops ());
1713
1714 for (i = 0; i < number_of_loops (); i++)
1715 {
1716 empty = BITMAP_ALLOC (&lim_bitmap_obstack);
1717 memory_accesses.refs_in_loop.quick_push (empty);
1718 empty = BITMAP_ALLOC (&lim_bitmap_obstack);
1719 memory_accesses.all_refs_in_loop.quick_push (empty);
1720 empty = BITMAP_ALLOC (&lim_bitmap_obstack);
1721 memory_accesses.all_refs_stored_in_loop.quick_push (empty);
1722 }
1723
1724 memory_accesses.ttae_cache = NULL;
1725
1726 gather_mem_refs_in_loops ();
1727 create_vop_ref_mapping ();
1728 }
1729
1730 /* Returns true if MEM1 and MEM2 may alias. TTAE_CACHE is used as a cache in
1731 tree_to_aff_combination_expand. */
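/* For instance, accesses such as a[3] and a[4] are based on the same
   object, but their offsets differ by a full element, so the locations
   cannot overlap and the references do not alias.  */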
1732
1733 static bool
1734 mem_refs_may_alias_p (tree mem1, tree mem2, struct pointer_map_t **ttae_cache)
1735 {
1736 /* Perform BASE + OFFSET analysis -- if MEM1 and MEM2 are based on the same
1737 object and their offsets differ in such a way that the locations cannot
1738 overlap, then they cannot alias. */
1739 double_int size1, size2;
1740 aff_tree off1, off2;
1741
1742 /* Perform basic offset and type-based disambiguation. */
1743 if (!refs_may_alias_p (mem1, mem2))
1744 return false;
1745
1746 /* The expansion of addresses may be a bit expensive, thus we only do
1747 the check at -O2 and higher optimization levels. */
1748 if (optimize < 2)
1749 return true;
1750
1751 get_inner_reference_aff (mem1, &off1, &size1);
1752 get_inner_reference_aff (mem2, &off2, &size2);
1753 aff_combination_expand (&off1, ttae_cache);
1754 aff_combination_expand (&off2, ttae_cache);
1755 aff_combination_scale (&off1, double_int_minus_one);
1756 aff_combination_add (&off2, &off1);
1757
1758 if (aff_comb_cannot_overlap_p (&off2, size1, size2))
1759 return false;
1760
1761 return true;
1762 }
1763
1764 /* Rewrites location LOC by TMP_VAR. */
1765
1766 static void
1767 rewrite_mem_ref_loc (mem_ref_loc_p loc, tree tmp_var)
1768 {
1769 *loc->ref = tmp_var;
1770 update_stmt (loc->stmt);
1771 }
1772
1773 /* Adds all locations of REF in LOOP and its subloops to LOCS. */
1774
1775 static void
1776 get_all_locs_in_loop (struct loop *loop, mem_ref_p ref,
1777 vec<mem_ref_loc_p> *locs)
1778 {
1779 mem_ref_locs_p accs;
1780 unsigned i;
1781 mem_ref_loc_p loc;
1782 bitmap refs = memory_accesses.all_refs_in_loop[loop->num];
1783 struct loop *subloop;
1784
1785 if (!bitmap_bit_p (refs, ref->id))
1786 return;
1787
1788 if (ref->accesses_in_loop.length ()
1789 > (unsigned) loop->num)
1790 {
1791 accs = ref->accesses_in_loop[loop->num];
1792 if (accs)
1793 {
1794 FOR_EACH_VEC_ELT (accs->locs, i, loc)
1795 locs->safe_push (loc);
1796 }
1797 }
1798
1799 for (subloop = loop->inner; subloop != NULL; subloop = subloop->next)
1800 get_all_locs_in_loop (subloop, ref, locs);
1801 }
1802
1803 /* Rewrites all references to REF in LOOP by variable TMP_VAR. */
1804
1805 static void
1806 rewrite_mem_refs (struct loop *loop, mem_ref_p ref, tree tmp_var)
1807 {
1808 unsigned i;
1809 mem_ref_loc_p loc;
1810 vec<mem_ref_loc_p> locs = vNULL;
1811
1812 get_all_locs_in_loop (loop, ref, &locs);
1813 FOR_EACH_VEC_ELT (locs, i, loc)
1814 rewrite_mem_ref_loc (loc, tmp_var);
1815 locs.release ();
1816 }
1817
1818 /* The name and the length of the currently generated variable
1819 for lsm. */
1820 #define MAX_LSM_NAME_LENGTH 40
1821 static char lsm_tmp_name[MAX_LSM_NAME_LENGTH + 1];
1822 static int lsm_tmp_name_length;
1823
1824 /* Adds S to lsm_tmp_name. */
1825
1826 static void
1827 lsm_tmp_name_add (const char *s)
1828 {
1829 int l = strlen (s) + lsm_tmp_name_length;
1830 if (l > MAX_LSM_NAME_LENGTH)
1831 return;
1832
1833 strcpy (lsm_tmp_name + lsm_tmp_name_length, s);
1834 lsm_tmp_name_length = l;
1835 }
1836
1837 /* Stores the name for the temporary variable that replaces REF into
1838 lsm_tmp_name. */
1839
1840 static void
1841 gen_lsm_tmp_name (tree ref)
1842 {
1843 const char *name;
1844
1845 switch (TREE_CODE (ref))
1846 {
1847 case MEM_REF:
1848 case TARGET_MEM_REF:
1849 gen_lsm_tmp_name (TREE_OPERAND (ref, 0));
1850 lsm_tmp_name_add ("_");
1851 break;
1852
1853 case ADDR_EXPR:
1854 gen_lsm_tmp_name (TREE_OPERAND (ref, 0));
1855 break;
1856
1857 case BIT_FIELD_REF:
1858 case VIEW_CONVERT_EXPR:
1859 case ARRAY_RANGE_REF:
1860 gen_lsm_tmp_name (TREE_OPERAND (ref, 0));
1861 break;
1862
1863 case REALPART_EXPR:
1864 gen_lsm_tmp_name (TREE_OPERAND (ref, 0));
1865 lsm_tmp_name_add ("_RE");
1866 break;
1867
1868 case IMAGPART_EXPR:
1869 gen_lsm_tmp_name (TREE_OPERAND (ref, 0));
1870 lsm_tmp_name_add ("_IM");
1871 break;
1872
1873 case COMPONENT_REF:
1874 gen_lsm_tmp_name (TREE_OPERAND (ref, 0));
1875 lsm_tmp_name_add ("_");
1876 name = get_name (TREE_OPERAND (ref, 1));
1877 if (!name)
1878 name = "F";
1879 lsm_tmp_name_add (name);
1880 break;
1881
1882 case ARRAY_REF:
1883 gen_lsm_tmp_name (TREE_OPERAND (ref, 0));
1884 lsm_tmp_name_add ("_I");
1885 break;
1886
1887 case SSA_NAME:
1888 case VAR_DECL:
1889 case PARM_DECL:
1890 name = get_name (ref);
1891 if (!name)
1892 name = "D";
1893 lsm_tmp_name_add (name);
1894 break;
1895
1896 case STRING_CST:
1897 lsm_tmp_name_add ("S");
1898 break;
1899
1900 case RESULT_DECL:
1901 lsm_tmp_name_add ("R");
1902 break;
1903
1904 case INTEGER_CST:
1905 /* Nothing. */
1906 break;
1907
1908 default:
1909 gcc_unreachable ();
1910 }
1911 }
1912
1913 /* Determines the name for the temporary variable that replaces REF.
1914 The name is accumulated into the lsm_tmp_name variable.
1915 N is added to the name of the temporary. */
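/* For instance, for the reference a.b[i] and N equal to 0, the generated
   name is roughly "a_b_I_lsm0": the walk over the reference contributes
   "a", "_b" and "_I", and the "_lsm" suffix and N are appended here.  */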
1916
1917 char *
1918 get_lsm_tmp_name (tree ref, unsigned n)
1919 {
1920 char ns[2];
1921
1922 lsm_tmp_name_length = 0;
1923 gen_lsm_tmp_name (ref);
1924 lsm_tmp_name_add ("_lsm");
1925 if (n < 10)
1926 {
1927 ns[0] = '0' + n;
1928 ns[1] = 0;
1929 lsm_tmp_name_add (ns);
1930 }
1931 return lsm_tmp_name;
1932 }
1933
1934 struct prev_flag_edges {
1935 /* Edge to insert new flag comparison code. */
1936 edge append_cond_position;
1937
1938 /* Edge for fall through from previous flag comparison. */
1939 edge last_cond_fallthru;
1940 };
1941
1942 /* Helper function for execute_sm. Emit code to store TMP_VAR into
1943 MEM along edge EX.
1944
1945 The store is only done if MEM has changed. We do this so no
1946 changes to MEM occur on code paths that did not originally store
1947 into it.
1948
1949 The common case for execute_sm will transform:
1950
1951 for (...) {
1952 if (foo)
1953 stuff;
1954 else
1955 MEM = TMP_VAR;
1956 }
1957
1958 into:
1959
1960 lsm = MEM;
1961 for (...) {
1962 if (foo)
1963 stuff;
1964 else
1965 lsm = TMP_VAR;
1966 }
1967 MEM = lsm;
1968
1969 This function will generate:
1970
1971 lsm = MEM;
1972
1973 lsm_flag = false;
1974 ...
1975 for (...) {
1976 if (foo)
1977 stuff;
1978 else {
1979 lsm = TMP_VAR;
1980 lsm_flag = true;
1981 }
1982 }
1983 if (lsm_flag) <--
1984 MEM = lsm; <--
1985 */
1986
1987 static void
1988 execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag)
1989 {
1990 basic_block new_bb, then_bb, old_dest;
1991 bool loop_has_only_one_exit;
1992 edge then_old_edge, orig_ex = ex;
1993 gimple_stmt_iterator gsi;
1994 gimple stmt;
1995 struct prev_flag_edges *prev_edges = (struct prev_flag_edges *) ex->aux;
1996
1997 /* ?? Insert store after previous store if applicable. See note
1998 below. */
1999 if (prev_edges)
2000 ex = prev_edges->append_cond_position;
2001
2002 loop_has_only_one_exit = single_pred_p (ex->dest);
2003
2004 if (loop_has_only_one_exit)
2005 ex = split_block_after_labels (ex->dest);
2006
2007 old_dest = ex->dest;
2008 new_bb = split_edge (ex);
2009 then_bb = create_empty_bb (new_bb);
2010 if (current_loops && new_bb->loop_father)
2011 add_bb_to_loop (then_bb, new_bb->loop_father);
2012
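/* Emit the flag test: if (FLAG != false) branch to THEN_BB, where the
   actual store is emitted. */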
2013 gsi = gsi_start_bb (new_bb);
2014 stmt = gimple_build_cond (NE_EXPR, flag, boolean_false_node,
2015 NULL_TREE, NULL_TREE);
2016 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2017
2018 gsi = gsi_start_bb (then_bb);
2019 /* Insert actual store. */
2020 stmt = gimple_build_assign (unshare_expr (mem), tmp_var);
2021 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2022
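/* Wire up the new blocks: NEW_BB --true--> THEN_BB --> OLD_DEST and
   NEW_BB --false--> OLD_DEST. */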
2023 make_edge (new_bb, then_bb, EDGE_TRUE_VALUE);
2024 make_edge (new_bb, old_dest, EDGE_FALSE_VALUE);
2025 then_old_edge = make_edge (then_bb, old_dest, EDGE_FALLTHRU);
2026
2027 set_immediate_dominator (CDI_DOMINATORS, then_bb, new_bb);
2028
2029 if (prev_edges)
2030 {
2031 basic_block prevbb = prev_edges->last_cond_fallthru->src;
2032 redirect_edge_succ (prev_edges->last_cond_fallthru, new_bb);
2033 set_immediate_dominator (CDI_DOMINATORS, new_bb, prevbb);
2034 set_immediate_dominator (CDI_DOMINATORS, old_dest,
2035 recompute_dominator (CDI_DOMINATORS, old_dest));
2036 }
2037
2038 /* ?? Because stores may alias, they must happen in the exact
2039 sequence they originally happened. Save the position right after
2040 the (_lsm) store we just created so we can continue appending after
2041 it and maintain the original order. */
2042 {
2043 struct prev_flag_edges *p;
2044
2045 if (orig_ex->aux)
2046 orig_ex->aux = NULL;
2047 alloc_aux_for_edge (orig_ex, sizeof (struct prev_flag_edges));
2048 p = (struct prev_flag_edges *) orig_ex->aux;
2049 p->append_cond_position = then_old_edge;
2050 p->last_cond_fallthru = find_edge (new_bb, old_dest);
2051 orig_ex->aux = (void *) p;
2052 }
2053
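/* OLD_DEST may contain PHI nodes; give the new THEN_BB->OLD_DEST edge
   the same PHI arguments as the edge coming from NEW_BB. */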
2054 if (!loop_has_only_one_exit)
2055 for (gsi = gsi_start_phis (old_dest); !gsi_end_p (gsi); gsi_next (&gsi))
2056 {
2057 gimple phi = gsi_stmt (gsi);
2058 unsigned i;
2059
2060 for (i = 0; i < gimple_phi_num_args (phi); i++)
2061 if (gimple_phi_arg_edge (phi, i)->src == new_bb)
2062 {
2063 tree arg = gimple_phi_arg_def (phi, i);
2064 add_phi_arg (phi, arg, then_old_edge, UNKNOWN_LOCATION);
2065 update_stmt (phi);
2066 }
2067 }
2068 /* Remove the original fall through edge. This was the
2069 single_succ_edge (new_bb). */
2070 EDGE_SUCC (new_bb, 0)->flags &= ~EDGE_FALLTHRU;
2071 }
2072
2073 /* Helper function for execute_sm. At every location where REF is
2074 stored to, set an appropriate flag indicating the store. */
2075
2076 static tree
2077 execute_sm_if_changed_flag_set (struct loop *loop, mem_ref_p ref)
2078 {
2079 unsigned i;
2080 mem_ref_loc_p loc;
2081 tree flag;
2082 vec<mem_ref_loc_p> locs = vNULL;
2083 char *str = get_lsm_tmp_name (ref->mem, ~0);
2084
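/* STR points into the static lsm_tmp_name buffer, so appending "_flag"
   below makes it part of the name given to create_tmp_reg. */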
2085 lsm_tmp_name_add ("_flag");
2086 flag = create_tmp_reg (boolean_type_node, str);
2087 get_all_locs_in_loop (loop, ref, &locs);
2088 FOR_EACH_VEC_ELT (locs, i, loc)
2089 {
2090 gimple_stmt_iterator gsi;
2091 gimple stmt;
2092
2093 /* Only set the flag for writes. */
2094 if (is_gimple_assign (loc->stmt)
2095 && gimple_assign_lhs_ptr (loc->stmt) == loc->ref)
2096 {
2097 gsi = gsi_for_stmt (loc->stmt);
2098 stmt = gimple_build_assign (flag, boolean_true_node);
2099 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2100 }
2101 }
2102 locs.release ();
2103 return flag;
2104 }
2105
2106 /* Executes store motion of memory reference REF from LOOP.
2107 Exits from the LOOP are stored in EXITS. The initialization of the
2108 temporary variable is put in the preheader of the loop, and assignments
2109 to the reference from the temporary variable are emitted on the exits. */
2110
2111 static void
2112 execute_sm (struct loop *loop, vec<edge> exits, mem_ref_p ref)
2113 {
2114 tree tmp_var, store_flag;
2115 unsigned i;
2116 gimple load;
2117 struct fmt_data fmt_data;
2118 edge ex, latch_edge;
2119 struct lim_aux_data *lim_data;
2120 bool multi_threaded_model_p = false;
2121
2122 if (dump_file && (dump_flags & TDF_DETAILS))
2123 {
2124 fprintf (dump_file, "Executing store motion of ");
2125 print_generic_expr (dump_file, ref->mem, 0);
2126 fprintf (dump_file, " from loop %d\n", loop->num);
2127 }
2128
2129 tmp_var = create_tmp_reg (TREE_TYPE (ref->mem),
2130 get_lsm_tmp_name (ref->mem, ~0));
2131
2132 fmt_data.loop = loop;
2133 fmt_data.orig_loop = loop;
2134 for_each_index (&ref->mem, force_move_till, &fmt_data);
2135
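/* Inside a transaction, or when store data races are not allowed, we must
   not introduce a store on paths that did not store originally; use the
   flag-guarded variant (execute_sm_if_changed) instead. */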
2136 if (block_in_transaction (loop_preheader_edge (loop)->src)
2137 || !PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES))
2138 multi_threaded_model_p = true;
2139
2140 if (multi_threaded_model_p)
2141 store_flag = execute_sm_if_changed_flag_set (loop, ref);
2142
2143 rewrite_mem_refs (loop, ref, tmp_var);
2144
2145 /* Emit the load code into the latch, so that we are sure it will
2146 be processed after all dependencies. */
2147 latch_edge = loop_latch_edge (loop);
2148
2149 /* FIXME/TODO: For the multi-threaded variant, we could avoid this
2150 load altogether, since the store is predicated by a flag. We
2151 could do the load only if it was originally in the loop. */
2152 load = gimple_build_assign (tmp_var, unshare_expr (ref->mem));
2153 lim_data = init_lim_data (load);
2154 lim_data->max_loop = loop;
2155 lim_data->tgt_loop = loop;
2156 gsi_insert_on_edge (latch_edge, load);
2157
2158 if (multi_threaded_model_p)
2159 {
2160 load = gimple_build_assign (store_flag, boolean_false_node);
2161 lim_data = init_lim_data (load);
2162 lim_data->max_loop = loop;
2163 lim_data->tgt_loop = loop;
2164 gsi_insert_on_edge (latch_edge, load);
2165 }
2166
2167 /* Sink the store to every exit from the loop. */
2168 FOR_EACH_VEC_ELT (exits, i, ex)
2169 if (!multi_threaded_model_p)
2170 {
2171 gimple store;
2172 store = gimple_build_assign (unshare_expr (ref->mem), tmp_var);
2173 gsi_insert_on_edge (ex, store);
2174 }
2175 else
2176 execute_sm_if_changed (ex, ref->mem, tmp_var, store_flag);
2177 }
2178
2179 /* Hoists memory references MEM_REFS out of LOOP. EXITS is the list of exit
2180 edges of the LOOP. */
2181
2182 static void
2183 hoist_memory_references (struct loop *loop, bitmap mem_refs,
2184 vec<edge> exits)
2185 {
2186 mem_ref_p ref;
2187 unsigned i;
2188 bitmap_iterator bi;
2189
2190 EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi)
2191 {
2192 ref = memory_accesses.refs_list[i];
2193 execute_sm (loop, exits, ref);
2194 }
2195 }
2196
2197 /* Returns true if REF is always accessed in LOOP. If STORED_P is true,
2198 make sure REF is always stored to in LOOP. */
2199
2200 static bool
2201 ref_always_accessed_p (struct loop *loop, mem_ref_p ref, bool stored_p)
2202 {
2203 vec<mem_ref_loc_p> locs = vNULL;
2204 unsigned i;
2205 mem_ref_loc_p loc;
2206 bool ret = false;
2207 struct loop *must_exec;
2208 tree base;
2209
2210 base = get_base_address (ref->mem);
2211 if (INDIRECT_REF_P (base)
2212 || TREE_CODE (base) == MEM_REF)
2213 base = TREE_OPERAND (base, 0);
2214
2215 get_all_locs_in_loop (loop, ref, &locs);
2216 FOR_EACH_VEC_ELT (locs, i, loc)
2217 {
2218 if (!get_lim_data (loc->stmt))
2219 continue;
2220
2221 /* If we require an always executed store, make sure the statement
2222 stores to the reference. */
2223 if (stored_p)
2224 {
2225 tree lhs;
2226 if (!gimple_get_lhs (loc->stmt))
2227 continue;
2228 lhs = get_base_address (gimple_get_lhs (loc->stmt));
2229 if (!lhs)
2230 continue;
2231 if (INDIRECT_REF_P (lhs)
2232 || TREE_CODE (lhs) == MEM_REF)
2233 lhs = TREE_OPERAND (lhs, 0);
2234 if (lhs != base)
2235 continue;
2236 }
2237
2238 must_exec = get_lim_data (loc->stmt)->always_executed_in;
2239 if (!must_exec)
2240 continue;
2241
2242 if (must_exec == loop
2243 || flow_loop_nested_p (must_exec, loop))
2244 {
2245 ret = true;
2246 break;
2247 }
2248 }
2249 locs.release ();
2250
2251 return ret;
2252 }
2253
2254 /* Returns true if REF1 and REF2 are independent. */
2255
2256 static bool
2257 refs_independent_p (mem_ref_p ref1, mem_ref_p ref2)
2258 {
2259 if (ref1 == ref2
2260 || bitmap_bit_p (ref1->indep_ref, ref2->id))
2261 return true;
2262 if (bitmap_bit_p (ref1->dep_ref, ref2->id))
2263 return false;
2264 if (!MEM_ANALYZABLE (ref1)
2265 || !MEM_ANALYZABLE (ref2))
2266 return false;
2267
2268 if (dump_file && (dump_flags & TDF_DETAILS))
2269 fprintf (dump_file, "Querying dependency of refs %u and %u: ",
2270 ref1->id, ref2->id);
2271
2272 if (mem_refs_may_alias_p (ref1->mem, ref2->mem,
2273 &memory_accesses.ttae_cache))
2274 {
2275 bitmap_set_bit (ref1->dep_ref, ref2->id);
2276 bitmap_set_bit (ref2->dep_ref, ref1->id);
2277 if (dump_file && (dump_flags & TDF_DETAILS))
2278 fprintf (dump_file, "dependent.\n");
2279 return false;
2280 }
2281 else
2282 {
2283 bitmap_set_bit (ref1->indep_ref, ref2->id);
2284 bitmap_set_bit (ref2->indep_ref, ref1->id);
2285 if (dump_file && (dump_flags & TDF_DETAILS))
2286 fprintf (dump_file, "independent.\n");
2287 return true;
2288 }
2289 }
2290
2291 /* Records whether REF is independent in LOOP (according
2292 to INDEP). */
2293
2294 static void
2295 record_indep_loop (struct loop *loop, mem_ref_p ref, bool indep)
2296 {
2297 if (indep)
2298 bitmap_set_bit (ref->indep_loop, loop->num);
2299 else
2300 bitmap_set_bit (ref->dep_loop, loop->num);
2301 }
2302
2303 /* Returns true if REF is independent of all other memory references in
2304 LOOP. */
2305
2306 static bool
2307 ref_indep_loop_p_1 (struct loop *loop, mem_ref_p ref)
2308 {
2309 bitmap refs_to_check;
2310 unsigned i;
2311 bitmap_iterator bi;
2312 bool ret = true, stored = bitmap_bit_p (ref->stored, loop->num);
2313 mem_ref_p aref;
2314
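/* If REF is itself stored in LOOP, it may conflict with any other
   reference in the loop; otherwise only the references stored in LOOP
   can conflict with it. */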
2315 if (stored)
2316 refs_to_check = memory_accesses.all_refs_in_loop[loop->num];
2317 else
2318 refs_to_check = memory_accesses.all_refs_stored_in_loop[loop->num];
2319
2320 EXECUTE_IF_SET_IN_BITMAP (refs_to_check, 0, i, bi)
2321 {
2322 aref = memory_accesses.refs_list[i];
2323 if (!MEM_ANALYZABLE (aref)
2324 || !refs_independent_p (ref, aref))
2325 {
2326 ret = false;
2327 record_indep_loop (loop, aref, false);
2328 break;
2329 }
2330 }
2331
2332 return ret;
2333 }
2334
2335 /* Returns true if REF is independent of all other memory references in
2336 LOOP. Wrapper over ref_indep_loop_p_1, caching its results. */
2337
2338 static bool
2339 ref_indep_loop_p (struct loop *loop, mem_ref_p ref)
2340 {
2341 bool ret;
2342
2343 if (bitmap_bit_p (ref->indep_loop, loop->num))
2344 return true;
2345 if (bitmap_bit_p (ref->dep_loop, loop->num))
2346 return false;
2347
2348 ret = ref_indep_loop_p_1 (loop, ref);
2349
2350 if (dump_file && (dump_flags & TDF_DETAILS))
2351 fprintf (dump_file, "Querying dependencies of ref %u in loop %d: %s\n",
2352 ref->id, loop->num, ret ? "independent" : "dependent");
2353
2354 record_indep_loop (loop, ref, ret);
2355
2356 return ret;
2357 }
2358
2359 /* Returns true if we can perform store motion of REF from LOOP. */
2360
2361 static bool
2362 can_sm_ref_p (struct loop *loop, mem_ref_p ref)
2363 {
2364 tree base;
2365
2366 /* Can't hoist unanalyzable refs. */
2367 if (!MEM_ANALYZABLE (ref))
2368 return false;
2369
2370 /* Unless the reference is stored in the loop, there is nothing to do. */
2371 if (!bitmap_bit_p (ref->stored, loop->num))
2372 return false;
2373
2374 /* It should be movable. */
2375 if (!is_gimple_reg_type (TREE_TYPE (ref->mem))
2376 || TREE_THIS_VOLATILE (ref->mem)
2377 || !for_each_index (&ref->mem, may_move_till, loop))
2378 return false;
2379
2380 /* If it can throw, fail; we do not properly update EH info. */
2381 if (tree_could_throw_p (ref->mem))
2382 return false;
2383
2384 /* If it can trap, it must always be executed in LOOP.
2385 Readonly memory locations may trap when storing to them, but
2386 tree_could_trap_p is a predicate for rvalues, so check that
2387 explicitly. */
2388 base = get_base_address (ref->mem);
2389 if ((tree_could_trap_p (ref->mem)
2390 || (DECL_P (base) && TREE_READONLY (base)))
2391 && !ref_always_accessed_p (loop, ref, true))
2392 return false;
2393
2394 /* And it must be independent of all other memory references
2395 in LOOP. */
2396 if (!ref_indep_loop_p (loop, ref))
2397 return false;
2398
2399 return true;
2400 }
2401
2402 /* Marks the references in LOOP for which store motion should be performed
2403 in REFS_TO_SM. SM_EXECUTED is the set of references for which store
2404 motion was performed in one of the outer loops. */
2405
2406 static void
2407 find_refs_for_sm (struct loop *loop, bitmap sm_executed, bitmap refs_to_sm)
2408 {
2409 bitmap refs = memory_accesses.all_refs_in_loop[loop->num];
2410 unsigned i;
2411 bitmap_iterator bi;
2412 mem_ref_p ref;
2413
2414 EXECUTE_IF_AND_COMPL_IN_BITMAP (refs, sm_executed, 0, i, bi)
2415 {
2416 ref = memory_accesses.refs_list[i];
2417 if (can_sm_ref_p (loop, ref))
2418 bitmap_set_bit (refs_to_sm, i);
2419 }
2420 }
2421
2422 /* Checks whether LOOP (with exits stored in EXITS array) is suitable
2423 for a store motion optimization (i.e. whether we can insert statements
2424 on its exits). */
2425
2426 static bool
2427 loop_suitable_for_sm (struct loop *loop ATTRIBUTE_UNUSED,
2428 vec<edge> exits)
2429 {
2430 unsigned i;
2431 edge ex;
2432
2433 FOR_EACH_VEC_ELT (exits, i, ex)
2434 if (ex->flags & (EDGE_ABNORMAL | EDGE_EH))
2435 return false;
2436
2437 return true;
2438 }
2439
2440 /* Try to perform store motion for all memory references modified inside
2441 LOOP. SM_EXECUTED is the bitmap of the memory references for which
2442 store motion was executed in one of the outer loops. */
2443
2444 static void
2445 store_motion_loop (struct loop *loop, bitmap sm_executed)
2446 {
2447 vec<edge> exits = get_loop_exit_edges (loop);
2448 struct loop *subloop;
2449 bitmap sm_in_loop = BITMAP_ALLOC (NULL);
2450
2451 if (loop_suitable_for_sm (loop, exits))
2452 {
2453 find_refs_for_sm (loop, sm_executed, sm_in_loop);
2454 hoist_memory_references (loop, sm_in_loop, exits);
2455 }
2456 exits.release ();
2457
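/* References hoisted from this loop count as already handled while the
   subloops are processed; undo that before returning to the parent. */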
2458 bitmap_ior_into (sm_executed, sm_in_loop);
2459 for (subloop = loop->inner; subloop != NULL; subloop = subloop->next)
2460 store_motion_loop (subloop, sm_executed);
2461 bitmap_and_compl_into (sm_executed, sm_in_loop);
2462 BITMAP_FREE (sm_in_loop);
2463 }
2464
2465 /* Try to perform store motion for all memory references modified inside
2466 loops. */
2467
2468 static void
2469 store_motion (void)
2470 {
2471 struct loop *loop;
2472 bitmap sm_executed = BITMAP_ALLOC (NULL);
2473
2474 for (loop = current_loops->tree_root->inner; loop != NULL; loop = loop->next)
2475 store_motion_loop (loop, sm_executed);
2476
2477 BITMAP_FREE (sm_executed);
2478 gsi_commit_edge_inserts ();
2479 }
2480
2481 /* Fills ALWAYS_EXECUTED_IN information for basic blocks of LOOP, i.e.
2482 for each such basic block bb records the outermost loop for which execution
2483 of its header implies execution of bb. CONTAINS_CALL is the bitmap of
2484 blocks that contain a nonpure call. */
2485
2486 static void
2487 fill_always_executed_in (struct loop *loop, sbitmap contains_call)
2488 {
2489 basic_block bb = NULL, *bbs, last = NULL;
2490 unsigned i;
2491 edge e;
2492 struct loop *inn_loop = loop;
2493
2494 if (ALWAYS_EXECUTED_IN (loop->header) == NULL)
2495 {
2496 bbs = get_loop_body_in_dom_order (loop);
2497
2498 for (i = 0; i < loop->num_nodes; i++)
2499 {
2500 edge_iterator ei;
2501 bb = bbs[i];
2502
2503 if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
2504 last = bb;
2505
2506 if (bitmap_bit_p (contains_call, bb->index))
2507 break;
2508
2509 FOR_EACH_EDGE (e, ei, bb->succs)
2510 if (!flow_bb_inside_loop_p (loop, e->dest))
2511 break;
2512 if (e)
2513 break;
2514
2515 /* A loop might be infinite (TODO use simple loop analysis
2516 to disprove this if possible). */
2517 if (bb->flags & BB_IRREDUCIBLE_LOOP)
2518 break;
2519
2520 if (!flow_bb_inside_loop_p (inn_loop, bb))
2521 break;
2522
2523 if (bb->loop_father->header == bb)
2524 {
2525 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
2526 break;
2527
2528 /* In a loop that is always entered we may proceed anyway.
2529 But record that we entered it and stop once we leave it. */
2530 inn_loop = bb->loop_father;
2531 }
2532 }
2533
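/* At this point LAST is the deepest block that we know is always executed
   whenever the header of LOOP is; so are its dominators, which the walk
   below marks up to the header. */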
2534 while (1)
2535 {
2536 SET_ALWAYS_EXECUTED_IN (last, loop);
2537 if (last == loop->header)
2538 break;
2539 last = get_immediate_dominator (CDI_DOMINATORS, last);
2540 }
2541
2542 free (bbs);
2543 }
2544
2545 for (loop = loop->inner; loop; loop = loop->next)
2546 fill_always_executed_in (loop, contains_call);
2547 }
2548
2549 /* Compute the global information needed by the loop invariant motion pass. */
2550
2551 static void
2552 tree_ssa_lim_initialize (void)
2553 {
2554 sbitmap contains_call = sbitmap_alloc (last_basic_block);
2555 gimple_stmt_iterator bsi;
2556 struct loop *loop;
2557 basic_block bb;
2558
2559 bitmap_obstack_initialize (&lim_bitmap_obstack);
2560
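/* Compute the set of basic blocks that contain a nonpure call;
   fill_always_executed_in stops extending the always-executed region at
   such blocks. */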
2561 bitmap_clear (contains_call);
2562 FOR_EACH_BB (bb)
2563 {
2564 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2565 {
2566 if (nonpure_call_p (gsi_stmt (bsi)))
2567 break;
2568 }
2569
2570 if (!gsi_end_p (bsi))
2571 bitmap_set_bit (contains_call, bb->index);
2572 }
2573
2574 for (loop = current_loops->tree_root->inner; loop; loop = loop->next)
2575 fill_always_executed_in (loop, contains_call);
2576
2577 sbitmap_free (contains_call);
2578
2579 lim_aux_data_map = pointer_map_create ();
2580
2581 if (flag_tm)
2582 compute_transaction_bits ();
2583
2584 alloc_aux_for_edges (0);
2585 }
2586
2587 /* Cleans up after the invariant motion pass. */
2588
2589 static void
2590 tree_ssa_lim_finalize (void)
2591 {
2592 basic_block bb;
2593 unsigned i;
2594 mem_ref_p ref;
2595
2596 free_aux_for_edges ();
2597
2598 FOR_EACH_BB (bb)
2599 SET_ALWAYS_EXECUTED_IN (bb, NULL);
2600
2601 bitmap_obstack_release (&lim_bitmap_obstack);
2602 pointer_map_destroy (lim_aux_data_map);
2603
2604 htab_delete (memory_accesses.refs);
2605
2606 FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
2607 memref_free (ref);
2608 memory_accesses.refs_list.release ();
2609
2610 memory_accesses.refs_in_loop.release ();
2611 memory_accesses.all_refs_in_loop.release ();
2612 memory_accesses.all_refs_stored_in_loop.release ();
2613
2614 if (memory_accesses.ttae_cache)
2615 free_affine_expand_cache (&memory_accesses.ttae_cache);
2616 }
2617
2618 /* Moves invariants from loops. Only "expensive" invariants are moved out --
2619 i.e. those that are likely to be a win regardless of the register pressure. */
2620
2621 unsigned int
2622 tree_ssa_lim (void)
2623 {
2624 unsigned int todo;
2625
2626 tree_ssa_lim_initialize ();
2627
2628 /* Gather information about memory accesses in the loops. */
2629 analyze_memory_references ();
2630
2631 /* For each statement determine the outermost loop in which it is
2632 invariant and the cost of computing the invariant. */
2633 determine_invariantness ();
2634
2635 /* Execute store motion. Force the necessary invariants to be moved
2636 out of the loops as well. */
2637 store_motion ();
2638
2639 /* Move the expressions that are expensive enough. */
2640 todo = move_computations ();
2641
2642 tree_ssa_lim_finalize ();
2643
2644 return todo;
2645 }