1 /* Loop invariant motion.
2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "alias.h"
25 #include "symtab.h"
26 #include "tree.h"
27 #include "fold-const.h"
28 #include "tm_p.h"
29 #include "predict.h"
30 #include "hard-reg-set.h"
31 #include "function.h"
32 #include "dominance.h"
33 #include "cfg.h"
34 #include "cfganal.h"
35 #include "basic-block.h"
36 #include "gimple-pretty-print.h"
37 #include "tree-ssa-alias.h"
38 #include "internal-fn.h"
39 #include "tree-eh.h"
40 #include "gimple-expr.h"
41 #include "gimple.h"
42 #include "gimplify.h"
43 #include "gimple-iterator.h"
44 #include "gimple-ssa.h"
45 #include "tree-cfg.h"
46 #include "tree-phinodes.h"
47 #include "ssa-iterators.h"
48 #include "stringpool.h"
49 #include "tree-ssanames.h"
50 #include "tree-ssa-loop-manip.h"
51 #include "tree-ssa-loop.h"
52 #include "tree-into-ssa.h"
53 #include "cfgloop.h"
54 #include "domwalk.h"
55 #include "params.h"
56 #include "tree-pass.h"
57 #include "flags.h"
58 #include "tree-affine.h"
59 #include "tree-ssa-propagate.h"
60 #include "trans-mem.h"
61 #include "gimple-fold.h"
62
63 /* TODO: Support for predicated code motion. I.e.
64
65 while (1)
66 {
67 if (cond)
68 {
69 a = inv;
70 something;
71 }
72 }
73
74 Where COND and INV are invariants, but evaluating INV may trap or be
75 invalid for some other reason if !COND. This may be transformed to
76
77 if (cond)
78 a = inv;
79 while (1)
80 {
81 if (cond)
82 something;
83 } */
84
85 /* The auxiliary data kept for each statement. */
86
87 struct lim_aux_data
88 {
89 struct loop *max_loop; /* The outermost loop in which the statement
90 is invariant. */
91
92 struct loop *tgt_loop; /* The loop out of which we want to move the
93 invariant. */
94
95 struct loop *always_executed_in;
96 /* The outermost loop for which we are sure
97 the statement is executed if the loop
98 is entered. */
99
100 unsigned cost; /* Cost of the computation performed by the
101 statement. */
102
103 vec<gimple> depends; /* Vector of statements that must also be
104 hoisted out of the loop when this statement
105 is hoisted; i.e. those that define the
106 operands of the statement and are inside of
107 the MAX_LOOP loop. */
108 };
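/* A sketch of how DEPENDS is meant to be used (the SSA names below are
   hypothetical): for a statement  x_3 = y_1 + z_2  whose operand y_1 is
   defined by another statement inside MAX_LOOP, that defining statement
   is pushed onto DEPENDS, so hoisting x_3 also forces the definition of
   y_1 to be hoisted (see add_dependency and set_level below).  */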
109
110 /* Maps statements to their lim_aux_data. */
111
112 static hash_map<gimple, lim_aux_data *> *lim_aux_data_map;
113
114 /* Description of a memory reference location. */
115
116 typedef struct mem_ref_loc
117 {
118 tree *ref; /* The reference itself. */
119 gimple stmt; /* The statement in which it occurs. */
120 } *mem_ref_loc_p;
121
122
123 /* Description of a memory reference. */
124
125 typedef struct im_mem_ref
126 {
127 unsigned id; /* ID assigned to the memory reference
128 (its index in memory_accesses.refs_list) */
129 hashval_t hash; /* Its hash value. */
130
131 /* The memory access itself and associated caching of alias-oracle
132 query meta-data. */
133 ao_ref mem;
134
135 bitmap stored; /* The set of loops in which this memory location
136 is stored. */
137 vec<mem_ref_loc> accesses_in_loop;
138 /* The locations of the accesses, sorted by
139 the containing loop's postorder number. */
140
141 /* The following sets are computed on demand. We keep both the set and
142 its complement, so that we know whether the information was
143 already computed or not. */
144 bitmap_head indep_loop; /* The set of loops in which the memory
145 reference is independent, meaning:
146 If it is stored in the loop, this store
147 is independent of all other loads and
148 stores.
149 If it is only loaded, then it is independent
150 of all stores in the loop. */
151 bitmap_head dep_loop; /* The complement of INDEP_LOOP. */
152 } *mem_ref_p;
153
154 /* We use two bits per loop in the ref->{in,}dep_loop bitmaps, the first
155 to record (in)dependence against stores in the loop and its subloops, the
156 second to record (in)dependence against all references in the loop
157 and its subloops. */
158 #define LOOP_DEP_BIT(loopnum, storedp) (2 * (loopnum) + (storedp ? 1 : 0))
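/* For instance, with this encoding loop number 3 uses bits
   LOOP_DEP_BIT (3, false) == 6 and LOOP_DEP_BIT (3, true) == 7 of the
   indep_loop/dep_loop bitmaps; this example only illustrates the
   arithmetic of the macro above.  */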
159
160 /* Mem_ref hashtable helpers. */
161
162 struct mem_ref_hasher : nofree_ptr_hash <im_mem_ref>
163 {
164 typedef tree_node *compare_type;
165 static inline hashval_t hash (const im_mem_ref *);
166 static inline bool equal (const im_mem_ref *, const tree_node *);
167 };
168
169 /* A hash function for struct im_mem_ref object OBJ. */
170
171 inline hashval_t
172 mem_ref_hasher::hash (const im_mem_ref *mem)
173 {
174 return mem->hash;
175 }
176
177 /* An equality function for struct im_mem_ref object MEM1 with
178 memory reference OBJ2. */
179
180 inline bool
181 mem_ref_hasher::equal (const im_mem_ref *mem1, const tree_node *obj2)
182 {
183 return operand_equal_p (mem1->mem.ref, (const_tree) obj2, 0);
184 }
185
186
187 /* Description of memory accesses in loops. */
188
189 static struct
190 {
191 /* The hash table of memory references accessed in loops. */
192 hash_table<mem_ref_hasher> *refs;
193
194 /* The list of memory references. */
195 vec<mem_ref_p> refs_list;
196
197 /* The set of memory references accessed in each loop. */
198 vec<bitmap_head> refs_in_loop;
199
200 /* The set of memory references stored in each loop. */
201 vec<bitmap_head> refs_stored_in_loop;
202
203 /* The set of memory references stored in each loop, including subloops. */
204 vec<bitmap_head> all_refs_stored_in_loop;
205
206 /* Cache for expanding memory addresses. */
207 hash_map<tree, name_expansion *> *ttae_cache;
208 } memory_accesses;
209
210 /* Obstack for the bitmaps in the above data structures. */
211 static bitmap_obstack lim_bitmap_obstack;
212 static obstack mem_ref_obstack;
213
214 static bool ref_indep_loop_p (struct loop *, mem_ref_p);
215
216 /* Minimum cost of an expensive expression. */
217 #define LIM_EXPENSIVE ((unsigned) PARAM_VALUE (PARAM_LIM_EXPENSIVE))
218
219 /* The outermost loop for which execution of the header guarantees that the
220 block will be executed. */
221 #define ALWAYS_EXECUTED_IN(BB) ((struct loop *) (BB)->aux)
222 #define SET_ALWAYS_EXECUTED_IN(BB, VAL) ((BB)->aux = (void *) (VAL))
223
224 /* ID of the shared unanalyzable mem. */
225 #define UNANALYZABLE_MEM_ID 0
226
227 /* Whether the reference was analyzable. */
228 #define MEM_ANALYZABLE(REF) ((REF)->id != UNANALYZABLE_MEM_ID)
229
230 static struct lim_aux_data *
231 init_lim_data (gimple stmt)
232 {
233 lim_aux_data *p = XCNEW (struct lim_aux_data);
234 lim_aux_data_map->put (stmt, p);
235
236 return p;
237 }
238
239 static struct lim_aux_data *
240 get_lim_data (gimple stmt)
241 {
242 lim_aux_data **p = lim_aux_data_map->get (stmt);
243 if (!p)
244 return NULL;
245
246 return *p;
247 }
248
249 /* Releases the memory occupied by DATA. */
250
251 static void
252 free_lim_aux_data (struct lim_aux_data *data)
253 {
254 data->depends.release ();
255 free (data);
256 }
257
258 static void
259 clear_lim_data (gimple stmt)
260 {
261 lim_aux_data **p = lim_aux_data_map->get (stmt);
262 if (!p)
263 return;
264
265 free_lim_aux_data (*p);
266 *p = NULL;
267 }
268
269
270 /* The possibilities of statement movement. */
271 enum move_pos
272 {
273 MOVE_IMPOSSIBLE, /* No movement -- side effect expression. */
274 MOVE_PRESERVE_EXECUTION, /* Must not cause the non-executed statement to
275 become executed -- memory accesses, ... */
276 MOVE_POSSIBLE /* Unlimited movement. */
277 };
278
279
280 /* If it is possible to hoist the statement STMT unconditionally,
281 returns MOVE_POSSIBLE.
282 If it is possible to hoist the statement STMT, but we must avoid making
283 it executed if it would not be executed in the original program (e.g.
284 because it may trap), return MOVE_PRESERVE_EXECUTION.
285 Otherwise return MOVE_IMPOSSIBLE. */
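/* Illustration only (not an exhaustive classification): a division whose
   operands are invariant may still trap on a zero divisor, so it is
   classified as MOVE_PRESERVE_EXECUTION rather than MOVE_POSSIBLE, while
   a statement with volatile operands or a virtual definition is
   MOVE_IMPOSSIBLE.  */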
286
287 enum move_pos
288 movement_possibility (gimple stmt)
289 {
290 tree lhs;
291 enum move_pos ret = MOVE_POSSIBLE;
292
293 if (flag_unswitch_loops
294 && gimple_code (stmt) == GIMPLE_COND)
295 {
296 /* If we perform unswitching, force the operands of the invariant
297 condition to be moved out of the loop. */
298 return MOVE_POSSIBLE;
299 }
300
301 if (gimple_code (stmt) == GIMPLE_PHI
302 && gimple_phi_num_args (stmt) <= 2
303 && !virtual_operand_p (gimple_phi_result (stmt))
304 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (stmt)))
305 return MOVE_POSSIBLE;
306
307 if (gimple_get_lhs (stmt) == NULL_TREE)
308 return MOVE_IMPOSSIBLE;
309
310 if (gimple_vdef (stmt))
311 return MOVE_IMPOSSIBLE;
312
313 if (stmt_ends_bb_p (stmt)
314 || gimple_has_volatile_ops (stmt)
315 || gimple_has_side_effects (stmt)
316 || stmt_could_throw_p (stmt))
317 return MOVE_IMPOSSIBLE;
318
319 if (is_gimple_call (stmt))
320 {
321 /* While a pure or const call is guaranteed to have no side effects, we
322 cannot move it arbitrarily. Consider code like
323
324 char *s = something ();
325
326 while (1)
327 {
328 if (s)
329 t = strlen (s);
330 else
331 t = 0;
332 }
333
334 Here the strlen call cannot be moved out of the loop, even though
335 s is invariant. In addition to possibly creating a call with
336 invalid arguments, moving out a function call that is not executed
337 may cause performance regressions in case the call is costly and
338 not executed at all. */
339 ret = MOVE_PRESERVE_EXECUTION;
340 lhs = gimple_call_lhs (stmt);
341 }
342 else if (is_gimple_assign (stmt))
343 lhs = gimple_assign_lhs (stmt);
344 else
345 return MOVE_IMPOSSIBLE;
346
347 if (TREE_CODE (lhs) == SSA_NAME
348 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
349 return MOVE_IMPOSSIBLE;
350
351 if (TREE_CODE (lhs) != SSA_NAME
352 || gimple_could_trap_p (stmt))
353 return MOVE_PRESERVE_EXECUTION;
354
355 /* Non-local loads in a transaction cannot be hoisted out. Well,
356 unless the load happens on every path out of the loop, but we
357 don't take this into account yet. */
358 if (flag_tm
359 && gimple_in_transaction (stmt)
360 && gimple_assign_single_p (stmt))
361 {
362 tree rhs = gimple_assign_rhs1 (stmt);
363 if (DECL_P (rhs) && is_global_var (rhs))
364 {
365 if (dump_file)
366 {
367 fprintf (dump_file, "Cannot hoist conditional load of ");
368 print_generic_expr (dump_file, rhs, TDF_SLIM);
369 fprintf (dump_file, " because it is in a transaction.\n");
370 }
371 return MOVE_IMPOSSIBLE;
372 }
373 }
374
375 return ret;
376 }
377
378 /* Suppose that operand DEF is used inside the LOOP. Returns the outermost
379 loop to which we could move the expression using DEF if it did not have
380 other operands, i.e. the outermost loop enclosing LOOP in which the value
381 of DEF is invariant. */
382
383 static struct loop *
384 outermost_invariant_loop (tree def, struct loop *loop)
385 {
386 gimple def_stmt;
387 basic_block def_bb;
388 struct loop *max_loop;
389 struct lim_aux_data *lim_data;
390
391 if (!def)
392 return superloop_at_depth (loop, 1);
393
394 if (TREE_CODE (def) != SSA_NAME)
395 {
396 gcc_assert (is_gimple_min_invariant (def));
397 return superloop_at_depth (loop, 1);
398 }
399
400 def_stmt = SSA_NAME_DEF_STMT (def);
401 def_bb = gimple_bb (def_stmt);
402 if (!def_bb)
403 return superloop_at_depth (loop, 1);
404
405 max_loop = find_common_loop (loop, def_bb->loop_father);
406
407 lim_data = get_lim_data (def_stmt);
408 if (lim_data != NULL && lim_data->max_loop != NULL)
409 max_loop = find_common_loop (max_loop,
410 loop_outer (lim_data->max_loop));
411 if (max_loop == loop)
412 return NULL;
413 max_loop = superloop_at_depth (loop, loop_depth (max_loop) + 1);
414
415 return max_loop;
416 }
417
418 /* DATA is a structure containing information associated with a statement
419 inside LOOP. DEF is one of the operands of this statement.
420
421 Find the outermost loop enclosing LOOP in which the value of DEF is invariant
422 and record this in the DATA->max_loop field. If DEF itself is defined inside
423 this loop as well (i.e. we need to hoist it out of the loop if we want
424 to hoist the statement represented by DATA), record the statement that
425 defines DEF in the DATA->depends list. Additionally, if ADD_COST is true,
426 add the cost of the computation of DEF to the DATA->cost.
427
428 If DEF is not invariant in LOOP, return false. Otherwise return TRUE. */
429
430 static bool
431 add_dependency (tree def, struct lim_aux_data *data, struct loop *loop,
432 bool add_cost)
433 {
434 gimple def_stmt = SSA_NAME_DEF_STMT (def);
435 basic_block def_bb = gimple_bb (def_stmt);
436 struct loop *max_loop;
437 struct lim_aux_data *def_data;
438
439 if (!def_bb)
440 return true;
441
442 max_loop = outermost_invariant_loop (def, loop);
443 if (!max_loop)
444 return false;
445
446 if (flow_loop_nested_p (data->max_loop, max_loop))
447 data->max_loop = max_loop;
448
449 def_data = get_lim_data (def_stmt);
450 if (!def_data)
451 return true;
452
453 if (add_cost
454 /* Only add the cost if the statement defining DEF is inside LOOP,
455 i.e. if it is likely that by moving the invariants dependent
456 on it, we will be able to avoid creating a new register for
457 it (since it will be only used in these dependent invariants). */
458 && def_bb->loop_father == loop)
459 data->cost += def_data->cost;
460
461 data->depends.safe_push (def_stmt);
462
463 return true;
464 }
465
466 /* Returns an estimate of the cost of statement STMT. The values here
467 are just ad-hoc constants, similar to costs for inlining. */
468
469 static unsigned
470 stmt_cost (gimple stmt)
471 {
472 /* Always try to create possibilities for unswitching. */
473 if (gimple_code (stmt) == GIMPLE_COND
474 || gimple_code (stmt) == GIMPLE_PHI)
475 return LIM_EXPENSIVE;
476
477 /* We should be hoisting calls if possible. */
478 if (is_gimple_call (stmt))
479 {
480 tree fndecl;
481
482 /* Unless the call is a builtin_constant_p; this always folds to a
483 constant, so moving it is useless. */
484 fndecl = gimple_call_fndecl (stmt);
485 if (fndecl
486 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
487 && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_CONSTANT_P)
488 return 0;
489
490 return LIM_EXPENSIVE;
491 }
492
493 /* Hoisting memory references out should almost surely be a win. */
494 if (gimple_references_memory_p (stmt))
495 return LIM_EXPENSIVE;
496
497 if (gimple_code (stmt) != GIMPLE_ASSIGN)
498 return 1;
499
500 switch (gimple_assign_rhs_code (stmt))
501 {
502 case MULT_EXPR:
503 case WIDEN_MULT_EXPR:
504 case WIDEN_MULT_PLUS_EXPR:
505 case WIDEN_MULT_MINUS_EXPR:
506 case DOT_PROD_EXPR:
507 case FMA_EXPR:
508 case TRUNC_DIV_EXPR:
509 case CEIL_DIV_EXPR:
510 case FLOOR_DIV_EXPR:
511 case ROUND_DIV_EXPR:
512 case EXACT_DIV_EXPR:
513 case CEIL_MOD_EXPR:
514 case FLOOR_MOD_EXPR:
515 case ROUND_MOD_EXPR:
516 case TRUNC_MOD_EXPR:
517 case RDIV_EXPR:
518 /* Division and multiplication are usually expensive. */
519 return LIM_EXPENSIVE;
520
521 case LSHIFT_EXPR:
522 case RSHIFT_EXPR:
523 case WIDEN_LSHIFT_EXPR:
524 case LROTATE_EXPR:
525 case RROTATE_EXPR:
526 /* Shifts and rotates are usually expensive. */
527 return LIM_EXPENSIVE;
528
529 case CONSTRUCTOR:
530 /* Make vector construction cost proportional to the number
531 of elements. */
532 return CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt));
533
534 case SSA_NAME:
535 case PAREN_EXPR:
536 /* Whether or not something is wrapped inside a PAREN_EXPR
537 should not change move cost. Nor should an intermediate
538 unpropagated SSA name copy. */
539 return 0;
540
541 default:
542 return 1;
543 }
544 }
545
546 /* Finds the outermost loop between OUTER and LOOP in which the memory reference
547 REF is independent. If REF is not independent in LOOP, NULL is returned
548 instead. */
549
550 static struct loop *
551 outermost_indep_loop (struct loop *outer, struct loop *loop, mem_ref_p ref)
552 {
553 struct loop *aloop;
554
555 if (ref->stored && bitmap_bit_p (ref->stored, loop->num))
556 return NULL;
557
558 for (aloop = outer;
559 aloop != loop;
560 aloop = superloop_at_depth (loop, loop_depth (aloop) + 1))
561 if ((!ref->stored || !bitmap_bit_p (ref->stored, aloop->num))
562 && ref_indep_loop_p (aloop, ref))
563 return aloop;
564
565 if (ref_indep_loop_p (loop, ref))
566 return loop;
567 else
568 return NULL;
569 }
570
571 /* If there is a simple load or store to a memory reference in STMT, returns
572 the location of the memory reference, and sets IS_STORE according to whether
573 it is a store or load. Otherwise, returns NULL. */
574
575 static tree *
576 simple_mem_ref_in_stmt (gimple stmt, bool *is_store)
577 {
578 tree *lhs, *rhs;
579
580 /* Recognize SSA_NAME = MEM and MEM = (SSA_NAME | invariant) patterns. */
581 if (!gimple_assign_single_p (stmt))
582 return NULL;
583
584 lhs = gimple_assign_lhs_ptr (stmt);
585 rhs = gimple_assign_rhs1_ptr (stmt);
586
587 if (TREE_CODE (*lhs) == SSA_NAME && gimple_vuse (stmt))
588 {
589 *is_store = false;
590 return rhs;
591 }
592 else if (gimple_vdef (stmt)
593 && (TREE_CODE (*rhs) == SSA_NAME || is_gimple_min_invariant (*rhs)))
594 {
595 *is_store = true;
596 return lhs;
597 }
598 else
599 return NULL;
600 }
601
602 /* Returns the memory reference contained in STMT. */
603
604 static mem_ref_p
605 mem_ref_in_stmt (gimple stmt)
606 {
607 bool store;
608 tree *mem = simple_mem_ref_in_stmt (stmt, &store);
609 hashval_t hash;
610 mem_ref_p ref;
611
612 if (!mem)
613 return NULL;
614 gcc_assert (!store);
615
616 hash = iterative_hash_expr (*mem, 0);
617 ref = memory_accesses.refs->find_with_hash (*mem, hash);
618
619 gcc_assert (ref != NULL);
620 return ref;
621 }
622
623 /* From a controlling predicate in DOM determine the arguments from
624 the PHI node PHI that are chosen if the predicate evaluates to
625 true and false and store them to *TRUE_ARG_P and *FALSE_ARG_P if
626 they are non-NULL. Returns true if the arguments can be determined,
627 else return false. */
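/* A sketch of the CFG shape this handles (block names are made up):

       DOM:  if (cond_1)
            /            \
       true edge      false edge
           |               |
          BB1             BB2
            \             /
             BB:  x_1 = PHI <a_2 (BB1), b_3 (BB2)>

   Here *TRUE_ARG_P is set to a_2 and *FALSE_ARG_P to b_3.  Forwarder
   blocks on either arm are tolerated as long as the dominance checks
   below still attribute each PHI edge to one of the two outcomes.  */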
628
629 static bool
630 extract_true_false_args_from_phi (basic_block dom, gphi *phi,
631 tree *true_arg_p, tree *false_arg_p)
632 {
633 basic_block bb = gimple_bb (phi);
634 edge true_edge, false_edge, tem;
635 tree arg0 = NULL_TREE, arg1 = NULL_TREE;
636
637 /* We have to verify that one edge into the PHI node is dominated
638 by the true edge of the predicate block and the other edge
639 dominated by the false edge. This ensures that the PHI argument
640 we are going to take is completely determined by the path we
641 take from the predicate block.
642 We can only use BB dominance checks below if the destination of
643 the true/false edges are dominated by their edge, thus only
644 have a single predecessor. */
645 extract_true_false_edges_from_block (dom, &true_edge, &false_edge);
646 tem = EDGE_PRED (bb, 0);
647 if (tem == true_edge
648 || (single_pred_p (true_edge->dest)
649 && (tem->src == true_edge->dest
650 || dominated_by_p (CDI_DOMINATORS,
651 tem->src, true_edge->dest))))
652 arg0 = PHI_ARG_DEF (phi, tem->dest_idx);
653 else if (tem == false_edge
654 || (single_pred_p (false_edge->dest)
655 && (tem->src == false_edge->dest
656 || dominated_by_p (CDI_DOMINATORS,
657 tem->src, false_edge->dest))))
658 arg1 = PHI_ARG_DEF (phi, tem->dest_idx);
659 else
660 return false;
661 tem = EDGE_PRED (bb, 1);
662 if (tem == true_edge
663 || (single_pred_p (true_edge->dest)
664 && (tem->src == true_edge->dest
665 || dominated_by_p (CDI_DOMINATORS,
666 tem->src, true_edge->dest))))
667 arg0 = PHI_ARG_DEF (phi, tem->dest_idx);
668 else if (tem == false_edge
669 || (single_pred_p (false_edge->dest)
670 && (tem->src == false_edge->dest
671 || dominated_by_p (CDI_DOMINATORS,
672 tem->src, false_edge->dest))))
673 arg1 = PHI_ARG_DEF (phi, tem->dest_idx);
674 else
675 return false;
676 if (!arg0 || !arg1)
677 return false;
678
679 if (true_arg_p)
680 *true_arg_p = arg0;
681 if (false_arg_p)
682 *false_arg_p = arg1;
683
684 return true;
685 }
686
687 /* Determine the outermost loop to which it is possible to hoist a statement
688 STMT and store it to LIM_DATA (STMT)->max_loop. To do this we determine
689 the outermost loop in which the value computed by STMT is invariant.
690 If MUST_PRESERVE_EXEC is true, additionally choose such a loop that
691 we preserve whether STMT is executed. It also fills in other related
692 information in LIM_DATA (STMT).
693
694 The function returns false if STMT cannot be hoisted outside of the loop it
695 is defined in, and true otherwise. */
696
697 static bool
698 determine_max_movement (gimple stmt, bool must_preserve_exec)
699 {
700 basic_block bb = gimple_bb (stmt);
701 struct loop *loop = bb->loop_father;
702 struct loop *level;
703 struct lim_aux_data *lim_data = get_lim_data (stmt);
704 tree val;
705 ssa_op_iter iter;
706
707 if (must_preserve_exec)
708 level = ALWAYS_EXECUTED_IN (bb);
709 else
710 level = superloop_at_depth (loop, 1);
711 lim_data->max_loop = level;
712
713 if (gphi *phi = dyn_cast <gphi *> (stmt))
714 {
715 use_operand_p use_p;
716 unsigned min_cost = UINT_MAX;
717 unsigned total_cost = 0;
718 struct lim_aux_data *def_data;
719
720 /* We will end up promoting dependencies to be unconditionally
721 evaluated. For this reason the PHI cost (and thus the
722 cost we remove from the loop by doing the invariant motion)
723 is that of the cheapest PHI argument dependency chain. */
724 FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
725 {
726 val = USE_FROM_PTR (use_p);
727
728 if (TREE_CODE (val) != SSA_NAME)
729 {
730 /* Assign cost 1 to constants. */
731 min_cost = MIN (min_cost, 1);
732 total_cost += 1;
733 continue;
734 }
735 if (!add_dependency (val, lim_data, loop, false))
736 return false;
737
738 gimple def_stmt = SSA_NAME_DEF_STMT (val);
739 if (gimple_bb (def_stmt)
740 && gimple_bb (def_stmt)->loop_father == loop)
741 {
742 def_data = get_lim_data (def_stmt);
743 if (def_data)
744 {
745 min_cost = MIN (min_cost, def_data->cost);
746 total_cost += def_data->cost;
747 }
748 }
749 }
750
751 min_cost = MIN (min_cost, total_cost);
752 lim_data->cost += min_cost;
753
754 if (gimple_phi_num_args (phi) > 1)
755 {
756 basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
757 gimple cond;
758 if (gsi_end_p (gsi_last_bb (dom)))
759 return false;
760 cond = gsi_stmt (gsi_last_bb (dom));
761 if (gimple_code (cond) != GIMPLE_COND)
762 return false;
763 /* Verify that this is an extended form of a diamond and
764 the PHI arguments are completely controlled by the
765 predicate in DOM. */
766 if (!extract_true_false_args_from_phi (dom, phi, NULL, NULL))
767 return false;
768
769 /* Fold in dependencies and cost of the condition. */
770 FOR_EACH_SSA_TREE_OPERAND (val, cond, iter, SSA_OP_USE)
771 {
772 if (!add_dependency (val, lim_data, loop, false))
773 return false;
774 def_data = get_lim_data (SSA_NAME_DEF_STMT (val));
775 if (def_data)
776 total_cost += def_data->cost;
777 }
778
779 /* We want to avoid unconditionally executing very expensive
780 operations. As costs for our dependencies cannot be
781 negative, just claim we are not invariant in this case.
782 We also are not sure whether the control-flow inside the
783 loop will vanish. */
784 if (total_cost - min_cost >= 2 * LIM_EXPENSIVE
785 && !(min_cost != 0
786 && total_cost / min_cost <= 2))
787 return false;
788
789 /* Assume that the control-flow in the loop will vanish.
790 ??? We should verify this and not artificially increase
791 the cost if that is not the case. */
792 lim_data->cost += stmt_cost (stmt);
793 }
794
795 return true;
796 }
797 else
798 FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_USE)
799 if (!add_dependency (val, lim_data, loop, true))
800 return false;
801
802 if (gimple_vuse (stmt))
803 {
804 mem_ref_p ref = mem_ref_in_stmt (stmt);
805
806 if (ref)
807 {
808 lim_data->max_loop
809 = outermost_indep_loop (lim_data->max_loop, loop, ref);
810 if (!lim_data->max_loop)
811 return false;
812 }
813 else
814 {
815 if ((val = gimple_vuse (stmt)) != NULL_TREE)
816 {
817 if (!add_dependency (val, lim_data, loop, false))
818 return false;
819 }
820 }
821 }
822
823 lim_data->cost += stmt_cost (stmt);
824
825 return true;
826 }
827
828 /* Suppose that some statement in ORIG_LOOP is hoisted to the loop LEVEL,
829 and that one of the operands of this statement is computed by STMT.
830 Ensure that STMT (together with all the statements that define its
831 operands) is hoisted at least out of the loop LEVEL. */
832
833 static void
834 set_level (gimple stmt, struct loop *orig_loop, struct loop *level)
835 {
836 struct loop *stmt_loop = gimple_bb (stmt)->loop_father;
837 struct lim_aux_data *lim_data;
838 gimple dep_stmt;
839 unsigned i;
840
841 stmt_loop = find_common_loop (orig_loop, stmt_loop);
842 lim_data = get_lim_data (stmt);
843 if (lim_data != NULL && lim_data->tgt_loop != NULL)
844 stmt_loop = find_common_loop (stmt_loop,
845 loop_outer (lim_data->tgt_loop));
846 if (flow_loop_nested_p (stmt_loop, level))
847 return;
848
849 gcc_assert (level == lim_data->max_loop
850 || flow_loop_nested_p (lim_data->max_loop, level));
851
852 lim_data->tgt_loop = level;
853 FOR_EACH_VEC_ELT (lim_data->depends, i, dep_stmt)
854 set_level (dep_stmt, orig_loop, level);
855 }
856
857 /* Determines an outermost loop from which we want to hoist the statement STMT.
858 For now we choose the outermost possible loop. TODO -- use profiling
859 information to set it more sanely. */
860
861 static void
862 set_profitable_level (gimple stmt)
863 {
864 set_level (stmt, gimple_bb (stmt)->loop_father, get_lim_data (stmt)->max_loop);
865 }
866
867 /* Returns true if STMT is a call that has side effects. */
868
869 static bool
870 nonpure_call_p (gimple stmt)
871 {
872 if (gimple_code (stmt) != GIMPLE_CALL)
873 return false;
874
875 return gimple_has_side_effects (stmt);
876 }
877
878 /* Rewrite a/b to a*(1/b). Return the invariant stmt to process. */
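/* A sketch of the intended effect (names are illustrative; in this file
   the caller guards the rewrite with flag_unsafe_math_optimizations):

     for (...)
       x = a / b;

   is rewritten so that the loop body computes

     reciptmp = 1 / b;
     x = a * reciptmp;

   and the reciprocal statement is returned as the invariant candidate;
   the actual hoisting out of the loop happens later in the pass.  */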
879
880 static gimple
881 rewrite_reciprocal (gimple_stmt_iterator *bsi)
882 {
883 gassign *stmt, *stmt1, *stmt2;
884 tree name, lhs, type;
885 tree real_one;
886 gimple_stmt_iterator gsi;
887
888 stmt = as_a <gassign *> (gsi_stmt (*bsi));
889 lhs = gimple_assign_lhs (stmt);
890 type = TREE_TYPE (lhs);
891
892 real_one = build_one_cst (type);
893
894 name = make_temp_ssa_name (type, NULL, "reciptmp");
895 stmt1 = gimple_build_assign (name, RDIV_EXPR, real_one,
896 gimple_assign_rhs2 (stmt));
897 stmt2 = gimple_build_assign (lhs, MULT_EXPR, name,
898 gimple_assign_rhs1 (stmt));
899
900 /* Replace division stmt with reciprocal and multiply stmts.
901 The multiply stmt is not invariant, so update iterator
902 and avoid rescanning. */
903 gsi = *bsi;
904 gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
905 gsi_replace (&gsi, stmt2, true);
906
907 /* Continue processing with invariant reciprocal statement. */
908 return stmt1;
909 }
910
911 /* Check if the pattern at *BSI is a bittest of the form
912 (A >> B) & 1 != 0 and in this case rewrite it to A & (1 << B) != 0. */
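/* A sketch of why the rewrite helps (A and B are illustrative): when B
   is loop invariant but A is not,

     if (((A >> B) & 1) != 0)

   becomes

     if ((A & (1 << B)) != 0)

   so the mask 1 << B is an invariant computation the rest of the pass
   can hoist, instead of shifting the varying A on every iteration.  */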
913
914 static gimple
915 rewrite_bittest (gimple_stmt_iterator *bsi)
916 {
917 gassign *stmt;
918 gimple stmt1;
919 gassign *stmt2;
920 gimple use_stmt;
921 gcond *cond_stmt;
922 tree lhs, name, t, a, b;
923 use_operand_p use;
924
925 stmt = as_a <gassign *> (gsi_stmt (*bsi));
926 lhs = gimple_assign_lhs (stmt);
927
928 /* Verify that the single use of lhs is a comparison against zero. */
929 if (TREE_CODE (lhs) != SSA_NAME
930 || !single_imm_use (lhs, &use, &use_stmt))
931 return stmt;
932 cond_stmt = dyn_cast <gcond *> (use_stmt);
933 if (!cond_stmt)
934 return stmt;
935 if (gimple_cond_lhs (cond_stmt) != lhs
936 || (gimple_cond_code (cond_stmt) != NE_EXPR
937 && gimple_cond_code (cond_stmt) != EQ_EXPR)
938 || !integer_zerop (gimple_cond_rhs (cond_stmt)))
939 return stmt;
940
941 /* Get at the operands of the shift. The rhs is TMP1 & 1. */
942 stmt1 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
943 if (gimple_code (stmt1) != GIMPLE_ASSIGN)
944 return stmt;
945
946 /* There is a conversion in between possibly inserted by fold. */
947 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt1)))
948 {
949 t = gimple_assign_rhs1 (stmt1);
950 if (TREE_CODE (t) != SSA_NAME
951 || !has_single_use (t))
952 return stmt;
953 stmt1 = SSA_NAME_DEF_STMT (t);
954 if (gimple_code (stmt1) != GIMPLE_ASSIGN)
955 return stmt;
956 }
957
958 /* Verify that B is loop invariant but A is not. Verify that with
959 all the stmt walking we are still in the same loop. */
960 if (gimple_assign_rhs_code (stmt1) != RSHIFT_EXPR
961 || loop_containing_stmt (stmt1) != loop_containing_stmt (stmt))
962 return stmt;
963
964 a = gimple_assign_rhs1 (stmt1);
965 b = gimple_assign_rhs2 (stmt1);
966
967 if (outermost_invariant_loop (b, loop_containing_stmt (stmt1)) != NULL
968 && outermost_invariant_loop (a, loop_containing_stmt (stmt1)) == NULL)
969 {
970 gimple_stmt_iterator rsi;
971
972 /* 1 << B */
973 t = fold_build2 (LSHIFT_EXPR, TREE_TYPE (a),
974 build_int_cst (TREE_TYPE (a), 1), b);
975 name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
976 stmt1 = gimple_build_assign (name, t);
977
978 /* A & (1 << B) */
979 t = fold_build2 (BIT_AND_EXPR, TREE_TYPE (a), a, name);
980 name = make_temp_ssa_name (TREE_TYPE (a), NULL, "shifttmp");
981 stmt2 = gimple_build_assign (name, t);
982
983 /* Replace the SSA_NAME we compare against zero. Adjust
984 the type of zero accordingly. */
985 SET_USE (use, name);
986 gimple_cond_set_rhs (cond_stmt,
987 build_int_cst_type (TREE_TYPE (name),
988 0));
989
990 /* Don't use gsi_replace here, none of the new assignments sets
991 the variable originally set in stmt. Move bsi to stmt1, and
992 then remove the original stmt, so that we get a chance to
993 retain debug info for it. */
994 rsi = *bsi;
995 gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
996 gsi_insert_before (&rsi, stmt2, GSI_SAME_STMT);
997 gsi_remove (&rsi, true);
998
999 return stmt1;
1000 }
1001
1002 return stmt;
1003 }
1004
1005 /* For each statement determines the outermost loop in which it is invariant,
1006 the statements on whose motion it depends and the cost of the computation.
1007 This information is stored in the LIM_DATA structure associated with
1008 each statement. */
1009 class invariantness_dom_walker : public dom_walker
1010 {
1011 public:
1012 invariantness_dom_walker (cdi_direction direction)
1013 : dom_walker (direction) {}
1014
1015 virtual void before_dom_children (basic_block);
1016 };
1017
1018 /* Determine the outermost loops in which statements in basic block BB are
1019 invariant, and record them in the LIM_DATA associated with the statements.
1020 Callback for dom_walker. */
1021
1022 void
1023 invariantness_dom_walker::before_dom_children (basic_block bb)
1024 {
1025 enum move_pos pos;
1026 gimple_stmt_iterator bsi;
1027 gimple stmt;
1028 bool maybe_never = ALWAYS_EXECUTED_IN (bb) == NULL;
1029 struct loop *outermost = ALWAYS_EXECUTED_IN (bb);
1030 struct lim_aux_data *lim_data;
1031
1032 if (!loop_outer (bb->loop_father))
1033 return;
1034
1035 if (dump_file && (dump_flags & TDF_DETAILS))
1036 fprintf (dump_file, "Basic block %d (loop %d -- depth %d):\n\n",
1037 bb->index, bb->loop_father->num, loop_depth (bb->loop_father));
1038
1039 /* Look at PHI nodes, but only if there are at most two of them.
1040 ??? We could relax this further by post-processing the inserted
1041 code and transforming adjacent cond-exprs with the same predicate
1042 to control flow again. */
1043 bsi = gsi_start_phis (bb);
1044 if (!gsi_end_p (bsi)
1045 && ((gsi_next (&bsi), gsi_end_p (bsi))
1046 || (gsi_next (&bsi), gsi_end_p (bsi))))
1047 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1048 {
1049 stmt = gsi_stmt (bsi);
1050
1051 pos = movement_possibility (stmt);
1052 if (pos == MOVE_IMPOSSIBLE)
1053 continue;
1054
1055 lim_data = init_lim_data (stmt);
1056 lim_data->always_executed_in = outermost;
1057
1058 if (!determine_max_movement (stmt, false))
1059 {
1060 lim_data->max_loop = NULL;
1061 continue;
1062 }
1063
1064 if (dump_file && (dump_flags & TDF_DETAILS))
1065 {
1066 print_gimple_stmt (dump_file, stmt, 2, 0);
1067 fprintf (dump_file, " invariant up to level %d, cost %d.\n\n",
1068 loop_depth (lim_data->max_loop),
1069 lim_data->cost);
1070 }
1071
1072 if (lim_data->cost >= LIM_EXPENSIVE)
1073 set_profitable_level (stmt);
1074 }
1075
1076 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1077 {
1078 stmt = gsi_stmt (bsi);
1079
1080 pos = movement_possibility (stmt);
1081 if (pos == MOVE_IMPOSSIBLE)
1082 {
1083 if (nonpure_call_p (stmt))
1084 {
1085 maybe_never = true;
1086 outermost = NULL;
1087 }
1088 /* Make sure to note always_executed_in for stores to make
1089 store-motion work. */
1090 else if (stmt_makes_single_store (stmt))
1091 {
1092 struct lim_aux_data *lim_data = init_lim_data (stmt);
1093 lim_data->always_executed_in = outermost;
1094 }
1095 continue;
1096 }
1097
1098 if (is_gimple_assign (stmt)
1099 && (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
1100 == GIMPLE_BINARY_RHS))
1101 {
1102 tree op0 = gimple_assign_rhs1 (stmt);
1103 tree op1 = gimple_assign_rhs2 (stmt);
1104 struct loop *ol1 = outermost_invariant_loop (op1,
1105 loop_containing_stmt (stmt));
1106
1107 /* If the divisor is invariant, convert a/b to a*(1/b), allowing the
1108 reciprocal to be hoisted out of the loop, saving an expensive divide. */
1109 if (pos == MOVE_POSSIBLE
1110 && gimple_assign_rhs_code (stmt) == RDIV_EXPR
1111 && flag_unsafe_math_optimizations
1112 && !flag_trapping_math
1113 && ol1 != NULL
1114 && outermost_invariant_loop (op0, ol1) == NULL)
1115 stmt = rewrite_reciprocal (&bsi);
1116
1117 /* If the shift count is invariant, convert (A >> B) & 1 to
1118 A & (1 << B), allowing the bit mask to be hoisted out of the loop,
1119 saving an expensive shift. */
1120 if (pos == MOVE_POSSIBLE
1121 && gimple_assign_rhs_code (stmt) == BIT_AND_EXPR
1122 && integer_onep (op1)
1123 && TREE_CODE (op0) == SSA_NAME
1124 && has_single_use (op0))
1125 stmt = rewrite_bittest (&bsi);
1126 }
1127
1128 lim_data = init_lim_data (stmt);
1129 lim_data->always_executed_in = outermost;
1130
1131 if (maybe_never && pos == MOVE_PRESERVE_EXECUTION)
1132 continue;
1133
1134 if (!determine_max_movement (stmt, pos == MOVE_PRESERVE_EXECUTION))
1135 {
1136 lim_data->max_loop = NULL;
1137 continue;
1138 }
1139
1140 if (dump_file && (dump_flags & TDF_DETAILS))
1141 {
1142 print_gimple_stmt (dump_file, stmt, 2, 0);
1143 fprintf (dump_file, " invariant up to level %d, cost %d.\n\n",
1144 loop_depth (lim_data->max_loop),
1145 lim_data->cost);
1146 }
1147
1148 if (lim_data->cost >= LIM_EXPENSIVE)
1149 set_profitable_level (stmt);
1150 }
1151 }
1152
1153 class move_computations_dom_walker : public dom_walker
1154 {
1155 public:
1156 move_computations_dom_walker (cdi_direction direction)
1157 : dom_walker (direction), todo_ (0) {}
1158
1159 virtual void before_dom_children (basic_block);
1160
1161 unsigned int todo_;
1162 };
1163
1164 /* Hoist the statements in basic block BB out of the loops prescribed by
1165 data stored in LIM_DATA structures associated with each statement. Callback
1166 for walk_dominator_tree. */
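/* For example (SSA names hypothetical), a two-argument PHI whose
   arguments are selected by an invariant condition,

     x_1 = PHI <a_2 (true edge), b_3 (false edge)>

   is replaced by an assignment inserted on the loop preheader edge,

     x_1 = cond ? a_2 : b_3;

   while a single-argument PHI simply becomes a copy of its argument.  */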
1167
1168 void
1169 move_computations_dom_walker::before_dom_children (basic_block bb)
1170 {
1171 struct loop *level;
1172 unsigned cost = 0;
1173 struct lim_aux_data *lim_data;
1174
1175 if (!loop_outer (bb->loop_father))
1176 return;
1177
1178 for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi); )
1179 {
1180 gassign *new_stmt;
1181 gphi *stmt = bsi.phi ();
1182
1183 lim_data = get_lim_data (stmt);
1184 if (lim_data == NULL)
1185 {
1186 gsi_next (&bsi);
1187 continue;
1188 }
1189
1190 cost = lim_data->cost;
1191 level = lim_data->tgt_loop;
1192 clear_lim_data (stmt);
1193
1194 if (!level)
1195 {
1196 gsi_next (&bsi);
1197 continue;
1198 }
1199
1200 if (dump_file && (dump_flags & TDF_DETAILS))
1201 {
1202 fprintf (dump_file, "Moving PHI node\n");
1203 print_gimple_stmt (dump_file, stmt, 0, 0);
1204 fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
1205 cost, level->num);
1206 }
1207
1208 if (gimple_phi_num_args (stmt) == 1)
1209 {
1210 tree arg = PHI_ARG_DEF (stmt, 0);
1211 new_stmt = gimple_build_assign (gimple_phi_result (stmt),
1212 TREE_CODE (arg), arg);
1213 }
1214 else
1215 {
1216 basic_block dom = get_immediate_dominator (CDI_DOMINATORS, bb);
1217 gimple cond = gsi_stmt (gsi_last_bb (dom));
1218 tree arg0 = NULL_TREE, arg1 = NULL_TREE, t;
1219 /* Get the PHI arguments corresponding to the true and false
1220 edges of COND. */
1221 extract_true_false_args_from_phi (dom, stmt, &arg0, &arg1);
1222 gcc_assert (arg0 && arg1);
1223 t = build2 (gimple_cond_code (cond), boolean_type_node,
1224 gimple_cond_lhs (cond), gimple_cond_rhs (cond));
1225 new_stmt = gimple_build_assign (gimple_phi_result (stmt),
1226 COND_EXPR, t, arg0, arg1);
1227 todo_ |= TODO_cleanup_cfg;
1228 }
1229 if (INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_lhs (new_stmt)))
1230 && (!ALWAYS_EXECUTED_IN (bb)
1231 || (ALWAYS_EXECUTED_IN (bb) != level
1232 && !flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
1233 {
1234 tree lhs = gimple_assign_lhs (new_stmt);
1235 SSA_NAME_RANGE_INFO (lhs) = NULL;
1236 SSA_NAME_ANTI_RANGE_P (lhs) = 0;
1237 }
1238 gsi_insert_on_edge (loop_preheader_edge (level), new_stmt);
1239 remove_phi_node (&bsi, false);
1240 }
1241
1242 for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); )
1243 {
1244 edge e;
1245
1246 gimple stmt = gsi_stmt (bsi);
1247
1248 lim_data = get_lim_data (stmt);
1249 if (lim_data == NULL)
1250 {
1251 gsi_next (&bsi);
1252 continue;
1253 }
1254
1255 cost = lim_data->cost;
1256 level = lim_data->tgt_loop;
1257 clear_lim_data (stmt);
1258
1259 if (!level)
1260 {
1261 gsi_next (&bsi);
1262 continue;
1263 }
1264
1265 /* We do not really want to move conditionals out of the loop; we just
1266 placed them here to force their operands to be moved if necessary. */
1267 if (gimple_code (stmt) == GIMPLE_COND)
1268 continue;
1269
1270 if (dump_file && (dump_flags & TDF_DETAILS))
1271 {
1272 fprintf (dump_file, "Moving statement\n");
1273 print_gimple_stmt (dump_file, stmt, 0, 0);
1274 fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
1275 cost, level->num);
1276 }
1277
1278 e = loop_preheader_edge (level);
1279 gcc_assert (!gimple_vdef (stmt));
1280 if (gimple_vuse (stmt))
1281 {
1282 /* The new VUSE is the one from the virtual PHI in the loop
1283 header or the one already present. */
1284 gphi_iterator gsi2;
1285 for (gsi2 = gsi_start_phis (e->dest);
1286 !gsi_end_p (gsi2); gsi_next (&gsi2))
1287 {
1288 gphi *phi = gsi2.phi ();
1289 if (virtual_operand_p (gimple_phi_result (phi)))
1290 {
1291 gimple_set_vuse (stmt, PHI_ARG_DEF_FROM_EDGE (phi, e));
1292 break;
1293 }
1294 }
1295 }
1296 gsi_remove (&bsi, false);
1297 if (gimple_has_lhs (stmt)
1298 && TREE_CODE (gimple_get_lhs (stmt)) == SSA_NAME
1299 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_get_lhs (stmt)))
1300 && (!ALWAYS_EXECUTED_IN (bb)
1301 || !(ALWAYS_EXECUTED_IN (bb) == level
1302 || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
1303 {
1304 tree lhs = gimple_get_lhs (stmt);
1305 SSA_NAME_RANGE_INFO (lhs) = NULL;
1306 SSA_NAME_ANTI_RANGE_P (lhs) = 0;
1307 }
1308 /* In case this is a stmt that is not unconditionally executed
1309 when the target loop header is executed and the stmt may
1310 invoke undefined integer or pointer overflow, rewrite it to
1311 unsigned arithmetic. */
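/* For example (types and names illustrative), a hoisted
     i_3 = i_2 + 1;   with signed int operands
   is emitted on the preheader edge roughly as
     tmp = (unsigned int) i_2 + 1;
     i_3 = (int) tmp;
   so that executing it unconditionally cannot introduce undefined
   signed overflow.  */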
1312 if (is_gimple_assign (stmt)
1313 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_lhs (stmt)))
1314 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (gimple_assign_lhs (stmt)))
1315 && arith_code_with_undefined_signed_overflow
1316 (gimple_assign_rhs_code (stmt))
1317 && (!ALWAYS_EXECUTED_IN (bb)
1318 || !(ALWAYS_EXECUTED_IN (bb) == level
1319 || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
1320 gsi_insert_seq_on_edge (e, rewrite_to_defined_overflow (stmt));
1321 else
1322 gsi_insert_on_edge (e, stmt);
1323 }
1324 }
1325
1326 /* Hoist the statements out of the loops prescribed by data stored in
1327 LIM_DATA structures associated with each statement. */
1328
1329 static unsigned int
1330 move_computations (void)
1331 {
1332 move_computations_dom_walker walker (CDI_DOMINATORS);
1333 walker.walk (cfun->cfg->x_entry_block_ptr);
1334
1335 gsi_commit_edge_inserts ();
1336 if (need_ssa_update_p (cfun))
1337 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1338
1339 return walker.todo_;
1340 }
1341
1342 /* Checks whether the statement defining variable *INDEX can be hoisted
1343 out of the loop passed in DATA. Callback for for_each_index. */
1344
1345 static bool
1346 may_move_till (tree ref, tree *index, void *data)
1347 {
1348 struct loop *loop = (struct loop *) data, *max_loop;
1349
1350 /* If REF is an array reference, check also that the step and the lower
1351 bound are invariant in LOOP. */
1352 if (TREE_CODE (ref) == ARRAY_REF)
1353 {
1354 tree step = TREE_OPERAND (ref, 3);
1355 tree lbound = TREE_OPERAND (ref, 2);
1356
1357 max_loop = outermost_invariant_loop (step, loop);
1358 if (!max_loop)
1359 return false;
1360
1361 max_loop = outermost_invariant_loop (lbound, loop);
1362 if (!max_loop)
1363 return false;
1364 }
1365
1366 max_loop = outermost_invariant_loop (*index, loop);
1367 if (!max_loop)
1368 return false;
1369
1370 return true;
1371 }
1372
1373 /* If OP is an SSA_NAME, force the statement that defines it to be
1374 moved out of the LOOP. ORIG_LOOP is the loop in which OP is used. */
1375
1376 static void
1377 force_move_till_op (tree op, struct loop *orig_loop, struct loop *loop)
1378 {
1379 gimple stmt;
1380
1381 if (!op
1382 || is_gimple_min_invariant (op))
1383 return;
1384
1385 gcc_assert (TREE_CODE (op) == SSA_NAME);
1386
1387 stmt = SSA_NAME_DEF_STMT (op);
1388 if (gimple_nop_p (stmt))
1389 return;
1390
1391 set_level (stmt, orig_loop, loop);
1392 }
1393
1394 /* Forces statements defining invariants in REF (and *INDEX) to be moved out of
1395 the LOOP. The reference REF is used in the loop ORIG_LOOP. Callback for
1396 for_each_index. */
1397
1398 struct fmt_data
1399 {
1400 struct loop *loop;
1401 struct loop *orig_loop;
1402 };
1403
1404 static bool
1405 force_move_till (tree ref, tree *index, void *data)
1406 {
1407 struct fmt_data *fmt_data = (struct fmt_data *) data;
1408
1409 if (TREE_CODE (ref) == ARRAY_REF)
1410 {
1411 tree step = TREE_OPERAND (ref, 3);
1412 tree lbound = TREE_OPERAND (ref, 2);
1413
1414 force_move_till_op (step, fmt_data->orig_loop, fmt_data->loop);
1415 force_move_till_op (lbound, fmt_data->orig_loop, fmt_data->loop);
1416 }
1417
1418 force_move_till_op (*index, fmt_data->orig_loop, fmt_data->loop);
1419
1420 return true;
1421 }
1422
1423 /* A function to free the mem_ref object MEM. */
1424
1425 static void
1426 memref_free (struct im_mem_ref *mem)
1427 {
1428 mem->accesses_in_loop.release ();
1429 }
1430
1431 /* Allocates and returns a memory reference description for MEM whose hash
1432 value is HASH and id is ID. */
1433
1434 static mem_ref_p
1435 mem_ref_alloc (tree mem, unsigned hash, unsigned id)
1436 {
1437 mem_ref_p ref = XOBNEW (&mem_ref_obstack, struct im_mem_ref);
1438 ao_ref_init (&ref->mem, mem);
1439 ref->id = id;
1440 ref->hash = hash;
1441 ref->stored = NULL;
1442 bitmap_initialize (&ref->indep_loop, &lim_bitmap_obstack);
1443 bitmap_initialize (&ref->dep_loop, &lim_bitmap_obstack);
1444 ref->accesses_in_loop.create (1);
1445
1446 return ref;
1447 }
1448
1449 /* Records memory reference location *LOC to the memory reference
1450 description REF. The reference occurs in statement STMT. */
1451
1452 static void
1453 record_mem_ref_loc (mem_ref_p ref, gimple stmt, tree *loc)
1454 {
1455 mem_ref_loc aref;
1456 aref.stmt = stmt;
1457 aref.ref = loc;
1458 ref->accesses_in_loop.safe_push (aref);
1459 }
1460
1461 /* Set the LOOP bit in REF's stored bitmap, allocating the bitmap if
1462 necessary. Return whether the bit was changed. */
1463
1464 static bool
1465 set_ref_stored_in_loop (mem_ref_p ref, struct loop *loop)
1466 {
1467 if (!ref->stored)
1468 ref->stored = BITMAP_ALLOC (&lim_bitmap_obstack);
1469 return bitmap_set_bit (ref->stored, loop->num);
1470 }
1471
1472 /* Marks reference REF as stored in LOOP and in all its outer loops. */
1473
1474 static void
1475 mark_ref_stored (mem_ref_p ref, struct loop *loop)
1476 {
1477 while (loop != current_loops->tree_root
1478 && set_ref_stored_in_loop (ref, loop))
1479 loop = loop_outer (loop);
1480 }
1481
1482 /* Gathers memory references in statement STMT in LOOP, storing the
1483 information about them in the memory_accesses structure. Marks
1484 the vops accessed through unrecognized statements there as
1485 well. */
1486
1487 static void
1488 gather_mem_refs_stmt (struct loop *loop, gimple stmt)
1489 {
1490 tree *mem = NULL;
1491 hashval_t hash;
1492 im_mem_ref **slot;
1493 mem_ref_p ref;
1494 bool is_stored;
1495 unsigned id;
1496
1497 if (!gimple_vuse (stmt))
1498 return;
1499
1500 mem = simple_mem_ref_in_stmt (stmt, &is_stored);
1501 if (!mem)
1502 {
1503 /* We use the shared mem_ref for all unanalyzable refs. */
1504 id = UNANALYZABLE_MEM_ID;
1505 ref = memory_accesses.refs_list[id];
1506 if (dump_file && (dump_flags & TDF_DETAILS))
1507 {
1508 fprintf (dump_file, "Unanalyzed memory reference %u: ", id);
1509 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1510 }
1511 is_stored = gimple_vdef (stmt);
1512 }
1513 else
1514 {
1515 hash = iterative_hash_expr (*mem, 0);
1516 slot = memory_accesses.refs->find_slot_with_hash (*mem, hash, INSERT);
1517 if (*slot)
1518 {
1519 ref = (mem_ref_p) *slot;
1520 id = ref->id;
1521 }
1522 else
1523 {
1524 id = memory_accesses.refs_list.length ();
1525 ref = mem_ref_alloc (*mem, hash, id);
1526 memory_accesses.refs_list.safe_push (ref);
1527 *slot = ref;
1528
1529 if (dump_file && (dump_flags & TDF_DETAILS))
1530 {
1531 fprintf (dump_file, "Memory reference %u: ", id);
1532 print_generic_expr (dump_file, ref->mem.ref, TDF_SLIM);
1533 fprintf (dump_file, "\n");
1534 }
1535 }
1536
1537 record_mem_ref_loc (ref, stmt, mem);
1538 }
1539 bitmap_set_bit (&memory_accesses.refs_in_loop[loop->num], ref->id);
1540 if (is_stored)
1541 {
1542 bitmap_set_bit (&memory_accesses.refs_stored_in_loop[loop->num], ref->id);
1543 mark_ref_stored (ref, loop);
1544 }
1545 return;
1546 }
1547
1548 static unsigned *bb_loop_postorder;
1549
1550 /* qsort comparison function to sort blocks by their loop father's postorder number. */
1551
1552 static int
1553 sort_bbs_in_loop_postorder_cmp (const void *bb1_, const void *bb2_)
1554 {
1555 basic_block bb1 = *(basic_block *)const_cast<void *>(bb1_);
1556 basic_block bb2 = *(basic_block *)const_cast<void *>(bb2_);
1557 struct loop *loop1 = bb1->loop_father;
1558 struct loop *loop2 = bb2->loop_father;
1559 if (loop1->num == loop2->num)
1560 return 0;
1561 return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
1562 }
1563
1564 /* qsort comparison function to sort ref locs by their loop father's postorder number. */
1565
1566 static int
1567 sort_locs_in_loop_postorder_cmp (const void *loc1_, const void *loc2_)
1568 {
1569 mem_ref_loc *loc1 = (mem_ref_loc *)const_cast<void *>(loc1_);
1570 mem_ref_loc *loc2 = (mem_ref_loc *)const_cast<void *>(loc2_);
1571 struct loop *loop1 = gimple_bb (loc1->stmt)->loop_father;
1572 struct loop *loop2 = gimple_bb (loc2->stmt)->loop_father;
1573 if (loop1->num == loop2->num)
1574 return 0;
1575 return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 1;
1576 }
1577
1578 /* Gathers memory references in loops. */
1579
1580 static void
1581 analyze_memory_references (void)
1582 {
1583 gimple_stmt_iterator bsi;
1584 basic_block bb, *bbs;
1585 struct loop *loop, *outer;
1586 unsigned i, n;
1587
1588 /* Collect all basic blocks in loops and sort them by their
1589 loop's postorder number. */
1590 i = 0;
1591 bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
1592 FOR_EACH_BB_FN (bb, cfun)
1593 if (bb->loop_father != current_loops->tree_root)
1594 bbs[i++] = bb;
1595 n = i;
1596 qsort (bbs, n, sizeof (basic_block), sort_bbs_in_loop_postorder_cmp);
1597
1598 /* Visit blocks in loop postorder and assign mem-ref IDs in that order.
1599 That results in better locality for all the bitmaps. */
1600 for (i = 0; i < n; ++i)
1601 {
1602 basic_block bb = bbs[i];
1603 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1604 gather_mem_refs_stmt (bb->loop_father, gsi_stmt (bsi));
1605 }
1606
1607 /* Sort the location list of gathered memory references by their
1608 loop postorder number. */
1609 im_mem_ref *ref;
1610 FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
1611 ref->accesses_in_loop.qsort (sort_locs_in_loop_postorder_cmp);
1612
1613 free (bbs);
1614 // free (bb_loop_postorder);
1615
1616 /* Propagate the information about accessed memory references up
1617 the loop hierarchy. */
1618 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
1619 {
1620 /* Finalize the overall touched references (including subloops). */
1621 bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[loop->num],
1622 &memory_accesses.refs_stored_in_loop[loop->num]);
1623
1624 /* Propagate the information about accessed memory references up
1625 the loop hierarchy. */
1626 outer = loop_outer (loop);
1627 if (outer == current_loops->tree_root)
1628 continue;
1629
1630 bitmap_ior_into (&memory_accesses.all_refs_stored_in_loop[outer->num],
1631 &memory_accesses.all_refs_stored_in_loop[loop->num]);
1632 }
1633 }
1634
1635 /* Returns true if MEM1 and MEM2 may alias. TTAE_CACHE is used as a cache in
1636 tree_to_aff_combination_expand. */
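/* As an illustration (array and index names hypothetical): for two
   accesses a[i] and a[i + 1] with the same base, the expanded affine
   offsets differ by exactly one element, so aff_comb_cannot_overlap_p
   detects that the locations cannot overlap and the references are
   reported as not aliasing.  */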
1637
1638 static bool
1639 mem_refs_may_alias_p (mem_ref_p mem1, mem_ref_p mem2,
1640 hash_map<tree, name_expansion *> **ttae_cache)
1641 {
1642 /* Perform BASE + OFFSET analysis -- if MEM1 and MEM2 are based on the same
1643 object and their offsets differ in such a way that the locations cannot
1644 overlap, then they cannot alias. */
1645 widest_int size1, size2;
1646 aff_tree off1, off2;
1647
1648 /* Perform basic offset and type-based disambiguation. */
1649 if (!refs_may_alias_p_1 (&mem1->mem, &mem2->mem, true))
1650 return false;
1651
1652 /* The expansion of addresses may be a bit expensive, thus we only do
1653 the check at -O2 and higher optimization levels. */
1654 if (optimize < 2)
1655 return true;
1656
1657 get_inner_reference_aff (mem1->mem.ref, &off1, &size1);
1658 get_inner_reference_aff (mem2->mem.ref, &off2, &size2);
1659 aff_combination_expand (&off1, ttae_cache);
1660 aff_combination_expand (&off2, ttae_cache);
1661 aff_combination_scale (&off1, -1);
1662 aff_combination_add (&off2, &off1);
1663
1664 if (aff_comb_cannot_overlap_p (&off2, size1, size2))
1665 return false;
1666
1667 return true;
1668 }
1669
1670 /* Compare function for bsearch searching for reference locations
1671 in a loop. */
1672
1673 static int
1674 find_ref_loc_in_loop_cmp (const void *loop_, const void *loc_)
1675 {
1676 struct loop *loop = (struct loop *)const_cast<void *>(loop_);
1677 mem_ref_loc *loc = (mem_ref_loc *)const_cast<void *>(loc_);
1678 struct loop *loc_loop = gimple_bb (loc->stmt)->loop_father;
1679 if (loop->num == loc_loop->num
1680 || flow_loop_nested_p (loop, loc_loop))
1681 return 0;
1682 return (bb_loop_postorder[loop->num] < bb_loop_postorder[loc_loop->num]
1683 ? -1 : 1);
1684 }
1685
1686 /* Iterates over all locations of REF in LOOP and its subloops calling
1687 fn.operator() with the location as argument. When that operator
1688 returns true the iteration is stopped and true is returned.
1689 Otherwise false is returned. */
1690
1691 template <typename FN>
1692 static bool
1693 for_all_locs_in_loop (struct loop *loop, mem_ref_p ref, FN fn)
1694 {
1695 unsigned i;
1696 mem_ref_loc_p loc;
1697
1698 /* Search for the cluster of locs in the accesses_in_loop vector,
1699 which is sorted by the postorder index of the loop father. */
1700 loc = ref->accesses_in_loop.bsearch (loop, find_ref_loc_in_loop_cmp);
1701 if (!loc)
1702 return false;
1703
1704 /* We have found one location inside loop or its sub-loops. Iterate
1705 both forward and backward to cover the whole cluster. */
1706 i = loc - ref->accesses_in_loop.address ();
1707 while (i > 0)
1708 {
1709 --i;
1710 mem_ref_loc_p l = &ref->accesses_in_loop[i];
1711 if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
1712 break;
1713 if (fn (l))
1714 return true;
1715 }
1716 for (i = loc - ref->accesses_in_loop.address ();
1717 i < ref->accesses_in_loop.length (); ++i)
1718 {
1719 mem_ref_loc_p l = &ref->accesses_in_loop[i];
1720 if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
1721 break;
1722 if (fn (l))
1723 return true;
1724 }
1725
1726 return false;
1727 }
1728
1729 /* Rewrites location LOC by TMP_VAR. */
1730
1731 struct rewrite_mem_ref_loc
1732 {
1733 rewrite_mem_ref_loc (tree tmp_var_) : tmp_var (tmp_var_) {}
1734 bool operator () (mem_ref_loc_p loc);
1735 tree tmp_var;
1736 };
1737
1738 bool
1739 rewrite_mem_ref_loc::operator () (mem_ref_loc_p loc)
1740 {
1741 *loc->ref = tmp_var;
1742 update_stmt (loc->stmt);
1743 return false;
1744 }
1745
1746 /* Rewrites all references to REF in LOOP by variable TMP_VAR. */
1747
1748 static void
1749 rewrite_mem_refs (struct loop *loop, mem_ref_p ref, tree tmp_var)
1750 {
1751 for_all_locs_in_loop (loop, ref, rewrite_mem_ref_loc (tmp_var));
1752 }
1753
1754 /* Stores the first reference location in LOCP. */
1755
1756 struct first_mem_ref_loc_1
1757 {
1758 first_mem_ref_loc_1 (mem_ref_loc_p *locp_) : locp (locp_) {}
1759 bool operator () (mem_ref_loc_p loc);
1760 mem_ref_loc_p *locp;
1761 };
1762
1763 bool
1764 first_mem_ref_loc_1::operator () (mem_ref_loc_p loc)
1765 {
1766 *locp = loc;
1767 return true;
1768 }
1769
1770 /* Returns the first reference location to REF in LOOP. */
1771
1772 static mem_ref_loc_p
1773 first_mem_ref_loc (struct loop *loop, mem_ref_p ref)
1774 {
1775 mem_ref_loc_p locp = NULL;
1776 for_all_locs_in_loop (loop, ref, first_mem_ref_loc_1 (&locp));
1777 return locp;
1778 }
1779
1780 struct prev_flag_edges {
1781 /* Edge to insert new flag comparison code. */
1782 edge append_cond_position;
1783
1784 /* Edge for fall through from previous flag comparison. */
1785 edge last_cond_fallthru;
1786 };
1787
1788 /* Helper function for execute_sm. Emit code to store TMP_VAR into
1789 MEM along edge EX.
1790
1791 The store is only done if MEM has changed. We do this so no
1792 changes to MEM occur on code paths that did not originally store
1793 into it.
1794
1795 The common case for execute_sm will transform:
1796
1797 for (...) {
1798 if (foo)
1799 stuff;
1800 else
1801 MEM = TMP_VAR;
1802 }
1803
1804 into:
1805
1806 lsm = MEM;
1807 for (...) {
1808 if (foo)
1809 stuff;
1810 else
1811 lsm = TMP_VAR;
1812 }
1813 MEM = lsm;
1814
1815 This function will generate:
1816
1817 lsm = MEM;
1818
1819 lsm_flag = false;
1820 ...
1821 for (...) {
1822 if (foo)
1823 stuff;
1824 else {
1825 lsm = TMP_VAR;
1826 lsm_flag = true;
1827 }
1828 }
1829 if (lsm_flag) <--
1830 MEM = lsm; <--
1831 */
1832
1833 static void
1834 execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag)
1835 {
1836 basic_block new_bb, then_bb, old_dest;
1837 bool loop_has_only_one_exit;
1838 edge then_old_edge, orig_ex = ex;
1839 gimple_stmt_iterator gsi;
1840 gimple stmt;
1841 struct prev_flag_edges *prev_edges = (struct prev_flag_edges *) ex->aux;
1842 bool irr = ex->flags & EDGE_IRREDUCIBLE_LOOP;
1843
1844 /* ?? Insert store after previous store if applicable. See note
1845 below. */
1846 if (prev_edges)
1847 ex = prev_edges->append_cond_position;
1848
1849 loop_has_only_one_exit = single_pred_p (ex->dest);
1850
1851 if (loop_has_only_one_exit)
1852 ex = split_block_after_labels (ex->dest);
1853
1854 old_dest = ex->dest;
1855 new_bb = split_edge (ex);
1856 then_bb = create_empty_bb (new_bb);
1857 if (irr)
1858 then_bb->flags = BB_IRREDUCIBLE_LOOP;
1859 add_bb_to_loop (then_bb, new_bb->loop_father);
1860
1861 gsi = gsi_start_bb (new_bb);
1862 stmt = gimple_build_cond (NE_EXPR, flag, boolean_false_node,
1863 NULL_TREE, NULL_TREE);
1864 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1865
1866 gsi = gsi_start_bb (then_bb);
1867 /* Insert actual store. */
1868 stmt = gimple_build_assign (unshare_expr (mem), tmp_var);
1869 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1870
1871 make_edge (new_bb, then_bb,
1872 EDGE_TRUE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
1873 make_edge (new_bb, old_dest,
1874 EDGE_FALSE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
1875 then_old_edge = make_edge (then_bb, old_dest,
1876 EDGE_FALLTHRU | (irr ? EDGE_IRREDUCIBLE_LOOP : 0));
1877
1878 set_immediate_dominator (CDI_DOMINATORS, then_bb, new_bb);
1879
1880 if (prev_edges)
1881 {
1882 basic_block prevbb = prev_edges->last_cond_fallthru->src;
1883 redirect_edge_succ (prev_edges->last_cond_fallthru, new_bb);
1884 set_immediate_dominator (CDI_DOMINATORS, new_bb, prevbb);
1885 set_immediate_dominator (CDI_DOMINATORS, old_dest,
1886 recompute_dominator (CDI_DOMINATORS, old_dest));
1887 }
1888
1889 /* ?? Because stores may alias, they must happen in the exact
1890 sequence in which they originally happened. Save the position right
1891 after the (_lsm) store we just created so we can continue appending
1892 after it and maintain the original order. */
1893 {
1894 struct prev_flag_edges *p;
1895
1896 if (orig_ex->aux)
1897 orig_ex->aux = NULL;
1898 alloc_aux_for_edge (orig_ex, sizeof (struct prev_flag_edges));
1899 p = (struct prev_flag_edges *) orig_ex->aux;
1900 p->append_cond_position = then_old_edge;
1901 p->last_cond_fallthru = find_edge (new_bb, old_dest);
1902 orig_ex->aux = (void *) p;
1903 }
1904
1905 if (!loop_has_only_one_exit)
1906 for (gphi_iterator gpi = gsi_start_phis (old_dest);
1907 !gsi_end_p (gpi); gsi_next (&gpi))
1908 {
1909 gphi *phi = gpi.phi ();
1910 unsigned i;
1911
1912 for (i = 0; i < gimple_phi_num_args (phi); i++)
1913 if (gimple_phi_arg_edge (phi, i)->src == new_bb)
1914 {
1915 tree arg = gimple_phi_arg_def (phi, i);
1916 add_phi_arg (phi, arg, then_old_edge, UNKNOWN_LOCATION);
1917 update_stmt (phi);
1918 }
1919 }
1920 /* Remove the original fall through edge. This was the
1921 single_succ_edge (new_bb). */
1922 EDGE_SUCC (new_bb, 0)->flags &= ~EDGE_FALLTHRU;
1923 }
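
/* Illustrative sketch only (the references A, B and the *_lsm/*_flag
   temporaries are invented for this example): when two store-motioned
   references leave the loop over the same exit edge, the chaining
   through prev_flag_edges above appends each new flag test after the
   previously emitted conditional store, yielding roughly

       if (A_flag)
         A = A_lsm;
       if (B_flag)
         B = B_lsm;

   so that possibly aliasing stores still happen in their original
   order.  */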
1924
1925 /* When REF is stored to at the location, set the flag indicating the store. */
1926
1927 struct sm_set_flag_if_changed
1928 {
1929 sm_set_flag_if_changed (tree flag_) : flag (flag_) {}
1930 bool operator () (mem_ref_loc_p loc);
1931 tree flag;
1932 };
1933
1934 bool
1935 sm_set_flag_if_changed::operator () (mem_ref_loc_p loc)
1936 {
1937 /* Only set the flag for writes. */
1938 if (is_gimple_assign (loc->stmt)
1939 && gimple_assign_lhs_ptr (loc->stmt) == loc->ref)
1940 {
1941 gimple_stmt_iterator gsi = gsi_for_stmt (loc->stmt);
1942 gimple stmt = gimple_build_assign (flag, boolean_true_node);
1943 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1944 }
1945 return false;
1946 }
1947
1948 /* Helper function for execute_sm. On every location where REF is
1949 stored to, set an appropriate flag indicating the store. */
1950
1951 static tree
1952 execute_sm_if_changed_flag_set (struct loop *loop, mem_ref_p ref)
1953 {
1954 tree flag;
1955 char *str = get_lsm_tmp_name (ref->mem.ref, ~0, "_flag");
1956 flag = create_tmp_reg (boolean_type_node, str);
1957 for_all_locs_in_loop (loop, ref, sm_set_flag_if_changed (flag));
1958 return flag;
1959 }
1960
1961 /* Executes store motion of memory reference REF from LOOP.
1962 Exits from the LOOP are stored in EXITS. The initialization of the
1963 temporary variable is put in the preheader of the loop, and assignments
1964 to the reference from the temporary variable are emitted on the exits. */
1965
1966 static void
1967 execute_sm (struct loop *loop, vec<edge> exits, mem_ref_p ref)
1968 {
1969 tree tmp_var, store_flag = NULL_TREE;
1970 unsigned i;
1971 gassign *load;
1972 struct fmt_data fmt_data;
1973 edge ex;
1974 struct lim_aux_data *lim_data;
1975 bool multi_threaded_model_p = false;
1976 gimple_stmt_iterator gsi;
1977
1978 if (dump_file && (dump_flags & TDF_DETAILS))
1979 {
1980 fprintf (dump_file, "Executing store motion of ");
1981 print_generic_expr (dump_file, ref->mem.ref, 0);
1982 fprintf (dump_file, " from loop %d\n", loop->num);
1983 }
1984
1985 tmp_var = create_tmp_reg (TREE_TYPE (ref->mem.ref),
1986 get_lsm_tmp_name (ref->mem.ref, ~0));
1987
1988 fmt_data.loop = loop;
1989 fmt_data.orig_loop = loop;
1990 for_each_index (&ref->mem.ref, force_move_till, &fmt_data);
1991
1992 if (bb_in_transaction (loop_preheader_edge (loop)->src)
1993 || !PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES))
1994 multi_threaded_model_p = true;
1995
1996 if (multi_threaded_model_p)
1997 store_flag = execute_sm_if_changed_flag_set (loop, ref);
1998
1999 rewrite_mem_refs (loop, ref, tmp_var);
2000
2001 /* Emit the load code into the loop, before the first occurrence of
2002 the reference, so that we are sure it will be processed by
2003 move_computations after all its dependencies. */
2004 gsi = gsi_for_stmt (first_mem_ref_loc (loop, ref)->stmt);
2005
2006 /* FIXME/TODO: For the multi-threaded variant, we could avoid this
2007 load altogether, since the store is predicated by a flag. We
2008 could do the load only if it was originally in the loop. */
2009 load = gimple_build_assign (tmp_var, unshare_expr (ref->mem.ref));
2010 lim_data = init_lim_data (load);
2011 lim_data->max_loop = loop;
2012 lim_data->tgt_loop = loop;
2013 gsi_insert_before (&gsi, load, GSI_SAME_STMT);
2014
2015 if (multi_threaded_model_p)
2016 {
2017 load = gimple_build_assign (store_flag, boolean_false_node);
2018 lim_data = init_lim_data (load);
2019 lim_data->max_loop = loop;
2020 lim_data->tgt_loop = loop;
2021 gsi_insert_before (&gsi, load, GSI_SAME_STMT);
2022 }
2023
2024 /* Sink the store to every exit from the loop. */
2025 FOR_EACH_VEC_ELT (exits, i, ex)
2026 if (!multi_threaded_model_p)
2027 {
2028 gassign *store;
2029 store = gimple_build_assign (unshare_expr (ref->mem.ref), tmp_var);
2030 gsi_insert_on_edge (ex, store);
2031 }
2032 else
2033 execute_sm_if_changed (ex, ref->mem.ref, tmp_var, store_flag);
2034 }
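
/* As an informal, hand-written illustration of the transformation
   performed by execute_sm (the names n, a, p and the p_lsm/p_flag
   temporaries are invented for the example; the actual temporaries are
   created via create_tmp_reg and get_lsm_tmp_name above): with the
   multi-threaded model, a loop such as

       for (i = 0; i < n; i++)
         if (a[i])
           *p = a[i];

   is rewritten along the lines of

       p_lsm = *p;
       p_flag = false;
       for (i = 0; i < n; i++)
         if (a[i])
           {
             p_lsm = a[i];
             p_flag = true;
           }
       if (p_flag)
         *p = p_lsm;

   so that *p is never written on a path that did not store to it
   originally, which avoids introducing data races.  */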
2035
2036 /* Hoists memory references MEM_REFS out of LOOP. EXITS is the list of exit
2037 edges of the LOOP. */
2038
2039 static void
2040 hoist_memory_references (struct loop *loop, bitmap mem_refs,
2041 vec<edge> exits)
2042 {
2043 mem_ref_p ref;
2044 unsigned i;
2045 bitmap_iterator bi;
2046
2047 EXECUTE_IF_SET_IN_BITMAP (mem_refs, 0, i, bi)
2048 {
2049 ref = memory_accesses.refs_list[i];
2050 execute_sm (loop, exits, ref);
2051 }
2052 }
2053
2054 struct ref_always_accessed
2055 {
2056 ref_always_accessed (struct loop *loop_, bool stored_p_)
2057 : loop (loop_), stored_p (stored_p_) {}
2058 bool operator () (mem_ref_loc_p loc);
2059 struct loop *loop;
2060 bool stored_p;
2061 };
2062
2063 bool
2064 ref_always_accessed::operator () (mem_ref_loc_p loc)
2065 {
2066 struct loop *must_exec;
2067
2068 if (!get_lim_data (loc->stmt))
2069 return false;
2070
2071 /* If we require an always executed store, make sure the statement
2072 stores to the reference. */
2073 if (stored_p)
2074 {
2075 tree lhs = gimple_get_lhs (loc->stmt);
2076 if (!lhs
2077 || lhs != *loc->ref)
2078 return false;
2079 }
2080
2081 must_exec = get_lim_data (loc->stmt)->always_executed_in;
2082 if (!must_exec)
2083 return false;
2084
2085 if (must_exec == loop
2086 || flow_loop_nested_p (must_exec, loop))
2087 return true;
2088
2089 return false;
2090 }
2091
2092 /* Returns true if REF is always accessed in LOOP. If STORED_P is true,
2093 make sure REF is always stored to in LOOP. */
2094
2095 static bool
2096 ref_always_accessed_p (struct loop *loop, mem_ref_p ref, bool stored_p)
2097 {
2098 return for_all_locs_in_loop (loop, ref,
2099 ref_always_accessed (loop, stored_p));
2100 }
2101
2102 /* Returns true if REF1 and REF2 are independent. */
2103
2104 static bool
2105 refs_independent_p (mem_ref_p ref1, mem_ref_p ref2)
2106 {
2107 if (ref1 == ref2)
2108 return true;
2109
2110 if (dump_file && (dump_flags & TDF_DETAILS))
2111 fprintf (dump_file, "Querying dependency of refs %u and %u: ",
2112 ref1->id, ref2->id);
2113
2114 if (mem_refs_may_alias_p (ref1, ref2, &memory_accesses.ttae_cache))
2115 {
2116 if (dump_file && (dump_flags & TDF_DETAILS))
2117 fprintf (dump_file, "dependent.\n");
2118 return false;
2119 }
2120 else
2121 {
2122 if (dump_file && (dump_flags & TDF_DETAILS))
2123 fprintf (dump_file, "independent.\n");
2124 return true;
2125 }
2126 }
2127
2128 /* Mark REF dependent on stores or loads (according to STORED_P) in LOOP
2129 and its super-loops. */
2130
2131 static void
2132 record_dep_loop (struct loop *loop, mem_ref_p ref, bool stored_p)
2133 {
2134 /* We can propagate dependent-in-loop bits up the loop
2135 hierarchy to all outer loops. */
2136 while (loop != current_loops->tree_root
2137 && bitmap_set_bit (&ref->dep_loop, LOOP_DEP_BIT (loop->num, stored_p)))
2138 loop = loop_outer (loop);
2139 }
2140
2141 /* Returns true if REF is independent of all other memory references in
2142 LOOP. */
2143
2144 static bool
2145 ref_indep_loop_p_1 (struct loop *loop, mem_ref_p ref, bool stored_p)
2146 {
2147 bitmap refs_to_check;
2148 unsigned i;
2149 bitmap_iterator bi;
2150 mem_ref_p aref;
2151
2152 if (stored_p)
2153 refs_to_check = &memory_accesses.refs_in_loop[loop->num];
2154 else
2155 refs_to_check = &memory_accesses.refs_stored_in_loop[loop->num];
2156
2157 if (bitmap_bit_p (refs_to_check, UNANALYZABLE_MEM_ID))
2158 return false;
2159
2160 EXECUTE_IF_SET_IN_BITMAP (refs_to_check, 0, i, bi)
2161 {
2162 aref = memory_accesses.refs_list[i];
2163 if (!refs_independent_p (ref, aref))
2164 return false;
2165 }
2166
2167 return true;
2168 }
2169
2170 /* Returns true if REF is independent of all other memory references in
2171 LOOP. Wrapper over ref_indep_loop_p_1, caching its results. */
2172
2173 static bool
2174 ref_indep_loop_p_2 (struct loop *loop, mem_ref_p ref, bool stored_p)
2175 {
2176 stored_p |= (ref->stored && bitmap_bit_p (ref->stored, loop->num));
2177
2178 if (bitmap_bit_p (&ref->indep_loop, LOOP_DEP_BIT (loop->num, stored_p)))
2179 return true;
2180 if (bitmap_bit_p (&ref->dep_loop, LOOP_DEP_BIT (loop->num, stored_p)))
2181 return false;
2182
2183 struct loop *inner = loop->inner;
2184 while (inner)
2185 {
2186 if (!ref_indep_loop_p_2 (inner, ref, stored_p))
2187 return false;
2188 inner = inner->next;
2189 }
2190
2191 bool indep_p = ref_indep_loop_p_1 (loop, ref, stored_p);
2192
2193 if (dump_file && (dump_flags & TDF_DETAILS))
2194 fprintf (dump_file, "Querying dependencies of ref %u in loop %d: %s\n",
2195 ref->id, loop->num, indep_p ? "independent" : "dependent");
2196
2197 /* Record the computed result in the cache. */
2198 if (indep_p)
2199 {
2200 if (bitmap_set_bit (&ref->indep_loop, LOOP_DEP_BIT (loop->num, stored_p))
2201 && stored_p)
2202 {
2203 /* If it's independent of all refs then it's independent
2204 of stores, too. */
2205 bitmap_set_bit (&ref->indep_loop, LOOP_DEP_BIT (loop->num, false));
2206 }
2207 }
2208 else
2209 {
2210 record_dep_loop (loop, ref, stored_p);
2211 if (!stored_p)
2212 {
2213 /* If it's dependent on stores it's dependent on
2214 all refs, too. */
2215 record_dep_loop (loop, ref, true);
2216 }
2217 }
2218
2219 return indep_p;
2220 }
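
/* A small sketch of the caching above (assuming only, as the uses in
   this function suggest, that LOOP_DEP_BIT maps a (loop, stored_p)
   pair to a distinct bit in the indep_loop/dep_loop bitmaps):

     - "independent of all refs in LOOP" (stored_p == true) implies
       "independent of the stores in LOOP", so a positive answer also
       sets the stored_p == false bit;
     - "dependent on the stores in LOOP" (stored_p == false) implies
       "dependent on all refs in LOOP", so a negative answer also
       records the stored_p == true dependence, and record_dep_loop
       propagates both up the loop hierarchy.  */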
2221
2222 /* Returns true if REF is independent of all other memory references in
2223 LOOP. */
2224
2225 static bool
2226 ref_indep_loop_p (struct loop *loop, mem_ref_p ref)
2227 {
2228 gcc_checking_assert (MEM_ANALYZABLE (ref));
2229
2230 return ref_indep_loop_p_2 (loop, ref, false);
2231 }
2232
2233 /* Returns true if we can perform store motion of REF from LOOP. */
2234
2235 static bool
2236 can_sm_ref_p (struct loop *loop, mem_ref_p ref)
2237 {
2238 tree base;
2239
2240 /* Can't hoist unanalyzable refs. */
2241 if (!MEM_ANALYZABLE (ref))
2242 return false;
2243
2244 /* It should be movable. */
2245 if (!is_gimple_reg_type (TREE_TYPE (ref->mem.ref))
2246 || TREE_THIS_VOLATILE (ref->mem.ref)
2247 || !for_each_index (&ref->mem.ref, may_move_till, loop))
2248 return false;
2249
2250 /* If it can throw, fail; we do not properly update EH info. */
2251 if (tree_could_throw_p (ref->mem.ref))
2252 return false;
2253
2254 /* If it can trap, it must always be executed in LOOP.
2255 Readonly memory locations may trap when storing to them, but
2256 tree_could_trap_p is a predicate for rvalues, so check that
2257 explicitly. */
2258 base = get_base_address (ref->mem.ref);
2259 if ((tree_could_trap_p (ref->mem.ref)
2260 || (DECL_P (base) && TREE_READONLY (base)))
2261 && !ref_always_accessed_p (loop, ref, true))
2262 return false;
2263
2264 /* And it must be independent of all other memory references
2265 in LOOP. */
2266 if (!ref_indep_loop_p (loop, ref))
2267 return false;
2268
2269 return true;
2270 }
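
/* A few invented examples of the checks above (illustration only, not
   taken from a testcase):

       volatile int v;
       for (...) v = x;             rejected: the reference is volatile

       for (...) if (c) p->f = x;   rejected when the store may trap
                                    and is not always executed

       for (...) g = x;             acceptable for a non-trapping
                                    global g, provided it is also
                                    independent of the other memory
                                    references in the loop.  */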
2271
2272 /* Marks in REFS_TO_SM the references in LOOP for which store motion
2273 should be performed. SM_EXECUTED is the set of references for which
2274 store motion was already performed in one of the outer loops. */
2275
2276 static void
2277 find_refs_for_sm (struct loop *loop, bitmap sm_executed, bitmap refs_to_sm)
2278 {
2279 bitmap refs = &memory_accesses.all_refs_stored_in_loop[loop->num];
2280 unsigned i;
2281 bitmap_iterator bi;
2282 mem_ref_p ref;
2283
2284 EXECUTE_IF_AND_COMPL_IN_BITMAP (refs, sm_executed, 0, i, bi)
2285 {
2286 ref = memory_accesses.refs_list[i];
2287 if (can_sm_ref_p (loop, ref))
2288 bitmap_set_bit (refs_to_sm, i);
2289 }
2290 }
2291
2292 /* Checks whether LOOP (with exits stored in EXITS array) is suitable
2293 for a store motion optimization (i.e. whether we can insert statements
2294 on its exits). */
2295
2296 static bool
2297 loop_suitable_for_sm (struct loop *loop ATTRIBUTE_UNUSED,
2298 vec<edge> exits)
2299 {
2300 unsigned i;
2301 edge ex;
2302
2303 FOR_EACH_VEC_ELT (exits, i, ex)
2304 if (ex->flags & (EDGE_ABNORMAL | EDGE_EH))
2305 return false;
2306
2307 return true;
2308 }
2309
2310 /* Try to perform store motion for all memory references modified inside
2311 LOOP. SM_EXECUTED is the bitmap of the memory references for which
2312 store motion was already executed in one of the outer loops. */
2313
2314 static void
2315 store_motion_loop (struct loop *loop, bitmap sm_executed)
2316 {
2317 vec<edge> exits = get_loop_exit_edges (loop);
2318 struct loop *subloop;
2319 bitmap sm_in_loop = BITMAP_ALLOC (&lim_bitmap_obstack);
2320
2321 if (loop_suitable_for_sm (loop, exits))
2322 {
2323 find_refs_for_sm (loop, sm_executed, sm_in_loop);
2324 hoist_memory_references (loop, sm_in_loop, exits);
2325 }
2326 exits.release ();
2327
2328 bitmap_ior_into (sm_executed, sm_in_loop);
2329 for (subloop = loop->inner; subloop != NULL; subloop = subloop->next)
2330 store_motion_loop (subloop, sm_executed);
2331 bitmap_and_compl_into (sm_executed, sm_in_loop);
2332 BITMAP_FREE (sm_in_loop);
2333 }
2334
2335 /* Try to perform store motion for all memory references modified inside
2336 loops. */
2337
2338 static void
2339 store_motion (void)
2340 {
2341 struct loop *loop;
2342 bitmap sm_executed = BITMAP_ALLOC (&lim_bitmap_obstack);
2343
2344 for (loop = current_loops->tree_root->inner; loop != NULL; loop = loop->next)
2345 store_motion_loop (loop, sm_executed);
2346
2347 BITMAP_FREE (sm_executed);
2348 gsi_commit_edge_inserts ();
2349 }
2350
2351 /* Fills ALWAYS_EXECUTED_IN information for basic blocks of LOOP, i.e.
2352 for each such basic block bb records the outermost loop for which execution
2353 of its header implies execution of bb. CONTAINS_CALL is the bitmap of
2354 blocks that contain a nonpure call. */
2355
2356 static void
2357 fill_always_executed_in_1 (struct loop *loop, sbitmap contains_call)
2358 {
2359 basic_block bb = NULL, *bbs, last = NULL;
2360 unsigned i;
2361 edge e;
2362 struct loop *inn_loop = loop;
2363
2364 if (ALWAYS_EXECUTED_IN (loop->header) == NULL)
2365 {
2366 bbs = get_loop_body_in_dom_order (loop);
2367
2368 for (i = 0; i < loop->num_nodes; i++)
2369 {
2370 edge_iterator ei;
2371 bb = bbs[i];
2372
2373 if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
2374 last = bb;
2375
2376 if (bitmap_bit_p (contains_call, bb->index))
2377 break;
2378
2379 FOR_EACH_EDGE (e, ei, bb->succs)
2380 if (!flow_bb_inside_loop_p (loop, e->dest))
2381 break;
2382 if (e)
2383 break;
2384
2385 /* A loop might be infinite (TODO use simple loop analysis
2386 to disprove this if possible). */
2387 if (bb->flags & BB_IRREDUCIBLE_LOOP)
2388 break;
2389
2390 if (!flow_bb_inside_loop_p (inn_loop, bb))
2391 break;
2392
2393 if (bb->loop_father->header == bb)
2394 {
2395 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
2396 break;
2397
2398 /* In a loop that is always entered we may proceed anyway.
2399 But record that we entered it and stop once we leave it. */
2400 inn_loop = bb->loop_father;
2401 }
2402 }
2403
2404 while (1)
2405 {
2406 SET_ALWAYS_EXECUTED_IN (last, loop);
2407 if (last == loop->header)
2408 break;
2409 last = get_immediate_dominator (CDI_DOMINATORS, last);
2410 }
2411
2412 free (bbs);
2413 }
2414
2415 for (loop = loop->inner; loop; loop = loop->next)
2416 fill_always_executed_in_1 (loop, contains_call);
2417 }
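
/* Informal example of the result (variable names invented): in

       for (;;)
         {
           a = ...;          in the header, always executed once the
                             loop is entered
           if (c)
             b = ...;        not always executed
           d = ...;          always executed, its block dominates the
                             latch
         }

   ALWAYS_EXECUTED_IN is set to the loop for the blocks holding a and d,
   but not for the block holding b.  The scan above also stops at blocks
   containing a nonpure call and at edges leaving the loop, since blocks
   behind those points need not be reached even though the loop header
   is executed.  */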
2418
2419 /* Fills ALWAYS_EXECUTED_IN information for basic blocks, i.e.
2420 for each such basic block bb records the outermost loop for which execution
2421 of its header implies execution of bb. */
2422
2423 static void
2424 fill_always_executed_in (void)
2425 {
2426 sbitmap contains_call = sbitmap_alloc (last_basic_block_for_fn (cfun));
2427 basic_block bb;
2428 struct loop *loop;
2429
2430 bitmap_clear (contains_call);
2431 FOR_EACH_BB_FN (bb, cfun)
2432 {
2433 gimple_stmt_iterator gsi;
2434 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2435 {
2436 if (nonpure_call_p (gsi_stmt (gsi)))
2437 break;
2438 }
2439
2440 if (!gsi_end_p (gsi))
2441 bitmap_set_bit (contains_call, bb->index);
2442 }
2443
2444 for (loop = current_loops->tree_root->inner; loop; loop = loop->next)
2445 fill_always_executed_in_1 (loop, contains_call);
2446
2447 sbitmap_free (contains_call);
2448 }
2449
2450
2451 /* Compute the global information needed by the loop invariant motion pass. */
2452
2453 static void
2454 tree_ssa_lim_initialize (void)
2455 {
2456 struct loop *loop;
2457 unsigned i;
2458
2459 bitmap_obstack_initialize (&lim_bitmap_obstack);
2460 gcc_obstack_init (&mem_ref_obstack);
2461 lim_aux_data_map = new hash_map<gimple, lim_aux_data *>;
2462
2463 if (flag_tm)
2464 compute_transaction_bits ();
2465
2466 alloc_aux_for_edges (0);
2467
2468 memory_accesses.refs = new hash_table<mem_ref_hasher> (100);
2469 memory_accesses.refs_list.create (100);
2470 /* Allocate a special, unanalyzable mem-ref with ID zero. */
2471 memory_accesses.refs_list.quick_push
2472 (mem_ref_alloc (error_mark_node, 0, UNANALYZABLE_MEM_ID));
2473
2474 memory_accesses.refs_in_loop.create (number_of_loops (cfun));
2475 memory_accesses.refs_in_loop.quick_grow (number_of_loops (cfun));
2476 memory_accesses.refs_stored_in_loop.create (number_of_loops (cfun));
2477 memory_accesses.refs_stored_in_loop.quick_grow (number_of_loops (cfun));
2478 memory_accesses.all_refs_stored_in_loop.create (number_of_loops (cfun));
2479 memory_accesses.all_refs_stored_in_loop.quick_grow (number_of_loops (cfun));
2480
2481 for (i = 0; i < number_of_loops (cfun); i++)
2482 {
2483 bitmap_initialize (&memory_accesses.refs_in_loop[i],
2484 &lim_bitmap_obstack);
2485 bitmap_initialize (&memory_accesses.refs_stored_in_loop[i],
2486 &lim_bitmap_obstack);
2487 bitmap_initialize (&memory_accesses.all_refs_stored_in_loop[i],
2488 &lim_bitmap_obstack);
2489 }
2490
2491 memory_accesses.ttae_cache = NULL;
2492
2493 /* Initialize bb_loop_postorder with a mapping from loop->num to
2494 its postorder index. */
2495 i = 0;
2496 bb_loop_postorder = XNEWVEC (unsigned, number_of_loops (cfun));
2497 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
2498 bb_loop_postorder[loop->num] = i++;
2499 }
2500
2501 /* Cleans up after the invariant motion pass. */
2502
2503 static void
2504 tree_ssa_lim_finalize (void)
2505 {
2506 basic_block bb;
2507 unsigned i;
2508 mem_ref_p ref;
2509
2510 free_aux_for_edges ();
2511
2512 FOR_EACH_BB_FN (bb, cfun)
2513 SET_ALWAYS_EXECUTED_IN (bb, NULL);
2514
2515 bitmap_obstack_release (&lim_bitmap_obstack);
2516 delete lim_aux_data_map;
2517
2518 delete memory_accesses.refs;
2519 memory_accesses.refs = NULL;
2520
2521 FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
2522 memref_free (ref);
2523 memory_accesses.refs_list.release ();
2524 obstack_free (&mem_ref_obstack, NULL);
2525
2526 memory_accesses.refs_in_loop.release ();
2527 memory_accesses.refs_stored_in_loop.release ();
2528 memory_accesses.all_refs_stored_in_loop.release ();
2529
2530 if (memory_accesses.ttae_cache)
2531 free_affine_expand_cache (&memory_accesses.ttae_cache);
2532
2533 free (bb_loop_postorder);
2534 }
2535
2536 /* Moves invariants from loops. Only "expensive" invariants are moved out --
2537 i.e. those that are likely to be a win regardless of the register pressure. */
2538
2539 unsigned int
2540 tree_ssa_lim (void)
2541 {
2542 unsigned int todo;
2543
2544 tree_ssa_lim_initialize ();
2545
2546 /* Gathers information about memory accesses in the loops. */
2547 analyze_memory_references ();
2548
2549 /* Fills ALWAYS_EXECUTED_IN information for basic blocks. */
2550 fill_always_executed_in ();
2551
2552 /* For each statement determine the outermost loop in which it is
2553 invariant and the cost of computing the invariant. */
2554 invariantness_dom_walker (CDI_DOMINATORS)
2555 .walk (cfun->cfg->x_entry_block_ptr);
2556
2557 /* Execute store motion. Force the necessary invariants to be moved
2558 out of the loops as well. */
2559 store_motion ();
2560
2561 /* Move the expressions that are expensive enough. */
2562 todo = move_computations ();
2563
2564 tree_ssa_lim_finalize ();
2565
2566 return todo;
2567 }
2568
2569 /* Loop invariant motion pass. */
2570
2571 namespace {
2572
2573 const pass_data pass_data_lim =
2574 {
2575 GIMPLE_PASS, /* type */
2576 "lim", /* name */
2577 OPTGROUP_LOOP, /* optinfo_flags */
2578 TV_LIM, /* tv_id */
2579 PROP_cfg, /* properties_required */
2580 0, /* properties_provided */
2581 0, /* properties_destroyed */
2582 0, /* todo_flags_start */
2583 0, /* todo_flags_finish */
2584 };
2585
2586 class pass_lim : public gimple_opt_pass
2587 {
2588 public:
2589 pass_lim (gcc::context *ctxt)
2590 : gimple_opt_pass (pass_data_lim, ctxt)
2591 {}
2592
2593 /* opt_pass methods: */
2594 opt_pass * clone () { return new pass_lim (m_ctxt); }
2595 virtual bool gate (function *) { return flag_tree_loop_im != 0; }
2596 virtual unsigned int execute (function *);
2597
2598 }; // class pass_lim
2599
2600 unsigned int
2601 pass_lim::execute (function *fun)
2602 {
2603 if (number_of_loops (fun) <= 1)
2604 return 0;
2605
2606 return tree_ssa_lim ();
2607 }
2608
2609 } // anon namespace
2610
2611 gimple_opt_pass *
2612 make_pass_lim (gcc::context *ctxt)
2613 {
2614 return new pass_lim (ctxt);
2615 }
2616
2617