/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does so in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
         induction variable in the set and adds its cost to the sum.  The cost
         reflects the time spent on modifying the induction variable's value
         to be usable for the given purpose (adding base and offset for
         arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, and the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
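
/* As an illustrative sketch (not part of the pass, and written as C
   source rather than the GIMPLE the pass really sees), strength
   reduction rewrites

       for (i = 0; i < n; i++)
         a[i] = 0;

   where the address is recomputed as a + i * sizeof (*a) on every
   iteration, into the equivalent

       for (p = a; p < a + n; p++)
         *p = 0;

   replacing the per-iteration multiplication with a pointer increment.
   The variable names here are hypothetical.  */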

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "tm_p.h"
#include "basic-block.h"
#include "output.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "recog.h"
#include "pointer-set.h"
#include "hashtab.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "expr.h"

/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_loop_iterations_int (loop, false);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}

/* Representation of the induction variable.  */
struct iv
{
  tree base;            /* Initial value of the iv.  */
  tree base_object;     /* A memory object to which the induction variable
                           points.  */
  tree step;            /* Step of the iv (constant only).  */
  tree ssa_name;        /* The ssa name with the value.  */
  bool biv_p;           /* Is it a biv?  */
  bool have_use_for;    /* Do we already have a use for it?  */
  unsigned use_id;      /* The identifier in the use if it is the case.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;            /* The ssa name.  */
  struct iv *iv;        /* Induction variable description.  */
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
                           an expression that is not an induction variable.  */
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
  unsigned inv_id;      /* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
  USE_ADDRESS,          /* Use in an address.  */
  USE_COMPARE           /* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;             /* The runtime cost.  */
  unsigned complexity;  /* The estimate of the complexity of the code for
                           the computation (in no concrete units --
                           complexity field should be larger for more
                           complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost zero_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand; /* The candidate.  */
  comp_cost cost;       /* The cost.  */
  bitmap depends_on;    /* The list of invariants that have to be
                           preserved.  */
  tree value;           /* For final value elimination, the expression for
                           the final value of the iv.  For iv elimination,
                           the new bound to compare with.  */
  int inv_expr_id;      /* Loop invariant expression id.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;          /* The id of the use.  */
  enum use_type type;   /* Type of the use.  */
  struct iv *iv;        /* The induction variable it is based on.  */
  gimple stmt;          /* Statement in which it occurs.  */
  tree *op_p;           /* The place where it occurs.  */
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
                           important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
                        /* The costs with respect to the iv candidates.  */

  struct iv_cand *selected;
                        /* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,            /* At the end, just before the exit condition.  */
  IP_END,               /* At the end of the latch block.  */
  IP_BEFORE_USE,        /* Immediately before a specific use.  */
  IP_AFTER_USE,         /* Immediately after a specific use.  */
  IP_ORIGINAL           /* The original biv.  */
};
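
/* A rough sketch of where each position lands in a typical loop
   (illustration only; the labels and names are hypothetical):

       header:
         ...loop body...
         i' = i + 1;            <- IP_NORMAL (just before the exit test)
         if (cond) goto exit;
       latch:
         ...                    <- IP_END (end of the latch block)
         goto header;

   IP_BEFORE_USE and IP_AFTER_USE place the increment immediately around
   the single use the candidate was created for, and IP_ORIGINAL marks
   the increment of a preexisting biv.  */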

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;          /* The number of the candidate.  */
  bool important;       /* Whether this is an "important" candidate, i.e. such
                           that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
                           incremented.  */
  tree var_before;      /* The variable used for it before increment.  */
  tree var_after;       /* The variable used for it after increment.  */
  struct iv *iv;        /* The value of the candidate.  NULL for
                           "pseudocandidate" used to indicate the possibility
                           to replace the final value of an iv by direct
                           computation of the value.  */
  unsigned cost;        /* Cost of the candidate.  */
  unsigned cost_step;   /* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
                              where it is incremented.  */
  bitmap depends_on;    /* The list of invariants that are used in step of the
                           biv.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  htab_t inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on the number of candidates below which all candidates are
   considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return VEC_length (iv_use_p, data->iv_uses);
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_use_p, data->iv_uses, i);
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return VEC_length (iv_cand_p, data->iv_candidates);
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_cand_p, data->iv_candidates, i);
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

extern void dump_uses (FILE *, struct ivopts_data *);
void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
           cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}

/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
                                  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
        return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
        return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
                            idx_contains_abnormal_ssa_name_p,
                            NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
        return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
        return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Returns a tree describing the number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static tree
niter_for_exit (struct ivopts_data *data, edge exit,
                struct tree_niter_desc **desc_p)
{
  struct tree_niter_desc *desc = NULL;
  tree niter;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine the number of iterations.  We must know it
         unconditionally (i.e., without possibility of # of iterations
         being zero).  Also, we cannot safely work with ssa names that
         appear in phi nodes on abnormal edges, so that we do not create
         overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (number_of_iterations_exit (data->current_loop,
                                     exit, desc, true)
          && integer_zerop (desc->may_be_zero)
          && !contains_abnormal_ssa_name_p (desc->niter))
        niter = desc->niter;
      else
        niter = NULL_TREE;

      desc->niter = niter;
      slot = pointer_map_insert (data->niters, exit);
      *slot = desc;
    }
  else
    niter = ((struct tree_niter_desc *) *slot)->niter;

  if (desc_p)
    *desc_p = (struct tree_niter_desc *) *slot;
  return niter;
}

/* Returns a tree describing the number of iterations determined from
   the single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static tree
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit, NULL);
}

/* Hash table equality function for expressions.  */

static int
htab_inv_expr_eq (const void *ent1, const void *ent2)
{
  const struct iv_inv_expr_ent *expr1 =
      (const struct iv_inv_expr_ent *)ent1;
  const struct iv_inv_expr_ent *expr2 =
      (const struct iv_inv_expr_ent *)ent2;

  return operand_equal_p (expr1->expr, expr2->expr, 0);
}

/* Hash function for loop invariant expressions.  */

static hashval_t
htab_inv_expr_hash (const void *ent)
{
  const struct iv_inv_expr_ent *expr =
      (const struct iv_inv_expr_ent *)ent;
  return expr->hash;
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
  data->inv_expr_tab = htab_create (10, htab_inv_expr_hash,
                                    htab_inv_expr_eq, free);
  data->inv_expr_id = 0;
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
}

/* Returns the memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer cast to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
        return expr;

      if (TREE_CODE (base) == MEM_REF)
        return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
                           build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
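
/* For illustration (hypothetical names): for EXPR = &a[0], the base
   address is the declaration "a" and the function returns &a converted
   to ptr_type_node; for EXPR = (char *) p_1 + 4, the conversion and the
   POINTER_PLUS_EXPR are looked through and the base object is
   determined from p_1 alone.  */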

/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
          || !flow_bb_inside_loop_p (data->current_loop, bb))
        set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (!is_gimple_reg (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}
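
/* For example (hypothetical SSA names), for the header phi
   i_1 = PHI <0(preheader), i_2(latch)> with i_2 = i_1 + 4 in the loop
   body, this returns the step 4; for a phi whose value does not change
   within the loop, it returns NULL_TREE.  */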

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
        continue;

      step = determine_biv_step (phi);
      if (!step)
        continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
          || contains_abnormal_ssa_name_p (step))
        continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
        {
          if (POINTER_TYPE_P (type))
            step = fold_convert (sizetype, step);
          else
            step = fold_convert (type, step);
        }

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
        continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
        continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
          || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
        continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP, determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      tree niter = niter_for_single_dom_exit (data);

      if (niter)
        {
          fprintf (dump_file, "  number of iterations ");
          print_generic_expr (dump_file, niter, TDF_SLIM);
          fprintf (dump_file, "\n\n");
        }

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          if (ver_info (data, i)->iv)
            dump_iv (dump_file, ver_info (data, i)->iv);
        }
    }

  return true;
}

/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
            gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* Reset the ssa name here so that it is not shown in the dumps, if it
     was not already reset by the caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  VEC_safe_push (iv_use_p, heap, data->iv_uses, use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || !is_gimple_reg (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
              || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to the location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
                       tree **control_var, tree **bound,
                       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

 end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
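
/* For instance (hypothetical names), for the exit test if (i_3 < n_5)
   where i_3 is an iv with step 1 and n_5 is loop invariant,
   *CONTROL_VAR is set to point to i_3, *BOUND to n_5, and true is
   returned; for if (a_1 < b_2) with two invariants, false is
   returned.  */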

/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
          && flow_bb_inside_loop_p (loop, def_bb))
        return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Returns true if statement STMT is obviously invariant in LOOP,
   i.e. if all its operands on the RHS are defined outside of the LOOP.
   LOOP should not be the function body.  */

bool
stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
{
  unsigned i;
  tree lhs;

  gcc_assert (loop_depth (loop) > 0);

  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); i++)
    {
      tree op = gimple_op (stmt, i);
      if (op != lhs && !expr_invariant_in_loop_p (loop, op))
        return false;
    }

  return true;
}

/* Accumulates the steps of indices into DATA and replaces their values with
   the initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF)
    return false;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is an array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both the lower bound
     and the step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
          && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
        return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
          || !expr_invariant_in_loop_p (loop, lbound))
        return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  For a base of *D42 with iv->base being &x[0], we produce
     *&x[0], which is not folded and does not trigger the ARRAY_REF path
     below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
        return false;
    }
  else
    /* The step for pointer arithmetic is already 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
                            sizetype, &iv_base, &iv_step, dta->stmt,
                            false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}
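
/* As an example (hypothetical names), for a reference a[i_1] where a
   has 4-byte elements and i_1 is an iv with step 1, the callback
   replaces the index with the initial value of i_1 and adds
   4 * 1 = 4 bytes to DTA->step, so the address as a whole is later
   recognized as an iv with step 4.  */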

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
                void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
        return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
        return false;

      *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
                              precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
          || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
        return false;

      if (code == MINUS_EXPR)
        p1 = double_int_neg (p1);
      *mul = double_int_sext (double_int_add (p0, p1), precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
        return false;

      p0 = double_int_sext (tree_to_double_int (top), precision);
      p1 = double_int_sext (tree_to_double_int (bot), precision);
      if (double_int_zero_p (p1))
        return false;
      *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
                              precision);
      return double_int_zero_p (res);

    default:
      return false;
    }
}
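
/* A few concrete cases (n stands for any expression): for TOP = n * 8
   and BOT = n, MUL is set to 8; for TOP = n * 8 - n * 2 and BOT = n,
   MUL is set to 6; for TOP = 12 and BOT = 4, MUL is set to 3; for
   TOP = 13 and BOT = 4 the division leaves a remainder, so false is
   returned.  */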
1600
1601 /* Returns true if memory reference REF with step STEP may be unaligned. */
1602
1603 static bool
1604 may_be_unaligned_p (tree ref, tree step)
1605 {
1606 tree base;
1607 tree base_type;
1608 HOST_WIDE_INT bitsize;
1609 HOST_WIDE_INT bitpos;
1610 tree toffset;
1611 enum machine_mode mode;
1612 int unsignedp, volatilep;
1613 unsigned base_align;
1614
1615 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1616 thus they are not misaligned. */
1617 if (TREE_CODE (ref) == TARGET_MEM_REF)
1618 return false;
1619
1620 /* The test below is basically copy of what expr.c:normal_inner_ref
1621 does to check whether the object must be loaded by parts when
1622 STRICT_ALIGNMENT is true. */
1623 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1624 &unsignedp, &volatilep, true);
1625 base_type = TREE_TYPE (base);
1626 base_align = TYPE_ALIGN (base_type);
1627
1628 if (mode != BLKmode)
1629 {
1630 unsigned mode_align = GET_MODE_ALIGNMENT (mode);
1631
1632 if (base_align < mode_align
1633 || (bitpos % mode_align) != 0
1634 || (bitpos % BITS_PER_UNIT) != 0)
1635 return true;
1636
1637 if (toffset
1638 && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
1639 return true;
1640
1641 if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
1642 return true;
1643 }
1644
1645 return false;
1646 }
1647
1648 /* Return true if EXPR may be non-addressable. */
1649
1650 bool
1651 may_be_nonaddressable_p (tree expr)
1652 {
1653 switch (TREE_CODE (expr))
1654 {
1655 case TARGET_MEM_REF:
1656 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1657 target, thus they are always addressable. */
1658 return false;
1659
1660 case COMPONENT_REF:
1661 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1662 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1663
1664 case VIEW_CONVERT_EXPR:
1665 /* This kind of view-conversions may wrap non-addressable objects
1666 and make them look addressable. After some processing the
1667 non-addressability may be uncovered again, causing ADDR_EXPRs
1668 of inappropriate objects to be built. */
1669 if (is_gimple_reg (TREE_OPERAND (expr, 0))
1670 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
1671 return true;
1672
1673 /* ... fall through ... */
1674
1675 case ARRAY_REF:
1676 case ARRAY_RANGE_REF:
1677 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1678
1679 CASE_CONVERT:
1680 return true;
1681
1682 default:
1683 break;
1684 }
1685
1686 return false;
1687 }
1688
1689 /* Finds addresses in *OP_P inside STMT. */
1690
1691 static void
1692 find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
1693 {
1694 tree base = *op_p, step = size_zero_node;
1695 struct iv *civ;
1696 struct ifs_ivopts_data ifs_ivopts_data;
1697
1698 /* Do not play with volatile memory references. A bit too conservative,
1699 perhaps, but safe. */
1700 if (gimple_has_volatile_ops (stmt))
1701 goto fail;
1702
1703 /* Ignore bitfields for now. Not really something terribly complicated
1704 to handle. TODO. */
1705 if (TREE_CODE (base) == BIT_FIELD_REF)
1706 goto fail;
1707
1708 base = unshare_expr (base);
1709
1710 if (TREE_CODE (base) == TARGET_MEM_REF)
1711 {
1712 tree type = build_pointer_type (TREE_TYPE (base));
1713 tree astep;
1714
1715 if (TMR_BASE (base)
1716 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1717 {
1718 civ = get_iv (data, TMR_BASE (base));
1719 if (!civ)
1720 goto fail;
1721
1722 TMR_BASE (base) = civ->base;
1723 step = civ->step;
1724 }
1725 if (TMR_INDEX2 (base)
1726 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
1727 {
1728 civ = get_iv (data, TMR_INDEX2 (base));
1729 if (!civ)
1730 goto fail;
1731
1732 TMR_INDEX2 (base) = civ->base;
1733 step = civ->step;
1734 }
1735 if (TMR_INDEX (base)
1736 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1737 {
1738 civ = get_iv (data, TMR_INDEX (base));
1739 if (!civ)
1740 goto fail;
1741
1742 TMR_INDEX (base) = civ->base;
1743 astep = civ->step;
1744
1745 if (astep)
1746 {
1747 if (TMR_STEP (base))
1748 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1749
1750 step = fold_build2 (PLUS_EXPR, type, step, astep);
1751 }
1752 }
1753
1754 if (integer_zerop (step))
1755 goto fail;
1756 base = tree_mem_ref_addr (type, base);
1757 }
1758 else
1759 {
1760 ifs_ivopts_data.ivopts_data = data;
1761 ifs_ivopts_data.stmt = stmt;
1762 ifs_ivopts_data.step = size_zero_node;
1763 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1764 || integer_zerop (ifs_ivopts_data.step))
1765 goto fail;
1766 step = ifs_ivopts_data.step;
1767
1768 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1769
1770 /* Check that the base expression is addressable. This needs
1771 to be done after substituting bases of IVs into it. */
1772 if (may_be_nonaddressable_p (base))
1773 goto fail;
1774
1775 /* Moreover, on strict alignment platforms, check that it is
1776 sufficiently aligned. */
1777 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1778 goto fail;
1779
1780 base = build_fold_addr_expr (base);
1781
1782 /* Substituting bases of IVs into the base expression might
1783 have caused folding opportunities. */
1784 if (TREE_CODE (base) == ADDR_EXPR)
1785 {
1786 tree *ref = &TREE_OPERAND (base, 0);
1787 while (handled_component_p (*ref))
1788 ref = &TREE_OPERAND (*ref, 0);
1789 if (TREE_CODE (*ref) == MEM_REF)
1790 {
1791 tree tem = TREE_OPERAND (*ref, 0);
1792 STRIP_NOPS (tem);
1793 if (tem != TREE_OPERAND (*ref, 0))
1794 tem = fold_build2 (MEM_REF, TREE_TYPE (*ref),
1795 tem, TREE_OPERAND (*ref, 1));
1796 else
1797 tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
1798 tem, TREE_OPERAND (*ref, 1));
1799 if (tem)
1800 *ref = tem;
1801 }
1802 }
1803 }
1804
1805 civ = alloc_iv (base, step);
1806 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1807 return;
1808
1809 fail:
1810 for_each_index (op_p, idx_record_use, data);
1811 }
1812
1813 /* Finds and records invariants used in STMT. */
1814
1815 static void
1816 find_invariants_stmt (struct ivopts_data *data, gimple stmt)
1817 {
1818 ssa_op_iter iter;
1819 use_operand_p use_p;
1820 tree op;
1821
1822 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1823 {
1824 op = USE_FROM_PTR (use_p);
1825 record_invariant (data, op, false);
1826 }
1827 }
1828
1829 /* Finds interesting uses of induction variables in the statement STMT. */
1830
1831 static void
1832 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
1833 {
1834 struct iv *iv;
1835 tree op, *lhs, *rhs;
1836 ssa_op_iter iter;
1837 use_operand_p use_p;
1838 enum tree_code code;
1839
1840 find_invariants_stmt (data, stmt);
1841
1842 if (gimple_code (stmt) == GIMPLE_COND)
1843 {
1844 find_interesting_uses_cond (data, stmt);
1845 return;
1846 }
1847
1848 if (is_gimple_assign (stmt))
1849 {
1850 lhs = gimple_assign_lhs_ptr (stmt);
1851 rhs = gimple_assign_rhs1_ptr (stmt);
1852
1853 if (TREE_CODE (*lhs) == SSA_NAME)
1854 {
1855 /* If the statement defines an induction variable, the uses are not
1856 interesting by themselves. */
1857
1858 iv = get_iv (data, *lhs);
1859
1860 if (iv && !integer_zerop (iv->step))
1861 return;
1862 }
1863
1864 code = gimple_assign_rhs_code (stmt);
1865 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
1866 && (REFERENCE_CLASS_P (*rhs)
1867 || is_gimple_val (*rhs)))
1868 {
1869 if (REFERENCE_CLASS_P (*rhs))
1870 find_interesting_uses_address (data, stmt, rhs);
1871 else
1872 find_interesting_uses_op (data, *rhs);
1873
1874 if (REFERENCE_CLASS_P (*lhs))
1875 find_interesting_uses_address (data, stmt, lhs);
1876 return;
1877 }
1878 else if (TREE_CODE_CLASS (code) == tcc_comparison)
1879 {
1880 find_interesting_uses_cond (data, stmt);
1881 return;
1882 }
1883
1884 /* TODO -- we should also handle address uses of type
1885
1886 memory = call (whatever);
1887
1888 and
1889
1890 call (memory). */
1891 }
1892
1893 if (gimple_code (stmt) == GIMPLE_PHI
1894 && gimple_bb (stmt) == data->current_loop->header)
1895 {
1896 iv = get_iv (data, PHI_RESULT (stmt));
1897
1898 if (iv && !integer_zerop (iv->step))
1899 return;
1900 }
1901
1902 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1903 {
1904 op = USE_FROM_PTR (use_p);
1905
1906 if (TREE_CODE (op) != SSA_NAME)
1907 continue;
1908
1909 iv = get_iv (data, op);
1910 if (!iv)
1911 continue;
1912
1913 find_interesting_uses_op (data, op);
1914 }
1915 }
1916
1917 /* Finds interesting uses of induction variables outside of loops
1918 on loop exit edge EXIT. */
1919
1920 static void
1921 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1922 {
1923 gimple phi;
1924 gimple_stmt_iterator psi;
1925 tree def;
1926
1927 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
1928 {
1929 phi = gsi_stmt (psi);
1930 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1931 if (is_gimple_reg (def))
1932 find_interesting_uses_op (data, def);
1933 }
1934 }
1935
1936 /* Finds uses of the induction variables that are interesting. */
1937
1938 static void
1939 find_interesting_uses (struct ivopts_data *data)
1940 {
1941 basic_block bb;
1942 gimple_stmt_iterator bsi;
1943 basic_block *body = get_loop_body (data->current_loop);
1944 unsigned i;
1945 struct version_info *info;
1946 edge e;
1947
1948 if (dump_file && (dump_flags & TDF_DETAILS))
1949 fprintf (dump_file, "Uses:\n\n");
1950
1951 for (i = 0; i < data->current_loop->num_nodes; i++)
1952 {
1953 edge_iterator ei;
1954 bb = body[i];
1955
1956 FOR_EACH_EDGE (e, ei, bb->succs)
1957 if (e->dest != EXIT_BLOCK_PTR
1958 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1959 find_interesting_uses_outside (data, e);
1960
1961 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1962 find_interesting_uses_stmt (data, gsi_stmt (bsi));
1963 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1964 if (!is_gimple_debug (gsi_stmt (bsi)))
1965 find_interesting_uses_stmt (data, gsi_stmt (bsi));
1966 }
1967
1968 if (dump_file && (dump_flags & TDF_DETAILS))
1969 {
1970 bitmap_iterator bi;
1971
1972 fprintf (dump_file, "\n");
1973
1974 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1975 {
1976 info = ver_info (data, i);
1977 if (info->inv_id)
1978 {
1979 fprintf (dump_file, " ");
1980 print_generic_expr (dump_file, info->name, TDF_SLIM);
1981 fprintf (dump_file, " is invariant (%d)%s\n",
1982 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1983 }
1984 }
1985
1986 fprintf (dump_file, "\n");
1987 }
1988
1989 free (body);
1990 }
1991
1992 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1993 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1994 we are at the top-level of the processed address. */
1995
1996 static tree
1997 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1998 unsigned HOST_WIDE_INT *offset)
1999 {
2000 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2001 enum tree_code code;
2002 tree type, orig_type = TREE_TYPE (expr);
2003 unsigned HOST_WIDE_INT off0, off1, st;
2004 tree orig_expr = expr;
2005
2006 STRIP_NOPS (expr);
2007
2008 type = TREE_TYPE (expr);
2009 code = TREE_CODE (expr);
2010 *offset = 0;
2011
2012 switch (code)
2013 {
2014 case INTEGER_CST:
2015 if (!cst_and_fits_in_hwi (expr)
2016 || integer_zerop (expr))
2017 return orig_expr;
2018
2019 *offset = int_cst_value (expr);
2020 return build_int_cst (orig_type, 0);
2021
2022 case POINTER_PLUS_EXPR:
2023 case PLUS_EXPR:
2024 case MINUS_EXPR:
2025 op0 = TREE_OPERAND (expr, 0);
2026 op1 = TREE_OPERAND (expr, 1);
2027
2028 op0 = strip_offset_1 (op0, false, false, &off0);
2029 op1 = strip_offset_1 (op1, false, false, &off1);
2030
2031 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2032 if (op0 == TREE_OPERAND (expr, 0)
2033 && op1 == TREE_OPERAND (expr, 1))
2034 return orig_expr;
2035
2036 if (integer_zerop (op1))
2037 expr = op0;
2038 else if (integer_zerop (op0))
2039 {
2040 if (code == MINUS_EXPR)
2041 expr = fold_build1 (NEGATE_EXPR, type, op1);
2042 else
2043 expr = op1;
2044 }
2045 else
2046 expr = fold_build2 (code, type, op0, op1);
2047
2048 return fold_convert (orig_type, expr);
2049
2050 case MULT_EXPR:
2051 op1 = TREE_OPERAND (expr, 1);
2052 if (!cst_and_fits_in_hwi (op1))
2053 return orig_expr;
2054
2055 op0 = TREE_OPERAND (expr, 0);
2056 op0 = strip_offset_1 (op0, false, false, &off0);
2057 if (op0 == TREE_OPERAND (expr, 0))
2058 return orig_expr;
2059
2060 *offset = off0 * int_cst_value (op1);
2061 if (integer_zerop (op0))
2062 expr = op0;
2063 else
2064 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2065
2066 return fold_convert (orig_type, expr);
2067
2068 case ARRAY_REF:
2069 case ARRAY_RANGE_REF:
2070 if (!inside_addr)
2071 return orig_expr;
2072
2073 step = array_ref_element_size (expr);
2074 if (!cst_and_fits_in_hwi (step))
2075 break;
2076
2077 st = int_cst_value (step);
2078 op1 = TREE_OPERAND (expr, 1);
2079 op1 = strip_offset_1 (op1, false, false, &off1);
2080 *offset = off1 * st;
2081
2082 if (top_compref
2083 && integer_zerop (op1))
2084 {
2085 /* Strip the component reference completely. */
2086 op0 = TREE_OPERAND (expr, 0);
2087 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2088 *offset += off0;
2089 return op0;
2090 }
2091 break;
2092
2093 case COMPONENT_REF:
2094 if (!inside_addr)
2095 return orig_expr;
2096
2097 tmp = component_ref_field_offset (expr);
2098 if (top_compref
2099 && cst_and_fits_in_hwi (tmp))
2100 {
2101 /* Strip the component reference completely. */
2102 op0 = TREE_OPERAND (expr, 0);
2103 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2104 *offset = off0 + int_cst_value (tmp);
2105 return op0;
2106 }
2107 break;
2108
2109 case ADDR_EXPR:
2110 op0 = TREE_OPERAND (expr, 0);
2111 op0 = strip_offset_1 (op0, true, true, &off0);
2112 *offset += off0;
2113
2114 if (op0 == TREE_OPERAND (expr, 0))
2115 return orig_expr;
2116
2117 expr = build_fold_addr_expr (op0);
2118 return fold_convert (orig_type, expr);
2119
2120 case MEM_REF:
2121 /* ??? Offset operand? */
2122 inside_addr = false;
2123 break;
2124
2125 default:
2126 return orig_expr;
2127 }
2128
2129 /* Default handling of expressions for which we want to recurse into
2130 the first operand. */
2131 op0 = TREE_OPERAND (expr, 0);
2132 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2133 *offset += off0;
2134
2135 if (op0 == TREE_OPERAND (expr, 0)
2136 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2137 return orig_expr;
2138
2139 expr = copy_node (expr);
2140 TREE_OPERAND (expr, 0) = op0;
2141 if (op1)
2142 TREE_OPERAND (expr, 1) = op1;
2143
2144 /* Inside an address, we might strip the top-level component references,
2145 thus changing the type of the expression. The handling of ADDR_EXPR
2146 will fix that. */
2147 expr = fold_convert (orig_type, expr);
2148
2149 return expr;
2150 }
2151
2152 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2153
2154 static tree
2155 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2156 {
2157 return strip_offset_1 (expr, false, false, offset);
2158 }
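
/* For example: given the address &a[i + 4], where a is an array of
   4-byte integers, strip_offset returns an expression equivalent to
   &a[i] and stores 16 in *OFFSET, separating the constant displacement
   from the variable part of the address. */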
2159
2160 /* Returns a variant of TYPE that can be used as a base for different uses.
2161 We return an unsigned type with the same precision, which avoids problems
2162 with overflows. */
2163
2164 static tree
2165 generic_type_for (tree type)
2166 {
2167 if (POINTER_TYPE_P (type))
2168 return unsigned_type_for (type);
2169
2170 if (TYPE_UNSIGNED (type))
2171 return type;
2172
2173 return unsigned_type_for (type);
2174 }
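
/* For instance, generic_type_for maps int to unsigned int, and maps a
   pointer type to an unsigned integer type of the same precision, so
   candidate arithmetic is performed in a type with defined wrap-around
   semantics. */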
2175
2176 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2177 the bitmap in which we should store them. */
2178
2179 static struct ivopts_data *fd_ivopts_data;
2180 static tree
2181 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2182 {
2183 bitmap *depends_on = (bitmap *) data;
2184 struct version_info *info;
2185
2186 if (TREE_CODE (*expr_p) != SSA_NAME)
2187 return NULL_TREE;
2188 info = name_info (fd_ivopts_data, *expr_p);
2189
2190 if (!info->inv_id || info->has_nonlin_use)
2191 return NULL_TREE;
2192
2193 if (!*depends_on)
2194 *depends_on = BITMAP_ALLOC (NULL);
2195 bitmap_set_bit (*depends_on, info->inv_id);
2196
2197 return NULL_TREE;
2198 }
2199
2200 /* Adds a candidate BASE + STEP * i. The important field is set to IMPORTANT
2201 and the position to POS. If USE is not NULL, the candidate is set as related to
2202 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2203 replacement of the final value of the iv by a direct computation. */
2204
2205 static struct iv_cand *
2206 add_candidate_1 (struct ivopts_data *data,
2207 tree base, tree step, bool important, enum iv_position pos,
2208 struct iv_use *use, gimple incremented_at)
2209 {
2210 unsigned i;
2211 struct iv_cand *cand = NULL;
2212 tree type, orig_type;
2213
2214 if (base)
2215 {
2216 orig_type = TREE_TYPE (base);
2217 type = generic_type_for (orig_type);
2218 if (type != orig_type)
2219 {
2220 base = fold_convert (type, base);
2221 step = fold_convert (type, step);
2222 }
2223 }
2224
2225 for (i = 0; i < n_iv_cands (data); i++)
2226 {
2227 cand = iv_cand (data, i);
2228
2229 if (cand->pos != pos)
2230 continue;
2231
2232 if (cand->incremented_at != incremented_at
2233 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2234 && cand->ainc_use != use))
2235 continue;
2236
2237 if (!cand->iv)
2238 {
2239 if (!base && !step)
2240 break;
2241
2242 continue;
2243 }
2244
2245 if (!base && !step)
2246 continue;
2247
2248 if (operand_equal_p (base, cand->iv->base, 0)
2249 && operand_equal_p (step, cand->iv->step, 0)
2250 && (TYPE_PRECISION (TREE_TYPE (base))
2251 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2252 break;
2253 }
2254
2255 if (i == n_iv_cands (data))
2256 {
2257 cand = XCNEW (struct iv_cand);
2258 cand->id = i;
2259
2260 if (!base && !step)
2261 cand->iv = NULL;
2262 else
2263 cand->iv = alloc_iv (base, step);
2264
2265 cand->pos = pos;
2266 if (pos != IP_ORIGINAL && cand->iv)
2267 {
2268 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2269 cand->var_after = cand->var_before;
2270 }
2271 cand->important = important;
2272 cand->incremented_at = incremented_at;
2273 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2274
2275 if (step
2276 && TREE_CODE (step) != INTEGER_CST)
2277 {
2278 fd_ivopts_data = data;
2279 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2280 }
2281
2282 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2283 cand->ainc_use = use;
2284 else
2285 cand->ainc_use = NULL;
2286
2287 if (dump_file && (dump_flags & TDF_DETAILS))
2288 dump_cand (dump_file, cand);
2289 }
2290
2291 if (important && !cand->important)
2292 {
2293 cand->important = true;
2294 if (dump_file && (dump_flags & TDF_DETAILS))
2295 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2296 }
2297
2298 if (use)
2299 {
2300 bitmap_set_bit (use->related_cands, i);
2301 if (dump_file && (dump_flags & TDF_DETAILS))
2302 fprintf (dump_file, "Candidate %d is related to use %d\n",
2303 cand->id, use->id);
2304 }
2305
2306 return cand;
2307 }
2308
2309 /* Returns true if incrementing the induction variable at the end of the LOOP
2310 is allowed.
2311
2312 The purpose is to avoid splitting the latch edge with a biv increment, thus
2313 creating a jump, possibly confusing other optimization passes and leaving
2314 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2315 is not available (so we do not have a better alternative), or if the latch
2316 edge is already nonempty. */
2317
2318 static bool
2319 allow_ip_end_pos_p (struct loop *loop)
2320 {
2321 if (!ip_normal_pos (loop))
2322 return true;
2323
2324 if (!empty_block_p (ip_end_pos (loop)))
2325 return true;
2326
2327 return false;
2328 }
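
/* For example, in a loop whose latch block already contains statements
   (as often happens after loop rotation), inserting the increment there
   creates no new jump, so IP_END is acceptable; with an empty latch we
   prefer IP_NORMAL_POS whenever it is available. */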
2329
2330 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2331 The important field is set to IMPORTANT. */
2332
2333 static void
2334 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2335 bool important, struct iv_use *use)
2336 {
2337 basic_block use_bb = gimple_bb (use->stmt);
2338 enum machine_mode mem_mode;
2339 unsigned HOST_WIDE_INT cstepi;
2340
2341 /* If we insert the increment in any position other than the standard
2342 ones, we must ensure that the variable is incremented once per iteration.
2343 It must not be in an inner nested loop, or on one side of an if
2344 statement. */
2345 if (use_bb->loop_father != data->current_loop
2346 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2347 || stmt_could_throw_p (use->stmt)
2348 || !cst_and_fits_in_hwi (step))
2349 return;
2350
2351 cstepi = int_cst_value (step);
2352
2353 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2354 if ((HAVE_PRE_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2355 || (HAVE_PRE_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2356 {
2357 enum tree_code code = MINUS_EXPR;
2358 tree new_base;
2359 tree new_step = step;
2360
2361 if (POINTER_TYPE_P (TREE_TYPE (base)))
2362 {
2363 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2364 code = POINTER_PLUS_EXPR;
2365 }
2366 else
2367 new_step = fold_convert (TREE_TYPE (base), new_step);
2368 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2369 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2370 use->stmt);
2371 }
2372 if ((HAVE_POST_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2373 || (HAVE_POST_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2374 {
2375 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2376 use->stmt);
2377 }
2378 }
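
/* A worked example: for a 4-byte memory access with constant step 4,
   GET_MODE_SIZE (mem_mode) == cstepi, so on a target with
   post-increment a candidate with the original base is added at
   IP_AFTER_USE, and on a target with pre-increment a candidate with
   base BASE - STEP is added at IP_BEFORE_USE; in both cases the
   candidate's value at the use matches the autoincremented address. */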
2379
2380 /* Adds a candidate BASE + STEP * i. The important field is set to IMPORTANT
2381 and the position to POS. If USE is not NULL, the candidate is set as related to
2382 it. The candidate computation is scheduled on all available positions. */
2383
2384 static void
2385 add_candidate (struct ivopts_data *data,
2386 tree base, tree step, bool important, struct iv_use *use)
2387 {
2388 if (ip_normal_pos (data->current_loop))
2389 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2390 if (ip_end_pos (data->current_loop)
2391 && allow_ip_end_pos_p (data->current_loop))
2392 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2393
2394 if (use != NULL && use->type == USE_ADDRESS)
2395 add_autoinc_candidates (data, base, step, important, use);
2396 }
2397
2398 /* Add a standard "0 + 1 * iteration" iv candidate for a
2399 type with SIZE bits. */
2400
2401 static void
2402 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2403 unsigned int size)
2404 {
2405 tree type = lang_hooks.types.type_for_size (size, true);
2406 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2407 true, NULL);
2408 }
2409
2410 /* Adds standard iv candidates. */
2411
2412 static void
2413 add_standard_iv_candidates (struct ivopts_data *data)
2414 {
2415 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2416
2417 /* The same for a double-integer type if it is still fast enough. */
2418 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2419 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2420 }
2421
2422
2423 /* Adds candidates based on the old induction variable IV. */
2424
2425 static void
2426 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2427 {
2428 gimple phi;
2429 tree def;
2430 struct iv_cand *cand;
2431
2432 add_candidate (data, iv->base, iv->step, true, NULL);
2433
2434 /* The same, but with initial value zero. */
2435 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2436 add_candidate (data, size_int (0), iv->step, true, NULL);
2437 else
2438 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2439 iv->step, true, NULL);
2440
2441 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2442 if (gimple_code (phi) == GIMPLE_PHI)
2443 {
2444 /* Additionally record the possibility of leaving the original iv
2445 untouched. */
2446 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2447 cand = add_candidate_1 (data,
2448 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2449 SSA_NAME_DEF_STMT (def));
2450 cand->var_before = iv->ssa_name;
2451 cand->var_after = def;
2452 }
2453 }
2454
2455 /* Adds candidates based on the old induction variables. */
2456
2457 static void
2458 add_old_ivs_candidates (struct ivopts_data *data)
2459 {
2460 unsigned i;
2461 struct iv *iv;
2462 bitmap_iterator bi;
2463
2464 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2465 {
2466 iv = ver_info (data, i)->iv;
2467 if (iv && iv->biv_p && !integer_zerop (iv->step))
2468 add_old_iv_candidates (data, iv);
2469 }
2470 }
2471
2472 /* Adds candidates based on the value of the induction variable IV and USE. */
2473
2474 static void
2475 add_iv_value_candidates (struct ivopts_data *data,
2476 struct iv *iv, struct iv_use *use)
2477 {
2478 unsigned HOST_WIDE_INT offset;
2479 tree base;
2480 tree basetype;
2481
2482 add_candidate (data, iv->base, iv->step, false, use);
2483
2484 /* The same, but with initial value zero. Make such a variable important,
2485 since it is generic enough that possibly many uses may be based
2486 on it. */
2487 basetype = TREE_TYPE (iv->base);
2488 if (POINTER_TYPE_P (basetype))
2489 basetype = sizetype;
2490 add_candidate (data, build_int_cst (basetype, 0),
2491 iv->step, true, use);
2492
2493 /* Third, try removing the constant offset. Make sure to even
2494 add a candidate for &a[0] vs. (T *)&a. */
2495 base = strip_offset (iv->base, &offset);
2496 if (offset
2497 || base != iv->base)
2498 add_candidate (data, base, iv->step, false, use);
2499 }
2500
2501 /* Adds candidates based on the uses. */
2502
2503 static void
2504 add_derived_ivs_candidates (struct ivopts_data *data)
2505 {
2506 unsigned i;
2507
2508 for (i = 0; i < n_iv_uses (data); i++)
2509 {
2510 struct iv_use *use = iv_use (data, i);
2511
2512 if (!use)
2513 continue;
2514
2515 switch (use->type)
2516 {
2517 case USE_NONLINEAR_EXPR:
2518 case USE_COMPARE:
2519 case USE_ADDRESS:
2520 /* Just add the ivs based on the value of the iv used here. */
2521 add_iv_value_candidates (data, use->iv, use);
2522 break;
2523
2524 default:
2525 gcc_unreachable ();
2526 }
2527 }
2528 }
2529
2530 /* Record important candidates and add them to related_cands bitmaps
2531 if needed. */
2532
2533 static void
2534 record_important_candidates (struct ivopts_data *data)
2535 {
2536 unsigned i;
2537 struct iv_use *use;
2538
2539 for (i = 0; i < n_iv_cands (data); i++)
2540 {
2541 struct iv_cand *cand = iv_cand (data, i);
2542
2543 if (cand->important)
2544 bitmap_set_bit (data->important_candidates, i);
2545 }
2546
2547 data->consider_all_candidates = (n_iv_cands (data)
2548 <= CONSIDER_ALL_CANDIDATES_BOUND);
2549
2550 if (data->consider_all_candidates)
2551 {
2552 /* We will not need "related_cands" bitmaps in this case,
2553 so release them to decrease peak memory consumption. */
2554 for (i = 0; i < n_iv_uses (data); i++)
2555 {
2556 use = iv_use (data, i);
2557 BITMAP_FREE (use->related_cands);
2558 }
2559 }
2560 else
2561 {
2562 /* Add important candidates to the related_cands bitmaps. */
2563 for (i = 0; i < n_iv_uses (data); i++)
2564 bitmap_ior_into (iv_use (data, i)->related_cands,
2565 data->important_candidates);
2566 }
2567 }
2568
2569 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2570 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2571 we allocate a simple hash table for each use. */
2572
2573 static void
2574 alloc_use_cost_map (struct ivopts_data *data)
2575 {
2576 unsigned i, size, s, j;
2577
2578 for (i = 0; i < n_iv_uses (data); i++)
2579 {
2580 struct iv_use *use = iv_use (data, i);
2581 bitmap_iterator bi;
2582
2583 if (data->consider_all_candidates)
2584 size = n_iv_cands (data);
2585 else
2586 {
2587 s = 0;
2588 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2589 {
2590 s++;
2591 }
2592
2593 /* Round up to a power of two, so that computing the modulus by it is fast. */
2594 for (size = 1; size < s; size <<= 1)
2595 continue;
2596 }
2597
2598 use->n_map_members = size;
2599 use->cost_map = XCNEWVEC (struct cost_pair, size);
2600 }
2601 }
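
/* For instance, a use with 5 related candidates gets size == 8 from the
   rounding loop above, so that the "id & (size - 1)" computation in
   set_use_iv_cost below reduces an id modulo the table size with a
   single AND. */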
2602
2603 /* Returns a description of the computation cost of an expression whose
2604 runtime cost is RUNTIME and whose complexity corresponds to COMPLEXITY. */
2605
2606 static comp_cost
2607 new_cost (unsigned runtime, unsigned complexity)
2608 {
2609 comp_cost cost;
2610
2611 cost.cost = runtime;
2612 cost.complexity = complexity;
2613
2614 return cost;
2615 }
2616
2617 /* Adds costs COST1 and COST2. */
2618
2619 static comp_cost
2620 add_costs (comp_cost cost1, comp_cost cost2)
2621 {
2622 cost1.cost += cost2.cost;
2623 cost1.complexity += cost2.complexity;
2624
2625 return cost1;
2626 }
2627 /* Subtracts cost COST2 from COST1. */
2628
2629 static comp_cost
2630 sub_costs (comp_cost cost1, comp_cost cost2)
2631 {
2632 cost1.cost -= cost2.cost;
2633 cost1.complexity -= cost2.complexity;
2634
2635 return cost1;
2636 }
2637
2638 /* Returns a negative number if COST1 < COST2, a positive number if
2639 COST1 > COST2, and 0 if COST1 = COST2. */
2640
2641 static int
2642 compare_costs (comp_cost cost1, comp_cost cost2)
2643 {
2644 if (cost1.cost == cost2.cost)
2645 return cost1.complexity - cost2.complexity;
2646
2647 return cost1.cost - cost2.cost;
2648 }
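
/* For example, {cost = 4, complexity = 1} compares as smaller than
   {cost = 5, complexity = 0}: the runtime cost dominates, and the
   complexity only breaks ties between equal runtime costs. */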
2649
2650 /* Returns true if COST is infinite. */
2651
2652 static bool
2653 infinite_cost_p (comp_cost cost)
2654 {
2655 return cost.cost == INFTY;
2656 }
2657
2658 /* Sets the cost of the (USE, CANDIDATE) pair to COST and records that it
2659 depends on invariants DEPENDS_ON, that the value used in expressing it
2660 is VALUE, and that the related loop-invariant expression id is INV_EXPR_ID. */
2661
2662 static void
2663 set_use_iv_cost (struct ivopts_data *data,
2664 struct iv_use *use, struct iv_cand *cand,
2665 comp_cost cost, bitmap depends_on, tree value,
2666 int inv_expr_id)
2667 {
2668 unsigned i, s;
2669
2670 if (infinite_cost_p (cost))
2671 {
2672 BITMAP_FREE (depends_on);
2673 return;
2674 }
2675
2676 if (data->consider_all_candidates)
2677 {
2678 use->cost_map[cand->id].cand = cand;
2679 use->cost_map[cand->id].cost = cost;
2680 use->cost_map[cand->id].depends_on = depends_on;
2681 use->cost_map[cand->id].value = value;
2682 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2683 return;
2684 }
2685
2686 /* n_map_members is a power of two, so this computes modulo. */
2687 s = cand->id & (use->n_map_members - 1);
2688 for (i = s; i < use->n_map_members; i++)
2689 if (!use->cost_map[i].cand)
2690 goto found;
2691 for (i = 0; i < s; i++)
2692 if (!use->cost_map[i].cand)
2693 goto found;
2694
2695 gcc_unreachable ();
2696
2697 found:
2698 use->cost_map[i].cand = cand;
2699 use->cost_map[i].cost = cost;
2700 use->cost_map[i].depends_on = depends_on;
2701 use->cost_map[i].value = value;
2702 use->cost_map[i].inv_expr_id = inv_expr_id;
2703 }
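
/* An example of the probing scheme above: with n_map_members == 8, a
   candidate with id 11 hashes to slot 11 & 7 == 3; if that slot is
   already taken, the search continues linearly at slots 4, 5, ...,
   wrapping around to slot 0 after the end of the array. */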
2704
2705 /* Gets cost of (USE, CANDIDATE) pair. */
2706
2707 static struct cost_pair *
2708 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2709 struct iv_cand *cand)
2710 {
2711 unsigned i, s;
2712 struct cost_pair *ret;
2713
2714 if (!cand)
2715 return NULL;
2716
2717 if (data->consider_all_candidates)
2718 {
2719 ret = use->cost_map + cand->id;
2720 if (!ret->cand)
2721 return NULL;
2722
2723 return ret;
2724 }
2725
2726 /* n_map_members is a power of two, so this computes modulo. */
2727 s = cand->id & (use->n_map_members - 1);
2728 for (i = s; i < use->n_map_members; i++)
2729 if (use->cost_map[i].cand == cand)
2730 return use->cost_map + i;
2731
2732 for (i = 0; i < s; i++)
2733 if (use->cost_map[i].cand == cand)
2734 return use->cost_map + i;
2735
2736 return NULL;
2737 }
2738
2739 /* Returns an estimate of the cost of computing SEQ. */
2740
2741 static unsigned
2742 seq_cost (rtx seq, bool speed)
2743 {
2744 unsigned cost = 0;
2745 rtx set;
2746
2747 for (; seq; seq = NEXT_INSN (seq))
2748 {
2749 set = single_set (seq);
2750 if (set)
2751 cost += rtx_cost (set, SET, speed);
2752 else
2753 cost++;
2754 }
2755
2756 return cost;
2757 }
2758
2759 /* Produces DECL_RTL for object OBJ so it looks like it is stored in memory. */
2760 static rtx
2761 produce_memory_decl_rtl (tree obj, int *regno)
2762 {
2763 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2764 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2765 rtx x;
2766
2767 gcc_assert (obj);
2768 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2769 {
2770 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2771 x = gen_rtx_SYMBOL_REF (address_mode, name);
2772 SET_SYMBOL_REF_DECL (x, obj);
2773 x = gen_rtx_MEM (DECL_MODE (obj), x);
2774 set_mem_addr_space (x, as);
2775 targetm.encode_section_info (obj, x, true);
2776 }
2777 else
2778 {
2779 x = gen_raw_REG (address_mode, (*regno)++);
2780 x = gen_rtx_MEM (DECL_MODE (obj), x);
2781 set_mem_addr_space (x, as);
2782 }
2783
2784 return x;
2785 }
2786
2787 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
2788 walk_tree. DATA contains the current fake register number. */
2789
2790 static tree
2791 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2792 {
2793 tree obj = NULL_TREE;
2794 rtx x = NULL_RTX;
2795 int *regno = (int *) data;
2796
2797 switch (TREE_CODE (*expr_p))
2798 {
2799 case ADDR_EXPR:
2800 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2801 handled_component_p (*expr_p);
2802 expr_p = &TREE_OPERAND (*expr_p, 0))
2803 continue;
2804 obj = *expr_p;
2805 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2806 x = produce_memory_decl_rtl (obj, regno);
2807 break;
2808
2809 case SSA_NAME:
2810 *ws = 0;
2811 obj = SSA_NAME_VAR (*expr_p);
2812 if (!DECL_RTL_SET_P (obj))
2813 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2814 break;
2815
2816 case VAR_DECL:
2817 case PARM_DECL:
2818 case RESULT_DECL:
2819 *ws = 0;
2820 obj = *expr_p;
2821
2822 if (DECL_RTL_SET_P (obj))
2823 break;
2824
2825 if (DECL_MODE (obj) == BLKmode)
2826 x = produce_memory_decl_rtl (obj, regno);
2827 else
2828 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2829
2830 break;
2831
2832 default:
2833 break;
2834 }
2835
2836 if (x)
2837 {
2838 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2839 SET_DECL_RTL (obj, x);
2840 }
2841
2842 return NULL_TREE;
2843 }
2844
2845 /* Determines cost of the computation of EXPR. */
2846
2847 static unsigned
2848 computation_cost (tree expr, bool speed)
2849 {
2850 rtx seq, rslt;
2851 tree type = TREE_TYPE (expr);
2852 unsigned cost;
2853 /* Avoid using hard regs in ways which may be unsupported. */
2854 int regno = LAST_VIRTUAL_REGISTER + 1;
2855 struct cgraph_node *node = cgraph_node (current_function_decl);
2856 enum node_frequency real_frequency = node->frequency;
2857
2858 node->frequency = NODE_FREQUENCY_NORMAL;
2859 crtl->maybe_hot_insn_p = speed;
2860 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2861 start_sequence ();
2862 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2863 seq = get_insns ();
2864 end_sequence ();
2865 default_rtl_profile ();
2866 node->frequency = real_frequency;
2867
2868 cost = seq_cost (seq, speed);
2869 if (MEM_P (rslt))
2870 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2871 TYPE_ADDR_SPACE (type), speed);
2872
2873 return cost;
2874 }
2875
2876 /* Returns variable containing the value of candidate CAND at statement AT. */
2877
2878 static tree
2879 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2880 {
2881 if (stmt_after_increment (loop, cand, stmt))
2882 return cand->var_after;
2883 else
2884 return cand->var_before;
2885 }
2886
2887 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2888 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2889
2890 int
2891 tree_int_cst_sign_bit (const_tree t)
2892 {
2893 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2894 unsigned HOST_WIDE_INT w;
2895
2896 if (bitno < HOST_BITS_PER_WIDE_INT)
2897 w = TREE_INT_CST_LOW (t);
2898 else
2899 {
2900 w = TREE_INT_CST_HIGH (t);
2901 bitno -= HOST_BITS_PER_WIDE_INT;
2902 }
2903
2904 return (w >> bitno) & 1;
2905 }
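
/* For example, for a constant of a 16-bit type, bit 15 is inspected,
   so 0x8000 yields 1 and 0x7fff yields 0, independently of how wide
   the mode backing the type is. */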
2906
2907 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2908 same precision, which is at least as wide as the precision of TYPE, stores
2909 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2910 type of A and B. */
2911
2912 static tree
2913 determine_common_wider_type (tree *a, tree *b)
2914 {
2915 tree wider_type = NULL;
2916 tree suba, subb;
2917 tree atype = TREE_TYPE (*a);
2918
2919 if (CONVERT_EXPR_P (*a))
2920 {
2921 suba = TREE_OPERAND (*a, 0);
2922 wider_type = TREE_TYPE (suba);
2923 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2924 return atype;
2925 }
2926 else
2927 return atype;
2928
2929 if (CONVERT_EXPR_P (*b))
2930 {
2931 subb = TREE_OPERAND (*b, 0);
2932 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2933 return atype;
2934 }
2935 else
2936 return atype;
2937
2938 *a = suba;
2939 *b = subb;
2940 return wider_type;
2941 }
2942
2943 /* Determines the expression by which USE is expressed from induction variable
2944 CAND at statement AT in LOOP. The expression is stored in a decomposed
2945 form into AFF. Returns false if USE cannot be expressed using CAND. */
2946
2947 static bool
2948 get_computation_aff (struct loop *loop,
2949 struct iv_use *use, struct iv_cand *cand, gimple at,
2950 struct affine_tree_combination *aff)
2951 {
2952 tree ubase = use->iv->base;
2953 tree ustep = use->iv->step;
2954 tree cbase = cand->iv->base;
2955 tree cstep = cand->iv->step, cstep_common;
2956 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2957 tree common_type, var;
2958 tree uutype;
2959 aff_tree cbase_aff, var_aff;
2960 double_int rat;
2961
2962 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2963 {
2964 /* We do not have enough precision to express the values of USE. */
2965 return false;
2966 }
2967
2968 var = var_at_stmt (loop, cand, at);
2969 uutype = unsigned_type_for (utype);
2970
2971 /* If the conversion is not noop, perform it. */
2972 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2973 {
2974 cstep = fold_convert (uutype, cstep);
2975 cbase = fold_convert (uutype, cbase);
2976 var = fold_convert (uutype, var);
2977 }
2978
2979 if (!constant_multiple_of (ustep, cstep, &rat))
2980 return false;
2981
2982 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2983 type, we achieve better folding by computing their difference in this
2984 wider type, and casting the result to UUTYPE. We do not need to worry about
2985 overflows, as all the arithmetic will in the end be performed in UUTYPE
2986 anyway. */
2987 common_type = determine_common_wider_type (&ubase, &cbase);
2988
2989 /* use = ubase - ratio * cbase + ratio * var. */
2990 tree_to_aff_combination (ubase, common_type, aff);
2991 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2992 tree_to_aff_combination (var, uutype, &var_aff);
2993
2994 /* We need to shift the value if we are after the increment. */
2995 if (stmt_after_increment (loop, cand, at))
2996 {
2997 aff_tree cstep_aff;
2998
2999 if (common_type != uutype)
3000 cstep_common = fold_convert (common_type, cstep);
3001 else
3002 cstep_common = cstep;
3003
3004 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3005 aff_combination_add (&cbase_aff, &cstep_aff);
3006 }
3007
3008 aff_combination_scale (&cbase_aff, double_int_neg (rat));
3009 aff_combination_add (aff, &cbase_aff);
3010 if (common_type != uutype)
3011 aff_combination_convert (aff, uutype);
3012
3013 aff_combination_scale (&var_aff, rat);
3014 aff_combination_add (aff, &var_aff);
3015
3016 return true;
3017 }
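
/* A worked example: if USE has base b + 4 and step 4, and CAND has
   base b and step 4, then constant_multiple_of yields ratio 1 and the
   combination computed above is (b + 4) - 1 * b + 1 * var, i.e.
   var + 4. */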
3018
3019 /* Determines the expression by which USE is expressed from induction variable
3020 CAND at statement AT in LOOP. The computation is unshared. */
3021
3022 static tree
3023 get_computation_at (struct loop *loop,
3024 struct iv_use *use, struct iv_cand *cand, gimple at)
3025 {
3026 aff_tree aff;
3027 tree type = TREE_TYPE (use->iv->base);
3028
3029 if (!get_computation_aff (loop, use, cand, at, &aff))
3030 return NULL_TREE;
3031 unshare_aff_combination (&aff);
3032 return fold_convert (type, aff_combination_to_tree (&aff));
3033 }
3034
3035 /* Determines the expression by which USE is expressed from induction variable
3036 CAND in LOOP. The computation is unshared. */
3037
3038 static tree
3039 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3040 {
3041 return get_computation_at (loop, use, cand, use->stmt);
3042 }
3043
3044 /* Adjust the cost COST for being in loop setup rather than loop body.
3045 If we're optimizing for space, the loop setup overhead is constant;
3046 if we're optimizing for speed, amortize it over the average number of iterations. */
3047 static unsigned
3048 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3049 {
3050 if (cost == INFTY)
3051 return cost;
3052 else if (optimize_loop_for_speed_p (data->current_loop))
3053 return cost / avg_loop_niter (data->current_loop);
3054 else
3055 return cost;
3056 }
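
/* For instance, a setup cost of 10 in a loop expected to iterate 5
   times on average is accounted as 2 when optimizing for speed: the
   setup code executes once, while the body executes on every
   iteration. */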
3057
3058 /* Returns cost of addition in MODE. */
3059
3060 static unsigned
3061 add_cost (enum machine_mode mode, bool speed)
3062 {
3063 static unsigned costs[NUM_MACHINE_MODES];
3064 rtx seq;
3065 unsigned cost;
3066
3067 if (costs[mode])
3068 return costs[mode];
3069
3070 start_sequence ();
3071 force_operand (gen_rtx_fmt_ee (PLUS, mode,
3072 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3073 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
3074 NULL_RTX);
3075 seq = get_insns ();
3076 end_sequence ();
3077
3078 cost = seq_cost (seq, speed);
3079 if (!cost)
3080 cost = 1;
3081
3082 costs[mode] = cost;
3083
3084 if (dump_file && (dump_flags & TDF_DETAILS))
3085 fprintf (dump_file, "Addition in %s costs %d\n",
3086 GET_MODE_NAME (mode), cost);
3087 return cost;
3088 }
3089
3090 /* Entry in a hashtable of already known costs for multiplication. */
3091 struct mbc_entry
3092 {
3093 HOST_WIDE_INT cst; /* The constant to multiply by. */
3094 enum machine_mode mode; /* In mode. */
3095 unsigned cost; /* The cost. */
3096 };
3097
3098 /* Computes the hash value for ENTRY. */
3099
3100 static hashval_t
3101 mbc_entry_hash (const void *entry)
3102 {
3103 const struct mbc_entry *e = (const struct mbc_entry *) entry;
3104
3105 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3106 }
3107
3108 /* Compares the hash table entries ENTRY1 and ENTRY2. */
3109
3110 static int
3111 mbc_entry_eq (const void *entry1, const void *entry2)
3112 {
3113 const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
3114 const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
3115
3116 return (e1->mode == e2->mode
3117 && e1->cst == e2->cst);
3118 }
3119
3120 /* Returns cost of multiplication by constant CST in MODE. */
3121
3122 unsigned
3123 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
3124 {
3125 static htab_t costs;
3126 struct mbc_entry **cached, act;
3127 rtx seq;
3128 unsigned cost;
3129
3130 if (!costs)
3131 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3132
3133 act.mode = mode;
3134 act.cst = cst;
3135 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3136 if (*cached)
3137 return (*cached)->cost;
3138
3139 *cached = XNEW (struct mbc_entry);
3140 (*cached)->mode = mode;
3141 (*cached)->cst = cst;
3142
3143 start_sequence ();
3144 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3145 gen_int_mode (cst, mode), NULL_RTX, 0);
3146 seq = get_insns ();
3147 end_sequence ();
3148
3149 cost = seq_cost (seq, speed);
3150
3151 if (dump_file && (dump_flags & TDF_DETAILS))
3152 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3153 (int) cst, GET_MODE_NAME (mode), cost);
3154
3155 (*cached)->cost = cost;
3156
3157 return cost;
3158 }
3159
3160 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3161 validity for a memory reference accessing memory of mode MODE in
3162 address space AS. */
3163
3164 DEF_VEC_P (sbitmap);
3165 DEF_VEC_ALLOC_P (sbitmap, heap);
3166
3167 bool
3168 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3169 addr_space_t as)
3170 {
3171 #define MAX_RATIO 128
3172 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3173 static VEC (sbitmap, heap) *valid_mult_list;
3174 sbitmap valid_mult;
3175
3176 if (data_index >= VEC_length (sbitmap, valid_mult_list))
3177 VEC_safe_grow_cleared (sbitmap, heap, valid_mult_list, data_index + 1);
3178
3179 valid_mult = VEC_index (sbitmap, valid_mult_list, data_index);
3180 if (!valid_mult)
3181 {
3182 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3183 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3184 rtx addr;
3185 HOST_WIDE_INT i;
3186
3187 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3188 sbitmap_zero (valid_mult);
3189 addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3190 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3191 {
3192 XEXP (addr, 1) = gen_int_mode (i, address_mode);
3193 if (memory_address_addr_space_p (mode, addr, as))
3194 SET_BIT (valid_mult, i + MAX_RATIO);
3195 }
3196
3197 if (dump_file && (dump_flags & TDF_DETAILS))
3198 {
3199 fprintf (dump_file, " allowed multipliers:");
3200 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3201 if (TEST_BIT (valid_mult, i + MAX_RATIO))
3202 fprintf (dump_file, " %d", (int) i);
3203 fprintf (dump_file, "\n");
3204 fprintf (dump_file, "\n");
3205 }
3206
3207 VEC_replace (sbitmap, valid_mult_list, data_index, valid_mult);
3208 }
3209
3210 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3211 return false;
3212
3213 return TEST_BIT (valid_mult, ratio + MAX_RATIO);
3214 }
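
/* For example, if a target only supports scaling the index of an
   address by 1, 2, 4 or 8, then only those ratios end up set in
   VALID_MULT, and a ratio such as 7 makes this predicate return
   false there. */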
3215
3216 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3217 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3218 variable is omitted. Compute the cost for a memory reference that accesses
3219 a memory location of mode MEM_MODE in address space AS.
3220
3221 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3222 size of MEM_MODE / RATIO) is available. To make this determination, we
3223 look at the size of the increment to be made, which is given in CSTEP.
3224 CSTEP may be zero if the step is unknown.
3225 STMT_AFTER_INC is true iff the statement we're looking at is after the
3226 increment of the original biv.
3227
3228 TODO -- there must be some better way. All of this is quite crude. */
3229
3230 typedef struct
3231 {
3232 HOST_WIDE_INT min_offset, max_offset;
3233 unsigned costs[2][2][2][2];
3234 } *address_cost_data;
3235
3236 DEF_VEC_P (address_cost_data);
3237 DEF_VEC_ALLOC_P (address_cost_data, heap);
3238
3239 static comp_cost
3240 get_address_cost (bool symbol_present, bool var_present,
3241 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3242 HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3243 addr_space_t as, bool speed,
3244 bool stmt_after_inc, bool *may_autoinc)
3245 {
3246 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3247 static VEC(address_cost_data, heap) *address_cost_data_list;
3248 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3249 address_cost_data data;
3250 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3251 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3252 unsigned cost, acost, complexity;
3253 bool offset_p, ratio_p, autoinc;
3254 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3255 unsigned HOST_WIDE_INT mask;
3256 unsigned bits;
3257
3258 if (data_index >= VEC_length (address_cost_data, address_cost_data_list))
3259 VEC_safe_grow_cleared (address_cost_data, heap, address_cost_data_list,
3260 data_index + 1);
3261
3262 data = VEC_index (address_cost_data, address_cost_data_list, data_index);
3263 if (!data)
3264 {
3265 HOST_WIDE_INT i;
3266 HOST_WIDE_INT rat, off = 0;
3267 int old_cse_not_expected, width;
3268 unsigned sym_p, var_p, off_p, rat_p, add_c;
3269 rtx seq, addr, base;
3270 rtx reg0, reg1;
3271
3272 data = (address_cost_data) xcalloc (1, sizeof (*data));
3273
3274 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3275
3276 width = GET_MODE_BITSIZE (address_mode) - 1;
3277 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3278 width = HOST_BITS_PER_WIDE_INT - 1;
3279 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3280
3281 for (i = width; i >= 0; i--)
3282 {
3283 off = -((HOST_WIDE_INT) 1 << i);
3284 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3285 if (memory_address_addr_space_p (mem_mode, addr, as))
3286 break;
3287 }
3288 data->min_offset = (i == -1 ? 0 : off);
3289
3290 for (i = width; i >= 0; i--)
3291 {
3292 off = ((HOST_WIDE_INT) 1 << i) - 1;
3293 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3294 if (memory_address_addr_space_p (mem_mode, addr, as))
3295 break;
3296 }
3297 if (i == -1)
3298 off = 0;
3299 data->max_offset = off;
3300
3301 if (dump_file && (dump_flags & TDF_DETAILS))
3302 {
3303 fprintf (dump_file, "get_address_cost:\n");
3304 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3305 GET_MODE_NAME (mem_mode),
3306 data->min_offset);
3307 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3308 GET_MODE_NAME (mem_mode),
3309 data->max_offset);
3310 }
3311
3312 rat = 1;
3313 for (i = 2; i <= MAX_RATIO; i++)
3314 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3315 {
3316 rat = i;
3317 break;
3318 }
3319
3320 /* Compute the cost of various addressing modes. */
3321 acost = 0;
3322 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3323 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3324
3325 if (HAVE_PRE_DECREMENT)
3326 {
3327 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3328 has_predec[mem_mode]
3329 = memory_address_addr_space_p (mem_mode, addr, as);
3330 }
3331 if (HAVE_POST_DECREMENT)
3332 {
3333 addr = gen_rtx_POST_DEC (address_mode, reg0);
3334 has_postdec[mem_mode]
3335 = memory_address_addr_space_p (mem_mode, addr, as);
3336 }
3337 if (HAVE_PRE_INCREMENT)
3338 {
3339 addr = gen_rtx_PRE_INC (address_mode, reg0);
3340 has_preinc[mem_mode]
3341 = memory_address_addr_space_p (mem_mode, addr, as);
3342 }
3343 if (HAVE_POST_INCREMENT)
3344 {
3345 addr = gen_rtx_POST_INC (address_mode, reg0);
3346 has_postinc[mem_mode]
3347 = memory_address_addr_space_p (mem_mode, addr, as);
3348 }
3349 for (i = 0; i < 16; i++)
3350 {
3351 sym_p = i & 1;
3352 var_p = (i >> 1) & 1;
3353 off_p = (i >> 2) & 1;
3354 rat_p = (i >> 3) & 1;
3355
3356 addr = reg0;
3357 if (rat_p)
3358 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3359 gen_int_mode (rat, address_mode));
3360
3361 if (var_p)
3362 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3363
3364 if (sym_p)
3365 {
3366 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3367 /* ??? We can run into trouble with some backends by presenting
3368 it with symbols which haven't been properly passed through
3369 targetm.encode_section_info. By setting the local bit, we
3370 enhance the probability of things working. */
3371 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3372
3373 if (off_p)
3374 base = gen_rtx_fmt_e (CONST, address_mode,
3375 gen_rtx_fmt_ee
3376 (PLUS, address_mode, base,
3377 gen_int_mode (off, address_mode)));
3378 }
3379 else if (off_p)
3380 base = gen_int_mode (off, address_mode);
3381 else
3382 base = NULL_RTX;
3383
3384 if (base)
3385 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3386
3387 start_sequence ();
3388 /* To avoid splitting addressing modes, pretend that no cse will
3389 follow. */
3390 old_cse_not_expected = cse_not_expected;
3391 cse_not_expected = true;
3392 addr = memory_address_addr_space (mem_mode, addr, as);
3393 cse_not_expected = old_cse_not_expected;
3394 seq = get_insns ();
3395 end_sequence ();
3396
3397 acost = seq_cost (seq, speed);
3398 acost += address_cost (addr, mem_mode, as, speed);
3399
3400 if (!acost)
3401 acost = 1;
3402 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3403 }
3404
3405 /* On some targets, it is quite expensive to load a symbol into a register,
3406 which makes addresses that contain symbols look much more expensive.
3407 However, the symbol will have to be loaded in any case before the
3408 loop (and quite likely we have it in a register already), so it does not
3409 make much sense to penalize such addresses too heavily. So make some final
3410 tweaks for the SYMBOL_PRESENT modes:
3411
3412 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3413 var is cheaper, use this mode with a small penalty.
3414 If VAR_PRESENT is true, test whether the mode with
3415 SYMBOL_PRESENT = false is cheaper even with the cost of the addition, and
3416 if this is the case, use it. */
3417 add_c = add_cost (address_mode, speed);
3418 for (i = 0; i < 8; i++)
3419 {
3420 var_p = i & 1;
3421 off_p = (i >> 1) & 1;
3422 rat_p = (i >> 2) & 1;
3423
3424 acost = data->costs[0][1][off_p][rat_p] + 1;
3425 if (var_p)
3426 acost += add_c;
3427
3428 if (acost < data->costs[1][var_p][off_p][rat_p])
3429 data->costs[1][var_p][off_p][rat_p] = acost;
3430 }
3431
3432 if (dump_file && (dump_flags & TDF_DETAILS))
3433 {
3434 fprintf (dump_file, "Address costs:\n");
3435
3436 for (i = 0; i < 16; i++)
3437 {
3438 sym_p = i & 1;
3439 var_p = (i >> 1) & 1;
3440 off_p = (i >> 2) & 1;
3441 rat_p = (i >> 3) & 1;
3442
3443 fprintf (dump_file, " ");
3444 if (sym_p)
3445 fprintf (dump_file, "sym + ");
3446 if (var_p)
3447 fprintf (dump_file, "var + ");
3448 if (off_p)
3449 fprintf (dump_file, "cst + ");
3450 if (rat_p)
3451 fprintf (dump_file, "rat * ");
3452
3453 acost = data->costs[sym_p][var_p][off_p][rat_p];
3454 fprintf (dump_file, "index costs %d\n", acost);
3455 }
3456 if (has_predec[mem_mode] || has_postdec[mem_mode]
3457 || has_preinc[mem_mode] || has_postinc[mem_mode])
3458 fprintf (dump_file, " May include autoinc/dec\n");
3459 fprintf (dump_file, "\n");
3460 }
3461
3462 VEC_replace (address_cost_data, address_cost_data_list,
3463 data_index, data);
3464 }
3465
3466 bits = GET_MODE_BITSIZE (address_mode);
3467 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3468 offset &= mask;
3469 if ((offset >> (bits - 1) & 1))
3470 offset |= ~mask;
3471 s_offset = offset;
3472
3473 autoinc = false;
3474 msize = GET_MODE_SIZE (mem_mode);
3475 autoinc_offset = offset;
3476 if (stmt_after_inc)
3477 autoinc_offset += ratio * cstep;
3478 if (symbol_present || var_present || ratio != 1)
3479 autoinc = false;
3480 else if ((has_postinc[mem_mode] && autoinc_offset == 0
3481 && msize == cstep)
3482 || (has_postdec[mem_mode] && autoinc_offset == 0
3483 && msize == -cstep)
3484 || (has_preinc[mem_mode] && autoinc_offset == msize
3485 && msize == cstep)
3486 || (has_predec[mem_mode] && autoinc_offset == -msize
3487 && msize == -cstep))
3488 autoinc = true;
3489
3490 cost = 0;
3491 offset_p = (s_offset != 0
3492 && data->min_offset <= s_offset
3493 && s_offset <= data->max_offset);
3494 ratio_p = (ratio != 1
3495 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3496
3497 if (ratio != 1 && !ratio_p)
3498 cost += multiply_by_cost (ratio, address_mode, speed);
3499
3500 if (s_offset && !offset_p && !symbol_present)
3501 cost += add_cost (address_mode, speed);
3502
3503 if (may_autoinc)
3504 *may_autoinc = autoinc;
3505 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3506 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3507 return new_cost (cost + acost, complexity);
3508 }
3509
3510 /* Estimates cost of forcing expression EXPR into a variable. */
3511
3512 static comp_cost
3513 force_expr_to_var_cost (tree expr, bool speed)
3514 {
3515 static bool costs_initialized = false;
3516 static unsigned integer_cost [2];
3517 static unsigned symbol_cost [2];
3518 static unsigned address_cost [2];
3519 tree op0, op1;
3520 comp_cost cost0, cost1, cost;
3521 enum machine_mode mode;
3522
3523 if (!costs_initialized)
3524 {
3525 tree type = build_pointer_type (integer_type_node);
3526 tree var, addr;
3527 rtx x;
3528 int i;
3529
3530 var = create_tmp_var_raw (integer_type_node, "test_var");
3531 TREE_STATIC (var) = 1;
3532 x = produce_memory_decl_rtl (var, NULL);
3533 SET_DECL_RTL (var, x);
3534
3535 addr = build1 (ADDR_EXPR, type, var);
3536
3537
3538 for (i = 0; i < 2; i++)
3539 {
3540 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3541 2000), i);
3542
3543 symbol_cost[i] = computation_cost (addr, i) + 1;
3544
3545 address_cost[i]
3546 = computation_cost (build2 (POINTER_PLUS_EXPR, type,
3547 addr,
3548 build_int_cst (sizetype, 2000)), i) + 1;
3549 if (dump_file && (dump_flags & TDF_DETAILS))
3550 {
3551 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3552 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3553 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3554 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3555 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3556 fprintf (dump_file, "\n");
3557 }
3558 }
3559
3560 costs_initialized = true;
3561 }
3562
3563 STRIP_NOPS (expr);
3564
3565 if (SSA_VAR_P (expr))
3566 return zero_cost;
3567
3568 if (is_gimple_min_invariant (expr))
3569 {
3570 if (TREE_CODE (expr) == INTEGER_CST)
3571 return new_cost (integer_cost [speed], 0);
3572
3573 if (TREE_CODE (expr) == ADDR_EXPR)
3574 {
3575 tree obj = TREE_OPERAND (expr, 0);
3576
3577 if (TREE_CODE (obj) == VAR_DECL
3578 || TREE_CODE (obj) == PARM_DECL
3579 || TREE_CODE (obj) == RESULT_DECL)
3580 return new_cost (symbol_cost [speed], 0);
3581 }
3582
3583 return new_cost (address_cost [speed], 0);
3584 }
3585
3586 switch (TREE_CODE (expr))
3587 {
3588 case POINTER_PLUS_EXPR:
3589 case PLUS_EXPR:
3590 case MINUS_EXPR:
3591 case MULT_EXPR:
3592 op0 = TREE_OPERAND (expr, 0);
3593 op1 = TREE_OPERAND (expr, 1);
3594 STRIP_NOPS (op0);
3595 STRIP_NOPS (op1);
3596
3597 if (is_gimple_val (op0))
3598 cost0 = zero_cost;
3599 else
3600 cost0 = force_expr_to_var_cost (op0, speed);
3601
3602 if (is_gimple_val (op1))
3603 cost1 = zero_cost;
3604 else
3605 cost1 = force_expr_to_var_cost (op1, speed);
3606
3607 break;
3608
3609 case NEGATE_EXPR:
3610 op0 = TREE_OPERAND (expr, 0);
3611 STRIP_NOPS (op0);
3612 op1 = NULL_TREE;
3613
3614 if (is_gimple_val (op0))
3615 cost0 = zero_cost;
3616 else
3617 cost0 = force_expr_to_var_cost (op0, speed);
3618
3619 cost1 = zero_cost;
3620 break;
3621
3622 default:
3623 /* Just an arbitrary value, FIXME. */
3624 return new_cost (target_spill_cost[speed], 0);
3625 }
3626
3627 mode = TYPE_MODE (TREE_TYPE (expr));
3628 switch (TREE_CODE (expr))
3629 {
3630 case POINTER_PLUS_EXPR:
3631 case PLUS_EXPR:
3632 case MINUS_EXPR:
3633 case NEGATE_EXPR:
3634 cost = new_cost (add_cost (mode, speed), 0);
3635 break;
3636
3637 case MULT_EXPR:
3638 if (cst_and_fits_in_hwi (op0))
3639 cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0);
3640 else if (cst_and_fits_in_hwi (op1))
3641 cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0);
3642 else
3643 return new_cost (target_spill_cost [speed], 0);
3644 break;
3645
3646 default:
3647 gcc_unreachable ();
3648 }
3649
3650 cost = add_costs (cost, cost0);
3651 cost = add_costs (cost, cost1);
3652
3653 /* Bound the cost by target_spill_cost. The parts of complicated
3654 computations are often either loop invariant or at least can
3655 be shared between several iv uses, so letting this grow without
3656 limits would not give reasonable results. */
3657 if (cost.cost > (int) target_spill_cost [speed])
3658 cost.cost = target_spill_cost [speed];
3659
3660 return cost;
3661 }
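
/* A worked example: for EXPR = a * 4 + b, the cost is
   multiply_by_cost (4, mode) for the subexpression a * 4 plus
   add_cost (mode) for the addition; the sum is then capped by
   target_spill_cost as described above. */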
3662
3663 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3664 invariants the computation depends on. */
3665
3666 static comp_cost
3667 force_var_cost (struct ivopts_data *data,
3668 tree expr, bitmap *depends_on)
3669 {
3670 if (depends_on)
3671 {
3672 fd_ivopts_data = data;
3673 walk_tree (&expr, find_depends, depends_on, NULL);
3674 }
3675
3676 return force_expr_to_var_cost (expr, data->speed);
3677 }
3678
3679 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3680 value of the offset is added to OFFSET; SYMBOL_PRESENT and VAR_PRESENT are set
3681 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3682 invariants the computation depends on. */
3683
3684 static comp_cost
3685 split_address_cost (struct ivopts_data *data,
3686 tree addr, bool *symbol_present, bool *var_present,
3687 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3688 {
3689 tree core;
3690 HOST_WIDE_INT bitsize;
3691 HOST_WIDE_INT bitpos;
3692 tree toffset;
3693 enum machine_mode mode;
3694 int unsignedp, volatilep;
3695
3696 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3697 &unsignedp, &volatilep, false);
3698
3699 if (toffset != 0
3700 || bitpos % BITS_PER_UNIT != 0
3701 || TREE_CODE (core) != VAR_DECL)
3702 {
3703 *symbol_present = false;
3704 *var_present = true;
3705 fd_ivopts_data = data;
3706 walk_tree (&addr, find_depends, depends_on, NULL);
3707 return new_cost (target_spill_cost[data->speed], 0);
3708 }
3709
3710 *offset += bitpos / BITS_PER_UNIT;
3711 if (TREE_STATIC (core)
3712 || DECL_EXTERNAL (core))
3713 {
3714 *symbol_present = true;
3715 *var_present = false;
3716 return zero_cost;
3717 }
3718
3719 *symbol_present = false;
3720 *var_present = true;
3721 return zero_cost;
3722 }
3723
3724 /* Estimates cost of expressing difference of addresses E1 - E2 as
3725 var + symbol + offset. The value of the offset is added to OFFSET;
3726 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3727 part is missing. DEPENDS_ON is a set of the invariants the computation
3728 depends on. */
3729
3730 static comp_cost
3731 ptr_difference_cost (struct ivopts_data *data,
3732 tree e1, tree e2, bool *symbol_present, bool *var_present,
3733 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3734 {
3735 HOST_WIDE_INT diff = 0;
3736 aff_tree aff_e1, aff_e2;
3737 tree type;
3738
3739 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3740
3741 if (ptr_difference_const (e1, e2, &diff))
3742 {
3743 *offset += diff;
3744 *symbol_present = false;
3745 *var_present = false;
3746 return zero_cost;
3747 }
3748
3749 if (integer_zerop (e2))
3750 return split_address_cost (data, TREE_OPERAND (e1, 0),
3751 symbol_present, var_present, offset, depends_on);
3752
3753 *symbol_present = false;
3754 *var_present = true;
3755
3756 type = signed_type_for (TREE_TYPE (e1));
3757 tree_to_aff_combination (e1, type, &aff_e1);
3758 tree_to_aff_combination (e2, type, &aff_e2);
3759 aff_combination_scale (&aff_e2, double_int_minus_one);
3760 aff_combination_add (&aff_e1, &aff_e2);
3761
3762 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3763 }
3764
3765 /* Estimates cost of expressing difference E1 - E2 as
3766 var + symbol + offset. The value of the offset is added to OFFSET;
3767 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3768 part is missing. DEPENDS_ON is a set of the invariants the computation
3769 depends on. */
3770
3771 static comp_cost
3772 difference_cost (struct ivopts_data *data,
3773 tree e1, tree e2, bool *symbol_present, bool *var_present,
3774 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3775 {
3776 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3777 unsigned HOST_WIDE_INT off1, off2;
3778 aff_tree aff_e1, aff_e2;
3779 tree type;
3780
3781 e1 = strip_offset (e1, &off1);
3782 e2 = strip_offset (e2, &off2);
3783 *offset += off1 - off2;
3784
3785 STRIP_NOPS (e1);
3786 STRIP_NOPS (e2);
3787
3788 if (TREE_CODE (e1) == ADDR_EXPR)
3789 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3790 offset, depends_on);
3791 *symbol_present = false;
3792
3793 if (operand_equal_p (e1, e2, 0))
3794 {
3795 *var_present = false;
3796 return zero_cost;
3797 }
3798
3799 *var_present = true;
3800
3801 if (integer_zerop (e2))
3802 return force_var_cost (data, e1, depends_on);
3803
3804 if (integer_zerop (e1))
3805 {
3806 comp_cost cost = force_var_cost (data, e2, depends_on);
3807 cost.cost += multiply_by_cost (-1, mode, data->speed);
3808 return cost;
3809 }
3810
3811 type = signed_type_for (TREE_TYPE (e1));
3812 tree_to_aff_combination (e1, type, &aff_e1);
3813 tree_to_aff_combination (e2, type, &aff_e2);
3814 aff_combination_scale (&aff_e2, double_int_minus_one);
3815 aff_combination_add (&aff_e1, &aff_e2);
3816
3817 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3818 }
3819
3820 /* Returns true if AFF1 and AFF2 are identical. */
3821
3822 static bool
3823 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3824 {
3825 unsigned i;
3826
3827 if (aff1->n != aff2->n)
3828 return false;
3829
3830 for (i = 0; i < aff1->n; i++)
3831 {
3832 if (double_int_cmp (aff1->elts[i].coef, aff2->elts[i].coef, 0) != 0)
3833 return false;
3834
3835 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3836 return false;
3837 }
3838 return true;
3839 }
3840
3841 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3842 requires a new compiler-generated temporary. Returns -1 otherwise.
3843 ADDRESS_P is a flag indicating whether the expression is for address
3844 computation. */
3845
3846 static int
3847 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3848 tree cbase, HOST_WIDE_INT ratio,
3849 bool address_p)
3850 {
3851 aff_tree ubase_aff, cbase_aff;
3852 tree expr, ub, cb;
3853 struct iv_inv_expr_ent ent;
3854 struct iv_inv_expr_ent **slot;
3855
3856 STRIP_NOPS (ubase);
3857 STRIP_NOPS (cbase);
3858 ub = ubase;
3859 cb = cbase;
3860
3861 if ((TREE_CODE (ubase) == INTEGER_CST)
3862 && (TREE_CODE (cbase) == INTEGER_CST))
3863 return -1;
3864
3865 /* Strips the constant part. */
3866 if (TREE_CODE (ubase) == PLUS_EXPR
3867 || TREE_CODE (ubase) == MINUS_EXPR
3868 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
3869 {
3870 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
3871 ubase = TREE_OPERAND (ubase, 0);
3872 }
3873
3874 /* Strips the constant part. */
3875 if (TREE_CODE (cbase) == PLUS_EXPR
3876 || TREE_CODE (cbase) == MINUS_EXPR
3877 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
3878 {
3879 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
3880 cbase = TREE_OPERAND (cbase, 0);
3881 }
3882
3883 if (address_p)
3884 {
3885 if (((TREE_CODE (ubase) == SSA_NAME)
3886 || (TREE_CODE (ubase) == ADDR_EXPR
3887 && is_gimple_min_invariant (ubase)))
3888 && (TREE_CODE (cbase) == INTEGER_CST))
3889 return -1;
3890
3891 if (((TREE_CODE (cbase) == SSA_NAME)
3892 || (TREE_CODE (cbase) == ADDR_EXPR
3893 && is_gimple_min_invariant (cbase)))
3894 && (TREE_CODE (ubase) == INTEGER_CST))
3895 return -1;
3896 }
3897
3898 if (ratio == 1)
3899 {
3900 if (operand_equal_p (ubase, cbase, 0))
3901 return -1;
3902
3903 if (TREE_CODE (ubase) == ADDR_EXPR
3904 && TREE_CODE (cbase) == ADDR_EXPR)
3905 {
3906 tree usym, csym;
3907
3908 usym = TREE_OPERAND (ubase, 0);
3909 csym = TREE_OPERAND (cbase, 0);
3910 if (TREE_CODE (usym) == ARRAY_REF)
3911 {
3912 tree ind = TREE_OPERAND (usym, 1);
3913 if (TREE_CODE (ind) == INTEGER_CST
3914 && host_integerp (ind, 0)
3915 && TREE_INT_CST_LOW (ind) == 0)
3916 usym = TREE_OPERAND (usym, 0);
3917 }
3918 if (TREE_CODE (csym) == ARRAY_REF)
3919 {
3920 tree ind = TREE_OPERAND (csym, 1);
3921 if (TREE_CODE (ind) == INTEGER_CST
3922 && host_integerp (ind, 0)
3923 && TREE_INT_CST_LOW (ind) == 0)
3924 csym = TREE_OPERAND (csym, 0);
3925 }
3926 if (operand_equal_p (usym, csym, 0))
3927 return -1;
3928 }
3929 /* Now do a more complex comparison. */
3930 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
3931 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
3932 if (compare_aff_trees (&ubase_aff, &cbase_aff))
3933 return -1;
3934 }
3935
3936 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
3937 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
3938
3939 aff_combination_scale (&cbase_aff, shwi_to_double_int (-1 * ratio));
3940 aff_combination_add (&ubase_aff, &cbase_aff);
3941 expr = aff_combination_to_tree (&ubase_aff);
3942 ent.expr = expr;
3943 ent.hash = iterative_hash_expr (expr, 0);
3944 slot = (struct iv_inv_expr_ent **) htab_find_slot (data->inv_expr_tab,
3945 &ent, INSERT);
3946 if (*slot)
3947 return (*slot)->id;
3948
3949 *slot = XNEW (struct iv_inv_expr_ent);
3950 (*slot)->expr = expr;
3951 (*slot)->hash = ent.hash;
3952 (*slot)->id = data->inv_expr_id++;
3953 return (*slot)->id;
3954 }
3955
3956
3957
3958 /* Determines the cost of the computation by which USE is expressed
3959 from induction variable CAND. If ADDRESS_P is true, we just need
3960 to create an address from it, otherwise we want to get it into a
3961 register. A set of invariants we depend on is stored in
3962 DEPENDS_ON. AT is the statement at which the value is computed.
3963 If CAN_AUTOINC is nonnull, use it to record whether autoinc
3964 addressing is likely. */
3965
3966 static comp_cost
3967 get_computation_cost_at (struct ivopts_data *data,
3968 struct iv_use *use, struct iv_cand *cand,
3969 bool address_p, bitmap *depends_on, gimple at,
3970 bool *can_autoinc,
3971 int *inv_expr_id)
3972 {
3973 tree ubase = use->iv->base, ustep = use->iv->step;
3974 tree cbase, cstep;
3975 tree utype = TREE_TYPE (ubase), ctype;
3976 unsigned HOST_WIDE_INT cstepi, offset = 0;
3977 HOST_WIDE_INT ratio, aratio;
3978 bool var_present, symbol_present, stmt_is_after_inc;
3979 comp_cost cost;
3980 double_int rat;
3981 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
3982
3983 *depends_on = NULL;
3984
3985 /* Only consider real candidates. */
3986 if (!cand->iv)
3987 return infinite_cost;
3988
3989 cbase = cand->iv->base;
3990 cstep = cand->iv->step;
3991 ctype = TREE_TYPE (cbase);
3992
3993 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3994 {
3995 /* We do not have a precision to express the values of use. */
3996 return infinite_cost;
3997 }
3998
3999 if (address_p)
4000 {
4001 /* Do not try to express the address of an object with a computation
4002 based on the address of a different object. This may cause problems
4003 in rtl level alias analysis (which does not expect this to happen,
4004 as it is illegal in C), and would be unlikely to be useful
4005 anyway. */
4006 if (use->iv->base_object
4007 && cand->iv->base_object
4008 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4009 return infinite_cost;
4010 }
4011
4012 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4013 {
4014 /* TODO -- add direct handling of this case. */
4015 goto fallback;
4016 }
4017
4018 /* CSTEPI is removed from the offset in case the statement is after the
4019 increment. If the step is not constant, we use zero instead.
4020 This is a bit imprecise (there is the extra addition), but
4021 redundancy elimination is likely to transform the code so that
4022 it uses the value of the variable before the increment anyway,
4023 so this is not all that unrealistic. */
4024 if (cst_and_fits_in_hwi (cstep))
4025 cstepi = int_cst_value (cstep);
4026 else
4027 cstepi = 0;
4028
4029 if (!constant_multiple_of (ustep, cstep, &rat))
4030 return infinite_cost;
4031
4032 if (double_int_fits_in_shwi_p (rat))
4033 ratio = double_int_to_shwi (rat);
4034 else
4035 return infinite_cost;
4036
4037 STRIP_NOPS (cbase);
4038 ctype = TREE_TYPE (cbase);
4039
4040 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4041 or ratio == 1, it is better to handle this like
4042
4043 ubase - ratio * cbase + ratio * var
4044
4045 (this also holds in the case ratio == -1, TODO). */
4046
4047 if (cst_and_fits_in_hwi (cbase))
4048 {
4049 offset = - ratio * int_cst_value (cbase);
4050 cost = difference_cost (data,
4051 ubase, build_int_cst (utype, 0),
4052 &symbol_present, &var_present, &offset,
4053 depends_on);
4054 cost.cost /= avg_loop_niter (data->current_loop);
4055 }
4056 else if (ratio == 1)
4057 {
4058 cost = difference_cost (data,
4059 ubase, cbase,
4060 &symbol_present, &var_present, &offset,
4061 depends_on);
4062 cost.cost /= avg_loop_niter (data->current_loop);
4063 }
4064 else if (address_p
4065 && !POINTER_TYPE_P (ctype)
4066 && multiplier_allowed_in_address_p
4067 (ratio, TYPE_MODE (TREE_TYPE (utype)),
4068 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4069 {
4070 cbase
4071 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4072 cost = difference_cost (data,
4073 ubase, cbase,
4074 &symbol_present, &var_present, &offset,
4075 depends_on);
4076 cost.cost /= avg_loop_niter (data->current_loop);
4077 }
4078 else
4079 {
4080 cost = force_var_cost (data, cbase, depends_on);
4081 cost = add_costs (cost,
4082 difference_cost (data,
4083 ubase, build_int_cst (utype, 0),
4084 &symbol_present, &var_present,
4085 &offset, depends_on));
4086 cost.cost /= avg_loop_niter (data->current_loop);
4087 cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
4088 }
4089
4090 if (inv_expr_id)
4091 {
4092 *inv_expr_id =
4093 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4094 /* Clear depends on. */
4095 if (*inv_expr_id != -1 && depends_on && *depends_on)
4096 bitmap_clear (*depends_on);
4097 }
4098
4099 /* If we are after the increment, the value of the candidate is higher by
4100 one step. */
4101 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4102 if (stmt_is_after_inc)
4103 offset -= ratio * cstepi;
4104
4105 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4106 (symbol/var1/const parts may be omitted). If we are looking for an
4107 address, find the cost of addressing this. */
4108 if (address_p)
4109 return add_costs (cost,
4110 get_address_cost (symbol_present, var_present,
4111 offset, ratio, cstepi,
4112 TYPE_MODE (TREE_TYPE (utype)),
4113 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4114 speed, stmt_is_after_inc,
4115 can_autoinc));
4116
4117 /* Otherwise estimate the costs for computing the expression. */
4118 if (!symbol_present && !var_present && !offset)
4119 {
4120 if (ratio != 1)
4121 cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype), speed);
4122 return cost;
4123 }
4124
4125 /* Symbol + offset should be compile-time computable, so consider that they
4126 are added once to the variable, if present. */
4127 if (var_present && (symbol_present || offset))
4128 cost.cost += adjust_setup_cost (data,
4129 add_cost (TYPE_MODE (ctype), speed));
4130
4131 /* Having offset does not affect runtime cost in case it is added to
4132 symbol, but it increases complexity. */
4133 if (offset)
4134 cost.complexity++;
4135
4136 cost.cost += add_cost (TYPE_MODE (ctype), speed);
4137
4138 aratio = ratio > 0 ? ratio : -ratio;
4139 if (aratio != 1)
4140 cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed);
4141 return cost;
4142
4143 fallback:
4144 if (can_autoinc)
4145 *can_autoinc = false;
4146
4147 {
4148 /* Just get the expression, expand it and measure the cost. */
4149 tree comp = get_computation_at (data->current_loop, use, cand, at);
4150
4151 if (!comp)
4152 return infinite_cost;
4153
4154 if (address_p)
4155 comp = build_simple_mem_ref (comp);
4156
4157 return new_cost (computation_cost (comp, speed), 0);
4158 }
4159 }
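/* A worked example of the shape priced above, under the usual addressing
   assumptions: expressing ubase = &sym + 8 from a candidate with cbase = 0
   and ratio = 4 yields sym + 8 + 4 * var.  For an address use on a target
   whose addressing modes include base + index * 4 + displacement, the whole
   computation is priced by get_address_cost as a single mode; for a
   non-address use we instead pay an addition plus a multiplication by 4
   (the multiply_by_cost term above).  */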
4160
4161 /* Determines the cost of the computation by which USE is expressed
4162 from induction variable CAND. If ADDRESS_P is true, we just need
4163 to create an address from it, otherwise we want to get it into a
4164 register. A set of invariants we depend on is stored in
4165 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4166 autoinc addressing is likely. */
4167
4168 static comp_cost
4169 get_computation_cost (struct ivopts_data *data,
4170 struct iv_use *use, struct iv_cand *cand,
4171 bool address_p, bitmap *depends_on,
4172 bool *can_autoinc, int *inv_expr_id)
4173 {
4174 return get_computation_cost_at (data,
4175 use, cand, address_p, depends_on, use->stmt,
4176 can_autoinc, inv_expr_id);
4177 }
4178
4179 /* Determines cost of basing replacement of USE on CAND in a generic
4180 expression. */
4181
4182 static bool
4183 determine_use_iv_cost_generic (struct ivopts_data *data,
4184 struct iv_use *use, struct iv_cand *cand)
4185 {
4186 bitmap depends_on;
4187 comp_cost cost;
4188 int inv_expr_id = -1;
4189
4190 /* The simple case first -- if we need to express value of the preserved
4191 original biv, the cost is 0. This also prevents us from counting the
4192 cost of increment twice -- once at this use and once in the cost of
4193 the candidate. */
4194 if (cand->pos == IP_ORIGINAL
4195 && cand->incremented_at == use->stmt)
4196 {
4197 set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE, -1);
4198 return true;
4199 }
4200
4201 cost = get_computation_cost (data, use, cand, false, &depends_on,
4202 NULL, &inv_expr_id);
4203
4204 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE,
4205 inv_expr_id);
4206
4207 return !infinite_cost_p (cost);
4208 }
4209
4210 /* Determines cost of basing replacement of USE on CAND in an address. */
4211
4212 static bool
4213 determine_use_iv_cost_address (struct ivopts_data *data,
4214 struct iv_use *use, struct iv_cand *cand)
4215 {
4216 bitmap depends_on;
4217 bool can_autoinc;
4218 int inv_expr_id = -1;
4219 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4220 &can_autoinc, &inv_expr_id);
4221
4222 if (cand->ainc_use == use)
4223 {
4224 if (can_autoinc)
4225 cost.cost -= cand->cost_step;
4226 /* If we generated the candidate solely for exploiting autoincrement
4227 opportunities, and it turns out it can't be used, set the cost to
4228 infinity to make sure we ignore it. */
4229 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4230 cost = infinite_cost;
4231 }
4232 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE,
4233 inv_expr_id);
4234
4235 return !infinite_cost_p (cost);
4236 }
4237
4238 /* Computes value of candidate CAND at position AT in iteration NITER, and
4239 stores it to VAL. */
4240
4241 static void
4242 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4243 aff_tree *val)
4244 {
4245 aff_tree step, delta, nit;
4246 struct iv *iv = cand->iv;
4247 tree type = TREE_TYPE (iv->base);
4248 tree steptype = type;
4249 if (POINTER_TYPE_P (type))
4250 steptype = sizetype;
4251
4252 tree_to_aff_combination (iv->step, steptype, &step);
4253 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4254 aff_combination_convert (&nit, steptype);
4255 aff_combination_mult (&nit, &step, &delta);
4256 if (stmt_after_increment (loop, cand, at))
4257 aff_combination_add (&delta, &step);
4258
4259 tree_to_aff_combination (iv->base, type, val);
4260 aff_combination_add (val, &delta);
4261 }
4262
4263 /* Returns period of induction variable iv. */
4264
4265 static tree
4266 iv_period (struct iv *iv)
4267 {
4268 tree step = iv->step, period, type;
4269 tree pow2div;
4270
4271 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4272
4273 type = unsigned_type_for (TREE_TYPE (step));
4274 /* The period of the iv is lcm (step, type_range)/step - 1,
4275 i.e., N*type_range/step - 1. Since the type range is a power
4276 of two, N == step >> num_of_ending_zeros_binary (step),
4277 so the final result is
4278
4279 (type_range >> num_of_ending_zeros_binary (step)) - 1
4280
4281 */
4282 pow2div = num_ending_zeros (step);
4283
4284 period = build_low_bits_mask (type,
4285 (TYPE_PRECISION (type)
4286 - tree_low_cst (pow2div, 1)));
4287
4288 return period;
4289 }
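/* E.g., for a 32-bit unsigned iv with step 12 (binary 1100), pow2div is 2,
   so the period is a mask of the low 30 bits, (1 << 30) - 1: the iv can be
   stepped that many times before its values start repeating.  */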
4290
4291 /* Returns the comparison operator used when eliminating the iv USE. */
4292
4293 static enum tree_code
4294 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4295 {
4296 struct loop *loop = data->current_loop;
4297 basic_block ex_bb;
4298 edge exit;
4299
4300 ex_bb = gimple_bb (use->stmt);
4301 exit = EDGE_SUCC (ex_bb, 0);
4302 if (flow_bb_inside_loop_p (loop, exit->dest))
4303 exit = EDGE_SUCC (ex_bb, 1);
4304
4305 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4306 }
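/* That is: if the exit is taken on the true edge, the rewritten condition
   will be VAR == BOUND (leave the loop once the candidate reaches the
   bound); if the exit is on the false edge, it will be VAR != BOUND, so
   the loop keeps iterating until the candidate reaches the bound.  */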
4307
4308 /* Check whether it is possible to express the condition in USE by comparison
4309 of candidate CAND. If so, store the value compared with to BOUND. */
4310
4311 static bool
4312 may_eliminate_iv (struct ivopts_data *data,
4313 struct iv_use *use, struct iv_cand *cand, tree *bound)
4314 {
4315 basic_block ex_bb;
4316 edge exit;
4317 tree nit, period;
4318 struct loop *loop = data->current_loop;
4319 aff_tree bnd;
4320 struct tree_niter_desc *desc = NULL;
4321
4322 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4323 return false;
4324
4325 /* For now, this works only for exits that dominate the loop latch.
4326 TODO: extend to other conditions inside the loop body. */
4327 ex_bb = gimple_bb (use->stmt);
4328 if (use->stmt != last_stmt (ex_bb)
4329 || gimple_code (use->stmt) != GIMPLE_COND
4330 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4331 return false;
4332
4333 exit = EDGE_SUCC (ex_bb, 0);
4334 if (flow_bb_inside_loop_p (loop, exit->dest))
4335 exit = EDGE_SUCC (ex_bb, 1);
4336 if (flow_bb_inside_loop_p (loop, exit->dest))
4337 return false;
4338
4339 nit = niter_for_exit (data, exit, &desc);
4340 if (!nit)
4341 return false;
4342
4343 /* Determine whether we can use the variable to test the exit condition.
4344 This is the case iff the period of the induction variable is greater
4345 than the number of iterations for which the exit condition is true. */
4346 period = iv_period (cand->iv);
4347
4348 /* If the number of iterations is constant, compare against it directly. */
4349 if (TREE_CODE (nit) == INTEGER_CST)
4350 {
4351 /* See cand_value_at. */
4352 if (stmt_after_increment (loop, cand, use->stmt))
4353 {
4354 if (!tree_int_cst_lt (nit, period))
4355 return false;
4356 }
4357 else
4358 {
4359 if (tree_int_cst_lt (period, nit))
4360 return false;
4361 }
4362 }
4363
4364 /* If not, and if this is the only possible exit of the loop, see whether
4365 we can get a conservative estimate on the number of iterations of the
4366 entire loop and compare against that instead. */
4367 else
4368 {
4369 double_int period_value, max_niter;
4370
4371 max_niter = desc->max;
4372 if (stmt_after_increment (loop, cand, use->stmt))
4373 max_niter = double_int_add (max_niter, double_int_one);
4374 period_value = tree_to_double_int (period);
4375 if (double_int_ucmp (max_niter, period_value) > 0)
4376 {
4377 /* See if we can take advantage of inferred loop bound information. */
4378 if (loop_only_exit_p (loop, exit))
4379 {
4380 if (!estimated_loop_iterations (loop, true, &max_niter))
4381 return false;
4382 /* The loop bound is already adjusted by adding 1. */
4383 if (double_int_ucmp (max_niter, period_value) > 0)
4384 return false;
4385 }
4386 else
4387 return false;
4388 }
4389 }
4390
4391 cand_value_at (loop, cand, use->stmt, nit, &bnd);
4392
4393 *bound = aff_combination_to_tree (&bnd);
4394 /* It is unlikely that computing the number of iterations using division
4395 would be more profitable than keeping the original induction variable. */
4396 if (expression_expensive_p (*bound))
4397 return false;
4398 return true;
4399 }
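/* For illustration, in

     for (i = 0; i < n; i++)
       a[i] = 0;

   a candidate behaving like p = &a[i] may allow the exit test i < n to be
   replaced by p != &a[n] (the value produced by cand_value_at), provided
   the candidate's period is large enough that it cannot wrap around and
   hit the bound early -- which is exactly what the checks above
   establish.  */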
4400
4401
4402 /* Determines cost of basing replacement of USE on CAND in a condition. */
4403
4404 static bool
4405 determine_use_iv_cost_condition (struct ivopts_data *data,
4406 struct iv_use *use, struct iv_cand *cand)
4407 {
4408 tree bound = NULL_TREE;
4409 struct iv *cmp_iv;
4410 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4411 comp_cost elim_cost, express_cost, cost;
4412 bool ok;
4413 int inv_expr_id = -1;
4414 tree *control_var, *bound_cst;
4415
4416 /* Only consider real candidates. */
4417 if (!cand->iv)
4418 {
4419 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE, -1);
4420 return false;
4421 }
4422
4423 /* Try iv elimination. */
4424 if (may_eliminate_iv (data, use, cand, &bound))
4425 {
4426 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4427 /* The bound is a loop invariant, so it will be only computed
4428 once. */
4429 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4430 }
4431 else
4432 elim_cost = infinite_cost;
4433
4434 /* Try expressing the original giv. If it is compared with an invariant,
4435 note that we cannot get rid of it. */
4436 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4437 NULL, &cmp_iv);
4438 gcc_assert (ok);
4439
4440 /* When the condition is a comparison of the candidate IV against
4441 zero, prefer this IV.
4442
4443 TODO: The constant that we're subtracting from the cost should
4444 be target-dependent. This information should be added to the
4445 target costs for each backend. */
4446 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4447 && integer_zerop (*bound_cst)
4448 && (operand_equal_p (*control_var, cand->var_after, 0)
4449 || operand_equal_p (*control_var, cand->var_before, 0)))
4450 elim_cost.cost -= 1;
4451
4452 express_cost = get_computation_cost (data, use, cand, false,
4453 &depends_on_express, NULL,
4454 &inv_expr_id);
4455 fd_ivopts_data = data;
4456 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4457
4458 /* Choose the better approach, preferring the eliminated IV. */
4459 if (compare_costs (elim_cost, express_cost) <= 0)
4460 {
4461 cost = elim_cost;
4462 depends_on = depends_on_elim;
4463 depends_on_elim = NULL;
4464 }
4465 else
4466 {
4467 cost = express_cost;
4468 depends_on = depends_on_express;
4469 depends_on_express = NULL;
4470 bound = NULL_TREE;
4471 }
4472
4473 set_use_iv_cost (data, use, cand, cost, depends_on, bound, inv_expr_id);
4474
4475 if (depends_on_elim)
4476 BITMAP_FREE (depends_on_elim);
4477 if (depends_on_express)
4478 BITMAP_FREE (depends_on_express);
4479
4480 return !infinite_cost_p (cost);
4481 }
4482
4483 /* Determines cost of basing replacement of USE on CAND. Returns false
4484 if USE cannot be based on CAND. */
4485
4486 static bool
4487 determine_use_iv_cost (struct ivopts_data *data,
4488 struct iv_use *use, struct iv_cand *cand)
4489 {
4490 switch (use->type)
4491 {
4492 case USE_NONLINEAR_EXPR:
4493 return determine_use_iv_cost_generic (data, use, cand);
4494
4495 case USE_ADDRESS:
4496 return determine_use_iv_cost_address (data, use, cand);
4497
4498 case USE_COMPARE:
4499 return determine_use_iv_cost_condition (data, use, cand);
4500
4501 default:
4502 gcc_unreachable ();
4503 }
4504 }
4505
4506 /* Return true if get_computation_cost indicates that autoincrement is
4507 a possibility for the pair of USE and CAND, false otherwise. */
4508
4509 static bool
4510 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4511 struct iv_cand *cand)
4512 {
4513 bitmap depends_on;
4514 bool can_autoinc;
4515 comp_cost cost;
4516
4517 if (use->type != USE_ADDRESS)
4518 return false;
4519
4520 cost = get_computation_cost (data, use, cand, true, &depends_on,
4521 &can_autoinc, NULL);
4522
4523 BITMAP_FREE (depends_on);
4524
4525 return !infinite_cost_p (cost) && can_autoinc;
4526 }
4527
4528 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4529 use that allows autoincrement, and set their AINC_USE if possible. */
4530
4531 static void
4532 set_autoinc_for_original_candidates (struct ivopts_data *data)
4533 {
4534 unsigned i, j;
4535
4536 for (i = 0; i < n_iv_cands (data); i++)
4537 {
4538 struct iv_cand *cand = iv_cand (data, i);
4539 struct iv_use *closest = NULL;
4540 if (cand->pos != IP_ORIGINAL)
4541 continue;
4542 for (j = 0; j < n_iv_uses (data); j++)
4543 {
4544 struct iv_use *use = iv_use (data, j);
4545 unsigned uid = gimple_uid (use->stmt);
4546 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
4547 || uid > gimple_uid (cand->incremented_at))
4548 continue;
4549 if (closest == NULL || uid > gimple_uid (closest->stmt))
4550 closest = use;
4551 }
4552 if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
4553 continue;
4554 cand->ainc_use = closest;
4555 }
4556 }
4557
4558 /* Finds the candidates for the induction variables. */
4559
4560 static void
4561 find_iv_candidates (struct ivopts_data *data)
4562 {
4563 /* Add commonly used ivs. */
4564 add_standard_iv_candidates (data);
4565
4566 /* Add old induction variables. */
4567 add_old_ivs_candidates (data);
4568
4569 /* Add induction variables derived from uses. */
4570 add_derived_ivs_candidates (data);
4571
4572 set_autoinc_for_original_candidates (data);
4573
4574 /* Record the important candidates. */
4575 record_important_candidates (data);
4576 }
4577
4578 /* Determines costs of basing the use of the iv on an iv candidate. */
4579
4580 static void
4581 determine_use_iv_costs (struct ivopts_data *data)
4582 {
4583 unsigned i, j;
4584 struct iv_use *use;
4585 struct iv_cand *cand;
4586 bitmap to_clear = BITMAP_ALLOC (NULL);
4587
4588 alloc_use_cost_map (data);
4589
4590 for (i = 0; i < n_iv_uses (data); i++)
4591 {
4592 use = iv_use (data, i);
4593
4594 if (data->consider_all_candidates)
4595 {
4596 for (j = 0; j < n_iv_cands (data); j++)
4597 {
4598 cand = iv_cand (data, j);
4599 determine_use_iv_cost (data, use, cand);
4600 }
4601 }
4602 else
4603 {
4604 bitmap_iterator bi;
4605
4606 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4607 {
4608 cand = iv_cand (data, j);
4609 if (!determine_use_iv_cost (data, use, cand))
4610 bitmap_set_bit (to_clear, j);
4611 }
4612
4613 /* Remove the candidates for which the cost is infinite from
4614 the list of related candidates. */
4615 bitmap_and_compl_into (use->related_cands, to_clear);
4616 bitmap_clear (to_clear);
4617 }
4618 }
4619
4620 BITMAP_FREE (to_clear);
4621
4622 if (dump_file && (dump_flags & TDF_DETAILS))
4623 {
4624 fprintf (dump_file, "Use-candidate costs:\n");
4625
4626 for (i = 0; i < n_iv_uses (data); i++)
4627 {
4628 use = iv_use (data, i);
4629
4630 fprintf (dump_file, "Use %d:\n", i);
4631 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
4632 for (j = 0; j < use->n_map_members; j++)
4633 {
4634 if (!use->cost_map[j].cand
4635 || infinite_cost_p (use->cost_map[j].cost))
4636 continue;
4637
4638 fprintf (dump_file, " %d\t%d\t%d\t",
4639 use->cost_map[j].cand->id,
4640 use->cost_map[j].cost.cost,
4641 use->cost_map[j].cost.complexity);
4642 if (use->cost_map[j].depends_on)
4643 bitmap_print (dump_file,
4644 use->cost_map[j].depends_on, "","");
4645 if (use->cost_map[j].inv_expr_id != -1)
4646 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
4647 fprintf (dump_file, "\n");
4648 }
4649
4650 fprintf (dump_file, "\n");
4651 }
4652 fprintf (dump_file, "\n");
4653 }
4654 }
4655
4656 /* Determines cost of the candidate CAND. */
4657
4658 static void
4659 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4660 {
4661 comp_cost cost_base;
4662 unsigned cost, cost_step;
4663 tree base;
4664
4665 if (!cand->iv)
4666 {
4667 cand->cost = 0;
4668 return;
4669 }
4670
4671 /* There are two costs associated with the candidate -- its increment
4672 and its initialization. The second is almost negligible for any loop
4673 that rolls enough, so we give it only very little weight. */
4674
4675 base = cand->iv->base;
4676 cost_base = force_var_cost (data, base, NULL);
4677 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
4678
4679 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
4680
4681 /* Prefer the original ivs unless we may gain something by replacing them.
4682 The reason is to make debugging simpler; hence this is not relevant for
4683 artificial ivs created by other optimization passes. */
4684 if (cand->pos != IP_ORIGINAL
4685 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4686 cost++;
4687
4688 /* Prefer not to insert statements into latch unless there are some
4689 already (so that we do not create unnecessary jumps). */
4690 if (cand->pos == IP_END
4691 && empty_block_p (ip_end_pos (data->current_loop)))
4692 cost++;
4693
4694 cand->cost = cost;
4695 cand->cost_step = cost_step;
4696 }
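/* To make the trade-off concrete: the increment cost is paid on every
   iteration, while the initialization is paid once (and is assumed to be
   scaled down by adjust_setup_cost for loops that roll enough), so two
   candidates usually differ mostly in their cost_step.  The two unit bumps
   above then act as tie-breakers: they favor keeping a user-visible
   original iv, and avoid creating statements in an empty latch block.  */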
4697
4698 /* Determines costs of computation of the candidates. */
4699
4700 static void
4701 determine_iv_costs (struct ivopts_data *data)
4702 {
4703 unsigned i;
4704
4705 if (dump_file && (dump_flags & TDF_DETAILS))
4706 {
4707 fprintf (dump_file, "Candidate costs:\n");
4708 fprintf (dump_file, " cand\tcost\n");
4709 }
4710
4711 for (i = 0; i < n_iv_cands (data); i++)
4712 {
4713 struct iv_cand *cand = iv_cand (data, i);
4714
4715 determine_iv_cost (data, cand);
4716
4717 if (dump_file && (dump_flags & TDF_DETAILS))
4718 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4719 }
4720
4721 if (dump_file && (dump_flags & TDF_DETAILS))
4722 fprintf (dump_file, "\n");
4723 }
4724
4725 /* Calculates cost for having SIZE induction variables. */
4726
4727 static unsigned
4728 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4729 {
4730 /* We add size to the cost, so that we prefer eliminating ivs
4731 if possible. */
4732 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
4733 data->body_includes_call);
4734 }
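/* E.g., if a three-iv and a four-iv solution have equal estimated register
   pressure cost, the added SIZE term makes the three-iv set strictly
   cheaper, implementing the preference for eliminating ivs.  */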
4735
4736 /* For each size of the induction variable set determine the penalty. */
4737
4738 static void
4739 determine_set_costs (struct ivopts_data *data)
4740 {
4741 unsigned j, n;
4742 gimple phi;
4743 gimple_stmt_iterator psi;
4744 tree op;
4745 struct loop *loop = data->current_loop;
4746 bitmap_iterator bi;
4747
4748 if (dump_file && (dump_flags & TDF_DETAILS))
4749 {
4750 fprintf (dump_file, "Global costs:\n");
4751 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
4752 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
4753 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
4754 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
4755 }
4756
4757 n = 0;
4758 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
4759 {
4760 phi = gsi_stmt (psi);
4761 op = PHI_RESULT (phi);
4762
4763 if (!is_gimple_reg (op))
4764 continue;
4765
4766 if (get_iv (data, op))
4767 continue;
4768
4769 n++;
4770 }
4771
4772 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4773 {
4774 struct version_info *info = ver_info (data, j);
4775
4776 if (info->inv_id && info->has_nonlin_use)
4777 n++;
4778 }
4779
4780 data->regs_used = n;
4781 if (dump_file && (dump_flags & TDF_DETAILS))
4782 fprintf (dump_file, " regs_used %d\n", n);
4783
4784 if (dump_file && (dump_flags & TDF_DETAILS))
4785 {
4786 fprintf (dump_file, " cost for size:\n");
4787 fprintf (dump_file, " ivs\tcost\n");
4788 for (j = 0; j <= 2 * target_avail_regs; j++)
4789 fprintf (dump_file, " %d\t%d\n", j,
4790 ivopts_global_cost_for_size (data, j));
4791 fprintf (dump_file, "\n");
4792 }
4793 }
4794
4795 /* Returns true if A is a cheaper cost pair than B. */
4796
4797 static bool
4798 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4799 {
4800 int cmp;
4801
4802 if (!a)
4803 return false;
4804
4805 if (!b)
4806 return true;
4807
4808 cmp = compare_costs (a->cost, b->cost);
4809 if (cmp < 0)
4810 return true;
4811
4812 if (cmp > 0)
4813 return false;
4814
4815 /* In case the costs are the same, prefer the cheaper candidate. */
4816 if (a->cand->cost < b->cand->cost)
4817 return true;
4818
4819 return false;
4820 }
4821
4822
4823 /* Returns the candidate by which USE is expressed in IVS. */
4824
4825 static struct cost_pair *
4826 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4827 {
4828 return ivs->cand_for_use[use->id];
4829 }
4830
4831 /* Computes the cost field of IVS structure. */
4832
4833 static void
4834 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4835 {
4836 comp_cost cost = ivs->cand_use_cost;
4837
4838 cost.cost += ivs->cand_cost;
4839
4840 cost.cost += ivopts_global_cost_for_size (data,
4841 ivs->n_regs + ivs->num_used_inv_expr);
4842
4843 ivs->cost = cost;
4844 }
4845
4846 /* Remove invariants in set INVS from set IVS. */
4847
4848 static void
4849 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4850 {
4851 bitmap_iterator bi;
4852 unsigned iid;
4853
4854 if (!invs)
4855 return;
4856
4857 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4858 {
4859 ivs->n_invariant_uses[iid]--;
4860 if (ivs->n_invariant_uses[iid] == 0)
4861 ivs->n_regs--;
4862 }
4863 }
4864
4865 /* Set USE not to be expressed by any candidate in IVS. */
4866
4867 static void
4868 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4869 struct iv_use *use)
4870 {
4871 unsigned uid = use->id, cid;
4872 struct cost_pair *cp;
4873
4874 cp = ivs->cand_for_use[uid];
4875 if (!cp)
4876 return;
4877 cid = cp->cand->id;
4878
4879 ivs->bad_uses++;
4880 ivs->cand_for_use[uid] = NULL;
4881 ivs->n_cand_uses[cid]--;
4882
4883 if (ivs->n_cand_uses[cid] == 0)
4884 {
4885 bitmap_clear_bit (ivs->cands, cid);
4886 /* Do not count the pseudocandidates. */
4887 if (cp->cand->iv)
4888 ivs->n_regs--;
4889 ivs->n_cands--;
4890 ivs->cand_cost -= cp->cand->cost;
4891
4892 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4893 }
4894
4895 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
4896
4897 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4898
4899 if (cp->inv_expr_id != -1)
4900 {
4901 ivs->used_inv_expr[cp->inv_expr_id]--;
4902 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
4903 ivs->num_used_inv_expr--;
4904 }
4905 iv_ca_recount_cost (data, ivs);
4906 }
4907
4908 /* Add invariants in set INVS to set IVS. */
4909
4910 static void
4911 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4912 {
4913 bitmap_iterator bi;
4914 unsigned iid;
4915
4916 if (!invs)
4917 return;
4918
4919 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4920 {
4921 ivs->n_invariant_uses[iid]++;
4922 if (ivs->n_invariant_uses[iid] == 1)
4923 ivs->n_regs++;
4924 }
4925 }
4926
4927 /* Set cost pair for USE in set IVS to CP. */
4928
4929 static void
4930 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4931 struct iv_use *use, struct cost_pair *cp)
4932 {
4933 unsigned uid = use->id, cid;
4934
4935 if (ivs->cand_for_use[uid] == cp)
4936 return;
4937
4938 if (ivs->cand_for_use[uid])
4939 iv_ca_set_no_cp (data, ivs, use);
4940
4941 if (cp)
4942 {
4943 cid = cp->cand->id;
4944
4945 ivs->bad_uses--;
4946 ivs->cand_for_use[uid] = cp;
4947 ivs->n_cand_uses[cid]++;
4948 if (ivs->n_cand_uses[cid] == 1)
4949 {
4950 bitmap_set_bit (ivs->cands, cid);
4951 /* Do not count the pseudocandidates. */
4952 if (cp->cand->iv)
4953 ivs->n_regs++;
4954 ivs->n_cands++;
4955 ivs->cand_cost += cp->cand->cost;
4956
4957 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4958 }
4959
4960 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
4961 iv_ca_set_add_invariants (ivs, cp->depends_on);
4962
4963 if (cp->inv_expr_id != -1)
4964 {
4965 ivs->used_inv_expr[cp->inv_expr_id]++;
4966 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
4967 ivs->num_used_inv_expr++;
4968 }
4969 iv_ca_recount_cost (data, ivs);
4970 }
4971 }
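/* An example of the incremental bookkeeping done above: when USE is the
   first use assigned to CP's candidate, the candidate's bit is set in
   IVS->cands, n_cands and (for a real iv) n_regs grow, the candidate's
   cost is added, and its invariant dependencies are registered; if CP
   depends on, say, invariant 3 and no other chosen pair did,
   n_invariant_uses[3] becomes 1 and n_regs grows again.
   iv_ca_recount_cost then refreshes the cached total, so the set's cost
   is always available in O(1).  */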
4972
4973 /* Extend set IVS by expressing USE by some of the candidates in it
4974 if possible. All important candidates will be considered
4975 if IMPORTANT_CANDIDATES is true. */
4976
4977 static void
4978 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4979 struct iv_use *use, bool important_candidates)
4980 {
4981 struct cost_pair *best_cp = NULL, *cp;
4982 bitmap_iterator bi;
4983 bitmap cands;
4984 unsigned i;
4985
4986 gcc_assert (ivs->upto >= use->id);
4987
4988 if (ivs->upto == use->id)
4989 {
4990 ivs->upto++;
4991 ivs->bad_uses++;
4992 }
4993
4994 cands = (important_candidates ? data->important_candidates : ivs->cands);
4995 EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
4996 {
4997 struct iv_cand *cand = iv_cand (data, i);
4998
4999 cp = get_use_iv_cost (data, use, cand);
5000
5001 if (cheaper_cost_pair (cp, best_cp))
5002 best_cp = cp;
5003 }
5004
5005 iv_ca_set_cp (data, ivs, use, best_cp);
5006 }
5007
5008 /* Get cost for assignment IVS. */
5009
5010 static comp_cost
5011 iv_ca_cost (struct iv_ca *ivs)
5012 {
5013 /* This was a conditional expression but it triggered a bug in
5014 Sun C 5.5. */
5015 if (ivs->bad_uses)
5016 return infinite_cost;
5017 else
5018 return ivs->cost;
5019 }
5020
5021 /* Returns true if all dependences of CP are among invariants in IVS. */
5022
5023 static bool
5024 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5025 {
5026 unsigned i;
5027 bitmap_iterator bi;
5028
5029 if (!cp->depends_on)
5030 return true;
5031
5032 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5033 {
5034 if (ivs->n_invariant_uses[i] == 0)
5035 return false;
5036 }
5037
5038 return true;
5039 }
5040
5041 /* Creates a change that expresses USE by NEW_CP instead of OLD_CP and
5042 chains it before NEXT_CHANGE. */
5043
5044 static struct iv_ca_delta *
5045 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5046 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5047 {
5048 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5049
5050 change->use = use;
5051 change->old_cp = old_cp;
5052 change->new_cp = new_cp;
5053 change->next_change = next_change;
5054
5055 return change;
5056 }
5057
5058 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5059 are rewritten. */
5060
5061 static struct iv_ca_delta *
5062 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5063 {
5064 struct iv_ca_delta *last;
5065
5066 if (!l2)
5067 return l1;
5068
5069 if (!l1)
5070 return l2;
5071
5072 for (last = l1; last->next_change; last = last->next_change)
5073 continue;
5074 last->next_change = l2;
5075
5076 return l1;
5077 }
5078
5079 /* Reverse the list of changes DELTA, forming the inverse to it. */
5080
5081 static struct iv_ca_delta *
5082 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5083 {
5084 struct iv_ca_delta *act, *next, *prev = NULL;
5085 struct cost_pair *tmp;
5086
5087 for (act = delta; act; act = next)
5088 {
5089 next = act->next_change;
5090 act->next_change = prev;
5091 prev = act;
5092
5093 tmp = act->old_cp;
5094 act->old_cp = act->new_cp;
5095 act->new_cp = tmp;
5096 }
5097
5098 return prev;
5099 }
5100
5101 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5102 reverted instead. */
5103
5104 static void
5105 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5106 struct iv_ca_delta *delta, bool forward)
5107 {
5108 struct cost_pair *from, *to;
5109 struct iv_ca_delta *act;
5110
5111 if (!forward)
5112 delta = iv_ca_delta_reverse (delta);
5113
5114 for (act = delta; act; act = act->next_change)
5115 {
5116 from = act->old_cp;
5117 to = act->new_cp;
5118 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5119 iv_ca_set_cp (data, ivs, act->use, to);
5120 }
5121
5122 if (!forward)
5123 iv_ca_delta_reverse (delta);
5124 }
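/* This commit/revert pair is the workhorse of the search below: a change
   is evaluated by committing its delta forward, reading iv_ca_cost of the
   modified set, and committing the same delta backward to restore the
   original assignment (see iv_ca_extend and iv_ca_narrow), so trial
   evaluations never need to copy the whole iv_ca structure.  */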
5125
5126 /* Returns true if CAND is used in IVS. */
5127
5128 static bool
5129 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5130 {
5131 return ivs->n_cand_uses[cand->id] > 0;
5132 }
5133
5134 /* Returns number of induction variable candidates in the set IVS. */
5135
5136 static unsigned
5137 iv_ca_n_cands (struct iv_ca *ivs)
5138 {
5139 return ivs->n_cands;
5140 }
5141
5142 /* Free the list of changes DELTA. */
5143
5144 static void
5145 iv_ca_delta_free (struct iv_ca_delta **delta)
5146 {
5147 struct iv_ca_delta *act, *next;
5148
5149 for (act = *delta; act; act = next)
5150 {
5151 next = act->next_change;
5152 free (act);
5153 }
5154
5155 *delta = NULL;
5156 }
5157
5158 /* Allocates new iv candidates assignment. */
5159
5160 static struct iv_ca *
5161 iv_ca_new (struct ivopts_data *data)
5162 {
5163 struct iv_ca *nw = XNEW (struct iv_ca);
5164
5165 nw->upto = 0;
5166 nw->bad_uses = 0;
5167 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5168 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5169 nw->cands = BITMAP_ALLOC (NULL);
5170 nw->n_cands = 0;
5171 nw->n_regs = 0;
5172 nw->cand_use_cost = zero_cost;
5173 nw->cand_cost = 0;
5174 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5175 nw->cost = zero_cost;
5176 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5177 nw->num_used_inv_expr = 0;
5178
5179 return nw;
5180 }
5181
5182 /* Free memory occupied by the set IVS. */
5183
5184 static void
5185 iv_ca_free (struct iv_ca **ivs)
5186 {
5187 free ((*ivs)->cand_for_use);
5188 free ((*ivs)->n_cand_uses);
5189 BITMAP_FREE ((*ivs)->cands);
5190 free ((*ivs)->n_invariant_uses);
5191 free ((*ivs)->used_inv_expr);
5192 free (*ivs);
5193 *ivs = NULL;
5194 }
5195
5196 /* Dumps IVS to FILE. */
5197
5198 static void
5199 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5200 {
5201 const char *pref = " invariants ";
5202 unsigned i;
5203 comp_cost cost = iv_ca_cost (ivs);
5204
5205 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5206 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5207 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5208 bitmap_print (file, ivs->cands, " candidates: ","\n");
5209
5210 for (i = 0; i < ivs->upto; i++)
5211 {
5212 struct iv_use *use = iv_use (data, i);
5213 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5214 if (cp)
5215 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5216 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5217 else
5218 fprintf (file, " use:%d --> ??\n", use->id);
5219 }
5220
5221 for (i = 1; i <= data->max_inv_id; i++)
5222 if (ivs->n_invariant_uses[i])
5223 {
5224 fprintf (file, "%s%d", pref, i);
5225 pref = ", ";
5226 }
5227 fprintf (file, "\n\n");
5228 }
5229
5230 /* Try changing candidate in IVS to CAND for each use. Return cost of the
5231 new set, and store differences in DELTA. The number of induction
5232 variables in the new set is stored to N_IVS. MIN_NCAND is a flag; when
5233 true, the function tries to find a solution with a minimal number of ivs. */
5234
5235 static comp_cost
5236 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5237 struct iv_cand *cand, struct iv_ca_delta **delta,
5238 unsigned *n_ivs, bool min_ncand)
5239 {
5240 unsigned i;
5241 comp_cost cost;
5242 struct iv_use *use;
5243 struct cost_pair *old_cp, *new_cp;
5244
5245 *delta = NULL;
5246 for (i = 0; i < ivs->upto; i++)
5247 {
5248 use = iv_use (data, i);
5249 old_cp = iv_ca_cand_for_use (ivs, use);
5250
5251 if (old_cp
5252 && old_cp->cand == cand)
5253 continue;
5254
5255 new_cp = get_use_iv_cost (data, use, cand);
5256 if (!new_cp)
5257 continue;
5258
5259 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5260 continue;
5261
5262 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5263 continue;
5264
5265 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5266 }
5267
5268 iv_ca_delta_commit (data, ivs, *delta, true);
5269 cost = iv_ca_cost (ivs);
5270 if (n_ivs)
5271 *n_ivs = iv_ca_n_cands (ivs);
5272 iv_ca_delta_commit (data, ivs, *delta, false);
5273
5274 return cost;
5275 }
5276
5277 /* Try narrowing set IVS by removing CAND. Return the cost of
5278 the new set and store the differences in DELTA. */
5279
5280 static comp_cost
5281 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5282 struct iv_cand *cand, struct iv_ca_delta **delta)
5283 {
5284 unsigned i, ci;
5285 struct iv_use *use;
5286 struct cost_pair *old_cp, *new_cp, *cp;
5287 bitmap_iterator bi;
5288 struct iv_cand *cnd;
5289 comp_cost cost;
5290
5291 *delta = NULL;
5292 for (i = 0; i < n_iv_uses (data); i++)
5293 {
5294 use = iv_use (data, i);
5295
5296 old_cp = iv_ca_cand_for_use (ivs, use);
5297 if (old_cp->cand != cand)
5298 continue;
5299
5300 new_cp = NULL;
5301
5302 if (data->consider_all_candidates)
5303 {
5304 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5305 {
5306 if (ci == cand->id)
5307 continue;
5308
5309 cnd = iv_cand (data, ci);
5310
5311 cp = get_use_iv_cost (data, use, cnd);
5312 if (!cp)
5313 continue;
5314
5315 if (!iv_ca_has_deps (ivs, cp))
5316 continue;
5317
5318 if (!cheaper_cost_pair (cp, new_cp))
5319 continue;
5320
5321 new_cp = cp;
5322 }
5323 }
5324 else
5325 {
5326 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5327 {
5328 if (ci == cand->id)
5329 continue;
5330
5331 cnd = iv_cand (data, ci);
5332
5333 cp = get_use_iv_cost (data, use, cnd);
5334 if (!cp)
5335 continue;
5336 if (!iv_ca_has_deps (ivs, cp))
5337 continue;
5338
5339 if (!cheaper_cost_pair (cp, new_cp))
5340 continue;
5341
5342 new_cp = cp;
5343 }
5344 }
5345
5346 if (!new_cp)
5347 {
5348 iv_ca_delta_free (delta);
5349 return infinite_cost;
5350 }
5351
5352 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5353 }
5354
5355 iv_ca_delta_commit (data, ivs, *delta, true);
5356 cost = iv_ca_cost (ivs);
5357 iv_ca_delta_commit (data, ivs, *delta, false);
5358
5359 return cost;
5360 }
5361
5362 /* Try optimizing the set of candidates IVS by removing candidates other
5363 than EXCEPT_CAND from it. Return cost of the new set, and store
5364 differences in DELTA. */
5365
5366 static comp_cost
5367 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5368 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5369 {
5370 bitmap_iterator bi;
5371 struct iv_ca_delta *act_delta, *best_delta;
5372 unsigned i;
5373 comp_cost best_cost, acost;
5374 struct iv_cand *cand;
5375
5376 best_delta = NULL;
5377 best_cost = iv_ca_cost (ivs);
5378
5379 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5380 {
5381 cand = iv_cand (data, i);
5382
5383 if (cand == except_cand)
5384 continue;
5385
5386 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5387
5388 if (compare_costs (acost, best_cost) < 0)
5389 {
5390 best_cost = acost;
5391 iv_ca_delta_free (&best_delta);
5392 best_delta = act_delta;
5393 }
5394 else
5395 iv_ca_delta_free (&act_delta);
5396 }
5397
5398 if (!best_delta)
5399 {
5400 *delta = NULL;
5401 return best_cost;
5402 }
5403
5404 /* Recurse to possibly remove other unnecessary ivs. */
5405 iv_ca_delta_commit (data, ivs, best_delta, true);
5406 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5407 iv_ca_delta_commit (data, ivs, best_delta, false);
5408 *delta = iv_ca_delta_join (best_delta, *delta);
5409 return best_cost;
5410 }
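/* Note the recursion above: once the best single removal is committed,
   other candidates may in turn have become removable (their uses having
   been re-routed to the survivors), so we prune again and join the two
   deltas into one change list.  */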
5411
5412 /* Tries to extend the set IVS in the best possible way in order
5413 to express USE. If ORIGINALP is true, prefer candidates from
5414 the original set of IVs, otherwise favor important candidates not
5415 based on any memory object. */
5416
5417 static bool
5418 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5419 struct iv_use *use, bool originalp)
5420 {
5421 comp_cost best_cost, act_cost;
5422 unsigned i;
5423 bitmap_iterator bi;
5424 struct iv_cand *cand;
5425 struct iv_ca_delta *best_delta = NULL, *act_delta;
5426 struct cost_pair *cp;
5427
5428 iv_ca_add_use (data, ivs, use, false);
5429 best_cost = iv_ca_cost (ivs);
5430
5431 cp = iv_ca_cand_for_use (ivs, use);
5432 if (!cp)
5433 {
5434 ivs->upto--;
5435 ivs->bad_uses--;
5436 iv_ca_add_use (data, ivs, use, true);
5437 best_cost = iv_ca_cost (ivs);
5438 cp = iv_ca_cand_for_use (ivs, use);
5439 }
5440 if (cp)
5441 {
5442 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5443 iv_ca_set_no_cp (data, ivs, use);
5444 }
5445
5446 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
5447 first try important candidates not based on any memory object. Only if
5448 this fails, try the specific ones. Rationale -- in loops with many
5449 variables the best choice often is to use just one generic biv. If we
5450 added here many ivs specific to the uses, the optimization algorithm later
5451 would be likely to get stuck in a local minimum, thus causing us to create
5452 too many ivs. The approach from few ivs to more seems more likely to be
5453 successful -- starting from few ivs, replacing an expensive use by a
5454 specific iv should always be a win. */
5455 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5456 {
5457 cand = iv_cand (data, i);
5458
5459 if (originalp && cand->pos != IP_ORIGINAL)
5460 continue;
5461
5462 if (!originalp && cand->iv->base_object != NULL_TREE)
5463 continue;
5464
5465 if (iv_ca_cand_used_p (ivs, cand))
5466 continue;
5467
5468 cp = get_use_iv_cost (data, use, cand);
5469 if (!cp)
5470 continue;
5471
5472 iv_ca_set_cp (data, ivs, use, cp);
5473 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
5474 true);
5475 iv_ca_set_no_cp (data, ivs, use);
5476 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5477
5478 if (compare_costs (act_cost, best_cost) < 0)
5479 {
5480 best_cost = act_cost;
5481
5482 iv_ca_delta_free (&best_delta);
5483 best_delta = act_delta;
5484 }
5485 else
5486 iv_ca_delta_free (&act_delta);
5487 }
5488
5489 if (infinite_cost_p (best_cost))
5490 {
5491 for (i = 0; i < use->n_map_members; i++)
5492 {
5493 cp = use->cost_map + i;
5494 cand = cp->cand;
5495 if (!cand)
5496 continue;
5497
5498 /* Already tried this. */
5499 if (cand->important)
5500 {
5501 if (originalp && cand->pos == IP_ORIGINAL)
5502 continue;
5503 if (!originalp && cand->iv->base_object == NULL_TREE)
5504 continue;
5505 }
5506
5507 if (iv_ca_cand_used_p (ivs, cand))
5508 continue;
5509
5510 act_delta = NULL;
5511 iv_ca_set_cp (data, ivs, use, cp);
5512 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
5513 iv_ca_set_no_cp (data, ivs, use);
5514 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5515 cp, act_delta);
5516
5517 if (compare_costs (act_cost, best_cost) < 0)
5518 {
5519 best_cost = act_cost;
5520
5521 if (best_delta)
5522 iv_ca_delta_free (&best_delta);
5523 best_delta = act_delta;
5524 }
5525 else
5526 iv_ca_delta_free (&act_delta);
5527 }
5528 }
5529
5530 iv_ca_delta_commit (data, ivs, best_delta, true);
5531 iv_ca_delta_free (&best_delta);
5532
5533 return !infinite_cost_p (best_cost);
5534 }
5535
5536 /* Finds an initial assignment of candidates to uses. */
5537
5538 static struct iv_ca *
5539 get_initial_solution (struct ivopts_data *data, bool originalp)
5540 {
5541 struct iv_ca *ivs = iv_ca_new (data);
5542 unsigned i;
5543
5544 for (i = 0; i < n_iv_uses (data); i++)
5545 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
5546 {
5547 iv_ca_free (&ivs);
5548 return NULL;
5549 }
5550
5551 return ivs;
5552 }
5553
5554 /* Tries to improve the set of induction variables IVS. */
5555
5556 static bool
5557 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5558 {
5559 unsigned i, n_ivs;
5560 comp_cost acost, best_cost = iv_ca_cost (ivs);
5561 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5562 struct iv_cand *cand;
5563
5564 /* Try extending the set of induction variables by one. */
5565 for (i = 0; i < n_iv_cands (data); i++)
5566 {
5567 cand = iv_cand (data, i);
5568
5569 if (iv_ca_cand_used_p (ivs, cand))
5570 continue;
5571
5572 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
5573 if (!act_delta)
5574 continue;
5575
5576 /* If we successfully added the candidate and the set is small enough,
5577 try optimizing it by removing other candidates. */
5578 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5579 {
5580 iv_ca_delta_commit (data, ivs, act_delta, true);
5581 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5582 iv_ca_delta_commit (data, ivs, act_delta, false);
5583 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5584 }
5585
5586 if (compare_costs (acost, best_cost) < 0)
5587 {
5588 best_cost = acost;
5589 iv_ca_delta_free (&best_delta);
5590 best_delta = act_delta;
5591 }
5592 else
5593 iv_ca_delta_free (&act_delta);
5594 }
5595
5596 if (!best_delta)
5597 {
5598 /* Try removing the candidates from the set instead. */
5599 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5600
5601 /* Nothing more we can do. */
5602 if (!best_delta)
5603 return false;
5604 }
5605
5606 iv_ca_delta_commit (data, ivs, best_delta, true);
5607 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
5608 iv_ca_delta_free (&best_delta);
5609 return true;
5610 }
5611
5612 /* Attempts to find the optimal set of induction variables. We use a simple
5613 greedy heuristic -- we try to replace at most one candidate in the selected
5614 solution and remove the unused ivs while this improves the cost. */
5615
5616 static struct iv_ca *
5617 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
5618 {
5619 struct iv_ca *set;
5620
5621 /* Get the initial solution. */
5622 set = get_initial_solution (data, originalp);
5623 if (!set)
5624 {
5625 if (dump_file && (dump_flags & TDF_DETAILS))
5626 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5627 return NULL;
5628 }
5629
5630 if (dump_file && (dump_flags & TDF_DETAILS))
5631 {
5632 fprintf (dump_file, "Initial set of candidates:\n");
5633 iv_ca_dump (data, dump_file, set);
5634 }
5635
5636 while (try_improve_iv_set (data, set))
5637 {
5638 if (dump_file && (dump_flags & TDF_DETAILS))
5639 {
5640 fprintf (dump_file, "Improved to:\n");
5641 iv_ca_dump (data, dump_file, set);
5642 }
5643 }
5644
5645 return set;
5646 }
5647
5648 static struct iv_ca *
5649 find_optimal_iv_set (struct ivopts_data *data)
5650 {
5651 unsigned i;
5652 struct iv_ca *set, *origset;
5653 struct iv_use *use;
5654 comp_cost cost, origcost;
5655
5656 /* Determine the cost based on a strategy that starts with the original
5657 IVs, and try again using a strategy that prefers candidates not based
5658 on any memory object. */
5659 origset = find_optimal_iv_set_1 (data, true);
5660 set = find_optimal_iv_set_1 (data, false);
5661
5662 if (!origset && !set)
5663 return NULL;
5664
5665 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
5666 cost = set ? iv_ca_cost (set) : infinite_cost;
5667
5668 if (dump_file && (dump_flags & TDF_DETAILS))
5669 {
5670 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
5671 origcost.cost, origcost.complexity);
5672 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
5673 cost.cost, cost.complexity);
5674 }
5675
5676 /* Choose the one with the best cost. */
5677 if (compare_costs (origcost, cost) <= 0)
5678 {
5679 if (set)
5680 iv_ca_free (&set);
5681 set = origset;
5682 }
5683 else if (origset)
5684 iv_ca_free (&origset);
5685
5686 for (i = 0; i < n_iv_uses (data); i++)
5687 {
5688 use = iv_use (data, i);
5689 use->selected = iv_ca_cand_for_use (set, use)->cand;
5690 }
5691
5692 return set;
5693 }
5694
5695 /* Creates a new induction variable corresponding to CAND. */
5696
5697 static void
5698 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5699 {
5700 gimple_stmt_iterator incr_pos;
5701 tree base;
5702 bool after = false;
5703
5704 if (!cand->iv)
5705 return;
5706
5707 switch (cand->pos)
5708 {
5709 case IP_NORMAL:
5710 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
5711 break;
5712
5713 case IP_END:
5714 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
5715 after = true;
5716 break;
5717
5718 case IP_AFTER_USE:
5719 after = true;
5720 /* fall through */
5721 case IP_BEFORE_USE:
5722 incr_pos = gsi_for_stmt (cand->incremented_at);
5723 break;
5724
5725 case IP_ORIGINAL:
5726 /* Mark that the iv is preserved. */
5727 name_info (data, cand->var_before)->preserve_biv = true;
5728 name_info (data, cand->var_after)->preserve_biv = true;
5729
5730 /* Rewrite the increment so that it uses var_before directly. */
5731 find_interesting_uses_op (data, cand->var_after)->selected = cand;
5732 return;
5733 }
5734
5735 gimple_add_tmp_var (cand->var_before);
5736 add_referenced_var (cand->var_before);
5737
5738 base = unshare_expr (cand->iv->base);
5739
5740 create_iv (base, unshare_expr (cand->iv->step),
5741 cand->var_before, data->current_loop,
5742 &incr_pos, after, &cand->var_before, &cand->var_after);
5743 }
5744
5745 /* Creates new induction variables described in SET. */
5746
5747 static void
5748 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5749 {
5750 unsigned i;
5751 struct iv_cand *cand;
5752 bitmap_iterator bi;
5753
5754 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5755 {
5756 cand = iv_cand (data, i);
5757 create_new_iv (data, cand);
5758 }
5759
5760 if (dump_file && (dump_flags & TDF_DETAILS))
5761 {
5762 fprintf (dump_file, "\nSelected IV set: \n");
5763 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5764 {
5765 cand = iv_cand (data, i);
5766 dump_cand (dump_file, cand);
5767 }
5768 fprintf (dump_file, "\n");
5769 }
5770 }
5771
5772 /* Rewrites USE (definition of iv used in a nonlinear expression)
5773 using candidate CAND. */
5774
5775 static void
5776 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5777 struct iv_use *use, struct iv_cand *cand)
5778 {
5779 tree comp;
5780 tree op, tgt;
5781 gimple ass;
5782 gimple_stmt_iterator bsi;
5783
5784 /* An important special case -- if we are asked to express the value of
5785 the original iv by itself, just exit; there is no need to
5786 introduce a new computation (that might also need casting the
5787 variable to unsigned and back). */
5788 if (cand->pos == IP_ORIGINAL
5789 && cand->incremented_at == use->stmt)
5790 {
5791 tree step, ctype, utype;
5792 enum tree_code incr_code = PLUS_EXPR, old_code;
5793
5794 gcc_assert (is_gimple_assign (use->stmt));
5795 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
5796
5797 step = cand->iv->step;
5798 ctype = TREE_TYPE (step);
5799 utype = TREE_TYPE (cand->var_after);
5800 if (TREE_CODE (step) == NEGATE_EXPR)
5801 {
5802 incr_code = MINUS_EXPR;
5803 step = TREE_OPERAND (step, 0);
5804 }
5805
5806 /* Check whether we may leave the computation unchanged.
5807 This is the case only if it does not rely on other
5808 computations in the loop -- otherwise, the computation
5809 we rely upon may be removed in remove_unused_ivs,
5810 thus leading to an ICE. */
5811 old_code = gimple_assign_rhs_code (use->stmt);
5812 if (old_code == PLUS_EXPR
5813 || old_code == MINUS_EXPR
5814 || old_code == POINTER_PLUS_EXPR)
5815 {
5816 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
5817 op = gimple_assign_rhs2 (use->stmt);
5818 else if (old_code != MINUS_EXPR
5819 && gimple_assign_rhs2 (use->stmt) == cand->var_before)
5820 op = gimple_assign_rhs1 (use->stmt);
5821 else
5822 op = NULL_TREE;
5823 }
5824 else
5825 op = NULL_TREE;
5826
5827 if (op
5828 && (TREE_CODE (op) == INTEGER_CST
5829 || operand_equal_p (op, step, 0)))
5830 return;
5831
5832 /* Otherwise, add the necessary computations to express
5833 the iv. */
5834 op = fold_convert (ctype, cand->var_before);
5835 comp = fold_convert (utype,
5836 build2 (incr_code, ctype, op,
5837 unshare_expr (step)));
5838 }
5839 else
5840 {
5841 comp = get_computation (data->current_loop, use, cand);
5842 gcc_assert (comp != NULL_TREE);
5843 }
5844
5845 switch (gimple_code (use->stmt))
5846 {
5847 case GIMPLE_PHI:
5848 tgt = PHI_RESULT (use->stmt);
5849
5850 /* If we should keep the biv, do not replace it. */
5851 if (name_info (data, tgt)->preserve_biv)
5852 return;
5853
5854 bsi = gsi_after_labels (gimple_bb (use->stmt));
5855 break;
5856
5857 case GIMPLE_ASSIGN:
5858 tgt = gimple_assign_lhs (use->stmt);
5859 bsi = gsi_for_stmt (use->stmt);
5860 break;
5861
5862 default:
5863 gcc_unreachable ();
5864 }
5865
5866 if (!valid_gimple_rhs_p (comp)
5867 || (gimple_code (use->stmt) != GIMPLE_PHI
5868 /* We can't allow re-allocating the stmt, as it might still be
5869 pointed to. */
5870 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
5871 >= gimple_num_ops (gsi_stmt (bsi)))))
5872 {
5873 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
5874 true, GSI_SAME_STMT);
5875 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
5876 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
5877 }
5878
5879 if (gimple_code (use->stmt) == GIMPLE_PHI)
5880 {
5881 ass = gimple_build_assign (tgt, comp);
5882 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
5883
5884 bsi = gsi_for_stmt (use->stmt);
5885 remove_phi_node (&bsi, false);
5886 }
5887 else
5888 {
5889 gimple_assign_set_rhs_from_tree (&bsi, comp);
5890 use->stmt = gsi_stmt (bsi);
5891 }
5892 }
5893
5894 /* Copies the reference information from OLD_REF to NEW_REF. */
5895
5896 static void
5897 copy_ref_info (tree new_ref, tree old_ref)
5898 {
5899 tree new_ptr_base = NULL_TREE;
5900
5901 TREE_SIDE_EFFECTS (new_ref) = TREE_SIDE_EFFECTS (old_ref);
5902 TREE_THIS_VOLATILE (new_ref) = TREE_THIS_VOLATILE (old_ref);
5903
5904 if (TREE_CODE (new_ref) == TARGET_MEM_REF)
5905 new_ptr_base = TMR_BASE (new_ref);
5906 else if (TREE_CODE (new_ref) == MEM_REF)
5907 new_ptr_base = TREE_OPERAND (new_ref, 0);
5908
5909 /* We can transfer points-to information from an old pointer
5910 or decl base to the new one. */
5911 if (new_ptr_base
5912 && TREE_CODE (new_ptr_base) == SSA_NAME
5913 && POINTER_TYPE_P (TREE_TYPE (new_ptr_base))
5914 && !SSA_NAME_PTR_INFO (new_ptr_base))
5915 {
5916 tree base = get_base_address (old_ref);
5917 if (!base)
5918 ;
5919 else if ((INDIRECT_REF_P (base)
5920 || TREE_CODE (base) == MEM_REF)
5921 && TREE_CODE (TREE_OPERAND (base, 0)) == SSA_NAME)
5922 duplicate_ssa_name_ptr_info
5923 (new_ptr_base, SSA_NAME_PTR_INFO (TREE_OPERAND (base, 0)));
5924 else if (TREE_CODE (base) == VAR_DECL
5925 || TREE_CODE (base) == PARM_DECL
5926 || TREE_CODE (base) == RESULT_DECL)
5927 {
5928 struct ptr_info_def *pi = get_ptr_info (new_ptr_base);
5929 pt_solution_set_var (&pi->pt, base);
5930 }
5931 }
5932 }
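/* For example, when *p (with p known to point into array a) is rewritten
   into a memory reference based on a freshly created unsigned iv, the new
   SSA name carries no points-to information; copying it from p, or
   installing a points-to set of just {a} when the old base is the decl a
   itself, keeps alias analysis as precise as it was before the rewrite.  */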
5933
5934 /* Performs a peephole optimization to reorder the iv update statement with
5935 a mem ref to enable instruction combining in later phases. The mem ref uses
5936 the iv value before the update, so the reordering transformation requires
5937 adjustment of the offset. CAND is the selected IV_CAND.
5938
5939 Example:
5940
5941 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
5942 iv2 = iv1 + 1;
5943
5944 if (t < val) (1)
5945 goto L;
5946 goto Head;
5947
5948
5949 Directly propagating t over to (1) will introduce an overlapping live
5950 range and thus increase register pressure. This peephole transforms it into:
5951
5952
5953 iv2 = iv1 + 1;
5954 t = MEM_REF (base, iv2, 8, 8);
5955 if (t < val)
5956 goto L;
5957 goto Head;
5958 */
5959
5960 static void
5961 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
5962 {
5963 tree var_after;
5964 gimple iv_update, stmt;
5965 basic_block bb;
5966 gimple_stmt_iterator gsi, gsi_iv;
5967
5968 if (cand->pos != IP_NORMAL)
5969 return;
5970
5971 var_after = cand->var_after;
5972 iv_update = SSA_NAME_DEF_STMT (var_after);
5973
5974 bb = gimple_bb (iv_update);
5975 gsi = gsi_last_nondebug_bb (bb);
5976 stmt = gsi_stmt (gsi);
5977
5978 /* Only handle conditional statements for now. */
5979 if (gimple_code (stmt) != GIMPLE_COND)
5980 return;
5981
5982 gsi_prev_nondebug (&gsi);
5983 stmt = gsi_stmt (gsi);
5984 if (stmt != iv_update)
5985 return;
5986
5987 gsi_prev_nondebug (&gsi);
5988 if (gsi_end_p (gsi))
5989 return;
5990
5991 stmt = gsi_stmt (gsi);
5992 if (gimple_code (stmt) != GIMPLE_ASSIGN)
5993 return;
5994
5995 if (stmt != use->stmt)
5996 return;
5997
5998 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5999 return;
6000
6001 if (dump_file && (dump_flags & TDF_DETAILS))
6002 {
6003 fprintf (dump_file, "Reordering \n");
6004 print_gimple_stmt (dump_file, iv_update, 0, 0);
6005 print_gimple_stmt (dump_file, use->stmt, 0, 0);
6006 fprintf (dump_file, "\n");
6007 }
6008
6009 gsi = gsi_for_stmt (use->stmt);
6010 gsi_iv = gsi_for_stmt (iv_update);
6011 gsi_move_before (&gsi_iv, &gsi);
6012
6013 cand->pos = IP_BEFORE_USE;
6014 cand->incremented_at = use->stmt;
6015 }
6016
6017 /* Rewrites USE (address that is an iv) using candidate CAND. */
6018
6019 static void
6020 rewrite_use_address (struct ivopts_data *data,
6021 struct iv_use *use, struct iv_cand *cand)
6022 {
6023 aff_tree aff;
6024 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6025 tree base_hint = NULL_TREE;
6026 tree ref, iv;
6027 bool ok;
6028
6029 adjust_iv_update_pos (cand, use);
6030 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6031 gcc_assert (ok);
6032 unshare_aff_combination (&aff);
6033
6034 /* To avoid undefined overflow problems, all IV candidates use unsigned
6035 integer types. The drawback is that this makes it impossible for
6036 create_mem_ref to distinguish an IV that is based on a memory object
6037 from one that represents simply an offset.
6038
6039 To work around this problem, we pass a hint to create_mem_ref that
6040 indicates which variable (if any) in aff is an IV based on a memory
6041 object. Note that we only consider the candidate. If this is not
6042 based on an object, the base of the reference is in some subexpression
6043 of the use -- but these will use pointer types, so they are recognized
6044 by the create_mem_ref heuristics anyway. */
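/* A sketch with hypothetical names, not literal pass output: for a use
   *p_1 stepping through an int array, the selected unsigned candidate
   ivtmp_2 carries no pointer type; passing it as the hint allows
   create_mem_ref to build something like MEM[(int *)ivtmp_2] rather than
   treating ivtmp_2 as a mere offset from some other base.  */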
6045 if (cand->iv->base_object)
6046 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6047
6048 iv = var_at_stmt (data->current_loop, cand, use->stmt);
6049 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6050 reference_alias_ptr_type (*use->op_p),
6051 iv, base_hint, data->speed);
6052 copy_ref_info (ref, *use->op_p);
6053 *use->op_p = ref;
6054 }
6055
6056 /* Rewrites USE (a condition in which one of the operands is an iv) using
6057 candidate CAND. */
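/* A schematic example with illustrative names: when iv elimination
   succeeds, an exit test such as i_1 < n_2 is replaced by a test of the
   chosen candidate against its value at loop exit, e.g. ivtmp_3 != bound_4,
   with the bound computed in the loop preheader.  */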
6058
6059 static void
6060 rewrite_use_compare (struct ivopts_data *data,
6061 struct iv_use *use, struct iv_cand *cand)
6062 {
6063 tree comp, *var_p, op, bound;
6064 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6065 enum tree_code compare;
6066 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6067 bool ok;
6068
6069 bound = cp->value;
6070 if (bound)
6071 {
6072 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6073 tree var_type = TREE_TYPE (var);
6074 gimple_seq stmts;
6075
6076 if (dump_file && (dump_flags & TDF_DETAILS))
6077 {
6078 fprintf (dump_file, "Replacing exit test: ");
6079 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6080 }
6081 compare = iv_elimination_compare (data, use);
6082 bound = unshare_expr (fold_convert (var_type, bound));
6083 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6084 if (stmts)
6085 gsi_insert_seq_on_edge_immediate (
6086 loop_preheader_edge (data->current_loop),
6087 stmts);
6088
6089 gimple_cond_set_lhs (use->stmt, var);
6090 gimple_cond_set_code (use->stmt, compare);
6091 gimple_cond_set_rhs (use->stmt, op);
6092 return;
6093 }
6094
6095 /* The induction variable elimination failed; just express the original
6096 giv. */
6097 comp = get_computation (data->current_loop, use, cand);
6098 gcc_assert (comp != NULL_TREE);
6099
6100 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6101 gcc_assert (ok);
6102
6103 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6104 true, GSI_SAME_STMT);
6105 }
6106
6107 /* Rewrites USE using candidate CAND. */
6108
6109 static void
6110 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6111 {
6112 switch (use->type)
6113 {
6114 case USE_NONLINEAR_EXPR:
6115 rewrite_use_nonlinear_expr (data, use, cand);
6116 break;
6117
6118 case USE_ADDRESS:
6119 rewrite_use_address (data, use, cand);
6120 break;
6121
6122 case USE_COMPARE:
6123 rewrite_use_compare (data, use, cand);
6124 break;
6125
6126 default:
6127 gcc_unreachable ();
6128 }
6129
6130 update_stmt (use->stmt);
6131 }
6132
6133 /* Rewrite the uses using the selected induction variables. */
6134
6135 static void
6136 rewrite_uses (struct ivopts_data *data)
6137 {
6138 unsigned i;
6139 struct iv_cand *cand;
6140 struct iv_use *use;
6141
6142 for (i = 0; i < n_iv_uses (data); i++)
6143 {
6144 use = iv_use (data, i);
6145 cand = use->selected;
6146 gcc_assert (cand);
6147
6148 rewrite_use (data, use, cand);
6149 }
6150 }
6151
6152 /* Removes the ivs that are not used after rewriting. */
6153
6154 static void
6155 remove_unused_ivs (struct ivopts_data *data)
6156 {
6157 unsigned j;
6158 bitmap_iterator bi;
6159 bitmap toremove = BITMAP_ALLOC (NULL);
6160
6161 /* Figure out an order in which to release SSA DEFs so that we don't
6162 release something that we'd have to propagate into a debug stmt
6163 afterwards. */
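/* Roughly: if both i_1 and i_2 = i_1 + 1 are to be removed while a debug
   stmt still mentions i_2, then i_2's definition should be released first,
   so that i_1 is still around to substitute into the debug stmt;
   release_defs_bitset computes such an order for us.  */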
6164 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6165 {
6166 struct version_info *info;
6167
6168 info = ver_info (data, j);
6169 if (info->iv
6170 && !integer_zerop (info->iv->step)
6171 && !info->inv_id
6172 && !info->iv->have_use_for
6173 && !info->preserve_biv)
6174 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
6175 }
6176
6177 release_defs_bitset (toremove);
6178
6179 BITMAP_FREE (toremove);
6180 }
6181
6182 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6183 for pointer_map_traverse. */
6184
6185 static bool
6186 free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
6187 void *data ATTRIBUTE_UNUSED)
6188 {
6189 struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;
6190
6191 free (niter);
6192 return true;
6193 }
6194
6195 /* Frees data allocated by the optimization of a single loop. */
6196
6197 static void
6198 free_loop_data (struct ivopts_data *data)
6199 {
6200 unsigned i, j;
6201 bitmap_iterator bi;
6202 tree obj;
6203
6204 if (data->niters)
6205 {
6206 pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
6207 pointer_map_destroy (data->niters);
6208 data->niters = NULL;
6209 }
6210
6211 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
6212 {
6213 struct version_info *info;
6214
6215 info = ver_info (data, i);
6216 if (info->iv)
6217 free (info->iv);
6218 info->iv = NULL;
6219 info->has_nonlin_use = false;
6220 info->preserve_biv = false;
6221 info->inv_id = 0;
6222 }
6223 bitmap_clear (data->relevant);
6224 bitmap_clear (data->important_candidates);
6225
6226 for (i = 0; i < n_iv_uses (data); i++)
6227 {
6228 struct iv_use *use = iv_use (data, i);
6229
6230 free (use->iv);
6231 BITMAP_FREE (use->related_cands);
6232 for (j = 0; j < use->n_map_members; j++)
6233 if (use->cost_map[j].depends_on)
6234 BITMAP_FREE (use->cost_map[j].depends_on);
6235 free (use->cost_map);
6236 free (use);
6237 }
6238 VEC_truncate (iv_use_p, data->iv_uses, 0);
6239
6240 for (i = 0; i < n_iv_cands (data); i++)
6241 {
6242 struct iv_cand *cand = iv_cand (data, i);
6243
6244 if (cand->iv)
6245 free (cand->iv);
6246 if (cand->depends_on)
6247 BITMAP_FREE (cand->depends_on);
6248 free (cand);
6249 }
6250 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
6251
6252 if (data->version_info_size < num_ssa_names)
6253 {
6254 data->version_info_size = 2 * num_ssa_names;
6255 free (data->version_info);
6256 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
6257 }
6258
6259 data->max_inv_id = 0;
6260
6261 FOR_EACH_VEC_ELT (tree, decl_rtl_to_reset, i, obj)
6262 SET_DECL_RTL (obj, NULL_RTX);
6263
6264 VEC_truncate (tree, decl_rtl_to_reset, 0);
6265
6266 htab_empty (data->inv_expr_tab);
6267 data->inv_expr_id = 0;
6268 }
6269
6270 /* Finalizes data structures used by the iv optimization pass.  */
6272
6273 static void
6274 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
6275 {
6276 free_loop_data (data);
6277 free (data->version_info);
6278 BITMAP_FREE (data->relevant);
6279 BITMAP_FREE (data->important_candidates);
6280
6281 VEC_free (tree, heap, decl_rtl_to_reset);
6282 VEC_free (iv_use_p, heap, data->iv_uses);
6283 VEC_free (iv_cand_p, heap, data->iv_candidates);
6284 htab_delete (data->inv_expr_tab);
6285 }
6286
6287 /* Returns true if the loop body BODY, consisting of NUM_NODES blocks, includes any function calls other than inexpensive builtins. */
6288
6289 static bool
6290 loop_body_includes_call (basic_block *body, unsigned num_nodes)
6291 {
6292 gimple_stmt_iterator gsi;
6293 unsigned i;
6294
6295 for (i = 0; i < num_nodes; i++)
6296 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
6297 {
6298 gimple stmt = gsi_stmt (gsi);
6299 if (is_gimple_call (stmt)
6300 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
6301 return true;
6302 }
6303 return false;
6304 }
6305
6306 /* Optimizes the LOOP. Returns true if anything changed. */
6307
6308 static bool
6309 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
6310 {
6311 bool changed = false;
6312 struct iv_ca *iv_ca;
6313 edge exit;
6314 basic_block *body;
6315
6316 gcc_assert (!data->niters);
6317 data->current_loop = loop;
6318 data->speed = optimize_loop_for_speed_p (loop);
6319
6320 if (dump_file && (dump_flags & TDF_DETAILS))
6321 {
6322 fprintf (dump_file, "Processing loop %d\n", loop->num);
6323
6324 exit = single_dom_exit (loop);
6325 if (exit)
6326 {
6327 fprintf (dump_file, " single exit %d -> %d, exit condition ",
6328 exit->src->index, exit->dest->index);
6329 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
6330 fprintf (dump_file, "\n");
6331 }
6332
6333 fprintf (dump_file, "\n");
6334 }
6335
6336 body = get_loop_body (loop);
6337 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
6338 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
6339 free (body);
6340
6341 /* For each ssa name, determine whether it behaves as an induction variable
6342 in this loop. */
6343 if (!find_induction_variables (data))
6344 goto finish;
6345
6346 /* Finds interesting uses (item 1). */
6347 find_interesting_uses (data);
6348 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
6349 goto finish;
6350
6351 /* Finds candidates for the induction variables (item 2). */
6352 find_iv_candidates (data);
6353
6354 /* Calculates the costs (item 3, part 1). */
6355 determine_iv_costs (data);
6356 determine_use_iv_costs (data);
6357 determine_set_costs (data);
6358
6359 /* Find the optimal set of induction variables (item 3, part 2). */
6360 iv_ca = find_optimal_iv_set (data);
6361 if (!iv_ca)
6362 goto finish;
6363 changed = true;
6364
6365 /* Create the new induction variables (item 4, part 1). */
6366 create_new_ivs (data, iv_ca);
6367 iv_ca_free (&iv_ca);
6368
6369 /* Rewrite the uses (item 4, part 2). */
6370 rewrite_uses (data);
6371
6372 /* Remove the ivs that are unused after rewriting. */
6373 remove_unused_ivs (data);
6374
6375 /* We have changed the structure of induction variables; it might happen
6376 that definitions in the scev database refer to some of them that were
6377 eliminated. */
6378 scev_reset ();
6379
6380 finish:
6381 free_loop_data (data);
6382
6383 return changed;
6384 }
6385
6386 /* Main entry point. Optimizes induction variables in loops. */
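/* As an illustrative sketch of the overall effect (not verbatim output of
   the pass), a loop such as

     for (i = 0; i < n; i++)
       a[i] = 0;

   may end up using a single pointer induction variable in place of the
   index, roughly

     for (p = &a[0]; p != &a[n]; p++)
       *p = 0;

   so that the address &a[0] + 4 * i need not be recomputed on every
   iteration.  */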
6387
6388 void
6389 tree_ssa_iv_optimize (void)
6390 {
6391 struct loop *loop;
6392 struct ivopts_data data;
6393 loop_iterator li;
6394
6395 tree_ssa_iv_optimize_init (&data);
6396
6397 /* Optimize the loops starting with the innermost ones. */
6398 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
6399 {
6400 if (dump_file && (dump_flags & TDF_DETAILS))
6401 flow_loop_dump (loop, dump_file, NULL, 1);
6402
6403 tree_ssa_iv_optimize_loop (&data, loop);
6404 }
6405
6406 tree_ssa_iv_optimize_finalize (&data);
6407 }