vec.h (FOR_EACH_VEC_ELT): Define.
[gcc.git] / gcc / tree-ssa-loop-ivopts.c
1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 3, or (at your option) any
10 later version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
26 following steps:
27
28 1) The interesting uses of induction variables are found. This includes
29
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
33
34 2) Candidates for the induction variables are found. This includes
35
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
38 uses" above
39
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
42 of three parts:
43
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
53
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
56
57 4) The trees are transformed to use the new variables, the dead code is
58 removed.
59
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
64
65 #include "config.h"
66 #include "system.h"
67 #include "coretypes.h"
68 #include "tm.h"
69 #include "tree.h"
70 #include "tm_p.h"
71 #include "basic-block.h"
72 #include "output.h"
73 #include "tree-pretty-print.h"
74 #include "gimple-pretty-print.h"
75 #include "tree-flow.h"
76 #include "tree-dump.h"
77 #include "timevar.h"
78 #include "cfgloop.h"
79 #include "tree-pass.h"
80 #include "ggc.h"
81 #include "insn-config.h"
82 #include "recog.h"
83 #include "pointer-set.h"
84 #include "hashtab.h"
85 #include "tree-chrec.h"
86 #include "tree-scalar-evolution.h"
87 #include "cfgloop.h"
88 #include "params.h"
89 #include "langhooks.h"
90 #include "tree-affine.h"
91 #include "target.h"
92 #include "tree-inline.h"
93 #include "tree-ssa-propagate.h"
94
95 /* FIXME: Expressions are expanded to RTL in this pass to determine the
96 cost of different addressing modes. This should be moved to a TBD
97 interface between the GIMPLE and RTL worlds. */
98 #include "expr.h"
99
100 /* The infinite cost. */
101 #define INFTY 10000000
102
103 #define AVG_LOOP_NITER(LOOP) 5
104
105 /* Returns the expected number of loop iterations for LOOP.
106 The average trip count is computed from profile data if it
107 exists. */
108
109 static inline HOST_WIDE_INT
110 avg_loop_niter (struct loop *loop)
111 {
112 HOST_WIDE_INT niter = estimated_loop_iterations_int (loop, false);
113 if (niter == -1)
114 return AVG_LOOP_NITER (loop);
115
116 return niter;
117 }
118
119 /* Representation of the induction variable. */
120 struct iv
121 {
122 tree base; /* Initial value of the iv. */
123 tree base_object; /* A memory object to that the induction variable points. */
124 tree step; /* Step of the iv (constant only). */
125 tree ssa_name; /* The ssa name with the value. */
126 bool biv_p; /* Is it a biv? */
127 bool have_use_for; /* Do we already have a use for it? */
128 unsigned use_id; /* The identifier in the use if it is the case. */
129 };
130
131 /* Per-ssa version information (induction variable descriptions, etc.). */
132 struct version_info
133 {
134 tree name; /* The ssa name. */
135 struct iv *iv; /* Induction variable description. */
136 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
137 an expression that is not an induction variable. */
138 bool preserve_biv; /* For the original biv, whether to preserve it. */
139 unsigned inv_id; /* Id of an invariant. */
140 };
141
142 /* Types of uses. */
143 enum use_type
144 {
145 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
146 USE_ADDRESS, /* Use in an address. */
147 USE_COMPARE /* Use is a compare. */
148 };
149
150 /* Cost of a computation. */
151 typedef struct
152 {
153 int cost; /* The runtime cost. */
154 unsigned complexity; /* The estimate of the complexity of the code for
155 the computation (in no concrete units --
156 complexity field should be larger for more
157 complex expressions and addressing modes). */
158 } comp_cost;
159
160 static const comp_cost zero_cost = {0, 0};
161 static const comp_cost infinite_cost = {INFTY, INFTY};
162
163 /* The candidate - cost pair. */
164 struct cost_pair
165 {
166 struct iv_cand *cand; /* The candidate. */
167 comp_cost cost; /* The cost. */
168 bitmap depends_on; /* The list of invariants that have to be
169 preserved. */
170 tree value; /* For final value elimination, the expression for
171 the final value of the iv. For iv elimination,
172 the new bound to compare with. */
173 int inv_expr_id; /* Loop invariant expression id. */
174 };
175
176 /* Use. */
177 struct iv_use
178 {
179 unsigned id; /* The id of the use. */
180 enum use_type type; /* Type of the use. */
181 struct iv *iv; /* The induction variable it is based on. */
182 gimple stmt; /* Statement in that it occurs. */
183 tree *op_p; /* The place where it occurs. */
184 bitmap related_cands; /* The set of "related" iv candidates, plus the common
185 important ones. */
186
187 unsigned n_map_members; /* Number of candidates in the cost_map list. */
188 struct cost_pair *cost_map;
189 /* The costs wrto the iv candidates. */
190
191 struct iv_cand *selected;
192 /* The selected candidate. */
193 };
194
195 /* The position where the iv is computed. */
196 enum iv_position
197 {
198 IP_NORMAL, /* At the end, just before the exit condition. */
199 IP_END, /* At the end of the latch block. */
200 IP_BEFORE_USE, /* Immediately before a specific use. */
201 IP_AFTER_USE, /* Immediately after a specific use. */
202 IP_ORIGINAL /* The original biv. */
203 };
204
205 /* The induction variable candidate. */
206 struct iv_cand
207 {
208 unsigned id; /* The number of the candidate. */
209 bool important; /* Whether this is an "important" candidate, i.e. such
210 that it should be considered by all uses. */
211 ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */
212 gimple incremented_at;/* For original biv, the statement where it is
213 incremented. */
214 tree var_before; /* The variable used for it before increment. */
215 tree var_after; /* The variable used for it after increment. */
216 struct iv *iv; /* The value of the candidate. NULL for
217 "pseudocandidate" used to indicate the possibility
218 to replace the final value of an iv by direct
219 computation of the value. */
220 unsigned cost; /* Cost of the candidate. */
221 unsigned cost_step; /* Cost of the candidate's increment operation. */
222 struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
223 where it is incremented. */
224 bitmap depends_on; /* The list of invariants that are used in step of the
225 biv. */
226 };
227
228 /* Loop invariant expression hashtable entry. */
229 struct iv_inv_expr_ent
230 {
231 tree expr;
232 int id;
233 hashval_t hash;
234 };
235
236 /* The data used by the induction variable optimizations. */
237
238 typedef struct iv_use *iv_use_p;
239 DEF_VEC_P(iv_use_p);
240 DEF_VEC_ALLOC_P(iv_use_p,heap);
241
242 typedef struct iv_cand *iv_cand_p;
243 DEF_VEC_P(iv_cand_p);
244 DEF_VEC_ALLOC_P(iv_cand_p,heap);
245
246 struct ivopts_data
247 {
248 /* The currently optimized loop. */
249 struct loop *current_loop;
250
251 /* Numbers of iterations for all exits of the current loop. */
252 struct pointer_map_t *niters;
253
254 /* Number of registers used in it. */
255 unsigned regs_used;
256
257 /* The size of version_info array allocated. */
258 unsigned version_info_size;
259
260 /* The array of information for the ssa names. */
261 struct version_info *version_info;
262
263 /* The hashtable of loop invariant expressions created
264 by ivopt. */
265 htab_t inv_expr_tab;
266
267 /* Loop invariant expression id. */
268 int inv_expr_id;
269
270 /* The bitmap of indices in version_info whose value was changed. */
271 bitmap relevant;
272
273 /* The uses of induction variables. */
274 VEC(iv_use_p,heap) *iv_uses;
275
276 /* The candidates. */
277 VEC(iv_cand_p,heap) *iv_candidates;
278
279 /* A bitmap of important candidates. */
280 bitmap important_candidates;
281
282 /* The maximum invariant id. */
283 unsigned max_inv_id;
284
285 /* Whether to consider just related and important candidates when replacing a
286 use. */
287 bool consider_all_candidates;
288
289 /* Are we optimizing for speed? */
290 bool speed;
291
292 /* Whether the loop body includes any function calls. */
293 bool body_includes_call;
294 };
295
296 /* An assignment of iv candidates to uses. */
297
298 struct iv_ca
299 {
300 /* The number of uses covered by the assignment. */
301 unsigned upto;
302
303 /* Number of uses that cannot be expressed by the candidates in the set. */
304 unsigned bad_uses;
305
306 /* Candidate assigned to a use, together with the related costs. */
307 struct cost_pair **cand_for_use;
308
309 /* Number of times each candidate is used. */
310 unsigned *n_cand_uses;
311
312 /* The candidates used. */
313 bitmap cands;
314
315 /* The number of candidates in the set. */
316 unsigned n_cands;
317
318 /* Total number of registers needed. */
319 unsigned n_regs;
320
321 /* Total cost of expressing uses. */
322 comp_cost cand_use_cost;
323
324 /* Total cost of candidates. */
325 unsigned cand_cost;
326
327 /* Number of times each invariant is used. */
328 unsigned *n_invariant_uses;
329
330 /* Total cost of the assignment. */
331 comp_cost cost;
332 };
333
334 /* Difference of two iv candidate assignments. */
335
336 struct iv_ca_delta
337 {
338 /* Changed use. */
339 struct iv_use *use;
340
341 /* An old assignment (for rollback purposes). */
342 struct cost_pair *old_cp;
343
344 /* A new assignment. */
345 struct cost_pair *new_cp;
346
347 /* Next change in the list. */
348 struct iv_ca_delta *next_change;
349 };
350
351 /* Bound on number of candidates below that all candidates are considered. */
352
353 #define CONSIDER_ALL_CANDIDATES_BOUND \
354 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
355
356 /* If there are more iv occurrences, we just give up (it is quite unlikely that
357 optimizing such a loop would help, and it would take ages). */
358
359 #define MAX_CONSIDERED_USES \
360 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
361
362 /* If there are at most this number of ivs in the set, try removing unnecessary
363 ivs from the set always. */
364
365 #define ALWAYS_PRUNE_CAND_SET_BOUND \
366 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
367
368 /* The list of trees for that the decl_rtl field must be reset is stored
369 here. */
370
371 static VEC(tree,heap) *decl_rtl_to_reset;
372
373 /* Number of uses recorded in DATA. */
374
375 static inline unsigned
376 n_iv_uses (struct ivopts_data *data)
377 {
378 return VEC_length (iv_use_p, data->iv_uses);
379 }
380
381 /* Ith use recorded in DATA. */
382
383 static inline struct iv_use *
384 iv_use (struct ivopts_data *data, unsigned i)
385 {
386 return VEC_index (iv_use_p, data->iv_uses, i);
387 }
388
389 /* Number of candidates recorded in DATA. */
390
391 static inline unsigned
392 n_iv_cands (struct ivopts_data *data)
393 {
394 return VEC_length (iv_cand_p, data->iv_candidates);
395 }
396
397 /* Ith candidate recorded in DATA. */
398
399 static inline struct iv_cand *
400 iv_cand (struct ivopts_data *data, unsigned i)
401 {
402 return VEC_index (iv_cand_p, data->iv_candidates, i);
403 }
404
405 /* The single loop exit if it dominates the latch, NULL otherwise. */
406
407 edge
408 single_dom_exit (struct loop *loop)
409 {
410 edge exit = single_exit (loop);
411
412 if (!exit)
413 return NULL;
414
415 if (!just_once_each_iteration_p (loop, exit->src))
416 return NULL;
417
418 return exit;
419 }
420
421 /* Dumps information about the induction variable IV to FILE. */
422
423 extern void dump_iv (FILE *, struct iv *);
424 void
425 dump_iv (FILE *file, struct iv *iv)
426 {
427 if (iv->ssa_name)
428 {
429 fprintf (file, "ssa name ");
430 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
431 fprintf (file, "\n");
432 }
433
434 fprintf (file, " type ");
435 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
436 fprintf (file, "\n");
437
438 if (iv->step)
439 {
440 fprintf (file, " base ");
441 print_generic_expr (file, iv->base, TDF_SLIM);
442 fprintf (file, "\n");
443
444 fprintf (file, " step ");
445 print_generic_expr (file, iv->step, TDF_SLIM);
446 fprintf (file, "\n");
447 }
448 else
449 {
450 fprintf (file, " invariant ");
451 print_generic_expr (file, iv->base, TDF_SLIM);
452 fprintf (file, "\n");
453 }
454
455 if (iv->base_object)
456 {
457 fprintf (file, " base object ");
458 print_generic_expr (file, iv->base_object, TDF_SLIM);
459 fprintf (file, "\n");
460 }
461
462 if (iv->biv_p)
463 fprintf (file, " is a biv\n");
464 }
465
466 /* Dumps information about the USE to FILE. */
467
468 extern void dump_use (FILE *, struct iv_use *);
469 void
470 dump_use (FILE *file, struct iv_use *use)
471 {
472 fprintf (file, "use %d\n", use->id);
473
474 switch (use->type)
475 {
476 case USE_NONLINEAR_EXPR:
477 fprintf (file, " generic\n");
478 break;
479
480 case USE_ADDRESS:
481 fprintf (file, " address\n");
482 break;
483
484 case USE_COMPARE:
485 fprintf (file, " compare\n");
486 break;
487
488 default:
489 gcc_unreachable ();
490 }
491
492 fprintf (file, " in statement ");
493 print_gimple_stmt (file, use->stmt, 0, 0);
494 fprintf (file, "\n");
495
496 fprintf (file, " at position ");
497 if (use->op_p)
498 print_generic_expr (file, *use->op_p, TDF_SLIM);
499 fprintf (file, "\n");
500
501 dump_iv (file, use->iv);
502
503 if (use->related_cands)
504 {
505 fprintf (file, " related candidates ");
506 dump_bitmap (file, use->related_cands);
507 }
508 }
509
510 /* Dumps information about the uses to FILE. */
511
512 extern void dump_uses (FILE *, struct ivopts_data *);
513 void
514 dump_uses (FILE *file, struct ivopts_data *data)
515 {
516 unsigned i;
517 struct iv_use *use;
518
519 for (i = 0; i < n_iv_uses (data); i++)
520 {
521 use = iv_use (data, i);
522
523 dump_use (file, use);
524 fprintf (file, "\n");
525 }
526 }
527
528 /* Dumps information about induction variable candidate CAND to FILE. */
529
530 extern void dump_cand (FILE *, struct iv_cand *);
531 void
532 dump_cand (FILE *file, struct iv_cand *cand)
533 {
534 struct iv *iv = cand->iv;
535
536 fprintf (file, "candidate %d%s\n",
537 cand->id, cand->important ? " (important)" : "");
538
539 if (cand->depends_on)
540 {
541 fprintf (file, " depends on ");
542 dump_bitmap (file, cand->depends_on);
543 }
544
545 if (!iv)
546 {
547 fprintf (file, " final value replacement\n");
548 return;
549 }
550
551 if (cand->var_before)
552 {
553 fprintf (file, " var_before ");
554 print_generic_expr (file, cand->var_before, TDF_SLIM);
555 fprintf (file, "\n");
556 }
557 if (cand->var_after)
558 {
559 fprintf (file, " var_after ");
560 print_generic_expr (file, cand->var_after, TDF_SLIM);
561 fprintf (file, "\n");
562 }
563
564 switch (cand->pos)
565 {
566 case IP_NORMAL:
567 fprintf (file, " incremented before exit test\n");
568 break;
569
570 case IP_BEFORE_USE:
571 fprintf (file, " incremented before use %d\n", cand->ainc_use->id);
572 break;
573
574 case IP_AFTER_USE:
575 fprintf (file, " incremented after use %d\n", cand->ainc_use->id);
576 break;
577
578 case IP_END:
579 fprintf (file, " incremented at end\n");
580 break;
581
582 case IP_ORIGINAL:
583 fprintf (file, " original biv\n");
584 break;
585 }
586
587 dump_iv (file, iv);
588 }
589
590 /* Returns the info for ssa version VER. */
591
592 static inline struct version_info *
593 ver_info (struct ivopts_data *data, unsigned ver)
594 {
595 return data->version_info + ver;
596 }
597
598 /* Returns the info for ssa name NAME. */
599
600 static inline struct version_info *
601 name_info (struct ivopts_data *data, tree name)
602 {
603 return ver_info (data, SSA_NAME_VERSION (name));
604 }
605
606 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
607 emitted in LOOP. */
608
609 static bool
610 stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
611 {
612 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
613
614 gcc_assert (bb);
615
616 if (sbb == loop->latch)
617 return true;
618
619 if (sbb != bb)
620 return false;
621
622 return stmt == last_stmt (bb);
623 }
624
625 /* Returns true if STMT if after the place where the original induction
626 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
627 if the positions are identical. */
628
629 static bool
630 stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
631 {
632 basic_block cand_bb = gimple_bb (cand->incremented_at);
633 basic_block stmt_bb = gimple_bb (stmt);
634
635 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
636 return false;
637
638 if (stmt_bb != cand_bb)
639 return true;
640
641 if (true_if_equal
642 && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
643 return true;
644 return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
645 }
646
647 /* Returns true if STMT if after the place where the induction variable
648 CAND is incremented in LOOP. */
649
650 static bool
651 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
652 {
653 switch (cand->pos)
654 {
655 case IP_END:
656 return false;
657
658 case IP_NORMAL:
659 return stmt_after_ip_normal_pos (loop, stmt);
660
661 case IP_ORIGINAL:
662 case IP_AFTER_USE:
663 return stmt_after_inc_pos (cand, stmt, false);
664
665 case IP_BEFORE_USE:
666 return stmt_after_inc_pos (cand, stmt, true);
667
668 default:
669 gcc_unreachable ();
670 }
671 }
672
673 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
674
675 static bool
676 abnormal_ssa_name_p (tree exp)
677 {
678 if (!exp)
679 return false;
680
681 if (TREE_CODE (exp) != SSA_NAME)
682 return false;
683
684 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
685 }
686
687 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
688 abnormal phi node. Callback for for_each_index. */
689
690 static bool
691 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
692 void *data ATTRIBUTE_UNUSED)
693 {
694 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
695 {
696 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
697 return false;
698 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
699 return false;
700 }
701
702 return !abnormal_ssa_name_p (*index);
703 }
704
705 /* Returns true if EXPR contains a ssa name that occurs in an
706 abnormal phi node. */
707
708 bool
709 contains_abnormal_ssa_name_p (tree expr)
710 {
711 enum tree_code code;
712 enum tree_code_class codeclass;
713
714 if (!expr)
715 return false;
716
717 code = TREE_CODE (expr);
718 codeclass = TREE_CODE_CLASS (code);
719
720 if (code == SSA_NAME)
721 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
722
723 if (code == INTEGER_CST
724 || is_gimple_min_invariant (expr))
725 return false;
726
727 if (code == ADDR_EXPR)
728 return !for_each_index (&TREE_OPERAND (expr, 0),
729 idx_contains_abnormal_ssa_name_p,
730 NULL);
731
732 if (code == COND_EXPR)
733 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
734 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
735 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
736
737 switch (codeclass)
738 {
739 case tcc_binary:
740 case tcc_comparison:
741 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
742 return true;
743
744 /* Fallthru. */
745 case tcc_unary:
746 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
747 return true;
748
749 break;
750
751 default:
752 gcc_unreachable ();
753 }
754
755 return false;
756 }
757
758 /* Returns tree describing number of iterations determined from
759 EXIT of DATA->current_loop, or NULL if something goes wrong. */
760
761 static tree
762 niter_for_exit (struct ivopts_data *data, edge exit,
763 struct tree_niter_desc **desc_p)
764 {
765 struct tree_niter_desc* desc = NULL;
766 tree niter;
767 void **slot;
768
769 if (!data->niters)
770 {
771 data->niters = pointer_map_create ();
772 slot = NULL;
773 }
774 else
775 slot = pointer_map_contains (data->niters, exit);
776
777 if (!slot)
778 {
779 /* Try to determine number of iterations. We must know it
780 unconditionally (i.e., without possibility of # of iterations
781 being zero). Also, we cannot safely work with ssa names that
782 appear in phi nodes on abnormal edges, so that we do not create
783 overlapping life ranges for them (PR 27283). */
784 desc = XNEW (struct tree_niter_desc);
785 if (number_of_iterations_exit (data->current_loop,
786 exit, desc, true)
787 && integer_zerop (desc->may_be_zero)
788 && !contains_abnormal_ssa_name_p (desc->niter))
789 niter = desc->niter;
790 else
791 niter = NULL_TREE;
792
793 desc->niter = niter;
794 slot = pointer_map_insert (data->niters, exit);
795 *slot = desc;
796 }
797 else
798 niter = ((struct tree_niter_desc *) *slot)->niter;
799
800 if (desc_p)
801 *desc_p = (struct tree_niter_desc *) *slot;
802 return niter;
803 }
804
805 /* Returns tree describing number of iterations determined from
806 single dominating exit of DATA->current_loop, or NULL if something
807 goes wrong. */
808
809 static tree
810 niter_for_single_dom_exit (struct ivopts_data *data)
811 {
812 edge exit = single_dom_exit (data->current_loop);
813
814 if (!exit)
815 return NULL;
816
817 return niter_for_exit (data, exit, NULL);
818 }
819
820 /* Hash table equality function for expressions. */
821
822 static int
823 htab_inv_expr_eq (const void *ent1, const void *ent2)
824 {
825 const struct iv_inv_expr_ent *expr1 =
826 (const struct iv_inv_expr_ent *)ent1;
827 const struct iv_inv_expr_ent *expr2 =
828 (const struct iv_inv_expr_ent *)ent2;
829
830 return operand_equal_p (expr1->expr, expr2->expr, 0);
831 }
832
833 /* Hash function for loop invariant expressions. */
834
835 static hashval_t
836 htab_inv_expr_hash (const void *ent)
837 {
838 const struct iv_inv_expr_ent *expr =
839 (const struct iv_inv_expr_ent *)ent;
840 return expr->hash;
841 }
842
843 /* Initializes data structures used by the iv optimization pass, stored
844 in DATA. */
845
846 static void
847 tree_ssa_iv_optimize_init (struct ivopts_data *data)
848 {
849 data->version_info_size = 2 * num_ssa_names;
850 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
851 data->relevant = BITMAP_ALLOC (NULL);
852 data->important_candidates = BITMAP_ALLOC (NULL);
853 data->max_inv_id = 0;
854 data->niters = NULL;
855 data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
856 data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
857 data->inv_expr_tab = htab_create (10, htab_inv_expr_hash,
858 htab_inv_expr_eq, free);
859 data->inv_expr_id = 0;
860 decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
861 }
862
863 /* Returns a memory object to that EXPR points. In case we are able to
864 determine that it does not point to any such object, NULL is returned. */
865
866 static tree
867 determine_base_object (tree expr)
868 {
869 enum tree_code code = TREE_CODE (expr);
870 tree base, obj;
871
872 /* If this is a pointer casted to any type, we need to determine
873 the base object for the pointer; so handle conversions before
874 throwing away non-pointer expressions. */
875 if (CONVERT_EXPR_P (expr))
876 return determine_base_object (TREE_OPERAND (expr, 0));
877
878 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
879 return NULL_TREE;
880
881 switch (code)
882 {
883 case INTEGER_CST:
884 return NULL_TREE;
885
886 case ADDR_EXPR:
887 obj = TREE_OPERAND (expr, 0);
888 base = get_base_address (obj);
889
890 if (!base)
891 return expr;
892
893 if (TREE_CODE (base) == MEM_REF)
894 return determine_base_object (TREE_OPERAND (base, 0));
895
896 return fold_convert (ptr_type_node,
897 build_fold_addr_expr (base));
898
899 case POINTER_PLUS_EXPR:
900 return determine_base_object (TREE_OPERAND (expr, 0));
901
902 case PLUS_EXPR:
903 case MINUS_EXPR:
904 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
905 gcc_unreachable ();
906
907 default:
908 return fold_convert (ptr_type_node, expr);
909 }
910 }
911
912 /* Allocates an induction variable with given initial value BASE and step STEP
913 for loop LOOP. */
914
915 static struct iv *
916 alloc_iv (tree base, tree step)
917 {
918 struct iv *iv = XCNEW (struct iv);
919 gcc_assert (step != NULL_TREE);
920
921 iv->base = base;
922 iv->base_object = determine_base_object (base);
923 iv->step = step;
924 iv->biv_p = false;
925 iv->have_use_for = false;
926 iv->use_id = 0;
927 iv->ssa_name = NULL_TREE;
928
929 return iv;
930 }
931
932 /* Sets STEP and BASE for induction variable IV. */
933
934 static void
935 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
936 {
937 struct version_info *info = name_info (data, iv);
938
939 gcc_assert (!info->iv);
940
941 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
942 info->iv = alloc_iv (base, step);
943 info->iv->ssa_name = iv;
944 }
945
946 /* Finds induction variable declaration for VAR. */
947
948 static struct iv *
949 get_iv (struct ivopts_data *data, tree var)
950 {
951 basic_block bb;
952 tree type = TREE_TYPE (var);
953
954 if (!POINTER_TYPE_P (type)
955 && !INTEGRAL_TYPE_P (type))
956 return NULL;
957
958 if (!name_info (data, var)->iv)
959 {
960 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
961
962 if (!bb
963 || !flow_bb_inside_loop_p (data->current_loop, bb))
964 set_iv (data, var, var, build_int_cst (type, 0));
965 }
966
967 return name_info (data, var)->iv;
968 }
969
970 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
971 not define a simple affine biv with nonzero step. */
972
973 static tree
974 determine_biv_step (gimple phi)
975 {
976 struct loop *loop = gimple_bb (phi)->loop_father;
977 tree name = PHI_RESULT (phi);
978 affine_iv iv;
979
980 if (!is_gimple_reg (name))
981 return NULL_TREE;
982
983 if (!simple_iv (loop, loop, name, &iv, true))
984 return NULL_TREE;
985
986 return integer_zerop (iv.step) ? NULL_TREE : iv.step;
987 }
988
989 /* Finds basic ivs. */
990
991 static bool
992 find_bivs (struct ivopts_data *data)
993 {
994 gimple phi;
995 tree step, type, base;
996 bool found = false;
997 struct loop *loop = data->current_loop;
998 gimple_stmt_iterator psi;
999
1000 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1001 {
1002 phi = gsi_stmt (psi);
1003
1004 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1005 continue;
1006
1007 step = determine_biv_step (phi);
1008 if (!step)
1009 continue;
1010
1011 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1012 base = expand_simple_operations (base);
1013 if (contains_abnormal_ssa_name_p (base)
1014 || contains_abnormal_ssa_name_p (step))
1015 continue;
1016
1017 type = TREE_TYPE (PHI_RESULT (phi));
1018 base = fold_convert (type, base);
1019 if (step)
1020 {
1021 if (POINTER_TYPE_P (type))
1022 step = fold_convert (sizetype, step);
1023 else
1024 step = fold_convert (type, step);
1025 }
1026
1027 set_iv (data, PHI_RESULT (phi), base, step);
1028 found = true;
1029 }
1030
1031 return found;
1032 }
1033
1034 /* Marks basic ivs. */
1035
1036 static void
1037 mark_bivs (struct ivopts_data *data)
1038 {
1039 gimple phi;
1040 tree var;
1041 struct iv *iv, *incr_iv;
1042 struct loop *loop = data->current_loop;
1043 basic_block incr_bb;
1044 gimple_stmt_iterator psi;
1045
1046 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1047 {
1048 phi = gsi_stmt (psi);
1049
1050 iv = get_iv (data, PHI_RESULT (phi));
1051 if (!iv)
1052 continue;
1053
1054 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1055 incr_iv = get_iv (data, var);
1056 if (!incr_iv)
1057 continue;
1058
1059 /* If the increment is in the subloop, ignore it. */
1060 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1061 if (incr_bb->loop_father != data->current_loop
1062 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1063 continue;
1064
1065 iv->biv_p = true;
1066 incr_iv->biv_p = true;
1067 }
1068 }
1069
1070 /* Checks whether STMT defines a linear induction variable and stores its
1071 parameters to IV. */
1072
1073 static bool
1074 find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
1075 {
1076 tree lhs;
1077 struct loop *loop = data->current_loop;
1078
1079 iv->base = NULL_TREE;
1080 iv->step = NULL_TREE;
1081
1082 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1083 return false;
1084
1085 lhs = gimple_assign_lhs (stmt);
1086 if (TREE_CODE (lhs) != SSA_NAME)
1087 return false;
1088
1089 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1090 return false;
1091 iv->base = expand_simple_operations (iv->base);
1092
1093 if (contains_abnormal_ssa_name_p (iv->base)
1094 || contains_abnormal_ssa_name_p (iv->step))
1095 return false;
1096
1097 return true;
1098 }
1099
1100 /* Finds general ivs in statement STMT. */
1101
1102 static void
1103 find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
1104 {
1105 affine_iv iv;
1106
1107 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1108 return;
1109
1110 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
1111 }
1112
1113 /* Finds general ivs in basic block BB. */
1114
1115 static void
1116 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1117 {
1118 gimple_stmt_iterator bsi;
1119
1120 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1121 find_givs_in_stmt (data, gsi_stmt (bsi));
1122 }
1123
1124 /* Finds general ivs. */
1125
1126 static void
1127 find_givs (struct ivopts_data *data)
1128 {
1129 struct loop *loop = data->current_loop;
1130 basic_block *body = get_loop_body_in_dom_order (loop);
1131 unsigned i;
1132
1133 for (i = 0; i < loop->num_nodes; i++)
1134 find_givs_in_bb (data, body[i]);
1135 free (body);
1136 }
1137
1138 /* For each ssa name defined in LOOP determines whether it is an induction
1139 variable and if so, its initial value and step. */
1140
1141 static bool
1142 find_induction_variables (struct ivopts_data *data)
1143 {
1144 unsigned i;
1145 bitmap_iterator bi;
1146
1147 if (!find_bivs (data))
1148 return false;
1149
1150 find_givs (data);
1151 mark_bivs (data);
1152
1153 if (dump_file && (dump_flags & TDF_DETAILS))
1154 {
1155 tree niter = niter_for_single_dom_exit (data);
1156
1157 if (niter)
1158 {
1159 fprintf (dump_file, " number of iterations ");
1160 print_generic_expr (dump_file, niter, TDF_SLIM);
1161 fprintf (dump_file, "\n\n");
1162 };
1163
1164 fprintf (dump_file, "Induction variables:\n\n");
1165
1166 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1167 {
1168 if (ver_info (data, i)->iv)
1169 dump_iv (dump_file, ver_info (data, i)->iv);
1170 }
1171 }
1172
1173 return true;
1174 }
1175
1176 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1177
1178 static struct iv_use *
1179 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1180 gimple stmt, enum use_type use_type)
1181 {
1182 struct iv_use *use = XCNEW (struct iv_use);
1183
1184 use->id = n_iv_uses (data);
1185 use->type = use_type;
1186 use->iv = iv;
1187 use->stmt = stmt;
1188 use->op_p = use_p;
1189 use->related_cands = BITMAP_ALLOC (NULL);
1190
1191 /* To avoid showing ssa name in the dumps, if it was not reset by the
1192 caller. */
1193 iv->ssa_name = NULL_TREE;
1194
1195 if (dump_file && (dump_flags & TDF_DETAILS))
1196 dump_use (dump_file, use);
1197
1198 VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1199
1200 return use;
1201 }
1202
1203 /* Checks whether OP is a loop-level invariant and if so, records it.
1204 NONLINEAR_USE is true if the invariant is used in a way we do not
1205 handle specially. */
1206
1207 static void
1208 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1209 {
1210 basic_block bb;
1211 struct version_info *info;
1212
1213 if (TREE_CODE (op) != SSA_NAME
1214 || !is_gimple_reg (op))
1215 return;
1216
1217 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1218 if (bb
1219 && flow_bb_inside_loop_p (data->current_loop, bb))
1220 return;
1221
1222 info = name_info (data, op);
1223 info->name = op;
1224 info->has_nonlin_use |= nonlinear_use;
1225 if (!info->inv_id)
1226 info->inv_id = ++data->max_inv_id;
1227 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1228 }
1229
1230 /* Checks whether the use OP is interesting and if so, records it. */
1231
1232 static struct iv_use *
1233 find_interesting_uses_op (struct ivopts_data *data, tree op)
1234 {
1235 struct iv *iv;
1236 struct iv *civ;
1237 gimple stmt;
1238 struct iv_use *use;
1239
1240 if (TREE_CODE (op) != SSA_NAME)
1241 return NULL;
1242
1243 iv = get_iv (data, op);
1244 if (!iv)
1245 return NULL;
1246
1247 if (iv->have_use_for)
1248 {
1249 use = iv_use (data, iv->use_id);
1250
1251 gcc_assert (use->type == USE_NONLINEAR_EXPR);
1252 return use;
1253 }
1254
1255 if (integer_zerop (iv->step))
1256 {
1257 record_invariant (data, op, true);
1258 return NULL;
1259 }
1260 iv->have_use_for = true;
1261
1262 civ = XNEW (struct iv);
1263 *civ = *iv;
1264
1265 stmt = SSA_NAME_DEF_STMT (op);
1266 gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1267 || is_gimple_assign (stmt));
1268
1269 use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1270 iv->use_id = use->id;
1271
1272 return use;
1273 }
1274
1275 /* Given a condition in statement STMT, checks whether it is a compare
1276 of an induction variable and an invariant. If this is the case,
1277 CONTROL_VAR is set to location of the iv, BOUND to the location of
1278 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1279 induction variable descriptions, and true is returned. If this is not
1280 the case, CONTROL_VAR and BOUND are set to the arguments of the
1281 condition and false is returned. */
1282
1283 static bool
1284 extract_cond_operands (struct ivopts_data *data, gimple stmt,
1285 tree **control_var, tree **bound,
1286 struct iv **iv_var, struct iv **iv_bound)
1287 {
1288 /* The objects returned when COND has constant operands. */
1289 static struct iv const_iv;
1290 static tree zero;
1291 tree *op0 = &zero, *op1 = &zero, *tmp_op;
1292 struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1293 bool ret = false;
1294
1295 if (gimple_code (stmt) == GIMPLE_COND)
1296 {
1297 op0 = gimple_cond_lhs_ptr (stmt);
1298 op1 = gimple_cond_rhs_ptr (stmt);
1299 }
1300 else
1301 {
1302 op0 = gimple_assign_rhs1_ptr (stmt);
1303 op1 = gimple_assign_rhs2_ptr (stmt);
1304 }
1305
1306 zero = integer_zero_node;
1307 const_iv.step = integer_zero_node;
1308
1309 if (TREE_CODE (*op0) == SSA_NAME)
1310 iv0 = get_iv (data, *op0);
1311 if (TREE_CODE (*op1) == SSA_NAME)
1312 iv1 = get_iv (data, *op1);
1313
1314 /* Exactly one of the compared values must be an iv, and the other one must
1315 be an invariant. */
1316 if (!iv0 || !iv1)
1317 goto end;
1318
1319 if (integer_zerop (iv0->step))
1320 {
1321 /* Control variable may be on the other side. */
1322 tmp_op = op0; op0 = op1; op1 = tmp_op;
1323 tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1324 }
1325 ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1326
1327 end:
1328 if (control_var)
1329 *control_var = op0;;
1330 if (iv_var)
1331 *iv_var = iv0;;
1332 if (bound)
1333 *bound = op1;
1334 if (iv_bound)
1335 *iv_bound = iv1;
1336
1337 return ret;
1338 }
1339
1340 /* Checks whether the condition in STMT is interesting and if so,
1341 records it. */
1342
1343 static void
1344 find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
1345 {
1346 tree *var_p, *bound_p;
1347 struct iv *var_iv, *civ;
1348
1349 if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1350 {
1351 find_interesting_uses_op (data, *var_p);
1352 find_interesting_uses_op (data, *bound_p);
1353 return;
1354 }
1355
1356 civ = XNEW (struct iv);
1357 *civ = *var_iv;
1358 record_use (data, NULL, civ, stmt, USE_COMPARE);
1359 }
1360
1361 /* Returns true if expression EXPR is obviously invariant in LOOP,
1362 i.e. if all its operands are defined outside of the LOOP. LOOP
1363 should not be the function body. */
1364
1365 bool
1366 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1367 {
1368 basic_block def_bb;
1369 unsigned i, len;
1370
1371 gcc_assert (loop_depth (loop) > 0);
1372
1373 if (is_gimple_min_invariant (expr))
1374 return true;
1375
1376 if (TREE_CODE (expr) == SSA_NAME)
1377 {
1378 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1379 if (def_bb
1380 && flow_bb_inside_loop_p (loop, def_bb))
1381 return false;
1382
1383 return true;
1384 }
1385
1386 if (!EXPR_P (expr))
1387 return false;
1388
1389 len = TREE_OPERAND_LENGTH (expr);
1390 for (i = 0; i < len; i++)
1391 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1392 return false;
1393
1394 return true;
1395 }
1396
1397 /* Returns true if statement STMT is obviously invariant in LOOP,
1398 i.e. if all its operands on the RHS are defined outside of the LOOP.
1399 LOOP should not be the function body. */
1400
1401 bool
1402 stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
1403 {
1404 unsigned i;
1405 tree lhs;
1406
1407 gcc_assert (loop_depth (loop) > 0);
1408
1409 lhs = gimple_get_lhs (stmt);
1410 for (i = 0; i < gimple_num_ops (stmt); i++)
1411 {
1412 tree op = gimple_op (stmt, i);
1413 if (op != lhs && !expr_invariant_in_loop_p (loop, op))
1414 return false;
1415 }
1416
1417 return true;
1418 }
1419
1420 /* Cumulates the steps of indices into DATA and replaces their values with the
1421 initial ones. Returns false when the value of the index cannot be determined.
1422 Callback for for_each_index. */
1423
1424 struct ifs_ivopts_data
1425 {
1426 struct ivopts_data *ivopts_data;
1427 gimple stmt;
1428 tree step;
1429 };
1430
1431 static bool
1432 idx_find_step (tree base, tree *idx, void *data)
1433 {
1434 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1435 struct iv *iv;
1436 tree step, iv_base, iv_step, lbound, off;
1437 struct loop *loop = dta->ivopts_data->current_loop;
1438
1439 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF)
1440 return false;
1441
1442 /* If base is a component ref, require that the offset of the reference
1443 be invariant. */
1444 if (TREE_CODE (base) == COMPONENT_REF)
1445 {
1446 off = component_ref_field_offset (base);
1447 return expr_invariant_in_loop_p (loop, off);
1448 }
1449
1450 /* If base is array, first check whether we will be able to move the
1451 reference out of the loop (in order to take its address in strength
1452 reduction). In order for this to work we need both lower bound
1453 and step to be loop invariants. */
1454 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1455 {
1456 /* Moreover, for a range, the size needs to be invariant as well. */
1457 if (TREE_CODE (base) == ARRAY_RANGE_REF
1458 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1459 return false;
1460
1461 step = array_ref_element_size (base);
1462 lbound = array_ref_low_bound (base);
1463
1464 if (!expr_invariant_in_loop_p (loop, step)
1465 || !expr_invariant_in_loop_p (loop, lbound))
1466 return false;
1467 }
1468
1469 if (TREE_CODE (*idx) != SSA_NAME)
1470 return true;
1471
1472 iv = get_iv (dta->ivopts_data, *idx);
1473 if (!iv)
1474 return false;
1475
1476 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1477 *&x[0], which is not folded and does not trigger the
1478 ARRAY_REF path below. */
1479 *idx = iv->base;
1480
1481 if (integer_zerop (iv->step))
1482 return true;
1483
1484 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1485 {
1486 step = array_ref_element_size (base);
1487
1488 /* We only handle addresses whose step is an integer constant. */
1489 if (TREE_CODE (step) != INTEGER_CST)
1490 return false;
1491 }
1492 else
1493 /* The step for pointer arithmetics already is 1 byte. */
1494 step = size_one_node;
1495
1496 iv_base = iv->base;
1497 iv_step = iv->step;
1498 if (!convert_affine_scev (dta->ivopts_data->current_loop,
1499 sizetype, &iv_base, &iv_step, dta->stmt,
1500 false))
1501 {
1502 /* The index might wrap. */
1503 return false;
1504 }
1505
1506 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1507 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1508
1509 return true;
1510 }
1511
1512 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1513 object is passed to it in DATA. */
1514
1515 static bool
1516 idx_record_use (tree base, tree *idx,
1517 void *vdata)
1518 {
1519 struct ivopts_data *data = (struct ivopts_data *) vdata;
1520 find_interesting_uses_op (data, *idx);
1521 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1522 {
1523 find_interesting_uses_op (data, array_ref_element_size (base));
1524 find_interesting_uses_op (data, array_ref_low_bound (base));
1525 }
1526 return true;
1527 }
1528
1529 /* If we can prove that TOP = cst * BOT for some constant cst,
1530 store cst to MUL and return true. Otherwise return false.
1531 The returned value is always sign-extended, regardless of the
1532 signedness of TOP and BOT. */
1533
1534 static bool
1535 constant_multiple_of (tree top, tree bot, double_int *mul)
1536 {
1537 tree mby;
1538 enum tree_code code;
1539 double_int res, p0, p1;
1540 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1541
1542 STRIP_NOPS (top);
1543 STRIP_NOPS (bot);
1544
1545 if (operand_equal_p (top, bot, 0))
1546 {
1547 *mul = double_int_one;
1548 return true;
1549 }
1550
1551 code = TREE_CODE (top);
1552 switch (code)
1553 {
1554 case MULT_EXPR:
1555 mby = TREE_OPERAND (top, 1);
1556 if (TREE_CODE (mby) != INTEGER_CST)
1557 return false;
1558
1559 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1560 return false;
1561
1562 *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
1563 precision);
1564 return true;
1565
1566 case PLUS_EXPR:
1567 case MINUS_EXPR:
1568 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1569 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1570 return false;
1571
1572 if (code == MINUS_EXPR)
1573 p1 = double_int_neg (p1);
1574 *mul = double_int_sext (double_int_add (p0, p1), precision);
1575 return true;
1576
1577 case INTEGER_CST:
1578 if (TREE_CODE (bot) != INTEGER_CST)
1579 return false;
1580
1581 p0 = double_int_sext (tree_to_double_int (top), precision);
1582 p1 = double_int_sext (tree_to_double_int (bot), precision);
1583 if (double_int_zero_p (p1))
1584 return false;
1585 *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
1586 precision);
1587 return double_int_zero_p (res);
1588
1589 default:
1590 return false;
1591 }
1592 }
1593
1594 /* Returns true if memory reference REF with step STEP may be unaligned. */
1595
1596 static bool
1597 may_be_unaligned_p (tree ref, tree step)
1598 {
1599 tree base;
1600 tree base_type;
1601 HOST_WIDE_INT bitsize;
1602 HOST_WIDE_INT bitpos;
1603 tree toffset;
1604 enum machine_mode mode;
1605 int unsignedp, volatilep;
1606 unsigned base_align;
1607
1608 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1609 thus they are not misaligned. */
1610 if (TREE_CODE (ref) == TARGET_MEM_REF)
1611 return false;
1612
1613 /* The test below is basically copy of what expr.c:normal_inner_ref
1614 does to check whether the object must be loaded by parts when
1615 STRICT_ALIGNMENT is true. */
1616 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1617 &unsignedp, &volatilep, true);
1618 base_type = TREE_TYPE (base);
1619 base_align = TYPE_ALIGN (base_type);
1620
1621 if (mode != BLKmode)
1622 {
1623 unsigned mode_align = GET_MODE_ALIGNMENT (mode);
1624
1625 if (base_align < mode_align
1626 || (bitpos % mode_align) != 0
1627 || (bitpos % BITS_PER_UNIT) != 0)
1628 return true;
1629
1630 if (toffset
1631 && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
1632 return true;
1633
1634 if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
1635 return true;
1636 }
1637
1638 return false;
1639 }
1640
1641 /* Return true if EXPR may be non-addressable. */
1642
1643 static bool
1644 may_be_nonaddressable_p (tree expr)
1645 {
1646 switch (TREE_CODE (expr))
1647 {
1648 case TARGET_MEM_REF:
1649 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1650 target, thus they are always addressable. */
1651 return false;
1652
1653 case COMPONENT_REF:
1654 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1655 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1656
1657 case VIEW_CONVERT_EXPR:
1658 /* This kind of view-conversions may wrap non-addressable objects
1659 and make them look addressable. After some processing the
1660 non-addressability may be uncovered again, causing ADDR_EXPRs
1661 of inappropriate objects to be built. */
1662 if (is_gimple_reg (TREE_OPERAND (expr, 0))
1663 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
1664 return true;
1665
1666 /* ... fall through ... */
1667
1668 case ARRAY_REF:
1669 case ARRAY_RANGE_REF:
1670 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1671
1672 CASE_CONVERT:
1673 return true;
1674
1675 default:
1676 break;
1677 }
1678
1679 return false;
1680 }
1681
1682 /* Finds addresses in *OP_P inside STMT. */
1683
1684 static void
1685 find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
1686 {
1687 tree base = *op_p, step = size_zero_node;
1688 struct iv *civ;
1689 struct ifs_ivopts_data ifs_ivopts_data;
1690
1691 /* Do not play with volatile memory references. A bit too conservative,
1692 perhaps, but safe. */
1693 if (gimple_has_volatile_ops (stmt))
1694 goto fail;
1695
1696 /* Ignore bitfields for now. Not really something terribly complicated
1697 to handle. TODO. */
1698 if (TREE_CODE (base) == BIT_FIELD_REF)
1699 goto fail;
1700
1701 base = unshare_expr (base);
1702
1703 if (TREE_CODE (base) == TARGET_MEM_REF)
1704 {
1705 tree type = build_pointer_type (TREE_TYPE (base));
1706 tree astep;
1707
1708 if (TMR_BASE (base)
1709 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1710 {
1711 civ = get_iv (data, TMR_BASE (base));
1712 if (!civ)
1713 goto fail;
1714
1715 TMR_BASE (base) = civ->base;
1716 step = civ->step;
1717 }
1718 if (TMR_INDEX (base)
1719 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1720 {
1721 civ = get_iv (data, TMR_INDEX (base));
1722 if (!civ)
1723 goto fail;
1724
1725 TMR_INDEX (base) = civ->base;
1726 astep = civ->step;
1727
1728 if (astep)
1729 {
1730 if (TMR_STEP (base))
1731 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1732
1733 step = fold_build2 (PLUS_EXPR, type, step, astep);
1734 }
1735 }
1736
1737 if (integer_zerop (step))
1738 goto fail;
1739 base = tree_mem_ref_addr (type, base);
1740 }
1741 else
1742 {
1743 ifs_ivopts_data.ivopts_data = data;
1744 ifs_ivopts_data.stmt = stmt;
1745 ifs_ivopts_data.step = size_zero_node;
1746 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1747 || integer_zerop (ifs_ivopts_data.step))
1748 goto fail;
1749 step = ifs_ivopts_data.step;
1750
1751 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1752
1753 /* Check that the base expression is addressable. This needs
1754 to be done after substituting bases of IVs into it. */
1755 if (may_be_nonaddressable_p (base))
1756 goto fail;
1757
1758 /* Moreover, on strict alignment platforms, check that it is
1759 sufficiently aligned. */
1760 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1761 goto fail;
1762
1763 base = build_fold_addr_expr (base);
1764
1765 /* Substituting bases of IVs into the base expression might
1766 have caused folding opportunities. */
1767 if (TREE_CODE (base) == ADDR_EXPR)
1768 {
1769 tree *ref = &TREE_OPERAND (base, 0);
1770 while (handled_component_p (*ref))
1771 ref = &TREE_OPERAND (*ref, 0);
1772 if (TREE_CODE (*ref) == MEM_REF)
1773 {
1774 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
1775 TREE_OPERAND (*ref, 0),
1776 TREE_OPERAND (*ref, 1));
1777 if (tem)
1778 *ref = tem;
1779 }
1780 }
1781 }
1782
1783 civ = alloc_iv (base, step);
1784 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1785 return;
1786
1787 fail:
1788 for_each_index (op_p, idx_record_use, data);
1789 }
1790
1791 /* Finds and records invariants used in STMT. */
1792
1793 static void
1794 find_invariants_stmt (struct ivopts_data *data, gimple stmt)
1795 {
1796 ssa_op_iter iter;
1797 use_operand_p use_p;
1798 tree op;
1799
1800 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1801 {
1802 op = USE_FROM_PTR (use_p);
1803 record_invariant (data, op, false);
1804 }
1805 }
1806
1807 /* Finds interesting uses of induction variables in the statement STMT. */
1808
1809 static void
1810 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
1811 {
1812 struct iv *iv;
1813 tree op, *lhs, *rhs;
1814 ssa_op_iter iter;
1815 use_operand_p use_p;
1816 enum tree_code code;
1817
1818 find_invariants_stmt (data, stmt);
1819
1820 if (gimple_code (stmt) == GIMPLE_COND)
1821 {
1822 find_interesting_uses_cond (data, stmt);
1823 return;
1824 }
1825
1826 if (is_gimple_assign (stmt))
1827 {
1828 lhs = gimple_assign_lhs_ptr (stmt);
1829 rhs = gimple_assign_rhs1_ptr (stmt);
1830
1831 if (TREE_CODE (*lhs) == SSA_NAME)
1832 {
1833 /* If the statement defines an induction variable, the uses are not
1834 interesting by themselves. */
1835
1836 iv = get_iv (data, *lhs);
1837
1838 if (iv && !integer_zerop (iv->step))
1839 return;
1840 }
1841
1842 code = gimple_assign_rhs_code (stmt);
1843 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
1844 && (REFERENCE_CLASS_P (*rhs)
1845 || is_gimple_val (*rhs)))
1846 {
1847 if (REFERENCE_CLASS_P (*rhs))
1848 find_interesting_uses_address (data, stmt, rhs);
1849 else
1850 find_interesting_uses_op (data, *rhs);
1851
1852 if (REFERENCE_CLASS_P (*lhs))
1853 find_interesting_uses_address (data, stmt, lhs);
1854 return;
1855 }
1856 else if (TREE_CODE_CLASS (code) == tcc_comparison)
1857 {
1858 find_interesting_uses_cond (data, stmt);
1859 return;
1860 }
1861
1862 /* TODO -- we should also handle address uses of type
1863
1864 memory = call (whatever);
1865
1866 and
1867
1868 call (memory). */
1869 }
1870
1871 if (gimple_code (stmt) == GIMPLE_PHI
1872 && gimple_bb (stmt) == data->current_loop->header)
1873 {
1874 iv = get_iv (data, PHI_RESULT (stmt));
1875
1876 if (iv && !integer_zerop (iv->step))
1877 return;
1878 }
1879
1880 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1881 {
1882 op = USE_FROM_PTR (use_p);
1883
1884 if (TREE_CODE (op) != SSA_NAME)
1885 continue;
1886
1887 iv = get_iv (data, op);
1888 if (!iv)
1889 continue;
1890
1891 find_interesting_uses_op (data, op);
1892 }
1893 }
1894
1895 /* Finds interesting uses of induction variables outside of loops
1896 on loop exit edge EXIT. */
1897
1898 static void
1899 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1900 {
1901 gimple phi;
1902 gimple_stmt_iterator psi;
1903 tree def;
1904
1905 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
1906 {
1907 phi = gsi_stmt (psi);
1908 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1909 if (is_gimple_reg (def))
1910 find_interesting_uses_op (data, def);
1911 }
1912 }
1913
1914 /* Finds uses of the induction variables that are interesting. */
1915
1916 static void
1917 find_interesting_uses (struct ivopts_data *data)
1918 {
1919 basic_block bb;
1920 gimple_stmt_iterator bsi;
1921 basic_block *body = get_loop_body (data->current_loop);
1922 unsigned i;
1923 struct version_info *info;
1924 edge e;
1925
1926 if (dump_file && (dump_flags & TDF_DETAILS))
1927 fprintf (dump_file, "Uses:\n\n");
1928
1929 for (i = 0; i < data->current_loop->num_nodes; i++)
1930 {
1931 edge_iterator ei;
1932 bb = body[i];
1933
1934 FOR_EACH_EDGE (e, ei, bb->succs)
1935 if (e->dest != EXIT_BLOCK_PTR
1936 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1937 find_interesting_uses_outside (data, e);
1938
1939 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1940 find_interesting_uses_stmt (data, gsi_stmt (bsi));
1941 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1942 if (!is_gimple_debug (gsi_stmt (bsi)))
1943 find_interesting_uses_stmt (data, gsi_stmt (bsi));
1944 }
1945
1946 if (dump_file && (dump_flags & TDF_DETAILS))
1947 {
1948 bitmap_iterator bi;
1949
1950 fprintf (dump_file, "\n");
1951
1952 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1953 {
1954 info = ver_info (data, i);
1955 if (info->inv_id)
1956 {
1957 fprintf (dump_file, " ");
1958 print_generic_expr (dump_file, info->name, TDF_SLIM);
1959 fprintf (dump_file, " is invariant (%d)%s\n",
1960 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1961 }
1962 }
1963
1964 fprintf (dump_file, "\n");
1965 }
1966
1967 free (body);
1968 }
1969
1970 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1971 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1972 we are at the top-level of the processed address. */
1973
1974 static tree
1975 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1976 unsigned HOST_WIDE_INT *offset)
1977 {
1978 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1979 enum tree_code code;
1980 tree type, orig_type = TREE_TYPE (expr);
1981 unsigned HOST_WIDE_INT off0, off1, st;
1982 tree orig_expr = expr;
1983
1984 STRIP_NOPS (expr);
1985
1986 type = TREE_TYPE (expr);
1987 code = TREE_CODE (expr);
1988 *offset = 0;
1989
1990 switch (code)
1991 {
1992 case INTEGER_CST:
1993 if (!cst_and_fits_in_hwi (expr)
1994 || integer_zerop (expr))
1995 return orig_expr;
1996
1997 *offset = int_cst_value (expr);
1998 return build_int_cst (orig_type, 0);
1999
2000 case POINTER_PLUS_EXPR:
2001 case PLUS_EXPR:
2002 case MINUS_EXPR:
2003 op0 = TREE_OPERAND (expr, 0);
2004 op1 = TREE_OPERAND (expr, 1);
2005
2006 op0 = strip_offset_1 (op0, false, false, &off0);
2007 op1 = strip_offset_1 (op1, false, false, &off1);
2008
2009 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2010 if (op0 == TREE_OPERAND (expr, 0)
2011 && op1 == TREE_OPERAND (expr, 1))
2012 return orig_expr;
2013
2014 if (integer_zerop (op1))
2015 expr = op0;
2016 else if (integer_zerop (op0))
2017 {
2018 if (code == MINUS_EXPR)
2019 expr = fold_build1 (NEGATE_EXPR, type, op1);
2020 else
2021 expr = op1;
2022 }
2023 else
2024 expr = fold_build2 (code, type, op0, op1);
2025
2026 return fold_convert (orig_type, expr);
2027
2028 case MULT_EXPR:
2029 op1 = TREE_OPERAND (expr, 1);
2030 if (!cst_and_fits_in_hwi (op1))
2031 return orig_expr;
2032
2033 op0 = TREE_OPERAND (expr, 0);
2034 op0 = strip_offset_1 (op0, false, false, &off0);
2035 if (op0 == TREE_OPERAND (expr, 0))
2036 return orig_expr;
2037
2038 *offset = off0 * int_cst_value (op1);
2039 if (integer_zerop (op0))
2040 expr = op0;
2041 else
2042 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2043
2044 return fold_convert (orig_type, expr);
2045
2046 case ARRAY_REF:
2047 case ARRAY_RANGE_REF:
2048 if (!inside_addr)
2049 return orig_expr;
2050
2051 step = array_ref_element_size (expr);
2052 if (!cst_and_fits_in_hwi (step))
2053 break;
2054
2055 st = int_cst_value (step);
2056 op1 = TREE_OPERAND (expr, 1);
2057 op1 = strip_offset_1 (op1, false, false, &off1);
2058 *offset = off1 * st;
2059
2060 if (top_compref
2061 && integer_zerop (op1))
2062 {
2063 /* Strip the component reference completely. */
2064 op0 = TREE_OPERAND (expr, 0);
2065 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2066 *offset += off0;
2067 return op0;
2068 }
2069 break;
2070
2071 case COMPONENT_REF:
2072 if (!inside_addr)
2073 return orig_expr;
2074
2075 tmp = component_ref_field_offset (expr);
2076 if (top_compref
2077 && cst_and_fits_in_hwi (tmp))
2078 {
2079 /* Strip the component reference completely. */
2080 op0 = TREE_OPERAND (expr, 0);
2081 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2082 *offset = off0 + int_cst_value (tmp);
2083 return op0;
2084 }
2085 break;
2086
2087 case ADDR_EXPR:
2088 op0 = TREE_OPERAND (expr, 0);
2089 op0 = strip_offset_1 (op0, true, true, &off0);
2090 *offset += off0;
2091
2092 if (op0 == TREE_OPERAND (expr, 0))
2093 return orig_expr;
2094
2095 expr = build_fold_addr_expr (op0);
2096 return fold_convert (orig_type, expr);
2097
2098 case MEM_REF:
2099 /* ??? Offset operand? */
2100 inside_addr = false;
2101 break;
2102
2103 default:
2104 return orig_expr;
2105 }
2106
2107 /* Default handling of expressions for which we want to recurse into
2108 the first operand. */
2109 op0 = TREE_OPERAND (expr, 0);
2110 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2111 *offset += off0;
2112
2113 if (op0 == TREE_OPERAND (expr, 0)
2114 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2115 return orig_expr;
2116
2117 expr = copy_node (expr);
2118 TREE_OPERAND (expr, 0) = op0;
2119 if (op1)
2120 TREE_OPERAND (expr, 1) = op1;
2121
2122 /* Inside an address, we might strip the top-level component references,
2123 thus changing the type of the expression. The handling of ADDR_EXPR
2124 will fix that. */
2125 expr = fold_convert (orig_type, expr);
2126
2127 return expr;
2128 }
2129
2130 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2131
2132 static tree
2133 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2134 {
2135 return strip_offset_1 (expr, false, false, offset);
2136 }
2137
2138 /* Returns a variant of TYPE that can be used as a base for different uses.
2139 We return an unsigned type with the same precision, which avoids problems
2140 with overflows. */
2141
2142 static tree
2143 generic_type_for (tree type)
2144 {
2145 if (POINTER_TYPE_P (type))
2146 return unsigned_type_for (type);
2147
2148 if (TYPE_UNSIGNED (type))
2149 return type;
2150
2151 return unsigned_type_for (type);
2152 }
2153
2154 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2155 the bitmap in which we should store it. */
2156
2157 static struct ivopts_data *fd_ivopts_data;
2158 static tree
2159 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2160 {
2161 bitmap *depends_on = (bitmap *) data;
2162 struct version_info *info;
2163
2164 if (TREE_CODE (*expr_p) != SSA_NAME)
2165 return NULL_TREE;
2166 info = name_info (fd_ivopts_data, *expr_p);
2167
2168 if (!info->inv_id || info->has_nonlin_use)
2169 return NULL_TREE;
2170
2171 if (!*depends_on)
2172 *depends_on = BITMAP_ALLOC (NULL);
2173 bitmap_set_bit (*depends_on, info->inv_id);
2174
2175 return NULL_TREE;
2176 }
2177
2178 /* Adds a candidate BASE + STEP * i. The important field is set to IMPORTANT
2179 and the position to POS. If USE is not NULL, the candidate is set as related to
2180 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2181 replacement of the final value of the iv by a direct computation. */
2182
2183 static struct iv_cand *
2184 add_candidate_1 (struct ivopts_data *data,
2185 tree base, tree step, bool important, enum iv_position pos,
2186 struct iv_use *use, gimple incremented_at)
2187 {
2188 unsigned i;
2189 struct iv_cand *cand = NULL;
2190 tree type, orig_type;
2191
2192 if (base)
2193 {
2194 orig_type = TREE_TYPE (base);
2195 type = generic_type_for (orig_type);
2196 if (type != orig_type)
2197 {
2198 base = fold_convert (type, base);
2199 step = fold_convert (type, step);
2200 }
2201 }
2202
2203 for (i = 0; i < n_iv_cands (data); i++)
2204 {
2205 cand = iv_cand (data, i);
2206
2207 if (cand->pos != pos)
2208 continue;
2209
2210 if (cand->incremented_at != incremented_at
2211 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2212 && cand->ainc_use != use))
2213 continue;
2214
2215 if (!cand->iv)
2216 {
2217 if (!base && !step)
2218 break;
2219
2220 continue;
2221 }
2222
2223 if (!base && !step)
2224 continue;
2225
2226 if (operand_equal_p (base, cand->iv->base, 0)
2227 && operand_equal_p (step, cand->iv->step, 0)
2228 && (TYPE_PRECISION (TREE_TYPE (base))
2229 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2230 break;
2231 }
2232
2233 if (i == n_iv_cands (data))
2234 {
2235 cand = XCNEW (struct iv_cand);
2236 cand->id = i;
2237
2238 if (!base && !step)
2239 cand->iv = NULL;
2240 else
2241 cand->iv = alloc_iv (base, step);
2242
2243 cand->pos = pos;
2244 if (pos != IP_ORIGINAL && cand->iv)
2245 {
2246 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2247 cand->var_after = cand->var_before;
2248 }
2249 cand->important = important;
2250 cand->incremented_at = incremented_at;
2251 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2252
2253 if (step
2254 && TREE_CODE (step) != INTEGER_CST)
2255 {
2256 fd_ivopts_data = data;
2257 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2258 }
2259
2260 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2261 cand->ainc_use = use;
2262 else
2263 cand->ainc_use = NULL;
2264
2265 if (dump_file && (dump_flags & TDF_DETAILS))
2266 dump_cand (dump_file, cand);
2267 }
2268
2269 if (important && !cand->important)
2270 {
2271 cand->important = true;
2272 if (dump_file && (dump_flags & TDF_DETAILS))
2273 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2274 }
2275
2276 if (use)
2277 {
2278 bitmap_set_bit (use->related_cands, i);
2279 if (dump_file && (dump_flags & TDF_DETAILS))
2280 fprintf (dump_file, "Candidate %d is related to use %d\n",
2281 cand->id, use->id);
2282 }
2283
2284 return cand;
2285 }
2286
2287 /* Returns true if incrementing the induction variable at the end of the LOOP
2288 is allowed.
2289
2290 The purpose is to avoid splitting the latch edge with a biv increment, thus
2291 creating a jump, possibly confusing other optimization passes and leaving
2292 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2293 is not available (so we do not have a better alternative), or if the latch
2294 edge is already nonempty. */
2295
2296 static bool
2297 allow_ip_end_pos_p (struct loop *loop)
2298 {
2299 if (!ip_normal_pos (loop))
2300 return true;
2301
2302 if (!empty_block_p (ip_end_pos (loop)))
2303 return true;
2304
2305 return false;
2306 }
2307
2308 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2309 The important field is set to IMPORTANT. */
2310
2311 static void
2312 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2313 bool important, struct iv_use *use)
2314 {
2315 basic_block use_bb = gimple_bb (use->stmt);
2316 enum machine_mode mem_mode;
2317 unsigned HOST_WIDE_INT cstepi;
2318
2319 /* If we insert the increment in any position other than the standard
2320 ones, we must ensure that it is incremented once per iteration.
2321 It must not be in an inner nested loop, or one side of an if
2322 statement. */
2323 if (use_bb->loop_father != data->current_loop
2324 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2325 || stmt_could_throw_p (use->stmt)
2326 || !cst_and_fits_in_hwi (step))
2327 return;
2328
2329 cstepi = int_cst_value (step);
2330
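/* E.g. (illustrative): for a use "*p" with a 4-byte access and step 4 on a
   target with pre-increment addressing, we add a candidate with base p - 4
   that is incremented just before the use, so the increment can be folded
   into the address as a pre-increment. */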
2331 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2332 if ((HAVE_PRE_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2333 || (HAVE_PRE_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2334 {
2335 enum tree_code code = MINUS_EXPR;
2336 tree new_base;
2337 tree new_step = step;
2338
2339 if (POINTER_TYPE_P (TREE_TYPE (base)))
2340 {
2341 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2342 code = POINTER_PLUS_EXPR;
2343 }
2344 else
2345 new_step = fold_convert (TREE_TYPE (base), new_step);
2346 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2347 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2348 use->stmt);
2349 }
2350 if ((HAVE_POST_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2351 || (HAVE_POST_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2352 {
2353 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2354 use->stmt);
2355 }
2356 }
2357
2358 /* Adds a candidate BASE + STEP * i. The important field is set to IMPORTANT
2359 and the position to POS. If USE is not NULL, the candidate is set as related to
2360 it. The candidate computation is scheduled on all available positions. */
2361
2362 static void
2363 add_candidate (struct ivopts_data *data,
2364 tree base, tree step, bool important, struct iv_use *use)
2365 {
2366 if (ip_normal_pos (data->current_loop))
2367 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2368 if (ip_end_pos (data->current_loop)
2369 && allow_ip_end_pos_p (data->current_loop))
2370 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2371
2372 if (use != NULL && use->type == USE_ADDRESS)
2373 add_autoinc_candidates (data, base, step, important, use);
2374 }
2375
2376 /* Add a standard "0 + 1 * iteration" iv candidate for a
2377 type with SIZE bits. */
2378
2379 static void
2380 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2381 unsigned int size)
2382 {
2383 tree type = lang_hooks.types.type_for_size (size, true);
2384 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2385 true, NULL);
2386 }
2387
2388 /* Adds standard iv candidates. */
2389
2390 static void
2391 add_standard_iv_candidates (struct ivopts_data *data)
2392 {
2393 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2394
2395 /* The same for a double-integer type if it is still fast enough. */
2396 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2397 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2398 }
2399
2400
2401 /* Adds candidates based on the old induction variable IV. */
2402
2403 static void
2404 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2405 {
2406 gimple phi;
2407 tree def;
2408 struct iv_cand *cand;
2409
2410 add_candidate (data, iv->base, iv->step, true, NULL);
2411
2412 /* The same, but with initial value zero. */
2413 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2414 add_candidate (data, size_int (0), iv->step, true, NULL);
2415 else
2416 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2417 iv->step, true, NULL);
2418
2419 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2420 if (gimple_code (phi) == GIMPLE_PHI)
2421 {
2422 /* Additionally record the possibility of leaving the original iv
2423 untouched. */
2424 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2425 cand = add_candidate_1 (data,
2426 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2427 SSA_NAME_DEF_STMT (def));
2428 cand->var_before = iv->ssa_name;
2429 cand->var_after = def;
2430 }
2431 }
2432
2433 /* Adds candidates based on the old induction variables. */
2434
2435 static void
2436 add_old_ivs_candidates (struct ivopts_data *data)
2437 {
2438 unsigned i;
2439 struct iv *iv;
2440 bitmap_iterator bi;
2441
2442 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2443 {
2444 iv = ver_info (data, i)->iv;
2445 if (iv && iv->biv_p && !integer_zerop (iv->step))
2446 add_old_iv_candidates (data, iv);
2447 }
2448 }
2449
2450 /* Adds candidates based on the value of the induction variable IV and USE. */
2451
2452 static void
2453 add_iv_value_candidates (struct ivopts_data *data,
2454 struct iv *iv, struct iv_use *use)
2455 {
2456 unsigned HOST_WIDE_INT offset;
2457 tree base;
2458 tree basetype;
2459
2460 add_candidate (data, iv->base, iv->step, false, use);
2461
2462 /* The same, but with initial value zero. Make such variable important,
2463 since it is generic enough so that possibly many uses may be based
2464 on it. */
2465 basetype = TREE_TYPE (iv->base);
2466 if (POINTER_TYPE_P (basetype))
2467 basetype = sizetype;
2468 add_candidate (data, build_int_cst (basetype, 0),
2469 iv->step, true, use);
2470
2471 /* Third, try removing the constant offset. Make sure to even
2472 add a candidate for &a[0] vs. (T *)&a. */
2473 base = strip_offset (iv->base, &offset);
2474 if (offset
2475 || base != iv->base)
2476 add_candidate (data, base, iv->step, false, use);
2477 }
2478
2479 /* Adds candidates based on the uses. */
2480
2481 static void
2482 add_derived_ivs_candidates (struct ivopts_data *data)
2483 {
2484 unsigned i;
2485
2486 for (i = 0; i < n_iv_uses (data); i++)
2487 {
2488 struct iv_use *use = iv_use (data, i);
2489
2490 if (!use)
2491 continue;
2492
2493 switch (use->type)
2494 {
2495 case USE_NONLINEAR_EXPR:
2496 case USE_COMPARE:
2497 case USE_ADDRESS:
2498 /* Just add the ivs based on the value of the iv used here. */
2499 add_iv_value_candidates (data, use->iv, use);
2500 break;
2501
2502 default:
2503 gcc_unreachable ();
2504 }
2505 }
2506 }
2507
2508 /* Record important candidates and add them to related_cands bitmaps
2509 if needed. */
2510
2511 static void
2512 record_important_candidates (struct ivopts_data *data)
2513 {
2514 unsigned i;
2515 struct iv_use *use;
2516
2517 for (i = 0; i < n_iv_cands (data); i++)
2518 {
2519 struct iv_cand *cand = iv_cand (data, i);
2520
2521 if (cand->important)
2522 bitmap_set_bit (data->important_candidates, i);
2523 }
2524
2525 data->consider_all_candidates = (n_iv_cands (data)
2526 <= CONSIDER_ALL_CANDIDATES_BOUND);
2527
2528 if (data->consider_all_candidates)
2529 {
2530 /* We will not need "related_cands" bitmaps in this case,
2531 so release them to decrease peak memory consumption. */
2532 for (i = 0; i < n_iv_uses (data); i++)
2533 {
2534 use = iv_use (data, i);
2535 BITMAP_FREE (use->related_cands);
2536 }
2537 }
2538 else
2539 {
2540 /* Add important candidates to the related_cands bitmaps. */
2541 for (i = 0; i < n_iv_uses (data); i++)
2542 bitmap_ior_into (iv_use (data, i)->related_cands,
2543 data->important_candidates);
2544 }
2545 }
2546
2547 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2548 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2549 we allocate a simple list for every use. */
2550
2551 static void
2552 alloc_use_cost_map (struct ivopts_data *data)
2553 {
2554 unsigned i, size, s, j;
2555
2556 for (i = 0; i < n_iv_uses (data); i++)
2557 {
2558 struct iv_use *use = iv_use (data, i);
2559 bitmap_iterator bi;
2560
2561 if (data->consider_all_candidates)
2562 size = n_iv_cands (data);
2563 else
2564 {
2565 s = 0;
2566 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2567 {
2568 s++;
2569 }
2570
2571 /* Round up to a power of two, so that computing the modulo by it is fast. */
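/* E.g. (illustrative): five related candidates round SIZE up to 8. */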
2572 for (size = 1; size < s; size <<= 1)
2573 continue;
2574 }
2575
2576 use->n_map_members = size;
2577 use->cost_map = XCNEWVEC (struct cost_pair, size);
2578 }
2579 }
2580
2581 /* Returns a description of the computation cost of an expression whose
2582 runtime cost is RUNTIME and whose complexity corresponds to COMPLEXITY. */
2583
2584 static comp_cost
2585 new_cost (unsigned runtime, unsigned complexity)
2586 {
2587 comp_cost cost;
2588
2589 cost.cost = runtime;
2590 cost.complexity = complexity;
2591
2592 return cost;
2593 }
2594
2595 /* Adds costs COST1 and COST2. */
2596
2597 static comp_cost
2598 add_costs (comp_cost cost1, comp_cost cost2)
2599 {
2600 cost1.cost += cost2.cost;
2601 cost1.complexity += cost2.complexity;
2602
2603 return cost1;
2604 }
2605 /* Subtracts cost COST2 from COST1. */
2606
2607 static comp_cost
2608 sub_costs (comp_cost cost1, comp_cost cost2)
2609 {
2610 cost1.cost -= cost2.cost;
2611 cost1.complexity -= cost2.complexity;
2612
2613 return cost1;
2614 }
2615
2616 /* Returns a negative number if COST1 < COST2, a positive number if
2617 COST1 > COST2, and 0 if COST1 = COST2. */
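/* E.g. (illustrative): {cost 3, complexity 5} compares below {cost 4,
   complexity 0}; complexity only breaks ties in runtime cost. */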
2618
2619 static int
2620 compare_costs (comp_cost cost1, comp_cost cost2)
2621 {
2622 if (cost1.cost == cost2.cost)
2623 return cost1.complexity - cost2.complexity;
2624
2625 return cost1.cost - cost2.cost;
2626 }
2627
2628 /* Returns true if COST is infinite. */
2629
2630 static bool
2631 infinite_cost_p (comp_cost cost)
2632 {
2633 return cost.cost == INFTY;
2634 }
2635
2636 /* Sets the cost of the (USE, CANDIDATE) pair to COST and records that it
2637 depends on invariants DEPENDS_ON and that the value used in expressing it
2638 is VALUE. */
2639
2640 static void
2641 set_use_iv_cost (struct ivopts_data *data,
2642 struct iv_use *use, struct iv_cand *cand,
2643 comp_cost cost, bitmap depends_on, tree value,
2644 int inv_expr_id)
2645 {
2646 unsigned i, s;
2647
2648 if (infinite_cost_p (cost))
2649 {
2650 BITMAP_FREE (depends_on);
2651 return;
2652 }
2653
2654 if (data->consider_all_candidates)
2655 {
2656 use->cost_map[cand->id].cand = cand;
2657 use->cost_map[cand->id].cost = cost;
2658 use->cost_map[cand->id].depends_on = depends_on;
2659 use->cost_map[cand->id].value = value;
2660 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2661 return;
2662 }
2663
2664 /* n_map_members is a power of two, so this computes modulo. */
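/* E.g. (illustrative): with n_map_members == 8, candidate id 13 starts at
   slot 13 & 7 == 5, probes slots 5..7 and then wraps around to 0..4 until
   a free entry is found (open addressing with linear probing). */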
2665 s = cand->id & (use->n_map_members - 1);
2666 for (i = s; i < use->n_map_members; i++)
2667 if (!use->cost_map[i].cand)
2668 goto found;
2669 for (i = 0; i < s; i++)
2670 if (!use->cost_map[i].cand)
2671 goto found;
2672
2673 gcc_unreachable ();
2674
2675 found:
2676 use->cost_map[i].cand = cand;
2677 use->cost_map[i].cost = cost;
2678 use->cost_map[i].depends_on = depends_on;
2679 use->cost_map[i].value = value;
2680 use->cost_map[i].inv_expr_id = inv_expr_id;
2681 }
2682
2683 /* Gets cost of (USE, CANDIDATE) pair. */
2684
2685 static struct cost_pair *
2686 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2687 struct iv_cand *cand)
2688 {
2689 unsigned i, s;
2690 struct cost_pair *ret;
2691
2692 if (!cand)
2693 return NULL;
2694
2695 if (data->consider_all_candidates)
2696 {
2697 ret = use->cost_map + cand->id;
2698 if (!ret->cand)
2699 return NULL;
2700
2701 return ret;
2702 }
2703
2704 /* n_map_members is a power of two, so this computes modulo. */
2705 s = cand->id & (use->n_map_members - 1);
2706 for (i = s; i < use->n_map_members; i++)
2707 if (use->cost_map[i].cand == cand)
2708 return use->cost_map + i;
2709
2710 for (i = 0; i < s; i++)
2711 if (use->cost_map[i].cand == cand)
2712 return use->cost_map + i;
2713
2714 return NULL;
2715 }
2716
2717 /* Returns an estimate of the cost of computing SEQ. */
2718
2719 static unsigned
2720 seq_cost (rtx seq, bool speed)
2721 {
2722 unsigned cost = 0;
2723 rtx set;
2724
2725 for (; seq; seq = NEXT_INSN (seq))
2726 {
2727 set = single_set (seq);
2728 if (set)
2729 cost += rtx_cost (set, SET, speed);
2730 else
2731 cost++;
2732 }
2733
2734 return cost;
2735 }
2736
2737 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
2738 static rtx
2739 produce_memory_decl_rtl (tree obj, int *regno)
2740 {
2741 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2742 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2743 rtx x;
2744
2745 gcc_assert (obj);
2746 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2747 {
2748 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2749 x = gen_rtx_SYMBOL_REF (address_mode, name);
2750 SET_SYMBOL_REF_DECL (x, obj);
2751 x = gen_rtx_MEM (DECL_MODE (obj), x);
2752 set_mem_addr_space (x, as);
2753 targetm.encode_section_info (obj, x, true);
2754 }
2755 else
2756 {
2757 x = gen_raw_REG (address_mode, (*regno)++);
2758 x = gen_rtx_MEM (DECL_MODE (obj), x);
2759 set_mem_addr_space (x, as);
2760 }
2761
2762 return x;
2763 }
2764
2765 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
2766 walk_tree. DATA contains the actual fake register number. */
2767
2768 static tree
2769 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2770 {
2771 tree obj = NULL_TREE;
2772 rtx x = NULL_RTX;
2773 int *regno = (int *) data;
2774
2775 switch (TREE_CODE (*expr_p))
2776 {
2777 case ADDR_EXPR:
2778 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2779 handled_component_p (*expr_p);
2780 expr_p = &TREE_OPERAND (*expr_p, 0))
2781 continue;
2782 obj = *expr_p;
2783 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2784 x = produce_memory_decl_rtl (obj, regno);
2785 break;
2786
2787 case SSA_NAME:
2788 *ws = 0;
2789 obj = SSA_NAME_VAR (*expr_p);
2790 if (!DECL_RTL_SET_P (obj))
2791 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2792 break;
2793
2794 case VAR_DECL:
2795 case PARM_DECL:
2796 case RESULT_DECL:
2797 *ws = 0;
2798 obj = *expr_p;
2799
2800 if (DECL_RTL_SET_P (obj))
2801 break;
2802
2803 if (DECL_MODE (obj) == BLKmode)
2804 x = produce_memory_decl_rtl (obj, regno);
2805 else
2806 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2807
2808 break;
2809
2810 default:
2811 break;
2812 }
2813
2814 if (x)
2815 {
2816 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2817 SET_DECL_RTL (obj, x);
2818 }
2819
2820 return NULL_TREE;
2821 }
2822
2823 /* Determines cost of the computation of EXPR. */
2824
2825 static unsigned
2826 computation_cost (tree expr, bool speed)
2827 {
2828 rtx seq, rslt;
2829 tree type = TREE_TYPE (expr);
2830 unsigned cost;
2831 /* Avoid using hard regs in ways which may be unsupported. */
2832 int regno = LAST_VIRTUAL_REGISTER + 1;
2833 struct cgraph_node *node = cgraph_node (current_function_decl);
2834 enum node_frequency real_frequency = node->frequency;
2835
2836 node->frequency = NODE_FREQUENCY_NORMAL;
2837 crtl->maybe_hot_insn_p = speed;
2838 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2839 start_sequence ();
2840 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2841 seq = get_insns ();
2842 end_sequence ();
2843 default_rtl_profile ();
2844 node->frequency = real_frequency;
2845
2846 cost = seq_cost (seq, speed);
2847 if (MEM_P (rslt))
2848 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2849 TYPE_ADDR_SPACE (type), speed);
2850
2851 return cost;
2852 }
2853
2854 /* Returns variable containing the value of candidate CAND at statement AT. */
2855
2856 static tree
2857 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2858 {
2859 if (stmt_after_increment (loop, cand, stmt))
2860 return cand->var_after;
2861 else
2862 return cand->var_before;
2863 }
2864
2865 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2866 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
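/* E.g. (illustrative): for a 16-bit type, BITNO is 15, so the constant
   0x8000 yields 1 while 0x7fff yields 0. */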
2867
2868 int
2869 tree_int_cst_sign_bit (const_tree t)
2870 {
2871 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2872 unsigned HOST_WIDE_INT w;
2873
2874 if (bitno < HOST_BITS_PER_WIDE_INT)
2875 w = TREE_INT_CST_LOW (t);
2876 else
2877 {
2878 w = TREE_INT_CST_HIGH (t);
2879 bitno -= HOST_BITS_PER_WIDE_INT;
2880 }
2881
2882 return (w >> bitno) & 1;
2883 }
2884
2885 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2886 same precision that is at least as wide as the precision of TYPE, stores
2887 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2888 type of A and B. */
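/* E.g. (illustrative): if A is (unsigned int) la and B is (unsigned int) lb
   where LA and LB have type long, *A becomes LA, *B becomes LB, and "long"
   is returned, so the difference can be folded in the wider type. */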
2889
2890 static tree
2891 determine_common_wider_type (tree *a, tree *b)
2892 {
2893 tree wider_type = NULL;
2894 tree suba, subb;
2895 tree atype = TREE_TYPE (*a);
2896
2897 if (CONVERT_EXPR_P (*a))
2898 {
2899 suba = TREE_OPERAND (*a, 0);
2900 wider_type = TREE_TYPE (suba);
2901 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2902 return atype;
2903 }
2904 else
2905 return atype;
2906
2907 if (CONVERT_EXPR_P (*b))
2908 {
2909 subb = TREE_OPERAND (*b, 0);
2910 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2911 return atype;
2912 }
2913 else
2914 return atype;
2915
2916 *a = suba;
2917 *b = subb;
2918 return wider_type;
2919 }
2920
2921 /* Determines the expression by which USE is expressed from induction variable
2922 CAND at statement AT in LOOP. The expression is stored in a decomposed
2923 form into AFF. Returns false if USE cannot be expressed using CAND. */
2924
2925 static bool
2926 get_computation_aff (struct loop *loop,
2927 struct iv_use *use, struct iv_cand *cand, gimple at,
2928 struct affine_tree_combination *aff)
2929 {
2930 tree ubase = use->iv->base;
2931 tree ustep = use->iv->step;
2932 tree cbase = cand->iv->base;
2933 tree cstep = cand->iv->step, cstep_common;
2934 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2935 tree common_type, var;
2936 tree uutype;
2937 aff_tree cbase_aff, var_aff;
2938 double_int rat;
2939
2940 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2941 {
2942 /* We do not have a precision to express the values of the use. */
2943 return false;
2944 }
2945
2946 var = var_at_stmt (loop, cand, at);
2947 uutype = unsigned_type_for (utype);
2948
2949 /* If the conversion is not noop, perform it. */
2950 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2951 {
2952 cstep = fold_convert (uutype, cstep);
2953 cbase = fold_convert (uutype, cbase);
2954 var = fold_convert (uutype, var);
2955 }
2956
2957 if (!constant_multiple_of (ustep, cstep, &rat))
2958 return false;
2959
2960 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2961 type, we achieve better folding by computing their difference in this
2962 wider type, and cast the result to UUTYPE. We do not need to worry about
2963 overflows, as all the arithmetic will in the end be performed in UUTYPE
2964 anyway. */
2965 common_type = determine_common_wider_type (&ubase, &cbase);
2966
2967 /* use = ubase - ratio * cbase + ratio * var. */
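/* E.g. (illustrative): for a use {base 4, step 2} and a candidate {base 0,
   step 1}, RAT is 2 and the use value is rebuilt as 4 - 2 * 0 + 2 * var. */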
2968 tree_to_aff_combination (ubase, common_type, aff);
2969 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2970 tree_to_aff_combination (var, uutype, &var_aff);
2971
2972 /* We need to shift the value if we are after the increment. */
2973 if (stmt_after_increment (loop, cand, at))
2974 {
2975 aff_tree cstep_aff;
2976
2977 if (common_type != uutype)
2978 cstep_common = fold_convert (common_type, cstep);
2979 else
2980 cstep_common = cstep;
2981
2982 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2983 aff_combination_add (&cbase_aff, &cstep_aff);
2984 }
2985
2986 aff_combination_scale (&cbase_aff, double_int_neg (rat));
2987 aff_combination_add (aff, &cbase_aff);
2988 if (common_type != uutype)
2989 aff_combination_convert (aff, uutype);
2990
2991 aff_combination_scale (&var_aff, rat);
2992 aff_combination_add (aff, &var_aff);
2993
2994 return true;
2995 }
2996
2997 /* Determines the expression by which USE is expressed from induction variable
2998 CAND at statement AT in LOOP. The computation is unshared. */
2999
3000 static tree
3001 get_computation_at (struct loop *loop,
3002 struct iv_use *use, struct iv_cand *cand, gimple at)
3003 {
3004 aff_tree aff;
3005 tree type = TREE_TYPE (use->iv->base);
3006
3007 if (!get_computation_aff (loop, use, cand, at, &aff))
3008 return NULL_TREE;
3009 unshare_aff_combination (&aff);
3010 return fold_convert (type, aff_combination_to_tree (&aff));
3011 }
3012
3013 /* Determines the expression by which USE is expressed from induction variable
3014 CAND in LOOP. The computation is unshared. */
3015
3016 static tree
3017 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3018 {
3019 return get_computation_at (loop, use, cand, use->stmt);
3020 }
3021
3022 /* Adjust the cost COST for being in loop setup rather than loop body.
3023 If we're optimizing for space, the loop setup overhead is constant;
3024 if we're optimizing for speed, amortize it over the per-iteration cost. */
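/* E.g. (illustrative): a setup cost of 40 in a loop expected to run 10
   iterations on average is accounted as 4 per iteration when optimizing
   for speed. */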
3025 static unsigned
3026 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3027 {
3028 if (cost == INFTY)
3029 return cost;
3030 else if (optimize_loop_for_speed_p (data->current_loop))
3031 return cost / avg_loop_niter (data->current_loop);
3032 else
3033 return cost;
3034 }
3035
3036 /* Returns cost of addition in MODE. */
3037
3038 static unsigned
3039 add_cost (enum machine_mode mode, bool speed)
3040 {
3041 static unsigned costs[NUM_MACHINE_MODES];
3042 rtx seq;
3043 unsigned cost;
3044
3045 if (costs[mode])
3046 return costs[mode];
3047
3048 start_sequence ();
3049 force_operand (gen_rtx_fmt_ee (PLUS, mode,
3050 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3051 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
3052 NULL_RTX);
3053 seq = get_insns ();
3054 end_sequence ();
3055
3056 cost = seq_cost (seq, speed);
3057 if (!cost)
3058 cost = 1;
3059
3060 costs[mode] = cost;
3061
3062 if (dump_file && (dump_flags & TDF_DETAILS))
3063 fprintf (dump_file, "Addition in %s costs %d\n",
3064 GET_MODE_NAME (mode), cost);
3065 return cost;
3066 }
3067
3068 /* Entry in a hashtable of already known costs for multiplication. */
3069 struct mbc_entry
3070 {
3071 HOST_WIDE_INT cst; /* The constant to multiply by. */
3072 enum machine_mode mode; /* In mode. */
3073 unsigned cost; /* The cost. */
3074 };
3075
3076 /* Computes the hash value for ENTRY. */
3077
3078 static hashval_t
3079 mbc_entry_hash (const void *entry)
3080 {
3081 const struct mbc_entry *e = (const struct mbc_entry *) entry;
3082
3083 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3084 }
3085
3086 /* Compares the hash table entries ENTRY1 and ENTRY2. */
3087
3088 static int
3089 mbc_entry_eq (const void *entry1, const void *entry2)
3090 {
3091 const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
3092 const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
3093
3094 return (e1->mode == e2->mode
3095 && e1->cst == e2->cst);
3096 }
3097
3098 /* Returns cost of multiplication by constant CST in MODE. */
3099
3100 unsigned
3101 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
3102 {
3103 static htab_t costs;
3104 struct mbc_entry **cached, act;
3105 rtx seq;
3106 unsigned cost;
3107
3108 if (!costs)
3109 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3110
3111 act.mode = mode;
3112 act.cst = cst;
3113 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3114 if (*cached)
3115 return (*cached)->cost;
3116
3117 *cached = XNEW (struct mbc_entry);
3118 (*cached)->mode = mode;
3119 (*cached)->cst = cst;
3120
3121 start_sequence ();
3122 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3123 gen_int_mode (cst, mode), NULL_RTX, 0);
3124 seq = get_insns ();
3125 end_sequence ();
3126
3127 cost = seq_cost (seq, speed);
3128
3129 if (dump_file && (dump_flags & TDF_DETAILS))
3130 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3131 (int) cst, GET_MODE_NAME (mode), cost);
3132
3133 (*cached)->cost = cost;
3134
3135 return cost;
3136 }
3137
3138 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3139 validity for a memory reference accessing memory of mode MODE in
3140 address space AS. */
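/* E.g. (illustrative): on a target whose addressing modes allow
   base + index * scale with scale 1, 2, 4 or 8 (such as x86), exactly those
   ratios are recorded as valid multipliers. */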
3141
3142 DEF_VEC_P (sbitmap);
3143 DEF_VEC_ALLOC_P (sbitmap, heap);
3144
3145 bool
3146 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3147 addr_space_t as)
3148 {
3149 #define MAX_RATIO 128
3150 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3151 static VEC (sbitmap, heap) *valid_mult_list;
3152 sbitmap valid_mult;
3153
3154 if (data_index >= VEC_length (sbitmap, valid_mult_list))
3155 VEC_safe_grow_cleared (sbitmap, heap, valid_mult_list, data_index + 1);
3156
3157 valid_mult = VEC_index (sbitmap, valid_mult_list, data_index);
3158 if (!valid_mult)
3159 {
3160 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3161 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3162 rtx addr;
3163 HOST_WIDE_INT i;
3164
3165 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3166 sbitmap_zero (valid_mult);
3167 addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3168 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3169 {
3170 XEXP (addr, 1) = gen_int_mode (i, address_mode);
3171 if (memory_address_addr_space_p (mode, addr, as))
3172 SET_BIT (valid_mult, i + MAX_RATIO);
3173 }
3174
3175 if (dump_file && (dump_flags & TDF_DETAILS))
3176 {
3177 fprintf (dump_file, " allowed multipliers:");
3178 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3179 if (TEST_BIT (valid_mult, i + MAX_RATIO))
3180 fprintf (dump_file, " %d", (int) i);
3181 fprintf (dump_file, "\n");
3182 fprintf (dump_file, "\n");
3183 }
3184
3185 VEC_replace (sbitmap, valid_mult_list, data_index, valid_mult);
3186 }
3187
3188 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3189 return false;
3190
3191 return TEST_BIT (valid_mult, ratio + MAX_RATIO);
3192 }
3193
3194 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3195 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3196 variable is omitted. Compute the cost for a memory reference that accesses
3197 a memory location of mode MEM_MODE in address space AS.
3198
3199 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3200 size of MEM_MODE / RATIO) is available. To make this determination, we
3201 look at the size of the increment to be made, which is given in CSTEP.
3202 CSTEP may be zero if the step is unknown.
3203 STMT_AFTER_INC is true iff the statement we're looking at is after the
3204 increment of the original biv.
3205
3206 TODO -- there must be some better way. This all is quite crude. */
3207
3208 typedef struct
3209 {
3210 HOST_WIDE_INT min_offset, max_offset;
3211 unsigned costs[2][2][2][2];
3212 } *address_cost_data;
3213
3214 DEF_VEC_P (address_cost_data);
3215 DEF_VEC_ALLOC_P (address_cost_data, heap);
3216
3217 static comp_cost
3218 get_address_cost (bool symbol_present, bool var_present,
3219 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3220 HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3221 addr_space_t as, bool speed,
3222 bool stmt_after_inc, bool *may_autoinc)
3223 {
3224 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3225 static VEC(address_cost_data, heap) *address_cost_data_list;
3226 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3227 address_cost_data data;
3228 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3229 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3230 unsigned cost, acost, complexity;
3231 bool offset_p, ratio_p, autoinc;
3232 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3233 unsigned HOST_WIDE_INT mask;
3234 unsigned bits;
3235
3236 if (data_index >= VEC_length (address_cost_data, address_cost_data_list))
3237 VEC_safe_grow_cleared (address_cost_data, heap, address_cost_data_list,
3238 data_index + 1);
3239
3240 data = VEC_index (address_cost_data, address_cost_data_list, data_index);
3241 if (!data)
3242 {
3243 HOST_WIDE_INT i;
3244 HOST_WIDE_INT rat, off = 0;
3245 int old_cse_not_expected, width;
3246 unsigned sym_p, var_p, off_p, rat_p, add_c;
3247 rtx seq, addr, base;
3248 rtx reg0, reg1;
3249
3250 data = (address_cost_data) xcalloc (1, sizeof (*data));
3251
3252 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3253
3254 width = GET_MODE_BITSIZE (address_mode) - 1;
3255 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3256 width = HOST_BITS_PER_WIDE_INT - 1;
3257 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3258
3259 for (i = width; i >= 0; i--)
3260 {
3261 off = -((HOST_WIDE_INT) 1 << i);
3262 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3263 if (memory_address_addr_space_p (mem_mode, addr, as))
3264 break;
3265 }
3266 data->min_offset = (i == -1 ? 0 : off);
3267
3268 for (i = width; i >= 0; i--)
3269 {
3270 off = ((HOST_WIDE_INT) 1 << i) - 1;
3271 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3272 if (memory_address_addr_space_p (mem_mode, addr, as))
3273 break;
3274 }
3275 if (i == -1)
3276 off = 0;
3277 data->max_offset = off;
3278
3279 if (dump_file && (dump_flags & TDF_DETAILS))
3280 {
3281 fprintf (dump_file, "get_address_cost:\n");
3282 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3283 GET_MODE_NAME (mem_mode),
3284 data->min_offset);
3285 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3286 GET_MODE_NAME (mem_mode),
3287 data->max_offset);
3288 }
3289
3290 rat = 1;
3291 for (i = 2; i <= MAX_RATIO; i++)
3292 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3293 {
3294 rat = i;
3295 break;
3296 }
3297
3298 /* Compute the cost of various addressing modes. */
3299 acost = 0;
3300 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3301 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3302
3303 if (HAVE_PRE_DECREMENT)
3304 {
3305 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3306 has_predec[mem_mode]
3307 = memory_address_addr_space_p (mem_mode, addr, as);
3308 }
3309 if (HAVE_POST_DECREMENT)
3310 {
3311 addr = gen_rtx_POST_DEC (address_mode, reg0);
3312 has_postdec[mem_mode]
3313 = memory_address_addr_space_p (mem_mode, addr, as);
3314 }
3315 if (HAVE_PRE_INCREMENT)
3316 {
3317 addr = gen_rtx_PRE_INC (address_mode, reg0);
3318 has_preinc[mem_mode]
3319 = memory_address_addr_space_p (mem_mode, addr, as);
3320 }
3321 if (HAVE_POST_INCREMENT)
3322 {
3323 addr = gen_rtx_POST_INC (address_mode, reg0);
3324 has_postinc[mem_mode]
3325 = memory_address_addr_space_p (mem_mode, addr, as);
3326 }
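/* Probe all 16 combinations of the symbol, var, offset and ratio parts
   being present; bit 0 of I selects the symbol, bit 1 the var, bit 2 the
   offset and bit 3 the ratio, matching the decoding below. */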
3327 for (i = 0; i < 16; i++)
3328 {
3329 sym_p = i & 1;
3330 var_p = (i >> 1) & 1;
3331 off_p = (i >> 2) & 1;
3332 rat_p = (i >> 3) & 1;
3333
3334 addr = reg0;
3335 if (rat_p)
3336 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3337 gen_int_mode (rat, address_mode));
3338
3339 if (var_p)
3340 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3341
3342 if (sym_p)
3343 {
3344 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3345 /* ??? We can run into trouble with some backends by presenting
3346 it with symbols which haven't been properly passed through
3347 targetm.encode_section_info. By setting the local bit, we
3348 enhance the probability of things working. */
3349 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3350
3351 if (off_p)
3352 base = gen_rtx_fmt_e (CONST, address_mode,
3353 gen_rtx_fmt_ee
3354 (PLUS, address_mode, base,
3355 gen_int_mode (off, address_mode)));
3356 }
3357 else if (off_p)
3358 base = gen_int_mode (off, address_mode);
3359 else
3360 base = NULL_RTX;
3361
3362 if (base)
3363 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3364
3365 start_sequence ();
3366 /* To avoid splitting addressing modes, pretend that no cse will
3367 follow. */
3368 old_cse_not_expected = cse_not_expected;
3369 cse_not_expected = true;
3370 addr = memory_address_addr_space (mem_mode, addr, as);
3371 cse_not_expected = old_cse_not_expected;
3372 seq = get_insns ();
3373 end_sequence ();
3374
3375 acost = seq_cost (seq, speed);
3376 acost += address_cost (addr, mem_mode, as, speed);
3377
3378 if (!acost)
3379 acost = 1;
3380 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3381 }
3382
3383 /* On some targets, it is quite expensive to load a symbol into a register,
3384 which makes addresses that contain symbols look much more expensive.
3385 However, the symbol will have to be loaded in any case before the
3386 loop (and quite likely we have it in a register already), so it does not
3387 make much sense to penalize them too heavily. So make some final
3388 tweaks for the SYMBOL_PRESENT modes:
3389
3390 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3391 var is cheaper, use this mode with small penalty.
3392 If VAR_PRESENT is true, try whether the mode with
3393 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3394 if this is the case, use it. */
3395 add_c = add_cost (address_mode, speed);
3396 for (i = 0; i < 8; i++)
3397 {
3398 var_p = i & 1;
3399 off_p = (i >> 1) & 1;
3400 rat_p = (i >> 2) & 1;
3401
3402 acost = data->costs[0][1][off_p][rat_p] + 1;
3403 if (var_p)
3404 acost += add_c;
3405
3406 if (acost < data->costs[1][var_p][off_p][rat_p])
3407 data->costs[1][var_p][off_p][rat_p] = acost;
3408 }
3409
3410 if (dump_file && (dump_flags & TDF_DETAILS))
3411 {
3412 fprintf (dump_file, "Address costs:\n");
3413
3414 for (i = 0; i < 16; i++)
3415 {
3416 sym_p = i & 1;
3417 var_p = (i >> 1) & 1;
3418 off_p = (i >> 2) & 1;
3419 rat_p = (i >> 3) & 1;
3420
3421 fprintf (dump_file, " ");
3422 if (sym_p)
3423 fprintf (dump_file, "sym + ");
3424 if (var_p)
3425 fprintf (dump_file, "var + ");
3426 if (off_p)
3427 fprintf (dump_file, "cst + ");
3428 if (rat_p)
3429 fprintf (dump_file, "rat * ");
3430
3431 acost = data->costs[sym_p][var_p][off_p][rat_p];
3432 fprintf (dump_file, "index costs %d\n", acost);
3433 }
3434 if (has_predec[mem_mode] || has_postdec[mem_mode]
3435 || has_preinc[mem_mode] || has_postinc[mem_mode])
3436 fprintf (dump_file, " May include autoinc/dec\n");
3437 fprintf (dump_file, "\n");
3438 }
3439
3440 VEC_replace (address_cost_data, address_cost_data_list,
3441 data_index, data);
3442 }
3443
3444 bits = GET_MODE_BITSIZE (address_mode);
3445 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3446 offset &= mask;
3447 if ((offset >> (bits - 1) & 1))
3448 offset |= ~mask;
3449 s_offset = offset;
3450
3451 autoinc = false;
3452 msize = GET_MODE_SIZE (mem_mode);
3453 autoinc_offset = offset;
3454 if (stmt_after_inc)
3455 autoinc_offset += ratio * cstep;
3456 if (symbol_present || var_present || ratio != 1)
3457 autoinc = false;
3458 else if ((has_postinc[mem_mode] && autoinc_offset == 0
3459 && msize == cstep)
3460 || (has_postdec[mem_mode] && autoinc_offset == 0
3461 && msize == -cstep)
3462 || (has_preinc[mem_mode] && autoinc_offset == msize
3463 && msize == cstep)
3464 || (has_predec[mem_mode] && autoinc_offset == -msize
3465 && msize == -cstep))
3466 autoinc = true;
3467
3468 cost = 0;
3469 offset_p = (s_offset != 0
3470 && data->min_offset <= s_offset
3471 && s_offset <= data->max_offset);
3472 ratio_p = (ratio != 1
3473 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3474
3475 if (ratio != 1 && !ratio_p)
3476 cost += multiply_by_cost (ratio, address_mode, speed);
3477
3478 if (s_offset && !offset_p && !symbol_present)
3479 cost += add_cost (address_mode, speed);
3480
3481 if (may_autoinc)
3482 *may_autoinc = autoinc;
3483 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3484 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3485 return new_cost (cost + acost, complexity);
3486 }
3487
3488 /* Estimates cost of forcing expression EXPR into a variable. */
3489
3490 static comp_cost
3491 force_expr_to_var_cost (tree expr, bool speed)
3492 {
3493 static bool costs_initialized = false;
3494 static unsigned integer_cost [2];
3495 static unsigned symbol_cost [2];
3496 static unsigned address_cost [2];
3497 tree op0, op1;
3498 comp_cost cost0, cost1, cost;
3499 enum machine_mode mode;
3500
3501 if (!costs_initialized)
3502 {
3503 tree type = build_pointer_type (integer_type_node);
3504 tree var, addr;
3505 rtx x;
3506 int i;
3507
3508 var = create_tmp_var_raw (integer_type_node, "test_var");
3509 TREE_STATIC (var) = 1;
3510 x = produce_memory_decl_rtl (var, NULL);
3511 SET_DECL_RTL (var, x);
3512
3513 addr = build1 (ADDR_EXPR, type, var);
3514
3515
3516 for (i = 0; i < 2; i++)
3517 {
3518 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3519 2000), i);
3520
3521 symbol_cost[i] = computation_cost (addr, i) + 1;
3522
3523 address_cost[i]
3524 = computation_cost (build2 (POINTER_PLUS_EXPR, type,
3525 addr,
3526 build_int_cst (sizetype, 2000)), i) + 1;
3527 if (dump_file && (dump_flags & TDF_DETAILS))
3528 {
3529 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3530 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3531 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3532 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3533 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3534 fprintf (dump_file, "\n");
3535 }
3536 }
3537
3538 costs_initialized = true;
3539 }
3540
3541 STRIP_NOPS (expr);
3542
3543 if (SSA_VAR_P (expr))
3544 return zero_cost;
3545
3546 if (is_gimple_min_invariant (expr))
3547 {
3548 if (TREE_CODE (expr) == INTEGER_CST)
3549 return new_cost (integer_cost [speed], 0);
3550
3551 if (TREE_CODE (expr) == ADDR_EXPR)
3552 {
3553 tree obj = TREE_OPERAND (expr, 0);
3554
3555 if (TREE_CODE (obj) == VAR_DECL
3556 || TREE_CODE (obj) == PARM_DECL
3557 || TREE_CODE (obj) == RESULT_DECL)
3558 return new_cost (symbol_cost [speed], 0);
3559 }
3560
3561 return new_cost (address_cost [speed], 0);
3562 }
3563
3564 switch (TREE_CODE (expr))
3565 {
3566 case POINTER_PLUS_EXPR:
3567 case PLUS_EXPR:
3568 case MINUS_EXPR:
3569 case MULT_EXPR:
3570 op0 = TREE_OPERAND (expr, 0);
3571 op1 = TREE_OPERAND (expr, 1);
3572 STRIP_NOPS (op0);
3573 STRIP_NOPS (op1);
3574
3575 if (is_gimple_val (op0))
3576 cost0 = zero_cost;
3577 else
3578 cost0 = force_expr_to_var_cost (op0, speed);
3579
3580 if (is_gimple_val (op1))
3581 cost1 = zero_cost;
3582 else
3583 cost1 = force_expr_to_var_cost (op1, speed);
3584
3585 break;
3586
3587 case NEGATE_EXPR:
3588 op0 = TREE_OPERAND (expr, 0);
3589 STRIP_NOPS (op0);
3590 op1 = NULL_TREE;
3591
3592 if (is_gimple_val (op0))
3593 cost0 = zero_cost;
3594 else
3595 cost0 = force_expr_to_var_cost (op0, speed);
3596
3597 cost1 = zero_cost;
3598 break;
3599
3600 default:
3601 /* Just an arbitrary value, FIXME. */
3602 return new_cost (target_spill_cost[speed], 0);
3603 }
3604
3605 mode = TYPE_MODE (TREE_TYPE (expr));
3606 switch (TREE_CODE (expr))
3607 {
3608 case POINTER_PLUS_EXPR:
3609 case PLUS_EXPR:
3610 case MINUS_EXPR:
3611 case NEGATE_EXPR:
3612 cost = new_cost (add_cost (mode, speed), 0);
3613 break;
3614
3615 case MULT_EXPR:
3616 if (cst_and_fits_in_hwi (op0))
3617 cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0);
3618 else if (cst_and_fits_in_hwi (op1))
3619 cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0);
3620 else
3621 return new_cost (target_spill_cost [speed], 0);
3622 break;
3623
3624 default:
3625 gcc_unreachable ();
3626 }
3627
3628 cost = add_costs (cost, cost0);
3629 cost = add_costs (cost, cost1);
3630
3631 /* Bound the cost by target_spill_cost. The parts of complicated
3632 computations often are either loop invariant or at least can
3633 be shared between several iv uses, so letting this grow without
3634 limits would not give reasonable results. */
3635 if (cost.cost > (int) target_spill_cost [speed])
3636 cost.cost = target_spill_cost [speed];
3637
3638 return cost;
3639 }
3640
3641 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3642 invariants the computation depends on. */
3643
3644 static comp_cost
3645 force_var_cost (struct ivopts_data *data,
3646 tree expr, bitmap *depends_on)
3647 {
3648 if (depends_on)
3649 {
3650 fd_ivopts_data = data;
3651 walk_tree (&expr, find_depends, depends_on, NULL);
3652 }
3653
3654 return force_expr_to_var_cost (expr, data->speed);
3655 }
3656
3657 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3658 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3659 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3660 invariants the computation depends on. */
3661
3662 static comp_cost
3663 split_address_cost (struct ivopts_data *data,
3664 tree addr, bool *symbol_present, bool *var_present,
3665 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3666 {
3667 tree core;
3668 HOST_WIDE_INT bitsize;
3669 HOST_WIDE_INT bitpos;
3670 tree toffset;
3671 enum machine_mode mode;
3672 int unsignedp, volatilep;
3673
3674 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3675 &unsignedp, &volatilep, false);
3676
3677 if (toffset != 0
3678 || bitpos % BITS_PER_UNIT != 0
3679 || TREE_CODE (core) != VAR_DECL)
3680 {
3681 *symbol_present = false;
3682 *var_present = true;
3683 fd_ivopts_data = data;
3684 walk_tree (&addr, find_depends, depends_on, NULL);
3685 return new_cost (target_spill_cost[data->speed], 0);
3686 }
3687
3688 *offset += bitpos / BITS_PER_UNIT;
3689 if (TREE_STATIC (core)
3690 || DECL_EXTERNAL (core))
3691 {
3692 *symbol_present = true;
3693 *var_present = false;
3694 return zero_cost;
3695 }
3696
3697 *symbol_present = false;
3698 *var_present = true;
3699 return zero_cost;
3700 }
3701
3702 /* Estimates cost of expressing difference of addresses E1 - E2 as
3703 var + symbol + offset. The value of offset is added to OFFSET,
3704 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3705 part is missing. DEPENDS_ON is a set of the invariants the computation
3706 depends on. */
3707
3708 static comp_cost
3709 ptr_difference_cost (struct ivopts_data *data,
3710 tree e1, tree e2, bool *symbol_present, bool *var_present,
3711 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3712 {
3713 HOST_WIDE_INT diff = 0;
3714 aff_tree aff_e1, aff_e2;
3715 tree type;
3716
3717 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3718
3719 if (ptr_difference_const (e1, e2, &diff))
3720 {
3721 *offset += diff;
3722 *symbol_present = false;
3723 *var_present = false;
3724 return zero_cost;
3725 }
3726
3727 if (integer_zerop (e2))
3728 return split_address_cost (data, TREE_OPERAND (e1, 0),
3729 symbol_present, var_present, offset, depends_on);
3730
3731 *symbol_present = false;
3732 *var_present = true;
3733
3734 type = signed_type_for (TREE_TYPE (e1));
3735 tree_to_aff_combination (e1, type, &aff_e1);
3736 tree_to_aff_combination (e2, type, &aff_e2);
3737 aff_combination_scale (&aff_e2, double_int_minus_one);
3738 aff_combination_add (&aff_e1, &aff_e2);
3739
3740 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3741 }
3742
3743 /* Estimates cost of expressing difference E1 - E2 as
3744 var + symbol + offset. The value of offset is added to OFFSET,
3745 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3746 part is missing. DEPENDS_ON is a set of the invariants the computation
3747 depends on. */
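/* E.g. (illustrative): for E1 == p + 4 and E2 == p + 16, the constant
   offsets are stripped first, *OFFSET is adjusted by -12, and since the
   stripped operands are equal, no variable part remains and zero cost is
   returned. */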
3748
3749 static comp_cost
3750 difference_cost (struct ivopts_data *data,
3751 tree e1, tree e2, bool *symbol_present, bool *var_present,
3752 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3753 {
3754 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3755 unsigned HOST_WIDE_INT off1, off2;
3756 aff_tree aff_e1, aff_e2;
3757 tree type;
3758
3759 e1 = strip_offset (e1, &off1);
3760 e2 = strip_offset (e2, &off2);
3761 *offset += off1 - off2;
3762
3763 STRIP_NOPS (e1);
3764 STRIP_NOPS (e2);
3765
3766 if (TREE_CODE (e1) == ADDR_EXPR)
3767 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3768 offset, depends_on);
3769 *symbol_present = false;
3770
3771 if (operand_equal_p (e1, e2, 0))
3772 {
3773 *var_present = false;
3774 return zero_cost;
3775 }
3776
3777 *var_present = true;
3778
3779 if (integer_zerop (e2))
3780 return force_var_cost (data, e1, depends_on);
3781
3782 if (integer_zerop (e1))
3783 {
3784 comp_cost cost = force_var_cost (data, e2, depends_on);
3785 cost.cost += multiply_by_cost (-1, mode, data->speed);
3786 return cost;
3787 }
3788
3789 type = signed_type_for (TREE_TYPE (e1));
3790 tree_to_aff_combination (e1, type, &aff_e1);
3791 tree_to_aff_combination (e2, type, &aff_e2);
3792 aff_combination_scale (&aff_e2, double_int_minus_one);
3793 aff_combination_add (&aff_e1, &aff_e2);
3794
3795 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3796 }
3797
3798 /* Returns true if AFF1 and AFF2 are identical. */
3799
3800 static bool
3801 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3802 {
3803 unsigned i;
3804
3805 if (aff1->n != aff2->n)
3806 return false;
3807
3808 for (i = 0; i < aff1->n; i++)
3809 {
3810 if (double_int_cmp (aff1->elts[i].coef, aff2->elts[i].coef, 0) != 0)
3811 return false;
3812
3813 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3814 return false;
3815 }
3816 return true;
3817 }
3818
3819 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3820 requires a new compiler-generated temporary. Returns -1 otherwise.
3821 ADDRESS_P is a flag indicating if the expression is for address
3822 computation. */
3823
3824 static int
3825 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3826 tree cbase, HOST_WIDE_INT ratio,
3827 bool address_p)
3828 {
3829 aff_tree ubase_aff, cbase_aff;
3830 tree expr, ub, cb;
3831 struct iv_inv_expr_ent ent;
3832 struct iv_inv_expr_ent **slot;
3833
3834 STRIP_NOPS (ubase);
3835 STRIP_NOPS (cbase);
3836 ub = ubase;
3837 cb = cbase;
3838
3839 if ((TREE_CODE (ubase) == INTEGER_CST)
3840 && (TREE_CODE (cbase) == INTEGER_CST))
3841 return -1;
3842
3843 /* Strips the constant part. */
3844 if (TREE_CODE (ubase) == PLUS_EXPR
3845 || TREE_CODE (ubase) == MINUS_EXPR
3846 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
3847 {
3848 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
3849 ubase = TREE_OPERAND (ubase, 0);
3850 }
3851
3852 /* Strips the constant part. */
3853 if (TREE_CODE (cbase) == PLUS_EXPR
3854 || TREE_CODE (cbase) == MINUS_EXPR
3855 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
3856 {
3857 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
3858 cbase = TREE_OPERAND (cbase, 0);
3859 }
3860
3861 if (address_p)
3862 {
3863 if (((TREE_CODE (ubase) == SSA_NAME)
3864 || (TREE_CODE (ubase) == ADDR_EXPR
3865 && is_gimple_min_invariant (ubase)))
3866 && (TREE_CODE (cbase) == INTEGER_CST))
3867 return -1;
3868
3869 if (((TREE_CODE (cbase) == SSA_NAME)
3870 || (TREE_CODE (cbase) == ADDR_EXPR
3871 && is_gimple_min_invariant (cbase)))
3872 && (TREE_CODE (ubase) == INTEGER_CST))
3873 return -1;
3874 }
3875
3876 if (ratio == 1)
3877 {
3878 if (operand_equal_p (ubase, cbase, 0))
3879 return -1;
3880
3881 if (TREE_CODE (ubase) == ADDR_EXPR
3882 && TREE_CODE (cbase) == ADDR_EXPR)
3883 {
3884 tree usym, csym;
3885
3886 usym = TREE_OPERAND (ubase, 0);
3887 csym = TREE_OPERAND (cbase, 0);
3888 if (TREE_CODE (usym) == ARRAY_REF)
3889 {
3890 tree ind = TREE_OPERAND (usym, 1);
3891 if (TREE_CODE (ind) == INTEGER_CST
3892 && host_integerp (ind, 0)
3893 && TREE_INT_CST_LOW (ind) == 0)
3894 usym = TREE_OPERAND (usym, 0);
3895 }
3896 if (TREE_CODE (csym) == ARRAY_REF)
3897 {
3898 tree ind = TREE_OPERAND (csym, 1);
3899 if (TREE_CODE (ind) == INTEGER_CST
3900 && host_integerp (ind, 0)
3901 && TREE_INT_CST_LOW (ind) == 0)
3902 csym = TREE_OPERAND (csym, 0);
3903 }
3904 if (operand_equal_p (usym, csym, 0))
3905 return -1;
3906 }
3907 /* Now do a more complex comparison. */
3908 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
3909 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
3910 if (compare_aff_trees (&ubase_aff, &cbase_aff))
3911 return -1;
3912 }
3913
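/* Compute UB - RATIO * CB in affine form and look the result up in the
   inv_expr hash table, allocating a fresh pseudo expr id if the expression
   has not been recorded yet. */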
3914 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
3915 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
3916
3917 aff_combination_scale (&cbase_aff, shwi_to_double_int (-1 * ratio));
3918 aff_combination_add (&ubase_aff, &cbase_aff);
3919 expr = aff_combination_to_tree (&ubase_aff);
3920 ent.expr = expr;
3921 ent.hash = iterative_hash_expr (expr, 0);
3922 slot = (struct iv_inv_expr_ent **) htab_find_slot (data->inv_expr_tab,
3923 &ent, INSERT);
3924 if (*slot)
3925 return (*slot)->id;
3926
3927 *slot = XNEW (struct iv_inv_expr_ent);
3928 (*slot)->expr = expr;
3929 (*slot)->hash = ent.hash;
3930 (*slot)->id = data->inv_expr_id++;
3931 return (*slot)->id;
3932 }
3933
3934
3935
3936 /* Determines the cost of the computation by which USE is expressed
3937 from induction variable CAND. If ADDRESS_P is true, we just need
3938 to create an address from it, otherwise we want to get it into a
3939 register. A set of invariants we depend on is stored in
3940 DEPENDS_ON. AT is the statement at which the value is computed.
3941 If CAN_AUTOINC is nonnull, use it to record whether autoinc
3942 addressing is likely. */
3943
3944 static comp_cost
3945 get_computation_cost_at (struct ivopts_data *data,
3946 struct iv_use *use, struct iv_cand *cand,
3947 bool address_p, bitmap *depends_on, gimple at,
3948 bool *can_autoinc,
3949 int *inv_expr_id)
3950 {
3951 tree ubase = use->iv->base, ustep = use->iv->step;
3952 tree cbase, cstep;
3953 tree utype = TREE_TYPE (ubase), ctype;
3954 unsigned HOST_WIDE_INT cstepi, offset = 0;
3955 HOST_WIDE_INT ratio, aratio;
3956 bool var_present, symbol_present, stmt_is_after_inc;
3957 comp_cost cost;
3958 double_int rat;
3959 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
3960
3961 *depends_on = NULL;
3962
3963 /* Only consider real candidates. */
3964 if (!cand->iv)
3965 return infinite_cost;
3966
3967 cbase = cand->iv->base;
3968 cstep = cand->iv->step;
3969 ctype = TREE_TYPE (cbase);
3970
3971 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3972 {
3973 /* We do not have a precision to express the values of the use. */
3974 return infinite_cost;
3975 }
3976
3977 if (address_p)
3978 {
3979 /* Do not try to express address of an object with computation based
3980 on address of a different object. This may cause problems in rtl
3981 level alias analysis (that does not expect this to be happening,
3982 as this is illegal in C), and would be unlikely to be useful
3983 anyway. */
3984 if (use->iv->base_object
3985 && cand->iv->base_object
3986 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3987 return infinite_cost;
3988 }
3989
3990 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3991 {
3992 /* TODO -- add direct handling of this case. */
3993 goto fallback;
3994 }
3995
3996 /* CSTEPI is removed from the offset in case the statement is after the
3997 increment.  If the step is not constant, we use zero instead.
3998 This is a bit imprecise (there is the extra addition), but
3999 redundancy elimination is likely to transform the code so that
4000 it uses the value of the variable before the increment anyway,
4001 so it is not all that unrealistic.  */
4002 if (cst_and_fits_in_hwi (cstep))
4003 cstepi = int_cst_value (cstep);
4004 else
4005 cstepi = 0;
4006
4007 if (!constant_multiple_of (ustep, cstep, &rat))
4008 return infinite_cost;
4009
4010 if (double_int_fits_in_shwi_p (rat))
4011 ratio = double_int_to_shwi (rat);
4012 else
4013 return infinite_cost;
4014
4015 STRIP_NOPS (cbase);
4016 ctype = TREE_TYPE (cbase);
4017
4018 /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
4019 or ratio == 1, it is better to handle this like
4020
4021 ubase - ratio * cbase + ratio * var
4022
4023 (this also holds in the case ratio == -1, TODO).  */
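/* Editor's worked example with illustrative constants: for ubase == 100,
   cbase == 4 and ratio == 2, the use 100 + 2 * (var - 4) folds to
   92 + 2 * var; the -8 from -ratio * cbase is absorbed into OFFSET in
   the constant-cbase branch below.  */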
4024
4025 if (cst_and_fits_in_hwi (cbase))
4026 {
4027 offset = - ratio * int_cst_value (cbase);
4028 cost = difference_cost (data,
4029 ubase, build_int_cst (utype, 0),
4030 &symbol_present, &var_present, &offset,
4031 depends_on);
4032 cost.cost /= avg_loop_niter (data->current_loop);
4033 }
4034 else if (ratio == 1)
4035 {
4036 cost = difference_cost (data,
4037 ubase, cbase,
4038 &symbol_present, &var_present, &offset,
4039 depends_on);
4040 cost.cost /= avg_loop_niter (data->current_loop);
4041 }
4042 else if (address_p
4043 && !POINTER_TYPE_P (ctype)
4044 && multiplier_allowed_in_address_p
4045 (ratio, TYPE_MODE (TREE_TYPE (utype)),
4046 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4047 {
4048 cbase
4049 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4050 cost = difference_cost (data,
4051 ubase, cbase,
4052 &symbol_present, &var_present, &offset,
4053 depends_on);
4054 cost.cost /= avg_loop_niter (data->current_loop);
4055 }
4056 else
4057 {
4058 cost = force_var_cost (data, cbase, depends_on);
4059 cost = add_costs (cost,
4060 difference_cost (data,
4061 ubase, build_int_cst (utype, 0),
4062 &symbol_present, &var_present,
4063 &offset, depends_on));
4064 cost.cost /= avg_loop_niter (data->current_loop);
4065 cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
4066 }
4067
4068 if (inv_expr_id)
4069 {
4070 *inv_expr_id =
4071 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4072 /* Clear depends on. */
4073 if (*inv_expr_id != -1 && depends_on && *depends_on)
4074 bitmap_clear (*depends_on);
4075 }
4076
4077 /* If we are after the increment, the value of the candidate is higher by
4078 one iteration. */
4079 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4080 if (stmt_is_after_inc)
4081 offset -= ratio * cstepi;
4082
4083 /* Now the computation is of the form symbol + var1 + const + ratio * var2.
4084 (symbol/var1/const parts may be omitted). If we are looking for an
4085 address, find the cost of addressing this. */
4086 if (address_p)
4087 return add_costs (cost,
4088 get_address_cost (symbol_present, var_present,
4089 offset, ratio, cstepi,
4090 TYPE_MODE (TREE_TYPE (utype)),
4091 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4092 speed, stmt_is_after_inc,
4093 can_autoinc));
4094
4095 /* Otherwise estimate the costs for computing the expression. */
4096 if (!symbol_present && !var_present && !offset)
4097 {
4098 if (ratio != 1)
4099 cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype), speed);
4100 return cost;
4101 }
4102
4103 /* Symbol + offset should be compile-time computable, so consider that
4104 they are added once to the variable, if present.  */
4105 if (var_present && (symbol_present || offset))
4106 cost.cost += adjust_setup_cost (data,
4107 add_cost (TYPE_MODE (ctype), speed));
4108
4109 /* Having an offset does not affect the runtime cost if it is added to
4110 a symbol, but it increases complexity.  */
4111 if (offset)
4112 cost.complexity++;
4113
4114 cost.cost += add_cost (TYPE_MODE (ctype), speed);
4115
4116 aratio = ratio > 0 ? ratio : -ratio;
4117 if (aratio != 1)
4118 cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed);
4119 return cost;
4120
4121 fallback:
4122 if (can_autoinc)
4123 *can_autoinc = false;
4124
4125 {
4126 /* Just get the expression, expand it and measure the cost. */
4127 tree comp = get_computation_at (data->current_loop, use, cand, at);
4128
4129 if (!comp)
4130 return infinite_cost;
4131
4132 if (address_p)
4133 comp = build_simple_mem_ref (comp);
4134
4135 return new_cost (computation_cost (comp, speed), 0);
4136 }
4137 }
4138
4139 /* Determines the cost of the computation by which USE is expressed
4140 from induction variable CAND.  If ADDRESS_P is true, we just need
4141 to create an address from it, otherwise we want to get it into a
4142 register.  A set of invariants we depend on is stored in
4143 DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
4144 autoinc addressing is likely.  */
4145
4146 static comp_cost
4147 get_computation_cost (struct ivopts_data *data,
4148 struct iv_use *use, struct iv_cand *cand,
4149 bool address_p, bitmap *depends_on,
4150 bool *can_autoinc, int *inv_expr_id)
4151 {
4152 return get_computation_cost_at (data,
4153 use, cand, address_p, depends_on, use->stmt,
4154 can_autoinc, inv_expr_id);
4155 }
4156
4157 /* Determines cost of basing replacement of USE on CAND in a generic
4158 expression. */
4159
4160 static bool
4161 determine_use_iv_cost_generic (struct ivopts_data *data,
4162 struct iv_use *use, struct iv_cand *cand)
4163 {
4164 bitmap depends_on;
4165 comp_cost cost;
4166 int inv_expr_id = -1;
4167
4168 /* The simple case first -- if we need to express the value of the preserved
4169 original biv, the cost is 0. This also prevents us from counting the
4170 cost of increment twice -- once at this use and once in the cost of
4171 the candidate. */
4172 if (cand->pos == IP_ORIGINAL
4173 && cand->incremented_at == use->stmt)
4174 {
4175 set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE, -1);
4176 return true;
4177 }
4178
4179 cost = get_computation_cost (data, use, cand, false, &depends_on,
4180 NULL, &inv_expr_id);
4181
4182 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE,
4183 inv_expr_id);
4184
4185 return !infinite_cost_p (cost);
4186 }
4187
4188 /* Determines cost of basing replacement of USE on CAND in an address. */
4189
4190 static bool
4191 determine_use_iv_cost_address (struct ivopts_data *data,
4192 struct iv_use *use, struct iv_cand *cand)
4193 {
4194 bitmap depends_on;
4195 bool can_autoinc;
4196 int inv_expr_id = -1;
4197 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4198 &can_autoinc, &inv_expr_id);
4199
4200 if (cand->ainc_use == use)
4201 {
4202 if (can_autoinc)
4203 cost.cost -= cand->cost_step;
4204 /* If we generated the candidate solely for exploiting autoincrement
4205 opportunities, and it turns out it can't be used, set the cost to
4206 infinity to make sure we ignore it. */
4207 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4208 cost = infinite_cost;
4209 }
4210 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE,
4211 inv_expr_id);
4212
4213 return !infinite_cost_p (cost);
4214 }
4215
4216 /* Computes value of candidate CAND at position AT in iteration NITER, and
4217 stores it to VAL. */
4218
4219 static void
4220 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4221 aff_tree *val)
4222 {
4223 aff_tree step, delta, nit;
4224 struct iv *iv = cand->iv;
4225 tree type = TREE_TYPE (iv->base);
4226 tree steptype = type;
4227 if (POINTER_TYPE_P (type))
4228 steptype = sizetype;
4229
4230 tree_to_aff_combination (iv->step, steptype, &step);
4231 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4232 aff_combination_convert (&nit, steptype);
4233 aff_combination_mult (&nit, &step, &delta);
4234 if (stmt_after_increment (loop, cand, at))
4235 aff_combination_add (&delta, &step);
4236
4237 tree_to_aff_combination (iv->base, type, val);
4238 aff_combination_add (val, &delta);
4239 }
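/* Editor's sketch (not part of GCC): in plain machine arithmetic,
   cand_value_at computes VAL = BASE + (NITER + AFTER_INC) * STEP, where
   AFTER_INC is 1 iff AT follows the candidate's increment.  On wrapping
   unsigned words:  */

static unsigned long
cand_value_at_sketch (unsigned long base, unsigned long step,
		      unsigned long niter, int after_inc)
{
  return base + (niter + (after_inc ? 1UL : 0UL)) * step;
}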
4240
4241 /* Returns period of induction variable iv. */
4242
4243 static tree
4244 iv_period (struct iv *iv)
4245 {
4246 tree step = iv->step, period, type;
4247 tree pow2div;
4248
4249 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4250
4251 type = unsigned_type_for (TREE_TYPE (step));
4252 /* The period of the iv is lcm (step, type_range) / step - 1,
4253 i.e., N * type_range / step - 1.  Since the type range is a power
4254 of two, N == step >> num_of_ending_zeros_binary (step), so the
4255 final result is
4256
4257 (type_range >> num_of_ending_zeros_binary (step)) - 1
4258
4259 */
4260 pow2div = num_ending_zeros (step);
4261
4262 period = build_low_bits_mask (type,
4263 (TYPE_PRECISION (type)
4264 - tree_low_cst (pow2div, 1)));
4265
4266 return period;
4267 }
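/* Editor's sketch (not part of GCC): the same computation on a 32-bit
   unsigned step.  E.g. for step == 4 there are two trailing zero bits,
   so period == 0xFFFFFFFF >> 2 == 0x3FFFFFFF, i.e. 2^30 - 1.  */

static unsigned
iv_period_sketch (unsigned step)
{
  unsigned tz = 0;

  /* Count the trailing zero bits of the step.  */
  while (step != 0 && (step & 1) == 0)
    {
      step >>= 1;
      tz++;
    }

  /* (type_range >> tz) - 1, expressed as a low-bits mask.  */
  return ~0u >> tz;
}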
4268
4269 /* Returns the comparison operator used when eliminating the iv USE. */
4270
4271 static enum tree_code
4272 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4273 {
4274 struct loop *loop = data->current_loop;
4275 basic_block ex_bb;
4276 edge exit;
4277
4278 ex_bb = gimple_bb (use->stmt);
4279 exit = EDGE_SUCC (ex_bb, 0);
4280 if (flow_bb_inside_loop_p (loop, exit->dest))
4281 exit = EDGE_SUCC (ex_bb, 1);
4282
4283 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4284 }
4285
4286 /* Check whether it is possible to express the condition in USE by comparison
4287 of candidate CAND. If so, store the value compared with to BOUND. */
4288
4289 static bool
4290 may_eliminate_iv (struct ivopts_data *data,
4291 struct iv_use *use, struct iv_cand *cand, tree *bound)
4292 {
4293 basic_block ex_bb;
4294 edge exit;
4295 tree nit, period;
4296 struct loop *loop = data->current_loop;
4297 aff_tree bnd;
4298 struct tree_niter_desc *desc = NULL;
4299
4300 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4301 return false;
4302
4303 /* For now works only for exits that dominate the loop latch.
4304 TODO: extend to other conditions inside loop body. */
4305 ex_bb = gimple_bb (use->stmt);
4306 if (use->stmt != last_stmt (ex_bb)
4307 || gimple_code (use->stmt) != GIMPLE_COND
4308 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4309 return false;
4310
4311 exit = EDGE_SUCC (ex_bb, 0);
4312 if (flow_bb_inside_loop_p (loop, exit->dest))
4313 exit = EDGE_SUCC (ex_bb, 1);
4314 if (flow_bb_inside_loop_p (loop, exit->dest))
4315 return false;
4316
4317 nit = niter_for_exit (data, exit, &desc);
4318 if (!nit)
4319 return false;
4320
4321 /* Determine whether we can use the variable to test the exit condition.
4322 This is the case iff the period of the induction variable is greater
4323 than the number of iterations for which the exit condition is true. */
4324 period = iv_period (cand->iv);
4325
4326 /* If the number of iterations is constant, compare against it directly. */
4327 if (TREE_CODE (nit) == INTEGER_CST)
4328 {
4329 /* See cand_value_at. */
4330 if (stmt_after_increment (loop, cand, use->stmt))
4331 {
4332 if (!tree_int_cst_lt (nit, period))
4333 return false;
4334 }
4335 else
4336 {
4337 if (tree_int_cst_lt (period, nit))
4338 return false;
4339 }
4340 }
4341
4342 /* If not, and if this is the only possible exit of the loop, see whether
4343 we can get a conservative estimate on the number of iterations of the
4344 entire loop and compare against that instead. */
4345 else
4346 {
4347 double_int period_value, max_niter;
4348
4349 max_niter = desc->max;
4350 if (stmt_after_increment (loop, cand, use->stmt))
4351 max_niter = double_int_add (max_niter, double_int_one);
4352 period_value = tree_to_double_int (period);
4353 if (double_int_ucmp (max_niter, period_value) > 0)
4354 {
4355 /* See if we can take advantage of inferred loop bound information.  */
4356 if (loop_only_exit_p (loop, exit))
4357 {
4358 if (!estimated_loop_iterations (loop, true, &max_niter))
4359 return false;
4360 /* The loop bound is already adjusted by adding 1. */
4361 if (double_int_ucmp (max_niter, period_value) > 0)
4362 return false;
4363 }
4364 else
4365 return false;
4366 }
4367 }
4368
4369 cand_value_at (loop, cand, use->stmt, nit, &bnd);
4370
4371 *bound = aff_combination_to_tree (&bnd);
4372 /* It is unlikely that computing the number of iterations using division
4373 would be more profitable than keeping the original induction variable. */
4374 if (expression_expensive_p (*bound))
4375 return false;
4376 return true;
4377 }
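/* Editor's sketch (not part of GCC): the feasibility test above for a
   constant iteration count.  The candidate can drive the exit test iff
   its period covers the iteration count; when the use follows the
   increment, the candidate advances one extra step, so the strict
   inequality is required.  */

static bool
may_eliminate_iv_sketch (unsigned long period, unsigned long niter,
			 bool after_inc)
{
  return after_inc ? niter < period : niter <= period;
}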
4378
4379
4380 /* Determines cost of basing replacement of USE on CAND in a condition. */
4381
4382 static bool
4383 determine_use_iv_cost_condition (struct ivopts_data *data,
4384 struct iv_use *use, struct iv_cand *cand)
4385 {
4386 tree bound = NULL_TREE;
4387 struct iv *cmp_iv;
4388 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4389 comp_cost elim_cost, express_cost, cost;
4390 bool ok;
4391 int inv_expr_id = -1;
4392 tree *control_var, *bound_cst;
4393
4394 /* Only consider real candidates. */
4395 if (!cand->iv)
4396 {
4397 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE, -1);
4398 return false;
4399 }
4400
4401 /* Try iv elimination. */
4402 if (may_eliminate_iv (data, use, cand, &bound))
4403 {
4404 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4405 /* The bound is a loop invariant, so it will be only computed
4406 once. */
4407 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4408 }
4409 else
4410 elim_cost = infinite_cost;
4411
4412 /* Try expressing the original giv. If it is compared with an invariant,
4413 note that we cannot get rid of it. */
4414 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4415 NULL, &cmp_iv);
4416 gcc_assert (ok);
4417
4418 /* When the condition is a comparison of the candidate IV against
4419 zero, prefer this IV.
4420
4421 TODO: The constant that we're subtracting from the cost should
4422 be target-dependent. This information should be added to the
4423 target costs for each backend. */
4424 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4425 && integer_zerop (*bound_cst)
4426 && (operand_equal_p (*control_var, cand->var_after, 0)
4427 || operand_equal_p (*control_var, cand->var_before, 0)))
4428 elim_cost.cost -= 1;
4429
4430 express_cost = get_computation_cost (data, use, cand, false,
4431 &depends_on_express, NULL,
4432 &inv_expr_id);
4433 fd_ivopts_data = data;
4434 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4435
4436 /* Choose the better approach, preferring the eliminated IV. */
4437 if (compare_costs (elim_cost, express_cost) <= 0)
4438 {
4439 cost = elim_cost;
4440 depends_on = depends_on_elim;
4441 depends_on_elim = NULL;
4442 }
4443 else
4444 {
4445 cost = express_cost;
4446 depends_on = depends_on_express;
4447 depends_on_express = NULL;
4448 bound = NULL_TREE;
4449 }
4450
4451 set_use_iv_cost (data, use, cand, cost, depends_on, bound, inv_expr_id);
4452
4453 if (depends_on_elim)
4454 BITMAP_FREE (depends_on_elim);
4455 if (depends_on_express)
4456 BITMAP_FREE (depends_on_express);
4457
4458 return !infinite_cost_p (cost);
4459 }
4460
4461 /* Determines cost of basing replacement of USE on CAND. Returns false
4462 if USE cannot be based on CAND. */
4463
4464 static bool
4465 determine_use_iv_cost (struct ivopts_data *data,
4466 struct iv_use *use, struct iv_cand *cand)
4467 {
4468 switch (use->type)
4469 {
4470 case USE_NONLINEAR_EXPR:
4471 return determine_use_iv_cost_generic (data, use, cand);
4472
4473 case USE_ADDRESS:
4474 return determine_use_iv_cost_address (data, use, cand);
4475
4476 case USE_COMPARE:
4477 return determine_use_iv_cost_condition (data, use, cand);
4478
4479 default:
4480 gcc_unreachable ();
4481 }
4482 }
4483
4484 /* Return true if get_computation_cost indicates that autoincrement is
4485 a possibility for the pair of USE and CAND, false otherwise. */
4486
4487 static bool
4488 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4489 struct iv_cand *cand)
4490 {
4491 bitmap depends_on;
4492 bool can_autoinc;
4493 comp_cost cost;
4494
4495 if (use->type != USE_ADDRESS)
4496 return false;
4497
4498 cost = get_computation_cost (data, use, cand, true, &depends_on,
4499 &can_autoinc, NULL);
4500
4501 BITMAP_FREE (depends_on);
4502
4503 return !infinite_cost_p (cost) && can_autoinc;
4504 }
4505
4506 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4507 use that allows autoincrement, and set their AINC_USE if possible. */
4508
4509 static void
4510 set_autoinc_for_original_candidates (struct ivopts_data *data)
4511 {
4512 unsigned i, j;
4513
4514 for (i = 0; i < n_iv_cands (data); i++)
4515 {
4516 struct iv_cand *cand = iv_cand (data, i);
4517 struct iv_use *closest = NULL;
4518 if (cand->pos != IP_ORIGINAL)
4519 continue;
4520 for (j = 0; j < n_iv_uses (data); j++)
4521 {
4522 struct iv_use *use = iv_use (data, j);
4523 unsigned uid = gimple_uid (use->stmt);
4524 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
4525 || uid > gimple_uid (cand->incremented_at))
4526 continue;
4527 if (closest == NULL || uid > gimple_uid (closest->stmt))
4528 closest = use;
4529 }
4530 if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
4531 continue;
4532 cand->ainc_use = closest;
4533 }
4534 }
4535
4536 /* Finds the candidates for the induction variables. */
4537
4538 static void
4539 find_iv_candidates (struct ivopts_data *data)
4540 {
4541 /* Add commonly used ivs. */
4542 add_standard_iv_candidates (data);
4543
4544 /* Add old induction variables. */
4545 add_old_ivs_candidates (data);
4546
4547 /* Add induction variables derived from uses. */
4548 add_derived_ivs_candidates (data);
4549
4550 set_autoinc_for_original_candidates (data);
4551
4552 /* Record the important candidates. */
4553 record_important_candidates (data);
4554 }
4555
4556 /* Determines costs of basing the use of the iv on an iv candidate. */
4557
4558 static void
4559 determine_use_iv_costs (struct ivopts_data *data)
4560 {
4561 unsigned i, j;
4562 struct iv_use *use;
4563 struct iv_cand *cand;
4564 bitmap to_clear = BITMAP_ALLOC (NULL);
4565
4566 alloc_use_cost_map (data);
4567
4568 for (i = 0; i < n_iv_uses (data); i++)
4569 {
4570 use = iv_use (data, i);
4571
4572 if (data->consider_all_candidates)
4573 {
4574 for (j = 0; j < n_iv_cands (data); j++)
4575 {
4576 cand = iv_cand (data, j);
4577 determine_use_iv_cost (data, use, cand);
4578 }
4579 }
4580 else
4581 {
4582 bitmap_iterator bi;
4583
4584 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4585 {
4586 cand = iv_cand (data, j);
4587 if (!determine_use_iv_cost (data, use, cand))
4588 bitmap_set_bit (to_clear, j);
4589 }
4590
4591 /* Remove the candidates for which the cost is infinite from
4592 the list of related candidates.  */
4593 bitmap_and_compl_into (use->related_cands, to_clear);
4594 bitmap_clear (to_clear);
4595 }
4596 }
4597
4598 BITMAP_FREE (to_clear);
4599
4600 if (dump_file && (dump_flags & TDF_DETAILS))
4601 {
4602 fprintf (dump_file, "Use-candidate costs:\n");
4603
4604 for (i = 0; i < n_iv_uses (data); i++)
4605 {
4606 use = iv_use (data, i);
4607
4608 fprintf (dump_file, "Use %d:\n", i);
4609 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
4610 for (j = 0; j < use->n_map_members; j++)
4611 {
4612 if (!use->cost_map[j].cand
4613 || infinite_cost_p (use->cost_map[j].cost))
4614 continue;
4615
4616 fprintf (dump_file, " %d\t%d\t%d\t",
4617 use->cost_map[j].cand->id,
4618 use->cost_map[j].cost.cost,
4619 use->cost_map[j].cost.complexity);
4620 if (use->cost_map[j].depends_on)
4621 bitmap_print (dump_file,
4622 use->cost_map[j].depends_on, "","");
4623 if (use->cost_map[j].inv_expr_id != -1)
4624 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
4625 fprintf (dump_file, "\n");
4626 }
4627
4628 fprintf (dump_file, "\n");
4629 }
4630 fprintf (dump_file, "\n");
4631 }
4632 }
4633
4634 /* Determines cost of the candidate CAND. */
4635
4636 static void
4637 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4638 {
4639 comp_cost cost_base;
4640 unsigned cost, cost_step;
4641 tree base;
4642
4643 if (!cand->iv)
4644 {
4645 cand->cost = 0;
4646 return;
4647 }
4648
4649 /* There are two costs associated with the candidate -- its increment
4650 and its initialization. The second is almost negligible for any loop
4651 that rolls enough, so we take it just very little into account. */
4652
4653 base = cand->iv->base;
4654 cost_base = force_var_cost (data, base, NULL);
4655 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
4656
4657 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
4658
4659 /* Prefer the original ivs unless we may gain something by replacing them.
4660 The reason is to make debugging simpler; so this is not relevant for
4661 artificial ivs created by other optimization passes. */
4662 if (cand->pos != IP_ORIGINAL
4663 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4664 cost++;
4665
4666 /* Prefer not to insert statements into latch unless there are some
4667 already (so that we do not create unnecessary jumps). */
4668 if (cand->pos == IP_END
4669 && empty_block_p (ip_end_pos (data->current_loop)))
4670 cost++;
4671
4672 cand->cost = cost;
4673 cand->cost_step = cost_step;
4674 }
4675
4676 /* Determines costs of computation of the candidates. */
4677
4678 static void
4679 determine_iv_costs (struct ivopts_data *data)
4680 {
4681 unsigned i;
4682
4683 if (dump_file && (dump_flags & TDF_DETAILS))
4684 {
4685 fprintf (dump_file, "Candidate costs:\n");
4686 fprintf (dump_file, " cand\tcost\n");
4687 }
4688
4689 for (i = 0; i < n_iv_cands (data); i++)
4690 {
4691 struct iv_cand *cand = iv_cand (data, i);
4692
4693 determine_iv_cost (data, cand);
4694
4695 if (dump_file && (dump_flags & TDF_DETAILS))
4696 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4697 }
4698
4699 if (dump_file && (dump_flags & TDF_DETAILS))
4700 fprintf (dump_file, "\n");
4701 }
4702
4703 /* Calculates cost for having SIZE induction variables. */
4704
4705 static unsigned
4706 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4707 {
4708 /* We add size to the cost, so that we prefer eliminating ivs
4709 if possible. */
4710 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
4711 data->body_includes_call);
4712 }
4713
4714 /* For each size of the induction variable set determine the penalty. */
4715
4716 static void
4717 determine_set_costs (struct ivopts_data *data)
4718 {
4719 unsigned j, n;
4720 gimple phi;
4721 gimple_stmt_iterator psi;
4722 tree op;
4723 struct loop *loop = data->current_loop;
4724 bitmap_iterator bi;
4725
4726 if (dump_file && (dump_flags & TDF_DETAILS))
4727 {
4728 fprintf (dump_file, "Global costs:\n");
4729 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
4730 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
4731 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
4732 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
4733 }
4734
4735 n = 0;
4736 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
4737 {
4738 phi = gsi_stmt (psi);
4739 op = PHI_RESULT (phi);
4740
4741 if (!is_gimple_reg (op))
4742 continue;
4743
4744 if (get_iv (data, op))
4745 continue;
4746
4747 n++;
4748 }
4749
4750 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4751 {
4752 struct version_info *info = ver_info (data, j);
4753
4754 if (info->inv_id && info->has_nonlin_use)
4755 n++;
4756 }
4757
4758 data->regs_used = n;
4759 if (dump_file && (dump_flags & TDF_DETAILS))
4760 fprintf (dump_file, " regs_used %d\n", n);
4761
4762 if (dump_file && (dump_flags & TDF_DETAILS))
4763 {
4764 fprintf (dump_file, " cost for size:\n");
4765 fprintf (dump_file, " ivs\tcost\n");
4766 for (j = 0; j <= 2 * target_avail_regs; j++)
4767 fprintf (dump_file, " %d\t%d\n", j,
4768 ivopts_global_cost_for_size (data, j));
4769 fprintf (dump_file, "\n");
4770 }
4771 }
4772
4773 /* Returns true if A is a cheaper cost pair than B. */
4774
4775 static bool
4776 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4777 {
4778 int cmp;
4779
4780 if (!a)
4781 return false;
4782
4783 if (!b)
4784 return true;
4785
4786 cmp = compare_costs (a->cost, b->cost);
4787 if (cmp < 0)
4788 return true;
4789
4790 if (cmp > 0)
4791 return false;
4792
4793 /* In case the costs are the same, prefer the cheaper candidate. */
4794 if (a->cand->cost < b->cand->cost)
4795 return true;
4796
4797 return false;
4798 }
4799
4800
4801 /* Returns the candidate by which USE is expressed in IVS.  */
4802
4803 static struct cost_pair *
4804 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4805 {
4806 return ivs->cand_for_use[use->id];
4807 }
4808
4809
4810 /* Returns the number of temps needed for new loop invariant
4811 expressions. */
4812
4813 static int
4814 iv_ca_get_num_inv_exprs (struct ivopts_data *data, struct iv_ca *ivs)
4815 {
4816 unsigned i, n = 0;
4817 unsigned *used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
4818
4819 for (i = 0; i < ivs->upto; i++)
4820 {
4821 struct iv_use *use = iv_use (data, i);
4822 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
4823 if (cp && cp->inv_expr_id != -1)
4824 {
4825 used_inv_expr[cp->inv_expr_id]++;
4826 if (used_inv_expr[cp->inv_expr_id] == 1)
4827 n++;
4828 }
4829 }
4830
4831 free (used_inv_expr);
4832 return n;
4833 }
4834
4835 /* Computes the cost field of IVS structure. */
4836
4837 static void
4838 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4839 {
4840 unsigned n_inv_exprs = 0;
4841 comp_cost cost = ivs->cand_use_cost;
4842
4843 cost.cost += ivs->cand_cost;
4844
4845 n_inv_exprs = iv_ca_get_num_inv_exprs (data, ivs);
4846 cost.cost += ivopts_global_cost_for_size (data,
4847 ivs->n_regs + n_inv_exprs);
4848
4849 ivs->cost = cost;
4850 }
4851
4852 /* Remove invariants in set INVS from set IVS.  */
4853
4854 static void
4855 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4856 {
4857 bitmap_iterator bi;
4858 unsigned iid;
4859
4860 if (!invs)
4861 return;
4862
4863 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4864 {
4865 ivs->n_invariant_uses[iid]--;
4866 if (ivs->n_invariant_uses[iid] == 0)
4867 ivs->n_regs--;
4868 }
4869 }
4870
4871 /* Set USE not to be expressed by any candidate in IVS. */
4872
4873 static void
4874 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4875 struct iv_use *use)
4876 {
4877 unsigned uid = use->id, cid;
4878 struct cost_pair *cp;
4879
4880 cp = ivs->cand_for_use[uid];
4881 if (!cp)
4882 return;
4883 cid = cp->cand->id;
4884
4885 ivs->bad_uses++;
4886 ivs->cand_for_use[uid] = NULL;
4887 ivs->n_cand_uses[cid]--;
4888
4889 if (ivs->n_cand_uses[cid] == 0)
4890 {
4891 bitmap_clear_bit (ivs->cands, cid);
4892 /* Do not count the pseudocandidates. */
4893 if (cp->cand->iv)
4894 ivs->n_regs--;
4895 ivs->n_cands--;
4896 ivs->cand_cost -= cp->cand->cost;
4897
4898 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4899 }
4900
4901 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
4902
4903 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4904 iv_ca_recount_cost (data, ivs);
4905 }
4906
4907 /* Add invariants in set INVS to set IVS. */
4908
4909 static void
4910 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4911 {
4912 bitmap_iterator bi;
4913 unsigned iid;
4914
4915 if (!invs)
4916 return;
4917
4918 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4919 {
4920 ivs->n_invariant_uses[iid]++;
4921 if (ivs->n_invariant_uses[iid] == 1)
4922 ivs->n_regs++;
4923 }
4924 }
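/* Editor's sketch (not part of GCC): iv_ca_set_remove_invariants and
   iv_ca_set_add_invariants above are plain reference counting; only the
   0 -> 1 and 1 -> 0 transitions change the register count.  */

static void
refcount_add_sketch (unsigned *refcount, unsigned *n_regs)
{
  if (++*refcount == 1)		/* Invariant becomes live: one more register.  */
    ++*n_regs;
}

static void
refcount_remove_sketch (unsigned *refcount, unsigned *n_regs)
{
  if (--*refcount == 0)		/* Last use gone: the register is freed.  */
    --*n_regs;
}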
4925
4926 /* Set cost pair for USE in set IVS to CP. */
4927
4928 static void
4929 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4930 struct iv_use *use, struct cost_pair *cp)
4931 {
4932 unsigned uid = use->id, cid;
4933
4934 if (ivs->cand_for_use[uid] == cp)
4935 return;
4936
4937 if (ivs->cand_for_use[uid])
4938 iv_ca_set_no_cp (data, ivs, use);
4939
4940 if (cp)
4941 {
4942 cid = cp->cand->id;
4943
4944 ivs->bad_uses--;
4945 ivs->cand_for_use[uid] = cp;
4946 ivs->n_cand_uses[cid]++;
4947 if (ivs->n_cand_uses[cid] == 1)
4948 {
4949 bitmap_set_bit (ivs->cands, cid);
4950 /* Do not count the pseudocandidates. */
4951 if (cp->cand->iv)
4952 ivs->n_regs++;
4953 ivs->n_cands++;
4954 ivs->cand_cost += cp->cand->cost;
4955
4956 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4957 }
4958
4959 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
4960 iv_ca_set_add_invariants (ivs, cp->depends_on);
4961 iv_ca_recount_cost (data, ivs);
4962 }
4963 }
4964
4965 /* Extend set IVS by expressing USE by some of the candidates in it
4966 if possible. All important candidates will be considered
4967 if IMPORTANT_CANDIDATES is true. */
4968
4969 static void
4970 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4971 struct iv_use *use, bool important_candidates)
4972 {
4973 struct cost_pair *best_cp = NULL, *cp;
4974 bitmap_iterator bi;
4975 bitmap cands;
4976 unsigned i;
4977
4978 gcc_assert (ivs->upto >= use->id);
4979
4980 if (ivs->upto == use->id)
4981 {
4982 ivs->upto++;
4983 ivs->bad_uses++;
4984 }
4985
4986 cands = (important_candidates ? data->important_candidates : ivs->cands);
4987 EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
4988 {
4989 struct iv_cand *cand = iv_cand (data, i);
4990
4991 cp = get_use_iv_cost (data, use, cand);
4992
4993 if (cheaper_cost_pair (cp, best_cp))
4994 best_cp = cp;
4995 }
4996
4997 iv_ca_set_cp (data, ivs, use, best_cp);
4998 }
4999
5000 /* Get cost for assignment IVS. */
5001
5002 static comp_cost
5003 iv_ca_cost (struct iv_ca *ivs)
5004 {
5005 /* This was a conditional expression but it triggered a bug in
5006 Sun C 5.5. */
5007 if (ivs->bad_uses)
5008 return infinite_cost;
5009 else
5010 return ivs->cost;
5011 }
5012
5013 /* Returns true if all dependences of CP are among invariants in IVS. */
5014
5015 static bool
5016 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5017 {
5018 unsigned i;
5019 bitmap_iterator bi;
5020
5021 if (!cp->depends_on)
5022 return true;
5023
5024 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5025 {
5026 if (ivs->n_invariant_uses[i] == 0)
5027 return false;
5028 }
5029
5030 return true;
5031 }
5032
5033 /* Creates a change expressing USE by NEW_CP instead of OLD_CP and chains
5034 it before NEXT_CHANGE. */
5035
5036 static struct iv_ca_delta *
5037 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5038 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5039 {
5040 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5041
5042 change->use = use;
5043 change->old_cp = old_cp;
5044 change->new_cp = new_cp;
5045 change->next_change = next_change;
5046
5047 return change;
5048 }
5049
5050 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5051 are rewritten. */
5052
5053 static struct iv_ca_delta *
5054 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5055 {
5056 struct iv_ca_delta *last;
5057
5058 if (!l2)
5059 return l1;
5060
5061 if (!l1)
5062 return l2;
5063
5064 for (last = l1; last->next_change; last = last->next_change)
5065 continue;
5066 last->next_change = l2;
5067
5068 return l1;
5069 }
5070
5071 /* Reverse the list of changes DELTA, forming the inverse to it. */
5072
5073 static struct iv_ca_delta *
5074 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5075 {
5076 struct iv_ca_delta *act, *next, *prev = NULL;
5077 struct cost_pair *tmp;
5078
5079 for (act = delta; act; act = next)
5080 {
5081 next = act->next_change;
5082 act->next_change = prev;
5083 prev = act;
5084
5085 tmp = act->old_cp;
5086 act->old_cp = act->new_cp;
5087 act->new_cp = tmp;
5088 }
5089
5090 return prev;
5091 }
5092
5093 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5094 reverted instead. */
5095
5096 static void
5097 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5098 struct iv_ca_delta *delta, bool forward)
5099 {
5100 struct cost_pair *from, *to;
5101 struct iv_ca_delta *act;
5102
5103 if (!forward)
5104 delta = iv_ca_delta_reverse (delta);
5105
5106 for (act = delta; act; act = act->next_change)
5107 {
5108 from = act->old_cp;
5109 to = act->new_cp;
5110 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5111 iv_ca_set_cp (data, ivs, act->use, to);
5112 }
5113
5114 if (!forward)
5115 iv_ca_delta_reverse (delta);
5116 }
5117
5118 /* Returns true if CAND is used in IVS. */
5119
5120 static bool
5121 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5122 {
5123 return ivs->n_cand_uses[cand->id] > 0;
5124 }
5125
5126 /* Returns number of induction variable candidates in the set IVS. */
5127
5128 static unsigned
5129 iv_ca_n_cands (struct iv_ca *ivs)
5130 {
5131 return ivs->n_cands;
5132 }
5133
5134 /* Free the list of changes DELTA. */
5135
5136 static void
5137 iv_ca_delta_free (struct iv_ca_delta **delta)
5138 {
5139 struct iv_ca_delta *act, *next;
5140
5141 for (act = *delta; act; act = next)
5142 {
5143 next = act->next_change;
5144 free (act);
5145 }
5146
5147 *delta = NULL;
5148 }
5149
5150 /* Allocates a new iv candidate assignment.  */
5151
5152 static struct iv_ca *
5153 iv_ca_new (struct ivopts_data *data)
5154 {
5155 struct iv_ca *nw = XNEW (struct iv_ca);
5156
5157 nw->upto = 0;
5158 nw->bad_uses = 0;
5159 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5160 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5161 nw->cands = BITMAP_ALLOC (NULL);
5162 nw->n_cands = 0;
5163 nw->n_regs = 0;
5164 nw->cand_use_cost = zero_cost;
5165 nw->cand_cost = 0;
5166 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5167 nw->cost = zero_cost;
5168
5169 return nw;
5170 }
5171
5172 /* Free memory occupied by the set IVS. */
5173
5174 static void
5175 iv_ca_free (struct iv_ca **ivs)
5176 {
5177 free ((*ivs)->cand_for_use);
5178 free ((*ivs)->n_cand_uses);
5179 BITMAP_FREE ((*ivs)->cands);
5180 free ((*ivs)->n_invariant_uses);
5181 free (*ivs);
5182 *ivs = NULL;
5183 }
5184
5185 /* Dumps IVS to FILE. */
5186
5187 static void
5188 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5189 {
5190 const char *pref = " invariants ";
5191 unsigned i;
5192 comp_cost cost = iv_ca_cost (ivs);
5193
5194 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5195 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5196 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5197 bitmap_print (file, ivs->cands, " candidates: ","\n");
5198
5199 for (i = 0; i < ivs->upto; i++)
5200 {
5201 struct iv_use *use = iv_use (data, i);
5202 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5203 if (cp)
5204 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5205 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5206 else
5207 fprintf (file, " use:%d --> ??\n", use->id);
5208 }
5209
5210 for (i = 1; i <= data->max_inv_id; i++)
5211 if (ivs->n_invariant_uses[i])
5212 {
5213 fprintf (file, "%s%d", pref, i);
5214 pref = ", ";
5215 }
5216 fprintf (file, "\n\n");
5217 }
5218
5219 /* Try changing the candidate in IVS to CAND for each use.  Return the cost
5220 of the new set, and store the differences in DELTA.  The number of
5221 induction variables in the new set is stored to N_IVS.  When MIN_NCAND
5222 is true, the function tries to find a solution with minimal iv candidates.  */
5223
5224 static comp_cost
5225 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5226 struct iv_cand *cand, struct iv_ca_delta **delta,
5227 unsigned *n_ivs, bool min_ncand)
5228 {
5229 unsigned i;
5230 comp_cost cost;
5231 struct iv_use *use;
5232 struct cost_pair *old_cp, *new_cp;
5233
5234 *delta = NULL;
5235 for (i = 0; i < ivs->upto; i++)
5236 {
5237 use = iv_use (data, i);
5238 old_cp = iv_ca_cand_for_use (ivs, use);
5239
5240 if (old_cp
5241 && old_cp->cand == cand)
5242 continue;
5243
5244 new_cp = get_use_iv_cost (data, use, cand);
5245 if (!new_cp)
5246 continue;
5247
5248 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5249 continue;
5250
5251 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5252 continue;
5253
5254 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5255 }
5256
5257 iv_ca_delta_commit (data, ivs, *delta, true);
5258 cost = iv_ca_cost (ivs);
5259 if (n_ivs)
5260 *n_ivs = iv_ca_n_cands (ivs);
5261 iv_ca_delta_commit (data, ivs, *delta, false);
5262
5263 return cost;
5264 }
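/* Editor's note: iv_ca_extend above shows the speculative
   commit/measure/revert idiom used throughout the search:

     iv_ca_delta_commit (data, ivs, *delta, true);	apply the change
     cost = iv_ca_cost (ivs);				measure its cost
     iv_ca_delta_commit (data, ivs, *delta, false);	revert the change

   so many candidate deltas can be compared while mutating a single
   iv_ca instance in place.  */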
5265
5266 /* Try narrowing set IVS by removing CAND. Return the cost of
5267 the new set and store the differences in DELTA. */
5268
5269 static comp_cost
5270 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5271 struct iv_cand *cand, struct iv_ca_delta **delta)
5272 {
5273 unsigned i, ci;
5274 struct iv_use *use;
5275 struct cost_pair *old_cp, *new_cp, *cp;
5276 bitmap_iterator bi;
5277 struct iv_cand *cnd;
5278 comp_cost cost;
5279
5280 *delta = NULL;
5281 for (i = 0; i < n_iv_uses (data); i++)
5282 {
5283 use = iv_use (data, i);
5284
5285 old_cp = iv_ca_cand_for_use (ivs, use);
5286 if (old_cp->cand != cand)
5287 continue;
5288
5289 new_cp = NULL;
5290
5291 if (data->consider_all_candidates)
5292 {
5293 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5294 {
5295 if (ci == cand->id)
5296 continue;
5297
5298 cnd = iv_cand (data, ci);
5299
5300 cp = get_use_iv_cost (data, use, cnd);
5301 if (!cp)
5302 continue;
5303
5304 if (!iv_ca_has_deps (ivs, cp))
5305 continue;
5306
5307 if (!cheaper_cost_pair (cp, new_cp))
5308 continue;
5309
5310 new_cp = cp;
5311 }
5312 }
5313 else
5314 {
5315 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5316 {
5317 if (ci == cand->id)
5318 continue;
5319
5320 cnd = iv_cand (data, ci);
5321
5322 cp = get_use_iv_cost (data, use, cnd);
5323 if (!cp)
5324 continue;
5325 if (!iv_ca_has_deps (ivs, cp))
5326 continue;
5327
5328 if (!cheaper_cost_pair (cp, new_cp))
5329 continue;
5330
5331 new_cp = cp;
5332 }
5333 }
5334
5335 if (!new_cp)
5336 {
5337 iv_ca_delta_free (delta);
5338 return infinite_cost;
5339 }
5340
5341 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5342 }
5343
5344 iv_ca_delta_commit (data, ivs, *delta, true);
5345 cost = iv_ca_cost (ivs);
5346 iv_ca_delta_commit (data, ivs, *delta, false);
5347
5348 return cost;
5349 }
5350
5351 /* Try optimizing the set of candidates IVS by removing candidates other
5352 than EXCEPT_CAND from it.  Return the cost of the new set, and store
5353 the differences in DELTA.  */
5354
5355 static comp_cost
5356 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5357 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5358 {
5359 bitmap_iterator bi;
5360 struct iv_ca_delta *act_delta, *best_delta;
5361 unsigned i;
5362 comp_cost best_cost, acost;
5363 struct iv_cand *cand;
5364
5365 best_delta = NULL;
5366 best_cost = iv_ca_cost (ivs);
5367
5368 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5369 {
5370 cand = iv_cand (data, i);
5371
5372 if (cand == except_cand)
5373 continue;
5374
5375 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5376
5377 if (compare_costs (acost, best_cost) < 0)
5378 {
5379 best_cost = acost;
5380 iv_ca_delta_free (&best_delta);
5381 best_delta = act_delta;
5382 }
5383 else
5384 iv_ca_delta_free (&act_delta);
5385 }
5386
5387 if (!best_delta)
5388 {
5389 *delta = NULL;
5390 return best_cost;
5391 }
5392
5393 /* Recurse to possibly remove other unnecessary ivs. */
5394 iv_ca_delta_commit (data, ivs, best_delta, true);
5395 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5396 iv_ca_delta_commit (data, ivs, best_delta, false);
5397 *delta = iv_ca_delta_join (best_delta, *delta);
5398 return best_cost;
5399 }
5400
5401 /* Tries to extend the set IVS in the best possible way in order
5402 to express the USE. If ORIGINALP is true, prefer candidates from
5403 the original set of IVs, otherwise favor important candidates not
5404 based on any memory object. */
5405
5406 static bool
5407 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5408 struct iv_use *use, bool originalp)
5409 {
5410 comp_cost best_cost, act_cost;
5411 unsigned i;
5412 bitmap_iterator bi;
5413 struct iv_cand *cand;
5414 struct iv_ca_delta *best_delta = NULL, *act_delta;
5415 struct cost_pair *cp;
5416
5417 iv_ca_add_use (data, ivs, use, false);
5418 best_cost = iv_ca_cost (ivs);
5419
5420 cp = iv_ca_cand_for_use (ivs, use);
5421 if (!cp)
5422 {
5423 ivs->upto--;
5424 ivs->bad_uses--;
5425 iv_ca_add_use (data, ivs, use, true);
5426 best_cost = iv_ca_cost (ivs);
5427 cp = iv_ca_cand_for_use (ivs, use);
5428 }
5429 if (cp)
5430 {
5431 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5432 iv_ca_set_no_cp (data, ivs, use);
5433 }
5434
5435 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
5436 first try important candidates not based on any memory object. Only if
5437 this fails, try the specific ones. Rationale -- in loops with many
5438 variables the best choice often is to use just one generic biv. If we
5439 added here many ivs specific to the uses, the optimization algorithm later
5440 would be likely to get stuck in a local minimum, thus causing us to create
5441 too many ivs. The approach from few ivs to more seems more likely to be
5442 successful -- starting from few ivs, replacing an expensive use by a
5443 specific iv should always be a win. */
5444 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5445 {
5446 cand = iv_cand (data, i);
5447
5448 if (originalp && cand->pos != IP_ORIGINAL)
5449 continue;
5450
5451 if (!originalp && cand->iv->base_object != NULL_TREE)
5452 continue;
5453
5454 if (iv_ca_cand_used_p (ivs, cand))
5455 continue;
5456
5457 cp = get_use_iv_cost (data, use, cand);
5458 if (!cp)
5459 continue;
5460
5461 iv_ca_set_cp (data, ivs, use, cp);
5462 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
5463 true);
5464 iv_ca_set_no_cp (data, ivs, use);
5465 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5466
5467 if (compare_costs (act_cost, best_cost) < 0)
5468 {
5469 best_cost = act_cost;
5470
5471 iv_ca_delta_free (&best_delta);
5472 best_delta = act_delta;
5473 }
5474 else
5475 iv_ca_delta_free (&act_delta);
5476 }
5477
5478 if (infinite_cost_p (best_cost))
5479 {
5480 for (i = 0; i < use->n_map_members; i++)
5481 {
5482 cp = use->cost_map + i;
5483 cand = cp->cand;
5484 if (!cand)
5485 continue;
5486
5487 /* Already tried this. */
5488 if (cand->important)
5489 {
5490 if (originalp && cand->pos == IP_ORIGINAL)
5491 continue;
5492 if (!originalp && cand->iv->base_object == NULL_TREE)
5493 continue;
5494 }
5495
5496 if (iv_ca_cand_used_p (ivs, cand))
5497 continue;
5498
5499 act_delta = NULL;
5500 iv_ca_set_cp (data, ivs, use, cp);
5501 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
5502 iv_ca_set_no_cp (data, ivs, use);
5503 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5504 cp, act_delta);
5505
5506 if (compare_costs (act_cost, best_cost) < 0)
5507 {
5508 best_cost = act_cost;
5509
5510 if (best_delta)
5511 iv_ca_delta_free (&best_delta);
5512 best_delta = act_delta;
5513 }
5514 else
5515 iv_ca_delta_free (&act_delta);
5516 }
5517 }
5518
5519 iv_ca_delta_commit (data, ivs, best_delta, true);
5520 iv_ca_delta_free (&best_delta);
5521
5522 return !infinite_cost_p (best_cost);
5523 }
5524
5525 /* Finds an initial assignment of candidates to uses. */
5526
5527 static struct iv_ca *
5528 get_initial_solution (struct ivopts_data *data, bool originalp)
5529 {
5530 struct iv_ca *ivs = iv_ca_new (data);
5531 unsigned i;
5532
5533 for (i = 0; i < n_iv_uses (data); i++)
5534 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
5535 {
5536 iv_ca_free (&ivs);
5537 return NULL;
5538 }
5539
5540 return ivs;
5541 }
5542
5543 /* Tries to improve the set of induction variables IVS.  */
5544
5545 static bool
5546 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5547 {
5548 unsigned i, n_ivs;
5549 comp_cost acost, best_cost = iv_ca_cost (ivs);
5550 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5551 struct iv_cand *cand;
5552
5553 /* Try extending the set of induction variables by one. */
5554 for (i = 0; i < n_iv_cands (data); i++)
5555 {
5556 cand = iv_cand (data, i);
5557
5558 if (iv_ca_cand_used_p (ivs, cand))
5559 continue;
5560
5561 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
5562 if (!act_delta)
5563 continue;
5564
5565 /* If we successfully added the candidate and the set is small enough,
5566 try optimizing it by removing other candidates. */
5567 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5568 {
5569 iv_ca_delta_commit (data, ivs, act_delta, true);
5570 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5571 iv_ca_delta_commit (data, ivs, act_delta, false);
5572 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5573 }
5574
5575 if (compare_costs (acost, best_cost) < 0)
5576 {
5577 best_cost = acost;
5578 iv_ca_delta_free (&best_delta);
5579 best_delta = act_delta;
5580 }
5581 else
5582 iv_ca_delta_free (&act_delta);
5583 }
5584
5585 if (!best_delta)
5586 {
5587 /* Try removing the candidates from the set instead. */
5588 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5589
5590 /* Nothing more we can do. */
5591 if (!best_delta)
5592 return false;
5593 }
5594
5595 iv_ca_delta_commit (data, ivs, best_delta, true);
5596 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
5597 iv_ca_delta_free (&best_delta);
5598 return true;
5599 }
5600
5601 /* Attempts to find the optimal set of induction variables.  We use a simple
5602 greedy heuristic -- we try to replace at most one candidate in the selected
5603 solution and remove the unused ivs while this improves the cost. */
5604
5605 static struct iv_ca *
5606 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
5607 {
5608 struct iv_ca *set;
5609
5610 /* Get the initial solution. */
5611 set = get_initial_solution (data, originalp);
5612 if (!set)
5613 {
5614 if (dump_file && (dump_flags & TDF_DETAILS))
5615 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5616 return NULL;
5617 }
5618
5619 if (dump_file && (dump_flags & TDF_DETAILS))
5620 {
5621 fprintf (dump_file, "Initial set of candidates:\n");
5622 iv_ca_dump (data, dump_file, set);
5623 }
5624
5625 while (try_improve_iv_set (data, set))
5626 {
5627 if (dump_file && (dump_flags & TDF_DETAILS))
5628 {
5629 fprintf (dump_file, "Improved to:\n");
5630 iv_ca_dump (data, dump_file, set);
5631 }
5632 }
5633
5634 return set;
5635 }
5636
5637 static struct iv_ca *
5638 find_optimal_iv_set (struct ivopts_data *data)
5639 {
5640 unsigned i;
5641 struct iv_ca *set, *origset;
5642 struct iv_use *use;
5643 comp_cost cost, origcost;
5644
5645 /* Determine the cost using a strategy that starts from the original IVs,
5646 and try again using a strategy that prefers important candidates not
5647 based on any memory object.  */
5648 origset = find_optimal_iv_set_1 (data, true);
5649 set = find_optimal_iv_set_1 (data, false);
5650
5651 if (!origset && !set)
5652 return NULL;
5653
5654 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
5655 cost = set ? iv_ca_cost (set) : infinite_cost;
5656
5657 if (dump_file && (dump_flags & TDF_DETAILS))
5658 {
5659 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
5660 origcost.cost, origcost.complexity);
5661 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
5662 cost.cost, cost.complexity);
5663 }
5664
5665 /* Choose the one with the best cost. */
5666 if (compare_costs (origcost, cost) <= 0)
5667 {
5668 if (set)
5669 iv_ca_free (&set);
5670 set = origset;
5671 }
5672 else if (origset)
5673 iv_ca_free (&origset);
5674
5675 for (i = 0; i < n_iv_uses (data); i++)
5676 {
5677 use = iv_use (data, i);
5678 use->selected = iv_ca_cand_for_use (set, use)->cand;
5679 }
5680
5681 return set;
5682 }
5683
5684 /* Creates a new induction variable corresponding to CAND. */
5685
5686 static void
5687 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5688 {
5689 gimple_stmt_iterator incr_pos;
5690 tree base;
5691 bool after = false;
5692
5693 if (!cand->iv)
5694 return;
5695
5696 switch (cand->pos)
5697 {
5698 case IP_NORMAL:
5699 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
5700 break;
5701
5702 case IP_END:
5703 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
5704 after = true;
5705 break;
5706
5707 case IP_AFTER_USE:
5708 after = true;
5709 /* fall through */
5710 case IP_BEFORE_USE:
5711 incr_pos = gsi_for_stmt (cand->incremented_at);
5712 break;
5713
5714 case IP_ORIGINAL:
5715 /* Mark that the iv is preserved. */
5716 name_info (data, cand->var_before)->preserve_biv = true;
5717 name_info (data, cand->var_after)->preserve_biv = true;
5718
5719 /* Rewrite the increment so that it uses var_before directly. */
5720 find_interesting_uses_op (data, cand->var_after)->selected = cand;
5721 return;
5722 }
5723
5724 gimple_add_tmp_var (cand->var_before);
5725 add_referenced_var (cand->var_before);
5726
5727 base = unshare_expr (cand->iv->base);
5728
5729 create_iv (base, unshare_expr (cand->iv->step),
5730 cand->var_before, data->current_loop,
5731 &incr_pos, after, &cand->var_before, &cand->var_after);
5732 }
5733
5734 /* Creates new induction variables described in SET. */
5735
5736 static void
5737 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5738 {
5739 unsigned i;
5740 struct iv_cand *cand;
5741 bitmap_iterator bi;
5742
5743 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5744 {
5745 cand = iv_cand (data, i);
5746 create_new_iv (data, cand);
5747 }
5748
5749 if (dump_file && (dump_flags & TDF_DETAILS))
5750 {
5751 fprintf (dump_file, "\nSelected IV set: \n");
5752 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5753 {
5754 cand = iv_cand (data, i);
5755 dump_cand (dump_file, cand);
5756 }
5757 fprintf (dump_file, "\n");
5758 }
5759 }
5760
5761 /* Rewrites USE (definition of iv used in a nonlinear expression)
5762 using candidate CAND. */
5763
5764 static void
5765 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5766 struct iv_use *use, struct iv_cand *cand)
5767 {
5768 tree comp;
5769 tree op, tgt;
5770 gimple ass;
5771 gimple_stmt_iterator bsi;
5772
5773 /* An important special case -- if we are asked to express the value of
5774 the original iv by itself, just exit; there is no need to
5775 introduce a new computation (that might also need casting the
5776 variable to unsigned and back). */
5777 if (cand->pos == IP_ORIGINAL
5778 && cand->incremented_at == use->stmt)
5779 {
5780 tree step, ctype, utype;
5781 enum tree_code incr_code = PLUS_EXPR, old_code;
5782
5783 gcc_assert (is_gimple_assign (use->stmt));
5784 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
5785
5786 step = cand->iv->step;
5787 ctype = TREE_TYPE (step);
5788 utype = TREE_TYPE (cand->var_after);
5789 if (TREE_CODE (step) == NEGATE_EXPR)
5790 {
5791 incr_code = MINUS_EXPR;
5792 step = TREE_OPERAND (step, 0);
5793 }
5794
5795 /* Check whether we may leave the computation unchanged.
5796 This is the case only if it does not rely on other
5797 computations in the loop -- otherwise, the computation
5798 we rely upon may be removed in remove_unused_ivs,
5799 thus leading to ICE. */
5800 old_code = gimple_assign_rhs_code (use->stmt);
5801 if (old_code == PLUS_EXPR
5802 || old_code == MINUS_EXPR
5803 || old_code == POINTER_PLUS_EXPR)
5804 {
5805 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
5806 op = gimple_assign_rhs2 (use->stmt);
5807 else if (old_code != MINUS_EXPR
5808 && gimple_assign_rhs2 (use->stmt) == cand->var_before)
5809 op = gimple_assign_rhs1 (use->stmt);
5810 else
5811 op = NULL_TREE;
5812 }
5813 else
5814 op = NULL_TREE;
5815
5816 if (op
5817 && (TREE_CODE (op) == INTEGER_CST
5818 || operand_equal_p (op, step, 0)))
5819 return;
5820
5821 /* Otherwise, add the necessary computations to express
5822 the iv. */
5823 op = fold_convert (ctype, cand->var_before);
5824 comp = fold_convert (utype,
5825 build2 (incr_code, ctype, op,
5826 unshare_expr (step)));
5827 }
5828 else
5829 {
5830 comp = get_computation (data->current_loop, use, cand);
5831 gcc_assert (comp != NULL_TREE);
5832 }
5833
5834 switch (gimple_code (use->stmt))
5835 {
5836 case GIMPLE_PHI:
5837 tgt = PHI_RESULT (use->stmt);
5838
5839 /* If we should keep the biv, do not replace it. */
5840 if (name_info (data, tgt)->preserve_biv)
5841 return;
5842
5843 bsi = gsi_after_labels (gimple_bb (use->stmt));
5844 break;
5845
5846 case GIMPLE_ASSIGN:
5847 tgt = gimple_assign_lhs (use->stmt);
5848 bsi = gsi_for_stmt (use->stmt);
5849 break;
5850
5851 default:
5852 gcc_unreachable ();
5853 }
5854
5855 if (!valid_gimple_rhs_p (comp)
5856 || (gimple_code (use->stmt) != GIMPLE_PHI
5857 /* We can't allow re-allocating the stmt as it might be pointed
5858 to still. */
5859 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
5860 >= gimple_num_ops (gsi_stmt (bsi)))))
5861 {
5862 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
5863 true, GSI_SAME_STMT);
5864 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
5865 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
5866 }
5867
5868 if (gimple_code (use->stmt) == GIMPLE_PHI)
5869 {
5870 ass = gimple_build_assign (tgt, comp);
5871 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
5872
5873 bsi = gsi_for_stmt (use->stmt);
5874 remove_phi_node (&bsi, false);
5875 }
5876 else
5877 {
5878 gimple_assign_set_rhs_from_tree (&bsi, comp);
5879 use->stmt = gsi_stmt (bsi);
5880 }
5881 }
5882
5883 /* Copies the reference information from OLD_REF to NEW_REF. */
5884
5885 static void
5886 copy_ref_info (tree new_ref, tree old_ref)
5887 {
5888 tree new_ptr_base = NULL_TREE;
5889
5890 TREE_SIDE_EFFECTS (new_ref) = TREE_SIDE_EFFECTS (old_ref);
5891 TREE_THIS_VOLATILE (new_ref) = TREE_THIS_VOLATILE (old_ref);
5892
5893 if (TREE_CODE (new_ref) == TARGET_MEM_REF)
5894 new_ptr_base = TMR_BASE (new_ref);
5895 else if (TREE_CODE (new_ref) == MEM_REF)
5896 new_ptr_base = TREE_OPERAND (new_ref, 0);
5897
5898 /* We can transfer points-to information from an old pointer
5899 or decl base to the new one. */
5900 if (new_ptr_base
5901 && TREE_CODE (new_ptr_base) == SSA_NAME
5902 && POINTER_TYPE_P (TREE_TYPE (new_ptr_base))
5903 && !SSA_NAME_PTR_INFO (new_ptr_base))
5904 {
5905 tree base = get_base_address (old_ref);
5906 if (!base)
5907 ;
5908 else if ((INDIRECT_REF_P (base)
5909 || TREE_CODE (base) == MEM_REF)
5910 && TREE_CODE (TREE_OPERAND (base, 0)) == SSA_NAME)
5911 duplicate_ssa_name_ptr_info
5912 (new_ptr_base, SSA_NAME_PTR_INFO (TREE_OPERAND (base, 0)));
5913 else if (TREE_CODE (base) == VAR_DECL
5914 || TREE_CODE (base) == PARM_DECL
5915 || TREE_CODE (base) == RESULT_DECL)
5916 {
5917 struct ptr_info_def *pi = get_ptr_info (new_ptr_base);
5918 pt_solution_set_var (&pi->pt, base);
5919 }
5920 }
5921 }
5922
5923 /* Performs a peephole optimization to reorder the iv update statement with
5924 a mem ref to enable instruction combining in later phases. The mem ref uses
5925 the iv value before the update, so the reordering transformation requires
5926 adjustment of the offset. CAND is the selected IV_CAND.
5927
5928 Example:
5929
5930 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
5931 iv2 = iv1 + 1;
5932
5933 if (t < val) (1)
5934 goto L;
5935 goto Head;
5936
5937
5938 directly propagating t over to (1) will introduce an overlapping live range,
5939 thus increasing register pressure.  This peephole transforms it into:
5940
5941
5942 iv2 = iv1 + 1;
5943 t = MEM_REF (base, iv2, 8, 8);
5944 if (t < val)
5945 goto L;
5946 goto Head;
5947 */
5948
5949 static void
5950 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
5951 {
5952 tree var_after;
5953 gimple iv_update, stmt;
5954 basic_block bb;
5955 gimple_stmt_iterator gsi, gsi_iv;
5956
5957 if (cand->pos != IP_NORMAL)
5958 return;
5959
5960 var_after = cand->var_after;
5961 iv_update = SSA_NAME_DEF_STMT (var_after);
5962
5963 bb = gimple_bb (iv_update);
5964 gsi = gsi_last_nondebug_bb (bb);
5965 stmt = gsi_stmt (gsi);
5966
5967 /* Only handle conditional statements for now.  */
5968 if (gimple_code (stmt) != GIMPLE_COND)
5969 return;
5970
5971 gsi_prev_nondebug (&gsi);
5972 stmt = gsi_stmt (gsi);
5973 if (stmt != iv_update)
5974 return;
5975
5976 gsi_prev_nondebug (&gsi);
5977 if (gsi_end_p (gsi))
5978 return;
5979
5980 stmt = gsi_stmt (gsi);
5981 if (gimple_code (stmt) != GIMPLE_ASSIGN)
5982 return;
5983
5984 if (stmt != use->stmt)
5985 return;
5986
5987 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5988 return;
5989
5990 if (dump_file && (dump_flags & TDF_DETAILS))
5991 {
5992 fprintf (dump_file, "Reordering\n");
5993 print_gimple_stmt (dump_file, iv_update, 0, 0);
5994 print_gimple_stmt (dump_file, use->stmt, 0, 0);
5995 fprintf (dump_file, "\n");
5996 }
5997
5998 gsi = gsi_for_stmt (use->stmt);
5999 gsi_iv = gsi_for_stmt (iv_update);
6000 gsi_move_before (&gsi_iv, &gsi);
6001
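/* The increment now executes before the use; record the candidate as
incremented at the use statement, so the rewriting code computes the
use from the already-incremented value, with the offset adjusted as in
the example above.  */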
6002 cand->pos = IP_BEFORE_USE;
6003 cand->incremented_at = use->stmt;
6004 }
6005
6006 /* Rewrites USE (an address whose value is an iv) using candidate CAND. */
6007
6008 static void
6009 rewrite_use_address (struct ivopts_data *data,
6010 struct iv_use *use, struct iv_cand *cand)
6011 {
6012 aff_tree aff;
6013 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6014 tree base_hint = NULL_TREE;
6015 tree ref, iv;
6016 bool ok;
6017
6018 adjust_iv_update_pos (cand, use);
6019 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6020 gcc_assert (ok);
6021 unshare_aff_combination (&aff);
6022
6023 /* To avoid undefined overflow problems, all IV candidates use unsigned
6024 integer types. The drawback is that this makes it impossible for
6025 create_mem_ref to distinguish an IV that is based on a memory object
6026 from one that represents simply an offset.
6027
6028 To work around this problem, we pass a hint to create_mem_ref that
6029 indicates which variable (if any) in aff is an IV based on a memory
6030 object. Note that we only consider the candidate. If this is not
6031 based on an object, the base of the reference is in some subexpression
6032 of the use -- but these will use pointer types, so they are recognized
6033 by the create_mem_ref heuristics anyway. */
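/* A sketch with hypothetical names: if the candidate is a pointer iv
p_7 walking through object 'a', AFF only sees its unsigned value, e.g.
{ (sizetype) p_7 + 4 }.  The hint identifies p_7 as object-based, so
create_mem_ref can put it into the base slot of the TARGET_MEM_REF
rather than treat it as a plain offset.  */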
6034 iv = var_at_stmt (data->current_loop, cand, use->stmt);
6035 if (cand->iv->base_object)
6036 base_hint = iv;
6037 
6038 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6039 reference_alias_ptr_type (*use->op_p),
6040 iv, base_hint, data->speed);
6041 copy_ref_info (ref, *use->op_p);
6042 *use->op_p = ref;
6043 }
6044
6045 /* Rewrites USE (a condition in which one of the operands is an iv) using
6046 candidate CAND. */
6047
6048 static void
6049 rewrite_use_compare (struct ivopts_data *data,
6050 struct iv_use *use, struct iv_cand *cand)
6051 {
6052 tree comp, *var_p, op, bound;
6053 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6054 enum tree_code compare;
6055 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6056 bool ok;
6057
6058 bound = cp->value;
6059 if (bound)
6060 {
6061 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6062 tree var_type = TREE_TYPE (var);
6063 gimple_seq stmts;
6064
6065 if (dump_file && (dump_flags & TDF_DETAILS))
6066 {
6067 fprintf (dump_file, "Replacing exit test: ");
6068 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6069 }
6070 compare = iv_elimination_compare (data, use);
6071 bound = unshare_expr (fold_convert (var_type, bound));
6072 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6073 if (stmts)
6074 gsi_insert_seq_on_edge_immediate (
6075 loop_preheader_edge (data->current_loop),
6076 stmts);
6077
6078 gimple_cond_set_lhs (use->stmt, var);
6079 gimple_cond_set_code (use->stmt, compare);
6080 gimple_cond_set_rhs (use->stmt, op);
6081 return;
6082 }
6083
6084 /* The induction variable elimination failed; just express the value of
6085 the use in terms of the candidate. */
6086 comp = get_computation (data->current_loop, use, cand);
6087 gcc_assert (comp != NULL_TREE);
6088
6089 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6090 gcc_assert (ok);
6091
6092 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6093 true, GSI_SAME_STMT);
6094 }
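
/* For illustration (hypothetical GIMPLE, not from a real dump), a
successful elimination above turns an exit test such as

  if (i_2 < n_3)

into a test against the chosen candidate, e.g.

  if (p_7 != bound_9)

where bound_9 is computed once on the preheader edge; only if no such
replacement was costed does the fallback path express the original
value in terms of the candidate.  */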
6095
6096 /* Rewrites USE using candidate CAND. */
6097
6098 static void
6099 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6100 {
6101 switch (use->type)
6102 {
6103 case USE_NONLINEAR_EXPR:
6104 rewrite_use_nonlinear_expr (data, use, cand);
6105 break;
6106
6107 case USE_ADDRESS:
6108 rewrite_use_address (data, use, cand);
6109 break;
6110
6111 case USE_COMPARE:
6112 rewrite_use_compare (data, use, cand);
6113 break;
6114
6115 default:
6116 gcc_unreachable ();
6117 }
6118
6119 update_stmt (use->stmt);
6120 }
6121
6122 /* Rewrites the uses using the selected induction variables. */
6123
6124 static void
6125 rewrite_uses (struct ivopts_data *data)
6126 {
6127 unsigned i;
6128 struct iv_cand *cand;
6129 struct iv_use *use;
6130
6131 for (i = 0; i < n_iv_uses (data); i++)
6132 {
6133 use = iv_use (data, i);
6134 cand = use->selected;
6135 gcc_assert (cand);
6136
6137 rewrite_use (data, use, cand);
6138 }
6139 }
6140
6141 /* Removes the ivs that are not used after rewriting. */
6142
6143 static void
6144 remove_unused_ivs (struct ivopts_data *data)
6145 {
6146 unsigned j;
6147 bitmap_iterator bi;
6148 bitmap toremove = BITMAP_ALLOC (NULL);
6149
6150 /* Figure out an order in which to release SSA DEFs so that we don't
6151 release something that we'd have to propagate into a debug stmt
6152 afterwards. */
6153 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6154 {
6155 struct version_info *info;
6156
6157 info = ver_info (data, j);
6158 if (info->iv
6159 && !integer_zerop (info->iv->step)
6160 && !info->inv_id
6161 && !info->iv->have_use_for
6162 && !info->preserve_biv)
6163 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
6164 }
6165
6166 release_defs_bitset (toremove);
6167
6168 BITMAP_FREE (toremove);
6169 }
6170
6171 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6172 for pointer_map_traverse. */
6173
6174 static bool
6175 free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
6176 void *data ATTRIBUTE_UNUSED)
6177 {
6178 struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;
6179
6180 free (niter);
6181 return true;
6182 }
6183
6184 /* Frees data allocated by the optimization of a single loop. */
6185
6186 static void
6187 free_loop_data (struct ivopts_data *data)
6188 {
6189 unsigned i, j;
6190 bitmap_iterator bi;
6191 tree obj;
6192
6193 if (data->niters)
6194 {
6195 pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
6196 pointer_map_destroy (data->niters);
6197 data->niters = NULL;
6198 }
6199
6200 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
6201 {
6202 struct version_info *info;
6203
6204 info = ver_info (data, i);
6205 if (info->iv)
6206 free (info->iv);
6207 info->iv = NULL;
6208 info->has_nonlin_use = false;
6209 info->preserve_biv = false;
6210 info->inv_id = 0;
6211 }
6212 bitmap_clear (data->relevant);
6213 bitmap_clear (data->important_candidates);
6214
6215 for (i = 0; i < n_iv_uses (data); i++)
6216 {
6217 struct iv_use *use = iv_use (data, i);
6218
6219 free (use->iv);
6220 BITMAP_FREE (use->related_cands);
6221 for (j = 0; j < use->n_map_members; j++)
6222 if (use->cost_map[j].depends_on)
6223 BITMAP_FREE (use->cost_map[j].depends_on);
6224 free (use->cost_map);
6225 free (use);
6226 }
6227 VEC_truncate (iv_use_p, data->iv_uses, 0);
6228
6229 for (i = 0; i < n_iv_cands (data); i++)
6230 {
6231 struct iv_cand *cand = iv_cand (data, i);
6232
6233 if (cand->iv)
6234 free (cand->iv);
6235 if (cand->depends_on)
6236 BITMAP_FREE (cand->depends_on);
6237 free (cand);
6238 }
6239 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
6240
6241 if (data->version_info_size < num_ssa_names)
6242 {
6243 data->version_info_size = 2 * num_ssa_names;
6244 free (data->version_info);
6245 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
6246 }
6247
6248 data->max_inv_id = 0;
6249
6250 FOR_EACH_VEC_ELT (tree, decl_rtl_to_reset, i, obj)
6251 SET_DECL_RTL (obj, NULL_RTX);
6252
6253 VEC_truncate (tree, decl_rtl_to_reset, 0);
6254
6255 htab_empty (data->inv_expr_tab);
6256 data->inv_expr_id = 0;
6257 }
6258
6259 /* Finalizes the data structures used by the iv optimization pass
6260 and stored in DATA. */
6261
6262 static void
6263 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
6264 {
6265 free_loop_data (data);
6266 free (data->version_info);
6267 BITMAP_FREE (data->relevant);
6268 BITMAP_FREE (data->important_candidates);
6269
6270 VEC_free (tree, heap, decl_rtl_to_reset);
6271 VEC_free (iv_use_p, heap, data->iv_uses);
6272 VEC_free (iv_cand_p, heap, data->iv_candidates);
6273 htab_delete (data->inv_expr_tab);
6274 }
6275
6276 /* Returns true if loop body BODY contains a call that is not an inexpensive builtin. */
6277
6278 static bool
6279 loop_body_includes_call (basic_block *body, unsigned num_nodes)
6280 {
6281 gimple_stmt_iterator gsi;
6282 unsigned i;
6283
6284 for (i = 0; i < num_nodes; i++)
6285 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
6286 {
6287 gimple stmt = gsi_stmt (gsi);
6288 if (is_gimple_call (stmt)
6289 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
6290 return true;
6291 }
6292 return false;
6293 }
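
/* This flag is recorded per loop (see tree_ssa_iv_optimize_loop) and
feeds the register-pressure estimate: across a real call the
call-clobbered registers are unavailable, which makes extra induction
variables more expensive.  This is a summary of intent, not of the
exact cost formula.  */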
6294
6295 /* Optimizes LOOP. Returns true if anything changed. */
6296
6297 static bool
6298 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
6299 {
6300 bool changed = false;
6301 struct iv_ca *iv_ca;
6302 edge exit;
6303 basic_block *body;
6304
6305 gcc_assert (!data->niters);
6306 data->current_loop = loop;
6307 data->speed = optimize_loop_for_speed_p (loop);
6308
6309 if (dump_file && (dump_flags & TDF_DETAILS))
6310 {
6311 fprintf (dump_file, "Processing loop %d\n", loop->num);
6312
6313 exit = single_dom_exit (loop);
6314 if (exit)
6315 {
6316 fprintf (dump_file, " single exit %d -> %d, exit condition ",
6317 exit->src->index, exit->dest->index);
6318 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
6319 fprintf (dump_file, "\n");
6320 }
6321
6322 fprintf (dump_file, "\n");
6323 }
6324
6325 body = get_loop_body (loop);
6326 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
6327 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
6328 free (body);
6329
6330 /* For each ssa name, determine whether it behaves as an induction variable
6331 in this loop. */
6332 if (!find_induction_variables (data))
6333 goto finish;
6334
6335 /* Finds interesting uses (item 1). */
6336 find_interesting_uses (data);
6337 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
6338 goto finish;
6339
6340 /* Finds candidates for the induction variables (item 2). */
6341 find_iv_candidates (data);
6342
6343 /* Calculates the costs (item 3, part 1). */
6344 determine_iv_costs (data);
6345 determine_use_iv_costs (data);
6346 determine_set_costs (data);
6347
6348 /* Find the optimal set of induction variables (item 3, part 2). */
6349 iv_ca = find_optimal_iv_set (data);
6350 if (!iv_ca)
6351 goto finish;
6352 changed = true;
6353
6354 /* Create the new induction variables (item 4, part 1). */
6355 create_new_ivs (data, iv_ca);
6356 iv_ca_free (&iv_ca);
6357
6358 /* Rewrite the uses (item 4, part 2). */
6359 rewrite_uses (data);
6360
6361 /* Remove the ivs that are unused after rewriting. */
6362 remove_unused_ivs (data);
6363
6364 /* We have changed the structure of induction variables; it might happen
6365 that definitions in the scev database refer to some of them that were
6366 eliminated. */
6367 scev_reset ();
6368
6369 finish:
6370 free_loop_data (data);
6371
6372 return changed;
6373 }
6374
6375 /* Main entry point. Optimizes induction variables in loops. */
6376
6377 void
6378 tree_ssa_iv_optimize (void)
6379 {
6380 struct loop *loop;
6381 struct ivopts_data data;
6382 loop_iterator li;
6383
6384 tree_ssa_iv_optimize_init (&data);
6385
6386 /* Optimize the loops starting with the innermost ones. */
6387 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
6388 {
6389 if (dump_file && (dump_flags & TDF_DETAILS))
6390 flow_loop_dump (loop, dump_file, NULL, 1);
6391
6392 tree_ssa_iv_optimize_loop (&data, loop);
6393 }
6394
6395 tree_ssa_iv_optimize_finalize (&data);
6396 }
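
/* End-to-end sketch (hypothetical C source, illustrating intent only):
a loop such as

  for (i = 0; i < n; i++)
    a[i] = 0;

may have its address computation strength-reduced into a pointer iv,
conceptually

  for (p = &a[0]; p != &a[n]; p++)
    *p = 0;

where rewrite_use_address introduced the *p reference,
rewrite_use_compare replaced the exit test, and remove_unused_ivs
deleted the now-dead counter i.  */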