/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
         induction variable in the set and adds its cost to the sum.  The cost
         reflects the time spent on modifying the induction variable's value
         to be usable for the given purpose (adding base and offset for
         arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
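
/* For illustration only (this example is not part of the original sources):
   on a simple loop

     for (i = 0; i < n; i++)
       a[i] = 0;

   the address a[i] must be recomputed from i on every iteration (base of
   `a' plus i * sizeof (*a)).  The strength reduction performed here instead
   selects a pointer induction variable, giving code equivalent to

     p = a;
     for (; p != a + n; p++)
       *p = 0;

   so that the address use is expressed directly by the iv and the
   per-iteration multiplication disappears.  */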

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "rtl.h"
#include "tm_p.h"
#include "hard-reg-set.h"
#include "basic-block.h"
#include "output.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "varray.h"
#include "expr.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "recog.h"
#include "pointer-set.h"
#include "hashtab.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"

/* The infinite cost.  */
#define INFTY 10000000

/* The expected number of loop iterations.  TODO -- use profiling instead of
   this.  */
#define AVG_LOOP_NITER(LOOP) 5


/* Representation of the induction variable.  */
struct iv
{
  tree base;            /* Initial value of the iv.  */
  tree base_object;     /* A memory object to which the induction variable points.  */
  tree step;            /* Step of the iv (constant only).  */
  tree ssa_name;        /* The ssa name with the value.  */
  bool biv_p;           /* Is it a biv?  */
  bool have_use_for;    /* Do we already have a use for it?  */
  unsigned use_id;      /* The identifier in the use if it is the case.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;            /* The ssa name.  */
  struct iv *iv;        /* Induction variable description.  */
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
                           an expression that is not an induction variable.  */
  unsigned inv_id;      /* Id of an invariant.  */
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
  USE_ADDRESS,          /* Use in an address.  */
  USE_COMPARE           /* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  unsigned cost;        /* The runtime cost.  */
  unsigned complexity;  /* The estimate of the complexity of the code for
                           the computation (in no concrete units --
                           complexity field should be larger for more
                           complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost zero_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand; /* The candidate.  */
  comp_cost cost;       /* The cost.  */
  bitmap depends_on;    /* The list of invariants that have to be
                           preserved.  */
  tree value;           /* For final value elimination, the expression for
                           the final value of the iv.  For iv elimination,
                           the new bound to compare with.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;          /* The id of the use.  */
  enum use_type type;   /* Type of the use.  */
  struct iv *iv;        /* The induction variable it is based on.  */
  tree stmt;            /* Statement in which it occurs.  */
  tree *op_p;           /* The place where it occurs.  */
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
                           important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
                        /* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
                        /* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,            /* At the end, just before the exit condition.  */
  IP_END,               /* At the end of the latch block.  */
  IP_ORIGINAL           /* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;          /* The number of the candidate.  */
  bool important;       /* Whether this is an "important" candidate, i.e. such
                           that it should be considered by all uses.  */
  enum iv_position pos; /* Where it is computed.  */
  tree incremented_at;  /* For original biv, the statement where it is
                           incremented.  */
  tree var_before;      /* The variable used for it before increment.  */
  tree var_after;       /* The variable used for it after increment.  */
  struct iv *iv;        /* The value of the candidate.  NULL for
                           "pseudocandidate" used to indicate the possibility
                           to replace the final value of an iv by direct
                           computation of the value.  */
  unsigned cost;        /* Cost of the candidate.  */
  bitmap depends_on;    /* The list of invariants that are used in step of the
                           biv.  */
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on number of candidates below which all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return VEC_length (iv_use_p, data->iv_uses);
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_use_p, data->iv_uses, i);
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return VEC_length (iv_cand_p, data->iv_candidates);
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_cand_p, data->iv_candidates, i);
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_generic_expr (file, use->stmt, TDF_SLIM);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

extern void dump_uses (FILE *, struct ivopts_data *);
void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
           cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  */

static bool
stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
{
  basic_block cand_bb = bb_for_stmt (cand->incremented_at);
  basic_block stmt_bb = bb_for_stmt (stmt);
  block_stmt_iterator bsi;

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  /* Scan the block from the end, since the original ivs are usually
     incremented at the end of the loop body.  */
  for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
    {
      if (bsi_stmt (bsi) == cand->incremented_at)
        return false;
      if (bsi_stmt (bsi) == stmt)
        return true;
    }
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
      return stmt_after_ip_original_pos (cand, stmt);

    default:
      gcc_unreachable ();
    }
}

/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
                                  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
        return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
        return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
                            idx_contains_abnormal_ssa_name_p,
                            NULL);

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
        return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
        return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Returns tree describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static tree
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc desc;
  tree niter;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We must know it
         unconditionally (i.e., without possibility of # of iterations
         being zero).  Also, we cannot safely work with ssa names that
         appear in phi nodes on abnormal edges, so that we do not create
         overlapping life ranges for them (PR 27283).  */
      if (number_of_iterations_exit (data->current_loop,
                                     exit, &desc, true)
          && integer_zerop (desc.may_be_zero)
          && !contains_abnormal_ssa_name_p (desc.niter))
        niter = desc.niter;
      else
        niter = NULL_TREE;

      *pointer_map_insert (data->niters, exit) = niter;
    }
  else
    niter = (tree) *slot;

  return niter;
}

/* Returns tree describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static tree
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
}

/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (TREE_CODE (expr) == NOP_EXPR
      || TREE_CODE (expr) == CONVERT_EXPR)
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
        return expr;

      if (TREE_CODE (base) == INDIRECT_REF)
        return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
                           build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}

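/* An illustrative walk-through (not part of the original sources): for
   EXPR = &a[i_1] + 4, the POINTER_PLUS_EXPR case recurses into &a[i_1],
   the ADDR_EXPR case then calls get_base_address on a[i_1], which yields
   the declaration `a', so the returned base object is &a.  For
   EXPR = (char *) 0, the INTEGER_CST case applies and NULL_TREE is
   returned, meaning the iv points to no trackable object.  */
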
/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));

      if (!bb
          || !flow_bb_inside_loop_p (data->current_loop, bb))
        set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (tree phi)
{
  struct loop *loop = bb_for_stmt (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (!is_gimple_reg (name))
    return NULL_TREE;

  if (!simple_iv (loop, phi, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  tree phi, step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;

  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
    {
      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
        continue;

      step = determine_biv_step (phi);
      if (!step)
        continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
          || contains_abnormal_ssa_name_p (step))
        continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      /* STEP is known to be nonnull here, so convert it unconditionally.  */
      if (POINTER_TYPE_P (type))
        step = fold_convert (sizetype, step);
      else
        step = fold_convert (type, step);

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  tree phi, var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;

  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
    {
      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
        continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
        continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
          || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
        continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
    return false;

  lhs = GIMPLE_STMT_OPERAND (stmt, 0);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, stmt, GIMPLE_STMT_OPERAND (stmt, 1), iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, tree stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, GIMPLE_STMT_OPERAND (stmt, 0), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  block_stmt_iterator bsi;

  for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
    find_givs_in_stmt (data, bsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      tree niter = niter_for_single_dom_exit (data);

      if (niter)
        {
          fprintf (dump_file, "  number of iterations ");
          print_generic_expr (dump_file, niter, TDF_SLIM);
          fprintf (dump_file, "\n\n");
        }

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          if (ver_info (data, i)->iv)
            dump_iv (dump_file, ver_info (data, i)->iv);
        }
    }

  return true;
}

/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
            tree stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* Avoid showing the ssa name in the dumps, if it was not reset by the
     caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  VEC_safe_push (iv_use_p, heap, data->iv_uses, use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || !is_gimple_reg (op))
    return;

  bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  tree stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (TREE_CODE (stmt) == PHI_NODE
              || TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Given a condition *COND_P, checks whether it is a compare of an induction
   variable and an invariant.  If this is the case, CONTROL_VAR is set
   to location of the iv, BOUND to the location of the invariant,
   IV_VAR and IV_BOUND are set to the corresponding induction variable
   descriptions, and true is returned.  If this is not the case,
   CONTROL_VAR and BOUND are set to the arguments of the condition and
   false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, tree *cond_p,
                       tree **control_var, tree **bound,
                       struct iv **iv_var, struct iv **iv_bound)
{
  /* The nodes returned when COND has just one operand.  Note that you should
     not modify anything in BOUND or IV_BOUND because of this.  */
  static struct iv const_iv;
  static tree zero;
  tree cond = *cond_p;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (cond) == SSA_NAME)
    {
      op0 = cond_p;
      iv0 = get_iv (data, cond);
      ret = (iv0 && !integer_zerop (iv0->step));
      goto end;
    }

  if (!COMPARISON_CLASS_P (cond))
    {
      op0 = cond_p;
      goto end;
    }

  op0 = &TREE_OPERAND (cond, 0);
  op1 = &TREE_OPERAND (cond, 1);
  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}

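/* A hypothetical walk-through (for illustration; not part of the original
   sources): for the exit test i_1 < n_2, where i_1 is an iv with step 1
   and n_2 is loop invariant, extract_cond_operands sets *CONTROL_VAR to
   the location of i_1 and *BOUND to the location of n_2, fills in the
   corresponding iv descriptions, and returns true.  For n_2 < i_1 the
   operands are swapped first, since the control variable may appear on
   either side of the comparison.  */
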
/* Checks whether the condition *COND_P in STMT is interesting
   and if so, records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, cond_p, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, cond_p, civ, stmt, USE_COMPARE);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
      if (def_bb
          && flow_bb_inside_loop_p (loop, def_bb))
        return false;

      return true;
    }

  if (!EXPR_P (expr) && !GIMPLE_STMT_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Accumulates the steps of indices into DATA and replaces their values with
   the initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  tree stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
      || TREE_CODE (base) == ALIGN_INDIRECT_REF)
    return false;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is an array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both the lower bound
     and the step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF)
    {
      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
          || !expr_invariant_in_loop_p (loop, lbound))
        return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  For a base of *D42 with iv->base being &x[0], we produce
     *&x[0], which is not folded and does not trigger the
     ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
        return false;
    }
  else
    /* The step for pointer arithmetic is already 1 byte.  */
    step = build_int_cst (sizetype, 1);

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
                            sizetype, &iv_base, &iv_step, dta->stmt,
                            false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
                void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
        return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
        return false;

      *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
                              precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
          || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
        return false;

      if (code == MINUS_EXPR)
        p1 = double_int_neg (p1);
      *mul = double_int_sext (double_int_add (p0, p1), precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
        return false;

      p0 = double_int_sext (tree_to_double_int (top), precision);
      p1 = double_int_sext (tree_to_double_int (bot), precision);
      if (double_int_zero_p (p1))
        return false;
      *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
                              precision);
      return double_int_zero_p (res);

    default:
      return false;
    }
}

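/* A worked example (illustrative only, not from the original sources):
   for TOP = x_1 * 4 and BOT = x_1, the MULT_EXPR case recurses on
   (x_1, x_1), which matches via operand_equal_p with multiplier 1, so
   *MUL becomes 4 and true is returned.  For TOP = 20 and BOT = 8, both
   INTEGER_CSTs, the floor division leaves a nonzero remainder, so false
   is returned because 20 is not a constant multiple of 8.  */
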
/* Returns true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically a copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
                              &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = TYPE_ALIGN (base_type);

  if (mode != BLKmode)
    {
      double_int mul;
      tree al = build_int_cst (TREE_TYPE (step),
                               GET_MODE_ALIGNMENT (mode) / BITS_PER_UNIT);

      if (base_align < GET_MODE_ALIGNMENT (mode)
          || bitpos % GET_MODE_ALIGNMENT (mode) != 0
          || bitpos % BITS_PER_UNIT != 0)
        return true;

      if (!constant_multiple_of (step, al, &mul))
        return true;
    }

  return false;
}

/* Return true if EXPR may be non-addressable.  */

static bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
         target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
             || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
         and make them look addressable.  After some processing the
         non-addressability may be uncovered again, causing ADDR_EXPRs
         of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
          || is_gimple_min_invariant (TREE_OPERAND (expr, 0)))
        return true;

      /* ... fall through ... */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case CONVERT_EXPR:
    case NOP_EXPR:
      return true;

    default:
      break;
    }

  return false;
}

/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
{
  tree base = *op_p, step = build_int_cst (sizetype, 0);
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (stmt_ann (stmt)->has_volatile_ops)
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
          && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_BASE (base));
          if (!civ)
            goto fail;

          TMR_BASE (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX (base)
          && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX (base));
          if (!civ)
            goto fail;

          TMR_INDEX (base) = civ->base;
          astep = civ->step;

          if (astep)
            {
              if (TMR_STEP (base))
                astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

              step = fold_build2 (PLUS_EXPR, type, step, astep);
            }
        }

      if (integer_zerop (step))
        goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = build_int_cst (sizetype, 0);
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
          || integer_zerop (ifs_ivopts_data.step))
        goto fail;
      step = ifs_ivopts_data.step;

      gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
      gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);

      /* Check that the base expression is addressable.  This needs
         to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
        goto fail;

      /* Moreover, on strict alignment platforms, check that it is
         sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
        goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
         have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
        {
          tree *ref = &TREE_OPERAND (base, 0);
          while (handled_component_p (*ref))
            ref = &TREE_OPERAND (*ref, 0);
          if (TREE_CODE (*ref) == INDIRECT_REF)
            *ref = fold_indirect_ref (*ref);
        }
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}

/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, tree stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
{
  struct iv *iv;
  tree op, lhs, rhs;
  ssa_op_iter iter;
  use_operand_p use_p;

  find_invariants_stmt (data, stmt);

  if (TREE_CODE (stmt) == COND_EXPR)
    {
      find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
      return;
    }

  if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
    {
      lhs = GIMPLE_STMT_OPERAND (stmt, 0);
      rhs = GIMPLE_STMT_OPERAND (stmt, 1);

      if (TREE_CODE (lhs) == SSA_NAME)
        {
          /* If the statement defines an induction variable, the uses are not
             interesting by themselves.  */

          iv = get_iv (data, lhs);

          if (iv && !integer_zerop (iv->step))
            return;
        }

      switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
        {
        case tcc_comparison:
          find_interesting_uses_cond (data, stmt,
                                      &GIMPLE_STMT_OPERAND (stmt, 1));
          return;

        case tcc_reference:
          find_interesting_uses_address (data, stmt,
                                         &GIMPLE_STMT_OPERAND (stmt, 1));
          if (REFERENCE_CLASS_P (lhs))
            find_interesting_uses_address (data, stmt,
                                           &GIMPLE_STMT_OPERAND (stmt, 0));
          return;

        default: ;
        }

      if (REFERENCE_CLASS_P (lhs)
          && is_gimple_val (rhs))
        {
          find_interesting_uses_address (data, stmt,
                                         &GIMPLE_STMT_OPERAND (stmt, 0));
          find_interesting_uses_op (data, rhs);
          return;
        }

      /* TODO -- we should also handle address uses of type

         memory = call (whatever);

         and

         call (memory).  */
    }

  if (TREE_CODE (stmt) == PHI_NODE
      && bb_for_stmt (stmt) == data->current_loop->header)
    {
      lhs = PHI_RESULT (stmt);
      iv = get_iv (data, lhs);

      if (iv && !integer_zerop (iv->step))
        return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
        continue;

      iv = get_iv (data, op);
      if (!iv)
        continue;

      find_interesting_uses_op (data, op);
    }
}

/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  tree phi, def;

  for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
    {
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (is_gimple_reg (def))
        find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  block_stmt_iterator bsi;
  tree phi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
        if (e->dest != EXIT_BLOCK_PTR
            && !flow_bb_inside_loop_p (data->current_loop, e->dest))
          find_interesting_uses_outside (data, e);

      for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
        find_interesting_uses_stmt (data, phi);
      for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
        find_interesting_uses_stmt (data, bsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          info = ver_info (data, i);
          if (info->inv_id)
            {
              fprintf (dump_file, "  ");
              print_generic_expr (dump_file, info->name, TDF_SLIM);
              fprintf (dump_file, " is invariant (%d)%s\n",
                       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
            }
        }

      fprintf (dump_file, "\n");
    }

  free (body);
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
                unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
          || integer_zerop (expr))
        return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
      if (op0 == TREE_OPERAND (expr, 0)
          && op1 == TREE_OPERAND (expr, 1))
        return orig_expr;

      if (integer_zerop (op1))
        expr = op0;
      else if (integer_zerop (op0))
        {
          if (code == MINUS_EXPR)
            expr = fold_build1 (NEGATE_EXPR, type, op1);
          else
            expr = op1;
        }
      else
        expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
      if (!inside_addr)
        return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
        break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
          && integer_zerop (op1))
        {
          /* Strip the component reference completely.  */
          op0 = TREE_OPERAND (expr, 0);
          op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
          *offset += off0;
          return op0;
        }
      break;

    case COMPONENT_REF:
      if (!inside_addr)
        return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
          && cst_and_fits_in_hwi (tmp))
        {
          /* Strip the component reference completely.  */
          op0 = TREE_OPERAND (expr, 0);
          op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
          *offset = off0 + int_cst_value (tmp);
          return op0;
        }
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
        return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case INDIRECT_REF:
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside an address, we might strip the top level component references,
     thus changing the type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  */

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  return strip_offset_1 (expr, false, false, offset);
}

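/* An illustrative example (not part of the original sources): assuming
   4-byte int elements, for EXPR = &a[5] strip_offset descends through the
   ADDR_EXPR into the ARRAY_REF, multiplies the constant index 5 by the
   element size 4, strips the reference down to the array `a' itself, and
   returns its address while storing 20 to *OFFSET.  */
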
/* Returns variant of TYPE that can be used as base for different uses.
   We return unsigned type with the same precision, which avoids problems
   with overflows.  */

static tree
generic_type_for (tree type)
{
  if (POINTER_TYPE_P (type))
    return unsigned_type_for (type);

  if (TYPE_UNSIGNED (type))
    return type;

  return unsigned_type_for (type);
}

/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap in which we should store them.  */

static struct ivopts_data *fd_ivopts_data;
static tree
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  bitmap *depends_on = (bitmap *) data;
  struct version_info *info;

  if (TREE_CODE (*expr_p) != SSA_NAME)
    return NULL_TREE;
  info = name_info (fd_ivopts_data, *expr_p);

  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  if (!*depends_on)
    *depends_on = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*depends_on, info->inv_id);

  return NULL_TREE;
}

/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
   replacement of the final value of the iv by a direct computation.  */

static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
                 tree base, tree step, bool important, enum iv_position pos,
                 struct iv_use *use, tree incremented_at)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  if (base)
    {
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      /* Don't convert the base to the generic type for pointers as the generic
         type is an integer type with the same size as the pointer type.  */
      if (type != orig_type && !POINTER_TYPE_P (orig_type))
        {
          base = fold_convert (type, base);
          step = fold_convert (type, step);
        }
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (cand->pos != pos)
        continue;

      if (cand->incremented_at != incremented_at)
        continue;

      if (!cand->iv)
        {
          if (!base && !step)
            break;

          continue;
        }

      if (!base && !step)
        continue;

      if (operand_equal_p (base, cand->iv->base, 0)
          && operand_equal_p (step, cand->iv->step, 0))
        break;
    }

  if (i == n_iv_cands (data))
    {
      cand = XCNEW (struct iv_cand);
      cand->id = i;

      if (!base && !step)
        cand->iv = NULL;
      else
        cand->iv = alloc_iv (base, step);

      cand->pos = pos;
      if (pos != IP_ORIGINAL && cand->iv)
        {
          cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
          cand->var_after = cand->var_before;
        }
      cand->important = important;
      cand->incremented_at = incremented_at;
      VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);

      if (step
          && TREE_CODE (step) != INTEGER_CST)
        {
          fd_ivopts_data = data;
          walk_tree (&step, find_depends, &cand->depends_on, NULL);
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        dump_cand (dump_file, cand);
    }

  if (important && !cand->important)
    {
      cand->important = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Candidate %d is important\n", cand->id);
    }

  if (use)
    {
      bitmap_set_bit (use->related_cands, i);
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Candidate %d is related to use %d\n",
                 cand->id, use->id);
    }

  return cand;
}

2131 /* Returns true if incrementing the induction variable at the end of the LOOP
2132 is allowed.
2133
2134 The purpose is to avoid splitting the latch edge with a biv increment, thus
2135 creating a jump, possibly confusing other optimization passes and leaving
2136 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2137 is not available (so we do not have a better alternative), or if the latch
2138 edge is already nonempty. */
2139
2140 static bool
2141 allow_ip_end_pos_p (struct loop *loop)
2142 {
2143 if (!ip_normal_pos (loop))
2144 return true;
2145
2146 if (!empty_block_p (ip_end_pos (loop)))
2147 return true;
2148
2149 return false;
2150 }
2151
2152 /* Adds a candidate BASE + STEP * i. The important field is set to IMPORTANT
2153 and the position to POS. If USE is not NULL, the candidate is set as related to
2154 it. The candidate computation is scheduled on all available positions. */
2155
2156 static void
2157 add_candidate (struct ivopts_data *data,
2158 tree base, tree step, bool important, struct iv_use *use)
2159 {
2160 if (ip_normal_pos (data->current_loop))
2161 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2162 if (ip_end_pos (data->current_loop)
2163 && allow_ip_end_pos_p (data->current_loop))
2164 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2165 }
2166
2167 /* Add a standard "0 + 1 * iteration" iv candidate for a
2168 type with SIZE bits. */
2169
2170 static void
2171 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2172 unsigned int size)
2173 {
2174 tree type = lang_hooks.types.type_for_size (size, true);
2175 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2176 true, NULL);
2177 }
2178
2179 /* Adds standard iv candidates. */
2180
2181 static void
2182 add_standard_iv_candidates (struct ivopts_data *data)
2183 {
2184 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2185
2186 /* The same for a double-integer type if it is still fast enough. */
2187 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2188 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2189 }
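
/* E.g., with 32-bit ints on a target with 64-bit words, this adds the
   candidate 0 + 1 * iteration in both a 32-bit and a 64-bit unsigned type;
   with 32-bit words, only the 32-bit candidate is added.  */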
2190
2191
2192 /* Adds candidates based on the old induction variable IV. */
2193
2194 static void
2195 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2196 {
2197 tree phi, def;
2198 struct iv_cand *cand;
2199
2200 add_candidate (data, iv->base, iv->step, true, NULL);
2201
2202 /* The same, but with initial value zero. */
2203 add_candidate (data,
2204 build_int_cst (TREE_TYPE (iv->base), 0),
2205 iv->step, true, NULL);
2206
2207 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2208 if (TREE_CODE (phi) == PHI_NODE)
2209 {
2210 /* Additionally record the possibility of leaving the original iv
2211 untouched. */
2212 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2213 cand = add_candidate_1 (data,
2214 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2215 SSA_NAME_DEF_STMT (def));
2216 cand->var_before = iv->ssa_name;
2217 cand->var_after = def;
2218 }
2219 }
2220
2221 /* Adds candidates based on the old induction variables. */
2222
2223 static void
2224 add_old_ivs_candidates (struct ivopts_data *data)
2225 {
2226 unsigned i;
2227 struct iv *iv;
2228 bitmap_iterator bi;
2229
2230 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2231 {
2232 iv = ver_info (data, i)->iv;
2233 if (iv && iv->biv_p && !integer_zerop (iv->step))
2234 add_old_iv_candidates (data, iv);
2235 }
2236 }
2237
2238 /* Adds candidates based on the value of the induction variable IV and USE. */
2239
2240 static void
2241 add_iv_value_candidates (struct ivopts_data *data,
2242 struct iv *iv, struct iv_use *use)
2243 {
2244 unsigned HOST_WIDE_INT offset;
2245 tree base;
2246 tree basetype;
2247
2248 add_candidate (data, iv->base, iv->step, false, use);
2249
2250 /* The same, but with initial value zero. Make such a variable important,
2251 since it is generic enough that possibly many uses may be based
2252 on it. */
2253 basetype = TREE_TYPE (iv->base);
2254 if (POINTER_TYPE_P (basetype))
2255 basetype = sizetype;
2256 add_candidate (data, build_int_cst (basetype, 0),
2257 iv->step, true, use);
2258
2259 /* Third, try removing the constant offset. */
2260 base = strip_offset (iv->base, &offset);
2261 if (offset)
2262 add_candidate (data, base, iv->step, false, use);
2263 }
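
/* For example, a use with iv {base = &a[0] + 16, step = 4} gives rise to
   the candidate itself, to the important candidate {base = 0, step = 4}
   (in sizetype, since the base is a pointer), and, because the constant
   offset 16 can be stripped, to the candidate {base = &a[0], step = 4}.  */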
2264
2265 /* Adds candidates based on the uses. */
2266
2267 static void
2268 add_derived_ivs_candidates (struct ivopts_data *data)
2269 {
2270 unsigned i;
2271
2272 for (i = 0; i < n_iv_uses (data); i++)
2273 {
2274 struct iv_use *use = iv_use (data, i);
2275
2276 if (!use)
2277 continue;
2278
2279 switch (use->type)
2280 {
2281 case USE_NONLINEAR_EXPR:
2282 case USE_COMPARE:
2283 case USE_ADDRESS:
2284 /* Just add the ivs based on the value of the iv used here. */
2285 add_iv_value_candidates (data, use->iv, use);
2286 break;
2287
2288 default:
2289 gcc_unreachable ();
2290 }
2291 }
2292 }
2293
2294 /* Record important candidates and add them to related_cands bitmaps
2295 if needed. */
2296
2297 static void
2298 record_important_candidates (struct ivopts_data *data)
2299 {
2300 unsigned i;
2301 struct iv_use *use;
2302
2303 for (i = 0; i < n_iv_cands (data); i++)
2304 {
2305 struct iv_cand *cand = iv_cand (data, i);
2306
2307 if (cand->important)
2308 bitmap_set_bit (data->important_candidates, i);
2309 }
2310
2311 data->consider_all_candidates = (n_iv_cands (data)
2312 <= CONSIDER_ALL_CANDIDATES_BOUND);
2313
2314 if (data->consider_all_candidates)
2315 {
2316 /* We will not need "related_cands" bitmaps in this case,
2317 so release them to decrease peak memory consumption. */
2318 for (i = 0; i < n_iv_uses (data); i++)
2319 {
2320 use = iv_use (data, i);
2321 BITMAP_FREE (use->related_cands);
2322 }
2323 }
2324 else
2325 {
2326 /* Add important candidates to the related_cands bitmaps. */
2327 for (i = 0; i < n_iv_uses (data); i++)
2328 bitmap_ior_into (iv_use (data, i)->related_cands,
2329 data->important_candidates);
2330 }
2331 }
2332
2333 /* Finds the candidates for the induction variables. */
2334
2335 static void
2336 find_iv_candidates (struct ivopts_data *data)
2337 {
2338 /* Add commonly used ivs. */
2339 add_standard_iv_candidates (data);
2340
2341 /* Add old induction variables. */
2342 add_old_ivs_candidates (data);
2343
2344 /* Add induction variables derived from uses. */
2345 add_derived_ivs_candidates (data);
2346
2347 /* Record the important candidates. */
2348 record_important_candidates (data);
2349 }
2350
2351 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2352 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2353 we allocate a small hash table for every use. */
2354
2355 static void
2356 alloc_use_cost_map (struct ivopts_data *data)
2357 {
2358 unsigned i, size, s, j;
2359
2360 for (i = 0; i < n_iv_uses (data); i++)
2361 {
2362 struct iv_use *use = iv_use (data, i);
2363 bitmap_iterator bi;
2364
2365 if (data->consider_all_candidates)
2366 size = n_iv_cands (data);
2367 else
2368 {
2369 s = 0;
2370 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2371 {
2372 s++;
2373 }
2374
2375 /* Round up to a power of two, so that taking the modulus by masking is fast. */
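/* E.g., s = 5 is rounded up to size = 8, and s = 0 leaves size = 1,
   so the table always has at least one slot.  */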
2376 for (size = 1; size < s; size <<= 1)
2377 continue;
2378 }
2379
2380 use->n_map_members = size;
2381 use->cost_map = XCNEWVEC (struct cost_pair, size);
2382 }
2383 }
2384
2385 /* Returns a description of the computation cost of an expression whose
2386 runtime cost is RUNTIME and whose complexity corresponds to COMPLEXITY. */
2387
2388 static comp_cost
2389 new_cost (unsigned runtime, unsigned complexity)
2390 {
2391 comp_cost cost;
2392
2393 cost.cost = runtime;
2394 cost.complexity = complexity;
2395
2396 return cost;
2397 }
2398
2399 /* Adds costs COST1 and COST2. */
2400
2401 static comp_cost
2402 add_costs (comp_cost cost1, comp_cost cost2)
2403 {
2404 cost1.cost += cost2.cost;
2405 cost1.complexity += cost2.complexity;
2406
2407 return cost1;
2408 }

2409 /* Subtracts costs COST1 and COST2. */
2410
2411 static comp_cost
2412 sub_costs (comp_cost cost1, comp_cost cost2)
2413 {
2414 cost1.cost -= cost2.cost;
2415 cost1.complexity -= cost2.complexity;
2416
2417 return cost1;
2418 }
2419
2420 /* Returns a negative number if COST1 < COST2, a positive number if
2421 COST1 > COST2, and 0 if COST1 = COST2. */
2422
2423 static int
2424 compare_costs (comp_cost cost1, comp_cost cost2)
2425 {
2426 if (cost1.cost == cost2.cost)
2427 return cost1.complexity - cost2.complexity;
2428
2429 return cost1.cost - cost2.cost;
2430 }
2431
2432 /* Returns true if COST is infinite. */
2433
2434 static bool
2435 infinite_cost_p (comp_cost cost)
2436 {
2437 return cost.cost == INFTY;
2438 }
2439
2440 /* Sets cost of (USE, CANDIDATE) pair to COST and records that it depends
2441 on invariants DEPENDS_ON and that the value used in expressing it
2442 is VALUE. */
2443
2444 static void
2445 set_use_iv_cost (struct ivopts_data *data,
2446 struct iv_use *use, struct iv_cand *cand,
2447 comp_cost cost, bitmap depends_on, tree value)
2448 {
2449 unsigned i, s;
2450
2451 if (infinite_cost_p (cost))
2452 {
2453 BITMAP_FREE (depends_on);
2454 return;
2455 }
2456
2457 if (data->consider_all_candidates)
2458 {
2459 use->cost_map[cand->id].cand = cand;
2460 use->cost_map[cand->id].cost = cost;
2461 use->cost_map[cand->id].depends_on = depends_on;
2462 use->cost_map[cand->id].value = value;
2463 return;
2464 }
2465
2466 /* n_map_members is a power of two, so this computes modulo. */
2467 s = cand->id & (use->n_map_members - 1);
2468 for (i = s; i < use->n_map_members; i++)
2469 if (!use->cost_map[i].cand)
2470 goto found;
2471 for (i = 0; i < s; i++)
2472 if (!use->cost_map[i].cand)
2473 goto found;
2474
2475 gcc_unreachable ();
2476
2477 found:
2478 use->cost_map[i].cand = cand;
2479 use->cost_map[i].cost = cost;
2480 use->cost_map[i].depends_on = depends_on;
2481 use->cost_map[i].value = value;
2482 }
2483
2484 /* Gets cost of (USE, CANDIDATE) pair. */
2485
2486 static struct cost_pair *
2487 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2488 struct iv_cand *cand)
2489 {
2490 unsigned i, s;
2491 struct cost_pair *ret;
2492
2493 if (!cand)
2494 return NULL;
2495
2496 if (data->consider_all_candidates)
2497 {
2498 ret = use->cost_map + cand->id;
2499 if (!ret->cand)
2500 return NULL;
2501
2502 return ret;
2503 }
2504
2505 /* n_map_members is a power of two, so this computes modulo. */
2506 s = cand->id & (use->n_map_members - 1);
2507 for (i = s; i < use->n_map_members; i++)
2508 if (use->cost_map[i].cand == cand)
2509 return use->cost_map + i;
2510
2511 for (i = 0; i < s; i++)
2512 if (use->cost_map[i].cand == cand)
2513 return use->cost_map + i;
2514
2515 return NULL;
2516 }
2517
2518 /* Returns an estimate of the cost of computing SEQ. */
2519
2520 static unsigned
2521 seq_cost (rtx seq)
2522 {
2523 unsigned cost = 0;
2524 rtx set;
2525
2526 for (; seq; seq = NEXT_INSN (seq))
2527 {
2528 set = single_set (seq);
2529 if (set)
2530 cost += rtx_cost (set, SET);
2531 else
2532 cost++;
2533 }
2534
2535 return cost;
2536 }
2537
2538 /* Produces DECL_RTL for object OBJ so it looks like it is stored in memory. */
2539 static rtx
2540 produce_memory_decl_rtl (tree obj, int *regno)
2541 {
2542 rtx x;
2543
2544 gcc_assert (obj);
2545 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2546 {
2547 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2548 x = gen_rtx_SYMBOL_REF (Pmode, name);
2549 SET_SYMBOL_REF_DECL (x, obj);
2550 x = gen_rtx_MEM (DECL_MODE (obj), x);
2551 targetm.encode_section_info (obj, x, true);
2552 }
2553 else
2554 {
2555 x = gen_raw_REG (Pmode, (*regno)++);
2556 x = gen_rtx_MEM (DECL_MODE (obj), x);
2557 }
2558
2559 return x;
2560 }
2561
2562 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
2563 walk_tree. DATA points to the current fake register number. */
2564
2565 static tree
2566 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2567 {
2568 tree obj = NULL_TREE;
2569 rtx x = NULL_RTX;
2570 int *regno = (int *) data;
2571
2572 switch (TREE_CODE (*expr_p))
2573 {
2574 case ADDR_EXPR:
2575 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2576 handled_component_p (*expr_p);
2577 expr_p = &TREE_OPERAND (*expr_p, 0))
2578 continue;
2579 obj = *expr_p;
2580 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2581 x = produce_memory_decl_rtl (obj, regno);
2582 break;
2583
2584 case SSA_NAME:
2585 *ws = 0;
2586 obj = SSA_NAME_VAR (*expr_p);
2587 if (!DECL_RTL_SET_P (obj))
2588 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2589 break;
2590
2591 case VAR_DECL:
2592 case PARM_DECL:
2593 case RESULT_DECL:
2594 *ws = 0;
2595 obj = *expr_p;
2596
2597 if (DECL_RTL_SET_P (obj))
2598 break;
2599
2600 if (DECL_MODE (obj) == BLKmode)
2601 x = produce_memory_decl_rtl (obj, regno);
2602 else
2603 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2604
2605 break;
2606
2607 default:
2608 break;
2609 }
2610
2611 if (x)
2612 {
2613 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2614 SET_DECL_RTL (obj, x);
2615 }
2616
2617 return NULL_TREE;
2618 }
2619
2620 /* Determines cost of the computation of EXPR. */
2621
2622 static unsigned
2623 computation_cost (tree expr)
2624 {
2625 rtx seq, rslt;
2626 tree type = TREE_TYPE (expr);
2627 unsigned cost;
2628 /* Avoid using hard regs in ways which may be unsupported. */
2629 int regno = LAST_VIRTUAL_REGISTER + 1;
2630
2631 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2632 start_sequence ();
2633 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2634 seq = get_insns ();
2635 end_sequence ();
2636
2637 cost = seq_cost (seq);
2638 if (MEM_P (rslt))
2639 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2640
2641 return cost;
2642 }
2643
2644 /* Returns variable containing the value of candidate CAND at statement AT. */
2645
2646 static tree
2647 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2648 {
2649 if (stmt_after_increment (loop, cand, stmt))
2650 return cand->var_after;
2651 else
2652 return cand->var_before;
2653 }
2654
2655 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2656 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2657
2658 int
2659 tree_int_cst_sign_bit (const_tree t)
2660 {
2661 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2662 unsigned HOST_WIDE_INT w;
2663
2664 if (bitno < HOST_BITS_PER_WIDE_INT)
2665 w = TREE_INT_CST_LOW (t);
2666 else
2667 {
2668 w = TREE_INT_CST_HIGH (t);
2669 bitno -= HOST_BITS_PER_WIDE_INT;
2670 }
2671
2672 return (w >> bitno) & 1;
2673 }
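
/* E.g., with 64-bit HOST_WIDE_INT and a 128-bit constant, bitno starts
   at 127, so the bit is taken from TREE_INT_CST_HIGH at position
   127 - 64 = 63.  */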
2674
2675 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2676 same precision that is at least as wide as the precision of TYPE, stores
2677 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2678 type of A and B. */
2679
2680 static tree
2681 determine_common_wider_type (tree *a, tree *b)
2682 {
2683 tree wider_type = NULL;
2684 tree suba, subb;
2685 tree atype = TREE_TYPE (*a);
2686
2687 if ((TREE_CODE (*a) == NOP_EXPR
2688 || TREE_CODE (*a) == CONVERT_EXPR))
2689 {
2690 suba = TREE_OPERAND (*a, 0);
2691 wider_type = TREE_TYPE (suba);
2692 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2693 return atype;
2694 }
2695 else
2696 return atype;
2697
2698 if ((TREE_CODE (*b) == NOP_EXPR
2699 || TREE_CODE (*b) == CONVERT_EXPR))
2700 {
2701 subb = TREE_OPERAND (*b, 0);
2702 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2703 return atype;
2704 }
2705 else
2706 return atype;
2707
2708 *a = suba;
2709 *b = subb;
2710 return wider_type;
2711 }
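
/* For instance, if A is (int) la and B is (int) lb for two variables la
   and lb of type "long" (wider than "int"), both conversions are stripped
   and "long" is returned, so that the difference A - B can be folded in
   the wider type.  */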
2712
2713 /* Determines the expression by which USE is expressed from induction variable
2714 CAND at statement AT in LOOP. The expression is stored in a decomposed
2715 form into AFF. Returns false if USE cannot be expressed using CAND. */
2716
2717 static bool
2718 get_computation_aff (struct loop *loop,
2719 struct iv_use *use, struct iv_cand *cand, tree at,
2720 struct affine_tree_combination *aff)
2721 {
2722 tree ubase = use->iv->base;
2723 tree ustep = use->iv->step;
2724 tree cbase = cand->iv->base;
2725 tree cstep = cand->iv->step, cstep_common;
2726 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2727 tree common_type, var;
2728 tree uutype;
2729 aff_tree cbase_aff, var_aff;
2730 double_int rat;
2731
2732 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2733 {
2734 /* We do not have enough precision to express the values of the use. */
2735 return false;
2736 }
2737
2738 var = var_at_stmt (loop, cand, at);
2739 uutype = unsigned_type_for (utype);
2740
2741 /* If the conversion is not noop, perform it. */
2742 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2743 {
2744 cstep = fold_convert (uutype, cstep);
2745 cbase = fold_convert (uutype, cbase);
2746 var = fold_convert (uutype, var);
2747 }
2748
2749 if (!constant_multiple_of (ustep, cstep, &rat))
2750 return false;
2751
2752 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2753 type, we achieve better folding by computing their difference in this
2754 wider type, and casting the result to UUTYPE. We do not need to worry about
2755 overflows, as all the arithmetic will in the end be performed in UUTYPE
2756 anyway. */
2757 common_type = determine_common_wider_type (&ubase, &cbase);
2758
2759 /* use = ubase - ratio * cbase + ratio * var. */
2760 tree_to_aff_combination (ubase, common_type, aff);
2761 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2762 tree_to_aff_combination (var, uutype, &var_aff);
2763
2764 /* We need to shift the value if we are after the increment. */
2765 if (stmt_after_increment (loop, cand, at))
2766 {
2767 aff_tree cstep_aff;
2768
2769 if (common_type != uutype)
2770 cstep_common = fold_convert (common_type, cstep);
2771 else
2772 cstep_common = cstep;
2773
2774 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2775 aff_combination_add (&cbase_aff, &cstep_aff);
2776 }
2777
2778 aff_combination_scale (&cbase_aff, double_int_neg (rat));
2779 aff_combination_add (aff, &cbase_aff);
2780 if (common_type != uutype)
2781 aff_combination_convert (aff, uutype);
2782
2783 aff_combination_scale (&var_aff, rat);
2784 aff_combination_add (aff, &var_aff);
2785
2786 return true;
2787 }
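
/* A small worked example of the decomposition above: for a use with iv
   {base = 4, step = 8} and a candidate with iv {base = 0, step = 2},
   constant_multiple_of yields ratio = 4 and the combination built is
   4 - 4 * 0 + 4 * var, i.e. the use value 4 + 8 * i is recovered from
   var = 2 * i.  */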
2788
2789 /* Determines the expression by which USE is expressed from induction variable
2790 CAND at statement AT in LOOP. The computation is unshared. */
2791
2792 static tree
2793 get_computation_at (struct loop *loop,
2794 struct iv_use *use, struct iv_cand *cand, tree at)
2795 {
2796 aff_tree aff;
2797 tree type = TREE_TYPE (use->iv->base);
2798
2799 if (!get_computation_aff (loop, use, cand, at, &aff))
2800 return NULL_TREE;
2801 unshare_aff_combination (&aff);
2802 return fold_convert (type, aff_combination_to_tree (&aff));
2803 }
2804
2805 /* Determines the expression by which USE is expressed from induction variable
2806 CAND in LOOP. The computation is unshared. */
2807
2808 static tree
2809 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2810 {
2811 return get_computation_at (loop, use, cand, use->stmt);
2812 }
2813
2814 /* Returns cost of addition in MODE. */
2815
2816 static unsigned
2817 add_cost (enum machine_mode mode)
2818 {
2819 static unsigned costs[NUM_MACHINE_MODES];
2820 rtx seq;
2821 unsigned cost;
2822
2823 if (costs[mode])
2824 return costs[mode];
2825
2826 start_sequence ();
2827 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2828 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2829 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
2830 NULL_RTX);
2831 seq = get_insns ();
2832 end_sequence ();
2833
2834 cost = seq_cost (seq);
2835 if (!cost)
2836 cost = 1;
2837
2838 costs[mode] = cost;
2839
2840 if (dump_file && (dump_flags & TDF_DETAILS))
2841 fprintf (dump_file, "Addition in %s costs %d\n",
2842 GET_MODE_NAME (mode), cost);
2843 return cost;
2844 }
2845
2846 /* Entry in a hashtable of already known costs for multiplication. */
2847 struct mbc_entry
2848 {
2849 HOST_WIDE_INT cst; /* The constant to multiply by. */
2850 enum machine_mode mode; /* The mode of the multiplication. */
2851 unsigned cost; /* The cost. */
2852 };
2853
2854 /* Computes the hash value for ENTRY. */
2855
2856 static hashval_t
2857 mbc_entry_hash (const void *entry)
2858 {
2859 const struct mbc_entry *e = (const struct mbc_entry *) entry;
2860
2861 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2862 }
2863
2864 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2865
2866 static int
2867 mbc_entry_eq (const void *entry1, const void *entry2)
2868 {
2869 const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
2870 const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
2871
2872 return (e1->mode == e2->mode
2873 && e1->cst == e2->cst);
2874 }
2875
2876 /* Returns cost of multiplication by constant CST in MODE. */
2877
2878 unsigned
2879 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2880 {
2881 static htab_t costs;
2882 struct mbc_entry **cached, act;
2883 rtx seq;
2884 unsigned cost;
2885
2886 if (!costs)
2887 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2888
2889 act.mode = mode;
2890 act.cst = cst;
2891 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2892 if (*cached)
2893 return (*cached)->cost;
2894
2895 *cached = XNEW (struct mbc_entry);
2896 (*cached)->mode = mode;
2897 (*cached)->cst = cst;
2898
2899 start_sequence ();
2900 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2901 gen_int_mode (cst, mode), NULL_RTX, 0);
2902 seq = get_insns ();
2903 end_sequence ();
2904
2905 cost = seq_cost (seq);
2906
2907 if (dump_file && (dump_flags & TDF_DETAILS))
2908 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2909 (int) cst, GET_MODE_NAME (mode), cost);
2910
2911 (*cached)->cost = cost;
2912
2913 return cost;
2914 }
2915
2916 /* Returns true if multiplying by RATIO is allowed in an address. Test the
2917 validity for a memory reference accessing memory of mode MODE. */
2918
2919 bool
2920 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode)
2921 {
2922 #define MAX_RATIO 128
2923 static sbitmap valid_mult[MAX_MACHINE_MODE];
2924
2925 if (!valid_mult[mode])
2926 {
2927 rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2928 rtx addr;
2929 HOST_WIDE_INT i;
2930
2931 valid_mult[mode] = sbitmap_alloc (2 * MAX_RATIO + 1);
2932 sbitmap_zero (valid_mult[mode]);
2933 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2934 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2935 {
2936 XEXP (addr, 1) = gen_int_mode (i, Pmode);
2937 if (memory_address_p (mode, addr))
2938 SET_BIT (valid_mult[mode], i + MAX_RATIO);
2939 }
2940
2941 if (dump_file && (dump_flags & TDF_DETAILS))
2942 {
2943 fprintf (dump_file, " allowed multipliers:");
2944 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2945 if (TEST_BIT (valid_mult[mode], i + MAX_RATIO))
2946 fprintf (dump_file, " %d", (int) i);
2947 fprintf (dump_file, "\n");
2948 fprintf (dump_file, "\n");
2949 }
2950 }
2951
2952 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
2953 return false;
2954
2955 return TEST_BIT (valid_mult[mode], ratio + MAX_RATIO);
2956 }
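
/* E.g., on i386, where the index of an address may be scaled by 1, 2, 4
   or 8, this returns true for ratio = 4 but false for ratio = 3; the
   answers are cached in VALID_MULT, one bitmap per machine mode.  */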
2957
2958 /* Returns cost of an address in the shape symbol + var + OFFSET + RATIO * index.
2959 If SYMBOL_PRESENT is false, the symbol is omitted. If VAR_PRESENT is false,
2960 the variable is omitted. Compute the cost for a memory reference that accesses
2961 a memory location of mode MEM_MODE.
2962
2963 TODO -- there must be some better way. This all is quite crude. */
2964
2965 static comp_cost
2966 get_address_cost (bool symbol_present, bool var_present,
2967 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
2968 enum machine_mode mem_mode)
2969 {
2970 static bool initialized[MAX_MACHINE_MODE];
2971 static HOST_WIDE_INT rat[MAX_MACHINE_MODE], off[MAX_MACHINE_MODE];
2972 static HOST_WIDE_INT min_offset[MAX_MACHINE_MODE], max_offset[MAX_MACHINE_MODE];
2973 static unsigned costs[MAX_MACHINE_MODE][2][2][2][2];
2974 unsigned cost, acost, complexity;
2975 bool offset_p, ratio_p;
2976 HOST_WIDE_INT s_offset;
2977 unsigned HOST_WIDE_INT mask;
2978 unsigned bits;
2979
2980 if (!initialized[mem_mode])
2981 {
2982 HOST_WIDE_INT i;
2983 HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
2984 int old_cse_not_expected;
2985 unsigned sym_p, var_p, off_p, rat_p, add_c;
2986 rtx seq, addr, base;
2987 rtx reg0, reg1;
2988
2989 initialized[mem_mode] = true;
2990
2991 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2992
2993 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
2994 for (i = start; i <= 1 << 20; i <<= 1)
2995 {
2996 XEXP (addr, 1) = gen_int_mode (i, Pmode);
2997 if (!memory_address_p (mem_mode, addr))
2998 break;
2999 }
3000 max_offset[mem_mode] = i == start ? 0 : i >> 1;
3001 off[mem_mode] = max_offset[mem_mode];
3002
3003 for (i = start; i <= 1 << 20; i <<= 1)
3004 {
3005 XEXP (addr, 1) = gen_int_mode (-i, Pmode);
3006 if (!memory_address_p (mem_mode, addr))
3007 break;
3008 }
3009 min_offset[mem_mode] = i == start ? 0 : -(i >> 1);
3010
3011 if (dump_file && (dump_flags & TDF_DETAILS))
3012 {
3013 fprintf (dump_file, "get_address_cost:\n");
3014 fprintf (dump_file, " min offset %s %d\n",
3015 GET_MODE_NAME (mem_mode),
3016 (int) min_offset[mem_mode]);
3017 fprintf (dump_file, " max offset %s %d\n",
3018 GET_MODE_NAME (mem_mode),
3019 (int) max_offset[mem_mode]);
3020 }
3021
3022 rat[mem_mode] = 1;
3023 for (i = 2; i <= MAX_RATIO; i++)
3024 if (multiplier_allowed_in_address_p (i, mem_mode))
3025 {
3026 rat[mem_mode] = i;
3027 break;
3028 }
3029
3030 /* Compute the cost of various addressing modes. */
3031 acost = 0;
3032 reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3033 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
3034
3035 for (i = 0; i < 16; i++)
3036 {
3037 sym_p = i & 1;
3038 var_p = (i >> 1) & 1;
3039 off_p = (i >> 2) & 1;
3040 rat_p = (i >> 3) & 1;
3041
3042 addr = reg0;
3043 if (rat_p)
3044 addr = gen_rtx_fmt_ee (MULT, Pmode, addr,
3045 gen_int_mode (rat[mem_mode], Pmode));
3046
3047 if (var_p)
3048 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3049
3050 if (sym_p)
3051 {
3052 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3053 /* ??? We can run into trouble with some backends by presenting
3054 it with symbols which haven't been properly passed through
3055 targetm.encode_section_info. By setting the local bit, we
3056 enhance the probability of things working. */
3057 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3058
3059 if (off_p)
3060 base = gen_rtx_fmt_e (CONST, Pmode,
3061 gen_rtx_fmt_ee (PLUS, Pmode,
3062 base,
3063 gen_int_mode (off[mem_mode],
3064 Pmode)));
3065 }
3066 else if (off_p)
3067 base = gen_int_mode (off[mem_mode], Pmode);
3068 else
3069 base = NULL_RTX;
3070
3071 if (base)
3072 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3073
3074 start_sequence ();
3075 /* To avoid splitting addressing modes, pretend that no cse will
3076 follow. */
3077 old_cse_not_expected = cse_not_expected;
3078 cse_not_expected = true;
3079 addr = memory_address (mem_mode, addr);
3080 cse_not_expected = old_cse_not_expected;
3081 seq = get_insns ();
3082 end_sequence ();
3083
3084 acost = seq_cost (seq);
3085 acost += address_cost (addr, mem_mode);
3086
3087 if (!acost)
3088 acost = 1;
3089 costs[mem_mode][sym_p][var_p][off_p][rat_p] = acost;
3090 }
3091
3092 /* On some targets, it is quite expensive to load a symbol into a register,
3093 which makes addresses that contain symbols look much more expensive.
3094 However, the symbol will have to be loaded in any case before the
3095 loop (and quite likely we have it in a register already), so it does not
3096 make much sense to penalize them too heavily. So make some final
3097 tweaks for the SYMBOL_PRESENT modes:
3098
3099 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3100 var is cheaper, use this mode with small penalty.
3101 If VAR_PRESENT is true, try whether the mode with
3102 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3103 if this is the case, use it. */
3104 add_c = add_cost (Pmode);
3105 for (i = 0; i < 8; i++)
3106 {
3107 var_p = i & 1;
3108 off_p = (i >> 1) & 1;
3109 rat_p = (i >> 2) & 1;
3110
3111 acost = costs[mem_mode][0][1][off_p][rat_p] + 1;
3112 if (var_p)
3113 acost += add_c;
3114
3115 if (acost < costs[mem_mode][1][var_p][off_p][rat_p])
3116 costs[mem_mode][1][var_p][off_p][rat_p] = acost;
3117 }
3118
3119 if (dump_file && (dump_flags & TDF_DETAILS))
3120 {
3121 fprintf (dump_file, "Address costs:\n");
3122
3123 for (i = 0; i < 16; i++)
3124 {
3125 sym_p = i & 1;
3126 var_p = (i >> 1) & 1;
3127 off_p = (i >> 2) & 1;
3128 rat_p = (i >> 3) & 1;
3129
3130 fprintf (dump_file, " ");
3131 if (sym_p)
3132 fprintf (dump_file, "sym + ");
3133 if (var_p)
3134 fprintf (dump_file, "var + ");
3135 if (off_p)
3136 fprintf (dump_file, "cst + ");
3137 if (rat_p)
3138 fprintf (dump_file, "rat * ");
3139
3140 acost = costs[mem_mode][sym_p][var_p][off_p][rat_p];
3141 fprintf (dump_file, "index costs %d\n", acost);
3142 }
3143 fprintf (dump_file, "\n");
3144 }
3145 }
3146
3147 bits = GET_MODE_BITSIZE (Pmode);
3148 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3149 offset &= mask;
3150 if ((offset >> (bits - 1) & 1))
3151 offset |= ~mask;
3152 s_offset = offset;
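
/* The masking above sign-extends OFFSET from the width of Pmode: e.g.
   with 32-bit Pmode, offset 0xfffffff0 becomes s_offset = -16, so that
   negative offsets compare correctly against min_offset below.  */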
3153
3154 cost = 0;
3155 offset_p = (s_offset != 0
3156 && min_offset[mem_mode] <= s_offset
3157 && s_offset <= max_offset[mem_mode]);
3158 ratio_p = (ratio != 1
3159 && multiplier_allowed_in_address_p (ratio, mem_mode));
3160
3161 if (ratio != 1 && !ratio_p)
3162 cost += multiply_by_cost (ratio, Pmode);
3163
3164 if (s_offset && !offset_p && !symbol_present)
3165 cost += add_cost (Pmode);
3166
3167 acost = costs[mem_mode][symbol_present][var_present][offset_p][ratio_p];
3168 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3169 return new_cost (cost + acost, complexity);
3170 }
3171
3172 /* Estimates cost of forcing expression EXPR into a variable. */
3173
3174 static comp_cost
3175 force_expr_to_var_cost (tree expr)
3176 {
3177 static bool costs_initialized = false;
3178 static unsigned integer_cost;
3179 static unsigned symbol_cost;
3180 static unsigned address_cost;
3181 tree op0, op1;
3182 comp_cost cost0, cost1, cost;
3183 enum machine_mode mode;
3184
3185 if (!costs_initialized)
3186 {
3187 tree type = build_pointer_type (integer_type_node);
3188 tree var, addr;
3189 rtx x;
3190
3191 var = create_tmp_var_raw (integer_type_node, "test_var");
3192 TREE_STATIC (var) = 1;
3193 x = produce_memory_decl_rtl (var, NULL);
3194 SET_DECL_RTL (var, x);
3195
3196 integer_cost = computation_cost (build_int_cst (integer_type_node,
3197 2000));
3198
3199 addr = build1 (ADDR_EXPR, type, var);
3200 symbol_cost = computation_cost (addr) + 1;
3201
3202 address_cost
3203 = computation_cost (build2 (POINTER_PLUS_EXPR, type,
3204 addr,
3205 build_int_cst (sizetype, 2000))) + 1;
3206 if (dump_file && (dump_flags & TDF_DETAILS))
3207 {
3208 fprintf (dump_file, "force_expr_to_var_cost:\n");
3209 fprintf (dump_file, " integer %d\n", (int) integer_cost);
3210 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
3211 fprintf (dump_file, " address %d\n", (int) address_cost);
3212 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
3213 fprintf (dump_file, "\n");
3214 }
3215
3216 costs_initialized = true;
3217 }
3218
3219 STRIP_NOPS (expr);
3220
3221 if (SSA_VAR_P (expr))
3222 return zero_cost;
3223
3224 if (is_gimple_min_invariant (expr))
3225 {
3226 if (TREE_CODE (expr) == INTEGER_CST)
3227 return new_cost (integer_cost, 0);
3228
3229 if (TREE_CODE (expr) == ADDR_EXPR)
3230 {
3231 tree obj = TREE_OPERAND (expr, 0);
3232
3233 if (TREE_CODE (obj) == VAR_DECL
3234 || TREE_CODE (obj) == PARM_DECL
3235 || TREE_CODE (obj) == RESULT_DECL)
3236 return new_cost (symbol_cost, 0);
3237 }
3238
3239 return new_cost (address_cost, 0);
3240 }
3241
3242 switch (TREE_CODE (expr))
3243 {
3244 case POINTER_PLUS_EXPR:
3245 case PLUS_EXPR:
3246 case MINUS_EXPR:
3247 case MULT_EXPR:
3248 op0 = TREE_OPERAND (expr, 0);
3249 op1 = TREE_OPERAND (expr, 1);
3250 STRIP_NOPS (op0);
3251 STRIP_NOPS (op1);
3252
3253 if (is_gimple_val (op0))
3254 cost0 = zero_cost;
3255 else
3256 cost0 = force_expr_to_var_cost (op0);
3257
3258 if (is_gimple_val (op1))
3259 cost1 = zero_cost;
3260 else
3261 cost1 = force_expr_to_var_cost (op1);
3262
3263 break;
3264
3265 default:
3266 /* Just an arbitrary value, FIXME. */
3267 return new_cost (target_spill_cost, 0);
3268 }
3269
3270 mode = TYPE_MODE (TREE_TYPE (expr));
3271 switch (TREE_CODE (expr))
3272 {
3273 case POINTER_PLUS_EXPR:
3274 case PLUS_EXPR:
3275 case MINUS_EXPR:
3276 cost = new_cost (add_cost (mode), 0);
3277 break;
3278
3279 case MULT_EXPR:
3280 if (cst_and_fits_in_hwi (op0))
3281 cost = new_cost (multiply_by_cost (int_cst_value (op0), mode), 0);
3282 else if (cst_and_fits_in_hwi (op1))
3283 cost = new_cost (multiply_by_cost (int_cst_value (op1), mode), 0);
3284 else
3285 return new_cost (target_spill_cost, 0);
3286 break;
3287
3288 default:
3289 gcc_unreachable ();
3290 }
3291
3292 cost = add_costs (cost, cost0);
3293 cost = add_costs (cost, cost1);
3294
3295 /* Bound the cost by target_spill_cost. The parts of complicated
3296 computations often are either loop invariant or at least can
3297 be shared between several iv uses, so letting this grow without
3298 limits would not give reasonable results. */
3299 if (cost.cost > target_spill_cost)
3300 cost.cost = target_spill_cost;
3301
3302 return cost;
3303 }
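
/* For example, forcing "a * 4 + b" (with SSA names a and b) into a
   variable costs add_cost (mode) + multiply_by_cost (4, mode), bounded
   from above by target_spill_cost as described above.  */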
3304
3305 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3306 invariants the computation depends on. */
3307
3308 static comp_cost
3309 force_var_cost (struct ivopts_data *data,
3310 tree expr, bitmap *depends_on)
3311 {
3312 if (depends_on)
3313 {
3314 fd_ivopts_data = data;
3315 walk_tree (&expr, find_depends, depends_on, NULL);
3316 }
3317
3318 return force_expr_to_var_cost (expr);
3319 }
3320
3321 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3322 value of the offset is added to OFFSET; SYMBOL_PRESENT and VAR_PRESENT are set
3323 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3324 invariants the computation depends on. */
3325
3326 static comp_cost
3327 split_address_cost (struct ivopts_data *data,
3328 tree addr, bool *symbol_present, bool *var_present,
3329 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3330 {
3331 tree core;
3332 HOST_WIDE_INT bitsize;
3333 HOST_WIDE_INT bitpos;
3334 tree toffset;
3335 enum machine_mode mode;
3336 int unsignedp, volatilep;
3337
3338 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3339 &unsignedp, &volatilep, false);
3340
3341 if (toffset != 0
3342 || bitpos % BITS_PER_UNIT != 0
3343 || TREE_CODE (core) != VAR_DECL)
3344 {
3345 *symbol_present = false;
3346 *var_present = true;
3347 fd_ivopts_data = data;
3348 walk_tree (&addr, find_depends, depends_on, NULL);
3349 return new_cost (target_spill_cost, 0);
3350 }
3351
3352 *offset += bitpos / BITS_PER_UNIT;
3353 if (TREE_STATIC (core)
3354 || DECL_EXTERNAL (core))
3355 {
3356 *symbol_present = true;
3357 *var_present = false;
3358 return zero_cost;
3359 }
3360
3361 *symbol_present = false;
3362 *var_present = true;
3363 return zero_cost;
3364 }
3365
3366 /* Estimates cost of expressing difference of addresses E1 - E2 as
3367 var + symbol + offset. The value of the offset is added to OFFSET;
3368 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3369 part is missing. DEPENDS_ON is a set of the invariants the computation
3370 depends on. */
3371
3372 static comp_cost
3373 ptr_difference_cost (struct ivopts_data *data,
3374 tree e1, tree e2, bool *symbol_present, bool *var_present,
3375 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3376 {
3377 HOST_WIDE_INT diff = 0;
3378 comp_cost cost;
3379
3380 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3381
3382 if (ptr_difference_const (e1, e2, &diff))
3383 {
3384 *offset += diff;
3385 *symbol_present = false;
3386 *var_present = false;
3387 return zero_cost;
3388 }
3389
3390 if (integer_zerop (e2))
3391 return split_address_cost (data, TREE_OPERAND (e1, 0),
3392 symbol_present, var_present, offset, depends_on);
3393
3394 *symbol_present = false;
3395 *var_present = true;
3396
3397 cost = force_var_cost (data, e1, depends_on);
3398 cost = add_costs (cost, force_var_cost (data, e2, depends_on));
3399 cost.cost += add_cost (Pmode);
3400
3401 return cost;
3402 }
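
/* E.g., &a[5] - &a[2] with 4-byte elements is folded by
   ptr_difference_const to the constant 12, which goes into *OFFSET at no
   extra cost; only when the difference is not constant do we pay for
   computing both addresses plus one addition.  */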
3403
3404 /* Estimates cost of expressing difference E1 - E2 as
3405 var + symbol + offset. The value of the offset is added to OFFSET;
3406 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3407 part is missing. DEPENDS_ON is a set of the invariants the computation
3408 depends on. */
3409
3410 static comp_cost
3411 difference_cost (struct ivopts_data *data,
3412 tree e1, tree e2, bool *symbol_present, bool *var_present,
3413 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3414 {
3415 comp_cost cost;
3416 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3417 unsigned HOST_WIDE_INT off1, off2;
3418
3419 e1 = strip_offset (e1, &off1);
3420 e2 = strip_offset (e2, &off2);
3421 *offset += off1 - off2;
3422
3423 STRIP_NOPS (e1);
3424 STRIP_NOPS (e2);
3425
3426 if (TREE_CODE (e1) == ADDR_EXPR)
3427 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3428 depends_on);
3429 *symbol_present = false;
3430
3431 if (operand_equal_p (e1, e2, 0))
3432 {
3433 *var_present = false;
3434 return zero_cost;
3435 }
3436 *var_present = true;
3437 if (integer_zerop (e2))
3438 return force_var_cost (data, e1, depends_on);
3439
3440 if (integer_zerop (e1))
3441 {
3442 cost = force_var_cost (data, e2, depends_on);
3443 cost.cost += multiply_by_cost (-1, mode);
3444
3445 return cost;
3446 }
3447
3448 cost = force_var_cost (data, e1, depends_on);
3449 cost = add_costs (cost, force_var_cost (data, e2, depends_on));
3450 cost.cost += add_cost (mode);
3451
3452 return cost;
3453 }
3454
3455 /* Determines the cost of the computation by which USE is expressed
3456 from induction variable CAND. If ADDRESS_P is true, we just need
3457 to create an address from it, otherwise we want to get it into a
3458 register. A set of invariants we depend on is stored in
3459 DEPENDS_ON. AT is the statement at which the value is computed. */
3460
3461 static comp_cost
3462 get_computation_cost_at (struct ivopts_data *data,
3463 struct iv_use *use, struct iv_cand *cand,
3464 bool address_p, bitmap *depends_on, tree at)
3465 {
3466 tree ubase = use->iv->base, ustep = use->iv->step;
3467 tree cbase, cstep;
3468 tree utype = TREE_TYPE (ubase), ctype;
3469 unsigned HOST_WIDE_INT cstepi, offset = 0;
3470 HOST_WIDE_INT ratio, aratio;
3471 bool var_present, symbol_present;
3472 comp_cost cost;
3473 unsigned n_sums;
3474 double_int rat;
3475
3476 *depends_on = NULL;
3477
3478 /* Only consider real candidates. */
3479 if (!cand->iv)
3480 return infinite_cost;
3481
3482 cbase = cand->iv->base;
3483 cstep = cand->iv->step;
3484 ctype = TREE_TYPE (cbase);
3485
3486 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3487 {
3488 /* We do not have enough precision to express the values of the use. */
3489 return infinite_cost;
3490 }
3491
3492 if (address_p)
3493 {
3494 /* Do not try to express the address of an object with a computation
3495 based on the address of a different object. This may cause problems
3496 in rtl-level alias analysis (which does not expect this to happen,
3497 as it is illegal in C), and would be unlikely to be useful
3498 anyway. */
3499 if (use->iv->base_object
3500 && cand->iv->base_object
3501 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3502 return infinite_cost;
3503 }
3504
3505 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3506 {
3507 /* TODO -- add direct handling of this case. */
3508 goto fallback;
3509 }
3510
3511 /* CSTEPI is removed from the offset in case the statement is after the
3512 increment. If the step is not constant, we use zero instead.
3513 This is a bit imprecise (there is the extra addition), but
3514 redundancy elimination is likely to transform the code so that
3515 it uses the value of the variable before the increment anyway,
3516 so this is not too unrealistic. */
3517 if (cst_and_fits_in_hwi (cstep))
3518 cstepi = int_cst_value (cstep);
3519 else
3520 cstepi = 0;
3521
3522 if (!constant_multiple_of (ustep, cstep, &rat))
3523 return infinite_cost;
3524
3525 if (double_int_fits_in_shwi_p (rat))
3526 ratio = double_int_to_shwi (rat);
3527 else
3528 return infinite_cost;
3529
3530 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3531 or ratio == 1, it is better to handle this like
3532
3533 ubase - ratio * cbase + ratio * var
3534
3535 (this also holds in the case ratio == -1, TODO). */
3536
3537 if (cst_and_fits_in_hwi (cbase))
3538 {
3539 offset = - ratio * int_cst_value (cbase);
3540 cost = difference_cost (data,
3541 ubase, build_int_cst (utype, 0),
3542 &symbol_present, &var_present, &offset,
3543 depends_on);
3544 }
3545 else if (ratio == 1)
3546 {
3547 cost = difference_cost (data,
3548 ubase, cbase,
3549 &symbol_present, &var_present, &offset,
3550 depends_on);
3551 }
3552 else
3553 {
3554 cost = force_var_cost (data, cbase, depends_on);
3555 cost.cost += add_cost (TYPE_MODE (ctype));
3556 cost = add_costs (cost,
3557 difference_cost (data,
3558 ubase, build_int_cst (utype, 0),
3559 &symbol_present, &var_present,
3560 &offset, depends_on));
3561 }
3562
3563 /* If we are after the increment, the value of the candidate is higher by
3564 one iteration. */
3565 if (stmt_after_increment (data->current_loop, cand, at))
3566 offset -= ratio * cstepi;
3567
3568 /* Now the computation is in the shape symbol + var1 + const + ratio * var2.
3569 (symbol/var/const parts may be omitted). If we are looking for an address,
3570 find the cost of addressing this. */
3571 if (address_p)
3572 return add_costs (cost, get_address_cost (symbol_present, var_present,
3573 offset, ratio,
3574 TYPE_MODE (TREE_TYPE (*use->op_p))));
3575
3576 /* Otherwise estimate the costs for computing the expression. */
3577 aratio = ratio > 0 ? ratio : -ratio;
3578 if (!symbol_present && !var_present && !offset)
3579 {
3580 if (ratio != 1)
3581 cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3582
3583 return cost;
3584 }
3585
3586 if (aratio != 1)
3587 cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3588
3589 n_sums = 1;
3590 if (var_present
3591 /* Symbol + offset should be compile-time computable. */
3592 && (symbol_present || offset))
3593 n_sums++;
3594
3595 /* Having an offset does not affect the runtime cost in case it is added to
3596 a symbol, but it increases complexity. */
3597 if (offset)
3598 cost.complexity++;
3599
3600 cost.cost += n_sums * add_cost (TYPE_MODE (ctype));
3601 return cost;
3602
3603 fallback:
3604 {
3605 /* Just get the expression, expand it and measure the cost. */
3606 tree comp = get_computation_at (data->current_loop, use, cand, at);
3607
3608 if (!comp)
3609 return infinite_cost;
3610
3611 if (address_p)
3612 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3613
3614 return new_cost (computation_cost (comp), 0);
3615 }
3616 }
3617
3618 /* Determines the cost of the computation by which USE is expressed
3619 from induction variable CAND. If ADDRESS_P is true, we just need
3620 to create an address from it, otherwise we want to get it into a
3621 register. A set of invariants we depend on is stored in
3622 DEPENDS_ON. */
3623
3624 static comp_cost
3625 get_computation_cost (struct ivopts_data *data,
3626 struct iv_use *use, struct iv_cand *cand,
3627 bool address_p, bitmap *depends_on)
3628 {
3629 return get_computation_cost_at (data,
3630 use, cand, address_p, depends_on, use->stmt);
3631 }
3632
3633 /* Determines cost of basing replacement of USE on CAND in a generic
3634 expression. */
3635
3636 static bool
3637 determine_use_iv_cost_generic (struct ivopts_data *data,
3638 struct iv_use *use, struct iv_cand *cand)
3639 {
3640 bitmap depends_on;
3641 comp_cost cost;
3642
3643 /* The simple case first -- if we need to express value of the preserved
3644 original biv, the cost is 0. This also prevents us from counting the
3645 cost of increment twice -- once at this use and once in the cost of
3646 the candidate. */
3647 if (cand->pos == IP_ORIGINAL
3648 && cand->incremented_at == use->stmt)
3649 {
3650 set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE);
3651 return true;
3652 }
3653
3654 cost = get_computation_cost (data, use, cand, false, &depends_on);
3655 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3656
3657 return !infinite_cost_p (cost);
3658 }
3659
3660 /* Determines cost of basing replacement of USE on CAND in an address. */
3661
3662 static bool
3663 determine_use_iv_cost_address (struct ivopts_data *data,
3664 struct iv_use *use, struct iv_cand *cand)
3665 {
3666 bitmap depends_on;
3667 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on);
3668
3669 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3670
3671 return !infinite_cost_p (cost);
3672 }
3673
3674 /* Computes value of candidate CAND at position AT in iteration NITER, and
3675 stores it to VAL. */
3676
3677 static void
3678 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter,
3679 aff_tree *val)
3680 {
3681 aff_tree step, delta, nit;
3682 struct iv *iv = cand->iv;
3683 tree type = TREE_TYPE (iv->base);
3684 tree steptype = type;
3685 if (POINTER_TYPE_P (type))
3686 steptype = sizetype;
3687
3688 tree_to_aff_combination (iv->step, steptype, &step);
3689 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
3690 aff_combination_convert (&nit, steptype);
3691 aff_combination_mult (&nit, &step, &delta);
3692 if (stmt_after_increment (loop, cand, at))
3693 aff_combination_add (&delta, &step);
3694
3695 tree_to_aff_combination (iv->base, type, val);
3696 aff_combination_add (val, &delta);
3697 }
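
/* In other words, VAL is CAND's base plus NITER times its step, plus one
   more step if the position AT is already past the increment of CAND.  */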
3698
3699 /* Returns the period of induction variable IV. */
3700
3701 static tree
3702 iv_period (struct iv *iv)
3703 {
3704 tree step = iv->step, period, type;
3705 tree pow2div;
3706
3707 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3708
3709 /* The period of the iv (one less than the number of its distinct values) is
3710 type_range / gcd (step, type_range) - 1. Since the type range is a power of
3711 two, it suffices to determine the maximum power of two that divides step. */
3712 pow2div = num_ending_zeros (step);
3713 type = unsigned_type_for (TREE_TYPE (step));
3714
3715 period = build_low_bits_mask (type,
3716 (TYPE_PRECISION (type)
3717 - tree_low_cst (pow2div, 1)));
3718
3719 return period;
3720 }
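
/* E.g., for a 32-bit iv with step 12, num_ending_zeros is 2 and the
   period returned is 2^30 - 1: the iv takes 2^30 distinct values before
   wrapping, so up to 2^30 - 1 iterations can be distinguished by
   comparing the iv against a single bound.  */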
3721
3722 /* Returns the comparison operator used when eliminating the iv USE. */
3723
3724 static enum tree_code
3725 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3726 {
3727 struct loop *loop = data->current_loop;
3728 basic_block ex_bb;
3729 edge exit;
3730
3731 ex_bb = bb_for_stmt (use->stmt);
3732 exit = EDGE_SUCC (ex_bb, 0);
3733 if (flow_bb_inside_loop_p (loop, exit->dest))
3734 exit = EDGE_SUCC (ex_bb, 1);
3735
3736 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3737 }
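
/* That is, if the exit edge is the true edge of the condition, the
   eliminated test becomes "cand == bound" (leave the loop once the
   candidate reaches the exit value), and "cand != bound" otherwise.  */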
3738
3739 /* Check whether it is possible to express the condition in USE by comparison
3740 of candidate CAND. If so, store the value compared against to BOUND. */
3741
3742 static bool
3743 may_eliminate_iv (struct ivopts_data *data,
3744 struct iv_use *use, struct iv_cand *cand, tree *bound)
3745 {
3746 basic_block ex_bb;
3747 edge exit;
3748 tree nit, period;
3749 struct loop *loop = data->current_loop;
3750 aff_tree bnd;
3751 double_int period_value, max_niter;
3752
3753 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3754 return false;
3755
3756 /* For now this works only for exits that dominate the loop latch. TODO --
3757 extend it to other conditions inside the loop body. */
3758 ex_bb = bb_for_stmt (use->stmt);
3759 if (use->stmt != last_stmt (ex_bb)
3760 || TREE_CODE (use->stmt) != COND_EXPR)
3761 return false;
3762 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3763 return false;
3764
3765 exit = EDGE_SUCC (ex_bb, 0);
3766 if (flow_bb_inside_loop_p (loop, exit->dest))
3767 exit = EDGE_SUCC (ex_bb, 1);
3768 if (flow_bb_inside_loop_p (loop, exit->dest))
3769 return false;
3770
3771 nit = niter_for_exit (data, exit);
3772 if (!nit)
3773 return false;
3774
3775 /* Determine whether we may use the variable to test whether niter iterations
3776 elapsed. This is the case iff the period of the induction variable is
3777 greater than the number of iterations. */
3778 period = iv_period (cand->iv);
3779 if (!period)
3780 return false;
3781
3782 /* Compare the period with the estimate of the number of iterations of the
3783 loop. */
3784 if (!estimated_loop_iterations (loop, true, &max_niter))
3785 return false;
3786 period_value = tree_to_double_int (period);
3787 if (double_int_ucmp (period_value, max_niter) <= 0)
3788 return false;
3789
3790 cand_value_at (loop, cand, use->stmt, nit, &bnd);
3791 *bound = aff_combination_to_tree (&bnd);
3792 return true;
3793 }
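
/* For instance, an exit test "i < 100" may be replaced by "p != bound"
   for a pointer candidate p, where BOUND is the value the pointer holds
   when the loop exits; the period check above guarantees that the
   candidate cannot revisit BOUND earlier because of wraparound.  */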
3794
3795 /* Determines cost of basing replacement of USE on CAND in a condition. */
3796
3797 static bool
3798 determine_use_iv_cost_condition (struct ivopts_data *data,
3799 struct iv_use *use, struct iv_cand *cand)
3800 {
3801 tree bound = NULL_TREE;
3802 struct iv *cmp_iv;
3803 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
3804 comp_cost elim_cost, express_cost, cost;
3805 bool ok;
3806
3807 /* Only consider real candidates. */
3808 if (!cand->iv)
3809 {
3810 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE);
3811 return false;
3812 }
3813
3814 /* Try iv elimination. */
3815 if (may_eliminate_iv (data, use, cand, &bound))
3816 {
3817 elim_cost = force_var_cost (data, bound, &depends_on_elim);
3818 /* The bound is a loop invariant, so it will be computed only
3819 once. */
3820 elim_cost.cost /= AVG_LOOP_NITER (data->current_loop);
3821 }
3822 else
3823 elim_cost = infinite_cost;
3824
3825 /* Try expressing the original giv. If it is compared with an invariant,
3826 note that we cannot get rid of it. */
3827 ok = extract_cond_operands (data, use->op_p, NULL, NULL, NULL, &cmp_iv);
3828 gcc_assert (ok);
3829
3830 express_cost = get_computation_cost (data, use, cand, false,
3831 &depends_on_express);
3832 fd_ivopts_data = data;
3833 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
3834
3835 /* Choose the better approach. */
3836 if (compare_costs (elim_cost, express_cost) < 0)
3837 {
3838 cost = elim_cost;
3839 depends_on = depends_on_elim;
3840 depends_on_elim = NULL;
3841 }
3842 else
3843 {
3844 cost = express_cost;
3845 depends_on = depends_on_express;
3846 depends_on_express = NULL;
3847 bound = NULL_TREE;
3848 }
3849
3850 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
3851
3852 if (depends_on_elim)
3853 BITMAP_FREE (depends_on_elim);
3854 if (depends_on_express)
3855 BITMAP_FREE (depends_on_express);
3856
3857 return !infinite_cost_p (cost);
3858 }
3859
3860 /* Determines cost of basing replacement of USE on CAND. Returns false
3861 if USE cannot be based on CAND. */
3862
3863 static bool
3864 determine_use_iv_cost (struct ivopts_data *data,
3865 struct iv_use *use, struct iv_cand *cand)
3866 {
3867 switch (use->type)
3868 {
3869 case USE_NONLINEAR_EXPR:
3870 return determine_use_iv_cost_generic (data, use, cand);
3871
3872 case USE_ADDRESS:
3873 return determine_use_iv_cost_address (data, use, cand);
3874
3875 case USE_COMPARE:
3876 return determine_use_iv_cost_condition (data, use, cand);
3877
3878 default:
3879 gcc_unreachable ();
3880 }
3881 }
3882
3883 /* Determines costs of basing the use of the iv on an iv candidate. */
3884
3885 static void
3886 determine_use_iv_costs (struct ivopts_data *data)
3887 {
3888 unsigned i, j;
3889 struct iv_use *use;
3890 struct iv_cand *cand;
3891 bitmap to_clear = BITMAP_ALLOC (NULL);
3892
3893 alloc_use_cost_map (data);
3894
3895 for (i = 0; i < n_iv_uses (data); i++)
3896 {
3897 use = iv_use (data, i);
3898
3899 if (data->consider_all_candidates)
3900 {
3901 for (j = 0; j < n_iv_cands (data); j++)
3902 {
3903 cand = iv_cand (data, j);
3904 determine_use_iv_cost (data, use, cand);
3905 }
3906 }
3907 else
3908 {
3909 bitmap_iterator bi;
3910
3911 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
3912 {
3913 cand = iv_cand (data, j);
3914 if (!determine_use_iv_cost (data, use, cand))
3915 bitmap_set_bit (to_clear, j);
3916 }
3917
3918 /* Remove the candidates for which the cost is infinite from
3919 the list of related candidates. */
3920 bitmap_and_compl_into (use->related_cands, to_clear);
3921 bitmap_clear (to_clear);
3922 }
3923 }
3924
3925 BITMAP_FREE (to_clear);
3926
3927 if (dump_file && (dump_flags & TDF_DETAILS))
3928 {
3929 fprintf (dump_file, "Use-candidate costs:\n");
3930
3931 for (i = 0; i < n_iv_uses (data); i++)
3932 {
3933 use = iv_use (data, i);
3934
3935 fprintf (dump_file, "Use %d:\n", i);
3936 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
3937 for (j = 0; j < use->n_map_members; j++)
3938 {
3939 if (!use->cost_map[j].cand
3940 || infinite_cost_p (use->cost_map[j].cost))
3941 continue;
3942
3943 fprintf (dump_file, " %d\t%d\t%d\t",
3944 use->cost_map[j].cand->id,
3945 use->cost_map[j].cost.cost,
3946 use->cost_map[j].cost.complexity);
3947 if (use->cost_map[j].depends_on)
3948 bitmap_print (dump_file,
3949 use->cost_map[j].depends_on, "","");
3950 fprintf (dump_file, "\n");
3951 }
3952
3953 fprintf (dump_file, "\n");
3954 }
3955 fprintf (dump_file, "\n");
3956 }
3957 }
3958
3959 /* Determines cost of the candidate CAND. */
3960
3961 static void
3962 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3963 {
3964 comp_cost cost_base;
3965 unsigned cost, cost_step;
3966 tree base;
3967
3968 if (!cand->iv)
3969 {
3970 cand->cost = 0;
3971 return;
3972 }
3973
3974 /* There are two costs associated with the candidate -- its increment
3975 and its initialization. The second is almost negligible for any loop
3976 that rolls enough, so we take it into account only a little. */
3977
3978 base = cand->iv->base;
3979 cost_base = force_var_cost (data, base, NULL);
3980 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
3981
3982 cost = cost_step + cost_base.cost / AVG_LOOP_NITER (current_loop);
3983
3984 /* Prefer the original ivs unless we may gain something by replacing them;
3985 the reason is to make debugging simpler. So this is not relevant for
3986 artificial ivs created by other optimization passes. */
3987 if (cand->pos != IP_ORIGINAL
3988 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
3989 cost++;
3990
3991 /* Prefer not to insert statements into latch unless there are some
3992 already (so that we do not create unnecessary jumps). */
3993 if (cand->pos == IP_END
3994 && empty_block_p (ip_end_pos (data->current_loop)))
3995 cost++;
3996
3997 cand->cost = cost;
3998 }
3999
4000 /* Determines costs of computation of the candidates. */
4001
4002 static void
4003 determine_iv_costs (struct ivopts_data *data)
4004 {
4005 unsigned i;
4006
4007 if (dump_file && (dump_flags & TDF_DETAILS))
4008 {
4009 fprintf (dump_file, "Candidate costs:\n");
4010 fprintf (dump_file, " cand\tcost\n");
4011 }
4012
4013 for (i = 0; i < n_iv_cands (data); i++)
4014 {
4015 struct iv_cand *cand = iv_cand (data, i);
4016
4017 determine_iv_cost (data, cand);
4018
4019 if (dump_file && (dump_flags & TDF_DETAILS))
4020 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4021 }
4022
4023 if (dump_file && (dump_flags & TDF_DETAILS))
4024 fprintf (dump_file, "\n");
4025 }
4026
4027 /* Calculates cost for having SIZE induction variables. */
4028
4029 static unsigned
4030 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4031 {
4032 /* We add size to the cost, so that we prefer eliminating ivs
4033 if possible. */
4034 return size + estimate_reg_pressure_cost (size, data->regs_used);
4035 }
4036
4037 /* For each size of the induction variable set determine the penalty. */
4038
4039 static void
4040 determine_set_costs (struct ivopts_data *data)
4041 {
4042 unsigned j, n;
4043 tree phi, op;
4044 struct loop *loop = data->current_loop;
4045 bitmap_iterator bi;
4046
4047 /* We use the following model (definitely improvable, especially the
4048 cost function -- TODO):
4049
4050 We estimate the number of registers available (using MD data), name it A.
4051
4052 We estimate the number of registers used by the loop, name it U. This
4053 number is obtained as the number of loop phi nodes (not counting virtual
4054 registers and bivs) + the number of variables from outside of the loop.
4055
4056 We set a reserve R (free regs that are used for temporary computations,
4057 etc.). For now the reserve is a constant 3.
4058
4059 Let I be the number of induction variables.
4060
4061 -- if U + I + R <= A, the cost is I * SMALL_COST (just so that we do
4062 not encourage creating a lot of ivs without a reason).
4063 -- if A - R < U + I <= A, the cost is I * PRES_COST
4064 -- if U + I > A, the cost is I * PRES_COST and
4065 number of uses * SPILL_COST * (U + I - A) / (U + I) is added; see the sketch after this function. */
4066
4067 if (dump_file && (dump_flags & TDF_DETAILS))
4068 {
4069 fprintf (dump_file, "Global costs:\n");
4070 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
4071 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost);
4072 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
4073 }
4074
4075 n = 0;
4076 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
4077 {
4078 op = PHI_RESULT (phi);
4079
4080 if (!is_gimple_reg (op))
4081 continue;
4082
4083 if (get_iv (data, op))
4084 continue;
4085
4086 n++;
4087 }
4088
4089 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4090 {
4091 struct version_info *info = ver_info (data, j);
4092
4093 if (info->inv_id && info->has_nonlin_use)
4094 n++;
4095 }
4096
4097 data->regs_used = n;
4098 if (dump_file && (dump_flags & TDF_DETAILS))
4099 fprintf (dump_file, " regs_used %d\n", n);
4100
4101 if (dump_file && (dump_flags & TDF_DETAILS))
4102 {
4103 fprintf (dump_file, " cost for size:\n");
4104 fprintf (dump_file, " ivs\tcost\n");
4105 for (j = 0; j <= 2 * target_avail_regs; j++)
4106 fprintf (dump_file, " %d\t%d\n", j,
4107 ivopts_global_cost_for_size (data, j));
4108 fprintf (dump_file, "\n");
4109 }
4110 }
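
/* A direct transcription (guarded out; not part of the pass) of the cost
   model described in the comment above.  The actual computation is done
   through estimate_reg_pressure_cost; SMALL_COST, PRES_COST and SPILL_COST
   stand for the target costs, and their values below are invented.  */
#if 0
static unsigned
model_cost_for_size (unsigned U, unsigned I, unsigned A, unsigned n_uses)
{
  const unsigned R = 3;		/* the reserve */
  const unsigned SMALL_COST = 1, PRES_COST = 2, SPILL_COST = 5;

  if (U + I + R <= A)
    return I * SMALL_COST;
  if (U + I <= A)		/* i.e. A - R < U + I <= A */
    return I * PRES_COST;
  return I * PRES_COST + n_uses * SPILL_COST * (U + I - A) / (U + I);
}
#endif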
4111
4112 /* Returns true if A is a cheaper cost pair than B. */
4113
4114 static bool
4115 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4116 {
4117 int cmp;
4118
4119 if (!a)
4120 return false;
4121
4122 if (!b)
4123 return true;
4124
4125 cmp = compare_costs (a->cost, b->cost);
4126 if (cmp < 0)
4127 return true;
4128
4129 if (cmp > 0)
4130 return false;
4131
4132 /* If the use costs are the same, break the tie by the cost of the candidate itself. */
4133 if (a->cand->cost < b->cand->cost)
4134 return true;
4135
4136 return false;
4137 }
4138
4139 /* Computes the cost field of the IVS structure. */
4140
4141 static void
4142 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4143 {
4144 comp_cost cost = ivs->cand_use_cost;
4145 cost.cost += ivs->cand_cost;
4146 cost.cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4147
4148 ivs->cost = cost;
4149 }
4150
4151 /* Remove the invariants in set INVS from set IVS. */
4152
4153 static void
4154 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4155 {
4156 bitmap_iterator bi;
4157 unsigned iid;
4158
4159 if (!invs)
4160 return;
4161
4162 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4163 {
4164 ivs->n_invariant_uses[iid]--;
4165 if (ivs->n_invariant_uses[iid] == 0)
4166 ivs->n_regs--;
4167 }
4168 }
4169
4170 /* Set USE not to be expressed by any candidate in IVS. */
4171
4172 static void
4173 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4174 struct iv_use *use)
4175 {
4176 unsigned uid = use->id, cid;
4177 struct cost_pair *cp;
4178
4179 cp = ivs->cand_for_use[uid];
4180 if (!cp)
4181 return;
4182 cid = cp->cand->id;
4183
4184 ivs->bad_uses++;
4185 ivs->cand_for_use[uid] = NULL;
4186 ivs->n_cand_uses[cid]--;
4187
4188 if (ivs->n_cand_uses[cid] == 0)
4189 {
4190 bitmap_clear_bit (ivs->cands, cid);
4191 /* Do not count the pseudocandidates. */
4192 if (cp->cand->iv)
4193 ivs->n_regs--;
4194 ivs->n_cands--;
4195 ivs->cand_cost -= cp->cand->cost;
4196
4197 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4198 }
4199
4200 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
4201
4202 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4203 iv_ca_recount_cost (data, ivs);
4204 }
4205
4206 /* Add invariants in set INVS to set IVS. */
4207
4208 static void
4209 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4210 {
4211 bitmap_iterator bi;
4212 unsigned iid;
4213
4214 if (!invs)
4215 return;
4216
4217 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4218 {
4219 ivs->n_invariant_uses[iid]++;
4220 if (ivs->n_invariant_uses[iid] == 1)
4221 ivs->n_regs++;
4222 }
4223 }
4224
4225 /* Set cost pair for USE in set IVS to CP. */
4226
4227 static void
4228 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4229 struct iv_use *use, struct cost_pair *cp)
4230 {
4231 unsigned uid = use->id, cid;
4232
4233 if (ivs->cand_for_use[uid] == cp)
4234 return;
4235
4236 if (ivs->cand_for_use[uid])
4237 iv_ca_set_no_cp (data, ivs, use);
4238
4239 if (cp)
4240 {
4241 cid = cp->cand->id;
4242
4243 ivs->bad_uses--;
4244 ivs->cand_for_use[uid] = cp;
4245 ivs->n_cand_uses[cid]++;
4246 if (ivs->n_cand_uses[cid] == 1)
4247 {
4248 bitmap_set_bit (ivs->cands, cid);
4249 /* Do not count the pseudocandidates. */
4250 if (cp->cand->iv)
4251 ivs->n_regs++;
4252 ivs->n_cands++;
4253 ivs->cand_cost += cp->cand->cost;
4254
4255 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4256 }
4257
4258 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
4259 iv_ca_set_add_invariants (ivs, cp->depends_on);
4260 iv_ca_recount_cost (data, ivs);
4261 }
4262 }
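
/* Note: iv_ca_set_no_cp and iv_ca_set_cp are exact inverses and together
   maintain a reference-counting discipline -- a candidate's own cost, its
   register and its invariant dependencies enter the set exactly when its
   use count goes from 0 to 1 and leave again when it drops back to 0, so
   uses can be reassigned repeatedly while ivs->cost stays consistent.  */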
4263
4264 /* Extend set IVS by expressing USE by some of the candidates in it
4265 if possible. */
4266
4267 static void
4268 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4269 struct iv_use *use)
4270 {
4271 struct cost_pair *best_cp = NULL, *cp;
4272 bitmap_iterator bi;
4273 unsigned i;
4274
4275 gcc_assert (ivs->upto >= use->id);
4276
4277 if (ivs->upto == use->id)
4278 {
4279 ivs->upto++;
4280 ivs->bad_uses++;
4281 }
4282
4283 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4284 {
4285 cp = get_use_iv_cost (data, use, iv_cand (data, i));
4286
4287 if (cheaper_cost_pair (cp, best_cp))
4288 best_cp = cp;
4289 }
4290
4291 iv_ca_set_cp (data, ivs, use, best_cp);
4292 }
4293
4294 /* Get cost for assignment IVS. */
4295
4296 static comp_cost
4297 iv_ca_cost (struct iv_ca *ivs)
4298 {
4299 return (ivs->bad_uses ? infinite_cost : ivs->cost);
4300 }
4301
4302 /* Returns true if all dependences of CP are among invariants in IVS. */
4303
4304 static bool
4305 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4306 {
4307 unsigned i;
4308 bitmap_iterator bi;
4309
4310 if (!cp->depends_on)
4311 return true;
4312
4313 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4314 {
4315 if (ivs->n_invariant_uses[i] == 0)
4316 return false;
4317 }
4318
4319 return true;
4320 }
4321
4322 /* Creates a change expressing USE by NEW_CP instead of OLD_CP, and chains
4323 it before NEXT_CHANGE. */
4324
4325 static struct iv_ca_delta *
4326 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4327 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4328 {
4329 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4330
4331 change->use = use;
4332 change->old_cp = old_cp;
4333 change->new_cp = new_cp;
4334 change->next_change = next_change;
4335
4336 return change;
4337 }
4338
4339 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4340 are rewritten. */
4341
4342 static struct iv_ca_delta *
4343 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4344 {
4345 struct iv_ca_delta *last;
4346
4347 if (!l2)
4348 return l1;
4349
4350 if (!l1)
4351 return l2;
4352
4353 for (last = l1; last->next_change; last = last->next_change)
4354 continue;
4355 last->next_change = l2;
4356
4357 return l1;
4358 }
4359
4360 /* Returns the cost pair of the candidate by which USE is expressed in IVS. */
4361
4362 static struct cost_pair *
4363 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4364 {
4365 return ivs->cand_for_use[use->id];
4366 }
4367
4368 /* Reverse the list of changes DELTA, forming its inverse. */
4369
4370 static struct iv_ca_delta *
4371 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4372 {
4373 struct iv_ca_delta *act, *next, *prev = NULL;
4374 struct cost_pair *tmp;
4375
4376 for (act = delta; act; act = next)
4377 {
4378 next = act->next_change;
4379 act->next_change = prev;
4380 prev = act;
4381
4382 tmp = act->old_cp;
4383 act->old_cp = act->new_cp;
4384 act->new_cp = tmp;
4385 }
4386
4387 return prev;
4388 }
4389
4390 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4391 reverted instead. */
4392
4393 static void
4394 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4395 struct iv_ca_delta *delta, bool forward)
4396 {
4397 struct cost_pair *from, *to;
4398 struct iv_ca_delta *act;
4399
4400 if (!forward)
4401 delta = iv_ca_delta_reverse (delta);
4402
4403 for (act = delta; act; act = act->next_change)
4404 {
4405 from = act->old_cp;
4406 to = act->new_cp;
4407 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4408 iv_ca_set_cp (data, ivs, act->use, to);
4409 }
4410
4411 if (!forward)
4412 iv_ca_delta_reverse (delta);
4413 }
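
/* A minimal standalone sketch (guarded out; not part of the pass) of the
   undo-log technique used by iv_ca_delta_commit: to revert, the list is
   reversed with the old/new values swapped, replayed, and reversed again
   so the caller's list is left intact.  */
#if 0
#include <stddef.h>

struct delta
{
  int *slot;			/* the location being changed */
  int old_val, new_val;
  struct delta *next;
};

static struct delta *
reverse_and_swap (struct delta *d)
{
  struct delta *prev = NULL, *next;
  int tmp;

  for (; d; d = next)
    {
      next = d->next;
      d->next = prev;
      prev = d;

      tmp = d->old_val;
      d->old_val = d->new_val;
      d->new_val = tmp;
    }

  return prev;
}

static void
commit (struct delta *d, int forward)
{
  struct delta *act;

  if (!forward)
    d = reverse_and_swap (d);

  for (act = d; act; act = act->next)
    *act->slot = act->new_val;

  if (!forward)
    reverse_and_swap (d);
}
#endif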
4414
4415 /* Returns true if CAND is used in IVS. */
4416
4417 static bool
4418 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4419 {
4420 return ivs->n_cand_uses[cand->id] > 0;
4421 }
4422
4423 /* Returns number of induction variable candidates in the set IVS. */
4424
4425 static unsigned
4426 iv_ca_n_cands (struct iv_ca *ivs)
4427 {
4428 return ivs->n_cands;
4429 }
4430
4431 /* Free the list of changes DELTA. */
4432
4433 static void
4434 iv_ca_delta_free (struct iv_ca_delta **delta)
4435 {
4436 struct iv_ca_delta *act, *next;
4437
4438 for (act = *delta; act; act = next)
4439 {
4440 next = act->next_change;
4441 free (act);
4442 }
4443
4444 *delta = NULL;
4445 }
4446
4447 /* Allocates a new assignment of iv candidates to uses. */
4448
4449 static struct iv_ca *
4450 iv_ca_new (struct ivopts_data *data)
4451 {
4452 struct iv_ca *nw = XNEW (struct iv_ca);
4453
4454 nw->upto = 0;
4455 nw->bad_uses = 0;
4456 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4457 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4458 nw->cands = BITMAP_ALLOC (NULL);
4459 nw->n_cands = 0;
4460 nw->n_regs = 0;
4461 nw->cand_use_cost = zero_cost;
4462 nw->cand_cost = 0;
4463 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4464 nw->cost = zero_cost;
4465
4466 return nw;
4467 }
4468
4469 /* Free memory occupied by the set IVS. */
4470
4471 static void
4472 iv_ca_free (struct iv_ca **ivs)
4473 {
4474 free ((*ivs)->cand_for_use);
4475 free ((*ivs)->n_cand_uses);
4476 BITMAP_FREE ((*ivs)->cands);
4477 free ((*ivs)->n_invariant_uses);
4478 free (*ivs);
4479 *ivs = NULL;
4480 }
4481
4482 /* Dumps IVS to FILE. */
4483
4484 static void
4485 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4486 {
4487 const char *pref = " invariants ";
4488 unsigned i;
4489 comp_cost cost = iv_ca_cost (ivs);
4490
4491 fprintf (file, " cost %d (complexity %d)\n", cost.cost, cost.complexity);
4492 bitmap_print (file, ivs->cands, " candidates ","\n");
4493
4494 for (i = 1; i <= data->max_inv_id; i++)
4495 if (ivs->n_invariant_uses[i])
4496 {
4497 fprintf (file, "%s%d", pref, i);
4498 pref = ", ";
4499 }
4500 fprintf (file, "\n");
4501 }
4502
4503 /* Try changing the candidate in IVS to CAND for each use where this is
4504 cheaper. Return the cost of the new set, and store the differences in
4505 DELTA. The number of induction variables in the new set is stored in N_IVS. */
4506
4507 static comp_cost
4508 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4509 struct iv_cand *cand, struct iv_ca_delta **delta,
4510 unsigned *n_ivs)
4511 {
4512 unsigned i;
4513 comp_cost cost;
4514 struct iv_use *use;
4515 struct cost_pair *old_cp, *new_cp;
4516
4517 *delta = NULL;
4518 for (i = 0; i < ivs->upto; i++)
4519 {
4520 use = iv_use (data, i);
4521 old_cp = iv_ca_cand_for_use (ivs, use);
4522
4523 if (old_cp
4524 && old_cp->cand == cand)
4525 continue;
4526
4527 new_cp = get_use_iv_cost (data, use, cand);
4528 if (!new_cp)
4529 continue;
4530
4531 if (!iv_ca_has_deps (ivs, new_cp))
4532 continue;
4533
4534 if (!cheaper_cost_pair (new_cp, old_cp))
4535 continue;
4536
4537 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4538 }
4539
4540 iv_ca_delta_commit (data, ivs, *delta, true);
4541 cost = iv_ca_cost (ivs);
4542 if (n_ivs)
4543 *n_ivs = iv_ca_n_cands (ivs);
4544 iv_ca_delta_commit (data, ivs, *delta, false);
4545
4546 return cost;
4547 }
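
/* Note: iv_ca_extend only measures -- the delta is committed, the cost of
   the modified set is read off, and the delta is immediately reverted, so
   the caller can compare several tentative changes against IVS without
   copying the whole assignment.  */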
4548
4549 /* Try narrowing set IVS by removing CAND. Return the cost of
4550 the new set and store the differences in DELTA. */
4551
4552 static comp_cost
4553 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4554 struct iv_cand *cand, struct iv_ca_delta **delta)
4555 {
4556 unsigned i, ci;
4557 struct iv_use *use;
4558 struct cost_pair *old_cp, *new_cp, *cp;
4559 bitmap_iterator bi;
4560 struct iv_cand *cnd;
4561 comp_cost cost;
4562
4563 *delta = NULL;
4564 for (i = 0; i < n_iv_uses (data); i++)
4565 {
4566 use = iv_use (data, i);
4567
4568 old_cp = iv_ca_cand_for_use (ivs, use);
4569 if (old_cp->cand != cand)
4570 continue;
4571
4572 new_cp = NULL;
4573
4574 if (data->consider_all_candidates)
4575 {
4576 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4577 {
4578 if (ci == cand->id)
4579 continue;
4580
4581 cnd = iv_cand (data, ci);
4582
4583 cp = get_use_iv_cost (data, use, cnd);
4584 if (!cp)
4585 continue;
4586 if (!iv_ca_has_deps (ivs, cp))
4587 continue;
4588
4589 if (!cheaper_cost_pair (cp, new_cp))
4590 continue;
4591
4592 new_cp = cp;
4593 }
4594 }
4595 else
4596 {
4597 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4598 {
4599 if (ci == cand->id)
4600 continue;
4601
4602 cnd = iv_cand (data, ci);
4603
4604 cp = get_use_iv_cost (data, use, cnd);
4605 if (!cp)
4606 continue;
4607 if (!iv_ca_has_deps (ivs, cp))
4608 continue;
4609
4610 if (!cheaper_cost_pair (cp, new_cp))
4611 continue;
4612
4613 new_cp = cp;
4614 }
4615 }
4616
4617 if (!new_cp)
4618 {
4619 iv_ca_delta_free (delta);
4620 return infinite_cost;
4621 }
4622
4623 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4624 }
4625
4626 iv_ca_delta_commit (data, ivs, *delta, true);
4627 cost = iv_ca_cost (ivs);
4628 iv_ca_delta_commit (data, ivs, *delta, false);
4629
4630 return cost;
4631 }
4632
4633 /* Try optimizing the set of candidates IVS by removing candidates other
4634 than EXCEPT_CAND from it. Return the cost of the new set, and store the
4635 differences in DELTA. */
4636
4637 static comp_cost
4638 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4639 struct iv_cand *except_cand, struct iv_ca_delta **delta)
4640 {
4641 bitmap_iterator bi;
4642 struct iv_ca_delta *act_delta, *best_delta;
4643 unsigned i;
4644 comp_cost best_cost, acost;
4645 struct iv_cand *cand;
4646
4647 best_delta = NULL;
4648 best_cost = iv_ca_cost (ivs);
4649
4650 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4651 {
4652 cand = iv_cand (data, i);
4653
4654 if (cand == except_cand)
4655 continue;
4656
4657 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4658
4659 if (compare_costs (acost, best_cost) < 0)
4660 {
4661 best_cost = acost;
4662 iv_ca_delta_free (&best_delta);
4663 best_delta = act_delta;
4664 }
4665 else
4666 iv_ca_delta_free (&act_delta);
4667 }
4668
4669 if (!best_delta)
4670 {
4671 *delta = NULL;
4672 return best_cost;
4673 }
4674
4675 /* Recurse to possibly remove other unnecessary ivs. */
4676 iv_ca_delta_commit (data, ivs, best_delta, true);
4677 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4678 iv_ca_delta_commit (data, ivs, best_delta, false);
4679 *delta = iv_ca_delta_join (best_delta, *delta);
4680 return best_cost;
4681 }
4682
4683 /* Tries to extend the set IVS in the best possible way in order
4684 to express USE. */
4685
4686 static bool
4687 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4688 struct iv_use *use)
4689 {
4690 comp_cost best_cost, act_cost;
4691 unsigned i;
4692 bitmap_iterator bi;
4693 struct iv_cand *cand;
4694 struct iv_ca_delta *best_delta = NULL, *act_delta;
4695 struct cost_pair *cp;
4696
4697 iv_ca_add_use (data, ivs, use);
4698 best_cost = iv_ca_cost (ivs);
4699
4700 cp = iv_ca_cand_for_use (ivs, use);
4701 if (cp)
4702 {
4703 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4704 iv_ca_set_no_cp (data, ivs, use);
4705 }
4706
4707 /* First try important candidates not based on any memory object. Only if
4708 this fails do we try the specific ones. Rationale -- in loops with many
4709 variables the best choice often is to use just one generic biv. If we
4710 added many ivs specific to the uses here, the optimization algorithm
4711 would later be likely to get stuck in a local minimum, causing us to
4712 create too many ivs. Going from few ivs to more seems more likely to be
4713 successful -- starting from few ivs, replacing an expensive use by a
4714 specific iv should always be a win. */
4715 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4716 {
4717 cand = iv_cand (data, i);
4718
4719 if (cand->iv->base_object != NULL_TREE)
4720 continue;
4721
4722 if (iv_ca_cand_used_p (ivs, cand))
4723 continue;
4724
4725 cp = get_use_iv_cost (data, use, cand);
4726 if (!cp)
4727 continue;
4728
4729 iv_ca_set_cp (data, ivs, use, cp);
4730 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4731 iv_ca_set_no_cp (data, ivs, use);
4732 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4733
4734 if (compare_costs (act_cost, best_cost) < 0)
4735 {
4736 best_cost = act_cost;
4737
4738 iv_ca_delta_free (&best_delta);
4739 best_delta = act_delta;
4740 }
4741 else
4742 iv_ca_delta_free (&act_delta);
4743 }
4744
4745 if (infinite_cost_p (best_cost))
4746 {
4747 for (i = 0; i < use->n_map_members; i++)
4748 {
4749 cp = use->cost_map + i;
4750 cand = cp->cand;
4751 if (!cand)
4752 continue;
4753
4754 /* Already tried this. */
4755 if (cand->important && cand->iv->base_object == NULL_TREE)
4756 continue;
4757
4758 if (iv_ca_cand_used_p (ivs, cand))
4759 continue;
4760
4761 act_delta = NULL;
4762 iv_ca_set_cp (data, ivs, use, cp);
4763 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4764 iv_ca_set_no_cp (data, ivs, use);
4765 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4766 cp, act_delta);
4767
4768 if (compare_costs (act_cost, best_cost) < 0)
4769 {
4770 best_cost = act_cost;
4771
4772 if (best_delta)
4773 iv_ca_delta_free (&best_delta);
4774 best_delta = act_delta;
4775 }
4776 else
4777 iv_ca_delta_free (&act_delta);
4778 }
4779 }
4780
4781 iv_ca_delta_commit (data, ivs, best_delta, true);
4782 iv_ca_delta_free (&best_delta);
4783
4784 return !infinite_cost_p (best_cost);
4785 }
4786
4787 /* Finds an initial assignment of candidates to uses. */
4788
4789 static struct iv_ca *
4790 get_initial_solution (struct ivopts_data *data)
4791 {
4792 struct iv_ca *ivs = iv_ca_new (data);
4793 unsigned i;
4794
4795 for (i = 0; i < n_iv_uses (data); i++)
4796 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4797 {
4798 iv_ca_free (&ivs);
4799 return NULL;
4800 }
4801
4802 return ivs;
4803 }
4804
4805 /* Tries to improve the set of induction variables IVS. */
4806
4807 static bool
4808 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
4809 {
4810 unsigned i, n_ivs;
4811 comp_cost acost, best_cost = iv_ca_cost (ivs);
4812 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
4813 struct iv_cand *cand;
4814
4815 /* Try extending the set of induction variables by one. */
4816 for (i = 0; i < n_iv_cands (data); i++)
4817 {
4818 cand = iv_cand (data, i);
4819
4820 if (iv_ca_cand_used_p (ivs, cand))
4821 continue;
4822
4823 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
4824 if (!act_delta)
4825 continue;
4826
4827 /* If we successfully added the candidate and the set is small enough,
4828 try optimizing it by removing other candidates. */
4829 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
4830 {
4831 iv_ca_delta_commit (data, ivs, act_delta, true);
4832 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
4833 iv_ca_delta_commit (data, ivs, act_delta, false);
4834 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
4835 }
4836
4837 if (compare_costs (acost, best_cost) < 0)
4838 {
4839 best_cost = acost;
4840 iv_ca_delta_free (&best_delta);
4841 best_delta = act_delta;
4842 }
4843 else
4844 iv_ca_delta_free (&act_delta);
4845 }
4846
4847 if (!best_delta)
4848 {
4849 /* Try removing the candidates from the set instead. */
4850 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
4851
4852 /* Nothing more we can do. */
4853 if (!best_delta)
4854 return false;
4855 }
4856
4857 iv_ca_delta_commit (data, ivs, best_delta, true);
4858 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
4859 iv_ca_delta_free (&best_delta);
4860 return true;
4861 }
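
/* Note: the search is thus a plain hill-climb -- starting from the initial
   assignment, the single best one-candidate change (an extension followed
   by pruning, or pure pruning) is applied repeatedly until no change
   lowers the cost any further.  */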
4862
4863 /* Attempts to find the optimal set of induction variables. We use a
4864 simple greedy heuristic -- we try to replace at most one candidate in
4865 the selected solution and remove the unused ivs, as long as this improves the cost. */
4866
4867 static struct iv_ca *
4868 find_optimal_iv_set (struct ivopts_data *data)
4869 {
4870 unsigned i;
4871 struct iv_ca *set;
4872 struct iv_use *use;
4873
4874 /* Get the initial solution. */
4875 set = get_initial_solution (data);
4876 if (!set)
4877 {
4878 if (dump_file && (dump_flags & TDF_DETAILS))
4879 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
4880 return NULL;
4881 }
4882
4883 if (dump_file && (dump_flags & TDF_DETAILS))
4884 {
4885 fprintf (dump_file, "Initial set of candidates:\n");
4886 iv_ca_dump (data, dump_file, set);
4887 }
4888
4889 while (try_improve_iv_set (data, set))
4890 {
4891 if (dump_file && (dump_flags & TDF_DETAILS))
4892 {
4893 fprintf (dump_file, "Improved to:\n");
4894 iv_ca_dump (data, dump_file, set);
4895 }
4896 }
4897
4898 if (dump_file && (dump_flags & TDF_DETAILS))
4899 {
4900 comp_cost cost = iv_ca_cost (set);
4901 fprintf (dump_file, "Final cost %d (complexity %d)\n\n", cost.cost, cost.complexity);
4902 }
4903
4904 for (i = 0; i < n_iv_uses (data); i++)
4905 {
4906 use = iv_use (data, i);
4907 use->selected = iv_ca_cand_for_use (set, use)->cand;
4908 }
4909
4910 return set;
4911 }
4912
4913 /* Creates a new induction variable corresponding to CAND. */
4914
4915 static void
4916 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
4917 {
4918 block_stmt_iterator incr_pos;
4919 tree base;
4920 bool after = false;
4921
4922 if (!cand->iv)
4923 return;
4924
4925 switch (cand->pos)
4926 {
4927 case IP_NORMAL:
4928 incr_pos = bsi_last (ip_normal_pos (data->current_loop));
4929 break;
4930
4931 case IP_END:
4932 incr_pos = bsi_last (ip_end_pos (data->current_loop));
4933 after = true;
4934 break;
4935
4936 case IP_ORIGINAL:
4937 /* Mark that the iv is preserved. */
4938 name_info (data, cand->var_before)->preserve_biv = true;
4939 name_info (data, cand->var_after)->preserve_biv = true;
4940
4941 /* Rewrite the increment so that it uses var_before directly. */
4942 find_interesting_uses_op (data, cand->var_after)->selected = cand;
4943
4944 return;
4945 }
4946
4947 gimple_add_tmp_var (cand->var_before);
4948 add_referenced_var (cand->var_before);
4949
4950 base = unshare_expr (cand->iv->base);
4951
4952 create_iv (base, unshare_expr (cand->iv->step),
4953 cand->var_before, data->current_loop,
4954 &incr_pos, after, &cand->var_before, &cand->var_after);
4955 }
4956
4957 /* Creates new induction variables described in SET. */
4958
4959 static void
4960 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
4961 {
4962 unsigned i;
4963 struct iv_cand *cand;
4964 bitmap_iterator bi;
4965
4966 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
4967 {
4968 cand = iv_cand (data, i);
4969 create_new_iv (data, cand);
4970 }
4971 }
4972
4973 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
4974 is true, also remove the ssa name defined by the statement. */
4975
4976 static void
4977 remove_statement (tree stmt, bool including_defined_name)
4978 {
4979 if (TREE_CODE (stmt) == PHI_NODE)
4980 {
4981 remove_phi_node (stmt, NULL_TREE, including_defined_name);
4982 }
4983 else
4984 {
4985 block_stmt_iterator bsi = bsi_for_stmt (stmt);
4986
4987 bsi_remove (&bsi, true);
4988 release_defs (stmt);
4989 }
4990 }
4991
4992 /* Rewrites USE (definition of iv used in a nonlinear expression)
4993 using candidate CAND. */
4994
4995 static void
4996 rewrite_use_nonlinear_expr (struct ivopts_data *data,
4997 struct iv_use *use, struct iv_cand *cand)
4998 {
4999 tree comp;
5000 tree op, tgt, ass;
5001 block_stmt_iterator bsi;
5002
5003 /* An important special case -- if we are asked to express the value of
5004 the original iv by itself, just exit; there is no need to
5005 introduce a new computation (that might also need casting the
5006 variable to unsigned and back). */
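
/* For instance, when USE is the original biv increment
   i_after = i_before + 1
   and CAND preserves exactly this biv, emitting a new computation would
   merely add a copy (possibly with casts), so the statement is kept.  */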
5007 if (cand->pos == IP_ORIGINAL
5008 && cand->incremented_at == use->stmt)
5009 {
5010 tree step, ctype, utype;
5011 enum tree_code incr_code = PLUS_EXPR;
5012
5013 gcc_assert (TREE_CODE (use->stmt) == GIMPLE_MODIFY_STMT);
5014 gcc_assert (GIMPLE_STMT_OPERAND (use->stmt, 0) == cand->var_after);
5015
5016 step = cand->iv->step;
5017 ctype = TREE_TYPE (step);
5018 utype = TREE_TYPE (cand->var_after);
5019 if (TREE_CODE (step) == NEGATE_EXPR)
5020 {
5021 incr_code = MINUS_EXPR;
5022 step = TREE_OPERAND (step, 0);
5023 }
5024
5025 /* Check whether we may leave the computation unchanged.
5026 This is the case only if it does not rely on other
5027 computations in the loop -- otherwise, the computation
5028 we rely upon may be removed in remove_unused_ivs,
5029 thus leading to an ICE. */
5030 op = GIMPLE_STMT_OPERAND (use->stmt, 1);
5031 if (TREE_CODE (op) == PLUS_EXPR
5032 || TREE_CODE (op) == MINUS_EXPR
5033 || TREE_CODE (op) == POINTER_PLUS_EXPR)
5034 {
5035 if (TREE_OPERAND (op, 0) == cand->var_before)
5036 op = TREE_OPERAND (op, 1);
5037 else if (TREE_CODE (op) != MINUS_EXPR
5038 && TREE_OPERAND (op, 1) == cand->var_before)
5039 op = TREE_OPERAND (op, 0);
5040 else
5041 op = NULL_TREE;
5042 }
5043 else
5044 op = NULL_TREE;
5045
5046 if (op
5047 && (TREE_CODE (op) == INTEGER_CST
5048 || operand_equal_p (op, step, 0)))
5049 return;
5050
5051 /* Otherwise, add the necessary computations to express
5052 the iv. */
5053 op = fold_convert (ctype, cand->var_before);
5054 comp = fold_convert (utype,
5055 build2 (incr_code, ctype, op,
5056 unshare_expr (step)));
5057 }
5058 else
5059 {
5060 comp = get_computation (data->current_loop, use, cand);
5061 gcc_assert (comp != NULL_TREE);
5062 }
5063
5064 switch (TREE_CODE (use->stmt))
5065 {
5066 case PHI_NODE:
5067 tgt = PHI_RESULT (use->stmt);
5068
5069 /* If we should keep the biv, do not replace it. */
5070 if (name_info (data, tgt)->preserve_biv)
5071 return;
5072
5073 bsi = bsi_after_labels (bb_for_stmt (use->stmt));
5074 break;
5075
5076 case GIMPLE_MODIFY_STMT:
5077 tgt = GIMPLE_STMT_OPERAND (use->stmt, 0);
5078 bsi = bsi_for_stmt (use->stmt);
5079 break;
5080
5081 default:
5082 gcc_unreachable ();
5083 }
5084
5085 op = force_gimple_operand_bsi (&bsi, comp, false, SSA_NAME_VAR (tgt),
5086 true, BSI_SAME_STMT);
5087
5088 if (TREE_CODE (use->stmt) == PHI_NODE)
5089 {
5090 ass = build_gimple_modify_stmt (tgt, op);
5091 bsi_insert_before (&bsi, ass, BSI_SAME_STMT);
5092 remove_statement (use->stmt, false);
5093 SSA_NAME_DEF_STMT (tgt) = ass;
5094 }
5095 else
5096 GIMPLE_STMT_OPERAND (use->stmt, 1) = op;
5097 }
5098
5099 /* Replaces the ssa name at index IDX with its base variable. Callback
5100 for for_each_index. */
5101
5102 static bool
5103 idx_remove_ssa_names (tree base, tree *idx,
5104 void *data ATTRIBUTE_UNUSED)
5105 {
5106 tree *op;
5107
5108 if (TREE_CODE (*idx) == SSA_NAME)
5109 *idx = SSA_NAME_VAR (*idx);
5110
5111 if (TREE_CODE (base) == ARRAY_REF)
5112 {
5113 op = &TREE_OPERAND (base, 2);
5114 if (*op
5115 && TREE_CODE (*op) == SSA_NAME)
5116 *op = SSA_NAME_VAR (*op);
5117 op = &TREE_OPERAND (base, 3);
5118 if (*op
5119 && TREE_CODE (*op) == SSA_NAME)
5120 *op = SSA_NAME_VAR (*op);
5121 }
5122
5123 return true;
5124 }
5125
5126 /* Unshares REF and replaces the ssa names inside it with their base variables. */
5127
5128 static tree
5129 unshare_and_remove_ssa_names (tree ref)
5130 {
5131 ref = unshare_expr (ref);
5132 for_each_index (&ref, idx_remove_ssa_names, NULL);
5133
5134 return ref;
5135 }
5136
5137 /* Extract the alias analysis info for the memory reference REF. There are
5138 several ways this information may be stored, and what precisely its
5139 semantics are depends on the type of the reference, but there is always
5140 one _DECL node hidden somewhere that is used to determine the set of
5141 virtual operands for the reference. The code below deciphers this jungle
5142 and extracts that single useful piece of information. */
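
/* For example, for a.b[i] the relevant _DECL is the base VAR_DECL a (or
   its symbol memory tag), while for *p it is the name memory tag of the
   ssa name p if it has one, and the symbol memory tag of p's base
   variable otherwise.  */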
5143
5144 static tree
5145 get_ref_tag (tree ref, tree orig)
5146 {
5147 tree var = get_base_address (ref);
5148 tree aref = NULL_TREE, tag, sv;
5149 HOST_WIDE_INT offset, size, maxsize;
5150
5151 for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
5152 {
5153 aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
5154 if (aref)
5155 break;
5156 }
5157
5158 if (!var)
5159 return NULL_TREE;
5160
5161 if (TREE_CODE (var) == INDIRECT_REF)
5162 {
5163 /* If the base is a dereference of a pointer, first check its name memory
5164 tag. If it does not have one, use its symbol memory tag. */
5165 var = TREE_OPERAND (var, 0);
5166 if (TREE_CODE (var) != SSA_NAME)
5167 return NULL_TREE;
5168
5169 if (SSA_NAME_PTR_INFO (var))
5170 {
5171 tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5172 if (tag)
5173 return tag;
5174 }
5175
5176 var = SSA_NAME_VAR (var);
5177 tag = symbol_mem_tag (var);
5178 gcc_assert (tag != NULL_TREE);
5179 return tag;
5180 }
5181 else
5182 {
5183 if (!DECL_P (var))
5184 return NULL_TREE;
5185
5186 tag = symbol_mem_tag (var);
5187 if (tag)
5188 return tag;
5189
5190 return var;
5191 }
5192 }
5193
5194 /* Copies the reference information from OLD_REF to NEW_REF. */
5195
5196 static void
5197 copy_ref_info (tree new_ref, tree old_ref)
5198 {
5199 if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5200 copy_mem_ref_info (new_ref, old_ref);
5201 else
5202 {
5203 TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5204 TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
5205 }
5206 }
5207
5208 /* Rewrites USE (address that is an iv) using candidate CAND. */
5209
5210 static void
5211 rewrite_use_address (struct ivopts_data *data,
5212 struct iv_use *use, struct iv_cand *cand)
5213 {
5214 aff_tree aff;
5215 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5216 tree ref;
5217 bool ok;
5218
5219 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5220 gcc_assert (ok);
5221 unshare_aff_combination (&aff);
5222
5223 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5224 copy_ref_info (ref, *use->op_p);
5225 *use->op_p = ref;
5226 }
5227
5228 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5229 candidate CAND. */
5230
5231 static void
5232 rewrite_use_compare (struct ivopts_data *data,
5233 struct iv_use *use, struct iv_cand *cand)
5234 {
5235 tree comp, *var_p, op, bound;
5236 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5237 enum tree_code compare;
5238 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5239 bool ok;
5240
5241 bound = cp->value;
5242 if (bound)
5243 {
5244 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5245 tree var_type = TREE_TYPE (var);
5246
5247 compare = iv_elimination_compare (data, use);
5248 bound = unshare_expr (fold_convert (var_type, bound));
5249 op = force_gimple_operand_bsi (&bsi, bound, true, NULL_TREE,
5250 true, BSI_SAME_STMT);
5251
5252 *use->op_p = build2 (compare, boolean_type_node, var, op);
5253 return;
5254 }
5255
5256 /* The induction variable elimination failed; just express the original
5257 giv. */
5258 comp = get_computation (data->current_loop, use, cand);
5259 gcc_assert (comp != NULL_TREE);
5260
5261 ok = extract_cond_operands (data, use->op_p, &var_p, NULL, NULL, NULL);
5262 gcc_assert (ok);
5263
5264 *var_p = force_gimple_operand_bsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
5265 true, BSI_SAME_STMT);
5266 }
5267
5268 /* Rewrites USE using candidate CAND. */
5269
5270 static void
5271 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
5272 {
5273 push_stmt_changes (&use->stmt);
5274
5275 switch (use->type)
5276 {
5277 case USE_NONLINEAR_EXPR:
5278 rewrite_use_nonlinear_expr (data, use, cand);
5279 break;
5280
5281 case USE_ADDRESS:
5282 rewrite_use_address (data, use, cand);
5283 break;
5284
5285 case USE_COMPARE:
5286 rewrite_use_compare (data, use, cand);
5287 break;
5288
5289 default:
5290 gcc_unreachable ();
5291 }
5292
5293 pop_stmt_changes (&use->stmt);
5294 }
5295
5296 /* Rewrite the uses using the selected induction variables. */
5297
5298 static void
5299 rewrite_uses (struct ivopts_data *data)
5300 {
5301 unsigned i;
5302 struct iv_cand *cand;
5303 struct iv_use *use;
5304
5305 for (i = 0; i < n_iv_uses (data); i++)
5306 {
5307 use = iv_use (data, i);
5308 cand = use->selected;
5309 gcc_assert (cand);
5310
5311 rewrite_use (data, use, cand);
5312 }
5313 }
5314
5315 /* Removes the ivs that are not used after rewriting. */
5316
5317 static void
5318 remove_unused_ivs (struct ivopts_data *data)
5319 {
5320 unsigned j;
5321 bitmap_iterator bi;
5322
5323 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5324 {
5325 struct version_info *info;
5326
5327 info = ver_info (data, j);
5328 if (info->iv
5329 && !integer_zerop (info->iv->step)
5330 && !info->inv_id
5331 && !info->iv->have_use_for
5332 && !info->preserve_biv)
5333 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5334 }
5335 }
5336
5337 /* Frees data allocated by the optimization of a single loop. */
5338
5339 static void
5340 free_loop_data (struct ivopts_data *data)
5341 {
5342 unsigned i, j;
5343 bitmap_iterator bi;
5344 tree obj;
5345
5346 if (data->niters)
5347 {
5348 pointer_map_destroy (data->niters);
5349 data->niters = NULL;
5350 }
5351
5352 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5353 {
5354 struct version_info *info;
5355
5356 info = ver_info (data, i);
5357 if (info->iv)
5358 free (info->iv);
5359 info->iv = NULL;
5360 info->has_nonlin_use = false;
5361 info->preserve_biv = false;
5362 info->inv_id = 0;
5363 }
5364 bitmap_clear (data->relevant);
5365 bitmap_clear (data->important_candidates);
5366
5367 for (i = 0; i < n_iv_uses (data); i++)
5368 {
5369 struct iv_use *use = iv_use (data, i);
5370
5371 free (use->iv);
5372 BITMAP_FREE (use->related_cands);
5373 for (j = 0; j < use->n_map_members; j++)
5374 if (use->cost_map[j].depends_on)
5375 BITMAP_FREE (use->cost_map[j].depends_on);
5376 free (use->cost_map);
5377 free (use);
5378 }
5379 VEC_truncate (iv_use_p, data->iv_uses, 0);
5380
5381 for (i = 0; i < n_iv_cands (data); i++)
5382 {
5383 struct iv_cand *cand = iv_cand (data, i);
5384
5385 if (cand->iv)
5386 free (cand->iv);
5387 if (cand->depends_on)
5388 BITMAP_FREE (cand->depends_on);
5389 free (cand);
5390 }
5391 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5392
5393 if (data->version_info_size < num_ssa_names)
5394 {
5395 data->version_info_size = 2 * num_ssa_names;
5396 free (data->version_info);
5397 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5398 }
5399
5400 data->max_inv_id = 0;
5401
5402 for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5403 SET_DECL_RTL (obj, NULL_RTX);
5404
5405 VEC_truncate (tree, decl_rtl_to_reset, 0);
5406 }
5407
5408 /* Finalizes the data structures used by the iv optimization pass. */
5410
5411 static void
5412 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
5413 {
5414 free_loop_data (data);
5415 free (data->version_info);
5416 BITMAP_FREE (data->relevant);
5417 BITMAP_FREE (data->important_candidates);
5418
5419 VEC_free (tree, heap, decl_rtl_to_reset);
5420 VEC_free (iv_use_p, heap, data->iv_uses);
5421 VEC_free (iv_cand_p, heap, data->iv_candidates);
5422 }
5423
5424 /* Optimizes the LOOP. Returns true if anything changed. */
5425
5426 static bool
5427 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5428 {
5429 bool changed = false;
5430 struct iv_ca *iv_ca;
5431 edge exit;
5432
5433 gcc_assert (!data->niters);
5434 data->current_loop = loop;
5435
5436 if (dump_file && (dump_flags & TDF_DETAILS))
5437 {
5438 fprintf (dump_file, "Processing loop %d\n", loop->num);
5439
5440 exit = single_dom_exit (loop);
5441 if (exit)
5442 {
5443 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5444 exit->src->index, exit->dest->index);
5445 print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5446 fprintf (dump_file, "\n");
5447 }
5448
5449 fprintf (dump_file, "\n");
5450 }
5451
5452 /* For each ssa name, determine whether it behaves as an induction variable
5453 in some loop. */
5454 if (!find_induction_variables (data))
5455 goto finish;
5456
5457 /* Finds interesting uses (item 1). */
5458 find_interesting_uses (data);
5459 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5460 goto finish;
5461
5462 /* Finds candidates for the induction variables (item 2). */
5463 find_iv_candidates (data);
5464
5465 /* Calculates the costs (item 3, part 1). */
5466 determine_use_iv_costs (data);
5467 determine_iv_costs (data);
5468 determine_set_costs (data);
5469
5470 /* Find the optimal set of induction variables (item 3, part 2). */
5471 iv_ca = find_optimal_iv_set (data);
5472 if (!iv_ca)
5473 goto finish;
5474 changed = true;
5475
5476 /* Create the new induction variables (item 4, part 1). */
5477 create_new_ivs (data, iv_ca);
5478 iv_ca_free (&iv_ca);
5479
5480 /* Rewrite the uses (item 4, part 2). */
5481 rewrite_uses (data);
5482
5483 /* Remove the ivs that are unused after rewriting. */
5484 remove_unused_ivs (data);
5485
5486 /* We have changed the structure of induction variables; it might happen
5487 that definitions in the scev database refer to some of them that were
5488 eliminated. */
5489 scev_reset ();
5490
5491 finish:
5492 free_loop_data (data);
5493
5494 return changed;
5495 }
5496
5497 /* Main entry point. Optimizes induction variables in loops. */
5498
5499 void
5500 tree_ssa_iv_optimize (void)
5501 {
5502 struct loop *loop;
5503 struct ivopts_data data;
5504 loop_iterator li;
5505
5506 tree_ssa_iv_optimize_init (&data);
5507
5508 /* Optimize the loops starting with the innermost ones. */
5509 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
5510 {
5511 if (dump_file && (dump_flags & TDF_DETAILS))
5512 flow_loop_dump (loop, dump_file, NULL, 1);
5513
5514 tree_ssa_iv_optimize_loop (&data, loop);
5515 }
5516
5517 tree_ssa_iv_optimize_finalize (&data);
5518 }