gcc/tree-ssa-loop-ivopts.c
1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008 Free Software
3 Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 3, or (at your option) any
10 later version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
26 following steps:
27
28 1) The interesting uses of induction variables are found. This includes
29
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
33
34 2) Candidates for the induction variables are found. This includes
35
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
38 uses" above
39
40      3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
41 cost function assigns a cost to sets of induction variables and consists
42 of three parts:
43
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47          the time spent on modifying the induction variable's value to be usable
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
53
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
56
57 4) The trees are transformed to use the new variables, the dead code is
58 removed.
59
60     All of this is done loop by loop.  Doing it globally is theoretically
61     possible; it might give better performance and enable us to decide
62     costs more precisely, but getting all the interactions right would be
63     complicated.  */
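
/* A hypothetical example (not from any particular testcase) of what the
   pass aims for: given

     for (i = 0; i < n; i++)
       a[i] = 0;

   the computation of &a[i] is an interesting address use, and i is both an
   old induction variable and a candidate; choosing a pointer candidate may
   strength-reduce the loop to something like

     for (p = &a[0]; p < &a[n]; p++)
       *p = 0;

   eliminating i altogether if it has no other uses.  */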
64
65 #include "config.h"
66 #include "system.h"
67 #include "coretypes.h"
68 #include "tm.h"
69 #include "tree.h"
70 #include "rtl.h"
71 #include "tm_p.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
74 #include "output.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
78 #include "timevar.h"
79 #include "cfgloop.h"
80 #include "varray.h"
81 #include "expr.h"
82 #include "tree-pass.h"
83 #include "ggc.h"
84 #include "insn-config.h"
85 #include "recog.h"
86 #include "pointer-set.h"
87 #include "hashtab.h"
88 #include "tree-chrec.h"
89 #include "tree-scalar-evolution.h"
90 #include "cfgloop.h"
91 #include "params.h"
92 #include "langhooks.h"
93 #include "tree-affine.h"
94 #include "target.h"
95
96 /* The infinite cost. */
97 #define INFTY 10000000
98
99 /* The expected number of loop iterations. TODO -- use profiling instead of
100 this. */
101 #define AVG_LOOP_NITER(LOOP) 5
102
103
104 /* Representation of the induction variable. */
105 struct iv
106 {
107 tree base; /* Initial value of the iv. */
108   tree base_object;	/* The memory object that the induction variable points to.  */
109 tree step; /* Step of the iv (constant only). */
110 tree ssa_name; /* The ssa name with the value. */
111 bool biv_p; /* Is it a biv? */
112 bool have_use_for; /* Do we already have a use for it? */
113 unsigned use_id; /* The identifier in the use if it is the case. */
114 };
115
116 /* Per-ssa version information (induction variable descriptions, etc.). */
117 struct version_info
118 {
119 tree name; /* The ssa name. */
120 struct iv *iv; /* Induction variable description. */
121 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
122 an expression that is not an induction variable. */
123 unsigned inv_id; /* Id of an invariant. */
124 bool preserve_biv; /* For the original biv, whether to preserve it. */
125 };
126
127 /* Types of uses. */
128 enum use_type
129 {
130 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
131 USE_ADDRESS, /* Use in an address. */
132 USE_COMPARE /* Use is a compare. */
133 };
134
135 /* Cost of a computation. */
136 typedef struct
137 {
138 unsigned cost; /* The runtime cost. */
139 unsigned complexity; /* The estimate of the complexity of the code for
140 the computation (in no concrete units --
141 complexity field should be larger for more
142 complex expressions and addressing modes). */
143 } comp_cost;
144
145 static const comp_cost zero_cost = {0, 0};
146 static const comp_cost infinite_cost = {INFTY, INFTY};
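
/* A sketch of how these costs are meant to combine (the arithmetic helpers
   are assumed to live elsewhere in this file): costs add componentwise, and
   comparisons are primarily by COST, with COMPLEXITY only breaking ties.
   Under that assumption, {4, 1} + {2, 0} gives {6, 1}, and {5, 2} is
   preferred over {6, 0}.  */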
147
148 /* The candidate - cost pair. */
149 struct cost_pair
150 {
151 struct iv_cand *cand; /* The candidate. */
152 comp_cost cost; /* The cost. */
153 bitmap depends_on; /* The list of invariants that have to be
154 preserved. */
155 tree value; /* For final value elimination, the expression for
156 the final value of the iv. For iv elimination,
157 the new bound to compare with. */
158 };
159
160 /* Use. */
161 struct iv_use
162 {
163 unsigned id; /* The id of the use. */
164 enum use_type type; /* Type of the use. */
165 struct iv *iv; /* The induction variable it is based on. */
166 gimple stmt; /* Statement in that it occurs. */
167 tree *op_p; /* The place where it occurs. */
168 bitmap related_cands; /* The set of "related" iv candidates, plus the common
169 important ones. */
170
171 unsigned n_map_members; /* Number of candidates in the cost_map list. */
172 struct cost_pair *cost_map;
173 			/* The costs with respect to the iv candidates.  */
174
175 struct iv_cand *selected;
176 /* The selected candidate. */
177 };
178
179 /* The position where the iv is computed. */
180 enum iv_position
181 {
182 IP_NORMAL, /* At the end, just before the exit condition. */
183 IP_END, /* At the end of the latch block. */
184 IP_ORIGINAL /* The original biv. */
185 };
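
/* An illustrative (hypothetical) placement of the three positions:

     loop:
       ...
       i' = i + 1;             <-- IP_ORIGINAL: where the original biv
                                   happens to be incremented
       if (i' < n) goto loop;  <-- IP_NORMAL: just before the exit test
     latch:                    <-- IP_END: at the end of the latch block

   IP_END never splits the latch edge, but the incremented value is then
   not available to the exit test.  */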
186
187 /* The induction variable candidate. */
188 struct iv_cand
189 {
190 unsigned id; /* The number of the candidate. */
191 bool important; /* Whether this is an "important" candidate, i.e. such
192 that it should be considered by all uses. */
193 enum iv_position pos; /* Where it is computed. */
194 gimple incremented_at;/* For original biv, the statement where it is
195 incremented. */
196 tree var_before; /* The variable used for it before increment. */
197 tree var_after; /* The variable used for it after increment. */
198 struct iv *iv; /* The value of the candidate. NULL for
199 "pseudocandidate" used to indicate the possibility
200 to replace the final value of an iv by direct
201 computation of the value. */
202 unsigned cost; /* Cost of the candidate. */
203 bitmap depends_on; /* The list of invariants that are used in step of the
204 biv. */
205 };
206
207 /* The data used by the induction variable optimizations. */
208
209 typedef struct iv_use *iv_use_p;
210 DEF_VEC_P(iv_use_p);
211 DEF_VEC_ALLOC_P(iv_use_p,heap);
212
213 typedef struct iv_cand *iv_cand_p;
214 DEF_VEC_P(iv_cand_p);
215 DEF_VEC_ALLOC_P(iv_cand_p,heap);
216
217 struct ivopts_data
218 {
219 /* The currently optimized loop. */
220 struct loop *current_loop;
221
222 /* Are we optimizing for speed? */
223 bool speed;
224
225   /* Number of registers used in the loop.  */
226 unsigned regs_used;
227
228 /* Numbers of iterations for all exits of the current loop. */
229 struct pointer_map_t *niters;
230
231 /* The size of version_info array allocated. */
232 unsigned version_info_size;
233
234 /* The array of information for the ssa names. */
235 struct version_info *version_info;
236
237 /* The bitmap of indices in version_info whose value was changed. */
238 bitmap relevant;
239
240 /* The maximum invariant id. */
241 unsigned max_inv_id;
242
243 /* The uses of induction variables. */
244 VEC(iv_use_p,heap) *iv_uses;
245
246 /* The candidates. */
247 VEC(iv_cand_p,heap) *iv_candidates;
248
249 /* A bitmap of important candidates. */
250 bitmap important_candidates;
251
252 /* Whether to consider just related and important candidates when replacing a
253 use. */
254 bool consider_all_candidates;
255 };
256
257 /* An assignment of iv candidates to uses. */
258
259 struct iv_ca
260 {
261 /* The number of uses covered by the assignment. */
262 unsigned upto;
263
264 /* Number of uses that cannot be expressed by the candidates in the set. */
265 unsigned bad_uses;
266
267 /* Candidate assigned to a use, together with the related costs. */
268 struct cost_pair **cand_for_use;
269
270 /* Number of times each candidate is used. */
271 unsigned *n_cand_uses;
272
273 /* The candidates used. */
274 bitmap cands;
275
276 /* The number of candidates in the set. */
277 unsigned n_cands;
278
279 /* Total number of registers needed. */
280 unsigned n_regs;
281
282 /* Total cost of expressing uses. */
283 comp_cost cand_use_cost;
284
285 /* Total cost of candidates. */
286 unsigned cand_cost;
287
288 /* Number of times each invariant is used. */
289 unsigned *n_invariant_uses;
290
291 /* Total cost of the assignment. */
292 comp_cost cost;
293 };
294
295 /* Difference of two iv candidate assignments. */
296
297 struct iv_ca_delta
298 {
299 /* Changed use. */
300 struct iv_use *use;
301
302 /* An old assignment (for rollback purposes). */
303 struct cost_pair *old_cp;
304
305 /* A new assignment. */
306 struct cost_pair *new_cp;
307
308 /* Next change in the list. */
309 struct iv_ca_delta *next_change;
310 };
311
312 /* Bound on the number of candidates below which all candidates are considered.  */
313
314 #define CONSIDER_ALL_CANDIDATES_BOUND \
315 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
316
317 /* If there are more iv occurrences, we just give up (it is quite unlikely that
318 optimizing such a loop would help, and it would take ages). */
319
320 #define MAX_CONSIDERED_USES \
321 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
322
323 /* If there are at most this number of ivs in the set, we always try removing
324    unnecessary ivs from the set.  */
325
326 #define ALWAYS_PRUNE_CAND_SET_BOUND \
327 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
328
329 /* The list of trees for which the decl_rtl field must be reset is stored
330    here.  */
331
332 static VEC(tree,heap) *decl_rtl_to_reset;
333
334 /* Number of uses recorded in DATA. */
335
336 static inline unsigned
337 n_iv_uses (struct ivopts_data *data)
338 {
339 return VEC_length (iv_use_p, data->iv_uses);
340 }
341
342 /* Ith use recorded in DATA. */
343
344 static inline struct iv_use *
345 iv_use (struct ivopts_data *data, unsigned i)
346 {
347 return VEC_index (iv_use_p, data->iv_uses, i);
348 }
349
350 /* Number of candidates recorded in DATA. */
351
352 static inline unsigned
353 n_iv_cands (struct ivopts_data *data)
354 {
355 return VEC_length (iv_cand_p, data->iv_candidates);
356 }
357
358 /* Ith candidate recorded in DATA. */
359
360 static inline struct iv_cand *
361 iv_cand (struct ivopts_data *data, unsigned i)
362 {
363 return VEC_index (iv_cand_p, data->iv_candidates, i);
364 }
365
366 /* The single loop exit if it dominates the latch, NULL otherwise. */
367
368 edge
369 single_dom_exit (struct loop *loop)
370 {
371 edge exit = single_exit (loop);
372
373 if (!exit)
374 return NULL;
375
376 if (!just_once_each_iteration_p (loop, exit->src))
377 return NULL;
378
379 return exit;
380 }
381
382 /* Dumps information about the induction variable IV to FILE. */
383
384 extern void dump_iv (FILE *, struct iv *);
385 void
386 dump_iv (FILE *file, struct iv *iv)
387 {
388 if (iv->ssa_name)
389 {
390 fprintf (file, "ssa name ");
391 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
392 fprintf (file, "\n");
393 }
394
395 fprintf (file, " type ");
396 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
397 fprintf (file, "\n");
398
399 if (iv->step)
400 {
401 fprintf (file, " base ");
402 print_generic_expr (file, iv->base, TDF_SLIM);
403 fprintf (file, "\n");
404
405 fprintf (file, " step ");
406 print_generic_expr (file, iv->step, TDF_SLIM);
407 fprintf (file, "\n");
408 }
409 else
410 {
411 fprintf (file, " invariant ");
412 print_generic_expr (file, iv->base, TDF_SLIM);
413 fprintf (file, "\n");
414 }
415
416 if (iv->base_object)
417 {
418 fprintf (file, " base object ");
419 print_generic_expr (file, iv->base_object, TDF_SLIM);
420 fprintf (file, "\n");
421 }
422
423 if (iv->biv_p)
424 fprintf (file, " is a biv\n");
425 }
426
427 /* Dumps information about the USE to FILE. */
428
429 extern void dump_use (FILE *, struct iv_use *);
430 void
431 dump_use (FILE *file, struct iv_use *use)
432 {
433 fprintf (file, "use %d\n", use->id);
434
435 switch (use->type)
436 {
437 case USE_NONLINEAR_EXPR:
438 fprintf (file, " generic\n");
439 break;
440
441 case USE_ADDRESS:
442 fprintf (file, " address\n");
443 break;
444
445 case USE_COMPARE:
446 fprintf (file, " compare\n");
447 break;
448
449 default:
450 gcc_unreachable ();
451 }
452
453 fprintf (file, " in statement ");
454 print_gimple_stmt (file, use->stmt, 0, 0);
455 fprintf (file, "\n");
456
457 fprintf (file, " at position ");
458 if (use->op_p)
459 print_generic_expr (file, *use->op_p, TDF_SLIM);
460 fprintf (file, "\n");
461
462 dump_iv (file, use->iv);
463
464 if (use->related_cands)
465 {
466 fprintf (file, " related candidates ");
467 dump_bitmap (file, use->related_cands);
468 }
469 }
470
471 /* Dumps information about the uses to FILE. */
472
473 extern void dump_uses (FILE *, struct ivopts_data *);
474 void
475 dump_uses (FILE *file, struct ivopts_data *data)
476 {
477 unsigned i;
478 struct iv_use *use;
479
480 for (i = 0; i < n_iv_uses (data); i++)
481 {
482 use = iv_use (data, i);
483
484 dump_use (file, use);
485 fprintf (file, "\n");
486 }
487 }
488
489 /* Dumps information about induction variable candidate CAND to FILE. */
490
491 extern void dump_cand (FILE *, struct iv_cand *);
492 void
493 dump_cand (FILE *file, struct iv_cand *cand)
494 {
495 struct iv *iv = cand->iv;
496
497 fprintf (file, "candidate %d%s\n",
498 cand->id, cand->important ? " (important)" : "");
499
500 if (cand->depends_on)
501 {
502 fprintf (file, " depends on ");
503 dump_bitmap (file, cand->depends_on);
504 }
505
506 if (!iv)
507 {
508 fprintf (file, " final value replacement\n");
509 return;
510 }
511
512 switch (cand->pos)
513 {
514 case IP_NORMAL:
515 fprintf (file, " incremented before exit test\n");
516 break;
517
518 case IP_END:
519 fprintf (file, " incremented at end\n");
520 break;
521
522 case IP_ORIGINAL:
523 fprintf (file, " original biv\n");
524 break;
525 }
526
527 dump_iv (file, iv);
528 }
529
530 /* Returns the info for ssa version VER. */
531
532 static inline struct version_info *
533 ver_info (struct ivopts_data *data, unsigned ver)
534 {
535 return data->version_info + ver;
536 }
537
538 /* Returns the info for ssa name NAME. */
539
540 static inline struct version_info *
541 name_info (struct ivopts_data *data, tree name)
542 {
543 return ver_info (data, SSA_NAME_VERSION (name));
544 }
545
546 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
547 emitted in LOOP. */
548
549 static bool
550 stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
551 {
552 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
553
554 gcc_assert (bb);
555
556 if (sbb == loop->latch)
557 return true;
558
559 if (sbb != bb)
560 return false;
561
562 return stmt == last_stmt (bb);
563 }
564
565 /* Returns true if STMT is after the place where the original induction
566    variable CAND is incremented.  */
567
568 static bool
569 stmt_after_ip_original_pos (struct iv_cand *cand, gimple stmt)
570 {
571 basic_block cand_bb = gimple_bb (cand->incremented_at);
572 basic_block stmt_bb = gimple_bb (stmt);
573 gimple_stmt_iterator bsi;
574
575 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
576 return false;
577
578 if (stmt_bb != cand_bb)
579 return true;
580
581 /* Scan the block from the end, since the original ivs are usually
582 incremented at the end of the loop body. */
583 for (bsi = gsi_last_bb (stmt_bb); ; gsi_prev (&bsi))
584 {
585 if (gsi_stmt (bsi) == cand->incremented_at)
586 return false;
587 if (gsi_stmt (bsi) == stmt)
588 return true;
589 }
590 }
591
592 /* Returns true if STMT is after the place where the induction variable
593    CAND is incremented in LOOP.  */
594
595 static bool
596 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
597 {
598 switch (cand->pos)
599 {
600 case IP_END:
601 return false;
602
603 case IP_NORMAL:
604 return stmt_after_ip_normal_pos (loop, stmt);
605
606 case IP_ORIGINAL:
607 return stmt_after_ip_original_pos (cand, stmt);
608
609 default:
610 gcc_unreachable ();
611 }
612 }
613
614 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
615
616 static bool
617 abnormal_ssa_name_p (tree exp)
618 {
619 if (!exp)
620 return false;
621
622 if (TREE_CODE (exp) != SSA_NAME)
623 return false;
624
625 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
626 }
627
628 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
629 abnormal phi node. Callback for for_each_index. */
630
631 static bool
632 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
633 void *data ATTRIBUTE_UNUSED)
634 {
635 if (TREE_CODE (base) == ARRAY_REF)
636 {
637 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
638 return false;
639 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
640 return false;
641 }
642
643 return !abnormal_ssa_name_p (*index);
644 }
645
646 /* Returns true if EXPR contains a ssa name that occurs in an
647 abnormal phi node. */
648
649 bool
650 contains_abnormal_ssa_name_p (tree expr)
651 {
652 enum tree_code code;
653 enum tree_code_class codeclass;
654
655 if (!expr)
656 return false;
657
658 code = TREE_CODE (expr);
659 codeclass = TREE_CODE_CLASS (code);
660
661 if (code == SSA_NAME)
662 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
663
664 if (code == INTEGER_CST
665 || is_gimple_min_invariant (expr))
666 return false;
667
668 if (code == ADDR_EXPR)
669 return !for_each_index (&TREE_OPERAND (expr, 0),
670 idx_contains_abnormal_ssa_name_p,
671 NULL);
672
673 switch (codeclass)
674 {
675 case tcc_binary:
676 case tcc_comparison:
677 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
678 return true;
679
680 /* Fallthru. */
681 case tcc_unary:
682 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
683 return true;
684
685 break;
686
687 default:
688 gcc_unreachable ();
689 }
690
691 return false;
692 }
693
694 /* Returns tree describing number of iterations determined from
695 EXIT of DATA->current_loop, or NULL if something goes wrong. */
696
697 static tree
698 niter_for_exit (struct ivopts_data *data, edge exit)
699 {
700 struct tree_niter_desc desc;
701 tree niter;
702 void **slot;
703
704 if (!data->niters)
705 {
706 data->niters = pointer_map_create ();
707 slot = NULL;
708 }
709 else
710 slot = pointer_map_contains (data->niters, exit);
711
712 if (!slot)
713 {
714 /* Try to determine number of iterations. We must know it
715 unconditionally (i.e., without possibility of # of iterations
716 being zero). Also, we cannot safely work with ssa names that
717 appear in phi nodes on abnormal edges, so that we do not create
718 overlapping life ranges for them (PR 27283). */
719 if (number_of_iterations_exit (data->current_loop,
720 exit, &desc, true)
721 && integer_zerop (desc.may_be_zero)
722 && !contains_abnormal_ssa_name_p (desc.niter))
723 niter = desc.niter;
724 else
725 niter = NULL_TREE;
726
727 *pointer_map_insert (data->niters, exit) = niter;
728 }
729 else
730 niter = (tree) *slot;
731
732 return niter;
733 }
734
735 /* Returns tree describing number of iterations determined from
736 single dominating exit of DATA->current_loop, or NULL if something
737 goes wrong. */
738
739 static tree
740 niter_for_single_dom_exit (struct ivopts_data *data)
741 {
742 edge exit = single_dom_exit (data->current_loop);
743
744 if (!exit)
745 return NULL;
746
747 return niter_for_exit (data, exit);
748 }
749
750 /* Initializes data structures used by the iv optimization pass, stored
751 in DATA. */
752
753 static void
754 tree_ssa_iv_optimize_init (struct ivopts_data *data)
755 {
756 data->version_info_size = 2 * num_ssa_names;
757 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
758 data->relevant = BITMAP_ALLOC (NULL);
759 data->important_candidates = BITMAP_ALLOC (NULL);
760 data->max_inv_id = 0;
761 data->niters = NULL;
762 data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
763 data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
764 decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
765 }
766
767 /* Returns the memory object that EXPR points to.  In case we are able to
768    determine that it does not point to any such object, NULL_TREE is returned.  */
769
770 static tree
771 determine_base_object (tree expr)
772 {
773 enum tree_code code = TREE_CODE (expr);
774 tree base, obj;
775
776 /* If this is a pointer casted to any type, we need to determine
777 the base object for the pointer; so handle conversions before
778 throwing away non-pointer expressions. */
779 if (CONVERT_EXPR_P (expr))
780 return determine_base_object (TREE_OPERAND (expr, 0));
781
782 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
783 return NULL_TREE;
784
785 switch (code)
786 {
787 case INTEGER_CST:
788 return NULL_TREE;
789
790 case ADDR_EXPR:
791 obj = TREE_OPERAND (expr, 0);
792 base = get_base_address (obj);
793
794 if (!base)
795 return expr;
796
797 if (TREE_CODE (base) == INDIRECT_REF)
798 return determine_base_object (TREE_OPERAND (base, 0));
799
800 return fold_convert (ptr_type_node,
801 build_fold_addr_expr (base));
802
803 case POINTER_PLUS_EXPR:
804 return determine_base_object (TREE_OPERAND (expr, 0));
805
806 case PLUS_EXPR:
807 case MINUS_EXPR:
808 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
809 gcc_unreachable ();
810
811 default:
812 return fold_convert (ptr_type_node, expr);
813 }
814 }
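
/* For example (hypothetical operands): &a[i] yields the address of A
   converted to ptr_type_node, an SSA pointer P shifted by a constant
   (P p+ 4) yields whatever P itself yields, and an integer constant
   cast to a pointer yields NULL_TREE.  */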
815
816 /* Allocates an induction variable with the given initial value BASE and
817    step STEP.  */
818
819 static struct iv *
820 alloc_iv (tree base, tree step)
821 {
822 struct iv *iv = XCNEW (struct iv);
823 gcc_assert (step != NULL_TREE);
824
825 iv->base = base;
826 iv->base_object = determine_base_object (base);
827 iv->step = step;
828 iv->biv_p = false;
829 iv->have_use_for = false;
830 iv->use_id = 0;
831 iv->ssa_name = NULL_TREE;
832
833 return iv;
834 }
835
836 /* Sets STEP and BASE for induction variable IV. */
837
838 static void
839 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
840 {
841 struct version_info *info = name_info (data, iv);
842
843 gcc_assert (!info->iv);
844
845 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
846 info->iv = alloc_iv (base, step);
847 info->iv->ssa_name = iv;
848 }
849
850 /* Finds induction variable declaration for VAR. */
851
852 static struct iv *
853 get_iv (struct ivopts_data *data, tree var)
854 {
855 basic_block bb;
856 tree type = TREE_TYPE (var);
857
858 if (!POINTER_TYPE_P (type)
859 && !INTEGRAL_TYPE_P (type))
860 return NULL;
861
862 if (!name_info (data, var)->iv)
863 {
864 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
865
866 if (!bb
867 || !flow_bb_inside_loop_p (data->current_loop, bb))
868 set_iv (data, var, var, build_int_cst (type, 0));
869 }
870
871 return name_info (data, var)->iv;
872 }
873
874 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
875 not define a simple affine biv with nonzero step. */
876
877 static tree
878 determine_biv_step (gimple phi)
879 {
880 struct loop *loop = gimple_bb (phi)->loop_father;
881 tree name = PHI_RESULT (phi);
882 affine_iv iv;
883
884 if (!is_gimple_reg (name))
885 return NULL_TREE;
886
887 if (!simple_iv (loop, phi, name, &iv, true))
888 return NULL_TREE;
889
890 return integer_zerop (iv.step) ? NULL_TREE : iv.step;
891 }
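
/* E.g. for a (hypothetical) header phi

     i_1 = PHI <0 (preheader), i_2 (latch)>

   where the loop body computes i_2 = i_1 + 4, the function returns 4;
   if instead i_2 = i_1, the step is zero and NULL_TREE is returned.  */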
892
893 /* Finds basic ivs. */
894
895 static bool
896 find_bivs (struct ivopts_data *data)
897 {
898 gimple phi;
899 tree step, type, base;
900 bool found = false;
901 struct loop *loop = data->current_loop;
902 gimple_stmt_iterator psi;
903
904 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
905 {
906 phi = gsi_stmt (psi);
907
908 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
909 continue;
910
911 step = determine_biv_step (phi);
912 if (!step)
913 continue;
914
915 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
916 base = expand_simple_operations (base);
917 if (contains_abnormal_ssa_name_p (base)
918 || contains_abnormal_ssa_name_p (step))
919 continue;
920
921 type = TREE_TYPE (PHI_RESULT (phi));
922 base = fold_convert (type, base);
923 if (step)
924 {
925 if (POINTER_TYPE_P (type))
926 step = fold_convert (sizetype, step);
927 else
928 step = fold_convert (type, step);
929 }
930
931 set_iv (data, PHI_RESULT (phi), base, step);
932 found = true;
933 }
934
935 return found;
936 }
937
938 /* Marks basic ivs. */
939
940 static void
941 mark_bivs (struct ivopts_data *data)
942 {
943 gimple phi;
944 tree var;
945 struct iv *iv, *incr_iv;
946 struct loop *loop = data->current_loop;
947 basic_block incr_bb;
948 gimple_stmt_iterator psi;
949
950 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
951 {
952 phi = gsi_stmt (psi);
953
954 iv = get_iv (data, PHI_RESULT (phi));
955 if (!iv)
956 continue;
957
958 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
959 incr_iv = get_iv (data, var);
960 if (!incr_iv)
961 continue;
962
963 /* If the increment is in the subloop, ignore it. */
964 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
965 if (incr_bb->loop_father != data->current_loop
966 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
967 continue;
968
969 iv->biv_p = true;
970 incr_iv->biv_p = true;
971 }
972 }
973
974 /* Checks whether STMT defines a linear induction variable and stores its
975 parameters to IV. */
976
977 static bool
978 find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
979 {
980 tree lhs;
981 struct loop *loop = data->current_loop;
982
983 iv->base = NULL_TREE;
984 iv->step = NULL_TREE;
985
986 if (gimple_code (stmt) != GIMPLE_ASSIGN)
987 return false;
988
989 lhs = gimple_assign_lhs (stmt);
990 if (TREE_CODE (lhs) != SSA_NAME)
991 return false;
992
993 if (!simple_iv (loop, stmt, lhs, iv, true))
994 return false;
995 iv->base = expand_simple_operations (iv->base);
996
997 if (contains_abnormal_ssa_name_p (iv->base)
998 || contains_abnormal_ssa_name_p (iv->step))
999 return false;
1000
1001 return true;
1002 }
1003
1004 /* Finds general ivs in statement STMT. */
1005
1006 static void
1007 find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
1008 {
1009 affine_iv iv;
1010
1011 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1012 return;
1013
1014 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
1015 }
1016
1017 /* Finds general ivs in basic block BB. */
1018
1019 static void
1020 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1021 {
1022 gimple_stmt_iterator bsi;
1023
1024 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1025 find_givs_in_stmt (data, gsi_stmt (bsi));
1026 }
1027
1028 /* Finds general ivs. */
1029
1030 static void
1031 find_givs (struct ivopts_data *data)
1032 {
1033 struct loop *loop = data->current_loop;
1034 basic_block *body = get_loop_body_in_dom_order (loop);
1035 unsigned i;
1036
1037 for (i = 0; i < loop->num_nodes; i++)
1038 find_givs_in_bb (data, body[i]);
1039 free (body);
1040 }
1041
1042 /* For each ssa name defined in LOOP determines whether it is an induction
1043 variable and if so, its initial value and step. */
1044
1045 static bool
1046 find_induction_variables (struct ivopts_data *data)
1047 {
1048 unsigned i;
1049 bitmap_iterator bi;
1050
1051 if (!find_bivs (data))
1052 return false;
1053
1054 find_givs (data);
1055 mark_bivs (data);
1056
1057 if (dump_file && (dump_flags & TDF_DETAILS))
1058 {
1059 tree niter = niter_for_single_dom_exit (data);
1060
1061 if (niter)
1062 {
1063 fprintf (dump_file, " number of iterations ");
1064 print_generic_expr (dump_file, niter, TDF_SLIM);
1065 fprintf (dump_file, "\n\n");
1066 	}
1067
1068 fprintf (dump_file, "Induction variables:\n\n");
1069
1070 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1071 {
1072 if (ver_info (data, i)->iv)
1073 dump_iv (dump_file, ver_info (data, i)->iv);
1074 }
1075 }
1076
1077 return true;
1078 }
1079
1080 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1081
1082 static struct iv_use *
1083 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1084 gimple stmt, enum use_type use_type)
1085 {
1086 struct iv_use *use = XCNEW (struct iv_use);
1087
1088 use->id = n_iv_uses (data);
1089 use->type = use_type;
1090 use->iv = iv;
1091 use->stmt = stmt;
1092 use->op_p = use_p;
1093 use->related_cands = BITMAP_ALLOC (NULL);
1094
1095   /* Reset the ssa name so that it is not shown in the dumps, in case it
1096      was not reset by the caller.  */
1097 iv->ssa_name = NULL_TREE;
1098
1099 if (dump_file && (dump_flags & TDF_DETAILS))
1100 dump_use (dump_file, use);
1101
1102 VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1103
1104 return use;
1105 }
1106
1107 /* Checks whether OP is a loop-level invariant and if so, records it.
1108 NONLINEAR_USE is true if the invariant is used in a way we do not
1109 handle specially. */
1110
1111 static void
1112 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1113 {
1114 basic_block bb;
1115 struct version_info *info;
1116
1117 if (TREE_CODE (op) != SSA_NAME
1118 || !is_gimple_reg (op))
1119 return;
1120
1121 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1122 if (bb
1123 && flow_bb_inside_loop_p (data->current_loop, bb))
1124 return;
1125
1126 info = name_info (data, op);
1127 info->name = op;
1128 info->has_nonlin_use |= nonlinear_use;
1129 if (!info->inv_id)
1130 info->inv_id = ++data->max_inv_id;
1131 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1132 }
1133
1134 /* Checks whether the use OP is interesting and if so, records it. */
1135
1136 static struct iv_use *
1137 find_interesting_uses_op (struct ivopts_data *data, tree op)
1138 {
1139 struct iv *iv;
1140 struct iv *civ;
1141 gimple stmt;
1142 struct iv_use *use;
1143
1144 if (TREE_CODE (op) != SSA_NAME)
1145 return NULL;
1146
1147 iv = get_iv (data, op);
1148 if (!iv)
1149 return NULL;
1150
1151 if (iv->have_use_for)
1152 {
1153 use = iv_use (data, iv->use_id);
1154
1155 gcc_assert (use->type == USE_NONLINEAR_EXPR);
1156 return use;
1157 }
1158
1159 if (integer_zerop (iv->step))
1160 {
1161 record_invariant (data, op, true);
1162 return NULL;
1163 }
1164 iv->have_use_for = true;
1165
1166 civ = XNEW (struct iv);
1167 *civ = *iv;
1168
1169 stmt = SSA_NAME_DEF_STMT (op);
1170 gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1171 || is_gimple_assign (stmt));
1172
1173 use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1174 iv->use_id = use->id;
1175
1176 return use;
1177 }
1178
1179 /* Given a condition in statement STMT, checks whether it is a compare
1180 of an induction variable and an invariant. If this is the case,
1181 CONTROL_VAR is set to location of the iv, BOUND to the location of
1182 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1183 induction variable descriptions, and true is returned. If this is not
1184 the case, CONTROL_VAR and BOUND are set to the arguments of the
1185 condition and false is returned. */
1186
1187 static bool
1188 extract_cond_operands (struct ivopts_data *data, gimple stmt,
1189 tree **control_var, tree **bound,
1190 struct iv **iv_var, struct iv **iv_bound)
1191 {
1192 /* The objects returned when COND has constant operands. */
1193 static struct iv const_iv;
1194 static tree zero;
1195 tree *op0 = &zero, *op1 = &zero, *tmp_op;
1196 struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1197 bool ret = false;
1198
1199 if (gimple_code (stmt) == GIMPLE_COND)
1200 {
1201 op0 = gimple_cond_lhs_ptr (stmt);
1202 op1 = gimple_cond_rhs_ptr (stmt);
1203 }
1204 else
1205 {
1206 op0 = gimple_assign_rhs1_ptr (stmt);
1207 op1 = gimple_assign_rhs2_ptr (stmt);
1208 }
1209
1210 zero = integer_zero_node;
1211 const_iv.step = integer_zero_node;
1212
1213 if (TREE_CODE (*op0) == SSA_NAME)
1214 iv0 = get_iv (data, *op0);
1215 if (TREE_CODE (*op1) == SSA_NAME)
1216 iv1 = get_iv (data, *op1);
1217
1218 /* Exactly one of the compared values must be an iv, and the other one must
1219 be an invariant. */
1220 if (!iv0 || !iv1)
1221 goto end;
1222
1223 if (integer_zerop (iv0->step))
1224 {
1225 /* Control variable may be on the other side. */
1226 tmp_op = op0; op0 = op1; op1 = tmp_op;
1227 tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1228 }
1229 ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1230
1231 end:
1232 if (control_var)
1233     *control_var = op0;
1234   if (iv_var)
1235     *iv_var = iv0;
1236 if (bound)
1237 *bound = op1;
1238 if (iv_bound)
1239 *iv_bound = iv1;
1240
1241 return ret;
1242 }
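
/* For instance (hypothetical GIMPLE), for an exit test "if (n_5 > i_3)"
   where i_3 is an iv with nonzero step and n_5 is invariant, the operands
   are swapped so that *CONTROL_VAR points to i_3 and *BOUND to n_5, and
   true is returned.  If both operands are invariant, false is returned
   and the (possibly swapped) operands are still passed back.  */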
1243
1244 /* Checks whether the condition in STMT is interesting and if so,
1245 records it. */
1246
1247 static void
1248 find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
1249 {
1250 tree *var_p, *bound_p;
1251 struct iv *var_iv, *civ;
1252
1253 if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1254 {
1255 find_interesting_uses_op (data, *var_p);
1256 find_interesting_uses_op (data, *bound_p);
1257 return;
1258 }
1259
1260 civ = XNEW (struct iv);
1261 *civ = *var_iv;
1262 record_use (data, NULL, civ, stmt, USE_COMPARE);
1263 }
1264
1265 /* Returns true if expression EXPR is obviously invariant in LOOP,
1266 i.e. if all its operands are defined outside of the LOOP. LOOP
1267 should not be the function body. */
1268
1269 bool
1270 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1271 {
1272 basic_block def_bb;
1273 unsigned i, len;
1274
1275 gcc_assert (loop_depth (loop) > 0);
1276
1277 if (is_gimple_min_invariant (expr))
1278 return true;
1279
1280 if (TREE_CODE (expr) == SSA_NAME)
1281 {
1282 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1283 if (def_bb
1284 && flow_bb_inside_loop_p (loop, def_bb))
1285 return false;
1286
1287 return true;
1288 }
1289
1290 if (!EXPR_P (expr))
1291 return false;
1292
1293 len = TREE_OPERAND_LENGTH (expr);
1294 for (i = 0; i < len; i++)
1295 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1296 return false;
1297
1298 return true;
1299 }
1300
1301 /* Returns true if statement STMT is obviously invariant in LOOP,
1302 i.e. if all its operands on the RHS are defined outside of the LOOP.
1303 LOOP should not be the function body. */
1304
1305 bool
1306 stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
1307 {
1308 unsigned i;
1309 tree lhs;
1310
1311 gcc_assert (loop_depth (loop) > 0);
1312
1313 lhs = gimple_get_lhs (stmt);
1314 for (i = 0; i < gimple_num_ops (stmt); i++)
1315 {
1316 tree op = gimple_op (stmt, i);
1317 if (op != lhs && !expr_invariant_in_loop_p (loop, op))
1318 return false;
1319 }
1320
1321 return true;
1322 }
1323
1324 /* Accumulates the steps of indices into DATA and replaces their values with
1325    the initial ones.  Returns false when the value of the index cannot be determined.
1326 Callback for for_each_index. */
1327
1328 struct ifs_ivopts_data
1329 {
1330 struct ivopts_data *ivopts_data;
1331 gimple stmt;
1332 tree step;
1333 };
1334
1335 static bool
1336 idx_find_step (tree base, tree *idx, void *data)
1337 {
1338 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1339 struct iv *iv;
1340 tree step, iv_base, iv_step, lbound, off;
1341 struct loop *loop = dta->ivopts_data->current_loop;
1342
1343 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1344 || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1345 return false;
1346
1347 /* If base is a component ref, require that the offset of the reference
1348 be invariant. */
1349 if (TREE_CODE (base) == COMPONENT_REF)
1350 {
1351 off = component_ref_field_offset (base);
1352 return expr_invariant_in_loop_p (loop, off);
1353 }
1354
1355 /* If base is array, first check whether we will be able to move the
1356 reference out of the loop (in order to take its address in strength
1357 reduction). In order for this to work we need both lower bound
1358 and step to be loop invariants. */
1359 if (TREE_CODE (base) == ARRAY_REF)
1360 {
1361 step = array_ref_element_size (base);
1362 lbound = array_ref_low_bound (base);
1363
1364 if (!expr_invariant_in_loop_p (loop, step)
1365 || !expr_invariant_in_loop_p (loop, lbound))
1366 return false;
1367 }
1368
1369 if (TREE_CODE (*idx) != SSA_NAME)
1370 return true;
1371
1372 iv = get_iv (dta->ivopts_data, *idx);
1373 if (!iv)
1374 return false;
1375
1376   /* XXX  For a base of *D42 with iv->base being &x[0], we produce
1377 	 *&x[0], which is not folded and does not trigger the
1378 	 ARRAY_REF path below.  */
1379 *idx = iv->base;
1380
1381 if (integer_zerop (iv->step))
1382 return true;
1383
1384 if (TREE_CODE (base) == ARRAY_REF)
1385 {
1386 step = array_ref_element_size (base);
1387
1388 /* We only handle addresses whose step is an integer constant. */
1389 if (TREE_CODE (step) != INTEGER_CST)
1390 return false;
1391 }
1392 else
1393     /* The step for pointer arithmetic is already 1 byte.  */
1394 step = build_int_cst (sizetype, 1);
1395
1396 iv_base = iv->base;
1397 iv_step = iv->step;
1398 if (!convert_affine_scev (dta->ivopts_data->current_loop,
1399 sizetype, &iv_base, &iv_step, dta->stmt,
1400 false))
1401 {
1402 /* The index might wrap. */
1403 return false;
1404 }
1405
1406 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1407 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1408
1409 return true;
1410 }
1411
1412 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1413 object is passed to it in DATA. */
1414
1415 static bool
1416 idx_record_use (tree base, tree *idx,
1417 void *vdata)
1418 {
1419 struct ivopts_data *data = (struct ivopts_data *) vdata;
1420 find_interesting_uses_op (data, *idx);
1421 if (TREE_CODE (base) == ARRAY_REF)
1422 {
1423 find_interesting_uses_op (data, array_ref_element_size (base));
1424 find_interesting_uses_op (data, array_ref_low_bound (base));
1425 }
1426 return true;
1427 }
1428
1429 /* If we can prove that TOP = cst * BOT for some constant cst,
1430 store cst to MUL and return true. Otherwise return false.
1431 The returned value is always sign-extended, regardless of the
1432 signedness of TOP and BOT. */
1433
1434 static bool
1435 constant_multiple_of (tree top, tree bot, double_int *mul)
1436 {
1437 tree mby;
1438 enum tree_code code;
1439 double_int res, p0, p1;
1440 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1441
1442 STRIP_NOPS (top);
1443 STRIP_NOPS (bot);
1444
1445 if (operand_equal_p (top, bot, 0))
1446 {
1447 *mul = double_int_one;
1448 return true;
1449 }
1450
1451 code = TREE_CODE (top);
1452 switch (code)
1453 {
1454 case MULT_EXPR:
1455 mby = TREE_OPERAND (top, 1);
1456 if (TREE_CODE (mby) != INTEGER_CST)
1457 return false;
1458
1459 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1460 return false;
1461
1462 *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
1463 precision);
1464 return true;
1465
1466 case PLUS_EXPR:
1467 case MINUS_EXPR:
1468 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1469 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1470 return false;
1471
1472 if (code == MINUS_EXPR)
1473 p1 = double_int_neg (p1);
1474 *mul = double_int_sext (double_int_add (p0, p1), precision);
1475 return true;
1476
1477 case INTEGER_CST:
1478 if (TREE_CODE (bot) != INTEGER_CST)
1479 return false;
1480
1481 p0 = double_int_sext (tree_to_double_int (top), precision);
1482 p1 = double_int_sext (tree_to_double_int (bot), precision);
1483 if (double_int_zero_p (p1))
1484 return false;
1485 *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
1486 precision);
1487 return double_int_zero_p (res);
1488
1489 default:
1490 return false;
1491 }
1492 }
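
/* Examples (hypothetical operands): constant_multiple_of (n_1 * 4, n_1, &mul)
   sets *MUL to 4; constant_multiple_of (24, 8, &mul) sets *MUL to 3; and
   constant_multiple_of (24, 7, &mul) fails, since the division leaves a
   remainder.  */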
1493
1494 /* Returns true if memory reference REF with step STEP may be unaligned. */
1495
1496 static bool
1497 may_be_unaligned_p (tree ref, tree step)
1498 {
1499 tree base;
1500 tree base_type;
1501 HOST_WIDE_INT bitsize;
1502 HOST_WIDE_INT bitpos;
1503 tree toffset;
1504 enum machine_mode mode;
1505 int unsignedp, volatilep;
1506 unsigned base_align;
1507
1508 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1509 thus they are not misaligned. */
1510 if (TREE_CODE (ref) == TARGET_MEM_REF)
1511 return false;
1512
1513 /* The test below is basically copy of what expr.c:normal_inner_ref
1514 does to check whether the object must be loaded by parts when
1515 STRICT_ALIGNMENT is true. */
1516 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1517 &unsignedp, &volatilep, true);
1518 base_type = TREE_TYPE (base);
1519 base_align = TYPE_ALIGN (base_type);
1520
1521 if (mode != BLKmode)
1522 {
1523 double_int mul;
1524 tree al = build_int_cst (TREE_TYPE (step),
1525 GET_MODE_ALIGNMENT (mode) / BITS_PER_UNIT);
1526
1527 if (base_align < GET_MODE_ALIGNMENT (mode)
1528 || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1529 || bitpos % BITS_PER_UNIT != 0)
1530 return true;
1531
1532 if (!constant_multiple_of (step, al, &mul))
1533 return true;
1534 }
1535
1536 return false;
1537 }
1538
1539 /* Return true if EXPR may be non-addressable. */
1540
1541 static bool
1542 may_be_nonaddressable_p (tree expr)
1543 {
1544 switch (TREE_CODE (expr))
1545 {
1546 case TARGET_MEM_REF:
1547 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1548 target, thus they are always addressable. */
1549 return false;
1550
1551 case COMPONENT_REF:
1552 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1553 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1554
1555 case VIEW_CONVERT_EXPR:
1556 /* This kind of view-conversions may wrap non-addressable objects
1557 and make them look addressable. After some processing the
1558 non-addressability may be uncovered again, causing ADDR_EXPRs
1559 of inappropriate objects to be built. */
1560 if (is_gimple_reg (TREE_OPERAND (expr, 0))
1561 || is_gimple_min_invariant (TREE_OPERAND (expr, 0)))
1562 return true;
1563
1564 /* ... fall through ... */
1565
1566 case ARRAY_REF:
1567 case ARRAY_RANGE_REF:
1568 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1569
1570 CASE_CONVERT:
1571 return true;
1572
1573 default:
1574 break;
1575 }
1576
1577 return false;
1578 }
1579
1580 /* Finds addresses in *OP_P inside STMT. */
1581
1582 static void
1583 find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
1584 {
1585 tree base = *op_p, step = build_int_cst (sizetype, 0);
1586 struct iv *civ;
1587 struct ifs_ivopts_data ifs_ivopts_data;
1588
1589 /* Do not play with volatile memory references. A bit too conservative,
1590 perhaps, but safe. */
1591 if (gimple_has_volatile_ops (stmt))
1592 goto fail;
1593
1594 /* Ignore bitfields for now. Not really something terribly complicated
1595 to handle. TODO. */
1596 if (TREE_CODE (base) == BIT_FIELD_REF)
1597 goto fail;
1598
1599 base = unshare_expr (base);
1600
1601 if (TREE_CODE (base) == TARGET_MEM_REF)
1602 {
1603 tree type = build_pointer_type (TREE_TYPE (base));
1604 tree astep;
1605
1606 if (TMR_BASE (base)
1607 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1608 {
1609 civ = get_iv (data, TMR_BASE (base));
1610 if (!civ)
1611 goto fail;
1612
1613 TMR_BASE (base) = civ->base;
1614 step = civ->step;
1615 }
1616 if (TMR_INDEX (base)
1617 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1618 {
1619 civ = get_iv (data, TMR_INDEX (base));
1620 if (!civ)
1621 goto fail;
1622
1623 TMR_INDEX (base) = civ->base;
1624 astep = civ->step;
1625
1626 if (astep)
1627 {
1628 if (TMR_STEP (base))
1629 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1630
1631 step = fold_build2 (PLUS_EXPR, type, step, astep);
1632 }
1633 }
1634
1635 if (integer_zerop (step))
1636 goto fail;
1637 base = tree_mem_ref_addr (type, base);
1638 }
1639 else
1640 {
1641 ifs_ivopts_data.ivopts_data = data;
1642 ifs_ivopts_data.stmt = stmt;
1643 ifs_ivopts_data.step = build_int_cst (sizetype, 0);
1644 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1645 || integer_zerop (ifs_ivopts_data.step))
1646 goto fail;
1647 step = ifs_ivopts_data.step;
1648
1649 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1650 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1651
1652 /* Check that the base expression is addressable. This needs
1653 to be done after substituting bases of IVs into it. */
1654 if (may_be_nonaddressable_p (base))
1655 goto fail;
1656
1657 /* Moreover, on strict alignment platforms, check that it is
1658 sufficiently aligned. */
1659 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1660 goto fail;
1661
1662 base = build_fold_addr_expr (base);
1663
1664 /* Substituting bases of IVs into the base expression might
1665 have caused folding opportunities. */
1666 if (TREE_CODE (base) == ADDR_EXPR)
1667 {
1668 tree *ref = &TREE_OPERAND (base, 0);
1669 while (handled_component_p (*ref))
1670 ref = &TREE_OPERAND (*ref, 0);
1671 if (TREE_CODE (*ref) == INDIRECT_REF)
1672 *ref = fold_indirect_ref (*ref);
1673 }
1674 }
1675
1676 civ = alloc_iv (base, step);
1677 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1678 return;
1679
1680 fail:
1681 for_each_index (op_p, idx_record_use, data);
1682 }
1683
1684 /* Finds and records invariants used in STMT. */
1685
1686 static void
1687 find_invariants_stmt (struct ivopts_data *data, gimple stmt)
1688 {
1689 ssa_op_iter iter;
1690 use_operand_p use_p;
1691 tree op;
1692
1693 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1694 {
1695 op = USE_FROM_PTR (use_p);
1696 record_invariant (data, op, false);
1697 }
1698 }
1699
1700 /* Finds interesting uses of induction variables in the statement STMT. */
1701
1702 static void
1703 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
1704 {
1705 struct iv *iv;
1706 tree op, *lhs, *rhs;
1707 ssa_op_iter iter;
1708 use_operand_p use_p;
1709 enum tree_code code;
1710
1711 find_invariants_stmt (data, stmt);
1712
1713 if (gimple_code (stmt) == GIMPLE_COND)
1714 {
1715 find_interesting_uses_cond (data, stmt);
1716 return;
1717 }
1718
1719 if (is_gimple_assign (stmt))
1720 {
1721 lhs = gimple_assign_lhs_ptr (stmt);
1722 rhs = gimple_assign_rhs1_ptr (stmt);
1723
1724 if (TREE_CODE (*lhs) == SSA_NAME)
1725 {
1726 /* If the statement defines an induction variable, the uses are not
1727 interesting by themselves. */
1728
1729 iv = get_iv (data, *lhs);
1730
1731 if (iv && !integer_zerop (iv->step))
1732 return;
1733 }
1734
1735 code = gimple_assign_rhs_code (stmt);
1736 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
1737 && (REFERENCE_CLASS_P (*rhs)
1738 || is_gimple_val (*rhs)))
1739 {
1740 if (REFERENCE_CLASS_P (*rhs))
1741 find_interesting_uses_address (data, stmt, rhs);
1742 else
1743 find_interesting_uses_op (data, *rhs);
1744
1745 if (REFERENCE_CLASS_P (*lhs))
1746 find_interesting_uses_address (data, stmt, lhs);
1747 return;
1748 }
1749 else if (TREE_CODE_CLASS (code) == tcc_comparison)
1750 {
1751 find_interesting_uses_cond (data, stmt);
1752 return;
1753 }
1754
1755 /* TODO -- we should also handle address uses of type
1756
1757 memory = call (whatever);
1758
1759 and
1760
1761 call (memory). */
1762 }
1763
1764 if (gimple_code (stmt) == GIMPLE_PHI
1765 && gimple_bb (stmt) == data->current_loop->header)
1766 {
1767 iv = get_iv (data, PHI_RESULT (stmt));
1768
1769 if (iv && !integer_zerop (iv->step))
1770 return;
1771 }
1772
1773 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1774 {
1775 op = USE_FROM_PTR (use_p);
1776
1777 if (TREE_CODE (op) != SSA_NAME)
1778 continue;
1779
1780 iv = get_iv (data, op);
1781 if (!iv)
1782 continue;
1783
1784 find_interesting_uses_op (data, op);
1785 }
1786 }
1787
1788 /* Finds interesting uses of induction variables outside of loops
1789 on loop exit edge EXIT. */
1790
1791 static void
1792 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1793 {
1794 gimple phi;
1795 gimple_stmt_iterator psi;
1796 tree def;
1797
1798 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
1799 {
1800 phi = gsi_stmt (psi);
1801 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1802 if (is_gimple_reg (def))
1803 find_interesting_uses_op (data, def);
1804 }
1805 }
1806
1807 /* Finds uses of the induction variables that are interesting. */
1808
1809 static void
1810 find_interesting_uses (struct ivopts_data *data)
1811 {
1812 basic_block bb;
1813 gimple_stmt_iterator bsi;
1814 basic_block *body = get_loop_body (data->current_loop);
1815 unsigned i;
1816 struct version_info *info;
1817 edge e;
1818
1819 if (dump_file && (dump_flags & TDF_DETAILS))
1820 fprintf (dump_file, "Uses:\n\n");
1821
1822 for (i = 0; i < data->current_loop->num_nodes; i++)
1823 {
1824 edge_iterator ei;
1825 bb = body[i];
1826
1827 FOR_EACH_EDGE (e, ei, bb->succs)
1828 if (e->dest != EXIT_BLOCK_PTR
1829 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1830 find_interesting_uses_outside (data, e);
1831
1832 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1833 find_interesting_uses_stmt (data, gsi_stmt (bsi));
1834 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1835 find_interesting_uses_stmt (data, gsi_stmt (bsi));
1836 }
1837
1838 if (dump_file && (dump_flags & TDF_DETAILS))
1839 {
1840 bitmap_iterator bi;
1841
1842 fprintf (dump_file, "\n");
1843
1844 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1845 {
1846 info = ver_info (data, i);
1847 if (info->inv_id)
1848 {
1849 fprintf (dump_file, " ");
1850 print_generic_expr (dump_file, info->name, TDF_SLIM);
1851 fprintf (dump_file, " is invariant (%d)%s\n",
1852 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1853 }
1854 }
1855
1856 fprintf (dump_file, "\n");
1857 }
1858
1859 free (body);
1860 }
1861
1862 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1863 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1864 we are at the top-level of the processed address. */
1865
1866 static tree
1867 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1868 unsigned HOST_WIDE_INT *offset)
1869 {
1870 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1871 enum tree_code code;
1872 tree type, orig_type = TREE_TYPE (expr);
1873 unsigned HOST_WIDE_INT off0, off1, st;
1874 tree orig_expr = expr;
1875
1876 STRIP_NOPS (expr);
1877
1878 type = TREE_TYPE (expr);
1879 code = TREE_CODE (expr);
1880 *offset = 0;
1881
1882 switch (code)
1883 {
1884 case INTEGER_CST:
1885 if (!cst_and_fits_in_hwi (expr)
1886 || integer_zerop (expr))
1887 return orig_expr;
1888
1889 *offset = int_cst_value (expr);
1890 return build_int_cst (orig_type, 0);
1891
1892 case POINTER_PLUS_EXPR:
1893 case PLUS_EXPR:
1894 case MINUS_EXPR:
1895 op0 = TREE_OPERAND (expr, 0);
1896 op1 = TREE_OPERAND (expr, 1);
1897
1898 op0 = strip_offset_1 (op0, false, false, &off0);
1899 op1 = strip_offset_1 (op1, false, false, &off1);
1900
1901 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
1902 if (op0 == TREE_OPERAND (expr, 0)
1903 && op1 == TREE_OPERAND (expr, 1))
1904 return orig_expr;
1905
1906 if (integer_zerop (op1))
1907 expr = op0;
1908 else if (integer_zerop (op0))
1909 {
1910 if (code == MINUS_EXPR)
1911 expr = fold_build1 (NEGATE_EXPR, type, op1);
1912 else
1913 expr = op1;
1914 }
1915 else
1916 expr = fold_build2 (code, type, op0, op1);
1917
1918 return fold_convert (orig_type, expr);
1919
1920 case ARRAY_REF:
1921 if (!inside_addr)
1922 return orig_expr;
1923
1924 step = array_ref_element_size (expr);
1925 if (!cst_and_fits_in_hwi (step))
1926 break;
1927
1928 st = int_cst_value (step);
1929 op1 = TREE_OPERAND (expr, 1);
1930 op1 = strip_offset_1 (op1, false, false, &off1);
1931 *offset = off1 * st;
1932
1933 if (top_compref
1934 && integer_zerop (op1))
1935 {
1936 /* Strip the component reference completely. */
1937 op0 = TREE_OPERAND (expr, 0);
1938 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1939 *offset += off0;
1940 return op0;
1941 }
1942 break;
1943
1944 case COMPONENT_REF:
1945 if (!inside_addr)
1946 return orig_expr;
1947
1948 tmp = component_ref_field_offset (expr);
1949 if (top_compref
1950 && cst_and_fits_in_hwi (tmp))
1951 {
1952 /* Strip the component reference completely. */
1953 op0 = TREE_OPERAND (expr, 0);
1954 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1955 *offset = off0 + int_cst_value (tmp);
1956 return op0;
1957 }
1958 break;
1959
1960 case ADDR_EXPR:
1961 op0 = TREE_OPERAND (expr, 0);
1962 op0 = strip_offset_1 (op0, true, true, &off0);
1963 *offset += off0;
1964
1965 if (op0 == TREE_OPERAND (expr, 0))
1966 return orig_expr;
1967
1968 expr = build_fold_addr_expr (op0);
1969 return fold_convert (orig_type, expr);
1970
1971 case INDIRECT_REF:
1972 inside_addr = false;
1973 break;
1974
1975 default:
1976 return orig_expr;
1977 }
1978
1979   /* Default handling of expressions for which we want to recurse into
1980      the first operand.  */
1981 op0 = TREE_OPERAND (expr, 0);
1982 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1983 *offset += off0;
1984
1985 if (op0 == TREE_OPERAND (expr, 0)
1986 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1987 return orig_expr;
1988
1989 expr = copy_node (expr);
1990 TREE_OPERAND (expr, 0) = op0;
1991 if (op1)
1992 TREE_OPERAND (expr, 1) = op1;
1993
1994 /* Inside address, we might strip the top level component references,
1995 thus changing type of the expression. Handling of ADDR_EXPR
1996 will fix that. */
1997 expr = fold_convert (orig_type, expr);
1998
1999 return expr;
2000 }
2001
2002 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2003
2004 static tree
2005 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2006 {
2007 return strip_offset_1 (expr, false, false, offset);
2008 }
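
/* E.g. (hypothetical trees) strip_offset on "p_1 p+ 16" returns p_1 with
   *OFFSET = 16, and on "&a[2]" with 4-byte array elements returns "&a"
   with *OFFSET = 8.  */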
2009
2010 /* Returns a variant of TYPE that can be used as a base for different uses.
2011    We return an unsigned type with the same precision, which avoids problems
2012    with overflows.  */
2013
2014 static tree
2015 generic_type_for (tree type)
2016 {
2017 if (POINTER_TYPE_P (type))
2018 return unsigned_type_for (type);
2019
2020 if (TYPE_UNSIGNED (type))
2021 return type;
2022
2023 return unsigned_type_for (type);
2024 }
2025
2026 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
2027    the bitmap in which we should store it.  */
2028
2029 static struct ivopts_data *fd_ivopts_data;
2030 static tree
2031 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2032 {
2033 bitmap *depends_on = (bitmap *) data;
2034 struct version_info *info;
2035
2036 if (TREE_CODE (*expr_p) != SSA_NAME)
2037 return NULL_TREE;
2038 info = name_info (fd_ivopts_data, *expr_p);
2039
2040 if (!info->inv_id || info->has_nonlin_use)
2041 return NULL_TREE;
2042
2043 if (!*depends_on)
2044 *depends_on = BITMAP_ALLOC (NULL);
2045 bitmap_set_bit (*depends_on, info->inv_id);
2046
2047 return NULL_TREE;
2048 }
2049
2050 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2051 position to POS. If USE is not NULL, the candidate is set as related to
2052 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2053 replacement of the final value of the iv by a direct computation. */
2054
2055 static struct iv_cand *
2056 add_candidate_1 (struct ivopts_data *data,
2057 tree base, tree step, bool important, enum iv_position pos,
2058 struct iv_use *use, gimple incremented_at)
2059 {
2060 unsigned i;
2061 struct iv_cand *cand = NULL;
2062 tree type, orig_type;
2063
2064 if (base)
2065 {
2066 orig_type = TREE_TYPE (base);
2067 type = generic_type_for (orig_type);
2068 /* Don't convert the base to the generic type for pointers as the generic
2069 type is an integer type with the same size as the pointer type. */
2070 if (type != orig_type && !POINTER_TYPE_P (orig_type))
2071 {
2072 base = fold_convert (type, base);
2073 step = fold_convert (type, step);
2074 }
2075 }
2076
2077 for (i = 0; i < n_iv_cands (data); i++)
2078 {
2079 cand = iv_cand (data, i);
2080
2081 if (cand->pos != pos)
2082 continue;
2083
2084 if (cand->incremented_at != incremented_at)
2085 continue;
2086
2087 if (!cand->iv)
2088 {
2089 if (!base && !step)
2090 break;
2091
2092 continue;
2093 }
2094
2095 if (!base && !step)
2096 continue;
2097
2098 if (operand_equal_p (base, cand->iv->base, 0)
2099 && operand_equal_p (step, cand->iv->step, 0))
2100 break;
2101 }
2102
2103 if (i == n_iv_cands (data))
2104 {
2105 cand = XCNEW (struct iv_cand);
2106 cand->id = i;
2107
2108 if (!base && !step)
2109 cand->iv = NULL;
2110 else
2111 cand->iv = alloc_iv (base, step);
2112
2113 cand->pos = pos;
2114 if (pos != IP_ORIGINAL && cand->iv)
2115 {
2116 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2117 cand->var_after = cand->var_before;
2118 }
2119 cand->important = important;
2120 cand->incremented_at = incremented_at;
2121 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2122
2123 if (step
2124 && TREE_CODE (step) != INTEGER_CST)
2125 {
2126 fd_ivopts_data = data;
2127 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2128 }
2129
2130 if (dump_file && (dump_flags & TDF_DETAILS))
2131 dump_cand (dump_file, cand);
2132 }
2133
2134 if (important && !cand->important)
2135 {
2136 cand->important = true;
2137 if (dump_file && (dump_flags & TDF_DETAILS))
2138 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2139 }
2140
2141 if (use)
2142 {
2143 bitmap_set_bit (use->related_cands, i);
2144 if (dump_file && (dump_flags & TDF_DETAILS))
2145 fprintf (dump_file, "Candidate %d is related to use %d\n",
2146 cand->id, use->id);
2147 }
2148
2149 return cand;
2150 }
2151
2152 /* Returns true if incrementing the induction variable at the end of the LOOP
2153 is allowed.
2154
2155 The purpose is to avoid splitting the latch edge with a biv increment, thus
2156 creating a jump, possibly confusing other optimization passes and leaving
2157 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2158 is not available (so we do not have a better alternative), or if the latch
2159 edge is already nonempty. */
2160
2161 static bool
2162 allow_ip_end_pos_p (struct loop *loop)
2163 {
2164 if (!ip_normal_pos (loop))
2165 return true;
2166
2167 if (!empty_block_p (ip_end_pos (loop)))
2168 return true;
2169
2170 return false;
2171 }
2172
2173 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2174 position to POS. If USE is not NULL, the candidate is set as related to
2175 it. The candidate computation is scheduled on all available positions. */
2176
2177 static void
2178 add_candidate (struct ivopts_data *data,
2179 tree base, tree step, bool important, struct iv_use *use)
2180 {
2181 if (ip_normal_pos (data->current_loop))
2182 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2183 if (ip_end_pos (data->current_loop)
2184 && allow_ip_end_pos_p (data->current_loop))
2185 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2186 }
2187
2188 /* Add a standard "0 + 1 * iteration" iv candidate for a
2189 type with SIZE bits. */
2190
2191 static void
2192 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2193 unsigned int size)
2194 {
2195 tree type = lang_hooks.types.type_for_size (size, true);
2196 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2197 true, NULL);
2198 }
2199
2200 /* Adds standard iv candidates. */
2201
2202 static void
2203 add_standard_iv_candidates (struct ivopts_data *data)
2204 {
2205 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2206
2207 /* The same for a double-integer type if it is still fast enough. */
2208 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2209 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2210 }
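
/* For example, on a hypothetical target with 32-bit int and 64-bit
   words, this adds the candidates "0 + 1 * i" in a 32-bit and in a
   64-bit unsigned integer type. */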
2211
2212
2213 /* Adds candidates based on the old induction variable IV. */
2214
2215 static void
2216 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2217 {
2218 gimple phi;
2219 tree def;
2220 struct iv_cand *cand;
2221
2222 add_candidate (data, iv->base, iv->step, true, NULL);
2223
2224 /* The same, but with initial value zero. */
2225 add_candidate (data,
2226 build_int_cst (TREE_TYPE (iv->base), 0),
2227 iv->step, true, NULL);
2228
2229 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2230 if (gimple_code (phi) == GIMPLE_PHI)
2231 {
2232 /* Additionally record the possibility of leaving the original iv
2233 untouched. */
2234 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2235 cand = add_candidate_1 (data,
2236 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2237 SSA_NAME_DEF_STMT (def));
2238 cand->var_before = iv->ssa_name;
2239 cand->var_after = def;
2240 }
2241 }
2242
2243 /* Adds candidates based on the old induction variables. */
2244
2245 static void
2246 add_old_ivs_candidates (struct ivopts_data *data)
2247 {
2248 unsigned i;
2249 struct iv *iv;
2250 bitmap_iterator bi;
2251
2252 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2253 {
2254 iv = ver_info (data, i)->iv;
2255 if (iv && iv->biv_p && !integer_zerop (iv->step))
2256 add_old_iv_candidates (data, iv);
2257 }
2258 }
2259
2260 /* Adds candidates based on the value of the induction variable IV and USE. */
2261
2262 static void
2263 add_iv_value_candidates (struct ivopts_data *data,
2264 struct iv *iv, struct iv_use *use)
2265 {
2266 unsigned HOST_WIDE_INT offset;
2267 tree base;
2268 tree basetype;
2269
2270 add_candidate (data, iv->base, iv->step, false, use);
2271
2272 /* The same, but with initial value zero. Make such a variable important,
2273 since it is generic enough so that possibly many uses may be based
2274 on it. */
2275 basetype = TREE_TYPE (iv->base);
2276 if (POINTER_TYPE_P (basetype))
2277 basetype = sizetype;
2278 add_candidate (data, build_int_cst (basetype, 0),
2279 iv->step, true, use);
2280
2281 /* Third, try removing the constant offset. Make sure to even
2282 add a candidate for &a[0] vs. (T *)&a. */
2283 base = strip_offset (iv->base, &offset);
2284 if (offset
2285 || base != iv->base)
2286 add_candidate (data, base, iv->step, false, use);
2287 }
2288
2289 /* Adds candidates based on the uses. */
2290
2291 static void
2292 add_derived_ivs_candidates (struct ivopts_data *data)
2293 {
2294 unsigned i;
2295
2296 for (i = 0; i < n_iv_uses (data); i++)
2297 {
2298 struct iv_use *use = iv_use (data, i);
2299
2300 if (!use)
2301 continue;
2302
2303 switch (use->type)
2304 {
2305 case USE_NONLINEAR_EXPR:
2306 case USE_COMPARE:
2307 case USE_ADDRESS:
2308 /* Just add the ivs based on the value of the iv used here. */
2309 add_iv_value_candidates (data, use->iv, use);
2310 break;
2311
2312 default:
2313 gcc_unreachable ();
2314 }
2315 }
2316 }
2317
2318 /* Record important candidates and add them to related_cands bitmaps
2319 if needed. */
2320
2321 static void
2322 record_important_candidates (struct ivopts_data *data)
2323 {
2324 unsigned i;
2325 struct iv_use *use;
2326
2327 for (i = 0; i < n_iv_cands (data); i++)
2328 {
2329 struct iv_cand *cand = iv_cand (data, i);
2330
2331 if (cand->important)
2332 bitmap_set_bit (data->important_candidates, i);
2333 }
2334
2335 data->consider_all_candidates = (n_iv_cands (data)
2336 <= CONSIDER_ALL_CANDIDATES_BOUND);
2337
2338 if (data->consider_all_candidates)
2339 {
2340 /* We will not need "related_cands" bitmaps in this case,
2341 so release them to decrease peak memory consumption. */
2342 for (i = 0; i < n_iv_uses (data); i++)
2343 {
2344 use = iv_use (data, i);
2345 BITMAP_FREE (use->related_cands);
2346 }
2347 }
2348 else
2349 {
2350 /* Add important candidates to the related_cands bitmaps. */
2351 for (i = 0; i < n_iv_uses (data); i++)
2352 bitmap_ior_into (iv_use (data, i)->related_cands,
2353 data->important_candidates);
2354 }
2355 }
2356
2357 /* Finds the candidates for the induction variables. */
2358
2359 static void
2360 find_iv_candidates (struct ivopts_data *data)
2361 {
2362 /* Add commonly used ivs. */
2363 add_standard_iv_candidates (data);
2364
2365 /* Add old induction variables. */
2366 add_old_ivs_candidates (data);
2367
2368 /* Add induction variables derived from uses. */
2369 add_derived_ivs_candidates (data);
2370
2371 /* Record the important candidates. */
2372 record_important_candidates (data);
2373 }
2374
2375 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2376 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2377 we allocate a simple list to every use. */
2378
2379 static void
2380 alloc_use_cost_map (struct ivopts_data *data)
2381 {
2382 unsigned i, size, s, j;
2383
2384 for (i = 0; i < n_iv_uses (data); i++)
2385 {
2386 struct iv_use *use = iv_use (data, i);
2387 bitmap_iterator bi;
2388
2389 if (data->consider_all_candidates)
2390 size = n_iv_cands (data);
2391 else
2392 {
2393 s = 0;
2394 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2395 {
2396 s++;
2397 }
2398
2399 /* Round up to a power of two, so that taking the modulo by it is fast. */
2400 for (size = 1; size < s; size <<= 1)
2401 continue;
2402 }
2403
2404 use->n_map_members = size;
2405 use->cost_map = XCNEWVEC (struct cost_pair, size);
2406 }
2407 }
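
/* Illustration with made-up numbers: a use related to 5 candidates
   gets a cost map of size 8, so that set_use_iv_cost and
   get_use_iv_cost can compute "cand->id % size" as
   "cand->id & (size - 1)" with a single AND. */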
2408
2409 /* Returns a description of the computation cost of an expression whose
2410 runtime cost is RUNTIME and whose complexity is COMPLEXITY. */
2411
2412 static comp_cost
2413 new_cost (unsigned runtime, unsigned complexity)
2414 {
2415 comp_cost cost;
2416
2417 cost.cost = runtime;
2418 cost.complexity = complexity;
2419
2420 return cost;
2421 }
2422
2423 /* Adds costs COST1 and COST2. */
2424
2425 static comp_cost
2426 add_costs (comp_cost cost1, comp_cost cost2)
2427 {
2428 cost1.cost += cost2.cost;
2429 cost1.complexity += cost2.complexity;
2430
2431 return cost1;
2432 }
2433 /* Subtracts COST2 from COST1. */
2434
2435 static comp_cost
2436 sub_costs (comp_cost cost1, comp_cost cost2)
2437 {
2438 cost1.cost -= cost2.cost;
2439 cost1.complexity -= cost2.complexity;
2440
2441 return cost1;
2442 }
2443
2444 /* Returns a negative number if COST1 < COST2, a positive number if
2445 COST1 > COST2, and 0 if COST1 = COST2. */
2446
2447 static int
2448 compare_costs (comp_cost cost1, comp_cost cost2)
2449 {
2450 if (cost1.cost == cost2.cost)
2451 return cost1.complexity - cost2.complexity;
2452
2453 return cost1.cost - cost2.cost;
2454 }
2455
2456 /* Returns true if COST is infinite. */
2457
2458 static bool
2459 infinite_cost_p (comp_cost cost)
2460 {
2461 return cost.cost == INFTY;
2462 }
2463
2464 /* Sets cost of (USE, CANDIDATE) pair to COST and records that it depends
2465 on invariants DEPENDS_ON and that the value used in expressing it
2466 is VALUE. */
2467
2468 static void
2469 set_use_iv_cost (struct ivopts_data *data,
2470 struct iv_use *use, struct iv_cand *cand,
2471 comp_cost cost, bitmap depends_on, tree value)
2472 {
2473 unsigned i, s;
2474
2475 if (infinite_cost_p (cost))
2476 {
2477 BITMAP_FREE (depends_on);
2478 return;
2479 }
2480
2481 if (data->consider_all_candidates)
2482 {
2483 use->cost_map[cand->id].cand = cand;
2484 use->cost_map[cand->id].cost = cost;
2485 use->cost_map[cand->id].depends_on = depends_on;
2486 use->cost_map[cand->id].value = value;
2487 return;
2488 }
2489
2490 /* n_map_members is a power of two, so this computes modulo. */
2491 s = cand->id & (use->n_map_members - 1);
2492 for (i = s; i < use->n_map_members; i++)
2493 if (!use->cost_map[i].cand)
2494 goto found;
2495 for (i = 0; i < s; i++)
2496 if (!use->cost_map[i].cand)
2497 goto found;
2498
2499 gcc_unreachable ();
2500
2501 found:
2502 use->cost_map[i].cand = cand;
2503 use->cost_map[i].cost = cost;
2504 use->cost_map[i].depends_on = depends_on;
2505 use->cost_map[i].value = value;
2506 }
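
/* A sketch of the probing with hypothetical numbers: for
   n_map_members == 8 and cand->id == 11, the search starts at slot
   11 & 7 == 3, scans slots 3..7, then wraps around to 0..2, and the
   pair is stored in the first empty slot found. */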
2507
2508 /* Gets cost of (USE, CANDIDATE) pair. */
2509
2510 static struct cost_pair *
2511 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2512 struct iv_cand *cand)
2513 {
2514 unsigned i, s;
2515 struct cost_pair *ret;
2516
2517 if (!cand)
2518 return NULL;
2519
2520 if (data->consider_all_candidates)
2521 {
2522 ret = use->cost_map + cand->id;
2523 if (!ret->cand)
2524 return NULL;
2525
2526 return ret;
2527 }
2528
2529 /* n_map_members is a power of two, so this computes modulo. */
2530 s = cand->id & (use->n_map_members - 1);
2531 for (i = s; i < use->n_map_members; i++)
2532 if (use->cost_map[i].cand == cand)
2533 return use->cost_map + i;
2534
2535 for (i = 0; i < s; i++)
2536 if (use->cost_map[i].cand == cand)
2537 return use->cost_map + i;
2538
2539 return NULL;
2540 }
2541
2542 /* Returns an estimate of the cost of computing SEQ. */
2543
2544 static unsigned
2545 seq_cost (rtx seq, bool speed)
2546 {
2547 unsigned cost = 0;
2548 rtx set;
2549
2550 for (; seq; seq = NEXT_INSN (seq))
2551 {
2552 set = single_set (seq);
2553 if (set)
2554 cost += rtx_cost (set, SET, speed);
2555 else
2556 cost++;
2557 }
2558
2559 return cost;
2560 }
2561
2562 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
2563 static rtx
2564 produce_memory_decl_rtl (tree obj, int *regno)
2565 {
2566 rtx x;
2567
2568 gcc_assert (obj);
2569 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2570 {
2571 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2572 x = gen_rtx_SYMBOL_REF (Pmode, name);
2573 SET_SYMBOL_REF_DECL (x, obj);
2574 x = gen_rtx_MEM (DECL_MODE (obj), x);
2575 targetm.encode_section_info (obj, x, true);
2576 }
2577 else
2578 {
2579 x = gen_raw_REG (Pmode, (*regno)++);
2580 x = gen_rtx_MEM (DECL_MODE (obj), x);
2581 }
2582
2583 return x;
2584 }
2585
2586 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
2587 walk_tree. DATA contains the actual fake register number. */
2588
2589 static tree
2590 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2591 {
2592 tree obj = NULL_TREE;
2593 rtx x = NULL_RTX;
2594 int *regno = (int *) data;
2595
2596 switch (TREE_CODE (*expr_p))
2597 {
2598 case ADDR_EXPR:
2599 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2600 handled_component_p (*expr_p);
2601 expr_p = &TREE_OPERAND (*expr_p, 0))
2602 continue;
2603 obj = *expr_p;
2604 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2605 x = produce_memory_decl_rtl (obj, regno);
2606 break;
2607
2608 case SSA_NAME:
2609 *ws = 0;
2610 obj = SSA_NAME_VAR (*expr_p);
2611 if (!DECL_RTL_SET_P (obj))
2612 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2613 break;
2614
2615 case VAR_DECL:
2616 case PARM_DECL:
2617 case RESULT_DECL:
2618 *ws = 0;
2619 obj = *expr_p;
2620
2621 if (DECL_RTL_SET_P (obj))
2622 break;
2623
2624 if (DECL_MODE (obj) == BLKmode)
2625 x = produce_memory_decl_rtl (obj, regno);
2626 else
2627 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2628
2629 break;
2630
2631 default:
2632 break;
2633 }
2634
2635 if (x)
2636 {
2637 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2638 SET_DECL_RTL (obj, x);
2639 }
2640
2641 return NULL_TREE;
2642 }
2643
2644 /* Determines cost of the computation of EXPR. */
2645
2646 static unsigned
2647 computation_cost (tree expr, bool speed)
2648 {
2649 rtx seq, rslt;
2650 tree type = TREE_TYPE (expr);
2651 unsigned cost;
2652 /* Avoid using hard regs in ways which may be unsupported. */
2653 int regno = LAST_VIRTUAL_REGISTER + 1;
2654 enum function_frequency real_frequency = cfun->function_frequency;
2655
2656 cfun->function_frequency = FUNCTION_FREQUENCY_NORMAL;
2657 crtl->maybe_hot_insn_p = speed;
2658 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2659 start_sequence ();
2660 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2661 seq = get_insns ();
2662 end_sequence ();
2663 default_rtl_profile ();
2664 cfun->function_frequency = real_frequency;
2665
2666 cost = seq_cost (seq, speed);
2667 if (MEM_P (rslt))
2668 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type), speed);
2669
2670 return cost;
2671 }
2672
2673 /* Returns variable containing the value of candidate CAND at statement AT. */
2674
2675 static tree
2676 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2677 {
2678 if (stmt_after_increment (loop, cand, stmt))
2679 return cand->var_after;
2680 else
2681 return cand->var_before;
2682 }
2683
2684 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2685 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2686
2687 int
2688 tree_int_cst_sign_bit (const_tree t)
2689 {
2690 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2691 unsigned HOST_WIDE_INT w;
2692
2693 if (bitno < HOST_BITS_PER_WIDE_INT)
2694 w = TREE_INT_CST_LOW (t);
2695 else
2696 {
2697 w = TREE_INT_CST_HIGH (t);
2698 bitno -= HOST_BITS_PER_WIDE_INT;
2699 }
2700
2701 return (w >> bitno) & 1;
2702 }
2703
2704 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2705 same precision that is at least as wide as the precision of TYPE, stores
2706 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2707 type of A and B. */
2708
2709 static tree
2710 determine_common_wider_type (tree *a, tree *b)
2711 {
2712 tree wider_type = NULL;
2713 tree suba, subb;
2714 tree atype = TREE_TYPE (*a);
2715
2716 if (CONVERT_EXPR_P (*a))
2717 {
2718 suba = TREE_OPERAND (*a, 0);
2719 wider_type = TREE_TYPE (suba);
2720 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2721 return atype;
2722 }
2723 else
2724 return atype;
2725
2726 if (CONVERT_EXPR_P (*b))
2727 {
2728 subb = TREE_OPERAND (*b, 0);
2729 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2730 return atype;
2731 }
2732 else
2733 return atype;
2734
2735 *a = suba;
2736 *b = subb;
2737 return wider_type;
2738 }
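
/* A hypothetical example: if *A is (int) la and *B is (int) lb, with
   la and lb both of type "long", then *A and *B are replaced by la
   and lb and "long" is returned, so the difference can be folded in
   the wider type and only then truncated. */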
2739
2740 /* Determines the expression by which USE is expressed from induction variable
2741 CAND at statement AT in LOOP. The expression is stored in a decomposed
2742 form into AFF. Returns false if USE cannot be expressed using CAND. */
2743
2744 static bool
2745 get_computation_aff (struct loop *loop,
2746 struct iv_use *use, struct iv_cand *cand, gimple at,
2747 struct affine_tree_combination *aff)
2748 {
2749 tree ubase = use->iv->base;
2750 tree ustep = use->iv->step;
2751 tree cbase = cand->iv->base;
2752 tree cstep = cand->iv->step, cstep_common;
2753 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2754 tree common_type, var;
2755 tree uutype;
2756 aff_tree cbase_aff, var_aff;
2757 double_int rat;
2758
2759 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2760 {
2761 /* We do not have a precision to express the values of use. */
2762 return false;
2763 }
2764
2765 var = var_at_stmt (loop, cand, at);
2766 uutype = unsigned_type_for (utype);
2767
2768 /* If the conversion is not a no-op, perform it. */
2769 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2770 {
2771 cstep = fold_convert (uutype, cstep);
2772 cbase = fold_convert (uutype, cbase);
2773 var = fold_convert (uutype, var);
2774 }
2775
2776 if (!constant_multiple_of (ustep, cstep, &rat))
2777 return false;
2778
2779 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2780 type, we achieve better folding by computing their difference in this
2781 wider type, and cast the result to UUTYPE. We do not need to worry about
2782 overflows, as all the arithmetic will in the end be performed in UUTYPE
2783 anyway. */
2784 common_type = determine_common_wider_type (&ubase, &cbase);
2785
2786 /* use = ubase - ratio * cbase + ratio * var. */
2787 tree_to_aff_combination (ubase, common_type, aff);
2788 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2789 tree_to_aff_combination (var, uutype, &var_aff);
2790
2791 /* We need to shift the value if we are after the increment. */
2792 if (stmt_after_increment (loop, cand, at))
2793 {
2794 aff_tree cstep_aff;
2795
2796 if (common_type != uutype)
2797 cstep_common = fold_convert (common_type, cstep);
2798 else
2799 cstep_common = cstep;
2800
2801 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2802 aff_combination_add (&cbase_aff, &cstep_aff);
2803 }
2804
2805 aff_combination_scale (&cbase_aff, double_int_neg (rat));
2806 aff_combination_add (aff, &cbase_aff);
2807 if (common_type != uutype)
2808 aff_combination_convert (aff, uutype);
2809
2810 aff_combination_scale (&var_aff, rat);
2811 aff_combination_add (aff, &var_aff);
2812
2813 return true;
2814 }
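
/* A small made-up instance: for use->iv = {base 4, step 8} and
   cand->iv = {base 0, step 2}, constant_multiple_of yields rat == 4,
   so AFF ends up holding 4 - 4 * 0 + 4 * var, i.e. the affine
   combination 4 * var + 4. */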
2815
2816 /* Determines the expression by which USE is expressed from induction variable
2817 CAND at statement AT in LOOP. The computation is unshared. */
2818
2819 static tree
2820 get_computation_at (struct loop *loop,
2821 struct iv_use *use, struct iv_cand *cand, gimple at)
2822 {
2823 aff_tree aff;
2824 tree type = TREE_TYPE (use->iv->base);
2825
2826 if (!get_computation_aff (loop, use, cand, at, &aff))
2827 return NULL_TREE;
2828 unshare_aff_combination (&aff);
2829 return fold_convert (type, aff_combination_to_tree (&aff));
2830 }
2831
2832 /* Determines the expression by which USE is expressed from induction variable
2833 CAND in LOOP. The computation is unshared. */
2834
2835 static tree
2836 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2837 {
2838 return get_computation_at (loop, use, cand, use->stmt);
2839 }
2840
2841 /* Returns cost of addition in MODE. */
2842
2843 static unsigned
2844 add_cost (enum machine_mode mode, bool speed)
2845 {
2846 static unsigned costs[NUM_MACHINE_MODES];
2847 rtx seq;
2848 unsigned cost;
2849
2850 if (costs[mode])
2851 return costs[mode];
2852
2853 start_sequence ();
2854 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2855 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2856 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
2857 NULL_RTX);
2858 seq = get_insns ();
2859 end_sequence ();
2860
2861 cost = seq_cost (seq, speed);
2862 if (!cost)
2863 cost = 1;
2864
2865 costs[mode] = cost;
2866
2867 if (dump_file && (dump_flags & TDF_DETAILS))
2868 fprintf (dump_file, "Addition in %s costs %d\n",
2869 GET_MODE_NAME (mode), cost);
2870 return cost;
2871 }
2872
2873 /* Entry in a hashtable of already known costs for multiplication. */
2874 struct mbc_entry
2875 {
2876 HOST_WIDE_INT cst; /* The constant to multiply by. */
2877 enum machine_mode mode; /* In mode. */
2878 unsigned cost; /* The cost. */
2879 };
2880
2881 /* Computes the hash value for ENTRY. */
2882
2883 static hashval_t
2884 mbc_entry_hash (const void *entry)
2885 {
2886 const struct mbc_entry *e = (const struct mbc_entry *) entry;
2887
2888 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2889 }
2890
2891 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2892
2893 static int
2894 mbc_entry_eq (const void *entry1, const void *entry2)
2895 {
2896 const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
2897 const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
2898
2899 return (e1->mode == e2->mode
2900 && e1->cst == e2->cst);
2901 }
2902
2903 /* Returns cost of multiplication by constant CST in MODE. */
2904
2905 unsigned
2906 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
2907 {
2908 static htab_t costs;
2909 struct mbc_entry **cached, act;
2910 rtx seq;
2911 unsigned cost;
2912
2913 if (!costs)
2914 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2915
2916 act.mode = mode;
2917 act.cst = cst;
2918 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2919 if (*cached)
2920 return (*cached)->cost;
2921
2922 *cached = XNEW (struct mbc_entry);
2923 (*cached)->mode = mode;
2924 (*cached)->cst = cst;
2925
2926 start_sequence ();
2927 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2928 gen_int_mode (cst, mode), NULL_RTX, 0);
2929 seq = get_insns ();
2930 end_sequence ();
2931
2932 cost = seq_cost (seq, speed);
2933
2934 if (dump_file && (dump_flags & TDF_DETAILS))
2935 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2936 (int) cst, GET_MODE_NAME (mode), cost);
2937
2938 (*cached)->cost = cost;
2939
2940 return cost;
2941 }
2942
2943 /* Returns true if multiplying by RATIO is allowed in an address. Tests the
2944 validity for a memory reference accessing memory of mode MODE. */
2945
2946 bool
2947 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode)
2948 {
2949 #define MAX_RATIO 128
2950 static sbitmap valid_mult[MAX_MACHINE_MODE];
2951
2952 if (!valid_mult[mode])
2953 {
2954 rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2955 rtx addr;
2956 HOST_WIDE_INT i;
2957
2958 valid_mult[mode] = sbitmap_alloc (2 * MAX_RATIO + 1);
2959 sbitmap_zero (valid_mult[mode]);
2960 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2961 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2962 {
2963 XEXP (addr, 1) = gen_int_mode (i, Pmode);
2964 if (memory_address_p (mode, addr))
2965 SET_BIT (valid_mult[mode], i + MAX_RATIO);
2966 }
2967
2968 if (dump_file && (dump_flags & TDF_DETAILS))
2969 {
2970 fprintf (dump_file, " allowed multipliers:");
2971 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2972 if (TEST_BIT (valid_mult[mode], i + MAX_RATIO))
2973 fprintf (dump_file, " %d", (int) i);
2974 fprintf (dump_file, "\n");
2975 fprintf (dump_file, "\n");
2976 }
2977 }
2978
2979 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
2980 return false;
2981
2982 return TEST_BIT (valid_mult[mode], ratio + MAX_RATIO);
2983 }
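
/* For instance, on an x86-like target one would expect the valid
   multipliers to be 1, 2, 4 and 8 (the index scales the addressing
   modes support); any other ratio must then be paid for via an
   explicit multiplication (see multiply_by_cost). This is only
   illustrative -- the bitmap above records whatever memory_address_p
   accepts. */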
2984
2985 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2986 If SYMBOL_PRESENT is false, the symbol is omitted. If VAR_PRESENT is false,
2987 the variable is omitted. Compute the cost for a memory reference that accesses
2988 a memory location of mode MEM_MODE.
2989
2990 TODO -- there must be some better way. This all is quite crude. */
2991
2992 static comp_cost
2993 get_address_cost (bool symbol_present, bool var_present,
2994 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
2995 enum machine_mode mem_mode,
2996 bool speed)
2997 {
2998 static bool initialized[MAX_MACHINE_MODE];
2999 static HOST_WIDE_INT rat[MAX_MACHINE_MODE], off[MAX_MACHINE_MODE];
3000 static HOST_WIDE_INT min_offset[MAX_MACHINE_MODE], max_offset[MAX_MACHINE_MODE];
3001 static unsigned costs[MAX_MACHINE_MODE][2][2][2][2];
3002 unsigned cost, acost, complexity;
3003 bool offset_p, ratio_p;
3004 HOST_WIDE_INT s_offset;
3005 unsigned HOST_WIDE_INT mask;
3006 unsigned bits;
3007
3008 if (!initialized[mem_mode])
3009 {
3010 HOST_WIDE_INT i;
3011 HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
3012 int old_cse_not_expected;
3013 unsigned sym_p, var_p, off_p, rat_p, add_c;
3014 rtx seq, addr, base;
3015 rtx reg0, reg1;
3016
3017 initialized[mem_mode] = true;
3018
3019 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3020
3021 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
3022 for (i = start; i <= 1 << 20; i <<= 1)
3023 {
3024 XEXP (addr, 1) = gen_int_mode (i, Pmode);
3025 if (!memory_address_p (mem_mode, addr))
3026 break;
3027 }
3028 max_offset[mem_mode] = i == start ? 0 : i >> 1;
3029 off[mem_mode] = max_offset[mem_mode];
3030
3031 for (i = start; i <= 1 << 20; i <<= 1)
3032 {
3033 XEXP (addr, 1) = gen_int_mode (-i, Pmode);
3034 if (!memory_address_p (mem_mode, addr))
3035 break;
3036 }
3037 min_offset[mem_mode] = i == start ? 0 : -(i >> 1);
3038
3039 if (dump_file && (dump_flags & TDF_DETAILS))
3040 {
3041 fprintf (dump_file, "get_address_cost:\n");
3042 fprintf (dump_file, " min offset %s %d\n",
3043 GET_MODE_NAME (mem_mode),
3044 (int) min_offset[mem_mode]);
3045 fprintf (dump_file, " max offset %s %d\n",
3046 GET_MODE_NAME (mem_mode),
3047 (int) max_offset[mem_mode]);
3048 }
3049
3050 rat[mem_mode] = 1;
3051 for (i = 2; i <= MAX_RATIO; i++)
3052 if (multiplier_allowed_in_address_p (i, mem_mode))
3053 {
3054 rat[mem_mode] = i;
3055 break;
3056 }
3057
3058 /* Compute the cost of various addressing modes. */
3059 acost = 0;
3060 reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3061 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
3062
3063 for (i = 0; i < 16; i++)
3064 {
3065 sym_p = i & 1;
3066 var_p = (i >> 1) & 1;
3067 off_p = (i >> 2) & 1;
3068 rat_p = (i >> 3) & 1;
3069
3070 addr = reg0;
3071 if (rat_p)
3072 addr = gen_rtx_fmt_ee (MULT, Pmode, addr,
3073 gen_int_mode (rat[mem_mode], Pmode));
3074
3075 if (var_p)
3076 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3077
3078 if (sym_p)
3079 {
3080 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3081 /* ??? We can run into trouble with some backends by presenting
3082 it with symbols which haven't been properly passed through
3083 targetm.encode_section_info. By setting the local bit, we
3084 enhance the probability of things working. */
3085 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3086
3087 if (off_p)
3088 base = gen_rtx_fmt_e (CONST, Pmode,
3089 gen_rtx_fmt_ee (PLUS, Pmode,
3090 base,
3091 gen_int_mode (off[mem_mode],
3092 Pmode)));
3093 }
3094 else if (off_p)
3095 base = gen_int_mode (off[mem_mode], Pmode);
3096 else
3097 base = NULL_RTX;
3098
3099 if (base)
3100 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3101
3102 start_sequence ();
3103 /* To avoid splitting addressing modes, pretend that no cse will
3104 follow. */
3105 old_cse_not_expected = cse_not_expected;
3106 cse_not_expected = true;
3107 addr = memory_address (mem_mode, addr);
3108 cse_not_expected = old_cse_not_expected;
3109 seq = get_insns ();
3110 end_sequence ();
3111
3112 acost = seq_cost (seq, speed);
3113 acost += address_cost (addr, mem_mode, speed);
3114
3115 if (!acost)
3116 acost = 1;
3117 costs[mem_mode][sym_p][var_p][off_p][rat_p] = acost;
3118 }
3119
3120 /* On some targets, it is quite expensive to load a symbol into a register,
3121 which makes addresses that contain symbols look much more expensive.
3122 However, the symbol will have to be loaded in any case before the
3123 loop (and quite likely we have it in register already), so it does not
3124 make much sense to penalize them too heavily. So make some final
3125 tweaks for the SYMBOL_PRESENT modes:
3126
3127 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3128 var is cheaper, use this mode with small penalty.
3129 If VAR_PRESENT is true, check whether the mode with
3130 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3131 if this is the case, use it. */
3132 add_c = add_cost (Pmode, speed);
3133 for (i = 0; i < 8; i++)
3134 {
3135 var_p = i & 1;
3136 off_p = (i >> 1) & 1;
3137 rat_p = (i >> 2) & 1;
3138
3139 acost = costs[mem_mode][0][1][off_p][rat_p] + 1;
3140 if (var_p)
3141 acost += add_c;
3142
3143 if (acost < costs[mem_mode][1][var_p][off_p][rat_p])
3144 costs[mem_mode][1][var_p][off_p][rat_p] = acost;
3145 }
3146
3147 if (dump_file && (dump_flags & TDF_DETAILS))
3148 {
3149 fprintf (dump_file, "Address costs:\n");
3150
3151 for (i = 0; i < 16; i++)
3152 {
3153 sym_p = i & 1;
3154 var_p = (i >> 1) & 1;
3155 off_p = (i >> 2) & 1;
3156 rat_p = (i >> 3) & 1;
3157
3158 fprintf (dump_file, " ");
3159 if (sym_p)
3160 fprintf (dump_file, "sym + ");
3161 if (var_p)
3162 fprintf (dump_file, "var + ");
3163 if (off_p)
3164 fprintf (dump_file, "cst + ");
3165 if (rat_p)
3166 fprintf (dump_file, "rat * ");
3167
3168 acost = costs[mem_mode][sym_p][var_p][off_p][rat_p];
3169 fprintf (dump_file, "index costs %d\n", acost);
3170 }
3171 fprintf (dump_file, "\n");
3172 }
3173 }
3174
3175 bits = GET_MODE_BITSIZE (Pmode);
3176 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3177 offset &= mask;
3178 if ((offset >> (bits - 1) & 1))
3179 offset |= ~mask;
3180 s_offset = offset;
3181
3182 cost = 0;
3183 offset_p = (s_offset != 0
3184 && min_offset[mem_mode] <= s_offset
3185 && s_offset <= max_offset[mem_mode]);
3186 ratio_p = (ratio != 1
3187 && multiplier_allowed_in_address_p (ratio, mem_mode));
3188
3189 if (ratio != 1 && !ratio_p)
3190 cost += multiply_by_cost (ratio, Pmode, speed);
3191
3192 if (s_offset && !offset_p && !symbol_present)
3193 cost += add_cost (Pmode, speed);
3194
3195 acost = costs[mem_mode][symbol_present][var_present][offset_p][ratio_p];
3196 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3197 return new_cost (cost + acost, complexity);
3198 }
3199
3200 /* Estimates cost of forcing expression EXPR into a variable. */
3201
3202 static comp_cost
3203 force_expr_to_var_cost (tree expr, bool speed)
3204 {
3205 static bool costs_initialized = false;
3206 static unsigned integer_cost [2];
3207 static unsigned symbol_cost [2];
3208 static unsigned address_cost [2];
3209 tree op0, op1;
3210 comp_cost cost0, cost1, cost;
3211 enum machine_mode mode;
3212
3213 if (!costs_initialized)
3214 {
3215 tree type = build_pointer_type (integer_type_node);
3216 tree var, addr;
3217 rtx x;
3218 int i;
3219
3220 var = create_tmp_var_raw (integer_type_node, "test_var");
3221 TREE_STATIC (var) = 1;
3222 x = produce_memory_decl_rtl (var, NULL);
3223 SET_DECL_RTL (var, x);
3224
3225 addr = build1 (ADDR_EXPR, type, var);
3226
3227
3228 for (i = 0; i < 2; i++)
3229 {
3230 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3231 2000), i);
3232
3233 symbol_cost[i] = computation_cost (addr, i) + 1;
3234
3235 address_cost[i]
3236 = computation_cost (build2 (POINTER_PLUS_EXPR, type,
3237 addr,
3238 build_int_cst (sizetype, 2000)), i) + 1;
3239 if (dump_file && (dump_flags & TDF_DETAILS))
3240 {
3241 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3242 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3243 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3244 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3245 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3246 fprintf (dump_file, "\n");
3247 }
3248 }
3249
3250 costs_initialized = true;
3251 }
3252
3253 STRIP_NOPS (expr);
3254
3255 if (SSA_VAR_P (expr))
3256 return zero_cost;
3257
3258 if (is_gimple_min_invariant (expr))
3259 {
3260 if (TREE_CODE (expr) == INTEGER_CST)
3261 return new_cost (integer_cost [speed], 0);
3262
3263 if (TREE_CODE (expr) == ADDR_EXPR)
3264 {
3265 tree obj = TREE_OPERAND (expr, 0);
3266
3267 if (TREE_CODE (obj) == VAR_DECL
3268 || TREE_CODE (obj) == PARM_DECL
3269 || TREE_CODE (obj) == RESULT_DECL)
3270 return new_cost (symbol_cost [speed], 0);
3271 }
3272
3273 return new_cost (address_cost [speed], 0);
3274 }
3275
3276 switch (TREE_CODE (expr))
3277 {
3278 case POINTER_PLUS_EXPR:
3279 case PLUS_EXPR:
3280 case MINUS_EXPR:
3281 case MULT_EXPR:
3282 op0 = TREE_OPERAND (expr, 0);
3283 op1 = TREE_OPERAND (expr, 1);
3284 STRIP_NOPS (op0);
3285 STRIP_NOPS (op1);
3286
3287 if (is_gimple_val (op0))
3288 cost0 = zero_cost;
3289 else
3290 cost0 = force_expr_to_var_cost (op0, speed);
3291
3292 if (is_gimple_val (op1))
3293 cost1 = zero_cost;
3294 else
3295 cost1 = force_expr_to_var_cost (op1, speed);
3296
3297 break;
3298
3299 default:
3300 /* Just an arbitrary value, FIXME. */
3301 return new_cost (target_spill_cost[speed], 0);
3302 }
3303
3304 mode = TYPE_MODE (TREE_TYPE (expr));
3305 switch (TREE_CODE (expr))
3306 {
3307 case POINTER_PLUS_EXPR:
3308 case PLUS_EXPR:
3309 case MINUS_EXPR:
3310 cost = new_cost (add_cost (mode, speed), 0);
3311 break;
3312
3313 case MULT_EXPR:
3314 if (cst_and_fits_in_hwi (op0))
3315 cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0);
3316 else if (cst_and_fits_in_hwi (op1))
3317 cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0);
3318 else
3319 return new_cost (target_spill_cost [speed], 0);
3320 break;
3321
3322 default:
3323 gcc_unreachable ();
3324 }
3325
3326 cost = add_costs (cost, cost0);
3327 cost = add_costs (cost, cost1);
3328
3329 /* Bound the cost by target_spill_cost. The parts of complicated
3330 computations often are either loop invariant or at least can
3331 be shared between several iv uses, so letting this grow without
3332 limits would not give reasonable results. */
3333 if (cost.cost > target_spill_cost [speed])
3334 cost.cost = target_spill_cost [speed];
3335
3336 return cost;
3337 }
3338
3339 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3340 invariants the computation depends on. */
3341
3342 static comp_cost
3343 force_var_cost (struct ivopts_data *data,
3344 tree expr, bitmap *depends_on)
3345 {
3346 if (depends_on)
3347 {
3348 fd_ivopts_data = data;
3349 walk_tree (&expr, find_depends, depends_on, NULL);
3350 }
3351
3352 return force_expr_to_var_cost (expr, data->speed);
3353 }
3354
3355 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3356 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3357 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3358 invariants the computation depends on. */
3359
3360 static comp_cost
3361 split_address_cost (struct ivopts_data *data,
3362 tree addr, bool *symbol_present, bool *var_present,
3363 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3364 {
3365 tree core;
3366 HOST_WIDE_INT bitsize;
3367 HOST_WIDE_INT bitpos;
3368 tree toffset;
3369 enum machine_mode mode;
3370 int unsignedp, volatilep;
3371
3372 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3373 &unsignedp, &volatilep, false);
3374
3375 if (toffset != 0
3376 || bitpos % BITS_PER_UNIT != 0
3377 || TREE_CODE (core) != VAR_DECL)
3378 {
3379 *symbol_present = false;
3380 *var_present = true;
3381 fd_ivopts_data = data;
3382 walk_tree (&addr, find_depends, depends_on, NULL);
3383 return new_cost (target_spill_cost[data->speed], 0);
3384 }
3385
3386 *offset += bitpos / BITS_PER_UNIT;
3387 if (TREE_STATIC (core)
3388 || DECL_EXTERNAL (core))
3389 {
3390 *symbol_present = true;
3391 *var_present = false;
3392 return zero_cost;
3393 }
3394
3395 *symbol_present = false;
3396 *var_present = true;
3397 return zero_cost;
3398 }
3399
3400 /* Estimates cost of expressing difference of addresses E1 - E2 as
3401 var + symbol + offset. The value of offset is added to OFFSET,
3402 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3403 part is missing. DEPENDS_ON is a set of the invariants the computation
3404 depends on. */
3405
3406 static comp_cost
3407 ptr_difference_cost (struct ivopts_data *data,
3408 tree e1, tree e2, bool *symbol_present, bool *var_present,
3409 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3410 {
3411 HOST_WIDE_INT diff = 0;
3412 comp_cost cost;
3413 bool speed = optimize_loop_for_speed_p (data->current_loop);
3414
3415 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3416
3417 if (ptr_difference_const (e1, e2, &diff))
3418 {
3419 *offset += diff;
3420 *symbol_present = false;
3421 *var_present = false;
3422 return zero_cost;
3423 }
3424
3425 if (integer_zerop (e2))
3426 return split_address_cost (data, TREE_OPERAND (e1, 0),
3427 symbol_present, var_present, offset, depends_on);
3428
3429 *symbol_present = false;
3430 *var_present = true;
3431
3432 cost = force_var_cost (data, e1, depends_on);
3433 cost = add_costs (cost, force_var_cost (data, e2, depends_on));
3434 cost.cost += add_cost (Pmode, speed);
3435
3436 return cost;
3437 }
3438
3439 /* Estimates cost of expressing difference E1 - E2 as
3440 var + symbol + offset. The value of offset is added to OFFSET,
3441 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3442 part is missing. DEPENDS_ON is a set of the invariants the computation
3443 depends on. */
3444
3445 static comp_cost
3446 difference_cost (struct ivopts_data *data,
3447 tree e1, tree e2, bool *symbol_present, bool *var_present,
3448 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3449 {
3450 comp_cost cost;
3451 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3452 unsigned HOST_WIDE_INT off1, off2;
3453
3454 e1 = strip_offset (e1, &off1);
3455 e2 = strip_offset (e2, &off2);
3456 *offset += off1 - off2;
3457
3458 STRIP_NOPS (e1);
3459 STRIP_NOPS (e2);
3460
3461 if (TREE_CODE (e1) == ADDR_EXPR)
3462 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3463 depends_on);
3464 *symbol_present = false;
3465
3466 if (operand_equal_p (e1, e2, 0))
3467 {
3468 *var_present = false;
3469 return zero_cost;
3470 }
3471 *var_present = true;
3472 if (integer_zerop (e2))
3473 return force_var_cost (data, e1, depends_on);
3474
3475 if (integer_zerop (e1))
3476 {
3477 cost = force_var_cost (data, e2, depends_on);
3478 cost.cost += multiply_by_cost (-1, mode, data->speed);
3479
3480 return cost;
3481 }
3482
3483 cost = force_var_cost (data, e1, depends_on);
3484 cost = add_costs (cost, force_var_cost (data, e2, depends_on));
3485 cost.cost += add_cost (mode, data->speed);
3486
3487 return cost;
3488 }
3489
3490 /* Determines the cost of the computation by which USE is expressed
3491 from induction variable CAND. If ADDRESS_P is true, we just need
3492 to create an address from it, otherwise we want to get it into a
3493 register. A set of invariants we depend on is stored in
3494 DEPENDS_ON. AT is the statement at which the value is computed. */
3495
3496 static comp_cost
3497 get_computation_cost_at (struct ivopts_data *data,
3498 struct iv_use *use, struct iv_cand *cand,
3499 bool address_p, bitmap *depends_on, gimple at)
3500 {
3501 tree ubase = use->iv->base, ustep = use->iv->step;
3502 tree cbase, cstep;
3503 tree utype = TREE_TYPE (ubase), ctype;
3504 unsigned HOST_WIDE_INT cstepi, offset = 0;
3505 HOST_WIDE_INT ratio, aratio;
3506 bool var_present, symbol_present;
3507 comp_cost cost;
3508 unsigned n_sums;
3509 double_int rat;
3510 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
3511
3512 *depends_on = NULL;
3513
3514 /* Only consider real candidates. */
3515 if (!cand->iv)
3516 return infinite_cost;
3517
3518 cbase = cand->iv->base;
3519 cstep = cand->iv->step;
3520 ctype = TREE_TYPE (cbase);
3521
3522 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3523 {
3524 /* We do not have enough precision to express the values of use. */
3525 return infinite_cost;
3526 }
3527
3528 if (address_p)
3529 {
3530 /* Do not try to express address of an object with computation based
3531 on address of a different object. This may cause problems in rtl
3532 level alias analysis (that does not expect this to be happening,
3533 as this is illegal in C), and would be unlikely to be useful
3534 anyway. */
3535 if (use->iv->base_object
3536 && cand->iv->base_object
3537 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3538 return infinite_cost;
3539 }
3540
3541 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3542 {
3543 /* TODO -- add direct handling of this case. */
3544 goto fallback;
3545 }
3546
3547 /* CSTEPI is removed from the offset in case the statement is after the
3548 increment. If the step is not constant, we use zero instead.
3549 This is a bit imprecise (there is the extra addition), but
3550 redundancy elimination is likely to transform the code so that
3551 it uses the value of the variable before the increment anyway,
3552 so this is not that unrealistic. */
3553 if (cst_and_fits_in_hwi (cstep))
3554 cstepi = int_cst_value (cstep);
3555 else
3556 cstepi = 0;
3557
3558 if (!constant_multiple_of (ustep, cstep, &rat))
3559 return infinite_cost;
3560
3561 if (double_int_fits_in_shwi_p (rat))
3562 ratio = double_int_to_shwi (rat);
3563 else
3564 return infinite_cost;
3565
3566 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3567 or ratio == 1, it is better to handle this like
3568
3569 ubase - ratio * cbase + ratio * var
3570
3571 (the same holds in the case ratio == -1, TODO). */
3572
3573 if (cst_and_fits_in_hwi (cbase))
3574 {
3575 offset = - ratio * int_cst_value (cbase);
3576 cost = difference_cost (data,
3577 ubase, build_int_cst (utype, 0),
3578 &symbol_present, &var_present, &offset,
3579 depends_on);
3580 }
3581 else if (ratio == 1)
3582 {
3583 cost = difference_cost (data,
3584 ubase, cbase,
3585 &symbol_present, &var_present, &offset,
3586 depends_on);
3587 }
3588 else
3589 {
3590 cost = force_var_cost (data, cbase, depends_on);
3591 cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
3592 cost = add_costs (cost,
3593 difference_cost (data,
3594 ubase, build_int_cst (utype, 0),
3595 &symbol_present, &var_present,
3596 &offset, depends_on));
3597 }
3598
3599 /* If we are after the increment, the value of the candidate is higher by
3600 one iteration. */
3601 if (stmt_after_increment (data->current_loop, cand, at))
3602 offset -= ratio * cstepi;
3603
3604 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3605 (symbol/var/const parts may be omitted). If we are looking for an address,
3606 find the cost of addressing this. */
3607 if (address_p)
3608 return add_costs (cost, get_address_cost (symbol_present, var_present,
3609 offset, ratio,
3610 TYPE_MODE (TREE_TYPE (*use->op_p)), speed));
3611
3612 /* Otherwise estimate the costs for computing the expression. */
3613 aratio = ratio > 0 ? ratio : -ratio;
3614 if (!symbol_present && !var_present && !offset)
3615 {
3616 if (ratio != 1)
3617 cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype), speed);
3618
3619 return cost;
3620 }
3621
3622 if (aratio != 1)
3623 cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed);
3624
3625 n_sums = 1;
3626 if (var_present
3627 /* Symbol + offset should be compile-time computable. */
3628 && (symbol_present || offset))
3629 n_sums++;
3630
3631 /* Having an offset does not affect the runtime cost in case it is added
3632 to a symbol, but it increases complexity. */
3633 if (offset)
3634 cost.complexity++;
3635
3636 cost.cost += n_sums * add_cost (TYPE_MODE (ctype), speed);
3637 return cost;
3638
3639 fallback:
3640 {
3641 /* Just get the expression, expand it and measure the cost. */
3642 tree comp = get_computation_at (data->current_loop, use, cand, at);
3643
3644 if (!comp)
3645 return infinite_cost;
3646
3647 if (address_p)
3648 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3649
3650 return new_cost (computation_cost (comp, speed), 0);
3651 }
3652 }
3653
3654 /* Determines the cost of the computation by which USE is expressed
3655 from induction variable CAND. If ADDRESS_P is true, we just need
3656 to create an address from it, otherwise we want to get it into a
3657 register. A set of invariants we depend on is stored in
3658 DEPENDS_ON. */
3659
3660 static comp_cost
3661 get_computation_cost (struct ivopts_data *data,
3662 struct iv_use *use, struct iv_cand *cand,
3663 bool address_p, bitmap *depends_on)
3664 {
3665 return get_computation_cost_at (data,
3666 use, cand, address_p, depends_on, use->stmt);
3667 }
3668
3669 /* Determines cost of basing replacement of USE on CAND in a generic
3670 expression. */
3671
3672 static bool
3673 determine_use_iv_cost_generic (struct ivopts_data *data,
3674 struct iv_use *use, struct iv_cand *cand)
3675 {
3676 bitmap depends_on;
3677 comp_cost cost;
3678
3679 /* The simple case first -- if we need to express value of the preserved
3680 original biv, the cost is 0. This also prevents us from counting the
3681 cost of increment twice -- once at this use and once in the cost of
3682 the candidate. */
3683 if (cand->pos == IP_ORIGINAL
3684 && cand->incremented_at == use->stmt)
3685 {
3686 set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE);
3687 return true;
3688 }
3689
3690 cost = get_computation_cost (data, use, cand, false, &depends_on);
3691 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3692
3693 return !infinite_cost_p (cost);
3694 }
3695
3696 /* Determines cost of basing replacement of USE on CAND in an address. */
3697
3698 static bool
3699 determine_use_iv_cost_address (struct ivopts_data *data,
3700 struct iv_use *use, struct iv_cand *cand)
3701 {
3702 bitmap depends_on;
3703 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on);
3704
3705 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3706
3707 return !infinite_cost_p (cost);
3708 }
3709
3710 /* Computes value of candidate CAND at position AT in iteration NITER, and
3711 stores it in VAL. */
3712
3713 static void
3714 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
3715 aff_tree *val)
3716 {
3717 aff_tree step, delta, nit;
3718 struct iv *iv = cand->iv;
3719 tree type = TREE_TYPE (iv->base);
3720 tree steptype = type;
3721 if (POINTER_TYPE_P (type))
3722 steptype = sizetype;
3723
3724 tree_to_aff_combination (iv->step, steptype, &step);
3725 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
3726 aff_combination_convert (&nit, steptype);
3727 aff_combination_mult (&nit, &step, &delta);
3728 if (stmt_after_increment (loop, cand, at))
3729 aff_combination_add (&delta, &step);
3730
3731 tree_to_aff_combination (iv->base, type, val);
3732 aff_combination_add (val, &delta);
3733 }
3734
3735 /* Returns period of induction variable iv. */
3736
3737 static tree
3738 iv_period (struct iv *iv)
3739 {
3740 tree step = iv->step, period, type;
3741 tree pow2div;
3742
3743 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3744
3745 /* Period of the iv is gcd (step, type range). Since type range is power
3746 of two, it suffices to determine the maximum power of two that divides
3747 step. */
3748 pow2div = num_ending_zeros (step);
3749 type = unsigned_type_for (TREE_TYPE (step));
3750
3751 period = build_low_bits_mask (type,
3752 (TYPE_PRECISION (type)
3753 - tree_low_cst (pow2div, 1)));
3754
3755 return period;
3756 }
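
/* A worked example: for a 32-bit iv with step 12 (== 4 * 3),
   num_ending_zeros gives 2, so the period is the mask of
   32 - 2 == 30 low bits, i.e. 0x3fffffff. The iv takes at least that
   many pairwise distinct values before wrapping, which is the bound
   may_eliminate_iv compares the iteration count against. */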
3757
3758 /* Returns the comparison operator used when eliminating the iv USE. */
3759
3760 static enum tree_code
3761 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3762 {
3763 struct loop *loop = data->current_loop;
3764 basic_block ex_bb;
3765 edge exit;
3766
3767 ex_bb = gimple_bb (use->stmt);
3768 exit = EDGE_SUCC (ex_bb, 0);
3769 if (flow_bb_inside_loop_p (loop, exit->dest))
3770 exit = EDGE_SUCC (ex_bb, 1);
3771
3772 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3773 }
3774
3775 /* Check whether it is possible to express the condition in USE by comparison
3776 of candidate CAND. If so, store the value it is compared with in *BOUND. */
3777
3778 static bool
3779 may_eliminate_iv (struct ivopts_data *data,
3780 struct iv_use *use, struct iv_cand *cand, tree *bound)
3781 {
3782 basic_block ex_bb;
3783 edge exit;
3784 tree nit, period;
3785 struct loop *loop = data->current_loop;
3786 aff_tree bnd;
3787
3788 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3789 return false;
3790
3791 /* For now this works only for exits that dominate the loop latch.
3792 TODO: extend to other conditions inside loop body. */
3793 ex_bb = gimple_bb (use->stmt);
3794 if (use->stmt != last_stmt (ex_bb)
3795 || gimple_code (use->stmt) != GIMPLE_COND
3796 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3797 return false;
3798
3799 exit = EDGE_SUCC (ex_bb, 0);
3800 if (flow_bb_inside_loop_p (loop, exit->dest))
3801 exit = EDGE_SUCC (ex_bb, 1);
3802 if (flow_bb_inside_loop_p (loop, exit->dest))
3803 return false;
3804
3805 nit = niter_for_exit (data, exit);
3806 if (!nit)
3807 return false;
3808
3809 /* Determine whether we can use the variable to test the exit condition.
3810 This is the case iff the period of the induction variable is greater
3811 than the number of iterations for which the exit condition is true. */
3812 period = iv_period (cand->iv);
3813
3814 /* If the number of iterations is constant, compare against it directly. */
3815 if (TREE_CODE (nit) == INTEGER_CST)
3816 {
3817 if (!tree_int_cst_lt (nit, period))
3818 return false;
3819 }
3820
3821 /* If not, and if this is the only possible exit of the loop, see whether
3822 we can get a conservative estimate on the number of iterations of the
3823 entire loop and compare against that instead. */
3824 else if (loop_only_exit_p (loop, exit))
3825 {
3826 double_int period_value, max_niter;
3827 if (!estimated_loop_iterations (loop, true, &max_niter))
3828 return false;
3829 period_value = tree_to_double_int (period);
3830 if (double_int_ucmp (max_niter, period_value) >= 0)
3831 return false;
3832 }
3833
3834 /* Otherwise, punt. */
3835 else
3836 return false;
3837
3838 cand_value_at (loop, cand, use->stmt, nit, &bnd);
3839 *bound = aff_combination_to_tree (&bnd);
3840 return true;
3841 }
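
/* For illustration (hypothetical input loop):

     for (i = 0; i < n; i++)
       a[i] = 0;

   if the candidate is a pointer iv stepping through a[] by 4
   (assuming 4-byte elements), the exit test on i may be replaced by
   comparing the pointer against the BOUND that cand_value_at yields
   for iteration n -- roughly &a[0] + 4 * n -- using the EQ/NE code
   chosen by iv_elimination_compare. */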
3842
3843 /* Determines cost of basing replacement of USE on CAND in a condition. */
3844
3845 static bool
3846 determine_use_iv_cost_condition (struct ivopts_data *data,
3847 struct iv_use *use, struct iv_cand *cand)
3848 {
3849 tree bound = NULL_TREE;
3850 struct iv *cmp_iv;
3851 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
3852 comp_cost elim_cost, express_cost, cost;
3853 bool ok;
3854
3855 /* Only consider real candidates. */
3856 if (!cand->iv)
3857 {
3858 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE);
3859 return false;
3860 }
3861
3862 /* Try iv elimination. */
3863 if (may_eliminate_iv (data, use, cand, &bound))
3864 {
3865 elim_cost = force_var_cost (data, bound, &depends_on_elim);
3866 /* The bound is a loop invariant, so it will be only computed
3867 once. */
3868 elim_cost.cost /= AVG_LOOP_NITER (data->current_loop);
3869 }
3870 else
3871 elim_cost = infinite_cost;
3872
3873 /* Try expressing the original giv. If it is compared with an invariant,
3874 note that we cannot get rid of it. */
3875 ok = extract_cond_operands (data, use->stmt, NULL, NULL, NULL, &cmp_iv);
3876 gcc_assert (ok);
3877
3878 express_cost = get_computation_cost (data, use, cand, false,
3879 &depends_on_express);
3880 fd_ivopts_data = data;
3881 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
3882
3883 /* Choose the better approach, preferring the eliminated IV. */
3884 if (compare_costs (elim_cost, express_cost) <= 0)
3885 {
3886 cost = elim_cost;
3887 depends_on = depends_on_elim;
3888 depends_on_elim = NULL;
3889 }
3890 else
3891 {
3892 cost = express_cost;
3893 depends_on = depends_on_express;
3894 depends_on_express = NULL;
3895 bound = NULL_TREE;
3896 }
3897
3898 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
3899
3900 if (depends_on_elim)
3901 BITMAP_FREE (depends_on_elim);
3902 if (depends_on_express)
3903 BITMAP_FREE (depends_on_express);
3904
3905 return !infinite_cost_p (cost);
3906 }
3907
3908 /* Determines cost of basing replacement of USE on CAND. Returns false
3909 if USE cannot be based on CAND. */
3910
3911 static bool
3912 determine_use_iv_cost (struct ivopts_data *data,
3913 struct iv_use *use, struct iv_cand *cand)
3914 {
3915 switch (use->type)
3916 {
3917 case USE_NONLINEAR_EXPR:
3918 return determine_use_iv_cost_generic (data, use, cand);
3919
3920 case USE_ADDRESS:
3921 return determine_use_iv_cost_address (data, use, cand);
3922
3923 case USE_COMPARE:
3924 return determine_use_iv_cost_condition (data, use, cand);
3925
3926 default:
3927 gcc_unreachable ();
3928 }
3929 }
3930
3931 /* Determines costs of basing the use of the iv on an iv candidate. */
3932
3933 static void
3934 determine_use_iv_costs (struct ivopts_data *data)
3935 {
3936 unsigned i, j;
3937 struct iv_use *use;
3938 struct iv_cand *cand;
3939 bitmap to_clear = BITMAP_ALLOC (NULL);
3940
3941 alloc_use_cost_map (data);
3942
3943 for (i = 0; i < n_iv_uses (data); i++)
3944 {
3945 use = iv_use (data, i);
3946
3947 if (data->consider_all_candidates)
3948 {
3949 for (j = 0; j < n_iv_cands (data); j++)
3950 {
3951 cand = iv_cand (data, j);
3952 determine_use_iv_cost (data, use, cand);
3953 }
3954 }
3955 else
3956 {
3957 bitmap_iterator bi;
3958
3959 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
3960 {
3961 cand = iv_cand (data, j);
3962 if (!determine_use_iv_cost (data, use, cand))
3963 bitmap_set_bit (to_clear, j);
3964 }
3965
3966 /* Remove the candidates for which the cost is infinite from
3967 the list of related candidates. */
3968 bitmap_and_compl_into (use->related_cands, to_clear);
3969 bitmap_clear (to_clear);
3970 }
3971 }
3972
3973 BITMAP_FREE (to_clear);
3974
3975 if (dump_file && (dump_flags & TDF_DETAILS))
3976 {
3977 fprintf (dump_file, "Use-candidate costs:\n");
3978
3979 for (i = 0; i < n_iv_uses (data); i++)
3980 {
3981 use = iv_use (data, i);
3982
3983 fprintf (dump_file, "Use %d:\n", i);
3984 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
3985 for (j = 0; j < use->n_map_members; j++)
3986 {
3987 if (!use->cost_map[j].cand
3988 || infinite_cost_p (use->cost_map[j].cost))
3989 continue;
3990
3991 fprintf (dump_file, " %d\t%d\t%d\t",
3992 use->cost_map[j].cand->id,
3993 use->cost_map[j].cost.cost,
3994 use->cost_map[j].cost.complexity);
3995 if (use->cost_map[j].depends_on)
3996 bitmap_print (dump_file,
3997 use->cost_map[j].depends_on, "","");
3998 fprintf (dump_file, "\n");
3999 }
4000
4001 fprintf (dump_file, "\n");
4002 }
4003 fprintf (dump_file, "\n");
4004 }
4005 }
4006
4007 /* Determines cost of the candidate CAND. */
4008
4009 static void
4010 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4011 {
4012 comp_cost cost_base;
4013 unsigned cost, cost_step;
4014 tree base;
4015
4016 if (!cand->iv)
4017 {
4018 cand->cost = 0;
4019 return;
4020 }
4021
/* There are two costs associated with the candidate -- its increment
   and its initialization.  The latter is almost negligible for any loop
   that rolls enough, so it is given only a very small weight.  */
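/* A hypothetical illustration (numbers invented): if forcing the base
   into a register costs 10 and the expected number of iterations
   (AVG_LOOP_NITER) is taken to be 5, initialization contributes only
   10 / 5 = 2 to the candidate cost computed below, whereas the step
   addition is charged in full.  */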
4025
4026 base = cand->iv->base;
4027 cost_base = force_var_cost (data, base, NULL);
4028 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
4029
4030 cost = cost_step + cost_base.cost / AVG_LOOP_NITER (current_loop);
4031
/* Prefer the original ivs unless we may gain something by replacing them;
   the reason is to make debugging simpler.  This is not relevant for
   artificial ivs created by other optimization passes.  */
4035 if (cand->pos != IP_ORIGINAL
4036 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4037 cost++;
4038
/* Prefer not to insert statements into the latch unless there are some
   already (so that we do not create unnecessary jumps).  */
4041 if (cand->pos == IP_END
4042 && empty_block_p (ip_end_pos (data->current_loop)))
4043 cost++;
4044
4045 cand->cost = cost;
4046 }
4047
4048 /* Determines costs of computation of the candidates. */
4049
4050 static void
4051 determine_iv_costs (struct ivopts_data *data)
4052 {
4053 unsigned i;
4054
4055 if (dump_file && (dump_flags & TDF_DETAILS))
4056 {
4057 fprintf (dump_file, "Candidate costs:\n");
4058 fprintf (dump_file, " cand\tcost\n");
4059 }
4060
4061 for (i = 0; i < n_iv_cands (data); i++)
4062 {
4063 struct iv_cand *cand = iv_cand (data, i);
4064
4065 determine_iv_cost (data, cand);
4066
4067 if (dump_file && (dump_flags & TDF_DETAILS))
4068 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4069 }
4070
4071 if (dump_file && (dump_flags & TDF_DETAILS))
4072 fprintf (dump_file, "\n");
4073 }
4074
4075 /* Calculates cost for having SIZE induction variables. */
4076
4077 static unsigned
4078 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4079 {
4080 /* We add size to the cost, so that we prefer eliminating ivs
4081 if possible. */
4082 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed);
4083 }
4084
4085 /* For each size of the induction variable set determine the penalty. */
4086
4087 static void
4088 determine_set_costs (struct ivopts_data *data)
4089 {
4090 unsigned j, n;
4091 gimple phi;
4092 gimple_stmt_iterator psi;
4093 tree op;
4094 struct loop *loop = data->current_loop;
4095 bitmap_iterator bi;
4096
/* We use the following model (definitely improvable, especially the
   cost function -- TODO):

   We estimate the number of registers available (using MD data), name it A.

   We estimate the number of registers used by the loop, name it U.  This
   number is obtained as the number of loop phi nodes (not counting virtual
   registers and bivs) + the number of variables from outside of the loop.

   We set a reserve R (free regs that are used for temporary computations,
   etc.).  For now the reserve is a constant 3.

   Let I be the number of induction variables.

   -- if U + I + R <= A, the cost is I * SMALL_COST (just so as not to
      encourage creating a lot of ivs without a reason).
   -- if A - R < U + I <= A, the cost is I * PRES_COST
   -- if U + I > A, the cost is I * PRES_COST and
      number of uses * SPILL_COST * (U + I - A) / (U + I) is added.  */
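/* A hypothetical example (all numbers invented for illustration): with
   A = 10 available registers, reserve R = 3 and U = 5 registers used by
   the loop, a set of I = 2 ivs satisfies U + I + R <= A and pays only
   the small per-iv cost; I = 4 falls into the middle bucket and pays the
   full register-pressure cost per iv; I = 6 makes U + I exceed A, so a
   spill penalty proportional to (U + I - A) / (U + I) is added.  */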
4116
4117 if (dump_file && (dump_flags & TDF_DETAILS))
4118 {
4119 fprintf (dump_file, "Global costs:\n");
4120 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
4121 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
4122 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
4123 }
4124
4125 n = 0;
4126 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
4127 {
4128 phi = gsi_stmt (psi);
4129 op = PHI_RESULT (phi);
4130
4131 if (!is_gimple_reg (op))
4132 continue;
4133
4134 if (get_iv (data, op))
4135 continue;
4136
4137 n++;
4138 }
4139
4140 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4141 {
4142 struct version_info *info = ver_info (data, j);
4143
4144 if (info->inv_id && info->has_nonlin_use)
4145 n++;
4146 }
4147
4148 data->regs_used = n;
4149 if (dump_file && (dump_flags & TDF_DETAILS))
4150 fprintf (dump_file, " regs_used %d\n", n);
4151
4152 if (dump_file && (dump_flags & TDF_DETAILS))
4153 {
4154 fprintf (dump_file, " cost for size:\n");
4155 fprintf (dump_file, " ivs\tcost\n");
4156 for (j = 0; j <= 2 * target_avail_regs; j++)
4157 fprintf (dump_file, " %d\t%d\n", j,
4158 ivopts_global_cost_for_size (data, j));
4159 fprintf (dump_file, "\n");
4160 }
4161 }
4162
4163 /* Returns true if A is a cheaper cost pair than B. */
4164
4165 static bool
4166 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4167 {
4168 int cmp;
4169
4170 if (!a)
4171 return false;
4172
4173 if (!b)
4174 return true;
4175
4176 cmp = compare_costs (a->cost, b->cost);
4177 if (cmp < 0)
4178 return true;
4179
4180 if (cmp > 0)
4181 return false;
4182
/* In case the use costs are the same, prefer the candidate whose own
   cost is lower.  */
4184 if (a->cand->cost < b->cand->cost)
4185 return true;
4186
4187 return false;
4188 }
4189
4190 /* Computes the cost field of IVS structure. */
4191
4192 static void
4193 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4194 {
4195 comp_cost cost = ivs->cand_use_cost;
4196 cost.cost += ivs->cand_cost;
4197 cost.cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4198
4199 ivs->cost = cost;
4200 }
4201
/* Remove invariants in set INVS from set IVS.  */
4203
4204 static void
4205 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4206 {
4207 bitmap_iterator bi;
4208 unsigned iid;
4209
4210 if (!invs)
4211 return;
4212
4213 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4214 {
4215 ivs->n_invariant_uses[iid]--;
4216 if (ivs->n_invariant_uses[iid] == 0)
4217 ivs->n_regs--;
4218 }
4219 }
4220
4221 /* Set USE not to be expressed by any candidate in IVS. */
4222
4223 static void
4224 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4225 struct iv_use *use)
4226 {
4227 unsigned uid = use->id, cid;
4228 struct cost_pair *cp;
4229
4230 cp = ivs->cand_for_use[uid];
4231 if (!cp)
4232 return;
4233 cid = cp->cand->id;
4234
4235 ivs->bad_uses++;
4236 ivs->cand_for_use[uid] = NULL;
4237 ivs->n_cand_uses[cid]--;
4238
4239 if (ivs->n_cand_uses[cid] == 0)
4240 {
4241 bitmap_clear_bit (ivs->cands, cid);
4242 /* Do not count the pseudocandidates. */
4243 if (cp->cand->iv)
4244 ivs->n_regs--;
4245 ivs->n_cands--;
4246 ivs->cand_cost -= cp->cand->cost;
4247
4248 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4249 }
4250
4251 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
4252
4253 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4254 iv_ca_recount_cost (data, ivs);
4255 }
4256
4257 /* Add invariants in set INVS to set IVS. */
4258
4259 static void
4260 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4261 {
4262 bitmap_iterator bi;
4263 unsigned iid;
4264
4265 if (!invs)
4266 return;
4267
4268 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4269 {
4270 ivs->n_invariant_uses[iid]++;
4271 if (ivs->n_invariant_uses[iid] == 1)
4272 ivs->n_regs++;
4273 }
4274 }
4275
4276 /* Set cost pair for USE in set IVS to CP. */
4277
4278 static void
4279 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4280 struct iv_use *use, struct cost_pair *cp)
4281 {
4282 unsigned uid = use->id, cid;
4283
4284 if (ivs->cand_for_use[uid] == cp)
4285 return;
4286
4287 if (ivs->cand_for_use[uid])
4288 iv_ca_set_no_cp (data, ivs, use);
4289
4290 if (cp)
4291 {
4292 cid = cp->cand->id;
4293
4294 ivs->bad_uses--;
4295 ivs->cand_for_use[uid] = cp;
4296 ivs->n_cand_uses[cid]++;
4297 if (ivs->n_cand_uses[cid] == 1)
4298 {
4299 bitmap_set_bit (ivs->cands, cid);
4300 /* Do not count the pseudocandidates. */
4301 if (cp->cand->iv)
4302 ivs->n_regs++;
4303 ivs->n_cands++;
4304 ivs->cand_cost += cp->cand->cost;
4305
4306 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4307 }
4308
4309 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
4310 iv_ca_set_add_invariants (ivs, cp->depends_on);
4311 iv_ca_recount_cost (data, ivs);
4312 }
4313 }
4314
4315 /* Extend set IVS by expressing USE by some of the candidates in it
4316 if possible. */
4317
4318 static void
4319 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4320 struct iv_use *use)
4321 {
4322 struct cost_pair *best_cp = NULL, *cp;
4323 bitmap_iterator bi;
4324 unsigned i;
4325
4326 gcc_assert (ivs->upto >= use->id);
4327
4328 if (ivs->upto == use->id)
4329 {
4330 ivs->upto++;
4331 ivs->bad_uses++;
4332 }
4333
4334 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4335 {
4336 cp = get_use_iv_cost (data, use, iv_cand (data, i));
4337
4338 if (cheaper_cost_pair (cp, best_cp))
4339 best_cp = cp;
4340 }
4341
4342 iv_ca_set_cp (data, ivs, use, best_cp);
4343 }
4344
4345 /* Get cost for assignment IVS. */
4346
4347 static comp_cost
4348 iv_ca_cost (struct iv_ca *ivs)
4349 {
4350 return (ivs->bad_uses ? infinite_cost : ivs->cost);
4351 }
4352
4353 /* Returns true if all dependences of CP are among invariants in IVS. */
4354
4355 static bool
4356 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4357 {
4358 unsigned i;
4359 bitmap_iterator bi;
4360
4361 if (!cp->depends_on)
4362 return true;
4363
4364 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4365 {
4366 if (ivs->n_invariant_uses[i] == 0)
4367 return false;
4368 }
4369
4370 return true;
4371 }
4372
/* Creates a record of the change of expressing USE by NEW_CP instead of
   OLD_CP and chains it before NEXT_CHANGE.  */
4375
4376 static struct iv_ca_delta *
4377 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4378 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4379 {
4380 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4381
4382 change->use = use;
4383 change->old_cp = old_cp;
4384 change->new_cp = new_cp;
4385 change->next_change = next_change;
4386
4387 return change;
4388 }
4389
4390 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4391 are rewritten. */
4392
4393 static struct iv_ca_delta *
4394 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4395 {
4396 struct iv_ca_delta *last;
4397
4398 if (!l2)
4399 return l1;
4400
4401 if (!l1)
4402 return l2;
4403
4404 for (last = l1; last->next_change; last = last->next_change)
4405 continue;
4406 last->next_change = l2;
4407
4408 return l1;
4409 }
4410
/* Returns the candidate by which USE is expressed in IVS.  */
4412
4413 static struct cost_pair *
4414 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4415 {
4416 return ivs->cand_for_use[use->id];
4417 }
4418
/* Reverses the list of changes DELTA, forming its inverse: the order of
   the changes is reversed and the old and new cost pairs are swapped in
   each of them.  */
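/* For instance (hypothetical uses and cost pairs): reversing the delta
   { (use1: A -> B), (use2: C -> D) } yields
   { (use2: D -> C), (use1: B -> A) }; committing the reversed delta
   therefore undoes the effect of committing the original one.  */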
4420
4421 static struct iv_ca_delta *
4422 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4423 {
4424 struct iv_ca_delta *act, *next, *prev = NULL;
4425 struct cost_pair *tmp;
4426
4427 for (act = delta; act; act = next)
4428 {
4429 next = act->next_change;
4430 act->next_change = prev;
4431 prev = act;
4432
4433 tmp = act->old_cp;
4434 act->old_cp = act->new_cp;
4435 act->new_cp = tmp;
4436 }
4437
4438 return prev;
4439 }
4440
4441 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4442 reverted instead. */
4443
4444 static void
4445 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4446 struct iv_ca_delta *delta, bool forward)
4447 {
4448 struct cost_pair *from, *to;
4449 struct iv_ca_delta *act;
4450
4451 if (!forward)
4452 delta = iv_ca_delta_reverse (delta);
4453
4454 for (act = delta; act; act = act->next_change)
4455 {
4456 from = act->old_cp;
4457 to = act->new_cp;
4458 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4459 iv_ca_set_cp (data, ivs, act->use, to);
4460 }
4461
4462 if (!forward)
4463 iv_ca_delta_reverse (delta);
4464 }
4465
4466 /* Returns true if CAND is used in IVS. */
4467
4468 static bool
4469 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4470 {
4471 return ivs->n_cand_uses[cand->id] > 0;
4472 }
4473
4474 /* Returns number of induction variable candidates in the set IVS. */
4475
4476 static unsigned
4477 iv_ca_n_cands (struct iv_ca *ivs)
4478 {
4479 return ivs->n_cands;
4480 }
4481
4482 /* Free the list of changes DELTA. */
4483
4484 static void
4485 iv_ca_delta_free (struct iv_ca_delta **delta)
4486 {
4487 struct iv_ca_delta *act, *next;
4488
4489 for (act = *delta; act; act = next)
4490 {
4491 next = act->next_change;
4492 free (act);
4493 }
4494
4495 *delta = NULL;
4496 }
4497
/* Allocates a new iv candidate assignment.  */
4499
4500 static struct iv_ca *
4501 iv_ca_new (struct ivopts_data *data)
4502 {
4503 struct iv_ca *nw = XNEW (struct iv_ca);
4504
4505 nw->upto = 0;
4506 nw->bad_uses = 0;
4507 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4508 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4509 nw->cands = BITMAP_ALLOC (NULL);
4510 nw->n_cands = 0;
4511 nw->n_regs = 0;
4512 nw->cand_use_cost = zero_cost;
4513 nw->cand_cost = 0;
4514 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4515 nw->cost = zero_cost;
4516
4517 return nw;
4518 }
4519
4520 /* Free memory occupied by the set IVS. */
4521
4522 static void
4523 iv_ca_free (struct iv_ca **ivs)
4524 {
4525 free ((*ivs)->cand_for_use);
4526 free ((*ivs)->n_cand_uses);
4527 BITMAP_FREE ((*ivs)->cands);
4528 free ((*ivs)->n_invariant_uses);
4529 free (*ivs);
4530 *ivs = NULL;
4531 }
4532
4533 /* Dumps IVS to FILE. */
4534
4535 static void
4536 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4537 {
4538 const char *pref = " invariants ";
4539 unsigned i;
4540 comp_cost cost = iv_ca_cost (ivs);
4541
4542 fprintf (file, " cost %d (complexity %d)\n", cost.cost, cost.complexity);
bitmap_print (file, ivs->cands, " candidates ", "\n");
4544
4545 for (i = 1; i <= data->max_inv_id; i++)
4546 if (ivs->n_invariant_uses[i])
4547 {
4548 fprintf (file, "%s%d", pref, i);
4549 pref = ", ";
4550 }
4551 fprintf (file, "\n");
4552 }
4553
/* Try changing the candidate in IVS to CAND for each use.  Return the
   cost of the new set, and store the differences in DELTA.  The number
   of induction variables in the new set is stored in N_IVS.  */
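/* Note that the delta is only evaluated here: it is committed to IVS so
   that the cost of the modified set can be measured, and then it is
   immediately reverted, so IVS is unchanged when the function returns.  */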
4557
4558 static comp_cost
4559 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4560 struct iv_cand *cand, struct iv_ca_delta **delta,
4561 unsigned *n_ivs)
4562 {
4563 unsigned i;
4564 comp_cost cost;
4565 struct iv_use *use;
4566 struct cost_pair *old_cp, *new_cp;
4567
4568 *delta = NULL;
4569 for (i = 0; i < ivs->upto; i++)
4570 {
4571 use = iv_use (data, i);
4572 old_cp = iv_ca_cand_for_use (ivs, use);
4573
4574 if (old_cp
4575 && old_cp->cand == cand)
4576 continue;
4577
4578 new_cp = get_use_iv_cost (data, use, cand);
4579 if (!new_cp)
4580 continue;
4581
4582 if (!iv_ca_has_deps (ivs, new_cp))
4583 continue;
4584
4585 if (!cheaper_cost_pair (new_cp, old_cp))
4586 continue;
4587
4588 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4589 }
4590
4591 iv_ca_delta_commit (data, ivs, *delta, true);
4592 cost = iv_ca_cost (ivs);
4593 if (n_ivs)
4594 *n_ivs = iv_ca_n_cands (ivs);
4595 iv_ca_delta_commit (data, ivs, *delta, false);
4596
4597 return cost;
4598 }
4599
4600 /* Try narrowing set IVS by removing CAND. Return the cost of
4601 the new set and store the differences in DELTA. */
4602
4603 static comp_cost
4604 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4605 struct iv_cand *cand, struct iv_ca_delta **delta)
4606 {
4607 unsigned i, ci;
4608 struct iv_use *use;
4609 struct cost_pair *old_cp, *new_cp, *cp;
4610 bitmap_iterator bi;
4611 struct iv_cand *cnd;
4612 comp_cost cost;
4613
4614 *delta = NULL;
4615 for (i = 0; i < n_iv_uses (data); i++)
4616 {
4617 use = iv_use (data, i);
4618
4619 old_cp = iv_ca_cand_for_use (ivs, use);
4620 if (old_cp->cand != cand)
4621 continue;
4622
4623 new_cp = NULL;
4624
4625 if (data->consider_all_candidates)
4626 {
4627 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4628 {
4629 if (ci == cand->id)
4630 continue;
4631
4632 cnd = iv_cand (data, ci);
4633
4634 cp = get_use_iv_cost (data, use, cnd);
4635 if (!cp)
4636 continue;
4637 if (!iv_ca_has_deps (ivs, cp))
4638 continue;
4639
4640 if (!cheaper_cost_pair (cp, new_cp))
4641 continue;
4642
4643 new_cp = cp;
4644 }
4645 }
4646 else
4647 {
4648 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4649 {
4650 if (ci == cand->id)
4651 continue;
4652
4653 cnd = iv_cand (data, ci);
4654
4655 cp = get_use_iv_cost (data, use, cnd);
4656 if (!cp)
4657 continue;
4658 if (!iv_ca_has_deps (ivs, cp))
4659 continue;
4660
4661 if (!cheaper_cost_pair (cp, new_cp))
4662 continue;
4663
4664 new_cp = cp;
4665 }
4666 }
4667
4668 if (!new_cp)
4669 {
4670 iv_ca_delta_free (delta);
4671 return infinite_cost;
4672 }
4673
4674 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4675 }
4676
4677 iv_ca_delta_commit (data, ivs, *delta, true);
4678 cost = iv_ca_cost (ivs);
4679 iv_ca_delta_commit (data, ivs, *delta, false);
4680
4681 return cost;
4682 }
4683
/* Try optimizing the set of candidates IVS by removing candidates other
   than EXCEPT_CAND from it.  Return the cost of the new set, and store
   the differences in DELTA.  */
4687
4688 static comp_cost
4689 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4690 struct iv_cand *except_cand, struct iv_ca_delta **delta)
4691 {
4692 bitmap_iterator bi;
4693 struct iv_ca_delta *act_delta, *best_delta;
4694 unsigned i;
4695 comp_cost best_cost, acost;
4696 struct iv_cand *cand;
4697
4698 best_delta = NULL;
4699 best_cost = iv_ca_cost (ivs);
4700
4701 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4702 {
4703 cand = iv_cand (data, i);
4704
4705 if (cand == except_cand)
4706 continue;
4707
4708 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4709
4710 if (compare_costs (acost, best_cost) < 0)
4711 {
4712 best_cost = acost;
4713 iv_ca_delta_free (&best_delta);
4714 best_delta = act_delta;
4715 }
4716 else
4717 iv_ca_delta_free (&act_delta);
4718 }
4719
4720 if (!best_delta)
4721 {
4722 *delta = NULL;
4723 return best_cost;
4724 }
4725
4726 /* Recurse to possibly remove other unnecessary ivs. */
4727 iv_ca_delta_commit (data, ivs, best_delta, true);
4728 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4729 iv_ca_delta_commit (data, ivs, best_delta, false);
4730 *delta = iv_ca_delta_join (best_delta, *delta);
4731 return best_cost;
4732 }
4733
/* Tries to extend the set IVS in the best possible way in order
   to express USE.  */
4736
4737 static bool
4738 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4739 struct iv_use *use)
4740 {
4741 comp_cost best_cost, act_cost;
4742 unsigned i;
4743 bitmap_iterator bi;
4744 struct iv_cand *cand;
4745 struct iv_ca_delta *best_delta = NULL, *act_delta;
4746 struct cost_pair *cp;
4747
4748 iv_ca_add_use (data, ivs, use);
4749 best_cost = iv_ca_cost (ivs);
4750
4751 cp = iv_ca_cand_for_use (ivs, use);
4752 if (cp)
4753 {
4754 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4755 iv_ca_set_no_cp (data, ivs, use);
4756 }
4757
/* First try important candidates not based on any memory object.  Only if
   this fails, try the specific ones.  Rationale -- in loops with many
   variables the best choice often is to use just one generic biv.  If we
   added many ivs specific to the uses here, the optimization algorithm
   later would be likely to get stuck in a local minimum, thus causing us
   to create too many ivs.  The approach from few ivs to more seems more
   likely to be successful -- starting from few ivs, replacing an expensive
   use by a specific iv should always be a win.  */
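/* An illustrative scenario (not taken from any particular testcase):

     for (i = 0; i < n; i++)
       a[i] = b[i] + c[i];

   Here a single generic counter iv for i tends to beat introducing three
   separate pointer ivs, one per accessed array.  */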
4766 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4767 {
4768 cand = iv_cand (data, i);
4769
4770 if (cand->iv->base_object != NULL_TREE)
4771 continue;
4772
4773 if (iv_ca_cand_used_p (ivs, cand))
4774 continue;
4775
4776 cp = get_use_iv_cost (data, use, cand);
4777 if (!cp)
4778 continue;
4779
4780 iv_ca_set_cp (data, ivs, use, cp);
4781 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4782 iv_ca_set_no_cp (data, ivs, use);
4783 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4784
4785 if (compare_costs (act_cost, best_cost) < 0)
4786 {
4787 best_cost = act_cost;
4788
4789 iv_ca_delta_free (&best_delta);
4790 best_delta = act_delta;
4791 }
4792 else
4793 iv_ca_delta_free (&act_delta);
4794 }
4795
4796 if (infinite_cost_p (best_cost))
4797 {
4798 for (i = 0; i < use->n_map_members; i++)
4799 {
4800 cp = use->cost_map + i;
4801 cand = cp->cand;
4802 if (!cand)
4803 continue;
4804
4805 /* Already tried this. */
4806 if (cand->important && cand->iv->base_object == NULL_TREE)
4807 continue;
4808
4809 if (iv_ca_cand_used_p (ivs, cand))
4810 continue;
4811
4812 act_delta = NULL;
4813 iv_ca_set_cp (data, ivs, use, cp);
4814 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4815 iv_ca_set_no_cp (data, ivs, use);
4816 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4817 cp, act_delta);
4818
4819 if (compare_costs (act_cost, best_cost) < 0)
4820 {
4821 best_cost = act_cost;
4822
4823 if (best_delta)
4824 iv_ca_delta_free (&best_delta);
4825 best_delta = act_delta;
4826 }
4827 else
4828 iv_ca_delta_free (&act_delta);
4829 }
4830 }
4831
4832 iv_ca_delta_commit (data, ivs, best_delta, true);
4833 iv_ca_delta_free (&best_delta);
4834
4835 return !infinite_cost_p (best_cost);
4836 }
4837
4838 /* Finds an initial assignment of candidates to uses. */
4839
4840 static struct iv_ca *
4841 get_initial_solution (struct ivopts_data *data)
4842 {
4843 struct iv_ca *ivs = iv_ca_new (data);
4844 unsigned i;
4845
4846 for (i = 0; i < n_iv_uses (data); i++)
4847 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4848 {
4849 iv_ca_free (&ivs);
4850 return NULL;
4851 }
4852
4853 return ivs;
4854 }
4855
/* Tries to improve the set of induction variables IVS.  */
4857
4858 static bool
4859 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
4860 {
4861 unsigned i, n_ivs;
4862 comp_cost acost, best_cost = iv_ca_cost (ivs);
4863 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
4864 struct iv_cand *cand;
4865
4866 /* Try extending the set of induction variables by one. */
4867 for (i = 0; i < n_iv_cands (data); i++)
4868 {
4869 cand = iv_cand (data, i);
4870
4871 if (iv_ca_cand_used_p (ivs, cand))
4872 continue;
4873
4874 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
4875 if (!act_delta)
4876 continue;
4877
4878 /* If we successfully added the candidate and the set is small enough,
4879 try optimizing it by removing other candidates. */
4880 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
4881 {
4882 iv_ca_delta_commit (data, ivs, act_delta, true);
4883 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
4884 iv_ca_delta_commit (data, ivs, act_delta, false);
4885 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
4886 }
4887
4888 if (compare_costs (acost, best_cost) < 0)
4889 {
4890 best_cost = acost;
4891 iv_ca_delta_free (&best_delta);
4892 best_delta = act_delta;
4893 }
4894 else
4895 iv_ca_delta_free (&act_delta);
4896 }
4897
4898 if (!best_delta)
4899 {
4900 /* Try removing the candidates from the set instead. */
4901 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
4902
4903 /* Nothing more we can do. */
4904 if (!best_delta)
4905 return false;
4906 }
4907
4908 iv_ca_delta_commit (data, ivs, best_delta, true);
4909 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
4910 iv_ca_delta_free (&best_delta);
4911 return true;
4912 }
4913
/* Attempts to find the optimal set of induction variables.  We use a
   simple greedy heuristic: we try to replace at most one candidate in
   the selected solution and remove the unused ivs as long as this
   improves the cost.  */
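/* Each successful call to try_improve_iv_set strictly decreases the cost
   of the set, so the improvement loop below is guaranteed to
   terminate.  */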
4917
4918 static struct iv_ca *
4919 find_optimal_iv_set (struct ivopts_data *data)
4920 {
4921 unsigned i;
4922 struct iv_ca *set;
4923 struct iv_use *use;
4924
4925 /* Get the initial solution. */
4926 set = get_initial_solution (data);
4927 if (!set)
4928 {
4929 if (dump_file && (dump_flags & TDF_DETAILS))
4930 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
4931 return NULL;
4932 }
4933
4934 if (dump_file && (dump_flags & TDF_DETAILS))
4935 {
4936 fprintf (dump_file, "Initial set of candidates:\n");
4937 iv_ca_dump (data, dump_file, set);
4938 }
4939
4940 while (try_improve_iv_set (data, set))
4941 {
4942 if (dump_file && (dump_flags & TDF_DETAILS))
4943 {
4944 fprintf (dump_file, "Improved to:\n");
4945 iv_ca_dump (data, dump_file, set);
4946 }
4947 }
4948
4949 if (dump_file && (dump_flags & TDF_DETAILS))
4950 {
4951 comp_cost cost = iv_ca_cost (set);
4952 fprintf (dump_file, "Final cost %d (complexity %d)\n\n", cost.cost, cost.complexity);
4953 }
4954
4955 for (i = 0; i < n_iv_uses (data); i++)
4956 {
4957 use = iv_use (data, i);
4958 use->selected = iv_ca_cand_for_use (set, use)->cand;
4959 }
4960
4961 return set;
4962 }
4963
4964 /* Creates a new induction variable corresponding to CAND. */
4965
4966 static void
4967 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
4968 {
4969 gimple_stmt_iterator incr_pos;
4970 tree base;
4971 bool after = false;
4972
4973 if (!cand->iv)
4974 return;
4975
4976 switch (cand->pos)
4977 {
4978 case IP_NORMAL:
4979 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
4980 break;
4981
4982 case IP_END:
4983 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
4984 after = true;
4985 break;
4986
4987 case IP_ORIGINAL:
4988 /* Mark that the iv is preserved. */
4989 name_info (data, cand->var_before)->preserve_biv = true;
4990 name_info (data, cand->var_after)->preserve_biv = true;
4991
4992 /* Rewrite the increment so that it uses var_before directly. */
4993 find_interesting_uses_op (data, cand->var_after)->selected = cand;
4994
4995 return;
4996 }
4997
4998 gimple_add_tmp_var (cand->var_before);
4999 add_referenced_var (cand->var_before);
5000
5001 base = unshare_expr (cand->iv->base);
5002
5003 create_iv (base, unshare_expr (cand->iv->step),
5004 cand->var_before, data->current_loop,
5005 &incr_pos, after, &cand->var_before, &cand->var_after);
5006 }
5007
5008 /* Creates new induction variables described in SET. */
5009
5010 static void
5011 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5012 {
5013 unsigned i;
5014 struct iv_cand *cand;
5015 bitmap_iterator bi;
5016
5017 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5018 {
5019 cand = iv_cand (data, i);
5020 create_new_iv (data, cand);
5021 }
5022 }
5023
5024 /* Returns the phi-node in BB with result RESULT. */
5025
5026 static gimple
5027 get_phi_with_result (basic_block bb, tree result)
5028 {
5029 gimple_stmt_iterator i = gsi_start_phis (bb);
5030
5031 for (; !gsi_end_p (i); gsi_next (&i))
5032 if (gimple_phi_result (gsi_stmt (i)) == result)
5033 return gsi_stmt (i);
5034
5035 gcc_unreachable ();
5036 return NULL;
5037 }
5038
5039
/* Removes statement STMT (a real one or a phi node).  If
   INCLUDING_DEFINED_NAME is true, also remove the ssa name defined by
   the statement.  */
5042
5043 static void
5044 remove_statement (gimple stmt, bool including_defined_name)
5045 {
5046 if (gimple_code (stmt) == GIMPLE_PHI)
5047 {
5048 gimple bb_phi = get_phi_with_result (gimple_bb (stmt),
5049 gimple_phi_result (stmt));
5050 gimple_stmt_iterator bsi = gsi_for_stmt (bb_phi);
5051 remove_phi_node (&bsi, including_defined_name);
5052 }
5053 else
5054 {
5055 gimple_stmt_iterator bsi = gsi_for_stmt (stmt);
5056 gsi_remove (&bsi, true);
5057 release_defs (stmt);
5058 }
5059 }
5060
/* Rewrites USE (the definition of an iv used in a nonlinear expression)
   using candidate CAND.  */
5063
5064 static void
5065 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5066 struct iv_use *use, struct iv_cand *cand)
5067 {
5068 tree comp;
5069 tree op, tgt;
5070 gimple ass;
5071 gimple_stmt_iterator bsi;
5072
/* An important special case -- if we are asked to express the value of
   the original iv by itself, just exit; there is no need to
   introduce a new computation (that might also need casting the
   variable to unsigned and back).  */
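/* A hypothetical GIMPLE fragment: for the original biv increment

     i_2 = i_1 + 1;

   where the candidate preserves this biv, rewriting the use would merely
   replace the statement by an equivalent computation of i_2, possibly
   with useless casts to an unsigned type and back, so the statement is
   left alone.  */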
5077 if (cand->pos == IP_ORIGINAL
5078 && cand->incremented_at == use->stmt)
5079 {
5080 tree step, ctype, utype;
5081 enum tree_code incr_code = PLUS_EXPR, old_code;
5082
5083 gcc_assert (is_gimple_assign (use->stmt));
5084 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
5085
5086 step = cand->iv->step;
5087 ctype = TREE_TYPE (step);
5088 utype = TREE_TYPE (cand->var_after);
5089 if (TREE_CODE (step) == NEGATE_EXPR)
5090 {
5091 incr_code = MINUS_EXPR;
5092 step = TREE_OPERAND (step, 0);
5093 }
5094
/* Check whether we may leave the computation unchanged.
   This is the case only if it does not rely on other
   computations in the loop -- otherwise, the computation
   we rely upon may be removed in remove_unused_ivs,
   thus leading to an ICE.  */
5100 old_code = gimple_assign_rhs_code (use->stmt);
5101 if (old_code == PLUS_EXPR
5102 || old_code == MINUS_EXPR
5103 || old_code == POINTER_PLUS_EXPR)
5104 {
5105 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
5106 op = gimple_assign_rhs2 (use->stmt);
5107 else if (old_code != MINUS_EXPR
5108 && gimple_assign_rhs2 (use->stmt) == cand->var_before)
5109 op = gimple_assign_rhs1 (use->stmt);
5110 else
5111 op = NULL_TREE;
5112 }
5113 else
5114 op = NULL_TREE;
5115
5116 if (op
5117 && (TREE_CODE (op) == INTEGER_CST
5118 || operand_equal_p (op, step, 0)))
5119 return;
5120
5121 /* Otherwise, add the necessary computations to express
5122 the iv. */
5123 op = fold_convert (ctype, cand->var_before);
5124 comp = fold_convert (utype,
5125 build2 (incr_code, ctype, op,
5126 unshare_expr (step)));
5127 }
5128 else
5129 {
5130 comp = get_computation (data->current_loop, use, cand);
5131 gcc_assert (comp != NULL_TREE);
5132 }
5133
5134 switch (gimple_code (use->stmt))
5135 {
5136 case GIMPLE_PHI:
5137 tgt = PHI_RESULT (use->stmt);
5138
5139 /* If we should keep the biv, do not replace it. */
5140 if (name_info (data, tgt)->preserve_biv)
5141 return;
5142
5143 bsi = gsi_after_labels (gimple_bb (use->stmt));
5144 break;
5145
5146 case GIMPLE_ASSIGN:
5147 tgt = gimple_assign_lhs (use->stmt);
5148 bsi = gsi_for_stmt (use->stmt);
5149 break;
5150
5151 default:
5152 gcc_unreachable ();
5153 }
5154
5155 op = force_gimple_operand_gsi (&bsi, comp, false, SSA_NAME_VAR (tgt),
5156 true, GSI_SAME_STMT);
5157
5158 if (gimple_code (use->stmt) == GIMPLE_PHI)
5159 {
5160 ass = gimple_build_assign (tgt, op);
5161 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
5162 remove_statement (use->stmt, false);
5163 }
5164 else
5165 {
5166 gimple_assign_set_rhs_from_tree (&bsi, op);
5167 use->stmt = gsi_stmt (bsi);
5168 }
5169 }
5170
/* Replaces the ssa name in index IDX by its base variable.  Callback for
   for_each_index.  */
5173
5174 static bool
5175 idx_remove_ssa_names (tree base, tree *idx,
5176 void *data ATTRIBUTE_UNUSED)
5177 {
5178 tree *op;
5179
5180 if (TREE_CODE (*idx) == SSA_NAME)
5181 *idx = SSA_NAME_VAR (*idx);
5182
5183 if (TREE_CODE (base) == ARRAY_REF)
5184 {
5185 op = &TREE_OPERAND (base, 2);
5186 if (*op
5187 && TREE_CODE (*op) == SSA_NAME)
5188 *op = SSA_NAME_VAR (*op);
5189 op = &TREE_OPERAND (base, 3);
5190 if (*op
5191 && TREE_CODE (*op) == SSA_NAME)
5192 *op = SSA_NAME_VAR (*op);
5193 }
5194
5195 return true;
5196 }
5197
/* Unshares REF and replaces the ssa names inside it by their base variables.  */
5199
5200 static tree
5201 unshare_and_remove_ssa_names (tree ref)
5202 {
5203 ref = unshare_expr (ref);
5204 for_each_index (&ref, idx_remove_ssa_names, NULL);
5205
5206 return ref;
5207 }
5208
/* Extract the alias analysis info for the memory reference REF.  There are
   several ways this information may be stored, and its precise semantics
   depends on the type of the reference, but there is always one _DECL
   node hidden somewhere that is used to determine the set of virtual
   operands for the reference.  The code below deciphers this jungle and
   extracts that single useful piece of information.  */
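/* In short, as implemented below: for a dereference of an SSA pointer we
   prefer the pointer's name memory tag and fall back to the symbol
   memory tag of its base variable; for a reference based directly on a
   declaration we use the declaration's symbol memory tag, or the
   declaration itself if it has none.  */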
5215
5216 static tree
5217 get_ref_tag (tree ref, tree orig)
5218 {
5219 tree var = get_base_address (ref);
5220 tree aref = NULL_TREE, tag, sv;
5221 HOST_WIDE_INT offset, size, maxsize;
5222
5223 for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
5224 {
5225 aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
5226 if (ref)
5227 break;
5228 }
5229
5230 if (!var)
5231 return NULL_TREE;
5232
5233 if (TREE_CODE (var) == INDIRECT_REF)
5234 {
5235 /* If the base is a dereference of a pointer, first check its name memory
5236 tag. If it does not have one, use its symbol memory tag. */
5237 var = TREE_OPERAND (var, 0);
5238 if (TREE_CODE (var) != SSA_NAME)
5239 return NULL_TREE;
5240
5241 if (SSA_NAME_PTR_INFO (var))
5242 {
5243 tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5244 if (tag)
5245 return tag;
5246 }
5247
5248 var = SSA_NAME_VAR (var);
5249 tag = symbol_mem_tag (var);
5250 gcc_assert (tag != NULL_TREE);
5251 return tag;
5252 }
5253 else
5254 {
5255 if (!DECL_P (var))
5256 return NULL_TREE;
5257
5258 tag = symbol_mem_tag (var);
5259 if (tag)
5260 return tag;
5261
5262 return var;
5263 }
5264 }
5265
5266 /* Copies the reference information from OLD_REF to NEW_REF. */
5267
5268 static void
5269 copy_ref_info (tree new_ref, tree old_ref)
5270 {
5271 if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5272 copy_mem_ref_info (new_ref, old_ref);
5273 else
5274 {
5275 TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5276 TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
5277 }
5278 }
5279
/* Rewrites USE (an address that is an iv) using candidate CAND.  */
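/* For example (an illustrative transformation; details depend on the
   target's addressing modes): an access a[i] may be turned into a
   TARGET_MEM_REF built from the selected candidate, e.g. a pointer iv
   stepping through the array, so that the address computation is folded
   into the memory reference.  */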
5281
5282 static void
5283 rewrite_use_address (struct ivopts_data *data,
5284 struct iv_use *use, struct iv_cand *cand)
5285 {
5286 aff_tree aff;
5287 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
5288 tree ref;
5289 bool ok;
5290
5291 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5292 gcc_assert (ok);
5293 unshare_aff_combination (&aff);
5294
5295 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff, data->speed);
5296 copy_ref_info (ref, *use->op_p);
5297 *use->op_p = ref;
5298 }
5299
/* Rewrites USE (a condition in which one of the operands is an iv) using
   candidate CAND.  */
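/* A hypothetical elimination: a test such as i < 100 on a counter that
   is being removed may be replaced by p != base + 400 on a pointer
   candidate with step 4, using the bound computed when the cost of the
   elimination was determined.  */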
5302
5303 static void
5304 rewrite_use_compare (struct ivopts_data *data,
5305 struct iv_use *use, struct iv_cand *cand)
5306 {
5307 tree comp, *var_p, op, bound;
5308 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
5309 enum tree_code compare;
5310 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5311 bool ok;
5312
5313 bound = cp->value;
5314 if (bound)
5315 {
5316 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5317 tree var_type = TREE_TYPE (var);
5318
5319 compare = iv_elimination_compare (data, use);
5320 bound = unshare_expr (fold_convert (var_type, bound));
5321 op = force_gimple_operand_gsi (&bsi, bound, true, NULL_TREE,
5322 true, GSI_SAME_STMT);
5323
5324 gimple_cond_set_lhs (use->stmt, var);
5325 gimple_cond_set_code (use->stmt, compare);
5326 gimple_cond_set_rhs (use->stmt, op);
5327 return;
5328 }
5329
/* The induction variable elimination failed; just express the original
   iv.  */
5332 comp = get_computation (data->current_loop, use, cand);
5333 gcc_assert (comp != NULL_TREE);
5334
5335 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
5336 gcc_assert (ok);
5337
5338 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
5339 true, GSI_SAME_STMT);
5340 }
5341
5342 /* Rewrites USE using candidate CAND. */
5343
5344 static void
5345 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
5346 {
5347 push_stmt_changes (&use->stmt);
5348
5349 switch (use->type)
5350 {
5351 case USE_NONLINEAR_EXPR:
5352 rewrite_use_nonlinear_expr (data, use, cand);
5353 break;
5354
5355 case USE_ADDRESS:
5356 rewrite_use_address (data, use, cand);
5357 break;
5358
5359 case USE_COMPARE:
5360 rewrite_use_compare (data, use, cand);
5361 break;
5362
5363 default:
5364 gcc_unreachable ();
5365 }
5366
5367 pop_stmt_changes (&use->stmt);
5368 }
5369
5370 /* Rewrite the uses using the selected induction variables. */
5371
5372 static void
5373 rewrite_uses (struct ivopts_data *data)
5374 {
5375 unsigned i;
5376 struct iv_cand *cand;
5377 struct iv_use *use;
5378
5379 for (i = 0; i < n_iv_uses (data); i++)
5380 {
5381 use = iv_use (data, i);
5382 cand = use->selected;
5383 gcc_assert (cand);
5384
5385 rewrite_use (data, use, cand);
5386 }
5387 }
5388
5389 /* Removes the ivs that are not used after rewriting. */
5390
5391 static void
5392 remove_unused_ivs (struct ivopts_data *data)
5393 {
5394 unsigned j;
5395 bitmap_iterator bi;
5396
5397 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5398 {
5399 struct version_info *info;
5400
5401 info = ver_info (data, j);
5402 if (info->iv
5403 && !integer_zerop (info->iv->step)
5404 && !info->inv_id
5405 && !info->iv->have_use_for
5406 && !info->preserve_biv)
5407 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5408 }
5409 }
5410
5411 /* Frees data allocated by the optimization of a single loop. */
5412
5413 static void
5414 free_loop_data (struct ivopts_data *data)
5415 {
5416 unsigned i, j;
5417 bitmap_iterator bi;
5418 tree obj;
5419
5420 if (data->niters)
5421 {
5422 pointer_map_destroy (data->niters);
5423 data->niters = NULL;
5424 }
5425
5426 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5427 {
5428 struct version_info *info;
5429
5430 info = ver_info (data, i);
5431 if (info->iv)
5432 free (info->iv);
5433 info->iv = NULL;
5434 info->has_nonlin_use = false;
5435 info->preserve_biv = false;
5436 info->inv_id = 0;
5437 }
5438 bitmap_clear (data->relevant);
5439 bitmap_clear (data->important_candidates);
5440
5441 for (i = 0; i < n_iv_uses (data); i++)
5442 {
5443 struct iv_use *use = iv_use (data, i);
5444
5445 free (use->iv);
5446 BITMAP_FREE (use->related_cands);
5447 for (j = 0; j < use->n_map_members; j++)
5448 if (use->cost_map[j].depends_on)
5449 BITMAP_FREE (use->cost_map[j].depends_on);
5450 free (use->cost_map);
5451 free (use);
5452 }
5453 VEC_truncate (iv_use_p, data->iv_uses, 0);
5454
5455 for (i = 0; i < n_iv_cands (data); i++)
5456 {
5457 struct iv_cand *cand = iv_cand (data, i);
5458
5459 if (cand->iv)
5460 free (cand->iv);
5461 if (cand->depends_on)
5462 BITMAP_FREE (cand->depends_on);
5463 free (cand);
5464 }
5465 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5466
5467 if (data->version_info_size < num_ssa_names)
5468 {
5469 data->version_info_size = 2 * num_ssa_names;
5470 free (data->version_info);
5471 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5472 }
5473
5474 data->max_inv_id = 0;
5475
5476 for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5477 SET_DECL_RTL (obj, NULL_RTX);
5478
5479 VEC_truncate (tree, decl_rtl_to_reset, 0);
5480 }
5481
/* Finalizes the data structures used by the iv optimization pass.  */
5484
5485 static void
5486 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
5487 {
5488 free_loop_data (data);
5489 free (data->version_info);
5490 BITMAP_FREE (data->relevant);
5491 BITMAP_FREE (data->important_candidates);
5492
5493 VEC_free (tree, heap, decl_rtl_to_reset);
5494 VEC_free (iv_use_p, heap, data->iv_uses);
5495 VEC_free (iv_cand_p, heap, data->iv_candidates);
5496 }
5497
5498 /* Optimizes the LOOP. Returns true if anything changed. */
5499
5500 static bool
5501 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5502 {
5503 bool changed = false;
5504 struct iv_ca *iv_ca;
5505 edge exit;
5506
5507 gcc_assert (!data->niters);
5508 data->current_loop = loop;
5509 data->speed = optimize_loop_for_speed_p (loop);
5510
5511 if (dump_file && (dump_flags & TDF_DETAILS))
5512 {
5513 fprintf (dump_file, "Processing loop %d\n", loop->num);
5514
5515 exit = single_dom_exit (loop);
5516 if (exit)
5517 {
5518 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5519 exit->src->index, exit->dest->index);
5520 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
5521 fprintf (dump_file, "\n");
5522 }
5523
5524 fprintf (dump_file, "\n");
5525 }
5526
5527 /* For each ssa name determines whether it behaves as an induction variable
5528 in some loop. */
5529 if (!find_induction_variables (data))
5530 goto finish;
5531
5532 /* Finds interesting uses (item 1). */
5533 find_interesting_uses (data);
5534 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5535 goto finish;
5536
5537 /* Finds candidates for the induction variables (item 2). */
5538 find_iv_candidates (data);
5539
5540 /* Calculates the costs (item 3, part 1). */
5541 determine_use_iv_costs (data);
5542 determine_iv_costs (data);
5543 determine_set_costs (data);
5544
5545 /* Find the optimal set of induction variables (item 3, part 2). */
5546 iv_ca = find_optimal_iv_set (data);
5547 if (!iv_ca)
5548 goto finish;
5549 changed = true;
5550
5551 /* Create the new induction variables (item 4, part 1). */
5552 create_new_ivs (data, iv_ca);
5553 iv_ca_free (&iv_ca);
5554
5555 /* Rewrite the uses (item 4, part 2). */
5556 rewrite_uses (data);
5557
5558 /* Remove the ivs that are unused after rewriting. */
5559 remove_unused_ivs (data);
5560
5561 /* We have changed the structure of induction variables; it might happen
5562 that definitions in the scev database refer to some of them that were
5563 eliminated. */
5564 scev_reset ();
5565
5566 finish:
5567 free_loop_data (data);
5568
5569 return changed;
5570 }
5571
5572 /* Main entry point. Optimizes induction variables in loops. */
5573
5574 void
5575 tree_ssa_iv_optimize (void)
5576 {
5577 struct loop *loop;
5578 struct ivopts_data data;
5579 loop_iterator li;
5580
5581 tree_ssa_iv_optimize_init (&data);
5582
5583 /* Optimize the loops starting with the innermost ones. */
5584 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
5585 {
5586 if (dump_file && (dump_flags & TDF_DETAILS))
5587 flow_loop_dump (loop, dump_file, NULL, 1);
5588
5589 tree_ssa_iv_optimize_loop (&data, loop);
5590 }
5591
5592 tree_ssa_iv_optimize_finalize (&data);
5593 }