/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
         induction variable in the set and adds its cost to the sum.  The
         cost reflects the time spent on modifying the induction variable's
         value to be usable for the given purpose (adding base and offset
         for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give a better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */

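/* For example (illustrative only), for a loop like

     for (i = 0; i < 100; i++)
       a[i] = 0;

   the address &a[i] is an interesting address use of the biv i.  Strength
   reduction may replace the separate counter and address computation by a
   single pointer induction variable, giving code equivalent to

     for (p = &a[0]; p != &a[100]; p++)
       *p = 0;

   which needs only one induction variable.  */
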
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "tm_p.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "pointer-set.h"
#include "hashtab.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "cfgloop.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
#include "expmed.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "expr.h"
#include "recog.h"

/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}

/* Representation of the induction variable.  */
struct iv
{
  tree base;            /* Initial value of the iv.  */
  tree base_object;     /* A memory object to which the induction variable
                           points.  */
  tree step;            /* Step of the iv (constant only).  */
  tree ssa_name;        /* The ssa name with the value.  */
  bool biv_p;           /* Is it a biv?  */
  bool have_use_for;    /* Do we already have a use for it?  */
  unsigned use_id;      /* The identifier in the use if it is the case.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;            /* The ssa name.  */
  struct iv *iv;        /* Induction variable description.  */
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
                           an expression that is not an induction variable.  */
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
  unsigned inv_id;      /* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
  USE_ADDRESS,          /* Use in an address.  */
  USE_COMPARE           /* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;             /* The runtime cost.  */
  unsigned complexity;  /* The estimate of the complexity of the code for
                           the computation (in no concrete units --
                           complexity field should be larger for more
                           complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand; /* The candidate.  */
  comp_cost cost;       /* The cost.  */
  bitmap depends_on;    /* The list of invariants that have to be
                           preserved.  */
  tree value;           /* For final value elimination, the expression for
                           the final value of the iv.  For iv elimination,
                           the new bound to compare with.  */
  enum tree_code comp;  /* For iv elimination, the comparison.  */
  int inv_expr_id;      /* Loop invariant expression id.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;          /* The id of the use.  */
  enum use_type type;   /* Type of the use.  */
  struct iv *iv;        /* The induction variable it is based on.  */
  gimple stmt;          /* Statement in which it occurs.  */
  tree *op_p;           /* The place where it occurs.  */
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
                           important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
                        /* The costs wrt the iv candidates.  */

  struct iv_cand *selected;
                        /* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,            /* At the end, just before the exit condition.  */
  IP_END,               /* At the end of the latch block.  */
  IP_BEFORE_USE,        /* Immediately before a specific use.  */
  IP_AFTER_USE,         /* Immediately after a specific use.  */
  IP_ORIGINAL           /* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;          /* The number of the candidate.  */
  bool important;       /* Whether this is an "important" candidate, i.e. such
                           that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
                           incremented.  */
  tree var_before;      /* The variable used for it before increment.  */
  tree var_after;       /* The variable used for it after increment.  */
  struct iv *iv;        /* The value of the candidate.  NULL for
                           "pseudocandidate" used to indicate the possibility
                           to replace the final value of an iv by direct
                           computation of the value.  */
  unsigned cost;        /* Cost of the candidate.  */
  unsigned cost_step;   /* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
                              where it is incremented.  */
  bitmap depends_on;    /* The list of invariants that are used in step of the
                           biv.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  htab_t inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing
     a use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via a single exit.  */
  bool loop_single_exit_p;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on the number of candidates below which all candidates are
   considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return VEC_length (iv_use_p, data->iv_uses);
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_use_p, data->iv_uses, i);
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return VEC_length (iv_cand_p, data->iv_candidates);
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_cand_p, data->iv_candidates, i);
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

extern void dump_uses (FILE *, struct ivopts_data *);
void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
           cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}

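/* For instance (illustrative), for an IP_NORMAL candidate the loop looks
   like

     body:
       ...                      <- uses here are before the increment
       cand += step;            <- the increment
       if (cond) goto body;     <- exit test

   so only the exit test and statements in the latch come after the
   increment, while for an IP_END candidate the increment is emitted at
   the end of the latch and nothing in the loop body follows it.  */
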
/* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains a ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
                                  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
        return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
        return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains a ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
                            idx_contains_abnormal_ssa_name_p,
                            NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
        return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
        return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with
         ssa names that appear in phi nodes on abnormal edges, so that we do
         not create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
                                      exit, desc, true)
          || contains_abnormal_ssa_name_p (desc->niter))
        {
          XDELETE (desc);
          desc = NULL;
        }
      slot = pointer_map_insert (data->niters, exit);
      *slot = desc;
    }
  else
    desc = (struct tree_niter_desc *) *slot;

  return desc;
}

/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Hash table equality function for expressions.  */

static int
htab_inv_expr_eq (const void *ent1, const void *ent2)
{
  const struct iv_inv_expr_ent *expr1 =
      (const struct iv_inv_expr_ent *)ent1;
  const struct iv_inv_expr_ent *expr2 =
      (const struct iv_inv_expr_ent *)ent2;

  return expr1->hash == expr2->hash
         && operand_equal_p (expr1->expr, expr2->expr, 0);
}

/* Hash function for loop invariant expressions.  */

static hashval_t
htab_inv_expr_hash (const void *ent)
{
  const struct iv_inv_expr_ent *expr =
      (const struct iv_inv_expr_ent *)ent;
  return expr->hash;
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
  data->inv_expr_tab = htab_create (10, htab_inv_expr_hash,
                                    htab_inv_expr_eq, free);
  data->inv_expr_id = 0;
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
}

/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer cast to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
        return expr;

      if (TREE_CODE (base) == MEM_REF)
        return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
                           build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}

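/* For example (illustrative), for EXPR (char *) &a[i] the conversion is
   stripped first, the ADDR_EXPR case then finds the base address of a[i],
   and &a is returned; for EXPR p_1 + 4 the POINTER_PLUS_EXPR case recurses
   into p_1.  */
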
/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
          || !flow_bb_inside_loop_p (data->current_loop, bb))
        set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (!is_gimple_reg (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
        continue;

      step = determine_biv_step (phi);
      if (!step)
        continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
          || contains_abnormal_ssa_name_p (step))
        continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
        {
          if (POINTER_TYPE_P (type))
            step = convert_to_ptrofftype (step);
          else
            step = fold_convert (type, step);
        }

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

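/* A basic iv is recognized from a loop-header phi node together with its
   increment, e.g. (illustrative GIMPLE):

     # i_1 = PHI <0(preheader), i_2(latch)>
     ...
     i_2 = i_1 + 1;

   which yields a biv with base 0 and step 1.  */
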
/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
        continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
        continue;

      /* If the increment is in a subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
          || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
        continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
        {
          fprintf (dump_file, "  number of iterations ");
          print_generic_expr (dump_file, niter->niter, TDF_SLIM);
          if (!integer_zerop (niter->may_be_zero))
            {
              fprintf (dump_file, "; zero if ");
              print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
            }
          fprintf (dump_file, "\n\n");
        }

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          if (ver_info (data, i)->iv)
            dump_iv (dump_file, ver_info (data, i)->iv);
        }
    }

  return true;
}

/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
            gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* Reset the ssa name, if it was not reset by the caller, to avoid
     showing it in the dumps.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  VEC_safe_push (iv_use_p, heap, data->iv_uses, use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || !is_gimple_reg (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
              || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
                       tree **control_var, tree **bound,
                       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}

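/* For instance (illustrative), for an exit test

     if (i_1 < n_2)

   where i_1 is an iv and n_2 is loop invariant, *CONTROL_VAR is set to the
   location of i_1, *BOUND to that of n_2, and true is returned.  */
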
/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
          && flow_bb_inside_loop_p (loop, def_bb))
        return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
        && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Returns true if statement STMT is obviously invariant in LOOP,
   i.e. if all its operands on the RHS are defined outside of the LOOP.
   LOOP should not be the function body.  */

bool
stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
{
  unsigned i;
  tree lhs;

  gcc_assert (loop_depth (loop) > 0);

  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); i++)
    {
      tree op = gimple_op (stmt, i);
      if (op != lhs && !expr_invariant_in_loop_p (loop, op))
        return false;
    }

  return true;
}

/* Accumulates the steps of indices into DATA and replaces their values with
   the initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
          && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
        return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
          || !expr_invariant_in_loop_p (loop, lbound))
        return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
     *&x[0], which is not folded and does not trigger the
     ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
        return false;
    }
  else
    /* The step for pointer arithmetic is already 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
                            sizetype, &iv_base, &iv_step, dta->stmt,
                            false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}

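/* For example (illustrative), for a reference a[i_1] where the elements are
   4 bytes wide and i_1 is an iv with step 1, idx_find_step replaces the
   index by the initial value of i_1 and adds 4 * 1 to DTA->step.  */
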
/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
                void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
        return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
        return false;

      *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
                              precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
          || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
        return false;

      if (code == MINUS_EXPR)
        p1 = double_int_neg (p1);
      *mul = double_int_sext (double_int_add (p0, p1), precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
        return false;

      p0 = double_int_sext (tree_to_double_int (top), precision);
      p1 = double_int_sext (tree_to_double_int (bot), precision);
      if (double_int_zero_p (p1))
        return false;
      *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
                              precision);
      return double_int_zero_p (res);

    default:
      return false;
    }
}

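/* For instance (illustrative), constant_multiple_of (n_1 * 8, n_1, &mul)
   sets *mul to 8 and returns true, while for TOP = n_1 * 8 + 1 and
   BOT = n_1 it returns false, since 1 is not a constant multiple of n_1.  */
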
/* Returns true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically a copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
                              &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = get_object_alignment (base);
  base_align = MAX (base_align, TYPE_ALIGN (base_type));

  if (mode != BLKmode)
    {
      unsigned mode_align = GET_MODE_ALIGNMENT (mode);

      if (base_align < mode_align
          || (bitpos % mode_align) != 0
          || (bitpos % BITS_PER_UNIT) != 0)
        return true;

      if (toffset
          && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
        return true;

      if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
        return true;
    }

  return false;
}

/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
         target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
             || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
         and make them look addressable.  After some processing the
         non-addressability may be uncovered again, causing ADDR_EXPRs
         of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
          || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
        return true;

      /* ... fall through ... */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}

/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt,
                               tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
          && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_BASE (base));
          if (!civ)
            goto fail;

          TMR_BASE (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX2 (base)
          && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX2 (base));
          if (!civ)
            goto fail;

          TMR_INDEX2 (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX (base)
          && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX (base));
          if (!civ)
            goto fail;

          TMR_INDEX (base) = civ->base;
          astep = civ->step;

          if (astep)
            {
              if (TMR_STEP (base))
                astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

              step = fold_build2 (PLUS_EXPR, type, step, astep);
            }
        }

      if (integer_zerop (step))
        goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
          || integer_zerop (ifs_ivopts_data.step))
        goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
         to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
        goto fail;

      /* Moreover, on strict alignment platforms, check that it is
         sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
        goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
         have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
        {
          tree *ref = &TREE_OPERAND (base, 0);
          while (handled_component_p (*ref))
            ref = &TREE_OPERAND (*ref, 0);
          if (TREE_CODE (*ref) == MEM_REF)
            {
              tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
                                      TREE_OPERAND (*ref, 0),
                                      TREE_OPERAND (*ref, 1));
              if (tem)
                *ref = tem;
            }
        }
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}

/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
        {
          /* If the statement defines an induction variable, the uses are not
             interesting by themselves.  */

          iv = get_iv (data, *lhs);

          if (iv && !integer_zerop (iv->step))
            return;
        }

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
          && (REFERENCE_CLASS_P (*rhs)
              || is_gimple_val (*rhs)))
        {
          if (REFERENCE_CLASS_P (*rhs))
            find_interesting_uses_address (data, stmt, rhs);
          else
            find_interesting_uses_op (data, *rhs);

          if (REFERENCE_CLASS_P (*lhs))
            find_interesting_uses_address (data, stmt, lhs);
          return;
        }
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
        {
          find_interesting_uses_cond (data, stmt);
          return;
        }

      /* TODO -- we should also handle address uses of type

         memory = call (whatever);

         and

         call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
        return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
        continue;

      iv = get_iv (data, op);
      if (!iv)
        continue;

      find_interesting_uses_op (data, op);
    }
}

/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (is_gimple_reg (def))
        find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
        if (e->dest != EXIT_BLOCK_PTR
            && !flow_bb_inside_loop_p (data->current_loop, e->dest))
          find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        if (!is_gimple_debug (gsi_stmt (bsi)))
          find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          info = ver_info (data, i);
          if (info->inv_id)
            {
              fprintf (dump_file, "  ");
              print_generic_expr (dump_file, info->name, TDF_SLIM);
              fprintf (dump_file, " is invariant (%d)%s\n",
                       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
            }
        }

      fprintf (dump_file, "\n");
    }

  free (body);
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
                unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
          || integer_zerop (expr))
        return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
      if (op0 == TREE_OPERAND (expr, 0)
          && op1 == TREE_OPERAND (expr, 1))
        return orig_expr;

      if (integer_zerop (op1))
        expr = op0;
      else if (integer_zerop (op0))
        {
          if (code == MINUS_EXPR)
            expr = fold_build1 (NEGATE_EXPR, type, op1);
          else
            expr = op1;
        }
      else
        expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case MULT_EXPR:
      op1 = TREE_OPERAND (expr, 1);
      if (!cst_and_fits_in_hwi (op1))
        return orig_expr;

      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, false, false, &off0);
      if (op0 == TREE_OPERAND (expr, 0))
        return orig_expr;

      *offset = off0 * int_cst_value (op1);
      if (integer_zerop (op0))
        expr = op0;
      else
        expr = fold_build2 (MULT_EXPR, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (!inside_addr)
        return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
        break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
          && integer_zerop (op1))
        {
          /* Strip the component reference completely.  */
          op0 = TREE_OPERAND (expr, 0);
          op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
          *offset += off0;
          return op0;
        }
      break;

    case COMPONENT_REF:
      if (!inside_addr)
        return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
          && cst_and_fits_in_hwi (tmp))
        {
          /* Strip the component reference completely.  */
          op0 = TREE_OPERAND (expr, 0);
          op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
          *offset = off0 + int_cst_value (tmp);
          return op0;
        }
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
        return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case MEM_REF:
      /* ??? Offset operand? */
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

2128   /* Default handling of expressions for which we want to recurse into
2129      the first operand.  */
2130 op0 = TREE_OPERAND (expr, 0);
2131 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2132 *offset += off0;
2133
2134 if (op0 == TREE_OPERAND (expr, 0)
2135 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2136 return orig_expr;
2137
2138 expr = copy_node (expr);
2139 TREE_OPERAND (expr, 0) = op0;
2140 if (op1)
2141 TREE_OPERAND (expr, 1) = op1;
2142
2143   /* Inside an address, we might strip the top-level component references,
2144      thus changing the type of the expression.  Handling of ADDR_EXPR
2145      will fix that.  */
2146 expr = fold_convert (orig_type, expr);
2147
2148 return expr;
2149 }
2150
2151 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2152
2153 static tree
2154 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2155 {
2156 return strip_offset_1 (expr, false, false, offset);
2157 }
2158
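/* For illustration (a hypothetical example): given the address &a[i + 4]
   with 4-byte array elements, strip_offset returns &a[i] and stores 16
   in *OFFSET.  */
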
2159 /* Returns a variant of TYPE that can be used as a base for different uses.
2160    We return an unsigned type with the same precision, which avoids problems
2161    with overflows.  */
2162
2163 static tree
2164 generic_type_for (tree type)
2165 {
2166 if (POINTER_TYPE_P (type))
2167 return unsigned_type_for (type);
2168
2169 if (TYPE_UNSIGNED (type))
2170 return type;
2171
2172 return unsigned_type_for (type);
2173 }
2174
2175 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
2176    the bitmap in which we should store them.  */
2177
2178 static struct ivopts_data *fd_ivopts_data;
2179 static tree
2180 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2181 {
2182 bitmap *depends_on = (bitmap *) data;
2183 struct version_info *info;
2184
2185 if (TREE_CODE (*expr_p) != SSA_NAME)
2186 return NULL_TREE;
2187 info = name_info (fd_ivopts_data, *expr_p);
2188
2189 if (!info->inv_id || info->has_nonlin_use)
2190 return NULL_TREE;
2191
2192 if (!*depends_on)
2193 *depends_on = BITMAP_ALLOC (NULL);
2194 bitmap_set_bit (*depends_on, info->inv_id);
2195
2196 return NULL_TREE;
2197 }
2198
2199 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2200 position to POS. If USE is not NULL, the candidate is set as related to
2201 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2202 replacement of the final value of the iv by a direct computation. */
2203
2204 static struct iv_cand *
2205 add_candidate_1 (struct ivopts_data *data,
2206 tree base, tree step, bool important, enum iv_position pos,
2207 struct iv_use *use, gimple incremented_at)
2208 {
2209 unsigned i;
2210 struct iv_cand *cand = NULL;
2211 tree type, orig_type;
2212
2213 /* For non-original variables, make sure their values are computed in a type
2214 that does not invoke undefined behavior on overflows (since in general,
2215 we cannot prove that these induction variables are non-wrapping). */
2216 if (pos != IP_ORIGINAL)
2217 {
2218 orig_type = TREE_TYPE (base);
2219 type = generic_type_for (orig_type);
2220 if (type != orig_type)
2221 {
2222 base = fold_convert (type, base);
2223 step = fold_convert (type, step);
2224 }
2225 }
2226
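  /* Check whether an equivalent candidate has already been recorded,
     and reuse it if so.  */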
2227 for (i = 0; i < n_iv_cands (data); i++)
2228 {
2229 cand = iv_cand (data, i);
2230
2231 if (cand->pos != pos)
2232 continue;
2233
2234 if (cand->incremented_at != incremented_at
2235 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2236 && cand->ainc_use != use))
2237 continue;
2238
2239 if (!cand->iv)
2240 {
2241 if (!base && !step)
2242 break;
2243
2244 continue;
2245 }
2246
2247 if (!base && !step)
2248 continue;
2249
2250 if (operand_equal_p (base, cand->iv->base, 0)
2251 && operand_equal_p (step, cand->iv->step, 0)
2252 && (TYPE_PRECISION (TREE_TYPE (base))
2253 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2254 break;
2255 }
2256
2257 if (i == n_iv_cands (data))
2258 {
2259 cand = XCNEW (struct iv_cand);
2260 cand->id = i;
2261
2262 if (!base && !step)
2263 cand->iv = NULL;
2264 else
2265 cand->iv = alloc_iv (base, step);
2266
2267 cand->pos = pos;
2268 if (pos != IP_ORIGINAL && cand->iv)
2269 {
2270 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2271 cand->var_after = cand->var_before;
2272 }
2273 cand->important = important;
2274 cand->incremented_at = incremented_at;
2275 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2276
2277 if (step
2278 && TREE_CODE (step) != INTEGER_CST)
2279 {
2280 fd_ivopts_data = data;
2281 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2282 }
2283
2284 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2285 cand->ainc_use = use;
2286 else
2287 cand->ainc_use = NULL;
2288
2289 if (dump_file && (dump_flags & TDF_DETAILS))
2290 dump_cand (dump_file, cand);
2291 }
2292
2293 if (important && !cand->important)
2294 {
2295 cand->important = true;
2296 if (dump_file && (dump_flags & TDF_DETAILS))
2297 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2298 }
2299
2300 if (use)
2301 {
2302 bitmap_set_bit (use->related_cands, i);
2303 if (dump_file && (dump_flags & TDF_DETAILS))
2304 fprintf (dump_file, "Candidate %d is related to use %d\n",
2305 cand->id, use->id);
2306 }
2307
2308 return cand;
2309 }
2310
2311 /* Returns true if incrementing the induction variable at the end of the LOOP
2312 is allowed.
2313
2314    The purpose is to avoid splitting the latch edge with a biv increment,
2315    thus creating a jump, possibly confusing other optimization passes and
2316    leaving less freedom to the scheduler.  So we allow IP_END_POS only if
2317    IP_NORMAL_POS is not available (so we do not have a better alternative),
2318    or if the latch edge is already nonempty.  */
2319
2320 static bool
2321 allow_ip_end_pos_p (struct loop *loop)
2322 {
2323 if (!ip_normal_pos (loop))
2324 return true;
2325
2326 if (!empty_block_p (ip_end_pos (loop)))
2327 return true;
2328
2329 return false;
2330 }
2331
2332 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2333 Important field is set to IMPORTANT. */
2334
2335 static void
2336 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2337 bool important, struct iv_use *use)
2338 {
2339 basic_block use_bb = gimple_bb (use->stmt);
2340 enum machine_mode mem_mode;
2341 unsigned HOST_WIDE_INT cstepi;
2342
2343 /* If we insert the increment in any position other than the standard
2344 ones, we must ensure that it is incremented once per iteration.
2345 It must not be in an inner nested loop, or one side of an if
2346 statement. */
2347 if (use_bb->loop_father != data->current_loop
2348 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2349 || stmt_could_throw_p (use->stmt)
2350 || !cst_and_fits_in_hwi (step))
2351 return;
2352
2353 cstepi = int_cst_value (step);
2354
2355 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2356 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2357 || USE_STORE_PRE_INCREMENT (mem_mode))
2358 && GET_MODE_SIZE (mem_mode) == cstepi)
2359 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2360 || USE_STORE_PRE_DECREMENT (mem_mode))
2361 && GET_MODE_SIZE (mem_mode) == -cstepi))
2362 {
2363 enum tree_code code = MINUS_EXPR;
2364 tree new_base;
2365 tree new_step = step;
2366
2367 if (POINTER_TYPE_P (TREE_TYPE (base)))
2368 {
2369 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2370 code = POINTER_PLUS_EXPR;
2371 }
2372 else
2373 new_step = fold_convert (TREE_TYPE (base), new_step);
2374 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2375 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2376 use->stmt);
2377 }
2378 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2379 || USE_STORE_POST_INCREMENT (mem_mode))
2380 && GET_MODE_SIZE (mem_mode) == cstepi)
2381 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2382 || USE_STORE_POST_DECREMENT (mem_mode))
2383 && GET_MODE_SIZE (mem_mode) == -cstepi))
2384 {
2385 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2386 use->stmt);
2387 }
2388 }
2389
2390 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2391 position to POS. If USE is not NULL, the candidate is set as related to
2392 it. The candidate computation is scheduled on all available positions. */
2393
2394 static void
2395 add_candidate (struct ivopts_data *data,
2396 tree base, tree step, bool important, struct iv_use *use)
2397 {
2398 if (ip_normal_pos (data->current_loop))
2399 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2400 if (ip_end_pos (data->current_loop)
2401 && allow_ip_end_pos_p (data->current_loop))
2402 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2403
2404 if (use != NULL && use->type == USE_ADDRESS)
2405 add_autoinc_candidates (data, base, step, important, use);
2406 }
2407
2408 /* Adds standard iv candidates. */
2409
2410 static void
2411 add_standard_iv_candidates (struct ivopts_data *data)
2412 {
2413 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2414
2415   /* The same for the long type if it is still fast enough.  */
2416 if (TYPE_PRECISION
2417 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2418 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2419 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2420 build_int_cst (long_integer_type_node, 1), true, NULL);
2421
2422   /* And for the long long type if it is still fast enough.  */
2423 if (TYPE_PRECISION
2424 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2425 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2426 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2427 build_int_cst (long_long_integer_type_node, 1), true, NULL);
2428 }
2429
2430
2431 /* Adds candidates based on the old induction variable IV.  */
2432
2433 static void
2434 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2435 {
2436 gimple phi;
2437 tree def;
2438 struct iv_cand *cand;
2439
2440 add_candidate (data, iv->base, iv->step, true, NULL);
2441
2442 /* The same, but with initial value zero. */
2443 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2444 add_candidate (data, size_int (0), iv->step, true, NULL);
2445 else
2446 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2447 iv->step, true, NULL);
2448
2449 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2450 if (gimple_code (phi) == GIMPLE_PHI)
2451 {
2452 /* Additionally record the possibility of leaving the original iv
2453 untouched. */
2454 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2455 cand = add_candidate_1 (data,
2456 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2457 SSA_NAME_DEF_STMT (def));
2458 cand->var_before = iv->ssa_name;
2459 cand->var_after = def;
2460 }
2461 }
2462
2463 /* Adds candidates based on the old induction variables. */
2464
2465 static void
2466 add_old_ivs_candidates (struct ivopts_data *data)
2467 {
2468 unsigned i;
2469 struct iv *iv;
2470 bitmap_iterator bi;
2471
2472 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2473 {
2474 iv = ver_info (data, i)->iv;
2475 if (iv && iv->biv_p && !integer_zerop (iv->step))
2476 add_old_iv_candidates (data, iv);
2477 }
2478 }
2479
2480 /* Adds candidates based on the value of the induction variable IV and USE. */
2481
2482 static void
2483 add_iv_value_candidates (struct ivopts_data *data,
2484 struct iv *iv, struct iv_use *use)
2485 {
2486 unsigned HOST_WIDE_INT offset;
2487 tree base;
2488 tree basetype;
2489
2490 add_candidate (data, iv->base, iv->step, false, use);
2491
2492   /* The same, but with initial value zero.  Make such a variable important,
2493      since it is generic enough that possibly many uses may be based
2494      on it.  */
2495 basetype = TREE_TYPE (iv->base);
2496 if (POINTER_TYPE_P (basetype))
2497 basetype = sizetype;
2498 add_candidate (data, build_int_cst (basetype, 0),
2499 iv->step, true, use);
2500
2501 /* Third, try removing the constant offset. Make sure to even
2502 add a candidate for &a[0] vs. (T *)&a. */
2503 base = strip_offset (iv->base, &offset);
2504 if (offset
2505 || base != iv->base)
2506 add_candidate (data, base, iv->step, false, use);
2507 }
2508
2509 /* Adds candidates based on the uses. */
2510
2511 static void
2512 add_derived_ivs_candidates (struct ivopts_data *data)
2513 {
2514 unsigned i;
2515
2516 for (i = 0; i < n_iv_uses (data); i++)
2517 {
2518 struct iv_use *use = iv_use (data, i);
2519
2520 if (!use)
2521 continue;
2522
2523 switch (use->type)
2524 {
2525 case USE_NONLINEAR_EXPR:
2526 case USE_COMPARE:
2527 case USE_ADDRESS:
2528 /* Just add the ivs based on the value of the iv used here. */
2529 add_iv_value_candidates (data, use->iv, use);
2530 break;
2531
2532 default:
2533 gcc_unreachable ();
2534 }
2535 }
2536 }
2537
2538 /* Record important candidates and add them to related_cands bitmaps
2539 if needed. */
2540
2541 static void
2542 record_important_candidates (struct ivopts_data *data)
2543 {
2544 unsigned i;
2545 struct iv_use *use;
2546
2547 for (i = 0; i < n_iv_cands (data); i++)
2548 {
2549 struct iv_cand *cand = iv_cand (data, i);
2550
2551 if (cand->important)
2552 bitmap_set_bit (data->important_candidates, i);
2553 }
2554
2555 data->consider_all_candidates = (n_iv_cands (data)
2556 <= CONSIDER_ALL_CANDIDATES_BOUND);
2557
2558 if (data->consider_all_candidates)
2559 {
2560 /* We will not need "related_cands" bitmaps in this case,
2561 so release them to decrease peak memory consumption. */
2562 for (i = 0; i < n_iv_uses (data); i++)
2563 {
2564 use = iv_use (data, i);
2565 BITMAP_FREE (use->related_cands);
2566 }
2567 }
2568 else
2569 {
2570 /* Add important candidates to the related_cands bitmaps. */
2571 for (i = 0; i < n_iv_uses (data); i++)
2572 bitmap_ior_into (iv_use (data, i)->related_cands,
2573 data->important_candidates);
2574 }
2575 }
2576
2577 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2578 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2579    we allocate a simple list for every use.  */
2580
2581 static void
2582 alloc_use_cost_map (struct ivopts_data *data)
2583 {
2584 unsigned i, size, s, j;
2585
2586 for (i = 0; i < n_iv_uses (data); i++)
2587 {
2588 struct iv_use *use = iv_use (data, i);
2589 bitmap_iterator bi;
2590
2591 if (data->consider_all_candidates)
2592 size = n_iv_cands (data);
2593 else
2594 {
2595 s = 0;
2596 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2597 {
2598 s++;
2599 }
2600
2601 	  /* Round up to a power of two, so that computing the modulo by it is fast.  */
2602 for (size = 1; size < s; size <<= 1)
2603 continue;
2604 }
2605
2606 use->n_map_members = size;
2607 use->cost_map = XCNEWVEC (struct cost_pair, size);
2608 }
2609 }
2610
2611 /* Returns a description of the computation cost of an expression whose
2612    runtime cost is RUNTIME and whose complexity corresponds to COMPLEXITY.  */
2613
2614 static comp_cost
2615 new_cost (unsigned runtime, unsigned complexity)
2616 {
2617 comp_cost cost;
2618
2619 cost.cost = runtime;
2620 cost.complexity = complexity;
2621
2622 return cost;
2623 }
2624
2625 /* Adds costs COST1 and COST2. */
2626
2627 static comp_cost
2628 add_costs (comp_cost cost1, comp_cost cost2)
2629 {
2630 cost1.cost += cost2.cost;
2631 cost1.complexity += cost2.complexity;
2632
2633 return cost1;
2634 }

2635 /* Subtracts COST2 from COST1.  */
2636
2637 static comp_cost
2638 sub_costs (comp_cost cost1, comp_cost cost2)
2639 {
2640 cost1.cost -= cost2.cost;
2641 cost1.complexity -= cost2.complexity;
2642
2643 return cost1;
2644 }
2645
2646 /* Returns a negative number if COST1 < COST2, a positive number if
2647 COST1 > COST2, and 0 if COST1 = COST2. */
2648
2649 static int
2650 compare_costs (comp_cost cost1, comp_cost cost2)
2651 {
2652 if (cost1.cost == cost2.cost)
2653 return cost1.complexity - cost2.complexity;
2654
2655 return cost1.cost - cost2.cost;
2656 }
2657
2658 /* Returns true if COST is infinite. */
2659
2660 static bool
2661 infinite_cost_p (comp_cost cost)
2662 {
2663 return cost.cost == INFTY;
2664 }
2665
2666 /* Sets the cost of the (USE, CANDIDATE) pair to COST, and records that it
2667    depends on invariants DEPENDS_ON, that the value used in expressing it is
2668    VALUE, and that in case of iv elimination the comparison operator is COMP.  */
2669
2670 static void
2671 set_use_iv_cost (struct ivopts_data *data,
2672 struct iv_use *use, struct iv_cand *cand,
2673 comp_cost cost, bitmap depends_on, tree value,
2674 enum tree_code comp, int inv_expr_id)
2675 {
2676 unsigned i, s;
2677
2678 if (infinite_cost_p (cost))
2679 {
2680 BITMAP_FREE (depends_on);
2681 return;
2682 }
2683
2684 if (data->consider_all_candidates)
2685 {
2686 use->cost_map[cand->id].cand = cand;
2687 use->cost_map[cand->id].cost = cost;
2688 use->cost_map[cand->id].depends_on = depends_on;
2689 use->cost_map[cand->id].value = value;
2690 use->cost_map[cand->id].comp = comp;
2691 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2692 return;
2693 }
2694
2695 /* n_map_members is a power of two, so this computes modulo. */
2696 s = cand->id & (use->n_map_members - 1);
2697 for (i = s; i < use->n_map_members; i++)
2698 if (!use->cost_map[i].cand)
2699 goto found;
2700 for (i = 0; i < s; i++)
2701 if (!use->cost_map[i].cand)
2702 goto found;
2703
2704 gcc_unreachable ();
2705
2706 found:
2707 use->cost_map[i].cand = cand;
2708 use->cost_map[i].cost = cost;
2709 use->cost_map[i].depends_on = depends_on;
2710 use->cost_map[i].value = value;
2711 use->cost_map[i].comp = comp;
2712 use->cost_map[i].inv_expr_id = inv_expr_id;
2713 }
2714
2715 /* Gets cost of (USE, CANDIDATE) pair. */
2716
2717 static struct cost_pair *
2718 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2719 struct iv_cand *cand)
2720 {
2721 unsigned i, s;
2722 struct cost_pair *ret;
2723
2724 if (!cand)
2725 return NULL;
2726
2727 if (data->consider_all_candidates)
2728 {
2729 ret = use->cost_map + cand->id;
2730 if (!ret->cand)
2731 return NULL;
2732
2733 return ret;
2734 }
2735
2736 /* n_map_members is a power of two, so this computes modulo. */
2737 s = cand->id & (use->n_map_members - 1);
2738 for (i = s; i < use->n_map_members; i++)
2739 if (use->cost_map[i].cand == cand)
2740 return use->cost_map + i;
2741
2742 for (i = 0; i < s; i++)
2743 if (use->cost_map[i].cand == cand)
2744 return use->cost_map + i;
2745
2746 return NULL;
2747 }
2748
2749 /* Returns an estimate of the cost of computing SEQ.  */
2750
2751 static unsigned
2752 seq_cost (rtx seq, bool speed)
2753 {
2754 unsigned cost = 0;
2755 rtx set;
2756
2757 for (; seq; seq = NEXT_INSN (seq))
2758 {
2759 set = single_set (seq);
2760 if (set)
2761 cost += set_src_cost (SET_SRC (set), speed);
2762 else
2763 cost++;
2764 }
2765
2766 return cost;
2767 }
2768
2769 /* Produces DECL_RTL for object OBJ so it looks like it is stored in memory.  */
2770 static rtx
2771 produce_memory_decl_rtl (tree obj, int *regno)
2772 {
2773 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2774 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2775 rtx x;
2776
2777 gcc_assert (obj);
2778 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2779 {
2780 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2781 x = gen_rtx_SYMBOL_REF (address_mode, name);
2782 SET_SYMBOL_REF_DECL (x, obj);
2783 x = gen_rtx_MEM (DECL_MODE (obj), x);
2784 set_mem_addr_space (x, as);
2785 targetm.encode_section_info (obj, x, true);
2786 }
2787 else
2788 {
2789 x = gen_raw_REG (address_mode, (*regno)++);
2790 x = gen_rtx_MEM (DECL_MODE (obj), x);
2791 set_mem_addr_space (x, as);
2792 }
2793
2794 return x;
2795 }
2796
2797 /* Prepares decl_rtl for variables referred to in *EXPR_P.  Callback for
2798    walk_tree.  DATA contains the next fake register number to use.  */
2799
2800 static tree
2801 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2802 {
2803 tree obj = NULL_TREE;
2804 rtx x = NULL_RTX;
2805 int *regno = (int *) data;
2806
2807 switch (TREE_CODE (*expr_p))
2808 {
2809 case ADDR_EXPR:
2810 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2811 handled_component_p (*expr_p);
2812 expr_p = &TREE_OPERAND (*expr_p, 0))
2813 continue;
2814 obj = *expr_p;
2815 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2816 x = produce_memory_decl_rtl (obj, regno);
2817 break;
2818
2819 case SSA_NAME:
2820 *ws = 0;
2821 obj = SSA_NAME_VAR (*expr_p);
2822 if (!DECL_RTL_SET_P (obj))
2823 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2824 break;
2825
2826 case VAR_DECL:
2827 case PARM_DECL:
2828 case RESULT_DECL:
2829 *ws = 0;
2830 obj = *expr_p;
2831
2832 if (DECL_RTL_SET_P (obj))
2833 break;
2834
2835 if (DECL_MODE (obj) == BLKmode)
2836 x = produce_memory_decl_rtl (obj, regno);
2837 else
2838 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2839
2840 break;
2841
2842 default:
2843 break;
2844 }
2845
2846 if (x)
2847 {
2848 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2849 SET_DECL_RTL (obj, x);
2850 }
2851
2852 return NULL_TREE;
2853 }
2854
2855 /* Determines cost of the computation of EXPR. */
2856
2857 static unsigned
2858 computation_cost (tree expr, bool speed)
2859 {
2860 rtx seq, rslt;
2861 tree type = TREE_TYPE (expr);
2862 unsigned cost;
2863 /* Avoid using hard regs in ways which may be unsupported. */
2864 int regno = LAST_VIRTUAL_REGISTER + 1;
2865 struct cgraph_node *node = cgraph_get_node (current_function_decl);
2866 enum node_frequency real_frequency = node->frequency;
2867
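  /* Pretend the function has normal frequency and set the RTL profile
     according to SPEED, so that the expansion below is costed in the
     requested mode; both settings are restored afterwards.  */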
2868 node->frequency = NODE_FREQUENCY_NORMAL;
2869 crtl->maybe_hot_insn_p = speed;
2870 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2871 start_sequence ();
2872 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2873 seq = get_insns ();
2874 end_sequence ();
2875 default_rtl_profile ();
2876 node->frequency = real_frequency;
2877
2878 cost = seq_cost (seq, speed);
2879 if (MEM_P (rslt))
2880 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2881 TYPE_ADDR_SPACE (type), speed);
2882 else if (!REG_P (rslt))
2883 cost += set_src_cost (rslt, speed);
2884
2885 return cost;
2886 }
2887
2888 /* Returns the variable containing the value of candidate CAND at statement STMT.  */
2889
2890 static tree
2891 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2892 {
2893 if (stmt_after_increment (loop, cand, stmt))
2894 return cand->var_after;
2895 else
2896 return cand->var_before;
2897 }
2898
2899 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2900 same precision that is at least as wide as the precision of TYPE, stores
2901 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2902 type of A and B. */
2903
2904 static tree
2905 determine_common_wider_type (tree *a, tree *b)
2906 {
2907 tree wider_type = NULL;
2908 tree suba, subb;
2909 tree atype = TREE_TYPE (*a);
2910
2911 if (CONVERT_EXPR_P (*a))
2912 {
2913 suba = TREE_OPERAND (*a, 0);
2914 wider_type = TREE_TYPE (suba);
2915 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2916 return atype;
2917 }
2918 else
2919 return atype;
2920
2921 if (CONVERT_EXPR_P (*b))
2922 {
2923 subb = TREE_OPERAND (*b, 0);
2924 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2925 return atype;
2926 }
2927 else
2928 return atype;
2929
2930 *a = suba;
2931 *b = subb;
2932 return wider_type;
2933 }
2934
2935 /* Determines the expression by which USE is expressed from induction variable
2936    CAND at statement AT in LOOP.  The expression is stored in a decomposed
2937    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
2938
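/* For instance (hypothetical values): if USE is the iv {base 4, step 4}
   and CAND is the iv {base 0, step 2}, the ratio is 2 and the use is
   expressed as 4 + 2 * var, where var holds CAND's value at AT.  */
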
2939 static bool
2940 get_computation_aff (struct loop *loop,
2941 struct iv_use *use, struct iv_cand *cand, gimple at,
2942 struct affine_tree_combination *aff)
2943 {
2944 tree ubase = use->iv->base;
2945 tree ustep = use->iv->step;
2946 tree cbase = cand->iv->base;
2947 tree cstep = cand->iv->step, cstep_common;
2948 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2949 tree common_type, var;
2950 tree uutype;
2951 aff_tree cbase_aff, var_aff;
2952 double_int rat;
2953
2954 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2955 {
2956       /* We do not have a precision to express the values of USE.  */
2957 return false;
2958 }
2959
2960 var = var_at_stmt (loop, cand, at);
2961 uutype = unsigned_type_for (utype);
2962
2963   /* If the conversion is not a noop, perform it.  */
2964 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2965 {
2966 cstep = fold_convert (uutype, cstep);
2967 cbase = fold_convert (uutype, cbase);
2968 var = fold_convert (uutype, var);
2969 }
2970
2971 if (!constant_multiple_of (ustep, cstep, &rat))
2972 return false;
2973
2974   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2975      type, we achieve better folding by computing their difference in this
2976      wider type, and casting the result to UUTYPE.  We do not need to worry
2977      about overflows, as all the arithmetic will in the end be performed in
2978      UUTYPE anyway.  */
2979 common_type = determine_common_wider_type (&ubase, &cbase);
2980
2981 /* use = ubase - ratio * cbase + ratio * var. */
2982 tree_to_aff_combination (ubase, common_type, aff);
2983 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2984 tree_to_aff_combination (var, uutype, &var_aff);
2985
2986 /* We need to shift the value if we are after the increment. */
2987 if (stmt_after_increment (loop, cand, at))
2988 {
2989 aff_tree cstep_aff;
2990
2991 if (common_type != uutype)
2992 cstep_common = fold_convert (common_type, cstep);
2993 else
2994 cstep_common = cstep;
2995
2996 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2997 aff_combination_add (&cbase_aff, &cstep_aff);
2998 }
2999
3000 aff_combination_scale (&cbase_aff, double_int_neg (rat));
3001 aff_combination_add (aff, &cbase_aff);
3002 if (common_type != uutype)
3003 aff_combination_convert (aff, uutype);
3004
3005 aff_combination_scale (&var_aff, rat);
3006 aff_combination_add (aff, &var_aff);
3007
3008 return true;
3009 }
3010
3011 /* Determines the expression by which USE is expressed from induction variable
3012    CAND at statement AT in LOOP.  The computation is unshared.  */
3013
3014 static tree
3015 get_computation_at (struct loop *loop,
3016 struct iv_use *use, struct iv_cand *cand, gimple at)
3017 {
3018 aff_tree aff;
3019 tree type = TREE_TYPE (use->iv->base);
3020
3021 if (!get_computation_aff (loop, use, cand, at, &aff))
3022 return NULL_TREE;
3023 unshare_aff_combination (&aff);
3024 return fold_convert (type, aff_combination_to_tree (&aff));
3025 }
3026
3027 /* Determines the expression by which USE is expressed from induction variable
3028    CAND in LOOP.  The computation is unshared.  */
3029
3030 static tree
3031 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3032 {
3033 return get_computation_at (loop, use, cand, use->stmt);
3034 }
3035
3036 /* Adjust the cost COST for being in loop setup rather than loop body.
3037 If we're optimizing for space, the loop setup overhead is constant;
3038 if we're optimizing for speed, amortize it over the per-iteration cost. */
3039 static unsigned
3040 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3041 {
3042 if (cost == INFTY)
3043 return cost;
3044 else if (optimize_loop_for_speed_p (data->current_loop))
3045 return cost / avg_loop_niter (data->current_loop);
3046 else
3047 return cost;
3048 }
3049
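/* For example (hypothetical numbers): with an average of 10 iterations,
   a setup cost of 20 is accounted as 2 when optimizing for speed.  */
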
3050 DEF_VEC_P (sbitmap);
3051 DEF_VEC_ALLOC_P (sbitmap, heap);
3052
3053 /* Returns true if multiplying by RATIO is allowed in an address.  Tests the
3054    validity for a memory reference accessing memory of mode MODE in
3055    address space AS.  */
3056
3057 bool
3058 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3059 addr_space_t as)
3060 {
3061 #define MAX_RATIO 128
3062 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3063 static VEC (sbitmap, heap) *valid_mult_list;
3064 sbitmap valid_mult;
3065
3066 if (data_index >= VEC_length (sbitmap, valid_mult_list))
3067 VEC_safe_grow_cleared (sbitmap, heap, valid_mult_list, data_index + 1);
3068
3069 valid_mult = VEC_index (sbitmap, valid_mult_list, data_index);
3070 if (!valid_mult)
3071 {
3072 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3073 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3074 rtx addr;
3075 HOST_WIDE_INT i;
3076
3077 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3078 sbitmap_zero (valid_mult);
3079 addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3080 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3081 {
3082 XEXP (addr, 1) = gen_int_mode (i, address_mode);
3083 if (memory_address_addr_space_p (mode, addr, as))
3084 SET_BIT (valid_mult, i + MAX_RATIO);
3085 }
3086
3087 if (dump_file && (dump_flags & TDF_DETAILS))
3088 {
3089 fprintf (dump_file, " allowed multipliers:");
3090 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3091 if (TEST_BIT (valid_mult, i + MAX_RATIO))
3092 fprintf (dump_file, " %d", (int) i);
3093 fprintf (dump_file, "\n");
3094 fprintf (dump_file, "\n");
3095 }
3096
3097 VEC_replace (sbitmap, valid_mult_list, data_index, valid_mult);
3098 }
3099
3100 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3101 return false;
3102
3103 return TEST_BIT (valid_mult, ratio + MAX_RATIO);
3104 }
3105
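/* For example, on a target with base + index * scale addressing, the
   probing above would typically leave only ratios such as 1, 2, 4 and 8
   marked as valid multipliers.  */
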
3106 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3107 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3108 variable is omitted. Compute the cost for a memory reference that accesses
3109 a memory location of mode MEM_MODE in address space AS.
3110
3111 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3112 size of MEM_MODE / RATIO) is available. To make this determination, we
3113 look at the size of the increment to be made, which is given in CSTEP.
3114 CSTEP may be zero if the step is unknown.
3115 STMT_AFTER_INC is true iff the statement we're looking at is after the
3116 increment of the original biv.
3117
3118 TODO -- there must be some better way. This all is quite crude. */
3119
3120 typedef struct
3121 {
3122 HOST_WIDE_INT min_offset, max_offset;
3123 unsigned costs[2][2][2][2];
3124 } *address_cost_data;
3125
3126 DEF_VEC_P (address_cost_data);
3127 DEF_VEC_ALLOC_P (address_cost_data, heap);
3128
3129 static comp_cost
3130 get_address_cost (bool symbol_present, bool var_present,
3131 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3132 HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3133 addr_space_t as, bool speed,
3134 bool stmt_after_inc, bool *may_autoinc)
3135 {
3136 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3137 static VEC(address_cost_data, heap) *address_cost_data_list;
3138 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3139 address_cost_data data;
3140 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3141 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3142 unsigned cost, acost, complexity;
3143 bool offset_p, ratio_p, autoinc;
3144 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3145 unsigned HOST_WIDE_INT mask;
3146 unsigned bits;
3147
3148 if (data_index >= VEC_length (address_cost_data, address_cost_data_list))
3149 VEC_safe_grow_cleared (address_cost_data, heap, address_cost_data_list,
3150 data_index + 1);
3151
3152 data = VEC_index (address_cost_data, address_cost_data_list, data_index);
3153 if (!data)
3154 {
3155 HOST_WIDE_INT i;
3156 HOST_WIDE_INT rat, off = 0;
3157 int old_cse_not_expected, width;
3158 unsigned sym_p, var_p, off_p, rat_p, add_c;
3159 rtx seq, addr, base;
3160 rtx reg0, reg1;
3161
3162 data = (address_cost_data) xcalloc (1, sizeof (*data));
3163
3164 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3165
3166 width = GET_MODE_BITSIZE (address_mode) - 1;
3167 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3168 width = HOST_BITS_PER_WIDE_INT - 1;
3169 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3170
3171 for (i = width; i >= 0; i--)
3172 {
3173 off = -((HOST_WIDE_INT) 1 << i);
3174 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3175 if (memory_address_addr_space_p (mem_mode, addr, as))
3176 break;
3177 }
3178       data->min_offset = (i == -1 ? 0 : off);
3179
3180 for (i = width; i >= 0; i--)
3181 {
3182 off = ((HOST_WIDE_INT) 1 << i) - 1;
3183 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3184 if (memory_address_addr_space_p (mem_mode, addr, as))
3185 break;
3186 }
3187 if (i == -1)
3188 off = 0;
3189 data->max_offset = off;
3190
3191 if (dump_file && (dump_flags & TDF_DETAILS))
3192 {
3193 fprintf (dump_file, "get_address_cost:\n");
3194 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3195 GET_MODE_NAME (mem_mode),
3196 data->min_offset);
3197 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3198 GET_MODE_NAME (mem_mode),
3199 data->max_offset);
3200 }
3201
3202 rat = 1;
3203 for (i = 2; i <= MAX_RATIO; i++)
3204 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3205 {
3206 rat = i;
3207 break;
3208 }
3209
3210 /* Compute the cost of various addressing modes. */
3211 acost = 0;
3212 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3213 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3214
3215 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3216 || USE_STORE_PRE_DECREMENT (mem_mode))
3217 {
3218 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3219 has_predec[mem_mode]
3220 = memory_address_addr_space_p (mem_mode, addr, as);
3221 }
3222 if (USE_LOAD_POST_DECREMENT (mem_mode)
3223 || USE_STORE_POST_DECREMENT (mem_mode))
3224 {
3225 addr = gen_rtx_POST_DEC (address_mode, reg0);
3226 has_postdec[mem_mode]
3227 = memory_address_addr_space_p (mem_mode, addr, as);
3228 }
3229 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3230 	  || USE_STORE_PRE_INCREMENT (mem_mode))
3231 {
3232 addr = gen_rtx_PRE_INC (address_mode, reg0);
3233 has_preinc[mem_mode]
3234 = memory_address_addr_space_p (mem_mode, addr, as);
3235 }
3236 if (USE_LOAD_POST_INCREMENT (mem_mode)
3237 || USE_STORE_POST_INCREMENT (mem_mode))
3238 {
3239 addr = gen_rtx_POST_INC (address_mode, reg0);
3240 has_postinc[mem_mode]
3241 = memory_address_addr_space_p (mem_mode, addr, as);
3242 }
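      /* Each index I below encodes one combination of the four address
	 parts: bit 0 = symbol, bit 1 = var, bit 2 = offset, bit 3 = ratio.  */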
3243 for (i = 0; i < 16; i++)
3244 {
3245 sym_p = i & 1;
3246 var_p = (i >> 1) & 1;
3247 off_p = (i >> 2) & 1;
3248 rat_p = (i >> 3) & 1;
3249
3250 addr = reg0;
3251 if (rat_p)
3252 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3253 gen_int_mode (rat, address_mode));
3254
3255 if (var_p)
3256 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3257
3258 if (sym_p)
3259 {
3260 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3261 /* ??? We can run into trouble with some backends by presenting
3262 it with symbols which haven't been properly passed through
3263 targetm.encode_section_info. By setting the local bit, we
3264 enhance the probability of things working. */
3265 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3266
3267 if (off_p)
3268 base = gen_rtx_fmt_e (CONST, address_mode,
3269 gen_rtx_fmt_ee
3270 (PLUS, address_mode, base,
3271 gen_int_mode (off, address_mode)));
3272 }
3273 else if (off_p)
3274 base = gen_int_mode (off, address_mode);
3275 else
3276 base = NULL_RTX;
3277
3278 if (base)
3279 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3280
3281 start_sequence ();
3282 	  /* To avoid splitting addressing modes, pretend that no CSE will
3283 	     follow.  */
3284 old_cse_not_expected = cse_not_expected;
3285 cse_not_expected = true;
3286 addr = memory_address_addr_space (mem_mode, addr, as);
3287 cse_not_expected = old_cse_not_expected;
3288 seq = get_insns ();
3289 end_sequence ();
3290
3291 acost = seq_cost (seq, speed);
3292 acost += address_cost (addr, mem_mode, as, speed);
3293
3294 if (!acost)
3295 acost = 1;
3296 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3297 }
3298
3299       /* On some targets, it is quite expensive to load a symbol to a register,
3300 	 which makes addresses that contain symbols look much more expensive.
3301 	 However, the symbol will have to be loaded in any case before the
3302 	 loop (and quite likely we have it in a register already), so it does
3303 	 not make much sense to penalize them too heavily.  So make some final
3304 	 tweaks for the SYMBOL_PRESENT modes:
3305
3306 	 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3307 	 var is cheaper, use this mode with a small penalty.
3308 	 If VAR_PRESENT is true, try whether the mode with
3309 	 SYMBOL_PRESENT = false is cheaper even with the cost of the addition,
3310 	 and if this is the case, use it.  */
3311 add_c = add_cost[speed][address_mode];
3312 for (i = 0; i < 8; i++)
3313 {
3314 var_p = i & 1;
3315 off_p = (i >> 1) & 1;
3316 rat_p = (i >> 2) & 1;
3317
3318 acost = data->costs[0][1][off_p][rat_p] + 1;
3319 if (var_p)
3320 acost += add_c;
3321
3322 if (acost < data->costs[1][var_p][off_p][rat_p])
3323 data->costs[1][var_p][off_p][rat_p] = acost;
3324 }
3325
3326 if (dump_file && (dump_flags & TDF_DETAILS))
3327 {
3328 fprintf (dump_file, "Address costs:\n");
3329
3330 for (i = 0; i < 16; i++)
3331 {
3332 sym_p = i & 1;
3333 var_p = (i >> 1) & 1;
3334 off_p = (i >> 2) & 1;
3335 rat_p = (i >> 3) & 1;
3336
3337 fprintf (dump_file, " ");
3338 if (sym_p)
3339 fprintf (dump_file, "sym + ");
3340 if (var_p)
3341 fprintf (dump_file, "var + ");
3342 if (off_p)
3343 fprintf (dump_file, "cst + ");
3344 if (rat_p)
3345 fprintf (dump_file, "rat * ");
3346
3347 acost = data->costs[sym_p][var_p][off_p][rat_p];
3348 fprintf (dump_file, "index costs %d\n", acost);
3349 }
3350 if (has_predec[mem_mode] || has_postdec[mem_mode]
3351 || has_preinc[mem_mode] || has_postinc[mem_mode])
3352 fprintf (dump_file, " May include autoinc/dec\n");
3353 fprintf (dump_file, "\n");
3354 }
3355
3356 VEC_replace (address_cost_data, address_cost_data_list,
3357 data_index, data);
3358 }
3359
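  /* Sign-extend OFFSET from the width of the address mode, so that large
     unsigned values are interpreted as negative offsets.  */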
3360 bits = GET_MODE_BITSIZE (address_mode);
3361 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3362 offset &= mask;
3363 if ((offset >> (bits - 1) & 1))
3364 offset |= ~mask;
3365 s_offset = offset;
3366
3367 autoinc = false;
3368 msize = GET_MODE_SIZE (mem_mode);
3369 autoinc_offset = offset;
3370 if (stmt_after_inc)
3371 autoinc_offset += ratio * cstep;
3372 if (symbol_present || var_present || ratio != 1)
3373 autoinc = false;
3374 else if ((has_postinc[mem_mode] && autoinc_offset == 0
3375 && msize == cstep)
3376 || (has_postdec[mem_mode] && autoinc_offset == 0
3377 && msize == -cstep)
3378 || (has_preinc[mem_mode] && autoinc_offset == msize
3379 && msize == cstep)
3380 || (has_predec[mem_mode] && autoinc_offset == -msize
3381 && msize == -cstep))
3382 autoinc = true;
3383
3384 cost = 0;
3385 offset_p = (s_offset != 0
3386 && data->min_offset <= s_offset
3387 && s_offset <= data->max_offset);
3388 ratio_p = (ratio != 1
3389 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3390
3391 if (ratio != 1 && !ratio_p)
3392 cost += mult_by_coeff_cost (ratio, address_mode, speed);
3393
3394 if (s_offset && !offset_p && !symbol_present)
3395 cost += add_cost[speed][address_mode];
3396
3397 if (may_autoinc)
3398 *may_autoinc = autoinc;
3399 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3400 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3401 return new_cost (cost + acost, complexity);
3402 }
3403
3404 /* Calculates the SPEED or size cost of shiftadd EXPR in MODE.  MULT is
3405    the EXPR operand holding the multiplication.  COST0 and COST1 are the costs for
3406 calculating the operands of EXPR. Returns true if successful, and returns
3407 the cost in COST. */
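
/* For instance, x + y * 8 can be implemented as the shift-add
   x + (y << 3); the multiplication by 8 is costed with m = 3.  */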
3408
3409 static bool
3410 get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
3411 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3412 {
3413 comp_cost res;
3414 tree op1 = TREE_OPERAND (expr, 1);
3415 tree cst = TREE_OPERAND (mult, 1);
3416 tree multop = TREE_OPERAND (mult, 0);
3417 int m = exact_log2 (int_cst_value (cst));
3418 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3419 int sa_cost;
3420
3421 if (!(m >= 0 && m < maxm))
3422 return false;
3423
3424 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3425 ? shiftadd_cost[speed][mode][m]
3426 : (mult == op1
3427 ? shiftsub1_cost[speed][mode][m]
3428 : shiftsub0_cost[speed][mode][m]));
3429 res = new_cost (sa_cost, 0);
3430 res = add_costs (res, mult == op1 ? cost0 : cost1);
3431
3432 STRIP_NOPS (multop);
3433 if (!is_gimple_val (multop))
3434 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3435
3436 *cost = res;
3437 return true;
3438 }
3439
3440 /* Estimates cost of forcing expression EXPR into a variable. */
3441
3442 static comp_cost
3443 force_expr_to_var_cost (tree expr, bool speed)
3444 {
3445 static bool costs_initialized = false;
3446 static unsigned integer_cost [2];
3447 static unsigned symbol_cost [2];
3448 static unsigned address_cost [2];
3449 tree op0, op1;
3450 comp_cost cost0, cost1, cost;
3451 enum machine_mode mode;
3452
3453 if (!costs_initialized)
3454 {
3455 tree type = build_pointer_type (integer_type_node);
3456 tree var, addr;
3457 rtx x;
3458 int i;
3459
3460 var = create_tmp_var_raw (integer_type_node, "test_var");
3461 TREE_STATIC (var) = 1;
3462 x = produce_memory_decl_rtl (var, NULL);
3463 SET_DECL_RTL (var, x);
3464
3465 addr = build1 (ADDR_EXPR, type, var);
3466
3468 for (i = 0; i < 2; i++)
3469 {
3470 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3471 2000), i);
3472
3473 symbol_cost[i] = computation_cost (addr, i) + 1;
3474
3475 address_cost[i]
3476 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3477 if (dump_file && (dump_flags & TDF_DETAILS))
3478 {
3479 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3480 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3481 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3482 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3483 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3484 fprintf (dump_file, "\n");
3485 }
3486 }
3487
3488 costs_initialized = true;
3489 }
3490
3491 STRIP_NOPS (expr);
3492
3493 if (SSA_VAR_P (expr))
3494 return no_cost;
3495
3496 if (is_gimple_min_invariant (expr))
3497 {
3498 if (TREE_CODE (expr) == INTEGER_CST)
3499 return new_cost (integer_cost [speed], 0);
3500
3501 if (TREE_CODE (expr) == ADDR_EXPR)
3502 {
3503 tree obj = TREE_OPERAND (expr, 0);
3504
3505 if (TREE_CODE (obj) == VAR_DECL
3506 || TREE_CODE (obj) == PARM_DECL
3507 || TREE_CODE (obj) == RESULT_DECL)
3508 return new_cost (symbol_cost [speed], 0);
3509 }
3510
3511 return new_cost (address_cost [speed], 0);
3512 }
3513
3514 switch (TREE_CODE (expr))
3515 {
3516 case POINTER_PLUS_EXPR:
3517 case PLUS_EXPR:
3518 case MINUS_EXPR:
3519 case MULT_EXPR:
3520 op0 = TREE_OPERAND (expr, 0);
3521 op1 = TREE_OPERAND (expr, 1);
3522 STRIP_NOPS (op0);
3523 STRIP_NOPS (op1);
3524
3525 if (is_gimple_val (op0))
3526 cost0 = no_cost;
3527 else
3528 cost0 = force_expr_to_var_cost (op0, speed);
3529
3530 if (is_gimple_val (op1))
3531 cost1 = no_cost;
3532 else
3533 cost1 = force_expr_to_var_cost (op1, speed);
3534
3535 break;
3536
3537 case NEGATE_EXPR:
3538 op0 = TREE_OPERAND (expr, 0);
3539 STRIP_NOPS (op0);
3540 op1 = NULL_TREE;
3541
3542 if (is_gimple_val (op0))
3543 cost0 = no_cost;
3544 else
3545 cost0 = force_expr_to_var_cost (op0, speed);
3546
3547 cost1 = no_cost;
3548 break;
3549
3550 default:
3551 /* Just an arbitrary value, FIXME. */
3552 return new_cost (target_spill_cost[speed], 0);
3553 }
3554
3555 mode = TYPE_MODE (TREE_TYPE (expr));
3556 switch (TREE_CODE (expr))
3557 {
3558 case POINTER_PLUS_EXPR:
3559 case PLUS_EXPR:
3560 case MINUS_EXPR:
3561 case NEGATE_EXPR:
3562 cost = new_cost (add_cost[speed][mode], 0);
3563 if (TREE_CODE (expr) != NEGATE_EXPR)
3564 {
3565 tree mult = NULL_TREE;
3566 comp_cost sa_cost;
3567 if (TREE_CODE (op1) == MULT_EXPR)
3568 mult = op1;
3569 else if (TREE_CODE (op0) == MULT_EXPR)
3570 mult = op0;
3571
3572 if (mult != NULL_TREE
3573 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3574 && get_shiftadd_cost (expr, mode, cost0, cost1, mult, speed,
3575 &sa_cost))
3576 return sa_cost;
3577 }
3578 break;
3579
3580 case MULT_EXPR:
3581 if (cst_and_fits_in_hwi (op0))
3582 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
3583 mode, speed), 0);
3584 else if (cst_and_fits_in_hwi (op1))
3585 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
3586 mode, speed), 0);
3587 else
3588 return new_cost (target_spill_cost [speed], 0);
3589 break;
3590
3591 default:
3592 gcc_unreachable ();
3593 }
3594
3595 cost = add_costs (cost, cost0);
3596 cost = add_costs (cost, cost1);
3597
3598 /* Bound the cost by target_spill_cost. The parts of complicated
3599 computations often are either loop invariant or at least can
3600 be shared between several iv uses, so letting this grow without
3601 limits would not give reasonable results. */
3602 if (cost.cost > (int) target_spill_cost [speed])
3603 cost.cost = target_spill_cost [speed];
3604
3605 return cost;
3606 }
3607
3608 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3609 invariants the computation depends on. */
3610
3611 static comp_cost
3612 force_var_cost (struct ivopts_data *data,
3613 tree expr, bitmap *depends_on)
3614 {
3615 if (depends_on)
3616 {
3617 fd_ivopts_data = data;
3618 walk_tree (&expr, find_depends, depends_on, NULL);
3619 }
3620
3621 return force_expr_to_var_cost (expr, data->speed);
3622 }
3623
3624 /* Estimates the cost of expressing address ADDR as var + symbol + offset.  The
3625    value of the offset is added to OFFSET; SYMBOL_PRESENT and VAR_PRESENT are
3626    set to false if the corresponding part is missing.  DEPENDS_ON is a set of
3627    the invariants the computation depends on.  */
3628
3629 static comp_cost
3630 split_address_cost (struct ivopts_data *data,
3631 tree addr, bool *symbol_present, bool *var_present,
3632 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3633 {
3634 tree core;
3635 HOST_WIDE_INT bitsize;
3636 HOST_WIDE_INT bitpos;
3637 tree toffset;
3638 enum machine_mode mode;
3639 int unsignedp, volatilep;
3640
3641 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3642 &unsignedp, &volatilep, false);
3643
3644 if (toffset != 0
3645 || bitpos % BITS_PER_UNIT != 0
3646 || TREE_CODE (core) != VAR_DECL)
3647 {
3648 *symbol_present = false;
3649 *var_present = true;
3650 fd_ivopts_data = data;
3651 walk_tree (&addr, find_depends, depends_on, NULL);
3652 return new_cost (target_spill_cost[data->speed], 0);
3653 }
3654
3655 *offset += bitpos / BITS_PER_UNIT;
3656 if (TREE_STATIC (core)
3657 || DECL_EXTERNAL (core))
3658 {
3659 *symbol_present = true;
3660 *var_present = false;
3661 return no_cost;
3662 }
3663
3664 *symbol_present = false;
3665 *var_present = true;
3666 return no_cost;
3667 }
3668
3669 /* Estimates the cost of expressing the difference of addresses E1 - E2 as
3670    var + symbol + offset.  The value of the offset is added to OFFSET;
3671    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3672    part is missing.  DEPENDS_ON is a set of the invariants the computation
3673    depends on.  */
3674
3675 static comp_cost
3676 ptr_difference_cost (struct ivopts_data *data,
3677 tree e1, tree e2, bool *symbol_present, bool *var_present,
3678 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3679 {
3680 HOST_WIDE_INT diff = 0;
3681 aff_tree aff_e1, aff_e2;
3682 tree type;
3683
3684 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3685
3686 if (ptr_difference_const (e1, e2, &diff))
3687 {
3688 *offset += diff;
3689 *symbol_present = false;
3690 *var_present = false;
3691 return no_cost;
3692 }
3693
3694 if (integer_zerop (e2))
3695 return split_address_cost (data, TREE_OPERAND (e1, 0),
3696 symbol_present, var_present, offset, depends_on);
3697
3698 *symbol_present = false;
3699 *var_present = true;
3700
3701 type = signed_type_for (TREE_TYPE (e1));
3702 tree_to_aff_combination (e1, type, &aff_e1);
3703 tree_to_aff_combination (e2, type, &aff_e2);
3704 aff_combination_scale (&aff_e2, double_int_minus_one);
3705 aff_combination_add (&aff_e1, &aff_e2);
3706
3707 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3708 }
3709
3710 /* Estimates the cost of expressing the difference E1 - E2 as
3711    var + symbol + offset.  The value of the offset is added to OFFSET;
3712    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3713    part is missing.  DEPENDS_ON is a set of the invariants the computation
3714    depends on.  */
3715
3716 static comp_cost
3717 difference_cost (struct ivopts_data *data,
3718 tree e1, tree e2, bool *symbol_present, bool *var_present,
3719 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3720 {
3721 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3722 unsigned HOST_WIDE_INT off1, off2;
3723 aff_tree aff_e1, aff_e2;
3724 tree type;
3725
3726 e1 = strip_offset (e1, &off1);
3727 e2 = strip_offset (e2, &off2);
3728 *offset += off1 - off2;
3729
3730 STRIP_NOPS (e1);
3731 STRIP_NOPS (e2);
3732
3733 if (TREE_CODE (e1) == ADDR_EXPR)
3734 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3735 offset, depends_on);
3736 *symbol_present = false;
3737
3738 if (operand_equal_p (e1, e2, 0))
3739 {
3740 *var_present = false;
3741 return no_cost;
3742 }
3743
3744 *var_present = true;
3745
3746 if (integer_zerop (e2))
3747 return force_var_cost (data, e1, depends_on);
3748
3749 if (integer_zerop (e1))
3750 {
3751 comp_cost cost = force_var_cost (data, e2, depends_on);
3752 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
3753 return cost;
3754 }
3755
3756 type = signed_type_for (TREE_TYPE (e1));
3757 tree_to_aff_combination (e1, type, &aff_e1);
3758 tree_to_aff_combination (e2, type, &aff_e2);
3759 aff_combination_scale (&aff_e2, double_int_minus_one);
3760 aff_combination_add (&aff_e1, &aff_e2);
3761
3762 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3763 }
3764
3765 /* Returns true if AFF1 and AFF2 are identical. */
3766
3767 static bool
3768 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3769 {
3770 unsigned i;
3771
3772 if (aff1->n != aff2->n)
3773 return false;
3774
3775 for (i = 0; i < aff1->n; i++)
3776 {
3777 if (double_int_cmp (aff1->elts[i].coef, aff2->elts[i].coef, 0) != 0)
3778 return false;
3779
3780 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3781 return false;
3782 }
3783 return true;
3784 }
3785
3786 /* Stores EXPR in DATA->inv_expr_tab and returns its inv_expr_id.  */
3787
3788 static int
3789 get_expr_id (struct ivopts_data *data, tree expr)
3790 {
3791 struct iv_inv_expr_ent ent;
3792 struct iv_inv_expr_ent **slot;
3793
3794 ent.expr = expr;
3795 ent.hash = iterative_hash_expr (expr, 0);
3796 slot = (struct iv_inv_expr_ent **) htab_find_slot (data->inv_expr_tab,
3797 &ent, INSERT);
3798 if (*slot)
3799 return (*slot)->id;
3800
3801 *slot = XNEW (struct iv_inv_expr_ent);
3802 (*slot)->expr = expr;
3803 (*slot)->hash = ent.hash;
3804 (*slot)->id = data->inv_expr_id++;
3805 return (*slot)->id;
3806 }
3807
3808 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3809 requires a new compiler generated temporary. Returns -1 otherwise.
3810 ADDRESS_P is a flag indicating if the expression is for address
3811 computation. */
3812
3813 static int
3814 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3815 tree cbase, HOST_WIDE_INT ratio,
3816 bool address_p)
3817 {
3818 aff_tree ubase_aff, cbase_aff;
3819 tree expr, ub, cb;
3820
3821 STRIP_NOPS (ubase);
3822 STRIP_NOPS (cbase);
3823 ub = ubase;
3824 cb = cbase;
3825
3826 if ((TREE_CODE (ubase) == INTEGER_CST)
3827 && (TREE_CODE (cbase) == INTEGER_CST))
3828 return -1;
3829
3830 /* Strips the constant part. */
3831 if (TREE_CODE (ubase) == PLUS_EXPR
3832 || TREE_CODE (ubase) == MINUS_EXPR
3833 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
3834 {
3835 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
3836 ubase = TREE_OPERAND (ubase, 0);
3837 }
3838
3839 /* Strips the constant part. */
3840 if (TREE_CODE (cbase) == PLUS_EXPR
3841 || TREE_CODE (cbase) == MINUS_EXPR
3842 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
3843 {
3844 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
3845 cbase = TREE_OPERAND (cbase, 0);
3846 }
3847
3848 if (address_p)
3849 {
3850 if (((TREE_CODE (ubase) == SSA_NAME)
3851 || (TREE_CODE (ubase) == ADDR_EXPR
3852 && is_gimple_min_invariant (ubase)))
3853 && (TREE_CODE (cbase) == INTEGER_CST))
3854 return -1;
3855
3856 if (((TREE_CODE (cbase) == SSA_NAME)
3857 || (TREE_CODE (cbase) == ADDR_EXPR
3858 && is_gimple_min_invariant (cbase)))
3859 && (TREE_CODE (ubase) == INTEGER_CST))
3860 return -1;
3861 }
3862
3863 if (ratio == 1)
3864 {
3865       if (operand_equal_p (ubase, cbase, 0))
3866 return -1;
3867
3868 if (TREE_CODE (ubase) == ADDR_EXPR
3869 && TREE_CODE (cbase) == ADDR_EXPR)
3870 {
3871 tree usym, csym;
3872
3873 usym = TREE_OPERAND (ubase, 0);
3874 csym = TREE_OPERAND (cbase, 0);
3875 if (TREE_CODE (usym) == ARRAY_REF)
3876 {
3877 tree ind = TREE_OPERAND (usym, 1);
3878 if (TREE_CODE (ind) == INTEGER_CST
3879 && host_integerp (ind, 0)
3880 && TREE_INT_CST_LOW (ind) == 0)
3881 usym = TREE_OPERAND (usym, 0);
3882 }
3883 if (TREE_CODE (csym) == ARRAY_REF)
3884 {
3885 tree ind = TREE_OPERAND (csym, 1);
3886 if (TREE_CODE (ind) == INTEGER_CST
3887 && host_integerp (ind, 0)
3888 && TREE_INT_CST_LOW (ind) == 0)
3889 csym = TREE_OPERAND (csym, 0);
3890 }
3891 if (operand_equal_p (usym, csym, 0))
3892 return -1;
3893 }
3894       /* Now do a more complex comparison.  */
3895 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
3896 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
3897 if (compare_aff_trees (&ubase_aff, &cbase_aff))
3898 return -1;
3899 }
3900
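  /* Compute UBASE - RATIO * CBASE in affine form and use the resulting
     expression as the key for the invariant-expression table.  */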
3901 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
3902 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
3903
3904 aff_combination_scale (&cbase_aff, shwi_to_double_int (-1 * ratio));
3905 aff_combination_add (&ubase_aff, &cbase_aff);
3906 expr = aff_combination_to_tree (&ubase_aff);
3907 return get_expr_id (data, expr);
3908 }
3909
3910
3912 /* Determines the cost of the computation by which USE is expressed
3913    from induction variable CAND.  If ADDRESS_P is true, we just need
3914    to create an address from it, otherwise we want to get it into a
3915    register.  A set of invariants we depend on is stored in
3916    DEPENDS_ON.  AT is the statement at which the value is computed.
3917    If CAN_AUTOINC is nonnull, use it to record whether autoinc
3918    addressing is likely.  */
3919
3920 static comp_cost
3921 get_computation_cost_at (struct ivopts_data *data,
3922 struct iv_use *use, struct iv_cand *cand,
3923 bool address_p, bitmap *depends_on, gimple at,
3924 bool *can_autoinc,
3925 int *inv_expr_id)
3926 {
3927 tree ubase = use->iv->base, ustep = use->iv->step;
3928 tree cbase, cstep;
3929 tree utype = TREE_TYPE (ubase), ctype;
3930 unsigned HOST_WIDE_INT cstepi, offset = 0;
3931 HOST_WIDE_INT ratio, aratio;
3932 bool var_present, symbol_present, stmt_is_after_inc;
3933 comp_cost cost;
3934 double_int rat;
3935 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
3936
3937 *depends_on = NULL;
3938
3939 /* Only consider real candidates. */
3940 if (!cand->iv)
3941 return infinite_cost;
3942
3943 cbase = cand->iv->base;
3944 cstep = cand->iv->step;
3945 ctype = TREE_TYPE (cbase);
3946
3947 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3948 {
3949 /* We do not have a precision to express the values of use. */
3950 return infinite_cost;
3951 }
3952
3953 if (address_p
3954 || (use->iv->base_object
3955 && cand->iv->base_object
3956 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
3957 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
3958 {
3959 /* Do not try to express address of an object with computation based
3960 on address of a different object. This may cause problems in rtl
3961 level alias analysis (that does not expect this to be happening,
3962 as this is illegal in C), and would be unlikely to be useful
3963 anyway. */
3964 if (use->iv->base_object
3965 && cand->iv->base_object
3966 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3967 return infinite_cost;
3968 }
3969
3970 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3971 {
3972 /* TODO -- add direct handling of this case. */
3973 goto fallback;
3974 }
3975
3976   /* CSTEPI is removed from the offset in case the statement is after the
3977      increment.  If the step is not constant, we use zero instead.
3978      This is a bit imprecise (there is the extra addition), but
3979      redundancy elimination is likely to transform the code so that
3980      it uses the value of the variable before the increment anyway,
3981      so this is not all that unrealistic.  */
3982 if (cst_and_fits_in_hwi (cstep))
3983 cstepi = int_cst_value (cstep);
3984 else
3985 cstepi = 0;
3986
3987 if (!constant_multiple_of (ustep, cstep, &rat))
3988 return infinite_cost;
3989
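  /* At this point ustep == rat * cstep holds; e.g. for ustep 8 and
     cstep 2, rat is 4.  RAT must fit in a signed HWI to be usable
     as RATIO below.  */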
3990 if (double_int_fits_in_shwi_p (rat))
3991 ratio = double_int_to_shwi (rat);
3992 else
3993 return infinite_cost;
3994
3995 STRIP_NOPS (cbase);
3996 ctype = TREE_TYPE (cbase);
3997
3998 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
3999
4000 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4001 or ratio == 1, it is better to handle this like
4002
4003 ubase - ratio * cbase + ratio * var
4004
4005      (this also holds in the case ratio == -1; handling it directly is a TODO).  */
4006
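  /* For instance, with cbase == 8 and ratio == 2 the use becomes
     ubase + 2 * (var - 8) = (ubase - 16) + 2 * var, so below the
     constant part goes into OFFSET as -ratio * cbase.  */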
4007 if (cst_and_fits_in_hwi (cbase))
4008 {
4009 offset = - ratio * int_cst_value (cbase);
4010 cost = difference_cost (data,
4011 ubase, build_int_cst (utype, 0),
4012 &symbol_present, &var_present, &offset,
4013 depends_on);
4014 cost.cost /= avg_loop_niter (data->current_loop);
4015 }
4016 else if (ratio == 1)
4017 {
4018 tree real_cbase = cbase;
4019
4020 /* Check to see if any adjustment is needed. */
4021 if (cstepi == 0 && stmt_is_after_inc)
4022 {
4023 aff_tree real_cbase_aff;
4024 aff_tree cstep_aff;
4025
4026 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4027 &real_cbase_aff);
4028 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4029
4030 aff_combination_add (&real_cbase_aff, &cstep_aff);
4031 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4032 }
4033
4034 cost = difference_cost (data,
4035 ubase, real_cbase,
4036 &symbol_present, &var_present, &offset,
4037 depends_on);
4038 cost.cost /= avg_loop_niter (data->current_loop);
4039 }
4040 else if (address_p
4041 && !POINTER_TYPE_P (ctype)
4042 && multiplier_allowed_in_address_p
4043 (ratio, TYPE_MODE (TREE_TYPE (utype)),
4044 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4045 {
4046 cbase
4047 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4048 cost = difference_cost (data,
4049 ubase, cbase,
4050 &symbol_present, &var_present, &offset,
4051 depends_on);
4052 cost.cost /= avg_loop_niter (data->current_loop);
4053 }
4054 else
4055 {
4056 cost = force_var_cost (data, cbase, depends_on);
4057 cost = add_costs (cost,
4058 difference_cost (data,
4059 ubase, build_int_cst (utype, 0),
4060 &symbol_present, &var_present,
4061 &offset, depends_on));
4062 cost.cost /= avg_loop_niter (data->current_loop);
4063 cost.cost += add_cost[data->speed][TYPE_MODE (ctype)];
4064 }
4065
4066 if (inv_expr_id)
4067 {
4068 *inv_expr_id =
4069 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4070       /* Clear depends_on -- the loop invariant expression subsumes
4071 	 the dependences.  */
4071 if (*inv_expr_id != -1 && depends_on && *depends_on)
4072 bitmap_clear (*depends_on);
4073 }
4074
4075 /* If we are after the increment, the value of the candidate is higher by
4076 one iteration. */
4077 if (stmt_is_after_inc)
4078 offset -= ratio * cstepi;
4079
4080   /* Now the computation has the shape symbol + var1 + const + ratio * var2.
4081 (symbol/var1/const parts may be omitted). If we are looking for an
4082 address, find the cost of addressing this. */
4083 if (address_p)
4084 return add_costs (cost,
4085 get_address_cost (symbol_present, var_present,
4086 offset, ratio, cstepi,
4087 TYPE_MODE (TREE_TYPE (utype)),
4088 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4089 speed, stmt_is_after_inc,
4090 can_autoinc));
4091
4092 /* Otherwise estimate the costs for computing the expression. */
4093 if (!symbol_present && !var_present && !offset)
4094 {
4095 if (ratio != 1)
4096 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4097 return cost;
4098 }
4099
4100   /* Symbol + offset should be compile-time computable, so consider that they
4101 are added once to the variable, if present. */
4102 if (var_present && (symbol_present || offset))
4103 cost.cost += adjust_setup_cost (data,
4104 add_cost[speed][TYPE_MODE (ctype)]);
4105
4106 /* Having offset does not affect runtime cost in case it is added to
4107 symbol, but it increases complexity. */
4108 if (offset)
4109 cost.complexity++;
4110
4111 cost.cost += add_cost[speed][TYPE_MODE (ctype)];
4112
4113 aratio = ratio > 0 ? ratio : -ratio;
4114 if (aratio != 1)
4115 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4116 return cost;
4117
4118 fallback:
4119 if (can_autoinc)
4120 *can_autoinc = false;
4121
4122 {
4123 /* Just get the expression, expand it and measure the cost. */
4124 tree comp = get_computation_at (data->current_loop, use, cand, at);
4125
4126 if (!comp)
4127 return infinite_cost;
4128
4129 if (address_p)
4130 comp = build_simple_mem_ref (comp);
4131
4132 return new_cost (computation_cost (comp, speed), 0);
4133 }
4134 }
4135
4136 /* Determines the cost of the computation by which USE is expressed
4137    from induction variable CAND.  If ADDRESS_P is true, we just need
4138    to create an address from it, otherwise we want to get it into a
4139    register.  A set of invariants we depend on is stored in
4140    DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
4141    autoinc addressing is likely.  */
4142
4143 static comp_cost
4144 get_computation_cost (struct ivopts_data *data,
4145 struct iv_use *use, struct iv_cand *cand,
4146 bool address_p, bitmap *depends_on,
4147 bool *can_autoinc, int *inv_expr_id)
4148 {
4149 return get_computation_cost_at (data,
4150 use, cand, address_p, depends_on, use->stmt,
4151 can_autoinc, inv_expr_id);
4152 }
4153
4154 /* Determines cost of basing replacement of USE on CAND in a generic
4155 expression. */
4156
4157 static bool
4158 determine_use_iv_cost_generic (struct ivopts_data *data,
4159 struct iv_use *use, struct iv_cand *cand)
4160 {
4161 bitmap depends_on;
4162 comp_cost cost;
4163 int inv_expr_id = -1;
4164
4165 /* The simple case first -- if we need to express value of the preserved
4166 original biv, the cost is 0. This also prevents us from counting the
4167 cost of increment twice -- once at this use and once in the cost of
4168 the candidate. */
4169 if (cand->pos == IP_ORIGINAL
4170 && cand->incremented_at == use->stmt)
4171 {
4172 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4173 ERROR_MARK, -1);
4174 return true;
4175 }
4176
4177 cost = get_computation_cost (data, use, cand, false, &depends_on,
4178 NULL, &inv_expr_id);
4179
4180 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4181 inv_expr_id);
4182
4183 return !infinite_cost_p (cost);
4184 }
4185
4186 /* Determines cost of basing replacement of USE on CAND in an address. */
4187
4188 static bool
4189 determine_use_iv_cost_address (struct ivopts_data *data,
4190 struct iv_use *use, struct iv_cand *cand)
4191 {
4192 bitmap depends_on;
4193 bool can_autoinc;
4194 int inv_expr_id = -1;
4195 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4196 &can_autoinc, &inv_expr_id);
4197
4198 if (cand->ainc_use == use)
4199 {
4200 if (can_autoinc)
4201 cost.cost -= cand->cost_step;
4202 /* If we generated the candidate solely for exploiting autoincrement
4203 opportunities, and it turns out it can't be used, set the cost to
4204 infinity to make sure we ignore it. */
4205 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4206 cost = infinite_cost;
4207 }
4208 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4209 inv_expr_id);
4210
4211 return !infinite_cost_p (cost);
4212 }
4213
4214 /* Computes value of candidate CAND at position AT in iteration NITER, and
4215 stores it to VAL. */
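/* E.g. for base 16 and step 4, VAL is 16 + 4 * NITER, or
   16 + 4 * (NITER + 1) when AT is after the increment of CAND.  */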
4216
4217 static void
4218 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4219 aff_tree *val)
4220 {
4221 aff_tree step, delta, nit;
4222 struct iv *iv = cand->iv;
4223 tree type = TREE_TYPE (iv->base);
4224 tree steptype = type;
4225 if (POINTER_TYPE_P (type))
4226 steptype = sizetype;
4227
4228 tree_to_aff_combination (iv->step, steptype, &step);
4229 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4230 aff_combination_convert (&nit, steptype);
4231 aff_combination_mult (&nit, &step, &delta);
4232 if (stmt_after_increment (loop, cand, at))
4233 aff_combination_add (&delta, &step);
4234
4235 tree_to_aff_combination (iv->base, type, val);
4236 aff_combination_add (val, &delta);
4237 }
4238
4239 /* Returns the period of induction variable IV.  */
4240
4241 static tree
4242 iv_period (struct iv *iv)
4243 {
4244 tree step = iv->step, period, type;
4245 tree pow2div;
4246
4247 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4248
4249 type = unsigned_type_for (TREE_TYPE (step));
4250   /* The period of the iv is lcm (step, type_range)/step - 1,
4251      i.e., N*type_range/step - 1, where lcm (step, type_range)
4252      == N*type_range.  Since the type range is a power of two,
4253      N == step >> num_of_ending_zeros_binary (step), so the
4254      final result is
4255 
4256      (type_range >> num_of_ending_zeros_binary (step)) - 1.
4257   */
4258 pow2div = num_ending_zeros (step);
4259
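  /* E.g. for a 32-bit unsigned type and step 12 (binary 1100),
     pow2div is 2 and the mask built below has 32 - 2 == 30 low bits
     set, i.e. the period is 0x3fffffff.  */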
4260 period = build_low_bits_mask (type,
4261 (TYPE_PRECISION (type)
4262 - tree_low_cst (pow2div, 1)));
4263
4264 return period;
4265 }
4266
4267 /* Returns the comparison operator used when eliminating the iv USE. */
4268
4269 static enum tree_code
4270 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4271 {
4272 struct loop *loop = data->current_loop;
4273 basic_block ex_bb;
4274 edge exit;
4275
4276 ex_bb = gimple_bb (use->stmt);
4277 exit = EDGE_SUCC (ex_bb, 0);
4278 if (flow_bb_inside_loop_p (loop, exit->dest))
4279 exit = EDGE_SUCC (ex_bb, 1);
4280
4281 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4282 }
4283
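/* Strips useless type conversions from EXP as long as they do not change
   whether the type may wrap.  */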
4284 static tree
4285 strip_wrap_conserving_type_conversions (tree exp)
4286 {
4287 while (tree_ssa_useless_type_conversion (exp)
4288 && (nowrap_type_p (TREE_TYPE (exp))
4289 == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp, 0)))))
4290 exp = TREE_OPERAND (exp, 0);
4291 return exp;
4292 }
4293
4294 /* Walk the SSA form and check whether E == WHAT.  Fairly simplistic; we
4295    only check for an exact structural match.  */
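/* E.g. E == a_1 with a_1 = b_2 + 1 matches WHAT == b_2 + 1; the SSA
   definition of a_1 is followed and its operands compared recursively.  */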
4296
4297 static bool
4298 expr_equal_p (tree e, tree what)
4299 {
4300 gimple stmt;
4301 enum tree_code code;
4302
4303 e = strip_wrap_conserving_type_conversions (e);
4304 what = strip_wrap_conserving_type_conversions (what);
4305
4306 code = TREE_CODE (what);
4307 if (TREE_TYPE (e) != TREE_TYPE (what))
4308 return false;
4309
4310 if (operand_equal_p (e, what, 0))
4311 return true;
4312
4313 if (TREE_CODE (e) != SSA_NAME)
4314 return false;
4315
4316 stmt = SSA_NAME_DEF_STMT (e);
4317 if (gimple_code (stmt) != GIMPLE_ASSIGN
4318 || gimple_assign_rhs_code (stmt) != code)
4319 return false;
4320
4321 switch (get_gimple_rhs_class (code))
4322 {
4323 case GIMPLE_BINARY_RHS:
4324 if (!expr_equal_p (gimple_assign_rhs2 (stmt), TREE_OPERAND (what, 1)))
4325 return false;
4326 /* Fallthru. */
4327
4328 case GIMPLE_UNARY_RHS:
4329 case GIMPLE_SINGLE_RHS:
4330 return expr_equal_p (gimple_assign_rhs1 (stmt), TREE_OPERAND (what, 0));
4331 default:
4332 return false;
4333 }
4334 }
4335
4336 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4337 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4338    calculation is performed in a non-wrapping type.
4339
4340 TODO: More generally, we could test for the situation that
4341 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4342 This would require knowing the sign of OFFSET.
4343
4344 Also, we only look for the first addition in the computation of BASE.
4345 More complex analysis would be better, but introducing it just for
4346    this optimization seems like overkill.  */
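/* E.g. if BASE is an SSA name defined as base_1 = start_2 + off_3 in a
   non-wrapping type and OFFSET is off_3, then BASE - OFFSET == start_2
   cannot overflow.  */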
4347
4348 static bool
4349 difference_cannot_overflow_p (tree base, tree offset)
4350 {
4351 enum tree_code code;
4352 tree e1, e2;
4353
4354 if (!nowrap_type_p (TREE_TYPE (base)))
4355 return false;
4356
4357 base = expand_simple_operations (base);
4358
4359 if (TREE_CODE (base) == SSA_NAME)
4360 {
4361 gimple stmt = SSA_NAME_DEF_STMT (base);
4362
4363 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4364 return false;
4365
4366 code = gimple_assign_rhs_code (stmt);
4367 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4368 return false;
4369
4370 e1 = gimple_assign_rhs1 (stmt);
4371 e2 = gimple_assign_rhs2 (stmt);
4372 }
4373 else
4374 {
4375 code = TREE_CODE (base);
4376 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4377 return false;
4378 e1 = TREE_OPERAND (base, 0);
4379 e2 = TREE_OPERAND (base, 1);
4380 }
4381
4382 /* TODO: deeper inspection may be necessary to prove the equality. */
4383 switch (code)
4384 {
4385 case PLUS_EXPR:
4386 return expr_equal_p (e1, offset) || expr_equal_p (e2, offset);
4387 case POINTER_PLUS_EXPR:
4388 return expr_equal_p (e2, offset);
4389
4390 default:
4391 return false;
4392 }
4393 }
4394
4395 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4396 comparison with CAND. NITER describes the number of iterations of
4397 the loops. If successful, the comparison in COMP_P is altered accordingly.
4398
4399 We aim to handle the following situation:
4400
4401 sometype *base, *p;
4402 int a, b, i;
4403
4404 i = a;
4405 p = p_0 = base + a;
4406
4407 do
4408 {
4409 bla (*p);
4410 p++;
4411 i++;
4412 }
4413 while (i < b);
4414
4415 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4416 We aim to optimize this to
4417
4418 p = p_0 = base + a;
4419 do
4420 {
4421 bla (*p);
4422 p++;
4423 }
4424 while (p < p_0 - a + b);
4425
4426    This preserves correctness, since the pointer arithmetic does not
4427    overflow.  More precisely:
4428
4429 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4430 overflow in computing it or the values of p.
4431 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4432 overflow. To prove this, we use the fact that p_0 = base + a. */
4433
4434 static bool
4435 iv_elimination_compare_lt (struct ivopts_data *data,
4436 struct iv_cand *cand, enum tree_code *comp_p,
4437 struct tree_niter_desc *niter)
4438 {
4439 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4440 struct affine_tree_combination nit, tmpa, tmpb;
4441 enum tree_code comp;
4442 HOST_WIDE_INT step;
4443
4444 /* We need to know that the candidate induction variable does not overflow.
4445 While more complex analysis may be used to prove this, for now just
4446 check that the variable appears in the original program and that it
4447 is computed in a type that guarantees no overflows. */
4448 cand_type = TREE_TYPE (cand->iv->base);
4449 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4450 return false;
4451
4452 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4453 the calculation of the BOUND could overflow, making the comparison
4454 invalid. */
4455 if (!data->loop_single_exit_p)
4456 return false;
4457
4458   /* We need to be able to decide whether the candidate is increasing or decreasing
4459 in order to choose the right comparison operator. */
4460 if (!cst_and_fits_in_hwi (cand->iv->step))
4461 return false;
4462 step = int_cst_value (cand->iv->step);
4463
4464 /* Check that the number of iterations matches the expected pattern:
4465 a + 1 > b ? 0 : b - a - 1. */
4466 mbz = niter->may_be_zero;
4467 if (TREE_CODE (mbz) == GT_EXPR)
4468 {
4469 /* Handle a + 1 > b. */
4470 tree op0 = TREE_OPERAND (mbz, 0);
4471 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4472 {
4473 a = TREE_OPERAND (op0, 0);
4474 b = TREE_OPERAND (mbz, 1);
4475 }
4476 else
4477 return false;
4478 }
4479 else if (TREE_CODE (mbz) == LT_EXPR)
4480 {
4481 tree op1 = TREE_OPERAND (mbz, 1);
4482
4483 /* Handle b < a + 1. */
4484 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4485 {
4486 a = TREE_OPERAND (op1, 0);
4487 b = TREE_OPERAND (mbz, 0);
4488 }
4489 else
4490 return false;
4491 }
4492 else
4493 return false;
4494
4495 /* Expected number of iterations is B - A - 1. Check that it matches
4496 the actual number, i.e., that B - A - NITER = 1. */
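  /* E.g. for a == 2 and b == 7 the loop runs 4 times, and indeed
     7 - 2 - 4 == 1; the affine combinations below verify
     B - A - NITER == 1 symbolically.  */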
4497 tree_to_aff_combination (niter->niter, nit_type, &nit);
4498 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4499 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4500 aff_combination_scale (&nit, double_int_minus_one);
4501 aff_combination_scale (&tmpa, double_int_minus_one);
4502 aff_combination_add (&tmpb, &tmpa);
4503 aff_combination_add (&tmpb, &nit);
4504 if (tmpb.n != 0 || !double_int_equal_p (tmpb.offset, double_int_one))
4505 return false;
4506
4507 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4508 overflow. */
4509 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4510 cand->iv->step,
4511 fold_convert (TREE_TYPE (cand->iv->step), a));
4512 if (!difference_cannot_overflow_p (cand->iv->base, offset))
4513 return false;
4514
4515 /* Determine the new comparison operator. */
4516 comp = step < 0 ? GT_EXPR : LT_EXPR;
4517 if (*comp_p == NE_EXPR)
4518 *comp_p = comp;
4519 else if (*comp_p == EQ_EXPR)
4520 *comp_p = invert_tree_comparison (comp, false);
4521 else
4522 gcc_unreachable ();
4523
4524 return true;
4525 }
4526
4527 /* Check whether it is possible to express the condition in USE by comparison
4528 of candidate CAND. If so, store the value compared with to BOUND, and the
4529 comparison operator to COMP. */
4530
4531 static bool
4532 may_eliminate_iv (struct ivopts_data *data,
4533 struct iv_use *use, struct iv_cand *cand, tree *bound,
4534 enum tree_code *comp)
4535 {
4536 basic_block ex_bb;
4537 edge exit;
4538 tree period;
4539 struct loop *loop = data->current_loop;
4540 aff_tree bnd;
4541 struct tree_niter_desc *desc = NULL;
4542
4543 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4544 return false;
4545
4546   /* For now this works only for exits that dominate the loop latch.
4547 TODO: extend to other conditions inside loop body. */
4548 ex_bb = gimple_bb (use->stmt);
4549 if (use->stmt != last_stmt (ex_bb)
4550 || gimple_code (use->stmt) != GIMPLE_COND
4551 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4552 return false;
4553
4554 exit = EDGE_SUCC (ex_bb, 0);
4555 if (flow_bb_inside_loop_p (loop, exit->dest))
4556 exit = EDGE_SUCC (ex_bb, 1);
4557 if (flow_bb_inside_loop_p (loop, exit->dest))
4558 return false;
4559
4560 desc = niter_for_exit (data, exit);
4561 if (!desc)
4562 return false;
4563
4564 /* Determine whether we can use the variable to test the exit condition.
4565 This is the case iff the period of the induction variable is greater
4566 than the number of iterations for which the exit condition is true. */
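  /* E.g. an 8-bit unsigned iv with step 1 has period 255; if the exit
     condition holds for at most 200 iterations, all candidate values in
     that range are distinct, so the test can be replaced by a comparison
     against the candidate's value in the last iteration.  */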
4567 period = iv_period (cand->iv);
4568
4569 /* If the number of iterations is constant, compare against it directly. */
4570 if (TREE_CODE (desc->niter) == INTEGER_CST)
4571 {
4572 /* See cand_value_at. */
4573 if (stmt_after_increment (loop, cand, use->stmt))
4574 {
4575 if (!tree_int_cst_lt (desc->niter, period))
4576 return false;
4577 }
4578 else
4579 {
4580 if (tree_int_cst_lt (period, desc->niter))
4581 return false;
4582 }
4583 }
4584
4585 /* If not, and if this is the only possible exit of the loop, see whether
4586 we can get a conservative estimate on the number of iterations of the
4587 entire loop and compare against that instead. */
4588 else
4589 {
4590 double_int period_value, max_niter;
4591
4592 max_niter = desc->max;
4593 if (stmt_after_increment (loop, cand, use->stmt))
4594 max_niter = double_int_add (max_niter, double_int_one);
4595 period_value = tree_to_double_int (period);
4596 if (double_int_ucmp (max_niter, period_value) > 0)
4597 {
4598 /* See if we can take advantage of inferred loop bound information. */
4599 if (data->loop_single_exit_p)
4600 {
4601 if (!max_loop_iterations (loop, &max_niter))
4602 return false;
4603 /* The loop bound is already adjusted by adding 1. */
4604 if (double_int_ucmp (max_niter, period_value) > 0)
4605 return false;
4606 }
4607 else
4608 return false;
4609 }
4610 }
4611
4612 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4613
4614 *bound = aff_combination_to_tree (&bnd);
4615 *comp = iv_elimination_compare (data, use);
4616
4617 /* It is unlikely that computing the number of iterations using division
4618 would be more profitable than keeping the original induction variable. */
4619 if (expression_expensive_p (*bound))
4620 return false;
4621
4622   /* Sometimes it is possible to handle the situation that the number of
4623      iterations may be zero unless additional assumptions hold, by using
4624      < instead of != in the exit condition.
4625
4626 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4627 base the exit condition on it. However, that is often too
4628 expensive. */
4629 if (!integer_zerop (desc->may_be_zero))
4630 return iv_elimination_compare_lt (data, cand, comp, desc);
4631
4632 return true;
4633 }
4634
4635 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
4636    be copied, if it is used in the loop body and DATA->body_includes_call.  */
4637
4638 static int
4639 parm_decl_cost (struct ivopts_data *data, tree bound)
4640 {
4641 tree sbound = bound;
4642 STRIP_NOPS (sbound);
4643
4644 if (TREE_CODE (sbound) == SSA_NAME
4645 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4646 && gimple_nop_p (SSA_NAME_DEF_STMT (sbound))
4647 && data->body_includes_call)
4648 return COSTS_N_INSNS (1);
4649
4650 return 0;
4651 }
4652
4653 /* Determines cost of basing replacement of USE on CAND in a condition. */
4654
4655 static bool
4656 determine_use_iv_cost_condition (struct ivopts_data *data,
4657 struct iv_use *use, struct iv_cand *cand)
4658 {
4659 tree bound = NULL_TREE;
4660 struct iv *cmp_iv;
4661 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4662 comp_cost elim_cost, express_cost, cost, bound_cost;
4663 bool ok;
4664 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4665 tree *control_var, *bound_cst;
4666 enum tree_code comp = ERROR_MARK;
4667
4668 /* Only consider real candidates. */
4669 if (!cand->iv)
4670 {
4671 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4672 ERROR_MARK, -1);
4673 return false;
4674 }
4675
4676 /* Try iv elimination. */
4677 if (may_eliminate_iv (data, use, cand, &bound, &comp))
4678 {
4679 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4680 if (elim_cost.cost == 0)
4681 elim_cost.cost = parm_decl_cost (data, bound);
4682 else if (TREE_CODE (bound) == INTEGER_CST)
4683 elim_cost.cost = 0;
4684 /* If we replace a loop condition 'i < n' with 'p < base + n',
4685 depends_on_elim will have 'base' and 'n' set, which implies
4686 that both 'base' and 'n' will be live during the loop. More likely,
4687 'base + n' will be loop invariant, resulting in only one live value
4688 during the loop. So in that case we clear depends_on_elim and set
4689 elim_inv_expr_id instead. */
4690 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4691 {
4692 elim_inv_expr_id = get_expr_id (data, bound);
4693 bitmap_clear (depends_on_elim);
4694 }
4695 /* The bound is a loop invariant, so it will be only computed
4696 once. */
4697 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4698 }
4699 else
4700 elim_cost = infinite_cost;
4701
4702 /* Try expressing the original giv. If it is compared with an invariant,
4703 note that we cannot get rid of it. */
4704 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4705 NULL, &cmp_iv);
4706 gcc_assert (ok);
4707
4708 /* When the condition is a comparison of the candidate IV against
4709 zero, prefer this IV.
4710
4711 TODO: The constant that we're subtracting from the cost should
4712 be target-dependent. This information should be added to the
4713 target costs for each backend. */
4714 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4715 && integer_zerop (*bound_cst)
4716 && (operand_equal_p (*control_var, cand->var_after, 0)
4717 || operand_equal_p (*control_var, cand->var_before, 0)))
4718 elim_cost.cost -= 1;
4719
4720 express_cost = get_computation_cost (data, use, cand, false,
4721 &depends_on_express, NULL,
4722 &express_inv_expr_id);
4723 fd_ivopts_data = data;
4724 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4725
4726 /* Count the cost of the original bound as well. */
4727 bound_cost = force_var_cost (data, *bound_cst, NULL);
4728 if (bound_cost.cost == 0)
4729 bound_cost.cost = parm_decl_cost (data, *bound_cst);
4730 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
4731 bound_cost.cost = 0;
4732 express_cost.cost += bound_cost.cost;
4733
4734 /* Choose the better approach, preferring the eliminated IV. */
4735 if (compare_costs (elim_cost, express_cost) <= 0)
4736 {
4737 cost = elim_cost;
4738 depends_on = depends_on_elim;
4739 depends_on_elim = NULL;
4740 inv_expr_id = elim_inv_expr_id;
4741 }
4742 else
4743 {
4744 cost = express_cost;
4745 depends_on = depends_on_express;
4746 depends_on_express = NULL;
4747 bound = NULL_TREE;
4748 comp = ERROR_MARK;
4749 inv_expr_id = express_inv_expr_id;
4750 }
4751
4752 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
4753
4754 if (depends_on_elim)
4755 BITMAP_FREE (depends_on_elim);
4756 if (depends_on_express)
4757 BITMAP_FREE (depends_on_express);
4758
4759 return !infinite_cost_p (cost);
4760 }
4761
4762 /* Determines cost of basing replacement of USE on CAND. Returns false
4763 if USE cannot be based on CAND. */
4764
4765 static bool
4766 determine_use_iv_cost (struct ivopts_data *data,
4767 struct iv_use *use, struct iv_cand *cand)
4768 {
4769 switch (use->type)
4770 {
4771 case USE_NONLINEAR_EXPR:
4772 return determine_use_iv_cost_generic (data, use, cand);
4773
4774 case USE_ADDRESS:
4775 return determine_use_iv_cost_address (data, use, cand);
4776
4777 case USE_COMPARE:
4778 return determine_use_iv_cost_condition (data, use, cand);
4779
4780 default:
4781 gcc_unreachable ();
4782 }
4783 }
4784
4785 /* Return true if get_computation_cost indicates that autoincrement is
4786 a possibility for the pair of USE and CAND, false otherwise. */
4787
4788 static bool
4789 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4790 struct iv_cand *cand)
4791 {
4792 bitmap depends_on;
4793 bool can_autoinc;
4794 comp_cost cost;
4795
4796 if (use->type != USE_ADDRESS)
4797 return false;
4798
4799 cost = get_computation_cost (data, use, cand, true, &depends_on,
4800 &can_autoinc, NULL);
4801
4802 BITMAP_FREE (depends_on);
4803
4804 return !infinite_cost_p (cost) && can_autoinc;
4805 }
4806
4807 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4808 use that allows autoincrement, and set their AINC_USE if possible. */
4809
4810 static void
4811 set_autoinc_for_original_candidates (struct ivopts_data *data)
4812 {
4813 unsigned i, j;
4814
4815 for (i = 0; i < n_iv_cands (data); i++)
4816 {
4817 struct iv_cand *cand = iv_cand (data, i);
4818 struct iv_use *closest = NULL;
4819 if (cand->pos != IP_ORIGINAL)
4820 continue;
4821 for (j = 0; j < n_iv_uses (data); j++)
4822 {
4823 struct iv_use *use = iv_use (data, j);
4824 unsigned uid = gimple_uid (use->stmt);
4825 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
4826 || uid > gimple_uid (cand->incremented_at))
4827 continue;
4828 if (closest == NULL || uid > gimple_uid (closest->stmt))
4829 closest = use;
4830 }
4831 if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
4832 continue;
4833 cand->ainc_use = closest;
4834 }
4835 }
4836
4837 /* Finds the candidates for the induction variables. */
4838
4839 static void
4840 find_iv_candidates (struct ivopts_data *data)
4841 {
4842 /* Add commonly used ivs. */
4843 add_standard_iv_candidates (data);
4844
4845 /* Add old induction variables. */
4846 add_old_ivs_candidates (data);
4847
4848 /* Add induction variables derived from uses. */
4849 add_derived_ivs_candidates (data);
4850
4851 set_autoinc_for_original_candidates (data);
4852
4853 /* Record the important candidates. */
4854 record_important_candidates (data);
4855 }
4856
4857 /* Determines costs of basing the use of the iv on an iv candidate. */
4858
4859 static void
4860 determine_use_iv_costs (struct ivopts_data *data)
4861 {
4862 unsigned i, j;
4863 struct iv_use *use;
4864 struct iv_cand *cand;
4865 bitmap to_clear = BITMAP_ALLOC (NULL);
4866
4867 alloc_use_cost_map (data);
4868
4869 for (i = 0; i < n_iv_uses (data); i++)
4870 {
4871 use = iv_use (data, i);
4872
4873 if (data->consider_all_candidates)
4874 {
4875 for (j = 0; j < n_iv_cands (data); j++)
4876 {
4877 cand = iv_cand (data, j);
4878 determine_use_iv_cost (data, use, cand);
4879 }
4880 }
4881 else
4882 {
4883 bitmap_iterator bi;
4884
4885 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4886 {
4887 cand = iv_cand (data, j);
4888 if (!determine_use_iv_cost (data, use, cand))
4889 bitmap_set_bit (to_clear, j);
4890 }
4891
4892 	  /* Remove the candidates for which the cost is infinite from
4893 the list of related candidates. */
4894 bitmap_and_compl_into (use->related_cands, to_clear);
4895 bitmap_clear (to_clear);
4896 }
4897 }
4898
4899 BITMAP_FREE (to_clear);
4900
4901 if (dump_file && (dump_flags & TDF_DETAILS))
4902 {
4903 fprintf (dump_file, "Use-candidate costs:\n");
4904
4905 for (i = 0; i < n_iv_uses (data); i++)
4906 {
4907 use = iv_use (data, i);
4908
4909 fprintf (dump_file, "Use %d:\n", i);
4910 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
4911 for (j = 0; j < use->n_map_members; j++)
4912 {
4913 if (!use->cost_map[j].cand
4914 || infinite_cost_p (use->cost_map[j].cost))
4915 continue;
4916
4917 fprintf (dump_file, " %d\t%d\t%d\t",
4918 use->cost_map[j].cand->id,
4919 use->cost_map[j].cost.cost,
4920 use->cost_map[j].cost.complexity);
4921 if (use->cost_map[j].depends_on)
4922 bitmap_print (dump_file,
4923 use->cost_map[j].depends_on, "","");
4924 if (use->cost_map[j].inv_expr_id != -1)
4925 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
4926 fprintf (dump_file, "\n");
4927 }
4928
4929 fprintf (dump_file, "\n");
4930 }
4931 fprintf (dump_file, "\n");
4932 }
4933 }
4934
4935 /* Determines cost of the candidate CAND. */
4936
4937 static void
4938 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4939 {
4940 comp_cost cost_base;
4941 unsigned cost, cost_step;
4942 tree base;
4943
4944 if (!cand->iv)
4945 {
4946 cand->cost = 0;
4947 return;
4948 }
4949
4950 /* There are two costs associated with the candidate -- its increment
4951 and its initialization. The second is almost negligible for any loop
4952      that rolls enough, so we take it into account only a little.  */
4953
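  /* E.g. if forcing the base into a register costs COSTS_N_INSNS (1)
     and a step addition costs 1, the candidate costs
     1 + adjust_setup_cost (data, COSTS_N_INSNS (1)); adjust_setup_cost
     scales the one-time setup cost down when optimizing for speed,
     similarly to the divisions by avg_loop_niter in
     get_computation_cost_at.  */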
4954 base = cand->iv->base;
4955 cost_base = force_var_cost (data, base, NULL);
4956 /* It will be exceptional that the iv register happens to be initialized with
4957 the proper value at no cost. In general, there will at least be a regcopy
4958 or a const set. */
4959 if (cost_base.cost == 0)
4960 cost_base.cost = COSTS_N_INSNS (1);
4961 cost_step = add_cost[data->speed][TYPE_MODE (TREE_TYPE (base))];
4962
4963 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
4964
4965   /* Prefer the original ivs unless we may gain something by replacing them.
4966 The reason is to make debugging simpler; so this is not relevant for
4967 artificial ivs created by other optimization passes. */
4968 if (cand->pos != IP_ORIGINAL
4969 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4970 cost++;
4971
4972 /* Prefer not to insert statements into latch unless there are some
4973 already (so that we do not create unnecessary jumps). */
4974 if (cand->pos == IP_END
4975 && empty_block_p (ip_end_pos (data->current_loop)))
4976 cost++;
4977
4978 cand->cost = cost;
4979 cand->cost_step = cost_step;
4980 }
4981
4982 /* Determines costs of computation of the candidates. */
4983
4984 static void
4985 determine_iv_costs (struct ivopts_data *data)
4986 {
4987 unsigned i;
4988
4989 if (dump_file && (dump_flags & TDF_DETAILS))
4990 {
4991 fprintf (dump_file, "Candidate costs:\n");
4992 fprintf (dump_file, " cand\tcost\n");
4993 }
4994
4995 for (i = 0; i < n_iv_cands (data); i++)
4996 {
4997 struct iv_cand *cand = iv_cand (data, i);
4998
4999 determine_iv_cost (data, cand);
5000
5001 if (dump_file && (dump_flags & TDF_DETAILS))
5002 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5003 }
5004
5005 if (dump_file && (dump_flags & TDF_DETAILS))
5006 fprintf (dump_file, "\n");
5007 }
5008
5009 /* Calculates cost for having SIZE induction variables. */
5010
5011 static unsigned
5012 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5013 {
5014 /* We add size to the cost, so that we prefer eliminating ivs
5015 if possible. */
5016 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5017 data->body_includes_call);
5018 }
5019
5020 /* For each size of the induction variable set determine the penalty. */
5021
5022 static void
5023 determine_set_costs (struct ivopts_data *data)
5024 {
5025 unsigned j, n;
5026 gimple phi;
5027 gimple_stmt_iterator psi;
5028 tree op;
5029 struct loop *loop = data->current_loop;
5030 bitmap_iterator bi;
5031
5032 if (dump_file && (dump_flags & TDF_DETAILS))
5033 {
5034 fprintf (dump_file, "Global costs:\n");
5035 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5036 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5037 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5038 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5039 }
5040
5041 n = 0;
5042 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5043 {
5044 phi = gsi_stmt (psi);
5045 op = PHI_RESULT (phi);
5046
5047 if (!is_gimple_reg (op))
5048 continue;
5049
5050 if (get_iv (data, op))
5051 continue;
5052
5053 n++;
5054 }
5055
5056 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5057 {
5058 struct version_info *info = ver_info (data, j);
5059
5060 if (info->inv_id && info->has_nonlin_use)
5061 n++;
5062 }
5063
5064 data->regs_used = n;
5065 if (dump_file && (dump_flags & TDF_DETAILS))
5066 fprintf (dump_file, " regs_used %d\n", n);
5067
5068 if (dump_file && (dump_flags & TDF_DETAILS))
5069 {
5070 fprintf (dump_file, " cost for size:\n");
5071 fprintf (dump_file, " ivs\tcost\n");
5072 for (j = 0; j <= 2 * target_avail_regs; j++)
5073 fprintf (dump_file, " %d\t%d\n", j,
5074 ivopts_global_cost_for_size (data, j));
5075 fprintf (dump_file, "\n");
5076 }
5077 }
5078
5079 /* Returns true if A is a cheaper cost pair than B. */
5080
5081 static bool
5082 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5083 {
5084 int cmp;
5085
5086 if (!a)
5087 return false;
5088
5089 if (!b)
5090 return true;
5091
5092 cmp = compare_costs (a->cost, b->cost);
5093 if (cmp < 0)
5094 return true;
5095
5096 if (cmp > 0)
5097 return false;
5098
5099 /* In case the costs are the same, prefer the cheaper candidate. */
5100 if (a->cand->cost < b->cand->cost)
5101 return true;
5102
5103 return false;
5104 }
5105
5106
5107 /* Returns the cost pair of the candidate by which USE is expressed in IVS.  */
5108
5109 static struct cost_pair *
5110 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5111 {
5112 return ivs->cand_for_use[use->id];
5113 }
5114
5115 /* Computes the cost field of IVS structure. */
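/* The total is cand_use_cost + cand_cost plus the register pressure
   penalty for keeping n_regs + num_used_inv_expr values live; e.g. a
   set whose uses cost 12, whose candidates cost 8 and which keeps 3
   values live costs 20 + ivopts_global_cost_for_size (data, 3).  */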
5116
5117 static void
5118 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5119 {
5120 comp_cost cost = ivs->cand_use_cost;
5121
5122 cost.cost += ivs->cand_cost;
5123
5124 cost.cost += ivopts_global_cost_for_size (data,
5125 ivs->n_regs + ivs->num_used_inv_expr);
5126
5127 ivs->cost = cost;
5128 }
5129
5130 /* Remove the invariants in set INVS from set IVS.  */
5131
5132 static void
5133 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5134 {
5135 bitmap_iterator bi;
5136 unsigned iid;
5137
5138 if (!invs)
5139 return;
5140
5141 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5142 {
5143 ivs->n_invariant_uses[iid]--;
5144 if (ivs->n_invariant_uses[iid] == 0)
5145 ivs->n_regs--;
5146 }
5147 }
5148
5149 /* Set USE not to be expressed by any candidate in IVS. */
5150
5151 static void
5152 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5153 struct iv_use *use)
5154 {
5155 unsigned uid = use->id, cid;
5156 struct cost_pair *cp;
5157
5158 cp = ivs->cand_for_use[uid];
5159 if (!cp)
5160 return;
5161 cid = cp->cand->id;
5162
5163 ivs->bad_uses++;
5164 ivs->cand_for_use[uid] = NULL;
5165 ivs->n_cand_uses[cid]--;
5166
5167 if (ivs->n_cand_uses[cid] == 0)
5168 {
5169 bitmap_clear_bit (ivs->cands, cid);
5170 /* Do not count the pseudocandidates. */
5171 if (cp->cand->iv)
5172 ivs->n_regs--;
5173 ivs->n_cands--;
5174 ivs->cand_cost -= cp->cand->cost;
5175
5176 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5177 }
5178
5179 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5180
5181 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5182
5183 if (cp->inv_expr_id != -1)
5184 {
5185 ivs->used_inv_expr[cp->inv_expr_id]--;
5186 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5187 ivs->num_used_inv_expr--;
5188 }
5189 iv_ca_recount_cost (data, ivs);
5190 }
5191
5192 /* Add invariants in set INVS to set IVS. */
5193
5194 static void
5195 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5196 {
5197 bitmap_iterator bi;
5198 unsigned iid;
5199
5200 if (!invs)
5201 return;
5202
5203 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5204 {
5205 ivs->n_invariant_uses[iid]++;
5206 if (ivs->n_invariant_uses[iid] == 1)
5207 ivs->n_regs++;
5208 }
5209 }
5210
5211 /* Set cost pair for USE in set IVS to CP. */
5212
5213 static void
5214 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5215 struct iv_use *use, struct cost_pair *cp)
5216 {
5217 unsigned uid = use->id, cid;
5218
5219 if (ivs->cand_for_use[uid] == cp)
5220 return;
5221
5222 if (ivs->cand_for_use[uid])
5223 iv_ca_set_no_cp (data, ivs, use);
5224
5225 if (cp)
5226 {
5227 cid = cp->cand->id;
5228
5229 ivs->bad_uses--;
5230 ivs->cand_for_use[uid] = cp;
5231 ivs->n_cand_uses[cid]++;
5232 if (ivs->n_cand_uses[cid] == 1)
5233 {
5234 bitmap_set_bit (ivs->cands, cid);
5235 /* Do not count the pseudocandidates. */
5236 if (cp->cand->iv)
5237 ivs->n_regs++;
5238 ivs->n_cands++;
5239 ivs->cand_cost += cp->cand->cost;
5240
5241 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5242 }
5243
5244 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5245 iv_ca_set_add_invariants (ivs, cp->depends_on);
5246
5247 if (cp->inv_expr_id != -1)
5248 {
5249 ivs->used_inv_expr[cp->inv_expr_id]++;
5250 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5251 ivs->num_used_inv_expr++;
5252 }
5253 iv_ca_recount_cost (data, ivs);
5254 }
5255 }
5256
5257 /* Extend set IVS by expressing USE by some of the candidates in it
5258 if possible. All important candidates will be considered
5259 if IMPORTANT_CANDIDATES is true. */
5260
5261 static void
5262 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5263 struct iv_use *use, bool important_candidates)
5264 {
5265 struct cost_pair *best_cp = NULL, *cp;
5266 bitmap_iterator bi;
5267 bitmap cands;
5268 unsigned i;
5269
5270 gcc_assert (ivs->upto >= use->id);
5271
5272 if (ivs->upto == use->id)
5273 {
5274 ivs->upto++;
5275 ivs->bad_uses++;
5276 }
5277
5278 cands = (important_candidates ? data->important_candidates : ivs->cands);
5279 EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
5280 {
5281 struct iv_cand *cand = iv_cand (data, i);
5282
5283 cp = get_use_iv_cost (data, use, cand);
5284
5285 if (cheaper_cost_pair (cp, best_cp))
5286 best_cp = cp;
5287 }
5288
5289 iv_ca_set_cp (data, ivs, use, best_cp);
5290 }
5291
5292 /* Get cost for assignment IVS. */
5293
5294 static comp_cost
5295 iv_ca_cost (struct iv_ca *ivs)
5296 {
5297 /* This was a conditional expression but it triggered a bug in
5298 Sun C 5.5. */
5299 if (ivs->bad_uses)
5300 return infinite_cost;
5301 else
5302 return ivs->cost;
5303 }
5304
5305 /* Returns true if all dependences of CP are among invariants in IVS. */
5306
5307 static bool
5308 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5309 {
5310 unsigned i;
5311 bitmap_iterator bi;
5312
5313 if (!cp->depends_on)
5314 return true;
5315
5316 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5317 {
5318 if (ivs->n_invariant_uses[i] == 0)
5319 return false;
5320 }
5321
5322 return true;
5323 }
5324
5325 /* Creates a change of expressing USE by NEW_CP instead of OLD_CP and chains
5326 it before NEXT_CHANGE. */
5327
5328 static struct iv_ca_delta *
5329 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5330 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5331 {
5332 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5333
5334 change->use = use;
5335 change->old_cp = old_cp;
5336 change->new_cp = new_cp;
5337 change->next_change = next_change;
5338
5339 return change;
5340 }
5341
5342 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5343 are rewritten. */
5344
5345 static struct iv_ca_delta *
5346 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5347 {
5348 struct iv_ca_delta *last;
5349
5350 if (!l2)
5351 return l1;
5352
5353 if (!l1)
5354 return l2;
5355
5356 for (last = l1; last->next_change; last = last->next_change)
5357 continue;
5358 last->next_change = l2;
5359
5360 return l1;
5361 }
5362
5363 /* Reverse the list of changes DELTA, forming the inverse to it. */
5364
5365 static struct iv_ca_delta *
5366 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5367 {
5368 struct iv_ca_delta *act, *next, *prev = NULL;
5369 struct cost_pair *tmp;
5370
5371 for (act = delta; act; act = next)
5372 {
5373 next = act->next_change;
5374 act->next_change = prev;
5375 prev = act;
5376
5377 tmp = act->old_cp;
5378 act->old_cp = act->new_cp;
5379 act->new_cp = tmp;
5380 }
5381
5382 return prev;
5383 }
5384
5385 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5386 reverted instead. */
5387
5388 static void
5389 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5390 struct iv_ca_delta *delta, bool forward)
5391 {
5392 struct cost_pair *from, *to;
5393 struct iv_ca_delta *act;
5394
5395 if (!forward)
5396 delta = iv_ca_delta_reverse (delta);
5397
5398 for (act = delta; act; act = act->next_change)
5399 {
5400 from = act->old_cp;
5401 to = act->new_cp;
5402 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5403 iv_ca_set_cp (data, ivs, act->use, to);
5404 }
5405
5406 if (!forward)
5407 iv_ca_delta_reverse (delta);
5408 }
5409
5410 /* Returns true if CAND is used in IVS. */
5411
5412 static bool
5413 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5414 {
5415 return ivs->n_cand_uses[cand->id] > 0;
5416 }
5417
5418 /* Returns number of induction variable candidates in the set IVS. */
5419
5420 static unsigned
5421 iv_ca_n_cands (struct iv_ca *ivs)
5422 {
5423 return ivs->n_cands;
5424 }
5425
5426 /* Free the list of changes DELTA. */
5427
5428 static void
5429 iv_ca_delta_free (struct iv_ca_delta **delta)
5430 {
5431 struct iv_ca_delta *act, *next;
5432
5433 for (act = *delta; act; act = next)
5434 {
5435 next = act->next_change;
5436 free (act);
5437 }
5438
5439 *delta = NULL;
5440 }
5441
5442 /* Allocates a new iv candidate assignment.  */
5443
5444 static struct iv_ca *
5445 iv_ca_new (struct ivopts_data *data)
5446 {
5447 struct iv_ca *nw = XNEW (struct iv_ca);
5448
5449 nw->upto = 0;
5450 nw->bad_uses = 0;
5451 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5452 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5453 nw->cands = BITMAP_ALLOC (NULL);
5454 nw->n_cands = 0;
5455 nw->n_regs = 0;
5456 nw->cand_use_cost = no_cost;
5457 nw->cand_cost = 0;
5458 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5459 nw->cost = no_cost;
5460 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5461 nw->num_used_inv_expr = 0;
5462
5463 return nw;
5464 }
5465
5466 /* Free memory occupied by the set IVS. */
5467
5468 static void
5469 iv_ca_free (struct iv_ca **ivs)
5470 {
5471 free ((*ivs)->cand_for_use);
5472 free ((*ivs)->n_cand_uses);
5473 BITMAP_FREE ((*ivs)->cands);
5474 free ((*ivs)->n_invariant_uses);
5475 free ((*ivs)->used_inv_expr);
5476 free (*ivs);
5477 *ivs = NULL;
5478 }
5479
5480 /* Dumps IVS to FILE. */
5481
5482 static void
5483 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5484 {
5485 const char *pref = " invariants ";
5486 unsigned i;
5487 comp_cost cost = iv_ca_cost (ivs);
5488
5489 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5490 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5491 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5492 bitmap_print (file, ivs->cands, " candidates: ","\n");
5493
5494 for (i = 0; i < ivs->upto; i++)
5495 {
5496 struct iv_use *use = iv_use (data, i);
5497 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5498 if (cp)
5499 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5500 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5501 else
5502 fprintf (file, " use:%d --> ??\n", use->id);
5503 }
5504
5505 for (i = 1; i <= data->max_inv_id; i++)
5506 if (ivs->n_invariant_uses[i])
5507 {
5508 fprintf (file, "%s%d", pref, i);
5509 pref = ", ";
5510 }
5511 fprintf (file, "\n\n");
5512 }
5513
5514 /* Try changing the candidate in IVS to CAND for each use.  Return the cost
5515    of the new set, and store the differences in DELTA.  The number of induction
5516    variables in the new set is stored to N_IVS.  MIN_NCAND is a flag; when it
5517    is true, the function tries to find a solution with a minimal number of
5518    iv candidates.  */
5518
5519 static comp_cost
5520 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5521 struct iv_cand *cand, struct iv_ca_delta **delta,
5522 unsigned *n_ivs, bool min_ncand)
5523 {
5524 unsigned i;
5525 comp_cost cost;
5526 struct iv_use *use;
5527 struct cost_pair *old_cp, *new_cp;
5528
5529 *delta = NULL;
5530 for (i = 0; i < ivs->upto; i++)
5531 {
5532 use = iv_use (data, i);
5533 old_cp = iv_ca_cand_for_use (ivs, use);
5534
5535 if (old_cp
5536 && old_cp->cand == cand)
5537 continue;
5538
5539 new_cp = get_use_iv_cost (data, use, cand);
5540 if (!new_cp)
5541 continue;
5542
5543 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5544 continue;
5545
5546 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5547 continue;
5548
5549 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5550 }
5551
5552 iv_ca_delta_commit (data, ivs, *delta, true);
5553 cost = iv_ca_cost (ivs);
5554 if (n_ivs)
5555 *n_ivs = iv_ca_n_cands (ivs);
5556 iv_ca_delta_commit (data, ivs, *delta, false);
5557
5558 return cost;
5559 }
5560
5561 /* Try narrowing set IVS by removing CAND. Return the cost of
5562 the new set and store the differences in DELTA. */
5563
5564 static comp_cost
5565 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5566 struct iv_cand *cand, struct iv_ca_delta **delta)
5567 {
5568 unsigned i, ci;
5569 struct iv_use *use;
5570 struct cost_pair *old_cp, *new_cp, *cp;
5571 bitmap_iterator bi;
5572 struct iv_cand *cnd;
5573 comp_cost cost;
5574
5575 *delta = NULL;
5576 for (i = 0; i < n_iv_uses (data); i++)
5577 {
5578 use = iv_use (data, i);
5579
5580 old_cp = iv_ca_cand_for_use (ivs, use);
5581 if (old_cp->cand != cand)
5582 continue;
5583
5584 new_cp = NULL;
5585
5586 if (data->consider_all_candidates)
5587 {
5588 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5589 {
5590 if (ci == cand->id)
5591 continue;
5592
5593 cnd = iv_cand (data, ci);
5594
5595 cp = get_use_iv_cost (data, use, cnd);
5596 if (!cp)
5597 continue;
5598
5599 if (!iv_ca_has_deps (ivs, cp))
5600 continue;
5601
5602 if (!cheaper_cost_pair (cp, new_cp))
5603 continue;
5604
5605 new_cp = cp;
5606 }
5607 }
5608 else
5609 {
5610 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5611 {
5612 if (ci == cand->id)
5613 continue;
5614
5615 cnd = iv_cand (data, ci);
5616
5617 cp = get_use_iv_cost (data, use, cnd);
5618 if (!cp)
5619 continue;
5620 if (!iv_ca_has_deps (ivs, cp))
5621 continue;
5622
5623 if (!cheaper_cost_pair (cp, new_cp))
5624 continue;
5625
5626 new_cp = cp;
5627 }
5628 }
5629
5630 if (!new_cp)
5631 {
5632 iv_ca_delta_free (delta);
5633 return infinite_cost;
5634 }
5635
5636 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5637 }
5638
5639 iv_ca_delta_commit (data, ivs, *delta, true);
5640 cost = iv_ca_cost (ivs);
5641 iv_ca_delta_commit (data, ivs, *delta, false);
5642
5643 return cost;
5644 }
5645
5646 /* Try optimizing the set of candidates IVS by removing candidates other
5647    than EXCEPT_CAND from it.  Return the cost of the new set, and store
5648    the differences in DELTA.  */
5649
5650 static comp_cost
5651 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5652 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5653 {
5654 bitmap_iterator bi;
5655 struct iv_ca_delta *act_delta, *best_delta;
5656 unsigned i;
5657 comp_cost best_cost, acost;
5658 struct iv_cand *cand;
5659
5660 best_delta = NULL;
5661 best_cost = iv_ca_cost (ivs);
5662
5663 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5664 {
5665 cand = iv_cand (data, i);
5666
5667 if (cand == except_cand)
5668 continue;
5669
5670 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5671
5672 if (compare_costs (acost, best_cost) < 0)
5673 {
5674 best_cost = acost;
5675 iv_ca_delta_free (&best_delta);
5676 best_delta = act_delta;
5677 }
5678 else
5679 iv_ca_delta_free (&act_delta);
5680 }
5681
5682 if (!best_delta)
5683 {
5684 *delta = NULL;
5685 return best_cost;
5686 }
5687
5688 /* Recurse to possibly remove other unnecessary ivs. */
5689 iv_ca_delta_commit (data, ivs, best_delta, true);
5690 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5691 iv_ca_delta_commit (data, ivs, best_delta, false);
5692 *delta = iv_ca_delta_join (best_delta, *delta);
5693 return best_cost;
5694 }
5695
5696 /* Tries to extend the set IVS in the best possible way in order
5697    to express USE.  If ORIGINALP is true, prefer candidates from
5698 the original set of IVs, otherwise favor important candidates not
5699 based on any memory object. */
5700
5701 static bool
5702 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5703 struct iv_use *use, bool originalp)
5704 {
5705 comp_cost best_cost, act_cost;
5706 unsigned i;
5707 bitmap_iterator bi;
5708 struct iv_cand *cand;
5709 struct iv_ca_delta *best_delta = NULL, *act_delta;
5710 struct cost_pair *cp;
5711
5712 iv_ca_add_use (data, ivs, use, false);
5713 best_cost = iv_ca_cost (ivs);
5714
5715 cp = iv_ca_cand_for_use (ivs, use);
5716 if (!cp)
5717 {
5718 ivs->upto--;
5719 ivs->bad_uses--;
5720 iv_ca_add_use (data, ivs, use, true);
5721 best_cost = iv_ca_cost (ivs);
5722 cp = iv_ca_cand_for_use (ivs, use);
5723 }
5724 if (cp)
5725 {
5726 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5727 iv_ca_set_no_cp (data, ivs, use);
5728 }
5729
5730 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
5731 first try important candidates not based on any memory object. Only if
5732 this fails, try the specific ones. Rationale -- in loops with many
5733 variables the best choice often is to use just one generic biv. If we
5734 added here many ivs specific to the uses, the optimization algorithm later
5735 would be likely to get stuck in a local minimum, thus causing us to create
5736 too many ivs. The approach from few ivs to more seems more likely to be
5737 successful -- starting from few ivs, replacing an expensive use by a
5738 specific iv should always be a win. */
5739 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5740 {
5741 cand = iv_cand (data, i);
5742
5743       if (originalp && cand->pos != IP_ORIGINAL)
5744 continue;
5745
5746 if (!originalp && cand->iv->base_object != NULL_TREE)
5747 continue;
5748
5749 if (iv_ca_cand_used_p (ivs, cand))
5750 continue;
5751
5752 cp = get_use_iv_cost (data, use, cand);
5753 if (!cp)
5754 continue;
5755
5756 iv_ca_set_cp (data, ivs, use, cp);
5757 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
5758 true);
5759 iv_ca_set_no_cp (data, ivs, use);
5760 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5761
5762 if (compare_costs (act_cost, best_cost) < 0)
5763 {
5764 best_cost = act_cost;
5765
5766 iv_ca_delta_free (&best_delta);
5767 best_delta = act_delta;
5768 }
5769 else
5770 iv_ca_delta_free (&act_delta);
5771 }
5772
5773 if (infinite_cost_p (best_cost))
5774 {
5775 for (i = 0; i < use->n_map_members; i++)
5776 {
5777 cp = use->cost_map + i;
5778 cand = cp->cand;
5779 if (!cand)
5780 continue;
5781
5782 /* Already tried this. */
5783 if (cand->important)
5784 {
5785 if (originalp && cand->pos == IP_ORIGINAL)
5786 continue;
5787 if (!originalp && cand->iv->base_object == NULL_TREE)
5788 continue;
5789 }
5790
5791 if (iv_ca_cand_used_p (ivs, cand))
5792 continue;
5793
5794 act_delta = NULL;
5795 iv_ca_set_cp (data, ivs, use, cp);
5796 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
5797 iv_ca_set_no_cp (data, ivs, use);
5798 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5799 cp, act_delta);
5800
5801 if (compare_costs (act_cost, best_cost) < 0)
5802 {
5803 best_cost = act_cost;
5804
5805 if (best_delta)
5806 iv_ca_delta_free (&best_delta);
5807 best_delta = act_delta;
5808 }
5809 else
5810 iv_ca_delta_free (&act_delta);
5811 }
5812 }
5813
5814 iv_ca_delta_commit (data, ivs, best_delta, true);
5815 iv_ca_delta_free (&best_delta);
5816
5817 return !infinite_cost_p (best_cost);
5818 }
5819
5820 /* Finds an initial assignment of candidates to uses. */
5821
5822 static struct iv_ca *
5823 get_initial_solution (struct ivopts_data *data, bool originalp)
5824 {
5825 struct iv_ca *ivs = iv_ca_new (data);
5826 unsigned i;
5827
5828 for (i = 0; i < n_iv_uses (data); i++)
5829 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
5830 {
5831 iv_ca_free (&ivs);
5832 return NULL;
5833 }
5834
5835 return ivs;
5836 }
5837
5838 /* Tries to improve the set of induction variables IVS.  */
5839
5840 static bool
5841 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5842 {
5843 unsigned i, n_ivs;
5844 comp_cost acost, best_cost = iv_ca_cost (ivs);
5845 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5846 struct iv_cand *cand;
5847
5848 /* Try extending the set of induction variables by one. */
5849 for (i = 0; i < n_iv_cands (data); i++)
5850 {
5851 cand = iv_cand (data, i);
5852
5853 if (iv_ca_cand_used_p (ivs, cand))
5854 continue;
5855
5856 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
5857 if (!act_delta)
5858 continue;
5859
5860 /* If we successfully added the candidate and the set is small enough,
5861 try optimizing it by removing other candidates. */
5862 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5863 {
5864 iv_ca_delta_commit (data, ivs, act_delta, true);
5865 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5866 iv_ca_delta_commit (data, ivs, act_delta, false);
5867 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5868 }
5869
5870 if (compare_costs (acost, best_cost) < 0)
5871 {
5872 best_cost = acost;
5873 iv_ca_delta_free (&best_delta);
5874 best_delta = act_delta;
5875 }
5876 else
5877 iv_ca_delta_free (&act_delta);
5878 }
5879
5880 if (!best_delta)
5881 {
5882 /* Try removing the candidates from the set instead. */
5883 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5884
5885 /* Nothing more we can do. */
5886 if (!best_delta)
5887 return false;
5888 }
5889
5890 iv_ca_delta_commit (data, ivs, best_delta, true);
5891 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
5892 iv_ca_delta_free (&best_delta);
5893 return true;
5894 }
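
/* An illustration with hypothetical costs: if the current set is { i }
with cost 14, extending it by candidate ivtmp may give { i, ivtmp } with
cost 12, and the subsequent pruning may find that i has become redundant,
yielding { ivtmp } with cost 9. The joined delta (add ivtmp, remove i)
beats the previous best cost, so it is committed and try_improve_iv_set
returns true; the caller keeps iterating until no improving delta is
found. */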
5895
5896 /* Attempts to find the optimal set of induction variables. We use a simple
5897 greedy heuristic: we try to replace at most one candidate in the selected
5898 solution and remove the unused ivs while this improves the cost. */
5899
5900 static struct iv_ca *
5901 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
5902 {
5903 struct iv_ca *set;
5904
5905 /* Get the initial solution. */
5906 set = get_initial_solution (data, originalp);
5907 if (!set)
5908 {
5909 if (dump_file && (dump_flags & TDF_DETAILS))
5910 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5911 return NULL;
5912 }
5913
5914 if (dump_file && (dump_flags & TDF_DETAILS))
5915 {
5916 fprintf (dump_file, "Initial set of candidates:\n");
5917 iv_ca_dump (data, dump_file, set);
5918 }
5919
5920 while (try_improve_iv_set (data, set))
5921 {
5922 if (dump_file && (dump_flags & TDF_DETAILS))
5923 {
5924 fprintf (dump_file, "Improved to:\n");
5925 iv_ca_dump (data, dump_file, set);
5926 }
5927 }
5928
5929 return set;
5930 }
5931
5932 static struct iv_ca *
5933 find_optimal_iv_set (struct ivopts_data *data)
5934 {
5935 unsigned i;
5936 struct iv_ca *set, *origset;
5937 struct iv_use *use;
5938 comp_cost cost, origcost;
5939
5940 /* Determine the cost using a strategy that starts with the original IVs,
5941 then try again using a strategy that prefers candidates not based
5942 on any IVs. */
5943 origset = find_optimal_iv_set_1 (data, true);
5944 set = find_optimal_iv_set_1 (data, false);
5945
5946 if (!origset && !set)
5947 return NULL;
5948
5949 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
5950 cost = set ? iv_ca_cost (set) : infinite_cost;
5951
5952 if (dump_file && (dump_flags & TDF_DETAILS))
5953 {
5954 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
5955 origcost.cost, origcost.complexity);
5956 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
5957 cost.cost, cost.complexity);
5958 }
5959
5960 /* Choose the one with the best cost. */
5961 if (compare_costs (origcost, cost) <= 0)
5962 {
5963 if (set)
5964 iv_ca_free (&set);
5965 set = origset;
5966 }
5967 else if (origset)
5968 iv_ca_free (&origset);
5969
5970 for (i = 0; i < n_iv_uses (data); i++)
5971 {
5972 use = iv_use (data, i);
5973 use->selected = iv_ca_cand_for_use (set, use)->cand;
5974 }
5975
5976 return set;
5977 }
5978
5979 /* Creates a new induction variable corresponding to CAND. */
5980
5981 static void
5982 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5983 {
5984 gimple_stmt_iterator incr_pos;
5985 tree base;
5986 bool after = false;
5987
5988 if (!cand->iv)
5989 return;
5990
5991 switch (cand->pos)
5992 {
5993 case IP_NORMAL:
5994 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
5995 break;
5996
5997 case IP_END:
5998 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
5999 after = true;
6000 break;
6001
6002 case IP_AFTER_USE:
6003 after = true;
6004 /* fall through */
6005 case IP_BEFORE_USE:
6006 incr_pos = gsi_for_stmt (cand->incremented_at);
6007 break;
6008
6009 case IP_ORIGINAL:
6010 /* Mark that the iv is preserved. */
6011 name_info (data, cand->var_before)->preserve_biv = true;
6012 name_info (data, cand->var_after)->preserve_biv = true;
6013
6014 /* Rewrite the increment so that it uses var_before directly. */
6015 find_interesting_uses_op (data, cand->var_after)->selected = cand;
6016 return;
6017 }
6018
6019 gimple_add_tmp_var (cand->var_before);
6020 add_referenced_var (cand->var_before);
6021
6022 base = unshare_expr (cand->iv->base);
6023
6024 create_iv (base, unshare_expr (cand->iv->step),
6025 cand->var_before, data->current_loop,
6026 &incr_pos, after, &cand->var_before, &cand->var_after);
6027 }
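
/* For illustration, a candidate with base 0 and step 1 placed at
IP_NORMAL has its increment emitted just before the exit test
(hypothetical GIMPLE):

ivtmp.5_8 = ivtmp.5_7 + 1;
if (ivtmp.5_8 < n_2)
goto <loop>;
else
goto <exit>;

With IP_END the increment would instead be emitted after the exit test,
at the end of the latch block. */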
6028
6029 /* Creates new induction variables described in SET. */
6030
6031 static void
6032 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6033 {
6034 unsigned i;
6035 struct iv_cand *cand;
6036 bitmap_iterator bi;
6037
6038 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6039 {
6040 cand = iv_cand (data, i);
6041 create_new_iv (data, cand);
6042 }
6043
6044 if (dump_file && (dump_flags & TDF_DETAILS))
6045 {
6046 fprintf (dump_file, "\nSelected IV set:\n");
6047 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6048 {
6049 cand = iv_cand (data, i);
6050 dump_cand (dump_file, cand);
6051 }
6052 fprintf (dump_file, "\n");
6053 }
6054 }
6055
6056 /* Rewrites USE (the definition of an iv used in a nonlinear expression)
6057 using candidate CAND. */
6058
6059 static void
6060 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6061 struct iv_use *use, struct iv_cand *cand)
6062 {
6063 tree comp;
6064 tree op, tgt;
6065 gimple ass;
6066 gimple_stmt_iterator bsi;
6067
6068 /* An important special case -- if we are asked to express the value of
6069 the original iv by itself, just exit; there is no need to
6070 introduce a new computation (that might also need casting the
6071 variable to unsigned and back). */
6072 if (cand->pos == IP_ORIGINAL
6073 && cand->incremented_at == use->stmt)
6074 {
6075 tree step, ctype, utype;
6076 enum tree_code incr_code = PLUS_EXPR, old_code;
6077
6078 gcc_assert (is_gimple_assign (use->stmt));
6079 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6080
6081 step = cand->iv->step;
6082 ctype = TREE_TYPE (step);
6083 utype = TREE_TYPE (cand->var_after);
6084 if (TREE_CODE (step) == NEGATE_EXPR)
6085 {
6086 incr_code = MINUS_EXPR;
6087 step = TREE_OPERAND (step, 0);
6088 }
6089
6090 /* Check whether we may leave the computation unchanged.
6091 This is the case only if it does not rely on other
6092 computations in the loop -- otherwise, the computation
6093 we rely upon may be removed in remove_unused_ivs,
6094 thus leading to an ICE. */
6095 old_code = gimple_assign_rhs_code (use->stmt);
6096 if (old_code == PLUS_EXPR
6097 || old_code == MINUS_EXPR
6098 || old_code == POINTER_PLUS_EXPR)
6099 {
6100 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6101 op = gimple_assign_rhs2 (use->stmt);
6102 else if (old_code != MINUS_EXPR
6103 && gimple_assign_rhs2 (use->stmt) == cand->var_before)
6104 op = gimple_assign_rhs1 (use->stmt);
6105 else
6106 op = NULL_TREE;
6107 }
6108 else
6109 op = NULL_TREE;
6110
6111 if (op
6112 && (TREE_CODE (op) == INTEGER_CST
6113 || operand_equal_p (op, step, 0)))
6114 return;
6115
6116 /* Otherwise, add the necessary computations to express
6117 the iv. */
6118 op = fold_convert (ctype, cand->var_before);
6119 comp = fold_convert (utype,
6120 build2 (incr_code, ctype, op,
6121 unshare_expr (step)));
6122 }
6123 else
6124 {
6125 comp = get_computation (data->current_loop, use, cand);
6126 gcc_assert (comp != NULL_TREE);
6127 }
6128
6129 switch (gimple_code (use->stmt))
6130 {
6131 case GIMPLE_PHI:
6132 tgt = PHI_RESULT (use->stmt);
6133
6134 /* If we should keep the biv, do not replace it. */
6135 if (name_info (data, tgt)->preserve_biv)
6136 return;
6137
6138 bsi = gsi_after_labels (gimple_bb (use->stmt));
6139 break;
6140
6141 case GIMPLE_ASSIGN:
6142 tgt = gimple_assign_lhs (use->stmt);
6143 bsi = gsi_for_stmt (use->stmt);
6144 break;
6145
6146 default:
6147 gcc_unreachable ();
6148 }
6149
6150 if (!valid_gimple_rhs_p (comp)
6151 || (gimple_code (use->stmt) != GIMPLE_PHI
6152 /* We can't allow re-allocating the stmt as it might still be
6153 pointed to. */
6154 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6155 >= gimple_num_ops (gsi_stmt (bsi)))))
6156 {
6157 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6158 true, GSI_SAME_STMT);
6159 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6160 {
6161 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6162 /* As this isn't a plain copy, we have to reset alignment
6163 information. */
6164 if (SSA_NAME_PTR_INFO (comp))
6165 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6166 }
6167 }
6168
6169 if (gimple_code (use->stmt) == GIMPLE_PHI)
6170 {
6171 ass = gimple_build_assign (tgt, comp);
6172 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6173
6174 bsi = gsi_for_stmt (use->stmt);
6175 remove_phi_node (&bsi, false);
6176 }
6177 else
6178 {
6179 gimple_assign_set_rhs_from_tree (&bsi, comp);
6180 use->stmt = gsi_stmt (bsi);
6181 }
6182 }
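
/* For instance (hypothetical SSA names), when CAND is the original iv
and USE is its own increment

i_7 = i_4 + 1;

the special case above leaves the statement untouched. For any other
candidate the right-hand side is replaced by an expression of the
candidate, e.g.

i_7 = (int) ivtmp_3 + 1;

which is gimplified before the use when it is not a valid GIMPLE rhs. */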
6183
6184 /* Performs a peephole optimization to reorder the iv update statement with
6185 a mem ref to enable instruction combining in later phases. The mem ref uses
6186 the iv value before the update, so the reordering transformation requires
6187 adjustment of the offset. CAND is the selected IV_CAND.
6188
6189 Example:
6190
6191 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6192 iv2 = iv1 + 1;
6193
6194 if (t < val) (1)
6195 goto L;
6196 goto Head;
6197
6198
6199 directly propagating t over to (1) would introduce an overlapping live
6200 range and thus increase register pressure. This peephole transforms it into:
6201
6202
6203 iv2 = iv1 + 1;
6204 t = MEM_REF (base, iv2, 8, 8);
6205 if (t < val)
6206 goto L;
6207 goto Head;
6208 */
6209
6210 static void
6211 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6212 {
6213 tree var_after;
6214 gimple iv_update, stmt;
6215 basic_block bb;
6216 gimple_stmt_iterator gsi, gsi_iv;
6217
6218 if (cand->pos != IP_NORMAL)
6219 return;
6220
6221 var_after = cand->var_after;
6222 iv_update = SSA_NAME_DEF_STMT (var_after);
6223
6224 bb = gimple_bb (iv_update);
6225 gsi = gsi_last_nondebug_bb (bb);
6226 stmt = gsi_stmt (gsi);
6227
6228 /* Only handle conditional statements for now. */
6229 if (gimple_code (stmt) != GIMPLE_COND)
6230 return;
6231
6232 gsi_prev_nondebug (&gsi);
6233 stmt = gsi_stmt (gsi);
6234 if (stmt != iv_update)
6235 return;
6236
6237 gsi_prev_nondebug (&gsi);
6238 if (gsi_end_p (gsi))
6239 return;
6240
6241 stmt = gsi_stmt (gsi);
6242 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6243 return;
6244
6245 if (stmt != use->stmt)
6246 return;
6247
6248 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6249 return;
6250
6251 if (dump_file && (dump_flags & TDF_DETAILS))
6252 {
6253 fprintf (dump_file, "Reordering\n");
6254 print_gimple_stmt (dump_file, iv_update, 0, 0);
6255 print_gimple_stmt (dump_file, use->stmt, 0, 0);
6256 fprintf (dump_file, "\n");
6257 }
6258
6259 gsi = gsi_for_stmt (use->stmt);
6260 gsi_iv = gsi_for_stmt (iv_update);
6261 gsi_move_before (&gsi_iv, &gsi);
6262
6263 cand->pos = IP_BEFORE_USE;
6264 cand->incremented_at = use->stmt;
6265 }
6266
6267 /* Rewrites USE (an address that is an iv) using candidate CAND. */
6268
6269 static void
6270 rewrite_use_address (struct ivopts_data *data,
6271 struct iv_use *use, struct iv_cand *cand)
6272 {
6273 aff_tree aff;
6274 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6275 tree base_hint = NULL_TREE;
6276 tree ref, iv;
6277 bool ok;
6278
6279 adjust_iv_update_pos (cand, use);
6280 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6281 gcc_assert (ok);
6282 unshare_aff_combination (&aff);
6283
6284 /* To avoid undefined overflow problems, all IV candidates use unsigned
6285 integer types. The drawback is that this makes it impossible for
6286 create_mem_ref to distinguish an IV that is based on a memory object
6287 from one that represents simply an offset.
6288
6289 To work around this problem, we pass a hint to create_mem_ref that
6290 indicates which variable (if any) in aff is an IV based on a memory
6291 object. Note that we only consider the candidate. If this is not
6292 based on an object, the base of the reference is in some subexpression
6293 of the use -- but these will use pointer types, so they are recognized
6294 by the create_mem_ref heuristics anyway. */
6295 if (cand->iv->base_object)
6296 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6297
6298 iv = var_at_stmt (data->current_loop, cand, use->stmt);
6299 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6300 reference_alias_ptr_type (*use->op_p),
6301 iv, base_hint, data->speed);
6302 copy_ref_info (ref, *use->op_p);
6303 *use->op_p = ref;
6304 }
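
/* A hypothetical example: an address use *p_5 where

p_5 = base_2 + (sizetype) i_4 * 4;

rewritten with a candidate based on base_2 may become

MEM[base_2 + ivtmp.7_10 * 4];

with base_2 passed to create_mem_ref as BASE_HINT so that the memory
object is kept in the base of the reference. */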
6305
6306 /* Rewrites USE (a condition in which one of the arguments is an iv) using
6307 candidate CAND. */
6308
6309 static void
6310 rewrite_use_compare (struct ivopts_data *data,
6311 struct iv_use *use, struct iv_cand *cand)
6312 {
6313 tree comp, *var_p, op, bound;
6314 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6315 enum tree_code compare;
6316 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6317 bool ok;
6318
6319 bound = cp->value;
6320 if (bound)
6321 {
6322 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6323 tree var_type = TREE_TYPE (var);
6324 gimple_seq stmts;
6325
6326 if (dump_file && (dump_flags & TDF_DETAILS))
6327 {
6328 fprintf (dump_file, "Replacing exit test: ");
6329 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6330 }
6331 compare = cp->comp;
6332 bound = unshare_expr (fold_convert (var_type, bound));
6333 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6334 if (stmts)
6335 gsi_insert_seq_on_edge_immediate (
6336 loop_preheader_edge (data->current_loop),
6337 stmts);
6338
6339 gimple_cond_set_lhs (use->stmt, var);
6340 gimple_cond_set_code (use->stmt, compare);
6341 gimple_cond_set_rhs (use->stmt, op);
6342 return;
6343 }
6344
6345 /* The induction variable elimination failed; just express the original
6346 giv. */
6347 comp = get_computation (data->current_loop, use, cand);
6348 gcc_assert (comp != NULL_TREE);
6349
6350 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6351 gcc_assert (ok);
6352
6353 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6354 true, GSI_SAME_STMT);
6355 }
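
/* For example (hypothetical SSA names), a successful elimination turns
the exit test

if (i_4 < n_6)

into a comparison of the candidate against a bound computed in the loop
preheader:

bound_9 = ...; <-- preheader
...
if (ivtmp.5_8 != bound_9) <-- replaced exit test */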
6356
6357 /* Rewrites USE using candidate CAND. */
6358
6359 static void
6360 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6361 {
6362 switch (use->type)
6363 {
6364 case USE_NONLINEAR_EXPR:
6365 rewrite_use_nonlinear_expr (data, use, cand);
6366 break;
6367
6368 case USE_ADDRESS:
6369 rewrite_use_address (data, use, cand);
6370 break;
6371
6372 case USE_COMPARE:
6373 rewrite_use_compare (data, use, cand);
6374 break;
6375
6376 default:
6377 gcc_unreachable ();
6378 }
6379
6380 update_stmt (use->stmt);
6381 }
6382
6383 /* Rewrite the uses using the selected induction variables. */
6384
6385 static void
6386 rewrite_uses (struct ivopts_data *data)
6387 {
6388 unsigned i;
6389 struct iv_cand *cand;
6390 struct iv_use *use;
6391
6392 for (i = 0; i < n_iv_uses (data); i++)
6393 {
6394 use = iv_use (data, i);
6395 cand = use->selected;
6396 gcc_assert (cand);
6397
6398 rewrite_use (data, use, cand);
6399 }
6400 }
6401
6402 /* Removes the ivs that are not used after rewriting. */
6403
6404 static void
6405 remove_unused_ivs (struct ivopts_data *data)
6406 {
6407 unsigned j;
6408 bitmap_iterator bi;
6409 bitmap toremove = BITMAP_ALLOC (NULL);
6410
6411 /* Figure out an order in which to release SSA DEFs so that we don't
6412 release something that we'd have to propagate into a debug stmt
6413 afterwards. */
6414 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6415 {
6416 struct version_info *info;
6417
6418 info = ver_info (data, j);
6419 if (info->iv
6420 && !integer_zerop (info->iv->step)
6421 && !info->inv_id
6422 && !info->iv->have_use_for
6423 && !info->preserve_biv)
6424 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
6425 }
6426
6427 release_defs_bitset (toremove);
6428
6429 BITMAP_FREE (toremove);
6430 }
6431
6432 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6433 for pointer_map_traverse. */
6434
6435 static bool
6436 free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
6437 void *data ATTRIBUTE_UNUSED)
6438 {
6439 struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;
6440
6441 free (niter);
6442 return true;
6443 }
6444
6445 /* Frees data allocated by the optimization of a single loop. */
6446
6447 static void
6448 free_loop_data (struct ivopts_data *data)
6449 {
6450 unsigned i, j;
6451 bitmap_iterator bi;
6452 tree obj;
6453
6454 if (data->niters)
6455 {
6456 pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
6457 pointer_map_destroy (data->niters);
6458 data->niters = NULL;
6459 }
6460
6461 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
6462 {
6463 struct version_info *info;
6464
6465 info = ver_info (data, i);
6466 free (info->iv);
6467 info->iv = NULL;
6468 info->has_nonlin_use = false;
6469 info->preserve_biv = false;
6470 info->inv_id = 0;
6471 }
6472 bitmap_clear (data->relevant);
6473 bitmap_clear (data->important_candidates);
6474
6475 for (i = 0; i < n_iv_uses (data); i++)
6476 {
6477 struct iv_use *use = iv_use (data, i);
6478
6479 free (use->iv);
6480 BITMAP_FREE (use->related_cands);
6481 for (j = 0; j < use->n_map_members; j++)
6482 if (use->cost_map[j].depends_on)
6483 BITMAP_FREE (use->cost_map[j].depends_on);
6484 free (use->cost_map);
6485 free (use);
6486 }
6487 VEC_truncate (iv_use_p, data->iv_uses, 0);
6488
6489 for (i = 0; i < n_iv_cands (data); i++)
6490 {
6491 struct iv_cand *cand = iv_cand (data, i);
6492
6493 free (cand->iv);
6494 if (cand->depends_on)
6495 BITMAP_FREE (cand->depends_on);
6496 free (cand);
6497 }
6498 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
6499
6500 if (data->version_info_size < num_ssa_names)
6501 {
6502 data->version_info_size = 2 * num_ssa_names;
6503 free (data->version_info);
6504 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
6505 }
6506
6507 data->max_inv_id = 0;
6508
6509 FOR_EACH_VEC_ELT (tree, decl_rtl_to_reset, i, obj)
6510 SET_DECL_RTL (obj, NULL_RTX);
6511
6512 VEC_truncate (tree, decl_rtl_to_reset, 0);
6513
6514 htab_empty (data->inv_expr_tab);
6515 data->inv_expr_id = 0;
6516 }
6517
6518 /* Finalizes the data structures used by the induction variable
6519 optimization pass. */
6520
6521 static void
6522 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
6523 {
6524 free_loop_data (data);
6525 free (data->version_info);
6526 BITMAP_FREE (data->relevant);
6527 BITMAP_FREE (data->important_candidates);
6528
6529 VEC_free (tree, heap, decl_rtl_to_reset);
6530 VEC_free (iv_use_p, heap, data->iv_uses);
6531 VEC_free (iv_cand_p, heap, data->iv_candidates);
6532 htab_delete (data->inv_expr_tab);
6533 }
6534
6535 /* Returns true if the loop body BODY contains calls other than inexpensive builtins. */
6536
6537 static bool
6538 loop_body_includes_call (basic_block *body, unsigned num_nodes)
6539 {
6540 gimple_stmt_iterator gsi;
6541 unsigned i;
6542
6543 for (i = 0; i < num_nodes; i++)
6544 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
6545 {
6546 gimple stmt = gsi_stmt (gsi);
6547 if (is_gimple_call (stmt)
6548 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
6549 return true;
6550 }
6551 return false;
6552 }
6553
6554 /* Optimizes the LOOP. Returns true if anything changed. */
6555
6556 static bool
6557 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
6558 {
6559 bool changed = false;
6560 struct iv_ca *iv_ca;
6561 edge exit = single_dom_exit (loop);
6562 basic_block *body;
6563
6564 gcc_assert (!data->niters);
6565 data->current_loop = loop;
6566 data->speed = optimize_loop_for_speed_p (loop);
6567
6568 if (dump_file && (dump_flags & TDF_DETAILS))
6569 {
6570 fprintf (dump_file, "Processing loop %d\n", loop->num);
6571
6572 if (exit)
6573 {
6574 fprintf (dump_file, " single exit %d -> %d, exit condition ",
6575 exit->src->index, exit->dest->index);
6576 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
6577 fprintf (dump_file, "\n");
6578 }
6579
6580 fprintf (dump_file, "\n");
6581 }
6582
6583 body = get_loop_body (loop);
6584 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
6585 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
6586 free (body);
6587
6588 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
6589
6590 /* For each ssa name, determine whether it behaves as an induction
6591 variable in some loop. */
6592 if (!find_induction_variables (data))
6593 goto finish;
6594
6595 /* Finds interesting uses (item 1). */
6596 find_interesting_uses (data);
6597 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
6598 goto finish;
6599
6600 /* Finds candidates for the induction variables (item 2). */
6601 find_iv_candidates (data);
6602
6603 /* Calculates the costs (item 3, part 1). */
6604 determine_iv_costs (data);
6605 determine_use_iv_costs (data);
6606 determine_set_costs (data);
6607
6608 /* Find the optimal set of induction variables (item 3, part 2). */
6609 iv_ca = find_optimal_iv_set (data);
6610 if (!iv_ca)
6611 goto finish;
6612 changed = true;
6613
6614 /* Create the new induction variables (item 4, part 1). */
6615 create_new_ivs (data, iv_ca);
6616 iv_ca_free (&iv_ca);
6617
6618 /* Rewrite the uses (item 4, part 2). */
6619 rewrite_uses (data);
6620
6621 /* Remove the ivs that are unused after rewriting. */
6622 remove_unused_ivs (data);
6623
6624 /* We have changed the structure of induction variables; it might happen
6625 that definitions in the scev database refer to some of them that were
6626 eliminated. */
6627 scev_reset ();
6628
6629 finish:
6630 free_loop_data (data);
6631
6632 return changed;
6633 }
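
/* Taken together, the pass typically performs the strength reduction
sketched below (source-level view, not actual pass output):

for (i = 0; i < n; i++) for (p = a; p != a + n; p++)
a[i] = 0; ==> *p = 0;

The index iv is replaced by a pointer iv and the exit test is rewritten
against a bound computed before the loop. */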
6634
6635 /* Main entry point. Optimizes induction variables in loops. */
6636
6637 void
6638 tree_ssa_iv_optimize (void)
6639 {
6640 struct loop *loop;
6641 struct ivopts_data data;
6642 loop_iterator li;
6643
6644 tree_ssa_iv_optimize_init (&data);
6645
6646 /* Optimize the loops starting with the innermost ones. */
6647 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
6648 {
6649 if (dump_file && (dump_flags & TDF_DETAILS))
6650 flow_loop_dump (loop, dump_file, NULL, 1);
6651
6652 tree_ssa_iv_optimize_loop (&data, loop);
6653 }
6654
6655 tree_ssa_iv_optimize_finalize (&data);
6656 }