/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best induction
         variable in the set and adds its cost to the sum.  The cost reflects
         the time spent on modifying the induction variable's value to be usable
         for the given purpose (adding base and offset for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables; the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible; it might give better performance and enable us to decide costs
   more precisely, but getting all the interactions right would be
   complicated.  */
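
/* As an illustrative sketch (not literal output of this pass), a loop like

     for (i = 0; i < n; i++)
       a[i] = 0;

   may after strength reduction and induction variable elimination use a
   pointer as the only induction variable, with the exit test rewritten to
   compare against its final value:

     for (p = &a[0]; p != &a[n]; p++)
       *p = 0;  */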

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "tm_p.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "pointer-set.h"
#include "hashtab.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "cfgloop.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
#include "expmed.h"

static hashval_t mbc_entry_hash (const void *);
static int mbc_entry_eq (const void *, const void *);

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "expr.h"
#include "recog.h"

/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}

/* Representation of the induction variable.  */
struct iv
{
  tree base;            /* Initial value of the iv.  */
  tree base_object;     /* A memory object to which the induction
                           variable points.  */
  tree step;            /* Step of the iv (constant only).  */
  tree ssa_name;        /* The ssa name with the value.  */
  bool biv_p;           /* Is it a biv?  */
  bool have_use_for;    /* Do we already have a use for it?  */
  unsigned use_id;      /* The identifier in the use if it is the case.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;            /* The ssa name.  */
  struct iv *iv;        /* Induction variable description.  */
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
                           an expression that is not an induction variable.  */
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
  unsigned inv_id;      /* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
  USE_ADDRESS,          /* Use in an address.  */
  USE_COMPARE           /* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;             /* The runtime cost.  */
  unsigned complexity;  /* The estimate of the complexity of the code for
                           the computation (in no concrete units --
                           complexity field should be larger for more
                           complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand; /* The candidate.  */
  comp_cost cost;       /* The cost.  */
  bitmap depends_on;    /* The list of invariants that have to be
                           preserved.  */
  tree value;           /* For final value elimination, the expression for
                           the final value of the iv.  For iv elimination,
                           the new bound to compare with.  */
  enum tree_code comp;  /* For iv elimination, the comparison.  */
  int inv_expr_id;      /* Loop invariant expression id.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;          /* The id of the use.  */
  enum use_type type;   /* Type of the use.  */
  struct iv *iv;        /* The induction variable it is based on.  */
  gimple stmt;          /* Statement in which it occurs.  */
  tree *op_p;           /* The place where it occurs.  */
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
                           important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
                        /* The costs with respect to the iv candidates.  */

  struct iv_cand *selected;
                        /* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,            /* At the end, just before the exit condition.  */
  IP_END,               /* At the end of the latch block.  */
  IP_BEFORE_USE,        /* Immediately before a specific use.  */
  IP_AFTER_USE,         /* Immediately after a specific use.  */
  IP_ORIGINAL           /* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;          /* The number of the candidate.  */
  bool important;       /* Whether this is an "important" candidate, i.e. such
                           that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
                           incremented.  */
  tree var_before;      /* The variable used for it before increment.  */
  tree var_after;       /* The variable used for it after increment.  */
  struct iv *iv;        /* The value of the candidate.  NULL for
                           "pseudocandidate" used to indicate the possibility
                           to replace the final value of an iv by direct
                           computation of the value.  */
  unsigned cost;        /* Cost of the candidate.  */
  unsigned cost_step;   /* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
                              where it is incremented.  */
  bitmap depends_on;    /* The list of invariants that are used in step of the
                           biv.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  htab_t inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via a single exit.  */
  bool loop_single_exit_p;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on number of candidates below which all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;

/* Cached costs for multiplies by constants, and a flag to indicate
   when they're valid.  */
static htab_t mult_costs[2];
static bool cost_tables_exist = false;

static comp_cost force_expr_to_var_cost (tree, bool);

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return VEC_length (iv_use_p, data->iv_uses);
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_use_p, data->iv_uses, i);
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return VEC_length (iv_cand_p, data->iv_candidates);
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_cand_p, data->iv_candidates, i);
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

extern void dump_uses (FILE *, struct ivopts_data *);
void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
           cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}

/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
                                  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
        return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
        return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
                            idx_contains_abnormal_ssa_name_p,
                            NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
        return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
        return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine the number of iterations.  We cannot safely work
         with ssa names that appear in phi nodes on abnormal edges, so that
         we do not create overlapping live ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
                                      exit, desc, true)
          || contains_abnormal_ssa_name_p (desc->niter))
        {
          XDELETE (desc);
          desc = NULL;
        }
      slot = pointer_map_insert (data->niters, exit);
      *slot = desc;
    }
  else
    desc = (struct tree_niter_desc *) *slot;

  return desc;
}

/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Hash table equality function for expressions.  */

static int
htab_inv_expr_eq (const void *ent1, const void *ent2)
{
  const struct iv_inv_expr_ent *expr1 =
      (const struct iv_inv_expr_ent *) ent1;
  const struct iv_inv_expr_ent *expr2 =
      (const struct iv_inv_expr_ent *) ent2;

  return expr1->hash == expr2->hash
         && operand_equal_p (expr1->expr, expr2->expr, 0);
}

/* Hash function for loop invariant expressions.  */

static hashval_t
htab_inv_expr_hash (const void *ent)
{
  const struct iv_inv_expr_ent *expr =
      (const struct iv_inv_expr_ent *) ent;
  return expr->hash;
}

/* Allocate data structures for the cost model.  */

void
initialize_costs (void)
{
  mult_costs[0] = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
  mult_costs[1] = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
  cost_tables_exist = true;
}

/* Release data structures for the cost model.  */

void
finalize_costs (void)
{
  cost_tables_exist = false;
  htab_delete (mult_costs[0]);
  htab_delete (mult_costs[1]);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
  data->inv_expr_tab = htab_create (10, htab_inv_expr_hash,
                                    htab_inv_expr_eq, free);
  data->inv_expr_id = 0;
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);

  initialize_costs ();
}

/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
        return expr;

      if (TREE_CODE (base) == MEM_REF)
        return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
                           build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
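
/* For example: for EXPR of the form &a[i] this returns &a; for a plain
   pointer ssa name p_1 the default case returns p_1 itself (converted to
   ptr_type_node); and for an integer constant there is no base object,
   so NULL_TREE is returned.  (The names a and p_1 are illustrative.)  */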

/* Allocates an induction variable with the given initial value BASE and
   step STEP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
          || !flow_bb_inside_loop_p (data->current_loop, bb))
        set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (!is_gimple_reg (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
        continue;

      step = determine_biv_step (phi);
      if (!step)
        continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
          || contains_abnormal_ssa_name_p (step))
        continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
        {
          if (POINTER_TYPE_P (type))
            step = convert_to_ptrofftype (step);
          else
            step = fold_convert (type, step);
        }

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
        continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
        continue;

      /* If the increment is in a subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
          || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
        continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP, determines whether it is an induction
   variable, and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
        {
          fprintf (dump_file, "  number of iterations ");
          print_generic_expr (dump_file, niter->niter, TDF_SLIM);
          if (!integer_zerop (niter->may_be_zero))
            {
              fprintf (dump_file, "; zero if ");
              print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
            }
          fprintf (dump_file, "\n\n");
        }

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          if (ver_info (data, i)->iv)
            dump_iv (dump_file, ver_info (data, i)->iv);
        }
    }

  return true;
}

/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
            gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* Reset the ssa name, if it was not already reset by the caller, so that
     it is not shown in the dumps.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  VEC_safe_push (iv_use_p, heap, data->iv_uses, use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || !is_gimple_reg (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
              || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to the location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
                       tree **control_var, tree **bound,
                       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
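
/* For example: for a loop exit test "if (i_1 < n_2)" where i_1 is an
   induction variable and n_2 is loop invariant, *CONTROL_VAR is set to
   the location of i_1, *BOUND to the location of n_2, and true is
   returned.  (i_1 and n_2 are illustrative ssa names.)  */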

/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
          && flow_bb_inside_loop_p (loop, def_bb))
        return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
        && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Returns true if statement STMT is obviously invariant in LOOP,
   i.e. if all its operands on the RHS are defined outside of the LOOP.
   LOOP should not be the function body.  */

bool
stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
{
  unsigned i;
  tree lhs;

  gcc_assert (loop_depth (loop) > 0);

  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); i++)
    {
      tree op = gimple_op (stmt, i);
      if (op != lhs && !expr_invariant_in_loop_p (loop, op))
        return false;
    }

  return true;
}

/* Accumulates the steps of indices into DATA and replaces their values with
   the initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is an array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both the lower bound
     and the step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
          && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
        return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
          || !expr_invariant_in_loop_p (loop, lbound))
        return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce, for a base of *D42 with iv->base being &x[0],
          *&x[0], which is not folded and does not trigger the
          ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
        return false;
    }
  else
    /* The step for pointer arithmetic is already 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
                            sizetype, &iv_base, &iv_step, dta->stmt,
                            false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
                void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
        return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
        return false;

      *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
                              precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
          || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
        return false;

      if (code == MINUS_EXPR)
        p1 = double_int_neg (p1);
      *mul = double_int_sext (double_int_add (p0, p1), precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
        return false;

      p0 = double_int_sext (tree_to_double_int (top), precision);
      p1 = double_int_sext (tree_to_double_int (bot), precision);
      if (double_int_zero_p (p1))
        return false;
      *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
                              precision);
      return double_int_zero_p (res);

    default:
      return false;
    }
}
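
/* For example: for TOP = i_1 * 8 and BOT = i_1 the MULT_EXPR case yields
   MUL = 8; for two constants such as TOP = 12 and BOT = 4 it yields
   MUL = 3; and for TOP = i_1 * 8 + 16 and BOT = i_1 it fails, since 16
   is not a constant multiple of i_1.  (i_1 is an illustrative ssa name.)  */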

/* Returns true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically a copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
                              &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = get_object_alignment (base);
  base_align = MAX (base_align, TYPE_ALIGN (base_type));

  if (mode != BLKmode)
    {
      unsigned mode_align = GET_MODE_ALIGNMENT (mode);

      if (base_align < mode_align
          || (bitpos % mode_align) != 0
          || (bitpos % BITS_PER_UNIT) != 0)
        return true;

      if (toffset
          && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
        return true;

      if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
        return true;
    }

  return false;
}

/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
         target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
             || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversion may wrap non-addressable objects
         and make them look addressable.  After some processing the
         non-addressability may be uncovered again, causing ADDR_EXPRs
         of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
          || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
        return true;

      /* ... fall through ...  */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}

/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
          && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_BASE (base));
          if (!civ)
            goto fail;

          TMR_BASE (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX2 (base)
          && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX2 (base));
          if (!civ)
            goto fail;

          TMR_INDEX2 (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX (base)
          && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX (base));
          if (!civ)
            goto fail;

          TMR_INDEX (base) = civ->base;
          astep = civ->step;

          if (astep)
            {
              if (TMR_STEP (base))
                astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

              step = fold_build2 (PLUS_EXPR, type, step, astep);
            }
        }

      if (integer_zerop (step))
        goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
          || integer_zerop (ifs_ivopts_data.step))
        goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
         to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
        goto fail;

      /* Moreover, on strict alignment platforms, check that it is
         sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
        goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
         have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
        {
          tree *ref = &TREE_OPERAND (base, 0);
          while (handled_component_p (*ref))
            ref = &TREE_OPERAND (*ref, 0);
          if (TREE_CODE (*ref) == MEM_REF)
            {
              tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
                                      TREE_OPERAND (*ref, 0),
                                      TREE_OPERAND (*ref, 1));
              if (tem)
                *ref = tem;
            }
        }
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}

/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
        {
          /* If the statement defines an induction variable, the uses are not
             interesting by themselves.  */

          iv = get_iv (data, *lhs);

          if (iv && !integer_zerop (iv->step))
            return;
        }

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
          && (REFERENCE_CLASS_P (*rhs)
              || is_gimple_val (*rhs)))
        {
          if (REFERENCE_CLASS_P (*rhs))
            find_interesting_uses_address (data, stmt, rhs);
          else
            find_interesting_uses_op (data, *rhs);

          if (REFERENCE_CLASS_P (*lhs))
            find_interesting_uses_address (data, stmt, lhs);
          return;
        }
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
        {
          find_interesting_uses_cond (data, stmt);
          return;
        }

      /* TODO -- we should also handle address uses of type

         memory = call (whatever);

         and

         call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
        return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
        continue;

      iv = get_iv (data, op);
      if (!iv)
        continue;

      find_interesting_uses_op (data, op);
    }
}

/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (is_gimple_reg (def))
        find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
        if (e->dest != EXIT_BLOCK_PTR
            && !flow_bb_inside_loop_p (data->current_loop, e->dest))
          find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        if (!is_gimple_debug (gsi_stmt (bsi)))
          find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          info = ver_info (data, i);
          if (info->inv_id)
            {
              fprintf (dump_file, "  ");
              print_generic_expr (dump_file, info->name, TDF_SLIM);
              fprintf (dump_file, " is invariant (%d)%s\n",
                       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
            }
        }

      fprintf (dump_file, "\n");
    }

  free (body);
}
2020
2021 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2022 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2023 we are at the top-level of the processed address. */
2024
2025 static tree
2026 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2027 unsigned HOST_WIDE_INT *offset)
2028 {
2029 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2030 enum tree_code code;
2031 tree type, orig_type = TREE_TYPE (expr);
2032 unsigned HOST_WIDE_INT off0, off1, st;
2033 tree orig_expr = expr;
2034
2035 STRIP_NOPS (expr);
2036
2037 type = TREE_TYPE (expr);
2038 code = TREE_CODE (expr);
2039 *offset = 0;
2040
2041 switch (code)
2042 {
2043 case INTEGER_CST:
2044 if (!cst_and_fits_in_hwi (expr)
2045 || integer_zerop (expr))
2046 return orig_expr;
2047
2048 *offset = int_cst_value (expr);
2049 return build_int_cst (orig_type, 0);
2050
2051 case POINTER_PLUS_EXPR:
2052 case PLUS_EXPR:
2053 case MINUS_EXPR:
2054 op0 = TREE_OPERAND (expr, 0);
2055 op1 = TREE_OPERAND (expr, 1);
2056
2057 op0 = strip_offset_1 (op0, false, false, &off0);
2058 op1 = strip_offset_1 (op1, false, false, &off1);
2059
2060 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2061 if (op0 == TREE_OPERAND (expr, 0)
2062 && op1 == TREE_OPERAND (expr, 1))
2063 return orig_expr;
2064
2065 if (integer_zerop (op1))
2066 expr = op0;
2067 else if (integer_zerop (op0))
2068 {
2069 if (code == MINUS_EXPR)
2070 expr = fold_build1 (NEGATE_EXPR, type, op1);
2071 else
2072 expr = op1;
2073 }
2074 else
2075 expr = fold_build2 (code, type, op0, op1);
2076
2077 return fold_convert (orig_type, expr);
2078
2079 case MULT_EXPR:
2080 op1 = TREE_OPERAND (expr, 1);
2081 if (!cst_and_fits_in_hwi (op1))
2082 return orig_expr;
2083
2084 op0 = TREE_OPERAND (expr, 0);
2085 op0 = strip_offset_1 (op0, false, false, &off0);
2086 if (op0 == TREE_OPERAND (expr, 0))
2087 return orig_expr;
2088
2089 *offset = off0 * int_cst_value (op1);
2090 if (integer_zerop (op0))
2091 expr = op0;
2092 else
2093 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2094
2095 return fold_convert (orig_type, expr);
2096
2097 case ARRAY_REF:
2098 case ARRAY_RANGE_REF:
2099 if (!inside_addr)
2100 return orig_expr;
2101
2102 step = array_ref_element_size (expr);
2103 if (!cst_and_fits_in_hwi (step))
2104 break;
2105
2106 st = int_cst_value (step);
2107 op1 = TREE_OPERAND (expr, 1);
2108 op1 = strip_offset_1 (op1, false, false, &off1);
2109 *offset = off1 * st;
2110
2111 if (top_compref
2112 && integer_zerop (op1))
2113 {
2114 /* Strip the array reference completely. */
2115 op0 = TREE_OPERAND (expr, 0);
2116 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2117 *offset += off0;
2118 return op0;
2119 }
2120 break;
2121
2122 case COMPONENT_REF:
2123 if (!inside_addr)
2124 return orig_expr;
2125
2126 tmp = component_ref_field_offset (expr);
2127 if (top_compref
2128 && cst_and_fits_in_hwi (tmp))
2129 {
2130 /* Strip the component reference completely. */
2131 op0 = TREE_OPERAND (expr, 0);
2132 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2133 *offset = off0 + int_cst_value (tmp);
2134 return op0;
2135 }
2136 break;
2137
2138 case ADDR_EXPR:
2139 op0 = TREE_OPERAND (expr, 0);
2140 op0 = strip_offset_1 (op0, true, true, &off0);
2141 *offset += off0;
2142
2143 if (op0 == TREE_OPERAND (expr, 0))
2144 return orig_expr;
2145
2146 expr = build_fold_addr_expr (op0);
2147 return fold_convert (orig_type, expr);
2148
2149 case MEM_REF:
2150 /* ??? Offset operand? */
2151 inside_addr = false;
2152 break;
2153
2154 default:
2155 return orig_expr;
2156 }
2157
2158 /* Default handling of expressions for which we want to recurse into
2159 the first operand. */
2160 op0 = TREE_OPERAND (expr, 0);
2161 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2162 *offset += off0;
2163
2164 if (op0 == TREE_OPERAND (expr, 0)
2165 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2166 return orig_expr;
2167
2168 expr = copy_node (expr);
2169 TREE_OPERAND (expr, 0) = op0;
2170 if (op1)
2171 TREE_OPERAND (expr, 1) = op1;
2172
2173 /* Inside an address, we might strip the top-level component references,
2174 thus changing the type of the expression. The handling of ADDR_EXPR
2175 will fix that. */
2176 expr = fold_convert (orig_type, expr);
2177
2178 return expr;
2179 }
2180
2181 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2182
2183 static tree
2184 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2185 {
2186 return strip_offset_1 (expr, false, false, offset);
2187 }
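
/* For illustration, a sketch of how strip_offset is typically used by the
   callers below (the concrete values are hypothetical):

     unsigned HOST_WIDE_INT off;
     tree base = strip_offset (use_base, &off);

   Given use_base == &a[i + 4] with 4-byte array elements, this should yield
   base == &a[i] and off == 16, so that uses differing only in a constant
   offset can be expressed from a single induction variable.  */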
2188
2189 /* Returns a variant of TYPE that can be used as a base for different uses.
2190 We return an unsigned type with the same precision, which avoids problems
2191 with overflows. */
2192
2193 static tree
2194 generic_type_for (tree type)
2195 {
2196 if (POINTER_TYPE_P (type))
2197 return unsigned_type_for (type);
2198
2199 if (TYPE_UNSIGNED (type))
2200 return type;
2201
2202 return unsigned_type_for (type);
2203 }
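
/* For illustration (hypothetical types): generic_type_for maps both signed
   and pointer types to an unsigned type of the same precision, e.g. int to
   unsigned int, and a 64-bit pointer to a 64-bit unsigned integer type,
   while unsigned int is returned unchanged.  Arithmetic on candidates in
   these types wraps instead of invoking undefined overflow behavior.  */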
2204
2205 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2206 the bitmap in which we should store them. */
2207
2208 static struct ivopts_data *fd_ivopts_data;
2209 static tree
2210 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2211 {
2212 bitmap *depends_on = (bitmap *) data;
2213 struct version_info *info;
2214
2215 if (TREE_CODE (*expr_p) != SSA_NAME)
2216 return NULL_TREE;
2217 info = name_info (fd_ivopts_data, *expr_p);
2218
2219 if (!info->inv_id || info->has_nonlin_use)
2220 return NULL_TREE;
2221
2222 if (!*depends_on)
2223 *depends_on = BITMAP_ALLOC (NULL);
2224 bitmap_set_bit (*depends_on, info->inv_id);
2225
2226 return NULL_TREE;
2227 }
2228
2229 /* Adds a candidate BASE + STEP * i. The important field is set to IMPORTANT
2230 and the position to POS. If USE is not NULL, the candidate is set as related to
2231 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2232 replacement of the final value of the iv by a direct computation. */
2233
2234 static struct iv_cand *
2235 add_candidate_1 (struct ivopts_data *data,
2236 tree base, tree step, bool important, enum iv_position pos,
2237 struct iv_use *use, gimple incremented_at)
2238 {
2239 unsigned i;
2240 struct iv_cand *cand = NULL;
2241 tree type, orig_type;
2242
2243 /* For non-original variables, make sure their values are computed in a type
2244 that does not invoke undefined behavior on overflows (since in general,
2245 we cannot prove that these induction variables are non-wrapping). */
2246 if (pos != IP_ORIGINAL)
2247 {
2248 orig_type = TREE_TYPE (base);
2249 type = generic_type_for (orig_type);
2250 if (type != orig_type)
2251 {
2252 base = fold_convert (type, base);
2253 step = fold_convert (type, step);
2254 }
2255 }
2256
2257 for (i = 0; i < n_iv_cands (data); i++)
2258 {
2259 cand = iv_cand (data, i);
2260
2261 if (cand->pos != pos)
2262 continue;
2263
2264 if (cand->incremented_at != incremented_at
2265 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2266 && cand->ainc_use != use))
2267 continue;
2268
2269 if (!cand->iv)
2270 {
2271 if (!base && !step)
2272 break;
2273
2274 continue;
2275 }
2276
2277 if (!base && !step)
2278 continue;
2279
2280 if (operand_equal_p (base, cand->iv->base, 0)
2281 && operand_equal_p (step, cand->iv->step, 0)
2282 && (TYPE_PRECISION (TREE_TYPE (base))
2283 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2284 break;
2285 }
2286
2287 if (i == n_iv_cands (data))
2288 {
2289 cand = XCNEW (struct iv_cand);
2290 cand->id = i;
2291
2292 if (!base && !step)
2293 cand->iv = NULL;
2294 else
2295 cand->iv = alloc_iv (base, step);
2296
2297 cand->pos = pos;
2298 if (pos != IP_ORIGINAL && cand->iv)
2299 {
2300 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2301 cand->var_after = cand->var_before;
2302 }
2303 cand->important = important;
2304 cand->incremented_at = incremented_at;
2305 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2306
2307 if (step
2308 && TREE_CODE (step) != INTEGER_CST)
2309 {
2310 fd_ivopts_data = data;
2311 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2312 }
2313
2314 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2315 cand->ainc_use = use;
2316 else
2317 cand->ainc_use = NULL;
2318
2319 if (dump_file && (dump_flags & TDF_DETAILS))
2320 dump_cand (dump_file, cand);
2321 }
2322
2323 if (important && !cand->important)
2324 {
2325 cand->important = true;
2326 if (dump_file && (dump_flags & TDF_DETAILS))
2327 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2328 }
2329
2330 if (use)
2331 {
2332 bitmap_set_bit (use->related_cands, i);
2333 if (dump_file && (dump_flags & TDF_DETAILS))
2334 fprintf (dump_file, "Candidate %d is related to use %d\n",
2335 cand->id, use->id);
2336 }
2337
2338 return cand;
2339 }
2340
2341 /* Returns true if incrementing the induction variable at the end of the LOOP
2342 is allowed.
2343
2344 The purpose is to avoid splitting the latch edge with a biv increment, thus
2345 creating a jump, possibly confusing other optimization passes and leaving
2346 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2347 is not available (so we do not have a better alternative), or if the latch
2348 edge is already nonempty. */
2349
2350 static bool
2351 allow_ip_end_pos_p (struct loop *loop)
2352 {
2353 if (!ip_normal_pos (loop))
2354 return true;
2355
2356 if (!empty_block_p (ip_end_pos (loop)))
2357 return true;
2358
2359 return false;
2360 }
2361
2362 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2363 The important field is set to IMPORTANT. */
2364
2365 static void
2366 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2367 bool important, struct iv_use *use)
2368 {
2369 basic_block use_bb = gimple_bb (use->stmt);
2370 enum machine_mode mem_mode;
2371 unsigned HOST_WIDE_INT cstepi;
2372
2373 /* If we insert the increment in any position other than the standard
2374 ones, we must ensure that it is incremented once per iteration.
2375 It must not be in an inner nested loop, or on one side of an if
2376 statement. */
2377 if (use_bb->loop_father != data->current_loop
2378 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2379 || stmt_could_throw_p (use->stmt)
2380 || !cst_and_fits_in_hwi (step))
2381 return;
2382
2383 cstepi = int_cst_value (step);
2384
2385 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2386 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2387 || USE_STORE_PRE_INCREMENT (mem_mode))
2388 && GET_MODE_SIZE (mem_mode) == cstepi)
2389 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2390 || USE_STORE_PRE_DECREMENT (mem_mode))
2391 && GET_MODE_SIZE (mem_mode) == -cstepi))
2392 {
2393 enum tree_code code = MINUS_EXPR;
2394 tree new_base;
2395 tree new_step = step;
2396
2397 if (POINTER_TYPE_P (TREE_TYPE (base)))
2398 {
2399 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2400 code = POINTER_PLUS_EXPR;
2401 }
2402 else
2403 new_step = fold_convert (TREE_TYPE (base), new_step);
2404 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2405 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2406 use->stmt);
2407 }
2408 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2409 || USE_STORE_POST_INCREMENT (mem_mode))
2410 && GET_MODE_SIZE (mem_mode) == cstepi)
2411 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2412 || USE_STORE_POST_DECREMENT (mem_mode))
2413 && GET_MODE_SIZE (mem_mode) == -cstepi))
2414 {
2415 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2416 use->stmt);
2417 }
2418 }
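
/* For illustration (a hypothetical target): with 4-byte post-increment
   loads, a load from *p with step 4 gets an IP_AFTER_USE candidate with
   base p and step 4, so that the increment can be folded into the access.
   In the pre-increment case the candidate base is shifted one step back,
   to base - step, so that incrementing just before the use yields exactly
   the address the use needs.  */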
2419
2420 /* Adds a candidate BASE + STEP * i. The important field is set to IMPORTANT.
2421 If USE is not NULL, the candidate is set as related to it. The candidate
2422 computation is scheduled at all available positions. */
2423
2424 static void
2425 add_candidate (struct ivopts_data *data,
2426 tree base, tree step, bool important, struct iv_use *use)
2427 {
2428 if (ip_normal_pos (data->current_loop))
2429 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2430 if (ip_end_pos (data->current_loop)
2431 && allow_ip_end_pos_p (data->current_loop))
2432 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2433
2434 if (use != NULL && use->type == USE_ADDRESS)
2435 add_autoinc_candidates (data, base, step, important, use);
2436 }
2437
2438 /* Adds standard iv candidates. */
2439
2440 static void
2441 add_standard_iv_candidates (struct ivopts_data *data)
2442 {
2443 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2444
2445 /* The same for the long type if it is wider and still fast enough. */
2446 if (TYPE_PRECISION
2447 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2448 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2449 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2450 build_int_cst (long_integer_type_node, 1), true, NULL);
2451
2452 /* The same for the long long type if it is wider and still fast enough. */
2453 if (TYPE_PRECISION
2454 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2455 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2456 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2457 build_int_cst (long_long_integer_type_node, 1), true, NULL);
2458 }
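
/* For illustration: the standard candidates correspond to the counters

     for (int i = 0; ...; i++)
     for (long i = 0; ...; i++)
     for (long long i = 0; ...; i++)

   i.e. candidates with base 0 and step 1, where each wider type is only
   added if it is actually wider than the previous one and still at most
   BITS_PER_WORD bits wide.  */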
2459
2460
2461 /* Adds candidates based on the old induction variable IV. */
2462
2463 static void
2464 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2465 {
2466 gimple phi;
2467 tree def;
2468 struct iv_cand *cand;
2469
2470 add_candidate (data, iv->base, iv->step, true, NULL);
2471
2472 /* The same, but with initial value zero. */
2473 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2474 add_candidate (data, size_int (0), iv->step, true, NULL);
2475 else
2476 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2477 iv->step, true, NULL);
2478
2479 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2480 if (gimple_code (phi) == GIMPLE_PHI)
2481 {
2482 /* Additionally record the possibility of leaving the original iv
2483 untouched. */
2484 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2485 cand = add_candidate_1 (data,
2486 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2487 SSA_NAME_DEF_STMT (def));
2488 cand->var_before = iv->ssa_name;
2489 cand->var_after = def;
2490 }
2491 }
2492
2493 /* Adds candidates based on the old induction variables. */
2494
2495 static void
2496 add_old_ivs_candidates (struct ivopts_data *data)
2497 {
2498 unsigned i;
2499 struct iv *iv;
2500 bitmap_iterator bi;
2501
2502 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2503 {
2504 iv = ver_info (data, i)->iv;
2505 if (iv && iv->biv_p && !integer_zerop (iv->step))
2506 add_old_iv_candidates (data, iv);
2507 }
2508 }
2509
2510 /* Adds candidates based on the value of the induction variable IV and USE. */
2511
2512 static void
2513 add_iv_value_candidates (struct ivopts_data *data,
2514 struct iv *iv, struct iv_use *use)
2515 {
2516 unsigned HOST_WIDE_INT offset;
2517 tree base;
2518 tree basetype;
2519
2520 add_candidate (data, iv->base, iv->step, false, use);
2521
2522 /* The same, but with initial value zero. Make such a variable important,
2523 since it is generic enough that possibly many uses may be based
2524 on it. */
2525 basetype = TREE_TYPE (iv->base);
2526 if (POINTER_TYPE_P (basetype))
2527 basetype = sizetype;
2528 add_candidate (data, build_int_cst (basetype, 0),
2529 iv->step, true, use);
2530
2531 /* Third, try removing the constant offset. Make sure to even
2532 add a candidate for &a[0] vs. (T *)&a. */
2533 base = strip_offset (iv->base, &offset);
2534 if (offset
2535 || base != iv->base)
2536 add_candidate (data, base, iv->step, false, use);
2537 }
2538
2539 /* Adds candidates based on the uses. */
2540
2541 static void
2542 add_derived_ivs_candidates (struct ivopts_data *data)
2543 {
2544 unsigned i;
2545
2546 for (i = 0; i < n_iv_uses (data); i++)
2547 {
2548 struct iv_use *use = iv_use (data, i);
2549
2550 if (!use)
2551 continue;
2552
2553 switch (use->type)
2554 {
2555 case USE_NONLINEAR_EXPR:
2556 case USE_COMPARE:
2557 case USE_ADDRESS:
2558 /* Just add the ivs based on the value of the iv used here. */
2559 add_iv_value_candidates (data, use->iv, use);
2560 break;
2561
2562 default:
2563 gcc_unreachable ();
2564 }
2565 }
2566 }
2567
2568 /* Record important candidates and add them to related_cands bitmaps
2569 if needed. */
2570
2571 static void
2572 record_important_candidates (struct ivopts_data *data)
2573 {
2574 unsigned i;
2575 struct iv_use *use;
2576
2577 for (i = 0; i < n_iv_cands (data); i++)
2578 {
2579 struct iv_cand *cand = iv_cand (data, i);
2580
2581 if (cand->important)
2582 bitmap_set_bit (data->important_candidates, i);
2583 }
2584
2585 data->consider_all_candidates = (n_iv_cands (data)
2586 <= CONSIDER_ALL_CANDIDATES_BOUND);
2587
2588 if (data->consider_all_candidates)
2589 {
2590 /* We will not need "related_cands" bitmaps in this case,
2591 so release them to decrease peak memory consumption. */
2592 for (i = 0; i < n_iv_uses (data); i++)
2593 {
2594 use = iv_use (data, i);
2595 BITMAP_FREE (use->related_cands);
2596 }
2597 }
2598 else
2599 {
2600 /* Add important candidates to the related_cands bitmaps. */
2601 for (i = 0; i < n_iv_uses (data); i++)
2602 bitmap_ior_into (iv_use (data, i)->related_cands,
2603 data->important_candidates);
2604 }
2605 }
2606
2607 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2608 If consider_all_candidates is true, we use a two-dimensional array; otherwise
2609 we allocate a small hash table for every use. */
2610
2611 static void
2612 alloc_use_cost_map (struct ivopts_data *data)
2613 {
2614 unsigned i, size, s, j;
2615
2616 for (i = 0; i < n_iv_uses (data); i++)
2617 {
2618 struct iv_use *use = iv_use (data, i);
2619 bitmap_iterator bi;
2620
2621 if (data->consider_all_candidates)
2622 size = n_iv_cands (data);
2623 else
2624 {
2625 s = 0;
2626 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2627 {
2628 s++;
2629 }
2630
2631 /* Round up to a power of two, so that computing the modulus is fast. */
2632 for (size = 1; size < s; size <<= 1)
2633 continue;
2634 }
2635
2636 use->n_map_members = size;
2637 use->cost_map = XCNEWVEC (struct cost_pair, size);
2638 }
2639 }
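
/* For illustration (hypothetical numbers): with 5 related candidates the
   table is sized to the next power of two, 8, so that set_use_iv_cost and
   get_use_iv_cost can reduce a candidate id modulo the size using a mask,
   s = cand->id & (use->n_map_members - 1), instead of a division, at the
   price of a few unused slots.  */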
2640
2641 /* Returns a description of the computation cost of an expression whose
2642 runtime cost is RUNTIME and whose complexity corresponds to COMPLEXITY. */
2643
2644 static comp_cost
2645 new_cost (unsigned runtime, unsigned complexity)
2646 {
2647 comp_cost cost;
2648
2649 cost.cost = runtime;
2650 cost.complexity = complexity;
2651
2652 return cost;
2653 }
2654
2655 /* Adds costs COST1 and COST2. */
2656
2657 static comp_cost
2658 add_costs (comp_cost cost1, comp_cost cost2)
2659 {
2660 cost1.cost += cost2.cost;
2661 cost1.complexity += cost2.complexity;
2662
2663 return cost1;
2664 }
2665 /* Subtracts COST2 from COST1. */
2666
2667 static comp_cost
2668 sub_costs (comp_cost cost1, comp_cost cost2)
2669 {
2670 cost1.cost -= cost2.cost;
2671 cost1.complexity -= cost2.complexity;
2672
2673 return cost1;
2674 }
2675
2676 /* Returns a negative number if COST1 < COST2, a positive number if
2677 COST1 > COST2, and 0 if COST1 = COST2. */
2678
2679 static int
2680 compare_costs (comp_cost cost1, comp_cost cost2)
2681 {
2682 if (cost1.cost == cost2.cost)
2683 return cost1.complexity - cost2.complexity;
2684
2685 return cost1.cost - cost2.cost;
2686 }
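
/* For illustration (hypothetical values): costs compare lexicographically
   on (cost, complexity).  A cost of {4, 1} is cheaper than {4, 2} but more
   expensive than {3, 7}; complexity only breaks ties between equal runtime
   costs.  */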
2687
2688 /* Returns true if COST is infinite. */
2689
2690 static bool
2691 infinite_cost_p (comp_cost cost)
2692 {
2693 return cost.cost == INFTY;
2694 }
2695
2696 /* Sets the cost of the (USE, CANDIDATE) pair to COST, and records that it
2697 depends on the invariants DEPENDS_ON, that the value used in expressing it
2698 is VALUE, and, for iv elimination, that the comparison operator is COMP. */
2699
2700 static void
2701 set_use_iv_cost (struct ivopts_data *data,
2702 struct iv_use *use, struct iv_cand *cand,
2703 comp_cost cost, bitmap depends_on, tree value,
2704 enum tree_code comp, int inv_expr_id)
2705 {
2706 unsigned i, s;
2707
2708 if (infinite_cost_p (cost))
2709 {
2710 BITMAP_FREE (depends_on);
2711 return;
2712 }
2713
2714 if (data->consider_all_candidates)
2715 {
2716 use->cost_map[cand->id].cand = cand;
2717 use->cost_map[cand->id].cost = cost;
2718 use->cost_map[cand->id].depends_on = depends_on;
2719 use->cost_map[cand->id].value = value;
2720 use->cost_map[cand->id].comp = comp;
2721 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2722 return;
2723 }
2724
2725 /* n_map_members is a power of two, so this computes modulo. */
2726 s = cand->id & (use->n_map_members - 1);
2727 for (i = s; i < use->n_map_members; i++)
2728 if (!use->cost_map[i].cand)
2729 goto found;
2730 for (i = 0; i < s; i++)
2731 if (!use->cost_map[i].cand)
2732 goto found;
2733
2734 gcc_unreachable ();
2735
2736 found:
2737 use->cost_map[i].cand = cand;
2738 use->cost_map[i].cost = cost;
2739 use->cost_map[i].depends_on = depends_on;
2740 use->cost_map[i].value = value;
2741 use->cost_map[i].comp = comp;
2742 use->cost_map[i].inv_expr_id = inv_expr_id;
2743 }
2744
2745 /* Gets cost of (USE, CANDIDATE) pair. */
2746
2747 static struct cost_pair *
2748 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2749 struct iv_cand *cand)
2750 {
2751 unsigned i, s;
2752 struct cost_pair *ret;
2753
2754 if (!cand)
2755 return NULL;
2756
2757 if (data->consider_all_candidates)
2758 {
2759 ret = use->cost_map + cand->id;
2760 if (!ret->cand)
2761 return NULL;
2762
2763 return ret;
2764 }
2765
2766 /* n_map_members is a power of two, so this computes modulo. */
2767 s = cand->id & (use->n_map_members - 1);
2768 for (i = s; i < use->n_map_members; i++)
2769 if (use->cost_map[i].cand == cand)
2770 return use->cost_map + i;
2771
2772 for (i = 0; i < s; i++)
2773 if (use->cost_map[i].cand == cand)
2774 return use->cost_map + i;
2775
2776 return NULL;
2777 }
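
/* For illustration: in the !consider_all_candidates case the cost map is a
   small open-addressing hash table.  Both insertion (set_use_iv_cost) and
   lookup (get_use_iv_cost) start probing at slot
   cand->id & (n_map_members - 1) and scan linearly, wrapping around once,
   so a pair stored for a candidate is always found again by the same probe
   sequence.  */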
2778
2779 /* Returns an estimate of the cost of computing SEQ. */
2780
2781 static unsigned
2782 seq_cost (rtx seq, bool speed)
2783 {
2784 unsigned cost = 0;
2785 rtx set;
2786
2787 for (; seq; seq = NEXT_INSN (seq))
2788 {
2789 set = single_set (seq);
2790 if (set)
2791 cost += set_src_cost (SET_SRC (set), speed);
2792 else
2793 cost++;
2794 }
2795
2796 return cost;
2797 }
2798
2799 /* Produces DECL_RTL for object OBJ so it looks like it is stored in memory. */
2800 static rtx
2801 produce_memory_decl_rtl (tree obj, int *regno)
2802 {
2803 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2804 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2805 rtx x;
2806
2807 gcc_assert (obj);
2808 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2809 {
2810 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2811 x = gen_rtx_SYMBOL_REF (address_mode, name);
2812 SET_SYMBOL_REF_DECL (x, obj);
2813 x = gen_rtx_MEM (DECL_MODE (obj), x);
2814 set_mem_addr_space (x, as);
2815 targetm.encode_section_info (obj, x, true);
2816 }
2817 else
2818 {
2819 x = gen_raw_REG (address_mode, (*regno)++);
2820 x = gen_rtx_MEM (DECL_MODE (obj), x);
2821 set_mem_addr_space (x, as);
2822 }
2823
2824 return x;
2825 }
2826
2827 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
2828 walk_tree. DATA contains the current fake register number. */
2829
2830 static tree
2831 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2832 {
2833 tree obj = NULL_TREE;
2834 rtx x = NULL_RTX;
2835 int *regno = (int *) data;
2836
2837 switch (TREE_CODE (*expr_p))
2838 {
2839 case ADDR_EXPR:
2840 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2841 handled_component_p (*expr_p);
2842 expr_p = &TREE_OPERAND (*expr_p, 0))
2843 continue;
2844 obj = *expr_p;
2845 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2846 x = produce_memory_decl_rtl (obj, regno);
2847 break;
2848
2849 case SSA_NAME:
2850 *ws = 0;
2851 obj = SSA_NAME_VAR (*expr_p);
2852 if (!DECL_RTL_SET_P (obj))
2853 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2854 break;
2855
2856 case VAR_DECL:
2857 case PARM_DECL:
2858 case RESULT_DECL:
2859 *ws = 0;
2860 obj = *expr_p;
2861
2862 if (DECL_RTL_SET_P (obj))
2863 break;
2864
2865 if (DECL_MODE (obj) == BLKmode)
2866 x = produce_memory_decl_rtl (obj, regno);
2867 else
2868 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2869
2870 break;
2871
2872 default:
2873 break;
2874 }
2875
2876 if (x)
2877 {
2878 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2879 SET_DECL_RTL (obj, x);
2880 }
2881
2882 return NULL_TREE;
2883 }
2884
2885 /* Determines the cost of the computation of EXPR. */
2886
2887 static unsigned
2888 computation_cost (tree expr, bool speed)
2889 {
2890 rtx seq, rslt;
2891 tree type = TREE_TYPE (expr);
2892 unsigned cost;
2893 /* Avoid using hard regs in ways which may be unsupported. */
2894 int regno = LAST_VIRTUAL_REGISTER + 1;
2895 struct cgraph_node *node = cgraph_get_node (current_function_decl);
2896 enum node_frequency real_frequency = node->frequency;
2897
2898 node->frequency = NODE_FREQUENCY_NORMAL;
2899 crtl->maybe_hot_insn_p = speed;
2900 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2901 start_sequence ();
2902 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2903 seq = get_insns ();
2904 end_sequence ();
2905 default_rtl_profile ();
2906 node->frequency = real_frequency;
2907
2908 cost = seq_cost (seq, speed);
2909 if (MEM_P (rslt))
2910 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2911 TYPE_ADDR_SPACE (type), speed);
2912 else if (!REG_P (rslt))
2913 cost += set_src_cost (rslt, speed);
2914
2915 return cost;
2916 }
2917
2918 /* Returns variable containing the value of candidate CAND at statement AT. */
2919
2920 static tree
2921 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2922 {
2923 if (stmt_after_increment (loop, cand, stmt))
2924 return cand->var_after;
2925 else
2926 return cand->var_before;
2927 }
2928
2929 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2930 same precision that is at least as wide as the precision of TYPE, stores
2931 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2932 type of A and B. */
2933
2934 static tree
2935 determine_common_wider_type (tree *a, tree *b)
2936 {
2937 tree wider_type = NULL;
2938 tree suba, subb;
2939 tree atype = TREE_TYPE (*a);
2940
2941 if (CONVERT_EXPR_P (*a))
2942 {
2943 suba = TREE_OPERAND (*a, 0);
2944 wider_type = TREE_TYPE (suba);
2945 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2946 return atype;
2947 }
2948 else
2949 return atype;
2950
2951 if (CONVERT_EXPR_P (*b))
2952 {
2953 subb = TREE_OPERAND (*b, 0);
2954 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2955 return atype;
2956 }
2957 else
2958 return atype;
2959
2960 *a = suba;
2961 *b = subb;
2962 return wider_type;
2963 }
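
/* For illustration (hypothetical types): if *A is (unsigned int) a_long and
   *B is (unsigned int) b_long, where a_long and b_long have the wider type
   long, this rewrites *A and *B to a_long and b_long and returns long, so
   that the caller can fold a_long - b_long in the wider type before
   truncating the result.  */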
2964
2965 /* Determines the expression by which USE is expressed from induction variable
2966 CAND at statement AT in LOOP. The expression is stored in a decomposed
2967 form into AFF. Returns false if USE cannot be expressed using CAND. */
2968
2969 static bool
2970 get_computation_aff (struct loop *loop,
2971 struct iv_use *use, struct iv_cand *cand, gimple at,
2972 struct affine_tree_combination *aff)
2973 {
2974 tree ubase = use->iv->base;
2975 tree ustep = use->iv->step;
2976 tree cbase = cand->iv->base;
2977 tree cstep = cand->iv->step, cstep_common;
2978 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2979 tree common_type, var;
2980 tree uutype;
2981 aff_tree cbase_aff, var_aff;
2982 double_int rat;
2983
2984 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2985 {
2986 /* We do not have enough precision to express the values of USE. */
2987 return false;
2988 }
2989
2990 var = var_at_stmt (loop, cand, at);
2991 uutype = unsigned_type_for (utype);
2992
2993 /* If the conversion is not a noop, perform it. */
2994 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2995 {
2996 cstep = fold_convert (uutype, cstep);
2997 cbase = fold_convert (uutype, cbase);
2998 var = fold_convert (uutype, var);
2999 }
3000
3001 if (!constant_multiple_of (ustep, cstep, &rat))
3002 return false;
3003
3004 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3005 type, we achieve better folding by computing their difference in this
3006 wider type, and casting the result to UUTYPE. We do not need to worry about
3007 overflows, as all the arithmetic will in the end be performed in UUTYPE
3008 anyway. */
3009 common_type = determine_common_wider_type (&ubase, &cbase);
3010
3011 /* use = ubase - ratio * cbase + ratio * var. */
3012 tree_to_aff_combination (ubase, common_type, aff);
3013 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3014 tree_to_aff_combination (var, uutype, &var_aff);
3015
3016 /* We need to shift the value if we are after the increment. */
3017 if (stmt_after_increment (loop, cand, at))
3018 {
3019 aff_tree cstep_aff;
3020
3021 if (common_type != uutype)
3022 cstep_common = fold_convert (common_type, cstep);
3023 else
3024 cstep_common = cstep;
3025
3026 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3027 aff_combination_add (&cbase_aff, &cstep_aff);
3028 }
3029
3030 aff_combination_scale (&cbase_aff, double_int_neg (rat));
3031 aff_combination_add (aff, &cbase_aff);
3032 if (common_type != uutype)
3033 aff_combination_convert (aff, uutype);
3034
3035 aff_combination_scale (&var_aff, rat);
3036 aff_combination_add (aff, &var_aff);
3037
3038 return true;
3039 }
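
/* For illustration (hypothetical values): suppose the use has
   ubase == &a[0] and ustep == 4, and the candidate has cbase == p and
   cstep == 4, so that rat == 1.  The decomposition built above is then

     use = ubase - 1 * cbase + 1 * var = (&a[0] - p) + var

   i.e. the use is recovered from the candidate's variable by adding the
   constant difference of the two bases.  */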
3040
3041 /* Determines the expression by which USE is expressed from induction variable
3042 CAND at statement AT in LOOP. The computation is unshared. */
3043
3044 static tree
3045 get_computation_at (struct loop *loop,
3046 struct iv_use *use, struct iv_cand *cand, gimple at)
3047 {
3048 aff_tree aff;
3049 tree type = TREE_TYPE (use->iv->base);
3050
3051 if (!get_computation_aff (loop, use, cand, at, &aff))
3052 return NULL_TREE;
3053 unshare_aff_combination (&aff);
3054 return fold_convert (type, aff_combination_to_tree (&aff));
3055 }
3056
3057 /* Determines the expression by which USE is expressed from induction variable
3058 CAND in LOOP. The computation is unshared. */
3059
3060 static tree
3061 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3062 {
3063 return get_computation_at (loop, use, cand, use->stmt);
3064 }
3065
3066 /* Adjust the cost COST for being in loop setup rather than loop body.
3067 If we're optimizing for space, the loop setup overhead is constant;
3068 if we're optimizing for speed, amortize it over the per-iteration cost. */
3069 static unsigned
3070 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3071 {
3072 if (cost == INFTY)
3073 return cost;
3074 else if (optimize_loop_for_speed_p (data->current_loop))
3075 return cost / avg_loop_niter (data->current_loop);
3076 else
3077 return cost;
3078 }
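
/* For illustration (hypothetical numbers): with an average of 10 iterations,
   a setup cost of 20 placed before the loop is charged as 20 / 10 = 2 units
   when optimizing for speed, but stays at 20 when optimizing for size, since
   the setup code is emitted exactly once either way.  */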
3079
3080 /* Returns cost of addition in MODE. */
3081
3082 unsigned
3083 add_regs_cost (enum machine_mode mode, bool speed)
3084 {
3085 static unsigned costs[NUM_MACHINE_MODES][2];
3086 rtx seq;
3087 unsigned cost;
3088
3089 if (costs[mode][speed])
3090 return costs[mode][speed];
3091
3092 start_sequence ();
3093 force_operand (gen_rtx_fmt_ee (PLUS, mode,
3094 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3095 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
3096 NULL_RTX);
3097 seq = get_insns ();
3098 end_sequence ();
3099
3100 cost = seq_cost (seq, speed);
3101 if (!cost)
3102 cost = 1;
3103
3104 costs[mode][speed] = cost;
3105
3106 if (dump_file && (dump_flags & TDF_DETAILS))
3107 fprintf (dump_file, "Addition in %s costs %d\n",
3108 GET_MODE_NAME (mode), cost);
3109 return cost;
3110 }
3111
3112 /* Returns cost of multiplication in MODE. */
3113
3114 unsigned
3115 multiply_regs_cost (enum machine_mode mode, bool speed)
3116 {
3117 static unsigned costs[NUM_MACHINE_MODES][2];
3118 rtx seq;
3119 unsigned cost;
3120
3121 if (costs[mode][speed])
3122 return costs[mode][speed];
3123
3124 start_sequence ();
3125 force_operand (gen_rtx_fmt_ee (MULT, mode,
3126 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3127 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
3128 NULL_RTX);
3129 seq = get_insns ();
3130 end_sequence ();
3131
3132 cost = seq_cost (seq, speed);
3133 if (!cost)
3134 cost = 1;
3135
3136 costs[mode][speed] = cost;
3137
3138 if (dump_file && (dump_flags & TDF_DETAILS))
3139 fprintf (dump_file, "Multiplication in %s costs %d\n",
3140 GET_MODE_NAME (mode), cost);
3141 return cost;
3142 }
3143
3144 /* Returns cost of addition with a constant in MODE. */
3145
3146 unsigned
3147 add_const_cost (enum machine_mode mode, bool speed)
3148 {
3149 static unsigned costs[NUM_MACHINE_MODES][2];
3150 rtx seq;
3151 unsigned cost;
3152
3153 if (costs[mode][speed])
3154 return costs[mode][speed];
3155
3156 /* Arbitrarily generate insns for x + 2, as the exact constant
3157 shouldn't matter. */
3158 start_sequence ();
3159 force_operand (gen_rtx_fmt_ee (PLUS, mode,
3160 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3161 gen_int_mode (2, mode)),
3162 NULL_RTX);
3163 seq = get_insns ();
3164 end_sequence ();
3165
3166 cost = seq_cost (seq, speed);
3167 if (!cost)
3168 cost = 1;
3169
3170 costs[mode][speed] = cost;
3171
3172 if (dump_file && (dump_flags & TDF_DETAILS))
3173 fprintf (dump_file, "Addition to constant in %s costs %d\n",
3174 GET_MODE_NAME (mode), cost);
3175 return cost;
3176 }
3177
3178 /* Returns cost of extend or truncate in MODE. */
3179
3180 unsigned
3181 extend_or_trunc_reg_cost (tree type_to, tree type_from, bool speed)
3182 {
3183 static unsigned costs[NUM_MACHINE_MODES][NUM_MACHINE_MODES][2];
3184 rtx seq;
3185 unsigned cost;
3186 enum machine_mode mode_to = TYPE_MODE (type_to);
3187 enum machine_mode mode_from = TYPE_MODE (type_from);
3188 tree size_to = TYPE_SIZE (type_to);
3189 tree size_from = TYPE_SIZE (type_from);
3190 enum rtx_code code;
3191
3192 gcc_assert (TREE_CODE (size_to) == INTEGER_CST
3193 && TREE_CODE (size_from) == INTEGER_CST);
3194
3195 if (costs[mode_to][mode_from][speed])
3196 return costs[mode_to][mode_from][speed];
3197
3198 if (tree_int_cst_lt (size_to, size_from))
3199 code = TRUNCATE;
3200 else if (TYPE_UNSIGNED (type_to))
3201 code = ZERO_EXTEND;
3202 else
3203 code = SIGN_EXTEND;
3204
3205 start_sequence ();
3206 gen_rtx_fmt_e (code, mode_to,
3207 gen_raw_REG (mode_from, LAST_VIRTUAL_REGISTER + 1));
3208 seq = get_insns ();
3209 end_sequence ();
3210
3211 cost = seq_cost (seq, speed);
3212 if (!cost)
3213 cost = 1;
3214
3215 if (dump_file && (dump_flags & TDF_DETAILS))
3216 fprintf (dump_file, "Conversion from %s to %s costs %d\n",
3217 GET_MODE_NAME (mode_to), GET_MODE_NAME (mode_from), cost);
3218
3219 costs[mode_to][mode_from][speed] = cost;
3220 return cost;
3221 }
3222
3223 /* Returns cost of negation in MODE. */
3224
3225 unsigned
3226 negate_reg_cost (enum machine_mode mode, bool speed)
3227 {
3228 static unsigned costs[NUM_MACHINE_MODES][2];
3229 rtx seq;
3230 unsigned cost;
3231
3232 if (costs[mode][speed])
3233 return costs[mode][speed];
3234
3235 start_sequence ();
3236 force_operand (gen_rtx_fmt_e (NEG, mode,
3237 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1)),
3238 NULL_RTX);
3239 seq = get_insns ();
3240 end_sequence ();
3241
3242 cost = seq_cost (seq, speed);
3243 if (!cost)
3244 cost = 1;
3245
3246 costs[mode][speed] = cost;
3247
3248 if (dump_file && (dump_flags & TDF_DETAILS))
3249 fprintf (dump_file, "Negation in %s costs %d\n",
3250 GET_MODE_NAME (mode), cost);
3251 return cost;
3252 }
3253
3254 /* Entry in a hashtable of already known costs for multiplication. */
3255 struct mbc_entry
3256 {
3257 HOST_WIDE_INT cst; /* The constant to multiply by. */
3258 enum machine_mode mode; /* In mode. */
3259 unsigned cost; /* The cost. */
3260 };
3261
3262 /* Computes the hash value for ENTRY. */
3263
3264 static hashval_t
3265 mbc_entry_hash (const void *entry)
3266 {
3267 const struct mbc_entry *e = (const struct mbc_entry *) entry;
3268
3269 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3270 }
3271
3272 /* Compares the hash table entries ENTRY1 and ENTRY2. */
3273
3274 static int
3275 mbc_entry_eq (const void *entry1, const void *entry2)
3276 {
3277 const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
3278 const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
3279
3280 return (e1->mode == e2->mode
3281 && e1->cst == e2->cst);
3282 }
3283
3284 /* Returns cost of multiplication by constant CST in MODE. */
3285
3286 unsigned
3287 multiply_by_const_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
3288 {
3289 struct mbc_entry **cached, act;
3290 rtx seq;
3291 unsigned cost;
3292
3293 gcc_assert (cost_tables_exist);
3294
3295 act.mode = mode;
3296 act.cst = cst;
3297 cached = (struct mbc_entry **)
3298 htab_find_slot (mult_costs[speed], &act, INSERT);
3299
3300 if (*cached)
3301 return (*cached)->cost;
3302
3303 *cached = XNEW (struct mbc_entry);
3304 (*cached)->mode = mode;
3305 (*cached)->cst = cst;
3306
3307 start_sequence ();
3308 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3309 gen_int_mode (cst, mode), NULL_RTX, 0);
3310 seq = get_insns ();
3311 end_sequence ();
3312
3313 cost = seq_cost (seq, speed);
3314
3315 if (dump_file && (dump_flags & TDF_DETAILS))
3316 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3317 (int) cst, GET_MODE_NAME (mode), cost);
3318
3319 (*cached)->cost = cost;
3320
3321 return cost;
3322 }
3323
3324 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3325 validity for a memory reference accessing memory of mode MODE in
3326 address space AS. */
3327
3328 DEF_VEC_P (sbitmap);
3329 DEF_VEC_ALLOC_P (sbitmap, heap);
3330
3331 bool
3332 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3333 addr_space_t as)
3334 {
3335 #define MAX_RATIO 128
3336 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3337 static VEC (sbitmap, heap) *valid_mult_list;
3338 sbitmap valid_mult;
3339
3340 if (data_index >= VEC_length (sbitmap, valid_mult_list))
3341 VEC_safe_grow_cleared (sbitmap, heap, valid_mult_list, data_index + 1);
3342
3343 valid_mult = VEC_index (sbitmap, valid_mult_list, data_index);
3344 if (!valid_mult)
3345 {
3346 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3347 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3348 rtx addr;
3349 HOST_WIDE_INT i;
3350
3351 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3352 sbitmap_zero (valid_mult);
3353 addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3354 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3355 {
3356 XEXP (addr, 1) = gen_int_mode (i, address_mode);
3357 if (memory_address_addr_space_p (mode, addr, as))
3358 SET_BIT (valid_mult, i + MAX_RATIO);
3359 }
3360
3361 if (dump_file && (dump_flags & TDF_DETAILS))
3362 {
3363 fprintf (dump_file, " allowed multipliers:");
3364 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3365 if (TEST_BIT (valid_mult, i + MAX_RATIO))
3366 fprintf (dump_file, " %d", (int) i);
3367 fprintf (dump_file, "\n");
3368 fprintf (dump_file, "\n");
3369 }
3370
3371 VEC_replace (sbitmap, valid_mult_list, data_index, valid_mult);
3372 }
3373
3374 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3375 return false;
3376
3377 return TEST_BIT (valid_mult, ratio + MAX_RATIO);
3378 }
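
/* For illustration (target-dependent, hypothetical example): on an i386-like
   target whose addresses allow index scales of 1, 2, 4 and 8, this function
   would be expected to return true for ratio 4 and false for ratio 3.  Each
   (mode, address space) pair gets its own cached bitmap covering the ratios
   in [-MAX_RATIO, MAX_RATIO].  */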
3379
3380 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3381 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3382 variable is omitted. Compute the cost for a memory reference that accesses
3383 a memory location of mode MEM_MODE in address space AS.
3384
3385 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3386 size of MEM_MODE / RATIO) is available. To make this determination, we
3387 look at the size of the increment to be made, which is given in CSTEP.
3388 CSTEP may be zero if the step is unknown.
3389 STMT_AFTER_INC is true iff the statement we're looking at is after the
3390 increment of the original biv.
3391
3392 TODO -- there must be some better way. This all is quite crude. */
3393
3394 typedef struct
3395 {
3396 HOST_WIDE_INT min_offset, max_offset;
3397 unsigned costs[2][2][2][2];
3398 } *address_cost_data;
3399
3400 DEF_VEC_P (address_cost_data);
3401 DEF_VEC_ALLOC_P (address_cost_data, heap);
3402
3403 static comp_cost
3404 get_address_cost (bool symbol_present, bool var_present,
3405 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3406 HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3407 addr_space_t as, bool speed,
3408 bool stmt_after_inc, bool *may_autoinc)
3409 {
3410 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3411 static VEC(address_cost_data, heap) *address_cost_data_list;
3412 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3413 address_cost_data data;
3414 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3415 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3416 unsigned cost, acost, complexity;
3417 bool offset_p, ratio_p, autoinc;
3418 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3419 unsigned HOST_WIDE_INT mask;
3420 unsigned bits;
3421
3422 if (data_index >= VEC_length (address_cost_data, address_cost_data_list))
3423 VEC_safe_grow_cleared (address_cost_data, heap, address_cost_data_list,
3424 data_index + 1);
3425
3426 data = VEC_index (address_cost_data, address_cost_data_list, data_index);
3427 if (!data)
3428 {
3429 HOST_WIDE_INT i;
3430 HOST_WIDE_INT rat, off = 0;
3431 int old_cse_not_expected, width;
3432 unsigned sym_p, var_p, off_p, rat_p, add_c;
3433 rtx seq, addr, base;
3434 rtx reg0, reg1;
3435
3436 data = (address_cost_data) xcalloc (1, sizeof (*data));
3437
3438 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3439
3440 width = GET_MODE_BITSIZE (address_mode) - 1;
3441 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3442 width = HOST_BITS_PER_WIDE_INT - 1;
3443 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3444
3445 for (i = width; i >= 0; i--)
3446 {
3447 off = -((HOST_WIDE_INT) 1 << i);
3448 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3449 if (memory_address_addr_space_p (mem_mode, addr, as))
3450 break;
3451 }
3452 data->min_offset = (i == -1 ? 0 : off);
3453
3454 for (i = width; i >= 0; i--)
3455 {
3456 off = ((HOST_WIDE_INT) 1 << i) - 1;
3457 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3458 if (memory_address_addr_space_p (mem_mode, addr, as))
3459 break;
3460 }
3461 if (i == -1)
3462 off = 0;
3463 data->max_offset = off;
3464
3465 if (dump_file && (dump_flags & TDF_DETAILS))
3466 {
3467 fprintf (dump_file, "get_address_cost:\n");
3468 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3469 GET_MODE_NAME (mem_mode),
3470 data->min_offset);
3471 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3472 GET_MODE_NAME (mem_mode),
3473 data->max_offset);
3474 }
3475
3476 rat = 1;
3477 for (i = 2; i <= MAX_RATIO; i++)
3478 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3479 {
3480 rat = i;
3481 break;
3482 }
3483
3484 /* Compute the cost of various addressing modes. */
3485 acost = 0;
3486 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3487 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3488
3489 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3490 || USE_STORE_PRE_DECREMENT (mem_mode))
3491 {
3492 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3493 has_predec[mem_mode]
3494 = memory_address_addr_space_p (mem_mode, addr, as);
3495 }
3496 if (USE_LOAD_POST_DECREMENT (mem_mode)
3497 || USE_STORE_POST_DECREMENT (mem_mode))
3498 {
3499 addr = gen_rtx_POST_DEC (address_mode, reg0);
3500 has_postdec[mem_mode]
3501 = memory_address_addr_space_p (mem_mode, addr, as);
3502 }
3503 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3504 || USE_STORE_PRE_INCREMENT (mem_mode))
3505 {
3506 addr = gen_rtx_PRE_INC (address_mode, reg0);
3507 has_preinc[mem_mode]
3508 = memory_address_addr_space_p (mem_mode, addr, as);
3509 }
3510 if (USE_LOAD_POST_INCREMENT (mem_mode)
3511 || USE_STORE_POST_INCREMENT (mem_mode))
3512 {
3513 addr = gen_rtx_POST_INC (address_mode, reg0);
3514 has_postinc[mem_mode]
3515 = memory_address_addr_space_p (mem_mode, addr, as);
3516 }
3517 for (i = 0; i < 16; i++)
3518 {
3519 sym_p = i & 1;
3520 var_p = (i >> 1) & 1;
3521 off_p = (i >> 2) & 1;
3522 rat_p = (i >> 3) & 1;
3523
3524 addr = reg0;
3525 if (rat_p)
3526 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3527 gen_int_mode (rat, address_mode));
3528
3529 if (var_p)
3530 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3531
3532 if (sym_p)
3533 {
3534 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3535 /* ??? We can run into trouble with some backends by presenting
3536 it with symbols which haven't been properly passed through
3537 targetm.encode_section_info. By setting the local bit, we
3538 enhance the probability of things working. */
3539 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3540
3541 if (off_p)
3542 base = gen_rtx_fmt_e (CONST, address_mode,
3543 gen_rtx_fmt_ee
3544 (PLUS, address_mode, base,
3545 gen_int_mode (off, address_mode)));
3546 }
3547 else if (off_p)
3548 base = gen_int_mode (off, address_mode);
3549 else
3550 base = NULL_RTX;
3551
3552 if (base)
3553 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3554
3555 start_sequence ();
3556 /* To avoid splitting addressing modes, pretend that no cse will
3557 follow. */
3558 old_cse_not_expected = cse_not_expected;
3559 cse_not_expected = true;
3560 addr = memory_address_addr_space (mem_mode, addr, as);
3561 cse_not_expected = old_cse_not_expected;
3562 seq = get_insns ();
3563 end_sequence ();
3564
3565 acost = seq_cost (seq, speed);
3566 acost += address_cost (addr, mem_mode, as, speed);
3567
3568 if (!acost)
3569 acost = 1;
3570 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3571 }
3572
3573 /* On some targets, it is quite expensive to load a symbol into a register,
3574 which makes addresses that contain symbols look much more expensive.
3575 However, the symbol will have to be loaded in any case before the
3576 loop (and quite likely we have it in a register already), so it does not
3577 make much sense to penalize them too heavily. So make some final
3578 tweaks for the SYMBOL_PRESENT modes:
3579
3580 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3581 var is cheaper, use this mode with a small penalty.
3582 If VAR_PRESENT is true, try whether the mode with
3583 SYMBOL_PRESENT = false is cheaper even with the cost of the addition, and
3584 if this is the case, use it. */
3585 add_c = add_regs_cost (address_mode, speed);
3586 for (i = 0; i < 8; i++)
3587 {
3588 var_p = i & 1;
3589 off_p = (i >> 1) & 1;
3590 rat_p = (i >> 2) & 1;
3591
3592 acost = data->costs[0][1][off_p][rat_p] + 1;
3593 if (var_p)
3594 acost += add_c;
3595
3596 if (acost < data->costs[1][var_p][off_p][rat_p])
3597 data->costs[1][var_p][off_p][rat_p] = acost;
3598 }
3599
3600 if (dump_file && (dump_flags & TDF_DETAILS))
3601 {
3602 fprintf (dump_file, "Address costs:\n");
3603
3604 for (i = 0; i < 16; i++)
3605 {
3606 sym_p = i & 1;
3607 var_p = (i >> 1) & 1;
3608 off_p = (i >> 2) & 1;
3609 rat_p = (i >> 3) & 1;
3610
3611 fprintf (dump_file, " ");
3612 if (sym_p)
3613 fprintf (dump_file, "sym + ");
3614 if (var_p)
3615 fprintf (dump_file, "var + ");
3616 if (off_p)
3617 fprintf (dump_file, "cst + ");
3618 if (rat_p)
3619 fprintf (dump_file, "rat * ");
3620
3621 acost = data->costs[sym_p][var_p][off_p][rat_p];
3622 fprintf (dump_file, "index costs %d\n", acost);
3623 }
3624 if (has_predec[mem_mode] || has_postdec[mem_mode]
3625 || has_preinc[mem_mode] || has_postinc[mem_mode])
3626 fprintf (dump_file, " May include autoinc/dec\n");
3627 fprintf (dump_file, "\n");
3628 }
3629
3630 VEC_replace (address_cost_data, address_cost_data_list,
3631 data_index, data);
3632 }
3633
3634 bits = GET_MODE_BITSIZE (address_mode);
3635 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3636 offset &= mask;
3637 if ((offset >> (bits - 1) & 1))
3638 offset |= ~mask;
3639 s_offset = offset;
3640
3641 autoinc = false;
3642 msize = GET_MODE_SIZE (mem_mode);
3643 autoinc_offset = offset;
3644 if (stmt_after_inc)
3645 autoinc_offset += ratio * cstep;
3646 if (symbol_present || var_present || ratio != 1)
3647 autoinc = false;
3648 else if ((has_postinc[mem_mode] && autoinc_offset == 0
3649 && msize == cstep)
3650 || (has_postdec[mem_mode] && autoinc_offset == 0
3651 && msize == -cstep)
3652 || (has_preinc[mem_mode] && autoinc_offset == msize
3653 && msize == cstep)
3654 || (has_predec[mem_mode] && autoinc_offset == -msize
3655 && msize == -cstep))
3656 autoinc = true;
3657
3658 cost = 0;
3659 offset_p = (s_offset != 0
3660 && data->min_offset <= s_offset
3661 && s_offset <= data->max_offset);
3662 ratio_p = (ratio != 1
3663 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3664
3665 if (ratio != 1 && !ratio_p)
3666 cost += multiply_by_const_cost (ratio, address_mode, speed);
3667
3668 if (s_offset && !offset_p && !symbol_present)
3669 cost += add_regs_cost (address_mode, speed);
3670
3671 if (may_autoinc)
3672 *may_autoinc = autoinc;
3673 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3674 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3675 return new_cost (cost + acost, complexity);
3676 }
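
/* For illustration: the 16 precomputed costs above are indexed by four bits,
   one per optional address part.  E.g. i == 13 (binary 1101, i.e.
   sym_p == 1, var_p == 0, off_p == 1, rat_p == 1) prices an address of the
   shape sym + cst + rat * index.  */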
3677
3678 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
3679 EXPR operand holding the shift. COST0 and COST1 are the costs for
3680 calculating the operands of EXPR. Returns true if successful, and returns
3681 the cost in COST. */
3682
3683 static bool
3684 get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
3685 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3686 {
3687 comp_cost res;
3688 tree op1 = TREE_OPERAND (expr, 1);
3689 tree cst = TREE_OPERAND (mult, 1);
3690 tree multop = TREE_OPERAND (mult, 0);
3691 int m = exact_log2 (int_cst_value (cst));
3692 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3693 int sa_cost;
3694
3695 if (!(m >= 0 && m < maxm))
3696 return false;
3697
3698 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3699 ? shiftadd_cost[speed][mode][m]
3700 : (mult == op1
3701 ? shiftsub1_cost[speed][mode][m]
3702 : shiftsub0_cost[speed][mode][m]));
3703 res = new_cost (sa_cost, 0);
3704 res = add_costs (res, mult == op1 ? cost0 : cost1);
3705
3706 STRIP_NOPS (multop);
3707 if (!is_gimple_val (multop))
3708 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3709
3710 *cost = res;
3711 return true;
3712 }
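
/* For illustration (hypothetical expression): b + a * 4 matches this pattern
   with cst == 4, hence m == 2, and is costed roughly as a single shift-add
   instruction on targets that have one, instead of a separate multiplication
   and addition.  */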
3713
3714 /* Estimates cost of forcing expression EXPR into a variable. */
3715
3716 static comp_cost
3717 force_expr_to_var_cost (tree expr, bool speed)
3718 {
3719 static bool costs_initialized = false;
3720 static unsigned integer_cost [2];
3721 static unsigned symbol_cost [2];
3722 static unsigned address_cost [2];
3723 tree op0, op1;
3724 comp_cost cost0, cost1, cost;
3725 enum machine_mode mode;
3726
3727 if (!costs_initialized)
3728 {
3729 tree type = build_pointer_type (integer_type_node);
3730 tree var, addr;
3731 rtx x;
3732 int i;
3733
3734 var = create_tmp_var_raw (integer_type_node, "test_var");
3735 TREE_STATIC (var) = 1;
3736 x = produce_memory_decl_rtl (var, NULL);
3737 SET_DECL_RTL (var, x);
3738
3739 addr = build1 (ADDR_EXPR, type, var);
3740
3741
3742 for (i = 0; i < 2; i++)
3743 {
3744 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3745 2000), i);
3746
3747 symbol_cost[i] = computation_cost (addr, i) + 1;
3748
3749 address_cost[i]
3750 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3751 if (dump_file && (dump_flags & TDF_DETAILS))
3752 {
3753 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3754 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3755 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3756 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3757 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3758 fprintf (dump_file, "\n");
3759 }
3760 }
3761
3762 costs_initialized = true;
3763 }
3764
3765 STRIP_NOPS (expr);
3766
3767 if (SSA_VAR_P (expr))
3768 return no_cost;
3769
3770 if (is_gimple_min_invariant (expr))
3771 {
3772 if (TREE_CODE (expr) == INTEGER_CST)
3773 return new_cost (integer_cost [speed], 0);
3774
3775 if (TREE_CODE (expr) == ADDR_EXPR)
3776 {
3777 tree obj = TREE_OPERAND (expr, 0);
3778
3779 if (TREE_CODE (obj) == VAR_DECL
3780 || TREE_CODE (obj) == PARM_DECL
3781 || TREE_CODE (obj) == RESULT_DECL)
3782 return new_cost (symbol_cost [speed], 0);
3783 }
3784
3785 return new_cost (address_cost [speed], 0);
3786 }
3787
3788 switch (TREE_CODE (expr))
3789 {
3790 case POINTER_PLUS_EXPR:
3791 case PLUS_EXPR:
3792 case MINUS_EXPR:
3793 case MULT_EXPR:
3794 op0 = TREE_OPERAND (expr, 0);
3795 op1 = TREE_OPERAND (expr, 1);
3796 STRIP_NOPS (op0);
3797 STRIP_NOPS (op1);
3798
3799 if (is_gimple_val (op0))
3800 cost0 = no_cost;
3801 else
3802 cost0 = force_expr_to_var_cost (op0, speed);
3803
3804 if (is_gimple_val (op1))
3805 cost1 = no_cost;
3806 else
3807 cost1 = force_expr_to_var_cost (op1, speed);
3808
3809 break;
3810
3811 case NEGATE_EXPR:
3812 op0 = TREE_OPERAND (expr, 0);
3813 STRIP_NOPS (op0);
3814 op1 = NULL_TREE;
3815
3816 if (is_gimple_val (op0))
3817 cost0 = no_cost;
3818 else
3819 cost0 = force_expr_to_var_cost (op0, speed);
3820
3821 cost1 = no_cost;
3822 break;
3823
3824 default:
3825 /* Just an arbitrary value, FIXME. */
3826 return new_cost (target_spill_cost[speed], 0);
3827 }
3828
3829 mode = TYPE_MODE (TREE_TYPE (expr));
3830 switch (TREE_CODE (expr))
3831 {
3832 case POINTER_PLUS_EXPR:
3833 case PLUS_EXPR:
3834 case MINUS_EXPR:
3835 case NEGATE_EXPR:
3836 cost = new_cost (add_regs_cost (mode, speed), 0);
3837 if (TREE_CODE (expr) != NEGATE_EXPR)
3838 {
3839 tree mult = NULL_TREE;
3840 comp_cost sa_cost;
3841 if (TREE_CODE (op1) == MULT_EXPR)
3842 mult = op1;
3843 else if (TREE_CODE (op0) == MULT_EXPR)
3844 mult = op0;
3845
3846 if (mult != NULL_TREE
3847 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3848 && get_shiftadd_cost (expr, mode, cost0, cost1, mult, speed,
3849 &sa_cost))
3850 return sa_cost;
3851 }
3852 break;
3853
3854 case MULT_EXPR:
3855 if (cst_and_fits_in_hwi (op0))
3856 cost = new_cost (multiply_by_const_cost (int_cst_value (op0),
3857 mode, speed), 0);
3858 else if (cst_and_fits_in_hwi (op1))
3859 cost = new_cost (multiply_by_const_cost (int_cst_value (op1),
3860 mode, speed), 0);
3861 else
3862 return new_cost (target_spill_cost [speed], 0);
3863 break;
3864
3865 default:
3866 gcc_unreachable ();
3867 }
3868
3869 cost = add_costs (cost, cost0);
3870 cost = add_costs (cost, cost1);
3871
3872 /* Bound the cost by target_spill_cost. The parts of complicated
3873 computations often are either loop invariant or at least can
3874 be shared between several iv uses, so letting this grow without
3875 limits would not give reasonable results. */
3876 if (cost.cost > (int) target_spill_cost [speed])
3877 cost.cost = target_spill_cost [speed];
3878
3879 return cost;
3880 }
3881
3882 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3883 invariants the computation depends on. */
3884
3885 static comp_cost
3886 force_var_cost (struct ivopts_data *data,
3887 tree expr, bitmap *depends_on)
3888 {
3889 if (depends_on)
3890 {
3891 fd_ivopts_data = data;
3892 walk_tree (&expr, find_depends, depends_on, NULL);
3893 }
3894
3895 return force_expr_to_var_cost (expr, data->speed);
3896 }
3897
3898 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3899 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3900 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3901 invariants the computation depends on. */
3902
3903 static comp_cost
3904 split_address_cost (struct ivopts_data *data,
3905 tree addr, bool *symbol_present, bool *var_present,
3906 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3907 {
3908 tree core;
3909 HOST_WIDE_INT bitsize;
3910 HOST_WIDE_INT bitpos;
3911 tree toffset;
3912 enum machine_mode mode;
3913 int unsignedp, volatilep;
3914
3915 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3916 &unsignedp, &volatilep, false);
3917
3918 if (toffset != 0
3919 || bitpos % BITS_PER_UNIT != 0
3920 || TREE_CODE (core) != VAR_DECL)
3921 {
3922 *symbol_present = false;
3923 *var_present = true;
3924 fd_ivopts_data = data;
3925 walk_tree (&addr, find_depends, depends_on, NULL);
3926 return new_cost (target_spill_cost[data->speed], 0);
3927 }
3928
3929 *offset += bitpos / BITS_PER_UNIT;
3930 if (TREE_STATIC (core)
3931 || DECL_EXTERNAL (core))
3932 {
3933 *symbol_present = true;
3934 *var_present = false;
3935 return no_cost;
3936 }
3937
3938 *symbol_present = false;
3939 *var_present = true;
3940 return no_cost;
3941 }
3942
3943 /* Estimates cost of expressing difference of addresses E1 - E2 as
3944 var + symbol + offset. The value of offset is added to OFFSET,
3945 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3946 part is missing. DEPENDS_ON is a set of the invariants the computation
3947 depends on. */
3948
3949 static comp_cost
3950 ptr_difference_cost (struct ivopts_data *data,
3951 tree e1, tree e2, bool *symbol_present, bool *var_present,
3952 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3953 {
3954 HOST_WIDE_INT diff = 0;
3955 aff_tree aff_e1, aff_e2;
3956 tree type;
3957
3958 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3959
3960 if (ptr_difference_const (e1, e2, &diff))
3961 {
3962 *offset += diff;
3963 *symbol_present = false;
3964 *var_present = false;
3965 return no_cost;
3966 }
3967
3968 if (integer_zerop (e2))
3969 return split_address_cost (data, TREE_OPERAND (e1, 0),
3970 symbol_present, var_present, offset, depends_on);
3971
3972 *symbol_present = false;
3973 *var_present = true;
3974
3975 type = signed_type_for (TREE_TYPE (e1));
3976 tree_to_aff_combination (e1, type, &aff_e1);
3977 tree_to_aff_combination (e2, type, &aff_e2);
3978 aff_combination_scale (&aff_e2, double_int_minus_one);
3979 aff_combination_add (&aff_e1, &aff_e2);
3980
3981 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3982 }
3983
3984 /* Estimates cost of expressing difference E1 - E2 as
3985 var + symbol + offset. The value of offset is added to OFFSET;
3986 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3987 part is missing. DEPENDS_ON is a set of the invariants the computation
3988 depends on. */
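/* A sketch of the fast paths below (with hypothetical operands): for
   E1 == x + 16 and E2 == x, strip_offset leaves E1 == E2 == x and adds
   16 to *OFFSET, so the result is no_cost with *VAR_PRESENT = false.
   Only when no such cancellation applies do we fall back to building
   the affine combination E1 - E2 and forcing it into a variable. */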
3989
3990 static comp_cost
3991 difference_cost (struct ivopts_data *data,
3992 tree e1, tree e2, bool *symbol_present, bool *var_present,
3993 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3994 {
3995 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3996 unsigned HOST_WIDE_INT off1, off2;
3997 aff_tree aff_e1, aff_e2;
3998 tree type;
3999
4000 e1 = strip_offset (e1, &off1);
4001 e2 = strip_offset (e2, &off2);
4002 *offset += off1 - off2;
4003
4004 STRIP_NOPS (e1);
4005 STRIP_NOPS (e2);
4006
4007 if (TREE_CODE (e1) == ADDR_EXPR)
4008 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
4009 offset, depends_on);
4010 *symbol_present = false;
4011
4012 if (operand_equal_p (e1, e2, 0))
4013 {
4014 *var_present = false;
4015 return no_cost;
4016 }
4017
4018 *var_present = true;
4019
4020 if (integer_zerop (e2))
4021 return force_var_cost (data, e1, depends_on);
4022
4023 if (integer_zerop (e1))
4024 {
4025 comp_cost cost = force_var_cost (data, e2, depends_on);
4026 cost.cost += multiply_by_const_cost (-1, mode, data->speed);
4027 return cost;
4028 }
4029
4030 type = signed_type_for (TREE_TYPE (e1));
4031 tree_to_aff_combination (e1, type, &aff_e1);
4032 tree_to_aff_combination (e2, type, &aff_e2);
4033 aff_combination_scale (&aff_e2, double_int_minus_one);
4034 aff_combination_add (&aff_e1, &aff_e2);
4035
4036 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4037 }
4038
4039 /* Returns true if AFF1 and AFF2 are identical. */
4040
4041 static bool
4042 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4043 {
4044 unsigned i;
4045
4046 if (aff1->n != aff2->n)
4047 return false;
4048
4049 for (i = 0; i < aff1->n; i++)
4050 {
4051 if (double_int_cmp (aff1->elts[i].coef, aff2->elts[i].coef, 0) != 0)
4052 return false;
4053
4054 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4055 return false;
4056 }
4057 return true;
4058 }
4059
4060 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
4061
4062 static int
4063 get_expr_id (struct ivopts_data *data, tree expr)
4064 {
4065 struct iv_inv_expr_ent ent;
4066 struct iv_inv_expr_ent **slot;
4067
4068 ent.expr = expr;
4069 ent.hash = iterative_hash_expr (expr, 0);
4070 slot = (struct iv_inv_expr_ent **) htab_find_slot (data->inv_expr_tab,
4071 &ent, INSERT);
4072 if (*slot)
4073 return (*slot)->id;
4074
4075 *slot = XNEW (struct iv_inv_expr_ent);
4076 (*slot)->expr = expr;
4077 (*slot)->hash = ent.hash;
4078 (*slot)->id = data->inv_expr_id++;
4079 return (*slot)->id;
4080 }
4081
4082 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
4083 requires a new compiler-generated temporary. Returns -1 otherwise.
4084 ADDRESS_P is a flag indicating whether the expression is for address
4085 computation. */
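/* For instance (an assumed example): with UBASE == p + 4, CBASE == p and
   RATIO == 1, the constant part of UBASE is stripped below and the bases
   then compare equal, so no new temporary is needed and -1 is returned. */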
4086
4087 static int
4088 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
4089 tree cbase, HOST_WIDE_INT ratio,
4090 bool address_p)
4091 {
4092 aff_tree ubase_aff, cbase_aff;
4093 tree expr, ub, cb;
4094
4095 STRIP_NOPS (ubase);
4096 STRIP_NOPS (cbase);
4097 ub = ubase;
4098 cb = cbase;
4099
4100 if ((TREE_CODE (ubase) == INTEGER_CST)
4101 && (TREE_CODE (cbase) == INTEGER_CST))
4102 return -1;
4103
4104 /* Strips the constant part. */
4105 if (TREE_CODE (ubase) == PLUS_EXPR
4106 || TREE_CODE (ubase) == MINUS_EXPR
4107 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4108 {
4109 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4110 ubase = TREE_OPERAND (ubase, 0);
4111 }
4112
4113 /* Strips the constant part. */
4114 if (TREE_CODE (cbase) == PLUS_EXPR
4115 || TREE_CODE (cbase) == MINUS_EXPR
4116 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4117 {
4118 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4119 cbase = TREE_OPERAND (cbase, 0);
4120 }
4121
4122 if (address_p)
4123 {
4124 if (((TREE_CODE (ubase) == SSA_NAME)
4125 || (TREE_CODE (ubase) == ADDR_EXPR
4126 && is_gimple_min_invariant (ubase)))
4127 && (TREE_CODE (cbase) == INTEGER_CST))
4128 return -1;
4129
4130 if (((TREE_CODE (cbase) == SSA_NAME)
4131 || (TREE_CODE (cbase) == ADDR_EXPR
4132 && is_gimple_min_invariant (cbase)))
4133 && (TREE_CODE (ubase) == INTEGER_CST))
4134 return -1;
4135 }
4136
4137 if (ratio == 1)
4138 {
4139 if (operand_equal_p (ubase, cbase, 0))
4140 return -1;
4141
4142 if (TREE_CODE (ubase) == ADDR_EXPR
4143 && TREE_CODE (cbase) == ADDR_EXPR)
4144 {
4145 tree usym, csym;
4146
4147 usym = TREE_OPERAND (ubase, 0);
4148 csym = TREE_OPERAND (cbase, 0);
4149 if (TREE_CODE (usym) == ARRAY_REF)
4150 {
4151 tree ind = TREE_OPERAND (usym, 1);
4152 if (TREE_CODE (ind) == INTEGER_CST
4153 && host_integerp (ind, 0)
4154 && TREE_INT_CST_LOW (ind) == 0)
4155 usym = TREE_OPERAND (usym, 0);
4156 }
4157 if (TREE_CODE (csym) == ARRAY_REF)
4158 {
4159 tree ind = TREE_OPERAND (csym, 1);
4160 if (TREE_CODE (ind) == INTEGER_CST
4161 && host_integerp (ind, 0)
4162 && TREE_INT_CST_LOW (ind) == 0)
4163 csym = TREE_OPERAND (csym, 0);
4164 }
4165 if (operand_equal_p (usym, csym, 0))
4166 return -1;
4167 }
4168 /* Now do a more complex comparison. */
4169 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4170 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4171 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4172 return -1;
4173 }
4174
4175 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4176 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4177
4178 aff_combination_scale (&cbase_aff, shwi_to_double_int (-1 * ratio));
4179 aff_combination_add (&ubase_aff, &cbase_aff);
4180 expr = aff_combination_to_tree (&ubase_aff);
4181 return get_expr_id (data, expr);
4182 }
4183
4184
4185
4186 /* Determines the cost of the computation by which USE is expressed
4187 from induction variable CAND. If ADDRESS_P is true, we just need
4188 to create an address from it, otherwise we want to get it into a
4189 register. A set of invariants we depend on is stored in
4190 DEPENDS_ON. AT is the statement at which the value is computed.
4191 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4192 addressing is likely. */
4193
4194 static comp_cost
4195 get_computation_cost_at (struct ivopts_data *data,
4196 struct iv_use *use, struct iv_cand *cand,
4197 bool address_p, bitmap *depends_on, gimple at,
4198 bool *can_autoinc,
4199 int *inv_expr_id)
4200 {
4201 tree ubase = use->iv->base, ustep = use->iv->step;
4202 tree cbase, cstep;
4203 tree utype = TREE_TYPE (ubase), ctype;
4204 unsigned HOST_WIDE_INT cstepi, offset = 0;
4205 HOST_WIDE_INT ratio, aratio;
4206 bool var_present, symbol_present, stmt_is_after_inc;
4207 comp_cost cost;
4208 double_int rat;
4209 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4210
4211 *depends_on = NULL;
4212
4213 /* Only consider real candidates. */
4214 if (!cand->iv)
4215 return infinite_cost;
4216
4217 cbase = cand->iv->base;
4218 cstep = cand->iv->step;
4219 ctype = TREE_TYPE (cbase);
4220
4221 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4222 {
4223 /* We do not have enough precision to express the values of USE. */
4224 return infinite_cost;
4225 }
4226
4227 if (address_p
4228 || (use->iv->base_object
4229 && cand->iv->base_object
4230 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4231 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4232 {
4233 /* Do not try to express the address of an object with a computation
4234 based on the address of a different object. This may cause problems
4235 in RTL-level alias analysis (which does not expect this to happen,
4236 as it is illegal in C), and would be unlikely to be useful
4237 anyway. */
4238 if (use->iv->base_object
4239 && cand->iv->base_object
4240 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4241 return infinite_cost;
4242 }
4243
4244 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4245 {
4246 /* TODO -- add direct handling of this case. */
4247 goto fallback;
4248 }
4249
4250 /* CSTEPI is removed from the offset in case the statement is after the
4251 increment. If the step is not constant, we use zero instead.
4252 This is a bit imprecise (there is the extra addition), but
4253 redundancy elimination is likely to transform the code so that
4254 it uses the value of the variable before the increment anyway,
4255 so this is not all that unrealistic. */
4256 if (cst_and_fits_in_hwi (cstep))
4257 cstepi = int_cst_value (cstep);
4258 else
4259 cstepi = 0;
4260
4261 if (!constant_multiple_of (ustep, cstep, &rat))
4262 return infinite_cost;
4263
4264 if (double_int_fits_in_shwi_p (rat))
4265 ratio = double_int_to_shwi (rat);
4266 else
4267 return infinite_cost;
4268
4269 STRIP_NOPS (cbase);
4270 ctype = TREE_TYPE (cbase);
4271
4272 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4273
4274 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4275 or ratio == 1, it is better to handle this like
4276
4277 ubase - ratio * cbase + ratio * var
4278
4279 (this also holds in the case ratio == -1, TODO). */
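/* For example (illustrative numbers only): with ubase == 20, cbase == 4
   and ratio == 2, we compute use = 12 + 2 * var by folding
   -ratio * cbase == -8 together with ubase into the offset, rather than
   subtracting cbase from var at runtime. */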
4280
4281 if (cst_and_fits_in_hwi (cbase))
4282 {
4283 offset = - ratio * int_cst_value (cbase);
4284 cost = difference_cost (data,
4285 ubase, build_int_cst (utype, 0),
4286 &symbol_present, &var_present, &offset,
4287 depends_on);
4288 cost.cost /= avg_loop_niter (data->current_loop);
4289 }
4290 else if (ratio == 1)
4291 {
4292 tree real_cbase = cbase;
4293
4294 /* Check to see if any adjustment is needed. */
4295 if (cstepi == 0 && stmt_is_after_inc)
4296 {
4297 aff_tree real_cbase_aff;
4298 aff_tree cstep_aff;
4299
4300 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4301 &real_cbase_aff);
4302 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4303
4304 aff_combination_add (&real_cbase_aff, &cstep_aff);
4305 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4306 }
4307
4308 cost = difference_cost (data,
4309 ubase, real_cbase,
4310 &symbol_present, &var_present, &offset,
4311 depends_on);
4312 cost.cost /= avg_loop_niter (data->current_loop);
4313 }
4314 else if (address_p
4315 && !POINTER_TYPE_P (ctype)
4316 && multiplier_allowed_in_address_p
4317 (ratio, TYPE_MODE (TREE_TYPE (utype)),
4318 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4319 {
4320 cbase
4321 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4322 cost = difference_cost (data,
4323 ubase, cbase,
4324 &symbol_present, &var_present, &offset,
4325 depends_on);
4326 cost.cost /= avg_loop_niter (data->current_loop);
4327 }
4328 else
4329 {
4330 cost = force_var_cost (data, cbase, depends_on);
4331 cost = add_costs (cost,
4332 difference_cost (data,
4333 ubase, build_int_cst (utype, 0),
4334 &symbol_present, &var_present,
4335 &offset, depends_on));
4336 cost.cost /= avg_loop_niter (data->current_loop);
4337 cost.cost += add_regs_cost (TYPE_MODE (ctype), data->speed);
4338 }
4339
4340 if (inv_expr_id)
4341 {
4342 *inv_expr_id =
4343 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4344 /* Clear depends_on. */
4345 if (*inv_expr_id != -1 && depends_on && *depends_on)
4346 bitmap_clear (*depends_on);
4347 }
4348
4349 /* If we are after the increment, the value of the candidate is higher by
4350 one iteration. */
4351 if (stmt_is_after_inc)
4352 offset -= ratio * cstepi;
4353
4354 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4355 (symbol/var1/const parts may be omitted). If we are looking for an
4356 address, find the cost of addressing this. */
4357 if (address_p)
4358 return add_costs (cost,
4359 get_address_cost (symbol_present, var_present,
4360 offset, ratio, cstepi,
4361 TYPE_MODE (TREE_TYPE (utype)),
4362 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4363 speed, stmt_is_after_inc,
4364 can_autoinc));
4365
4366 /* Otherwise estimate the costs for computing the expression. */
4367 if (!symbol_present && !var_present && !offset)
4368 {
4369 if (ratio != 1)
4370 cost.cost += multiply_by_const_cost (ratio, TYPE_MODE (ctype), speed);
4371 return cost;
4372 }
4373
4374 /* Symbol + offset should be compile-time computable, so assume they
4375 are added to the variable just once, if present. */
4376 if (var_present && (symbol_present || offset))
4377 cost.cost += adjust_setup_cost (data,
4378 add_regs_cost (TYPE_MODE (ctype), speed));
4379
4380 /* Having an offset does not affect the runtime cost in case it is added
4381 to a symbol, but it increases complexity. */
4382 if (offset)
4383 cost.complexity++;
4384
4385 cost.cost += add_regs_cost (TYPE_MODE (ctype), speed);
4386
4387 aratio = ratio > 0 ? ratio : -ratio;
4388 if (aratio != 1)
4389 cost.cost += multiply_by_const_cost (aratio, TYPE_MODE (ctype), speed);
4390 return cost;
4391
4392 fallback:
4393 if (can_autoinc)
4394 *can_autoinc = false;
4395
4396 {
4397 /* Just get the expression, expand it and measure the cost. */
4398 tree comp = get_computation_at (data->current_loop, use, cand, at);
4399
4400 if (!comp)
4401 return infinite_cost;
4402
4403 if (address_p)
4404 comp = build_simple_mem_ref (comp);
4405
4406 return new_cost (computation_cost (comp, speed), 0);
4407 }
4408 }
4409
4410 /* Determines the cost of the computation by which USE is expressed
4411 from induction variable CAND. If ADDRESS_P is true, we just need
4412 to create an address from it, otherwise we want to get it into a
4413 register. A set of invariants we depend on is stored in
4414 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4415 autoinc addressing is likely. */
4416
4417 static comp_cost
4418 get_computation_cost (struct ivopts_data *data,
4419 struct iv_use *use, struct iv_cand *cand,
4420 bool address_p, bitmap *depends_on,
4421 bool *can_autoinc, int *inv_expr_id)
4422 {
4423 return get_computation_cost_at (data,
4424 use, cand, address_p, depends_on, use->stmt,
4425 can_autoinc, inv_expr_id);
4426 }
4427
4428 /* Determines cost of basing replacement of USE on CAND in a generic
4429 expression. */
4430
4431 static bool
4432 determine_use_iv_cost_generic (struct ivopts_data *data,
4433 struct iv_use *use, struct iv_cand *cand)
4434 {
4435 bitmap depends_on;
4436 comp_cost cost;
4437 int inv_expr_id = -1;
4438
4439 /* The simple case first -- if we need to express the value of the preserved
4440 original biv, the cost is 0. This also prevents us from counting the
4441 cost of increment twice -- once at this use and once in the cost of
4442 the candidate. */
4443 if (cand->pos == IP_ORIGINAL
4444 && cand->incremented_at == use->stmt)
4445 {
4446 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4447 ERROR_MARK, -1);
4448 return true;
4449 }
4450
4451 cost = get_computation_cost (data, use, cand, false, &depends_on,
4452 NULL, &inv_expr_id);
4453
4454 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4455 inv_expr_id);
4456
4457 return !infinite_cost_p (cost);
4458 }
4459
4460 /* Determines cost of basing replacement of USE on CAND in an address. */
4461
4462 static bool
4463 determine_use_iv_cost_address (struct ivopts_data *data,
4464 struct iv_use *use, struct iv_cand *cand)
4465 {
4466 bitmap depends_on;
4467 bool can_autoinc;
4468 int inv_expr_id = -1;
4469 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4470 &can_autoinc, &inv_expr_id);
4471
4472 if (cand->ainc_use == use)
4473 {
4474 if (can_autoinc)
4475 cost.cost -= cand->cost_step;
4476 /* If we generated the candidate solely for exploiting autoincrement
4477 opportunities, and it turns out it can't be used, set the cost to
4478 infinity to make sure we ignore it. */
4479 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4480 cost = infinite_cost;
4481 }
4482 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4483 inv_expr_id);
4484
4485 return !infinite_cost_p (cost);
4486 }
4487
4488 /* Computes value of candidate CAND at position AT in iteration NITER, and
4489 stores it to VAL. */
4490
4491 static void
4492 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4493 aff_tree *val)
4494 {
4495 aff_tree step, delta, nit;
4496 struct iv *iv = cand->iv;
4497 tree type = TREE_TYPE (iv->base);
4498 tree steptype = type;
4499 if (POINTER_TYPE_P (type))
4500 steptype = sizetype;
4501
4502 tree_to_aff_combination (iv->step, steptype, &step);
4503 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4504 aff_combination_convert (&nit, steptype);
4505 aff_combination_mult (&nit, &step, &delta);
4506 if (stmt_after_increment (loop, cand, at))
4507 aff_combination_add (&delta, &step);
4508
4509 tree_to_aff_combination (iv->base, type, val);
4510 aff_combination_add (val, &delta);
4511 }
4512
4513 /* Returns period of induction variable iv. */
4514
4515 static tree
4516 iv_period (struct iv *iv)
4517 {
4518 tree step = iv->step, period, type;
4519 tree pow2div;
4520
4521 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4522
4523 type = unsigned_type_for (TREE_TYPE (step));
4524 /* The period of the iv is lcm (step, type_range)/step - 1,
4525 i.e., N*type_range/step - 1. Since the type range is a power
4526 of two, N == step >> num_of_ending_zeros_binary (step),
4527 so the final result is
4528
4529 (type_range >> num_of_ending_zeros_binary (step)) - 1
4530
4531 */
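/* E.g. (a worked example, assuming a 32-bit unsigned type): for step == 4
   there are two trailing zero bits, so the period is
   (2^32 >> 2) - 1 == 0x3fffffff -- the iv takes 2^30 distinct values
   before wrapping back to its initial one. */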
4532 pow2div = num_ending_zeros (step);
4533
4534 period = build_low_bits_mask (type,
4535 (TYPE_PRECISION (type)
4536 - tree_low_cst (pow2div, 1)));
4537
4538 return period;
4539 }
4540
4541 /* Returns the comparison operator used when eliminating the iv USE. */
4542
4543 static enum tree_code
4544 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4545 {
4546 struct loop *loop = data->current_loop;
4547 basic_block ex_bb;
4548 edge exit;
4549
4550 ex_bb = gimple_bb (use->stmt);
4551 exit = EDGE_SUCC (ex_bb, 0);
4552 if (flow_bb_inside_loop_p (loop, exit->dest))
4553 exit = EDGE_SUCC (ex_bb, 1);
4554
4555 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4556 }
4557
4558 static tree
4559 strip_wrap_conserving_type_conversions (tree exp)
4560 {
4561 while (tree_ssa_useless_type_conversion (exp)
4562 && (nowrap_type_p (TREE_TYPE (exp))
4563 == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp, 0)))))
4564 exp = TREE_OPERAND (exp, 0);
4565 return exp;
4566 }
4567
4568 /* Walk the SSA form and check whether E == WHAT. Fairly simplistic; we
4569 only check for an exact match. */
4570
4571 static bool
4572 expr_equal_p (tree e, tree what)
4573 {
4574 gimple stmt;
4575 enum tree_code code;
4576
4577 e = strip_wrap_conserving_type_conversions (e);
4578 what = strip_wrap_conserving_type_conversions (what);
4579
4580 code = TREE_CODE (what);
4581 if (TREE_TYPE (e) != TREE_TYPE (what))
4582 return false;
4583
4584 if (operand_equal_p (e, what, 0))
4585 return true;
4586
4587 if (TREE_CODE (e) != SSA_NAME)
4588 return false;
4589
4590 stmt = SSA_NAME_DEF_STMT (e);
4591 if (gimple_code (stmt) != GIMPLE_ASSIGN
4592 || gimple_assign_rhs_code (stmt) != code)
4593 return false;
4594
4595 switch (get_gimple_rhs_class (code))
4596 {
4597 case GIMPLE_BINARY_RHS:
4598 if (!expr_equal_p (gimple_assign_rhs2 (stmt), TREE_OPERAND (what, 1)))
4599 return false;
4600 /* Fallthru. */
4601
4602 case GIMPLE_UNARY_RHS:
4603 case GIMPLE_SINGLE_RHS:
4604 return expr_equal_p (gimple_assign_rhs1 (stmt), TREE_OPERAND (what, 0));
4605 default:
4606 return false;
4607 }
4608 }
4609
4610 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4611 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4612 calculation is performed in a non-wrapping type.
4613
4614 TODO: More generally, we could test for the situation that
4615 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4616 This would require knowing the sign of OFFSET.
4617
4618 Also, we only look for the first addition in the computation of BASE.
4619 More complex analysis would be better, but introducing it just for
4620 this optimization seems like overkill. */
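/* A hypothetical instance of the pattern we detect: if BASE is p_1 with
   p_1 = q + n (a POINTER_PLUS_EXPR in a non-wrapping pointer type) and
   OFFSET is n, then BASE - OFFSET == q, which was already computed without
   overflow, so we return true. */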
4621
4622 static bool
4623 difference_cannot_overflow_p (tree base, tree offset)
4624 {
4625 enum tree_code code;
4626 tree e1, e2;
4627
4628 if (!nowrap_type_p (TREE_TYPE (base)))
4629 return false;
4630
4631 base = expand_simple_operations (base);
4632
4633 if (TREE_CODE (base) == SSA_NAME)
4634 {
4635 gimple stmt = SSA_NAME_DEF_STMT (base);
4636
4637 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4638 return false;
4639
4640 code = gimple_assign_rhs_code (stmt);
4641 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4642 return false;
4643
4644 e1 = gimple_assign_rhs1 (stmt);
4645 e2 = gimple_assign_rhs2 (stmt);
4646 }
4647 else
4648 {
4649 code = TREE_CODE (base);
4650 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4651 return false;
4652 e1 = TREE_OPERAND (base, 0);
4653 e2 = TREE_OPERAND (base, 1);
4654 }
4655
4656 /* TODO: deeper inspection may be necessary to prove the equality. */
4657 switch (code)
4658 {
4659 case PLUS_EXPR:
4660 return expr_equal_p (e1, offset) || expr_equal_p (e2, offset);
4661 case POINTER_PLUS_EXPR:
4662 return expr_equal_p (e2, offset);
4663
4664 default:
4665 return false;
4666 }
4667 }
4668
4669 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4670 comparison with CAND. NITER describes the number of iterations of
4671 the loops. If successful, the comparison in COMP_P is altered accordingly.
4672
4673 We aim to handle the following situation:
4674
4675 sometype *base, *p;
4676 int a, b, i;
4677
4678 i = a;
4679 p = p_0 = base + a;
4680
4681 do
4682 {
4683 bla (*p);
4684 p++;
4685 i++;
4686 }
4687 while (i < b);
4688
4689 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4690 We aim to optimize this to
4691
4692 p = p_0 = base + a;
4693 do
4694 {
4695 bla (*p);
4696 p++;
4697 }
4698 while (p < p_0 - a + b);
4699
4700 This preserves correctness, since the pointer arithmetic does not
4701 overflow. More precisely:
4702
4703 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4704 overflow in computing it or the values of p.
4705 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4706 overflow. To prove this, we use the fact that p_0 = base + a. */
4707
4708 static bool
4709 iv_elimination_compare_lt (struct ivopts_data *data,
4710 struct iv_cand *cand, enum tree_code *comp_p,
4711 struct tree_niter_desc *niter)
4712 {
4713 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4714 struct affine_tree_combination nit, tmpa, tmpb;
4715 enum tree_code comp;
4716 HOST_WIDE_INT step;
4717
4718 /* We need to know that the candidate induction variable does not overflow.
4719 While more complex analysis may be used to prove this, for now just
4720 check that the variable appears in the original program and that it
4721 is computed in a type that guarantees no overflows. */
4722 cand_type = TREE_TYPE (cand->iv->base);
4723 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4724 return false;
4725
4726 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4727 the calculation of the BOUND could overflow, making the comparison
4728 invalid. */
4729 if (!data->loop_single_exit_p)
4730 return false;
4731
4732 /* We need to be able to decide whether candidate is increasing or decreasing
4733 in order to choose the right comparison operator. */
4734 if (!cst_and_fits_in_hwi (cand->iv->step))
4735 return false;
4736 step = int_cst_value (cand->iv->step);
4737
4738 /* Check that the number of iterations matches the expected pattern:
4739 a + 1 > b ? 0 : b - a - 1. */
4740 mbz = niter->may_be_zero;
4741 if (TREE_CODE (mbz) == GT_EXPR)
4742 {
4743 /* Handle a + 1 > b. */
4744 tree op0 = TREE_OPERAND (mbz, 0);
4745 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4746 {
4747 a = TREE_OPERAND (op0, 0);
4748 b = TREE_OPERAND (mbz, 1);
4749 }
4750 else
4751 return false;
4752 }
4753 else if (TREE_CODE (mbz) == LT_EXPR)
4754 {
4755 tree op1 = TREE_OPERAND (mbz, 1);
4756
4757 /* Handle b < a + 1. */
4758 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4759 {
4760 a = TREE_OPERAND (op1, 0);
4761 b = TREE_OPERAND (mbz, 0);
4762 }
4763 else
4764 return false;
4765 }
4766 else
4767 return false;
4768
4769 /* Expected number of iterations is B - A - 1. Check that it matches
4770 the actual number, i.e., that B - A - NITER = 1. */
4771 tree_to_aff_combination (niter->niter, nit_type, &nit);
4772 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4773 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4774 aff_combination_scale (&nit, double_int_minus_one);
4775 aff_combination_scale (&tmpa, double_int_minus_one);
4776 aff_combination_add (&tmpb, &tmpa);
4777 aff_combination_add (&tmpb, &nit);
4778 if (tmpb.n != 0 || !double_int_equal_p (tmpb.offset, double_int_one))
4779 return false;
4780
4781 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4782 overflow. */
4783 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4784 cand->iv->step,
4785 fold_convert (TREE_TYPE (cand->iv->step), a));
4786 if (!difference_cannot_overflow_p (cand->iv->base, offset))
4787 return false;
4788
4789 /* Determine the new comparison operator. */
4790 comp = step < 0 ? GT_EXPR : LT_EXPR;
4791 if (*comp_p == NE_EXPR)
4792 *comp_p = comp;
4793 else if (*comp_p == EQ_EXPR)
4794 *comp_p = invert_tree_comparison (comp, false);
4795 else
4796 gcc_unreachable ();
4797
4798 return true;
4799 }
4800
4801 /* Check whether it is possible to express the condition in USE by comparison
4802 of candidate CAND. If so, store the value compared with to BOUND, and the
4803 comparison operator to COMP. */
4804
4805 static bool
4806 may_eliminate_iv (struct ivopts_data *data,
4807 struct iv_use *use, struct iv_cand *cand, tree *bound,
4808 enum tree_code *comp)
4809 {
4810 basic_block ex_bb;
4811 edge exit;
4812 tree period;
4813 struct loop *loop = data->current_loop;
4814 aff_tree bnd;
4815 struct tree_niter_desc *desc = NULL;
4816
4817 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4818 return false;
4819
4820 /* For now, this works only for exits that dominate the loop latch.
4821 TODO: extend to other conditions inside loop body. */
4822 ex_bb = gimple_bb (use->stmt);
4823 if (use->stmt != last_stmt (ex_bb)
4824 || gimple_code (use->stmt) != GIMPLE_COND
4825 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4826 return false;
4827
4828 exit = EDGE_SUCC (ex_bb, 0);
4829 if (flow_bb_inside_loop_p (loop, exit->dest))
4830 exit = EDGE_SUCC (ex_bb, 1);
4831 if (flow_bb_inside_loop_p (loop, exit->dest))
4832 return false;
4833
4834 desc = niter_for_exit (data, exit);
4835 if (!desc)
4836 return false;
4837
4838 /* Determine whether we can use the variable to test the exit condition.
4839 This is the case iff the period of the induction variable is greater
4840 than the number of iterations for which the exit condition is true. */
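/* For instance (an assumed scenario): an 8-bit candidate with step 1 has
   period 255; if the exit condition is true for at most 200 iterations,
   the final-value comparison is safe, whereas with 300 iterations the iv
   would wrap and the checks below reject the elimination. */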
4841 period = iv_period (cand->iv);
4842
4843 /* If the number of iterations is constant, compare against it directly. */
4844 if (TREE_CODE (desc->niter) == INTEGER_CST)
4845 {
4846 /* See cand_value_at. */
4847 if (stmt_after_increment (loop, cand, use->stmt))
4848 {
4849 if (!tree_int_cst_lt (desc->niter, period))
4850 return false;
4851 }
4852 else
4853 {
4854 if (tree_int_cst_lt (period, desc->niter))
4855 return false;
4856 }
4857 }
4858
4859 /* If not, and if this is the only possible exit of the loop, see whether
4860 we can get a conservative estimate on the number of iterations of the
4861 entire loop and compare against that instead. */
4862 else
4863 {
4864 double_int period_value, max_niter;
4865
4866 max_niter = desc->max;
4867 if (stmt_after_increment (loop, cand, use->stmt))
4868 max_niter = double_int_add (max_niter, double_int_one);
4869 period_value = tree_to_double_int (period);
4870 if (double_int_ucmp (max_niter, period_value) > 0)
4871 {
4872 /* See if we can take advantage of inferred loop bound information. */
4873 if (data->loop_single_exit_p)
4874 {
4875 if (!max_loop_iterations (loop, &max_niter))
4876 return false;
4877 /* The loop bound is already adjusted by adding 1. */
4878 if (double_int_ucmp (max_niter, period_value) > 0)
4879 return false;
4880 }
4881 else
4882 return false;
4883 }
4884 }
4885
4886 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4887
4888 *bound = aff_combination_to_tree (&bnd);
4889 *comp = iv_elimination_compare (data, use);
4890
4891 /* It is unlikely that computing the number of iterations using division
4892 would be more profitable than keeping the original induction variable. */
4893 if (expression_expensive_p (*bound))
4894 return false;
4895
4896 /* Sometimes, it is possible to handle the situation that the number of
4897 iterations may be zero unless additional assumptions hold, by using <
4898 instead of != in the exit condition.
4899
4900 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4901 base the exit condition on it. However, that is often too
4902 expensive. */
4903 if (!integer_zerop (desc->may_be_zero))
4904 return iv_elimination_compare_lt (data, cand, comp, desc);
4905
4906 return true;
4907 }
4908
4909 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
4910 be copied, if it is used in the loop body and DATA->body_includes_call. */
4911
4912 static int
4913 parm_decl_cost (struct ivopts_data *data, tree bound)
4914 {
4915 tree sbound = bound;
4916 STRIP_NOPS (sbound);
4917
4918 if (TREE_CODE (sbound) == SSA_NAME
4919 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4920 && gimple_nop_p (SSA_NAME_DEF_STMT (sbound))
4921 && data->body_includes_call)
4922 return COSTS_N_INSNS (1);
4923
4924 return 0;
4925 }
4926
4927 /* Determines cost of basing replacement of USE on CAND in a condition. */
4928
4929 static bool
4930 determine_use_iv_cost_condition (struct ivopts_data *data,
4931 struct iv_use *use, struct iv_cand *cand)
4932 {
4933 tree bound = NULL_TREE;
4934 struct iv *cmp_iv;
4935 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4936 comp_cost elim_cost, express_cost, cost, bound_cost;
4937 bool ok;
4938 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4939 tree *control_var, *bound_cst;
4940 enum tree_code comp = ERROR_MARK;
4941
4942 /* Only consider real candidates. */
4943 if (!cand->iv)
4944 {
4945 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4946 ERROR_MARK, -1);
4947 return false;
4948 }
4949
4950 /* Try iv elimination. */
4951 if (may_eliminate_iv (data, use, cand, &bound, &comp))
4952 {
4953 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4954 if (elim_cost.cost == 0)
4955 elim_cost.cost = parm_decl_cost (data, bound);
4956 else if (TREE_CODE (bound) == INTEGER_CST)
4957 elim_cost.cost = 0;
4958 /* If we replace a loop condition 'i < n' with 'p < base + n',
4959 depends_on_elim will have 'base' and 'n' set, which implies
4960 that both 'base' and 'n' will be live during the loop. More likely,
4961 'base + n' will be loop invariant, resulting in only one live value
4962 during the loop. So in that case we clear depends_on_elim and set
4963 elim_inv_expr_id instead. */
4964 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4965 {
4966 elim_inv_expr_id = get_expr_id (data, bound);
4967 bitmap_clear (depends_on_elim);
4968 }
4969 /* The bound is a loop invariant, so it will be only computed
4970 once. */
4971 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4972 }
4973 else
4974 elim_cost = infinite_cost;
4975
4976 /* Try expressing the original giv. If it is compared with an invariant,
4977 note that we cannot get rid of it. */
4978 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4979 NULL, &cmp_iv);
4980 gcc_assert (ok);
4981
4982 /* When the condition is a comparison of the candidate IV against
4983 zero, prefer this IV.
4984
4985 TODO: The constant that we're subtracting from the cost should
4986 be target-dependent. This information should be added to the
4987 target costs for each backend. */
4988 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4989 && integer_zerop (*bound_cst)
4990 && (operand_equal_p (*control_var, cand->var_after, 0)
4991 || operand_equal_p (*control_var, cand->var_before, 0)))
4992 elim_cost.cost -= 1;
4993
4994 express_cost = get_computation_cost (data, use, cand, false,
4995 &depends_on_express, NULL,
4996 &express_inv_expr_id);
4997 fd_ivopts_data = data;
4998 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4999
5000 /* Count the cost of the original bound as well. */
5001 bound_cost = force_var_cost (data, *bound_cst, NULL);
5002 if (bound_cost.cost == 0)
5003 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5004 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5005 bound_cost.cost = 0;
5006 express_cost.cost += bound_cost.cost;
5007
5008 /* Choose the better approach, preferring the eliminated IV. */
5009 if (compare_costs (elim_cost, express_cost) <= 0)
5010 {
5011 cost = elim_cost;
5012 depends_on = depends_on_elim;
5013 depends_on_elim = NULL;
5014 inv_expr_id = elim_inv_expr_id;
5015 }
5016 else
5017 {
5018 cost = express_cost;
5019 depends_on = depends_on_express;
5020 depends_on_express = NULL;
5021 bound = NULL_TREE;
5022 comp = ERROR_MARK;
5023 inv_expr_id = express_inv_expr_id;
5024 }
5025
5026 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
5027
5028 if (depends_on_elim)
5029 BITMAP_FREE (depends_on_elim);
5030 if (depends_on_express)
5031 BITMAP_FREE (depends_on_express);
5032
5033 return !infinite_cost_p (cost);
5034 }
5035
5036 /* Determines cost of basing replacement of USE on CAND. Returns false
5037 if USE cannot be based on CAND. */
5038
5039 static bool
5040 determine_use_iv_cost (struct ivopts_data *data,
5041 struct iv_use *use, struct iv_cand *cand)
5042 {
5043 switch (use->type)
5044 {
5045 case USE_NONLINEAR_EXPR:
5046 return determine_use_iv_cost_generic (data, use, cand);
5047
5048 case USE_ADDRESS:
5049 return determine_use_iv_cost_address (data, use, cand);
5050
5051 case USE_COMPARE:
5052 return determine_use_iv_cost_condition (data, use, cand);
5053
5054 default:
5055 gcc_unreachable ();
5056 }
5057 }
5058
5059 /* Return true if get_computation_cost indicates that autoincrement is
5060 a possibility for the pair of USE and CAND, false otherwise. */
5061
5062 static bool
5063 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5064 struct iv_cand *cand)
5065 {
5066 bitmap depends_on;
5067 bool can_autoinc;
5068 comp_cost cost;
5069
5070 if (use->type != USE_ADDRESS)
5071 return false;
5072
5073 cost = get_computation_cost (data, use, cand, true, &depends_on,
5074 &can_autoinc, NULL);
5075
5076 BITMAP_FREE (depends_on);
5077
5078 return !infinite_cost_p (cost) && can_autoinc;
5079 }
5080
5081 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5082 use that allows autoincrement, and set their AINC_USE if possible. */
5083
5084 static void
5085 set_autoinc_for_original_candidates (struct ivopts_data *data)
5086 {
5087 unsigned i, j;
5088
5089 for (i = 0; i < n_iv_cands (data); i++)
5090 {
5091 struct iv_cand *cand = iv_cand (data, i);
5092 struct iv_use *closest = NULL;
5093 if (cand->pos != IP_ORIGINAL)
5094 continue;
5095 for (j = 0; j < n_iv_uses (data); j++)
5096 {
5097 struct iv_use *use = iv_use (data, j);
5098 unsigned uid = gimple_uid (use->stmt);
5099 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
5100 || uid > gimple_uid (cand->incremented_at))
5101 continue;
5102 if (closest == NULL || uid > gimple_uid (closest->stmt))
5103 closest = use;
5104 }
5105 if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
5106 continue;
5107 cand->ainc_use = closest;
5108 }
5109 }
5110
5111 /* Finds the candidates for the induction variables. */
5112
5113 static void
5114 find_iv_candidates (struct ivopts_data *data)
5115 {
5116 /* Add commonly used ivs. */
5117 add_standard_iv_candidates (data);
5118
5119 /* Add old induction variables. */
5120 add_old_ivs_candidates (data);
5121
5122 /* Add induction variables derived from uses. */
5123 add_derived_ivs_candidates (data);
5124
5125 set_autoinc_for_original_candidates (data);
5126
5127 /* Record the important candidates. */
5128 record_important_candidates (data);
5129 }
5130
5131 /* Determines costs of basing the use of the iv on an iv candidate. */
5132
5133 static void
5134 determine_use_iv_costs (struct ivopts_data *data)
5135 {
5136 unsigned i, j;
5137 struct iv_use *use;
5138 struct iv_cand *cand;
5139 bitmap to_clear = BITMAP_ALLOC (NULL);
5140
5141 alloc_use_cost_map (data);
5142
5143 for (i = 0; i < n_iv_uses (data); i++)
5144 {
5145 use = iv_use (data, i);
5146
5147 if (data->consider_all_candidates)
5148 {
5149 for (j = 0; j < n_iv_cands (data); j++)
5150 {
5151 cand = iv_cand (data, j);
5152 determine_use_iv_cost (data, use, cand);
5153 }
5154 }
5155 else
5156 {
5157 bitmap_iterator bi;
5158
5159 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5160 {
5161 cand = iv_cand (data, j);
5162 if (!determine_use_iv_cost (data, use, cand))
5163 bitmap_set_bit (to_clear, j);
5164 }
5165
5166 /* Remove the candidates for which the cost is infinite from
5167 the list of related candidates. */
5168 bitmap_and_compl_into (use->related_cands, to_clear);
5169 bitmap_clear (to_clear);
5170 }
5171 }
5172
5173 BITMAP_FREE (to_clear);
5174
5175 if (dump_file && (dump_flags & TDF_DETAILS))
5176 {
5177 fprintf (dump_file, "Use-candidate costs:\n");
5178
5179 for (i = 0; i < n_iv_uses (data); i++)
5180 {
5181 use = iv_use (data, i);
5182
5183 fprintf (dump_file, "Use %d:\n", i);
5184 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5185 for (j = 0; j < use->n_map_members; j++)
5186 {
5187 if (!use->cost_map[j].cand
5188 || infinite_cost_p (use->cost_map[j].cost))
5189 continue;
5190
5191 fprintf (dump_file, " %d\t%d\t%d\t",
5192 use->cost_map[j].cand->id,
5193 use->cost_map[j].cost.cost,
5194 use->cost_map[j].cost.complexity);
5195 if (use->cost_map[j].depends_on)
5196 bitmap_print (dump_file,
5197 use->cost_map[j].depends_on, "","");
5198 if (use->cost_map[j].inv_expr_id != -1)
5199 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5200 fprintf (dump_file, "\n");
5201 }
5202
5203 fprintf (dump_file, "\n");
5204 }
5205 fprintf (dump_file, "\n");
5206 }
5207 }
5208
5209 /* Determines cost of the candidate CAND. */
5210
5211 static void
5212 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5213 {
5214 comp_cost cost_base;
5215 unsigned cost, cost_step;
5216 tree base;
5217
5218 if (!cand->iv)
5219 {
5220 cand->cost = 0;
5221 return;
5222 }
5223
5224 /* There are two costs associated with the candidate -- its increment
5225 and its initialization. The second is almost negligible for any loop
5226 that rolls enough, so we take it just very little into account. */
5227
5228 base = cand->iv->base;
5229 cost_base = force_var_cost (data, base, NULL);
5230 /* It will be exceptional that the iv register happens to be initialized with
5231 the proper value at no cost. In general, there will at least be a regcopy
5232 or a const set. */
5233 if (cost_base.cost == 0)
5234 cost_base.cost = COSTS_N_INSNS (1);
5235 cost_step = add_regs_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
5236
5237 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5238
5239 /* Prefer the original ivs unless we may gain something by replacing it.
5240 The reason is to make debugging simpler; so this is not relevant for
5241 artificial ivs created by other optimization passes. */
5242 if (cand->pos != IP_ORIGINAL
5243 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5244 cost++;
5245
5246 /* Prefer not to insert statements into latch unless there are some
5247 already (so that we do not create unnecessary jumps). */
5248 if (cand->pos == IP_END
5249 && empty_block_p (ip_end_pos (data->current_loop)))
5250 cost++;
5251
5252 cand->cost = cost;
5253 cand->cost_step = cost_step;
5254 }
5255
5256 /* Determines costs of computation of the candidates. */
5257
5258 static void
5259 determine_iv_costs (struct ivopts_data *data)
5260 {
5261 unsigned i;
5262
5263 if (dump_file && (dump_flags & TDF_DETAILS))
5264 {
5265 fprintf (dump_file, "Candidate costs:\n");
5266 fprintf (dump_file, " cand\tcost\n");
5267 }
5268
5269 for (i = 0; i < n_iv_cands (data); i++)
5270 {
5271 struct iv_cand *cand = iv_cand (data, i);
5272
5273 determine_iv_cost (data, cand);
5274
5275 if (dump_file && (dump_flags & TDF_DETAILS))
5276 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5277 }
5278
5279 if (dump_file && (dump_flags & TDF_DETAILS))
5280 fprintf (dump_file, "\n");
5281 }
5282
5283 /* Calculates cost for having SIZE induction variables. */
5284
5285 static unsigned
5286 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5287 {
5288 /* We add size to the cost, so that we prefer eliminating ivs
5289 if possible. */
5290 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5291 data->body_includes_call);
5292 }
5293
5294 /* For each size of the induction variable set determine the penalty. */
5295
5296 static void
5297 determine_set_costs (struct ivopts_data *data)
5298 {
5299 unsigned j, n;
5300 gimple phi;
5301 gimple_stmt_iterator psi;
5302 tree op;
5303 struct loop *loop = data->current_loop;
5304 bitmap_iterator bi;
5305
5306 if (dump_file && (dump_flags & TDF_DETAILS))
5307 {
5308 fprintf (dump_file, "Global costs:\n");
5309 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5310 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5311 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5312 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5313 }
5314
5315 n = 0;
5316 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5317 {
5318 phi = gsi_stmt (psi);
5319 op = PHI_RESULT (phi);
5320
5321 if (!is_gimple_reg (op))
5322 continue;
5323
5324 if (get_iv (data, op))
5325 continue;
5326
5327 n++;
5328 }
5329
5330 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5331 {
5332 struct version_info *info = ver_info (data, j);
5333
5334 if (info->inv_id && info->has_nonlin_use)
5335 n++;
5336 }
5337
5338 data->regs_used = n;
5339 if (dump_file && (dump_flags & TDF_DETAILS))
5340 fprintf (dump_file, " regs_used %d\n", n);
5341
5342 if (dump_file && (dump_flags & TDF_DETAILS))
5343 {
5344 fprintf (dump_file, " cost for size:\n");
5345 fprintf (dump_file, " ivs\tcost\n");
5346 for (j = 0; j <= 2 * target_avail_regs; j++)
5347 fprintf (dump_file, " %d\t%d\n", j,
5348 ivopts_global_cost_for_size (data, j));
5349 fprintf (dump_file, "\n");
5350 }
5351 }
5352
5353 /* Returns true if A is a cheaper cost pair than B. */
5354
5355 static bool
5356 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5357 {
5358 int cmp;
5359
5360 if (!a)
5361 return false;
5362
5363 if (!b)
5364 return true;
5365
5366 cmp = compare_costs (a->cost, b->cost);
5367 if (cmp < 0)
5368 return true;
5369
5370 if (cmp > 0)
5371 return false;
5372
5373 /* In case the costs are the same, prefer the cheaper candidate. */
5374 if (a->cand->cost < b->cand->cost)
5375 return true;
5376
5377 return false;
5378 }
5379
5380
5381 /* Returns the candidate by which USE is expressed in IVS. */
5382
5383 static struct cost_pair *
5384 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5385 {
5386 return ivs->cand_for_use[use->id];
5387 }
5388
5389 /* Computes the cost field of the IVS structure. */
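/* E.g. (illustrative numbers): with cand_use_cost == 8, cand_cost == 5 and
   ivopts_global_cost_for_size returning 6 for n_regs + num_used_inv_expr
   registers, the recomputed total is 19. */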
5390
5391 static void
5392 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5393 {
5394 comp_cost cost = ivs->cand_use_cost;
5395
5396 cost.cost += ivs->cand_cost;
5397
5398 cost.cost += ivopts_global_cost_for_size (data,
5399 ivs->n_regs + ivs->num_used_inv_expr);
5400
5401 ivs->cost = cost;
5402 }
5403
5404 /* Remove invariants in set INVS from set IVS. */
5405
5406 static void
5407 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5408 {
5409 bitmap_iterator bi;
5410 unsigned iid;
5411
5412 if (!invs)
5413 return;
5414
5415 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5416 {
5417 ivs->n_invariant_uses[iid]--;
5418 if (ivs->n_invariant_uses[iid] == 0)
5419 ivs->n_regs--;
5420 }
5421 }
5422
5423 /* Set USE not to be expressed by any candidate in IVS. */
5424
5425 static void
5426 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5427 struct iv_use *use)
5428 {
5429 unsigned uid = use->id, cid;
5430 struct cost_pair *cp;
5431
5432 cp = ivs->cand_for_use[uid];
5433 if (!cp)
5434 return;
5435 cid = cp->cand->id;
5436
5437 ivs->bad_uses++;
5438 ivs->cand_for_use[uid] = NULL;
5439 ivs->n_cand_uses[cid]--;
5440
5441 if (ivs->n_cand_uses[cid] == 0)
5442 {
5443 bitmap_clear_bit (ivs->cands, cid);
5444 /* Do not count the pseudocandidates. */
5445 if (cp->cand->iv)
5446 ivs->n_regs--;
5447 ivs->n_cands--;
5448 ivs->cand_cost -= cp->cand->cost;
5449
5450 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5451 }
5452
5453 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5454
5455 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5456
5457 if (cp->inv_expr_id != -1)
5458 {
5459 ivs->used_inv_expr[cp->inv_expr_id]--;
5460 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5461 ivs->num_used_inv_expr--;
5462 }
5463 iv_ca_recount_cost (data, ivs);
5464 }
5465
5466 /* Add invariants in set INVS to set IVS. */
5467
5468 static void
5469 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5470 {
5471 bitmap_iterator bi;
5472 unsigned iid;
5473
5474 if (!invs)
5475 return;
5476
5477 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5478 {
5479 ivs->n_invariant_uses[iid]++;
5480 if (ivs->n_invariant_uses[iid] == 1)
5481 ivs->n_regs++;
5482 }
5483 }
5484
5485 /* Set cost pair for USE in set IVS to CP. */
5486
5487 static void
5488 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5489 struct iv_use *use, struct cost_pair *cp)
5490 {
5491 unsigned uid = use->id, cid;
5492
5493 if (ivs->cand_for_use[uid] == cp)
5494 return;
5495
5496 if (ivs->cand_for_use[uid])
5497 iv_ca_set_no_cp (data, ivs, use);
5498
5499 if (cp)
5500 {
5501 cid = cp->cand->id;
5502
5503 ivs->bad_uses--;
5504 ivs->cand_for_use[uid] = cp;
5505 ivs->n_cand_uses[cid]++;
5506 if (ivs->n_cand_uses[cid] == 1)
5507 {
5508 bitmap_set_bit (ivs->cands, cid);
5509 /* Do not count the pseudocandidates. */
5510 if (cp->cand->iv)
5511 ivs->n_regs++;
5512 ivs->n_cands++;
5513 ivs->cand_cost += cp->cand->cost;
5514
5515 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5516 }
5517
5518 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5519 iv_ca_set_add_invariants (ivs, cp->depends_on);
5520
5521 if (cp->inv_expr_id != -1)
5522 {
5523 ivs->used_inv_expr[cp->inv_expr_id]++;
5524 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5525 ivs->num_used_inv_expr++;
5526 }
5527 iv_ca_recount_cost (data, ivs);
5528 }
5529 }
5530
5531 /* Extend set IVS by expressing USE by some of the candidates in it
5532 if possible. All important candidates will be considered
5533 if IMPORTANT_CANDIDATES is true. */
5534
5535 static void
5536 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5537 struct iv_use *use, bool important_candidates)
5538 {
5539 struct cost_pair *best_cp = NULL, *cp;
5540 bitmap_iterator bi;
5541 bitmap cands;
5542 unsigned i;
5543
5544 gcc_assert (ivs->upto >= use->id);
5545
5546 if (ivs->upto == use->id)
5547 {
5548 ivs->upto++;
5549 ivs->bad_uses++;
5550 }
5551
5552 cands = (important_candidates ? data->important_candidates : ivs->cands);
5553 EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
5554 {
5555 struct iv_cand *cand = iv_cand (data, i);
5556
5557 cp = get_use_iv_cost (data, use, cand);
5558
5559 if (cheaper_cost_pair (cp, best_cp))
5560 best_cp = cp;
5561 }
5562
5563 iv_ca_set_cp (data, ivs, use, best_cp);
5564 }
5565
5566 /* Get cost for assignment IVS. */
5567
5568 static comp_cost
5569 iv_ca_cost (struct iv_ca *ivs)
5570 {
5571 /* This was a conditional expression but it triggered a bug in
5572 Sun C 5.5. */
5573 if (ivs->bad_uses)
5574 return infinite_cost;
5575 else
5576 return ivs->cost;
5577 }
5578
5579 /* Returns true if all dependences of CP are among invariants in IVS. */
5580
5581 static bool
5582 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5583 {
5584 unsigned i;
5585 bitmap_iterator bi;
5586
5587 if (!cp->depends_on)
5588 return true;
5589
5590 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5591 {
5592 if (ivs->n_invariant_uses[i] == 0)
5593 return false;
5594 }
5595
5596 return true;
5597 }
5598
5599 /* Creates a change that expresses USE by NEW_CP instead of OLD_CP and chains
5600 it before NEXT_CHANGE. */
5601
5602 static struct iv_ca_delta *
5603 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5604 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5605 {
5606 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5607
5608 change->use = use;
5609 change->old_cp = old_cp;
5610 change->new_cp = new_cp;
5611 change->next_change = next_change;
5612
5613 return change;
5614 }
5615
5616 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5617 are rewritten. */
5618
5619 static struct iv_ca_delta *
5620 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5621 {
5622 struct iv_ca_delta *last;
5623
5624 if (!l2)
5625 return l1;
5626
5627 if (!l1)
5628 return l2;
5629
5630 for (last = l1; last->next_change; last = last->next_change)
5631 continue;
5632 last->next_change = l2;
5633
5634 return l1;
5635 }
5636
5637 /* Reverse the list of changes DELTA, forming the inverse to it. */
5638
5639 static struct iv_ca_delta *
5640 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5641 {
5642 struct iv_ca_delta *act, *next, *prev = NULL;
5643 struct cost_pair *tmp;
5644
5645 for (act = delta; act; act = next)
5646 {
5647 next = act->next_change;
5648 act->next_change = prev;
5649 prev = act;
5650
5651 tmp = act->old_cp;
5652 act->old_cp = act->new_cp;
5653 act->new_cp = tmp;
5654 }
5655
5656 return prev;
5657 }
5658
5659 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5660 reverted instead. */
5661
5662 static void
5663 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5664 struct iv_ca_delta *delta, bool forward)
5665 {
5666 struct cost_pair *from, *to;
5667 struct iv_ca_delta *act;
5668
5669 if (!forward)
5670 delta = iv_ca_delta_reverse (delta);
5671
5672 for (act = delta; act; act = act->next_change)
5673 {
5674 from = act->old_cp;
5675 to = act->new_cp;
5676 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5677 iv_ca_set_cp (data, ivs, act->use, to);
5678 }
5679
5680 if (!forward)
5681 iv_ca_delta_reverse (delta);
5682 }
5683
5684 /* Returns true if CAND is used in IVS. */
5685
5686 static bool
5687 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5688 {
5689 return ivs->n_cand_uses[cand->id] > 0;
5690 }
5691
5692 /* Returns number of induction variable candidates in the set IVS. */
5693
5694 static unsigned
5695 iv_ca_n_cands (struct iv_ca *ivs)
5696 {
5697 return ivs->n_cands;
5698 }
5699
5700 /* Free the list of changes DELTA. */
5701
5702 static void
5703 iv_ca_delta_free (struct iv_ca_delta **delta)
5704 {
5705 struct iv_ca_delta *act, *next;
5706
5707 for (act = *delta; act; act = next)
5708 {
5709 next = act->next_change;
5710 free (act);
5711 }
5712
5713 *delta = NULL;
5714 }
5715
5716 /* Allocates a new iv candidate assignment. */
5717
5718 static struct iv_ca *
5719 iv_ca_new (struct ivopts_data *data)
5720 {
5721 struct iv_ca *nw = XNEW (struct iv_ca);
5722
5723 nw->upto = 0;
5724 nw->bad_uses = 0;
5725 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5726 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5727 nw->cands = BITMAP_ALLOC (NULL);
5728 nw->n_cands = 0;
5729 nw->n_regs = 0;
5730 nw->cand_use_cost = no_cost;
5731 nw->cand_cost = 0;
5732 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5733 nw->cost = no_cost;
5734 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5735 nw->num_used_inv_expr = 0;
5736
5737 return nw;
5738 }
5739
5740 /* Free memory occupied by the set IVS. */
5741
5742 static void
5743 iv_ca_free (struct iv_ca **ivs)
5744 {
5745 free ((*ivs)->cand_for_use);
5746 free ((*ivs)->n_cand_uses);
5747 BITMAP_FREE ((*ivs)->cands);
5748 free ((*ivs)->n_invariant_uses);
5749 free ((*ivs)->used_inv_expr);
5750 free (*ivs);
5751 *ivs = NULL;
5752 }
5753
5754 /* Dumps IVS to FILE. */
5755
5756 static void
5757 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5758 {
5759 const char *pref = " invariants ";
5760 unsigned i;
5761 comp_cost cost = iv_ca_cost (ivs);
5762
5763 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5764 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5765 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5766 bitmap_print (file, ivs->cands, " candidates: ","\n");
5767
5768 for (i = 0; i < ivs->upto; i++)
5769 {
5770 struct iv_use *use = iv_use (data, i);
5771 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5772 if (cp)
5773 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5774 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5775 else
5776 fprintf (file, " use:%d --> ??\n", use->id);
5777 }
5778
5779 for (i = 1; i <= data->max_inv_id; i++)
5780 if (ivs->n_invariant_uses[i])
5781 {
5782 fprintf (file, "%s%d", pref, i);
5783 pref = ", ";
5784 }
5785 fprintf (file, "\n\n");
5786 }
5787
5788 /* Try changing candidate in IVS to CAND for each use. Return cost of the
5789 new set, and store differences in DELTA. Number of induction variables
5790 in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true,
5791 the function will try to find a solution with a minimal number of iv candidates. */
5792
5793 static comp_cost
5794 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5795 struct iv_cand *cand, struct iv_ca_delta **delta,
5796 unsigned *n_ivs, bool min_ncand)
5797 {
5798 unsigned i;
5799 comp_cost cost;
5800 struct iv_use *use;
5801 struct cost_pair *old_cp, *new_cp;
5802
5803 *delta = NULL;
5804 for (i = 0; i < ivs->upto; i++)
5805 {
5806 use = iv_use (data, i);
5807 old_cp = iv_ca_cand_for_use (ivs, use);
5808
5809 if (old_cp
5810 && old_cp->cand == cand)
5811 continue;
5812
5813 new_cp = get_use_iv_cost (data, use, cand);
5814 if (!new_cp)
5815 continue;
5816
5817 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5818 continue;
5819
5820 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5821 continue;
5822
5823 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5824 }
5825
5826 iv_ca_delta_commit (data, ivs, *delta, true);
5827 cost = iv_ca_cost (ivs);
5828 if (n_ivs)
5829 *n_ivs = iv_ca_n_cands (ivs);
5830 iv_ca_delta_commit (data, ivs, *delta, false);
5831
5832 return cost;
5833 }
5834
5835 /* Try narrowing set IVS by removing CAND. Return the cost of
5836 the new set and store the differences in DELTA. */
5837
5838 static comp_cost
5839 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5840 struct iv_cand *cand, struct iv_ca_delta **delta)
5841 {
5842 unsigned i, ci;
5843 struct iv_use *use;
5844 struct cost_pair *old_cp, *new_cp, *cp;
5845 bitmap_iterator bi;
5846 struct iv_cand *cnd;
5847 comp_cost cost;
5848
5849 *delta = NULL;
5850 for (i = 0; i < n_iv_uses (data); i++)
5851 {
5852 use = iv_use (data, i);
5853
5854 old_cp = iv_ca_cand_for_use (ivs, use);
5855 if (old_cp->cand != cand)
5856 continue;
5857
5858 new_cp = NULL;
5859
5860 if (data->consider_all_candidates)
5861 {
5862 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5863 {
5864 if (ci == cand->id)
5865 continue;
5866
5867 cnd = iv_cand (data, ci);
5868
5869 cp = get_use_iv_cost (data, use, cnd);
5870 if (!cp)
5871 continue;
5872
5873 if (!iv_ca_has_deps (ivs, cp))
5874 continue;
5875
5876 if (!cheaper_cost_pair (cp, new_cp))
5877 continue;
5878
5879 new_cp = cp;
5880 }
5881 }
5882 else
5883 {
5884 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5885 {
5886 if (ci == cand->id)
5887 continue;
5888
5889 cnd = iv_cand (data, ci);
5890
5891 cp = get_use_iv_cost (data, use, cnd);
5892 if (!cp)
5893 continue;
5894 if (!iv_ca_has_deps (ivs, cp))
5895 continue;
5896
5897 if (!cheaper_cost_pair (cp, new_cp))
5898 continue;
5899
5900 new_cp = cp;
5901 }
5902 }
5903
5904 if (!new_cp)
5905 {
5906 iv_ca_delta_free (delta);
5907 return infinite_cost;
5908 }
5909
5910 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5911 }
5912
5913 iv_ca_delta_commit (data, ivs, *delta, true);
5914 cost = iv_ca_cost (ivs);
5915 iv_ca_delta_commit (data, ivs, *delta, false);
5916
5917 return cost;
5918 }
5919
5920 /* Try optimizing the set of candidates IVS by removing candidates other
5921 than EXCEPT_CAND from it.  Return the cost of the new set, and store the
5922 differences in DELTA. */
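/* Illustrative sketch: with candidates {c1, c2, c3} in IVS, each candidate
   other than EXCEPT_CAND is tentatively removed via iv_ca_narrow, the
   cheapest resulting set is kept, and the function then recurses on it;
   one call may thus drop several redundant candidates, e.g.
   {c1, c2, c3} -> {c1, c3} -> {c1}.  */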
5923
5924 static comp_cost
5925 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5926 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5927 {
5928 bitmap_iterator bi;
5929 struct iv_ca_delta *act_delta, *best_delta;
5930 unsigned i;
5931 comp_cost best_cost, acost;
5932 struct iv_cand *cand;
5933
5934 best_delta = NULL;
5935 best_cost = iv_ca_cost (ivs);
5936
5937 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5938 {
5939 cand = iv_cand (data, i);
5940
5941 if (cand == except_cand)
5942 continue;
5943
5944 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5945
5946 if (compare_costs (acost, best_cost) < 0)
5947 {
5948 best_cost = acost;
5949 iv_ca_delta_free (&best_delta);
5950 best_delta = act_delta;
5951 }
5952 else
5953 iv_ca_delta_free (&act_delta);
5954 }
5955
5956 if (!best_delta)
5957 {
5958 *delta = NULL;
5959 return best_cost;
5960 }
5961
5962 /* Recurse to possibly remove other unnecessary ivs. */
5963 iv_ca_delta_commit (data, ivs, best_delta, true);
5964 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5965 iv_ca_delta_commit (data, ivs, best_delta, false);
5966 *delta = iv_ca_delta_join (best_delta, *delta);
5967 return best_cost;
5968 }
5969
5970 /* Tries to extend the set IVS in the best possible way in order
5971 to express the USE. If ORIGINALP is true, prefer candidates from
5972 the original set of IVs, otherwise favor important candidates not
5973 based on any memory object. */
5974
5975 static bool
5976 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5977 struct iv_use *use, bool originalp)
5978 {
5979 comp_cost best_cost, act_cost;
5980 unsigned i;
5981 bitmap_iterator bi;
5982 struct iv_cand *cand;
5983 struct iv_ca_delta *best_delta = NULL, *act_delta;
5984 struct cost_pair *cp;
5985
5986 iv_ca_add_use (data, ivs, use, false);
5987 best_cost = iv_ca_cost (ivs);
5988
5989 cp = iv_ca_cand_for_use (ivs, use);
5990 if (!cp)
5991 {
5992 ivs->upto--;
5993 ivs->bad_uses--;
5994 iv_ca_add_use (data, ivs, use, true);
5995 best_cost = iv_ca_cost (ivs);
5996 cp = iv_ca_cand_for_use (ivs, use);
5997 }
5998 if (cp)
5999 {
6000 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
6001 iv_ca_set_no_cp (data, ivs, use);
6002 }
6003
6004 /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
6005 first try important candidates not based on any memory object; only if
6006 this fails, try the specific ones.  Rationale -- in loops with many
6007 variables the best choice often is to use just one generic biv.  If we
6008 added many ivs specific to the uses here, the optimization algorithm
6009 would later be likely to get stuck in a local minimum, thus causing us to
6010 create too many ivs.  Going from few ivs to more seems more likely to
6011 be successful -- starting from few ivs, replacing an expensive use by a
6012 specific iv should always be a win. */
6013 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6014 {
6015 cand = iv_cand (data, i);
6016
6017 if (originalp && cand->pos != IP_ORIGINAL)
6018 continue;
6019
6020 if (!originalp && cand->iv->base_object != NULL_TREE)
6021 continue;
6022
6023 if (iv_ca_cand_used_p (ivs, cand))
6024 continue;
6025
6026 cp = get_use_iv_cost (data, use, cand);
6027 if (!cp)
6028 continue;
6029
6030 iv_ca_set_cp (data, ivs, use, cp);
6031 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6032 true);
6033 iv_ca_set_no_cp (data, ivs, use);
6034 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
6035
6036 if (compare_costs (act_cost, best_cost) < 0)
6037 {
6038 best_cost = act_cost;
6039
6040 iv_ca_delta_free (&best_delta);
6041 best_delta = act_delta;
6042 }
6043 else
6044 iv_ca_delta_free (&act_delta);
6045 }
6046
6047 if (infinite_cost_p (best_cost))
6048 {
6049 for (i = 0; i < use->n_map_members; i++)
6050 {
6051 cp = use->cost_map + i;
6052 cand = cp->cand;
6053 if (!cand)
6054 continue;
6055
6056 /* Already tried this. */
6057 if (cand->important)
6058 {
6059 if (originalp && cand->pos == IP_ORIGINAL)
6060 continue;
6061 if (!originalp && cand->iv->base_object == NULL_TREE)
6062 continue;
6063 }
6064
6065 if (iv_ca_cand_used_p (ivs, cand))
6066 continue;
6067
6068 act_delta = NULL;
6069 iv_ca_set_cp (data, ivs, use, cp);
6070 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6071 iv_ca_set_no_cp (data, ivs, use);
6072 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
6073 cp, act_delta);
6074
6075 if (compare_costs (act_cost, best_cost) < 0)
6076 {
6077 best_cost = act_cost;
6078
6079 if (best_delta)
6080 iv_ca_delta_free (&best_delta);
6081 best_delta = act_delta;
6082 }
6083 else
6084 iv_ca_delta_free (&act_delta);
6085 }
6086 }
6087
6088 iv_ca_delta_commit (data, ivs, best_delta, true);
6089 iv_ca_delta_free (&best_delta);
6090
6091 return !infinite_cost_p (best_cost);
6092 }
6093
6094 /* Finds an initial assignment of candidates to uses. */
6095
6096 static struct iv_ca *
6097 get_initial_solution (struct ivopts_data *data, bool originalp)
6098 {
6099 struct iv_ca *ivs = iv_ca_new (data);
6100 unsigned i;
6101
6102 for (i = 0; i < n_iv_uses (data); i++)
6103 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
6104 {
6105 iv_ca_free (&ivs);
6106 return NULL;
6107 }
6108
6109 return ivs;
6110 }
6111
6112 /* Tries to improve the set of induction variables IVS. */
6113
6114 static bool
6115 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
6116 {
6117 unsigned i, n_ivs;
6118 comp_cost acost, best_cost = iv_ca_cost (ivs);
6119 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6120 struct iv_cand *cand;
6121
6122 /* Try extending the set of induction variables by one. */
6123 for (i = 0; i < n_iv_cands (data); i++)
6124 {
6125 cand = iv_cand (data, i);
6126
6127 if (iv_ca_cand_used_p (ivs, cand))
6128 continue;
6129
6130 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6131 if (!act_delta)
6132 continue;
6133
6134 /* If we successfully added the candidate and the set is small enough,
6135 try optimizing it by removing other candidates. */
6136 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6137 {
6138 iv_ca_delta_commit (data, ivs, act_delta, true);
6139 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6140 iv_ca_delta_commit (data, ivs, act_delta, false);
6141 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6142 }
6143
6144 if (compare_costs (acost, best_cost) < 0)
6145 {
6146 best_cost = acost;
6147 iv_ca_delta_free (&best_delta);
6148 best_delta = act_delta;
6149 }
6150 else
6151 iv_ca_delta_free (&act_delta);
6152 }
6153
6154 if (!best_delta)
6155 {
6156 /* Try removing the candidates from the set instead. */
6157 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6158
6159 /* Nothing more we can do. */
6160 if (!best_delta)
6161 return false;
6162 }
6163
6164 iv_ca_delta_commit (data, ivs, best_delta, true);
6165 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6166 iv_ca_delta_free (&best_delta);
6167 return true;
6168 }
6169
6170 /* Attempts to find the optimal set of induction variables.  We use a simple
6171 greedy heuristic: we try to replace at most one candidate in the selected
6172 solution and remove the unused ivs as long as this improves the cost. */
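/* For example, with hypothetical costs: starting from an initial set of
   cost 20, one call of try_improve_iv_set may find that adding candidate
   c5 (and pruning a now-redundant c2) gives cost 16; a later call finds
   no single-candidate change that is cheaper, so the loop in
   find_optimal_iv_set_1 stops with the cost-16 set.  */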
6173
6174 static struct iv_ca *
6175 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6176 {
6177 struct iv_ca *set;
6178
6179 /* Get the initial solution. */
6180 set = get_initial_solution (data, originalp);
6181 if (!set)
6182 {
6183 if (dump_file && (dump_flags & TDF_DETAILS))
6184 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6185 return NULL;
6186 }
6187
6188 if (dump_file && (dump_flags & TDF_DETAILS))
6189 {
6190 fprintf (dump_file, "Initial set of candidates:\n");
6191 iv_ca_dump (data, dump_file, set);
6192 }
6193
6194 while (try_improve_iv_set (data, set))
6195 {
6196 if (dump_file && (dump_flags & TDF_DETAILS))
6197 {
6198 fprintf (dump_file, "Improved to:\n");
6199 iv_ca_dump (data, dump_file, set);
6200 }
6201 }
6202
6203 return set;
6204 }
6205
6206 static struct iv_ca *
6207 find_optimal_iv_set (struct ivopts_data *data)
6208 {
6209 unsigned i;
6210 struct iv_ca *set, *origset;
6211 struct iv_use *use;
6212 comp_cost cost, origcost;
6213
6214 /* Determine the cost based on a strategy that starts with the original IVs,
6215 and then try again using a strategy that prefers candidates not based
6216 on any IVs. */
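/* E.g., with hypothetical costs, if the original-IV strategy yields cost
   18 and the other strategy cost 15, the second set is kept below and the
   first one is freed.  */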
6217 origset = find_optimal_iv_set_1 (data, true);
6218 set = find_optimal_iv_set_1 (data, false);
6219
6220 if (!origset && !set)
6221 return NULL;
6222
6223 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6224 cost = set ? iv_ca_cost (set) : infinite_cost;
6225
6226 if (dump_file && (dump_flags & TDF_DETAILS))
6227 {
6228 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6229 origcost.cost, origcost.complexity);
6230 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6231 cost.cost, cost.complexity);
6232 }
6233
6234 /* Choose the one with the best cost. */
6235 if (compare_costs (origcost, cost) <= 0)
6236 {
6237 if (set)
6238 iv_ca_free (&set);
6239 set = origset;
6240 }
6241 else if (origset)
6242 iv_ca_free (&origset);
6243
6244 for (i = 0; i < n_iv_uses (data); i++)
6245 {
6246 use = iv_use (data, i);
6247 use->selected = iv_ca_cand_for_use (set, use)->cand;
6248 }
6249
6250 return set;
6251 }
6252
6253 /* Creates a new induction variable corresponding to CAND. */
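/* Schematic illustration of the increment positions (not real GIMPLE):

	IP_NORMAL				IP_END
	loop:					loop:
	  ...					  ...
	  var_after = var_before + step		  if (cond) goto exit;
	  if (cond) goto exit;			  var_after = var_before + step
	  goto loop;				  goto loop;

   IP_BEFORE_USE and IP_AFTER_USE place the increment immediately around
   cand->incremented_at, and IP_ORIGINAL preserves the existing biv.  */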
6254
6255 static void
6256 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6257 {
6258 gimple_stmt_iterator incr_pos;
6259 tree base;
6260 bool after = false;
6261
6262 if (!cand->iv)
6263 return;
6264
6265 switch (cand->pos)
6266 {
6267 case IP_NORMAL:
6268 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6269 break;
6270
6271 case IP_END:
6272 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6273 after = true;
6274 break;
6275
6276 case IP_AFTER_USE:
6277 after = true;
6278 /* fall through */
6279 case IP_BEFORE_USE:
6280 incr_pos = gsi_for_stmt (cand->incremented_at);
6281 break;
6282
6283 case IP_ORIGINAL:
6284 /* Mark that the iv is preserved. */
6285 name_info (data, cand->var_before)->preserve_biv = true;
6286 name_info (data, cand->var_after)->preserve_biv = true;
6287
6288 /* Rewrite the increment so that it uses var_before directly. */
6289 find_interesting_uses_op (data, cand->var_after)->selected = cand;
6290 return;
6291 }
6292
6293 gimple_add_tmp_var (cand->var_before);
6294 add_referenced_var (cand->var_before);
6295
6296 base = unshare_expr (cand->iv->base);
6297
6298 create_iv (base, unshare_expr (cand->iv->step),
6299 cand->var_before, data->current_loop,
6300 &incr_pos, after, &cand->var_before, &cand->var_after);
6301 }
6302
6303 /* Creates new induction variables described in SET. */
6304
6305 static void
6306 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6307 {
6308 unsigned i;
6309 struct iv_cand *cand;
6310 bitmap_iterator bi;
6311
6312 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6313 {
6314 cand = iv_cand (data, i);
6315 create_new_iv (data, cand);
6316 }
6317
6318 if (dump_file && (dump_flags & TDF_DETAILS))
6319 {
6320 fprintf (dump_file, "\nSelected IV set: \n");
6321 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6322 {
6323 cand = iv_cand (data, i);
6324 dump_cand (dump_file, cand);
6325 }
6326 fprintf (dump_file, "\n");
6327 }
6328 }
6329
6330 /* Rewrites USE (definition of iv used in a nonlinear expression)
6331 using candidate CAND. */
6332
6333 static void
6334 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6335 struct iv_use *use, struct iv_cand *cand)
6336 {
6337 tree comp;
6338 tree op, tgt;
6339 gimple ass;
6340 gimple_stmt_iterator bsi;
6341
6342 /* An important special case -- if we are asked to express the value of
6343 the original iv by itself, just exit; there is no need to
6344 introduce a new computation (that might also need casting the
6345 variable to unsigned and back). */
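/* E.g., with hypothetical SSA names: if the use is the original increment
   i_7 = i_4 + 1 and CAND is that original biv, the statement is left
   untouched below when the added operand is a constant or the step
   itself; otherwise it is re-expressed as var_before incremented by the
   step.  */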
6346 if (cand->pos == IP_ORIGINAL
6347 && cand->incremented_at == use->stmt)
6348 {
6349 tree step, ctype, utype;
6350 enum tree_code incr_code = PLUS_EXPR, old_code;
6351
6352 gcc_assert (is_gimple_assign (use->stmt));
6353 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6354
6355 step = cand->iv->step;
6356 ctype = TREE_TYPE (step);
6357 utype = TREE_TYPE (cand->var_after);
6358 if (TREE_CODE (step) == NEGATE_EXPR)
6359 {
6360 incr_code = MINUS_EXPR;
6361 step = TREE_OPERAND (step, 0);
6362 }
6363
6364 /* Check whether we may leave the computation unchanged.
6365 This is the case only if it does not rely on other
6366 computations in the loop -- otherwise, the computation
6367 we rely upon may be removed in remove_unused_ivs,
6368 thus leading to ICE. */
6369 old_code = gimple_assign_rhs_code (use->stmt);
6370 if (old_code == PLUS_EXPR
6371 || old_code == MINUS_EXPR
6372 || old_code == POINTER_PLUS_EXPR)
6373 {
6374 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6375 op = gimple_assign_rhs2 (use->stmt);
6376 else if (old_code != MINUS_EXPR
6377 && gimple_assign_rhs2 (use->stmt) == cand->var_before)
6378 op = gimple_assign_rhs1 (use->stmt);
6379 else
6380 op = NULL_TREE;
6381 }
6382 else
6383 op = NULL_TREE;
6384
6385 if (op
6386 && (TREE_CODE (op) == INTEGER_CST
6387 || operand_equal_p (op, step, 0)))
6388 return;
6389
6390 /* Otherwise, add the necessary computations to express
6391 the iv. */
6392 op = fold_convert (ctype, cand->var_before);
6393 comp = fold_convert (utype,
6394 build2 (incr_code, ctype, op,
6395 unshare_expr (step)));
6396 }
6397 else
6398 {
6399 comp = get_computation (data->current_loop, use, cand);
6400 gcc_assert (comp != NULL_TREE);
6401 }
6402
6403 switch (gimple_code (use->stmt))
6404 {
6405 case GIMPLE_PHI:
6406 tgt = PHI_RESULT (use->stmt);
6407
6408 /* If we should keep the biv, do not replace it. */
6409 if (name_info (data, tgt)->preserve_biv)
6410 return;
6411
6412 bsi = gsi_after_labels (gimple_bb (use->stmt));
6413 break;
6414
6415 case GIMPLE_ASSIGN:
6416 tgt = gimple_assign_lhs (use->stmt);
6417 bsi = gsi_for_stmt (use->stmt);
6418 break;
6419
6420 default:
6421 gcc_unreachable ();
6422 }
6423
6424 if (!valid_gimple_rhs_p (comp)
6425 || (gimple_code (use->stmt) != GIMPLE_PHI
6426 /* We can't allow re-allocating the stmt as it might be pointed
6427 to still. */
6428 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6429 >= gimple_num_ops (gsi_stmt (bsi)))))
6430 {
6431 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6432 true, GSI_SAME_STMT);
6433 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6434 {
6435 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6436 /* As this isn't a plain copy we have to reset alignment
6437 information. */
6438 if (SSA_NAME_PTR_INFO (comp))
6439 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6440 }
6441 }
6442
6443 if (gimple_code (use->stmt) == GIMPLE_PHI)
6444 {
6445 ass = gimple_build_assign (tgt, comp);
6446 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6447
6448 bsi = gsi_for_stmt (use->stmt);
6449 remove_phi_node (&bsi, false);
6450 }
6451 else
6452 {
6453 gimple_assign_set_rhs_from_tree (&bsi, comp);
6454 use->stmt = gsi_stmt (bsi);
6455 }
6456 }
6457
6458 /* Performs a peephole optimization to reorder the iv update statement with
6459 a mem ref to enable instruction combining in later phases. The mem ref uses
6460 the iv value before the update, so the reordering transformation requires
6461 adjustment of the offset. CAND is the selected IV_CAND.
6462
6463 Example:
6464
6465 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6466 iv2 = iv1 + 1;
6467
6468 if (t < val) (1)
6469 goto L;
6470 goto Head;
6471
6472
6473 directly propagating t over to (1) would introduce an overlapping live range,
6474 thus increasing register pressure.  This peephole transforms it into:
6475
6476
6477 iv2 = iv1 + 1;
6478 t = MEM_REF (base, iv2, 8, 8);
6479 if (t < val)
6480 goto L;
6481 goto Head;
6482 */
6483
6484 static void
6485 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6486 {
6487 tree var_after;
6488 gimple iv_update, stmt;
6489 basic_block bb;
6490 gimple_stmt_iterator gsi, gsi_iv;
6491
6492 if (cand->pos != IP_NORMAL)
6493 return;
6494
6495 var_after = cand->var_after;
6496 iv_update = SSA_NAME_DEF_STMT (var_after);
6497
6498 bb = gimple_bb (iv_update);
6499 gsi = gsi_last_nondebug_bb (bb);
6500 stmt = gsi_stmt (gsi);
6501
6502 /* Only handle conditional statements for now. */
6503 if (gimple_code (stmt) != GIMPLE_COND)
6504 return;
6505
6506 gsi_prev_nondebug (&gsi);
6507 stmt = gsi_stmt (gsi);
6508 if (stmt != iv_update)
6509 return;
6510
6511 gsi_prev_nondebug (&gsi);
6512 if (gsi_end_p (gsi))
6513 return;
6514
6515 stmt = gsi_stmt (gsi);
6516 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6517 return;
6518
6519 if (stmt != use->stmt)
6520 return;
6521
6522 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6523 return;
6524
6525 if (dump_file && (dump_flags & TDF_DETAILS))
6526 {
6527 fprintf (dump_file, "Reordering \n");
6528 print_gimple_stmt (dump_file, iv_update, 0, 0);
6529 print_gimple_stmt (dump_file, use->stmt, 0, 0);
6530 fprintf (dump_file, "\n");
6531 }
6532
6533 gsi = gsi_for_stmt (use->stmt);
6534 gsi_iv = gsi_for_stmt (iv_update);
6535 gsi_move_before (&gsi_iv, &gsi);
6536
6537 cand->pos = IP_BEFORE_USE;
6538 cand->incremented_at = use->stmt;
6539 }
6540
6541 /* Rewrites USE (address that is an iv) using candidate CAND. */
6542
6543 static void
6544 rewrite_use_address (struct ivopts_data *data,
6545 struct iv_use *use, struct iv_cand *cand)
6546 {
6547 aff_tree aff;
6548 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6549 tree base_hint = NULL_TREE;
6550 tree ref, iv;
6551 bool ok;
6552
6553 adjust_iv_update_pos (cand, use);
6554 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6555 gcc_assert (ok);
6556 unshare_aff_combination (&aff);
6557
6558 /* To avoid undefined overflow problems, all IV candidates use unsigned
6559 integer types. The drawback is that this makes it impossible for
6560 create_mem_ref to distinguish an IV that is based on a memory object
6561 from one that represents simply an offset.
6562
6563 To work around this problem, we pass a hint to create_mem_ref that
6564 indicates which variable (if any) in aff is an IV based on a memory
6565 object. Note that we only consider the candidate. If this is not
6566 based on an object, the base of the reference is in some subexpression
6567 of the use -- but these will use pointer types, so they are recognized
6568 by the create_mem_ref heuristics anyway. */
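/* An illustrative case, with hypothetical SSA names: for a use whose
   address is p_5 + (sizetype) i_3 * 4, where CAND is based on the memory
   object p, the hint names the candidate's variable at the use so that
   create_mem_ref can build something like
   MEM[base: p_5, index: ivtmp_9, step: 4] instead of treating the whole
   affine combination as a plain offset.  */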
6569 if (cand->iv->base_object)
6570 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6571
6572 iv = var_at_stmt (data->current_loop, cand, use->stmt);
6573 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6574 reference_alias_ptr_type (*use->op_p),
6575 iv, base_hint, data->speed);
6576 copy_ref_info (ref, *use->op_p);
6577 *use->op_p = ref;
6578 }
6579
6580 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6581 candidate CAND. */
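/* A sketch, with hypothetical SSA names: when iv elimination computed a
   bound (cp->value), an exit test such as "if (i_4 < n_6)" is replaced by
   a test of the candidate against that bound, e.g. "if (ivtmp_21 != b_8)",
   with b_8 computed in the loop preheader; otherwise the compared operand
   is merely re-expressed in terms of the candidate.  */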
6582
6583 static void
6584 rewrite_use_compare (struct ivopts_data *data,
6585 struct iv_use *use, struct iv_cand *cand)
6586 {
6587 tree comp, *var_p, op, bound;
6588 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6589 enum tree_code compare;
6590 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6591 bool ok;
6592
6593 bound = cp->value;
6594 if (bound)
6595 {
6596 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6597 tree var_type = TREE_TYPE (var);
6598 gimple_seq stmts;
6599
6600 if (dump_file && (dump_flags & TDF_DETAILS))
6601 {
6602 fprintf (dump_file, "Replacing exit test: ");
6603 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6604 }
6605 compare = cp->comp;
6606 bound = unshare_expr (fold_convert (var_type, bound));
6607 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6608 if (stmts)
6609 gsi_insert_seq_on_edge_immediate (
6610 loop_preheader_edge (data->current_loop),
6611 stmts);
6612
6613 gimple_cond_set_lhs (use->stmt, var);
6614 gimple_cond_set_code (use->stmt, compare);
6615 gimple_cond_set_rhs (use->stmt, op);
6616 return;
6617 }
6618
6619 /* The induction variable elimination failed; just express the original
6620 giv. */
6621 comp = get_computation (data->current_loop, use, cand);
6622 gcc_assert (comp != NULL_TREE);
6623
6624 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6625 gcc_assert (ok);
6626
6627 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6628 true, GSI_SAME_STMT);
6629 }
6630
6631 /* Rewrites USE using candidate CAND. */
6632
6633 static void
6634 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6635 {
6636 switch (use->type)
6637 {
6638 case USE_NONLINEAR_EXPR:
6639 rewrite_use_nonlinear_expr (data, use, cand);
6640 break;
6641
6642 case USE_ADDRESS:
6643 rewrite_use_address (data, use, cand);
6644 break;
6645
6646 case USE_COMPARE:
6647 rewrite_use_compare (data, use, cand);
6648 break;
6649
6650 default:
6651 gcc_unreachable ();
6652 }
6653
6654 update_stmt (use->stmt);
6655 }
6656
6657 /* Rewrite the uses using the selected induction variables. */
6658
6659 static void
6660 rewrite_uses (struct ivopts_data *data)
6661 {
6662 unsigned i;
6663 struct iv_cand *cand;
6664 struct iv_use *use;
6665
6666 for (i = 0; i < n_iv_uses (data); i++)
6667 {
6668 use = iv_use (data, i);
6669 cand = use->selected;
6670 gcc_assert (cand);
6671
6672 rewrite_use (data, use, cand);
6673 }
6674 }
6675
6676 /* Removes the ivs that are not used after rewriting. */
6677
6678 static void
6679 remove_unused_ivs (struct ivopts_data *data)
6680 {
6681 unsigned j;
6682 bitmap_iterator bi;
6683 bitmap toremove = BITMAP_ALLOC (NULL);
6684
6685 /* Figure out an order in which to release SSA DEFs so that we don't
6686 release something that we'd have to propagate into a debug stmt
6687 afterwards. */
6688 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6689 {
6690 struct version_info *info;
6691
6692 info = ver_info (data, j);
6693 if (info->iv
6694 && !integer_zerop (info->iv->step)
6695 && !info->inv_id
6696 && !info->iv->have_use_for
6697 && !info->preserve_biv)
6698 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
6699 }
6700
6701 release_defs_bitset (toremove);
6702
6703 BITMAP_FREE (toremove);
6704 }
6705
6706 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6707 for pointer_map_traverse. */
6708
6709 static bool
6710 free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
6711 void *data ATTRIBUTE_UNUSED)
6712 {
6713 struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;
6714
6715 free (niter);
6716 return true;
6717 }
6718
6719 /* Frees data allocated by the optimization of a single loop. */
6720
6721 static void
6722 free_loop_data (struct ivopts_data *data)
6723 {
6724 unsigned i, j;
6725 bitmap_iterator bi;
6726 tree obj;
6727
6728 if (data->niters)
6729 {
6730 pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
6731 pointer_map_destroy (data->niters);
6732 data->niters = NULL;
6733 }
6734
6735 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
6736 {
6737 struct version_info *info;
6738
6739 info = ver_info (data, i);
6740 free (info->iv);
6741 info->iv = NULL;
6742 info->has_nonlin_use = false;
6743 info->preserve_biv = false;
6744 info->inv_id = 0;
6745 }
6746 bitmap_clear (data->relevant);
6747 bitmap_clear (data->important_candidates);
6748
6749 for (i = 0; i < n_iv_uses (data); i++)
6750 {
6751 struct iv_use *use = iv_use (data, i);
6752
6753 free (use->iv);
6754 BITMAP_FREE (use->related_cands);
6755 for (j = 0; j < use->n_map_members; j++)
6756 if (use->cost_map[j].depends_on)
6757 BITMAP_FREE (use->cost_map[j].depends_on);
6758 free (use->cost_map);
6759 free (use);
6760 }
6761 VEC_truncate (iv_use_p, data->iv_uses, 0);
6762
6763 for (i = 0; i < n_iv_cands (data); i++)
6764 {
6765 struct iv_cand *cand = iv_cand (data, i);
6766
6767 free (cand->iv);
6768 if (cand->depends_on)
6769 BITMAP_FREE (cand->depends_on);
6770 free (cand);
6771 }
6772 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
6773
6774 if (data->version_info_size < num_ssa_names)
6775 {
6776 data->version_info_size = 2 * num_ssa_names;
6777 free (data->version_info);
6778 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
6779 }
6780
6781 data->max_inv_id = 0;
6782
6783 FOR_EACH_VEC_ELT (tree, decl_rtl_to_reset, i, obj)
6784 SET_DECL_RTL (obj, NULL_RTX);
6785
6786 VEC_truncate (tree, decl_rtl_to_reset, 0);
6787
6788 htab_empty (data->inv_expr_tab);
6789 data->inv_expr_id = 0;
6790 }
6791
6792 /* Finalizes the data structures used by the iv optimization pass
6793 DATA. */
6794
6795 static void
6796 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
6797 {
6798 free_loop_data (data);
6799 free (data->version_info);
6800 BITMAP_FREE (data->relevant);
6801 BITMAP_FREE (data->important_candidates);
6802
6803 VEC_free (tree, heap, decl_rtl_to_reset);
6804 VEC_free (iv_use_p, heap, data->iv_uses);
6805 VEC_free (iv_cand_p, heap, data->iv_candidates);
6806 htab_delete (data->inv_expr_tab);
6807
6808 finalize_costs ();
6809 }
6810
6811 /* Returns true if the loop body BODY includes any function calls. */
6812
6813 static bool
6814 loop_body_includes_call (basic_block *body, unsigned num_nodes)
6815 {
6816 gimple_stmt_iterator gsi;
6817 unsigned i;
6818
6819 for (i = 0; i < num_nodes; i++)
6820 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
6821 {
6822 gimple stmt = gsi_stmt (gsi);
6823 if (is_gimple_call (stmt)
6824 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
6825 return true;
6826 }
6827 return false;
6828 }
6829
6830 /* Optimizes the LOOP. Returns true if anything changed. */
6831
6832 static bool
6833 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
6834 {
6835 bool changed = false;
6836 struct iv_ca *iv_ca;
6837 edge exit = single_dom_exit (loop);
6838 basic_block *body;
6839
6840 gcc_assert (!data->niters);
6841 data->current_loop = loop;
6842 data->speed = optimize_loop_for_speed_p (loop);
6843
6844 if (dump_file && (dump_flags & TDF_DETAILS))
6845 {
6846 fprintf (dump_file, "Processing loop %d\n", loop->num);
6847
6848 if (exit)
6849 {
6850 fprintf (dump_file, " single exit %d -> %d, exit condition ",
6851 exit->src->index, exit->dest->index);
6852 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
6853 fprintf (dump_file, "\n");
6854 }
6855
6856 fprintf (dump_file, "\n");
6857 }
6858
6859 body = get_loop_body (loop);
6860 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
6861 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
6862 free (body);
6863
6864 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
6865
6866 /* For each ssa name, determine whether it behaves as an induction variable
6867 in some loop. */
6868 if (!find_induction_variables (data))
6869 goto finish;
6870
6871 /* Finds interesting uses (item 1). */
6872 find_interesting_uses (data);
6873 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
6874 goto finish;
6875
6876 /* Finds candidates for the induction variables (item 2). */
6877 find_iv_candidates (data);
6878
6879 /* Calculates the costs (item 3, part 1). */
6880 determine_iv_costs (data);
6881 determine_use_iv_costs (data);
6882 determine_set_costs (data);
6883
6884 /* Find the optimal set of induction variables (item 3, part 2). */
6885 iv_ca = find_optimal_iv_set (data);
6886 if (!iv_ca)
6887 goto finish;
6888 changed = true;
6889
6890 /* Create the new induction variables (item 4, part 1). */
6891 create_new_ivs (data, iv_ca);
6892 iv_ca_free (&iv_ca);
6893
6894 /* Rewrite the uses (item 4, part 2). */
6895 rewrite_uses (data);
6896
6897 /* Remove the ivs that are unused after rewriting. */
6898 remove_unused_ivs (data);
6899
6900 /* We have changed the structure of induction variables; it might happen
6901 that definitions in the scev database refer to some of them that were
6902 eliminated. */
6903 scev_reset ();
6904
6905 finish:
6906 free_loop_data (data);
6907
6908 return changed;
6909 }
6910
6911 /* Main entry point. Optimizes induction variables in loops. */
6912
6913 void
6914 tree_ssa_iv_optimize (void)
6915 {
6916 struct loop *loop;
6917 struct ivopts_data data;
6918 loop_iterator li;
6919
6920 tree_ssa_iv_optimize_init (&data);
6921
6922 /* Optimize the loops starting with the innermost ones. */
6923 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
6924 {
6925 if (dump_file && (dump_flags & TDF_DETAILS))
6926 flow_loop_dump (loop, dump_file, NULL, 1);
6927
6928 tree_ssa_iv_optimize_loop (&data, loop);
6929 }
6930
6931 tree_ssa_iv_optimize_finalize (&data);
6932 }