/* Induction variable optimizations.
   Copyright (C) 2003-2014 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
         induction variable in the set and adds its cost to the sum.  The cost
         reflects the time spent on modifying the induction variable's value
         to be usable for the given purpose (adding base and offset for
         arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, and the dead code
      is removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible; it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  A small example of the transformation follows
   this comment.  */
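
/* As an editorial illustration (a sketch, not output of the pass; the
   names below are made up), consider

     for (i = 0; i < n; i++)
       a[i] = 0;

   The address &a[i] is an induction variable with base &a[0] and step
   sizeof (a[0]).  If the pass decides that this address iv is the only
   one worth keeping, strength reduction and iv elimination can rewrite
   the loop roughly as

     for (p = &a[0]; p != &a[n]; p++)
       *p = 0;

   so the counter i disappears and the exit test compares the pointer
   against a loop-invariant bound.  */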

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "stor-layout.h"
#include "tm_p.h"
#include "predict.h"
#include "vec.h"
#include "hashtab.h"
#include "hash-set.h"
#include "machmode.h"
#include "hard-reg-set.h"
#include "input.h"
#include "function.h"
#include "dominance.h"
#include "cfg.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "hash-map.h"
#include "hash-table.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "plugin-api.h"
#include "ipa-ref.h"
#include "cgraph.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "insn-config.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "cfgloop.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
#include "expmed.h"
#include "tree-ssa-address.h"
#include "builtins.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "expr.h"
#include "recog.h"

/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}

/* Representation of the induction variable.  */
struct iv
{
  tree base;            /* Initial value of the iv.  */
  tree base_object;     /* A memory object to which the induction variable
                           points.  */
  tree step;            /* Step of the iv (constant only).  */
  tree ssa_name;        /* The ssa name with the value.  */
  bool biv_p;           /* Is it a biv?  */
  bool have_use_for;    /* Do we already have a use for it?  */
  unsigned use_id;      /* The identifier in the use if it is the case.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;            /* The ssa name.  */
  struct iv *iv;        /* Induction variable description.  */
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
                           an expression that is not an induction variable.  */
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
  unsigned inv_id;      /* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
  USE_ADDRESS,          /* Use in an address.  */
  USE_COMPARE           /* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;             /* The runtime cost.  */
  unsigned complexity;  /* The estimate of the complexity of the code for
                           the computation (in no concrete units --
                           complexity field should be larger for more
                           complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};
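
/* (Editorial note: the two fields are combined by adding them
   componentwise, and when two comp_cost values are compared later in
   this file, the runtime COST is the primary key with COMPLEXITY as the
   tie-breaker, so e.g. {4, 1} is preferred over {4, 2}.)  */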

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand; /* The candidate.  */
  comp_cost cost;       /* The cost.  */
  bitmap depends_on;    /* The list of invariants that have to be
                           preserved.  */
  tree value;           /* For final value elimination, the expression for
                           the final value of the iv.  For iv elimination,
                           the new bound to compare with.  */
  enum tree_code comp;  /* For iv elimination, the comparison.  */
  int inv_expr_id;      /* Loop invariant expression id.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;          /* The id of the use.  */
  enum use_type type;   /* Type of the use.  */
  struct iv *iv;        /* The induction variable it is based on.  */
  gimple stmt;          /* Statement in which it occurs.  */
  tree *op_p;           /* The place where it occurs.  */
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
                           important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
                        /* The costs with respect to the iv candidates.  */

  struct iv_cand *selected;
                        /* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,            /* At the end, just before the exit condition.  */
  IP_END,               /* At the end of the latch block.  */
  IP_BEFORE_USE,        /* Immediately before a specific use.  */
  IP_AFTER_USE,         /* Immediately after a specific use.  */
  IP_ORIGINAL           /* The original biv.  */
};
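
/* A rough sketch of where these positions land in a typical loop
   (illustrative only, with made-up SSA names):

     header:  i_1 = PHI <0 (preheader), i_2 (latch)>
              ...
              i_2 = i_1 + 1;               <- the IP_ORIGINAL increment
              if (i_2 < n_3) goto header;  <- IP_NORMAL inserts just before
                                              this exit test
     latch:                                <- IP_END inserts at the end of
                                              this block

   IP_BEFORE_USE and IP_AFTER_USE instead place the increment immediately
   around the single use recorded in ainc_use.  */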

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;          /* The number of the candidate.  */
  bool important;       /* Whether this is an "important" candidate, i.e. such
                           that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
                           incremented.  */
  tree var_before;      /* The variable used for it before increment.  */
  tree var_after;       /* The variable used for it after increment.  */
  struct iv *iv;        /* The value of the candidate.  NULL for
                           "pseudocandidate" used to indicate the possibility
                           to replace the final value of an iv by direct
                           computation of the value.  */
  unsigned cost;        /* Cost of the candidate.  */
  unsigned cost_step;   /* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
                              where it is incremented.  */
  bitmap depends_on;    /* The list of invariants that are used in step of the
                           biv.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;

typedef struct iv_cand *iv_cand_p;

/* Hashtable helpers.  */

struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
{
  typedef iv_inv_expr_ent value_type;
  typedef iv_inv_expr_ent compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
};

/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const value_type *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const value_type *expr1, const compare_type *expr2)
{
  return expr1->hash == expr2->hash
         && operand_equal_p (expr1->expr, expr2->expr, 0);
}

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  hash_map<edge, tree_niter_desc *> *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopts.  */
  hash_table<iv_inv_expr_hasher> *inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use_p> iv_uses;

  /* The candidates.  */
  vec<iv_cand_p> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Cache used by tree_to_aff_combination_expand.  */
  hash_map<tree, name_expansion *> *name_expansion_cache;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via a single exit.  */
  bool loop_single_exit_p;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopts.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on the number of candidates below which all candidates are
   considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
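
/* (Editorial note: the three bounds above should correspond to the
   command-line knobs --param iv-consider-all-candidates-bound,
   --param iv-max-considered-uses and
   --param iv-always-prune-cand-set-bound respectively, so they can be
   tuned without recompiling.)  */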

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  */

void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}
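
/* For reference, the output of dump_iv looks roughly like this for a
   simple counter (made-up name and values):

     ssa name i_1
       type int
       base 0
       step 1
       is a biv

   with a single "invariant" line replacing base/step when the step is
   zero.  */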

/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
           cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}

/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
                                  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
        return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
        return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
                            idx_contains_abnormal_ssa_name_p,
                            NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
        return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
        return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
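
/* (Editorial aside: SSA names occur in abnormal phi nodes e.g. for
   values live across a setjmp call or an abnormal edge; such names must
   stay coalescible with the phi result, so the checks above
   conservatively reject any expression that mentions them.)  */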

/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  tree_niter_desc **slot;

  if (!data->niters)
    {
      data->niters = new hash_map<edge, tree_niter_desc *>;
      slot = NULL;
    }
  else
    slot = data->niters->get (exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with
         ssa names that appear in phi nodes on abnormal edges, so as not to
         create overlapping live ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
                                      exit, desc, true)
          || contains_abnormal_ssa_name_p (desc->niter))
        {
          XDELETE (desc);
          desc = NULL;
        }
      data->niters->put (exit, desc);
    }
  else
    desc = *slot;

  return desc;
}

/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
  data->inv_expr_id = 0;
  data->name_expansion_cache = NULL;
  decl_rtl_to_reset.create (20);
}

/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
        return expr;

      if (TREE_CODE (base) == MEM_REF)
        return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
                           build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
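
/* A few illustrative results of determine_base_object (hypothetical
   inputs, not taken from a dump):

     &a[i_1]     -> &a          (ADDR_EXPR whose base address is the decl a)
     p_2 + 4     -> base object of p_2  (POINTER_PLUS_EXPR recurses on p_2)
     (int *) 0   -> NULL_TREE   (integer constants point to no object)  */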

/* Return true if an address expression with a non-DECL_P operand appears
   in EXPR.  */

static bool
contain_complex_addr_expr (tree expr)
{
  bool res = false;

  STRIP_NOPS (expr);
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
      break;

    case ADDR_EXPR:
      return (!DECL_P (TREE_OPERAND (expr, 0)));

    default:
      return false;
    }

  return res;
}

/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  tree expr = base;
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  /* Lower address expressions in BASE, except those whose operand is
     a DECL_P.  By doing this:
     1) More accurate cost can be computed for address expressions;
     2) Duplicate candidates won't be created for bases in different
        forms, like &a[0] and &a.  */
  STRIP_NOPS (expr);
  if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
      || contain_complex_addr_expr (expr))
    {
      aff_tree comb;
      tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}
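
/* For instance (a hypothetical tree, for illustration only), a base of
   &a[3] with 4-byte elements is lowered through tree_to_aff_combination
   to the affine form &a + 12, so that &a[3] here and &a + 12 elsewhere
   yield a single candidate instead of two, and the addressing cost is
   computed on the flat form.  */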

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
          || !flow_bb_inside_loop_p (data->current_loop, bb))
        set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (virtual_operand_p (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
        continue;

      step = determine_biv_step (phi);
      if (!step)
        continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
          || contains_abnormal_ssa_name_p (step))
        continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
        {
          if (POINTER_TYPE_P (type))
            step = convert_to_ptrofftype (step);
          else
            step = fold_convert (type, step);
        }

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}
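
/* As an illustration (made-up SSA names), find_bivs recognizes

     i_1 = PHI <0 (preheader), i_4 (latch)>
     ...
     i_4 = i_1 + 1;

   and records i_1 as an iv with base 0 and step 1; mark_bivs below then
   sets biv_p on both i_1 and the increment result i_4.  */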

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi, def;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
        continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      def = SSA_NAME_DEF_STMT (var);
      /* Don't mark an iv peeled from another one as a biv.  */
      if (def
          && gimple_code (def) == GIMPLE_PHI
          && gimple_bb (def) == loop->header)
        continue;

      incr_iv = get_iv (data, var);
      if (!incr_iv)
        continue;

      /* If the increment is in a subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
          || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
        continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}
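
/* For example (a hypothetical statement), with the biv i_1 above,

     a_7 = i_1 * 4;

   defines a giv: simple_iv reports base 0 and step 4 for a_7, and
   find_givs_in_stmt below records it via set_iv just like a biv, only
   with biv_p left false.  */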

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
        {
          fprintf (dump_file, "  number of iterations ");
          print_generic_expr (dump_file, niter->niter, TDF_SLIM);
          if (!integer_zerop (niter->may_be_zero))
            {
              fprintf (dump_file, "; zero if ");
              print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
            }
          fprintf (dump_file, "\n\n");
        }

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          if (ver_info (data, i)->iv)
            dump_iv (dump_file, ver_info (data, i)->iv);
        }
    }

  return true;
}

/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
            gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* Reset the ssa name so that it is not shown in the dumps, if it was
     not reset by the caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  data->iv_uses.safe_push (use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
              || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
                       tree **control_var, tree **bound,
                       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
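
/* E.g. for an exit test if (i_2 < n_3) where i_2 is an iv and n_3 is
   loop-invariant (illustrative names), *CONTROL_VAR ends up pointing to
   i_2, *BOUND to n_3, and true is returned; for a compare of two ivs or
   of two invariants the operands are still exported but the function
   returns false.  */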

/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}

/* Returns the outermost loop EXPR is obviously invariant in
   relative to the loop LOOP, i.e. if all its operands are defined
   outside of the returned loop.  Returns NULL if EXPR is not
   even obviously invariant in LOOP.  */

struct loop *
outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return current_loops->tree_root;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb)
        {
          if (flow_bb_inside_loop_p (loop, def_bb))
            return NULL;
          return superloop_at_depth (loop,
                                     loop_depth (def_bb->loop_father) + 1);
        }

      return current_loops->tree_root;
    }

  if (!EXPR_P (expr))
    return NULL;

  unsigned maxdepth = 0;
  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    {
      struct loop *ivloop;
      if (!TREE_OPERAND (expr, i))
        continue;

      ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
      if (!ivloop)
        return NULL;
      maxdepth = MAX (maxdepth, loop_depth (ivloop));
    }

  return superloop_at_depth (loop, maxdepth);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
          && flow_bb_inside_loop_p (loop, def_bb))
        return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
        && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
          && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
        return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
          || !expr_invariant_in_loop_p (loop, lbound))
        return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  For a base of *D42 with iv->base being &x[0], we produce
     *&x[0], which is not folded and does not trigger the ARRAY_REF
     path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
        return false;
    }
  else
    /* The step for pointer arithmetic is already 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
                            sizetype, &iv_base, &iv_step, dta->stmt,
                            false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
                void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, widest_int *mul)
{
  tree mby;
  enum tree_code code;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
  widest_int res, p0, p1;

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = 1;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
        return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
        return false;

      *mul = wi::sext (res * wi::to_widest (mby), precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
          || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
        return false;

      if (code == MINUS_EXPR)
        p1 = -p1;
      *mul = wi::sext (p0 + p1, precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
        return false;

      p0 = widest_int::from (top, SIGNED);
      p1 = widest_int::from (bot, SIGNED);
      if (p1 == 0)
        return false;
      *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
      return res == 0;

    default:
      return false;
    }
}
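
/* Worked example (hypothetical trees): for TOP = i_1 * 6 and BOT = i_1,
   the MULT_EXPR case recurses on i_1, which matches BOT with multiple 1,
   so *MUL becomes 6.  For TOP = 24 and BOT = 8 the INTEGER_CST case
   divides exactly and stores 3, while TOP = 25 leaves a nonzero
   remainder and the function returns false.  */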

/* Return true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
  if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
    align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));

  unsigned HOST_WIDE_INT bitpos;
  unsigned int ref_align;
  get_object_alignment_1 (ref, &ref_align, &bitpos);
  if (ref_align < align
      || (bitpos % align) != 0
      || (bitpos % BITS_PER_UNIT) != 0)
    return true;

  unsigned int trailing_zeros = tree_ctz (step);
  if (trailing_zeros < HOST_BITS_PER_INT
      && (1U << trailing_zeros) * BITS_PER_UNIT < align)
    return true;

  return false;
}
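
/* For instance, on a strict-alignment target, an int reference with
   TYPE_ALIGN of 32 bits and a step of 2 bytes gives tree_ctz (step) == 1,
   and (1U << 1) * BITS_PER_UNIT == 16 < 32, so every other iteration
   would be misaligned and the function returns true.  (Illustrative
   numbers, assuming 8-bit units.)  */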

/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
         target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
             || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
         and make them look addressable.  After some processing the
         non-addressability may be uncovered again, causing ADDR_EXPRs
         of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
          || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
        return true;

      /* ... fall through ... */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}

/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
          && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_BASE (base));
          if (!civ)
            goto fail;

          TMR_BASE (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX2 (base)
          && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX2 (base));
          if (!civ)
            goto fail;

          TMR_INDEX2 (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX (base)
          && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX (base));
          if (!civ)
            goto fail;

          TMR_INDEX (base) = civ->base;
          astep = civ->step;

          if (astep)
            {
              if (TMR_STEP (base))
                astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

              step = fold_build2 (PLUS_EXPR, type, step, astep);
            }
        }

      if (integer_zerop (step))
        goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
          || integer_zerop (ifs_ivopts_data.step))
        goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
         to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
        goto fail;

      /* Moreover, on strict alignment platforms, check that it is
         sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
        goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
         have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
        {
          tree *ref = &TREE_OPERAND (base, 0);
          while (handled_component_p (*ref))
            ref = &TREE_OPERAND (*ref, 0);
          if (TREE_CODE (*ref) == MEM_REF)
            {
              tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
                                      TREE_OPERAND (*ref, 0),
                                      TREE_OPERAND (*ref, 1));
              if (tem)
                *ref = tem;
            }
        }
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}

/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
        {
          /* If the statement defines an induction variable, the uses are not
             interesting by themselves.  */

          iv = get_iv (data, *lhs);

          if (iv && !integer_zerop (iv->step))
            return;
        }

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
          && (REFERENCE_CLASS_P (*rhs)
              || is_gimple_val (*rhs)))
        {
          if (REFERENCE_CLASS_P (*rhs))
            find_interesting_uses_address (data, stmt, rhs);
          else
            find_interesting_uses_op (data, *rhs);

          if (REFERENCE_CLASS_P (*lhs))
            find_interesting_uses_address (data, stmt, lhs);
          return;
        }
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
        {
          find_interesting_uses_cond (data, stmt);
          return;
        }

      /* TODO -- we should also handle address uses of type

         memory = call (whatever);

         and

         call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
        return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
        continue;

      iv = get_iv (data, op);
      if (!iv)
        continue;

      find_interesting_uses_op (data, op);
    }
}

/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (!virtual_operand_p (def))
        find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
        if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
            && !flow_bb_inside_loop_p (data->current_loop, e->dest))
          find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        if (!is_gimple_debug (gsi_stmt (bsi)))
          find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          info = ver_info (data, i);
          if (info->inv_id)
            {
              fprintf (dump_file, "  ");
              print_generic_expr (dump_file, info->name, TDF_SLIM);
              fprintf (dump_file, " is invariant (%d)%s\n",
                       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
            }
        }

      fprintf (dump_file, "\n");
    }

  free (body);
}
2077
2078 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2079 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2080 we are at the top-level of the processed address. */
2081
2082 static tree
2083 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2084 HOST_WIDE_INT *offset)
2085 {
2086 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2087 enum tree_code code;
2088 tree type, orig_type = TREE_TYPE (expr);
2089 HOST_WIDE_INT off0, off1, st;
2090 tree orig_expr = expr;
2091
2092 STRIP_NOPS (expr);
2093
2094 type = TREE_TYPE (expr);
2095 code = TREE_CODE (expr);
2096 *offset = 0;
2097
2098 switch (code)
2099 {
2100 case INTEGER_CST:
2101 if (!cst_and_fits_in_hwi (expr)
2102 || integer_zerop (expr))
2103 return orig_expr;
2104
2105 *offset = int_cst_value (expr);
2106 return build_int_cst (orig_type, 0);
2107
2108 case POINTER_PLUS_EXPR:
2109 case PLUS_EXPR:
2110 case MINUS_EXPR:
2111 op0 = TREE_OPERAND (expr, 0);
2112 op1 = TREE_OPERAND (expr, 1);
2113
2114 op0 = strip_offset_1 (op0, false, false, &off0);
2115 op1 = strip_offset_1 (op1, false, false, &off1);
2116
2117 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2118 if (op0 == TREE_OPERAND (expr, 0)
2119 && op1 == TREE_OPERAND (expr, 1))
2120 return orig_expr;
2121
2122 if (integer_zerop (op1))
2123 expr = op0;
2124 else if (integer_zerop (op0))
2125 {
2126 if (code == MINUS_EXPR)
2127 expr = fold_build1 (NEGATE_EXPR, type, op1);
2128 else
2129 expr = op1;
2130 }
2131 else
2132 expr = fold_build2 (code, type, op0, op1);
2133
2134 return fold_convert (orig_type, expr);
2135
2136 case MULT_EXPR:
2137 op1 = TREE_OPERAND (expr, 1);
2138 if (!cst_and_fits_in_hwi (op1))
2139 return orig_expr;
2140
2141 op0 = TREE_OPERAND (expr, 0);
2142 op0 = strip_offset_1 (op0, false, false, &off0);
2143 if (op0 == TREE_OPERAND (expr, 0))
2144 return orig_expr;
2145
2146 *offset = off0 * int_cst_value (op1);
2147 if (integer_zerop (op0))
2148 expr = op0;
2149 else
2150 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2151
2152 return fold_convert (orig_type, expr);
2153
2154 case ARRAY_REF:
2155 case ARRAY_RANGE_REF:
2156 if (!inside_addr)
2157 return orig_expr;
2158
2159 step = array_ref_element_size (expr);
2160 if (!cst_and_fits_in_hwi (step))
2161 break;
2162
2163 st = int_cst_value (step);
2164 op1 = TREE_OPERAND (expr, 1);
2165 op1 = strip_offset_1 (op1, false, false, &off1);
2166 *offset = off1 * st;
2167
2168 if (top_compref
2169 && integer_zerop (op1))
2170 {
2171 /* Strip the component reference completely. */
2172 op0 = TREE_OPERAND (expr, 0);
2173 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2174 *offset += off0;
2175 return op0;
2176 }
2177 break;
2178
2179 case COMPONENT_REF:
2180 {
2181 tree field;
2182
2183 if (!inside_addr)
2184 return orig_expr;
2185
2186 tmp = component_ref_field_offset (expr);
2187 field = TREE_OPERAND (expr, 1);
2188 if (top_compref
2189 && cst_and_fits_in_hwi (tmp)
2190 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2191 {
2192 HOST_WIDE_INT boffset, abs_off;
2193
2194 /* Strip the component reference completely. */
2195 op0 = TREE_OPERAND (expr, 0);
2196 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2197 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2198 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2199 if (boffset < 0)
2200 abs_off = -abs_off;
2201
2202 *offset = off0 + int_cst_value (tmp) + abs_off;
2203 return op0;
2204 }
2205 }
2206 break;
2207
2208 case ADDR_EXPR:
2209 op0 = TREE_OPERAND (expr, 0);
2210 op0 = strip_offset_1 (op0, true, true, &off0);
2211 *offset += off0;
2212
2213 if (op0 == TREE_OPERAND (expr, 0))
2214 return orig_expr;
2215
2216 expr = build_fold_addr_expr (op0);
2217 return fold_convert (orig_type, expr);
2218
2219 case MEM_REF:
2220 /* ??? Offset operand? */
2221 inside_addr = false;
2222 break;
2223
2224 default:
2225 return orig_expr;
2226 }
2227
2228 /* Default handling of expressions for which we want to recurse into
2229 the first operand. */
2230 op0 = TREE_OPERAND (expr, 0);
2231 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2232 *offset += off0;
2233
2234 if (op0 == TREE_OPERAND (expr, 0)
2235 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2236 return orig_expr;
2237
2238 expr = copy_node (expr);
2239 TREE_OPERAND (expr, 0) = op0;
2240 if (op1)
2241 TREE_OPERAND (expr, 1) = op1;
2242
2243 /* Inside an address, we might strip the top-level component references,
2244 thus changing the type of the expression. Handling of ADDR_EXPR
2245 will fix that. */
2246 expr = fold_convert (orig_type, expr);
2247
2248 return expr;
2249 }
2250
2251 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2252
2253 static tree
2254 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2255 {
2256 HOST_WIDE_INT off;
2257 tree core = strip_offset_1 (expr, false, false, &off);
2258 *offset = off;
2259 return core;
2260 }
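
Illustrative sketch, not part of the source file: what the core/offset split computes for a concrete address, in standalone C (the struct and values are hypothetical) -- the core is &a[0] and everything else folds into a compile-time constant offset:

#include <stdio.h>
#include <stddef.h>

struct s { int pad; int f[8]; };

int main (void)
{
  struct s a[4];
  /* strip_offset-style decomposition of &a[2].f[3]: array strides and
     field offsets accumulate into one constant.  */
  size_t off = 2 * sizeof (struct s) + offsetof (struct s, f)
               + 3 * sizeof (int);
  printf ("same address: %d\n",
          (void *) ((char *) &a[0] + off) == (void *) &a[2].f[3]);
  return 0;
}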
2261
2262 /* Returns a variant of TYPE that can be used as a base for different uses.
2263 We return an unsigned type with the same precision, which avoids problems
2264 with overflows. */
2265
2266 static tree
2267 generic_type_for (tree type)
2268 {
2269 if (POINTER_TYPE_P (type))
2270 return unsigned_type_for (type);
2271
2272 if (TYPE_UNSIGNED (type))
2273 return type;
2274
2275 return unsigned_type_for (type);
2276 }
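
Illustrative sketch, not part of the source file: why the unsigned variant is safe -- signed overflow is undefined behavior in C, while unsigned arithmetic wraps with defined semantics:

#include <stdio.h>

int main (void)
{
  /* A signed counter stepping past INT_MAX would be undefined, so
     candidate values are computed in an unsigned type of the same
     precision, where wrapping is well defined.  */
  unsigned int u = 0x7fffffffu;   /* INT_MAX as an unsigned value.  */
  u += 1;                         /* Well-defined wrap to 0x80000000.  */
  printf ("0x%x\n", u);
  return 0;
}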
2277
2278 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2279 the bitmap in which we should store them. */
2280
2281 static struct ivopts_data *fd_ivopts_data;
2282 static tree
2283 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2284 {
2285 bitmap *depends_on = (bitmap *) data;
2286 struct version_info *info;
2287
2288 if (TREE_CODE (*expr_p) != SSA_NAME)
2289 return NULL_TREE;
2290 info = name_info (fd_ivopts_data, *expr_p);
2291
2292 if (!info->inv_id || info->has_nonlin_use)
2293 return NULL_TREE;
2294
2295 if (!*depends_on)
2296 *depends_on = BITMAP_ALLOC (NULL);
2297 bitmap_set_bit (*depends_on, info->inv_id);
2298
2299 return NULL_TREE;
2300 }
2301
2302 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2303 position to POS. If USE is not NULL, the candidate is set as related to
2304 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2305 replacement of the final value of the iv by a direct computation. */
2306
2307 static struct iv_cand *
2308 add_candidate_1 (struct ivopts_data *data,
2309 tree base, tree step, bool important, enum iv_position pos,
2310 struct iv_use *use, gimple incremented_at)
2311 {
2312 unsigned i;
2313 struct iv_cand *cand = NULL;
2314 tree type, orig_type;
2315
2316 /* For non-original variables, make sure their values are computed in a type
2317 that does not invoke undefined behavior on overflows (since in general,
2318 we cannot prove that these induction variables are non-wrapping). */
2319 if (pos != IP_ORIGINAL)
2320 {
2321 orig_type = TREE_TYPE (base);
2322 type = generic_type_for (orig_type);
2323 if (type != orig_type)
2324 {
2325 base = fold_convert (type, base);
2326 step = fold_convert (type, step);
2327 }
2328 }
2329
2330 for (i = 0; i < n_iv_cands (data); i++)
2331 {
2332 cand = iv_cand (data, i);
2333
2334 if (cand->pos != pos)
2335 continue;
2336
2337 if (cand->incremented_at != incremented_at
2338 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2339 && cand->ainc_use != use))
2340 continue;
2341
2342 if (!cand->iv)
2343 {
2344 if (!base && !step)
2345 break;
2346
2347 continue;
2348 }
2349
2350 if (!base && !step)
2351 continue;
2352
2353 if (operand_equal_p (base, cand->iv->base, 0)
2354 && operand_equal_p (step, cand->iv->step, 0)
2355 && (TYPE_PRECISION (TREE_TYPE (base))
2356 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2357 break;
2358 }
2359
2360 if (i == n_iv_cands (data))
2361 {
2362 cand = XCNEW (struct iv_cand);
2363 cand->id = i;
2364
2365 if (!base && !step)
2366 cand->iv = NULL;
2367 else
2368 cand->iv = alloc_iv (base, step);
2369
2370 cand->pos = pos;
2371 if (pos != IP_ORIGINAL && cand->iv)
2372 {
2373 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2374 cand->var_after = cand->var_before;
2375 }
2376 cand->important = important;
2377 cand->incremented_at = incremented_at;
2378 data->iv_candidates.safe_push (cand);
2379
2380 if (step
2381 && TREE_CODE (step) != INTEGER_CST)
2382 {
2383 fd_ivopts_data = data;
2384 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2385 }
2386
2387 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2388 cand->ainc_use = use;
2389 else
2390 cand->ainc_use = NULL;
2391
2392 if (dump_file && (dump_flags & TDF_DETAILS))
2393 dump_cand (dump_file, cand);
2394 }
2395
2396 if (important && !cand->important)
2397 {
2398 cand->important = true;
2399 if (dump_file && (dump_flags & TDF_DETAILS))
2400 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2401 }
2402
2403 if (use)
2404 {
2405 bitmap_set_bit (use->related_cands, i);
2406 if (dump_file && (dump_flags & TDF_DETAILS))
2407 fprintf (dump_file, "Candidate %d is related to use %d\n",
2408 cand->id, use->id);
2409 }
2410
2411 return cand;
2412 }
2413
2414 /* Returns true if incrementing the induction variable at the end of the LOOP
2415 is allowed.
2416
2417 The purpose is to avoid splitting the latch edge with a biv increment, thus
2418 creating a jump, possibly confusing other optimization passes and leaving
2419 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2420 is not available (so we do not have a better alternative), or if the latch
2421 edge is already nonempty. */
2422
2423 static bool
2424 allow_ip_end_pos_p (struct loop *loop)
2425 {
2426 if (!ip_normal_pos (loop))
2427 return true;
2428
2429 if (!empty_block_p (ip_end_pos (loop)))
2430 return true;
2431
2432 return false;
2433 }
2434
2435 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2436 Important field is set to IMPORTANT. */
2437
2438 static void
2439 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2440 bool important, struct iv_use *use)
2441 {
2442 basic_block use_bb = gimple_bb (use->stmt);
2443 machine_mode mem_mode;
2444 unsigned HOST_WIDE_INT cstepi;
2445
2446 /* If we insert the increment in any position other than the standard
2447 ones, we must ensure that it is incremented once per iteration.
2448 It must not be in an inner nested loop, or on one side of an if
2449 statement. */
2450 if (use_bb->loop_father != data->current_loop
2451 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2452 || stmt_could_throw_p (use->stmt)
2453 || !cst_and_fits_in_hwi (step))
2454 return;
2455
2456 cstepi = int_cst_value (step);
2457
2458 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2459 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2460 || USE_STORE_PRE_INCREMENT (mem_mode))
2461 && GET_MODE_SIZE (mem_mode) == cstepi)
2462 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2463 || USE_STORE_PRE_DECREMENT (mem_mode))
2464 && GET_MODE_SIZE (mem_mode) == -cstepi))
2465 {
2466 enum tree_code code = MINUS_EXPR;
2467 tree new_base;
2468 tree new_step = step;
2469
2470 if (POINTER_TYPE_P (TREE_TYPE (base)))
2471 {
2472 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2473 code = POINTER_PLUS_EXPR;
2474 }
2475 else
2476 new_step = fold_convert (TREE_TYPE (base), new_step);
2477 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2478 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2479 use->stmt);
2480 }
2481 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2482 || USE_STORE_POST_INCREMENT (mem_mode))
2483 && GET_MODE_SIZE (mem_mode) == cstepi)
2484 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2485 || USE_STORE_POST_DECREMENT (mem_mode))
2486 && GET_MODE_SIZE (mem_mode) == -cstepi))
2487 {
2488 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2489 use->stmt);
2490 }
2491 }
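
Illustrative sketch, not part of the source file: the kind of loop these candidates target -- on machines that provide post-increment addressing, the pointer step folds into the memory access itself (function name hypothetical):

static void
copy (char *dst, const char *src, int n)
{
  while (n-- > 0)
    *dst++ = *src++;   /* Maps naturally to {POST_INC} addressing, so an
                          IP_AFTER_USE candidate can be essentially free.  */
}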
2492
2493 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2494 position to POS. If USE is not NULL, the candidate is set as related to
2495 it. The candidate computation is scheduled on all available positions. */
2496
2497 static void
2498 add_candidate (struct ivopts_data *data,
2499 tree base, tree step, bool important, struct iv_use *use)
2500 {
2501 if (ip_normal_pos (data->current_loop))
2502 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2503 if (ip_end_pos (data->current_loop)
2504 && allow_ip_end_pos_p (data->current_loop))
2505 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2506
2507 if (use != NULL && use->type == USE_ADDRESS)
2508 add_autoinc_candidates (data, base, step, important, use);
2509 }
2510
2511 /* Adds standard iv candidates. */
2512
2513 static void
2514 add_standard_iv_candidates (struct ivopts_data *data)
2515 {
2516 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2517
2518 /* The same for the `long' type if it is still fast enough. */
2519 if (TYPE_PRECISION
2520 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2521 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2522 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2523 build_int_cst (long_integer_type_node, 1), true, NULL);
2524
2525 /* The same for the `long long' type if it is still fast enough. */
2526 if (TYPE_PRECISION
2527 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2528 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2529 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2530 build_int_cst (long_long_integer_type_node, 1), true, NULL);
2531 }
2532
2533
2534 /* Adds candidates based on the old induction variable IV. */
2535
2536 static void
2537 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2538 {
2539 gimple phi;
2540 tree def;
2541 struct iv_cand *cand;
2542
2543 add_candidate (data, iv->base, iv->step, true, NULL);
2544
2545 /* The same, but with initial value zero. */
2546 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2547 add_candidate (data, size_int (0), iv->step, true, NULL);
2548 else
2549 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2550 iv->step, true, NULL);
2551
2552 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2553 if (gimple_code (phi) == GIMPLE_PHI)
2554 {
2555 /* Additionally record the possibility of leaving the original iv
2556 untouched. */
2557 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2558 /* Don't add candidate if it's from another PHI node because
2559 it's an affine iv appearing in the form of PEELED_CHREC. */
2560 phi = SSA_NAME_DEF_STMT (def);
2561 if (gimple_code (phi) != GIMPLE_PHI)
2562 {
2563 cand = add_candidate_1 (data,
2564 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2565 SSA_NAME_DEF_STMT (def));
2566 cand->var_before = iv->ssa_name;
2567 cand->var_after = def;
2568 }
2569 else
2570 gcc_assert (gimple_bb (phi) == data->current_loop->header);
2571 }
2572 }
2573
2574 /* Adds candidates based on the old induction variables. */
2575
2576 static void
2577 add_old_ivs_candidates (struct ivopts_data *data)
2578 {
2579 unsigned i;
2580 struct iv *iv;
2581 bitmap_iterator bi;
2582
2583 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2584 {
2585 iv = ver_info (data, i)->iv;
2586 if (iv && iv->biv_p && !integer_zerop (iv->step))
2587 add_old_iv_candidates (data, iv);
2588 }
2589 }
2590
2591 /* Adds candidates based on the value of the induction variable IV and USE. */
2592
2593 static void
2594 add_iv_value_candidates (struct ivopts_data *data,
2595 struct iv *iv, struct iv_use *use)
2596 {
2597 unsigned HOST_WIDE_INT offset;
2598 tree base;
2599 tree basetype;
2600
2601 add_candidate (data, iv->base, iv->step, false, use);
2602
2603 /* The same, but with initial value zero. Make such a variable important,
2604 since it is generic enough that possibly many uses may be based
2605 on it. */
2606 basetype = TREE_TYPE (iv->base);
2607 if (POINTER_TYPE_P (basetype))
2608 basetype = sizetype;
2609 add_candidate (data, build_int_cst (basetype, 0),
2610 iv->step, true, use);
2611
2612 /* Third, try removing the constant offset. Make sure to even
2613 add a candidate for &a[0] vs. (T *)&a. */
2614 base = strip_offset (iv->base, &offset);
2615 if (offset
2616 || base != iv->base)
2617 add_candidate (data, base, iv->step, false, use);
2618 }
2619
2620 /* Adds candidates based on the uses. */
2621
2622 static void
2623 add_derived_ivs_candidates (struct ivopts_data *data)
2624 {
2625 unsigned i;
2626
2627 for (i = 0; i < n_iv_uses (data); i++)
2628 {
2629 struct iv_use *use = iv_use (data, i);
2630
2631 if (!use)
2632 continue;
2633
2634 switch (use->type)
2635 {
2636 case USE_NONLINEAR_EXPR:
2637 case USE_COMPARE:
2638 case USE_ADDRESS:
2639 /* Just add the ivs based on the value of the iv used here. */
2640 add_iv_value_candidates (data, use->iv, use);
2641 break;
2642
2643 default:
2644 gcc_unreachable ();
2645 }
2646 }
2647 }
2648
2649 /* Record important candidates and add them to related_cands bitmaps
2650 if needed. */
2651
2652 static void
2653 record_important_candidates (struct ivopts_data *data)
2654 {
2655 unsigned i;
2656 struct iv_use *use;
2657
2658 for (i = 0; i < n_iv_cands (data); i++)
2659 {
2660 struct iv_cand *cand = iv_cand (data, i);
2661
2662 if (cand->important)
2663 bitmap_set_bit (data->important_candidates, i);
2664 }
2665
2666 data->consider_all_candidates = (n_iv_cands (data)
2667 <= CONSIDER_ALL_CANDIDATES_BOUND);
2668
2669 if (data->consider_all_candidates)
2670 {
2671 /* We will not need "related_cands" bitmaps in this case,
2672 so release them to decrease peak memory consumption. */
2673 for (i = 0; i < n_iv_uses (data); i++)
2674 {
2675 use = iv_use (data, i);
2676 BITMAP_FREE (use->related_cands);
2677 }
2678 }
2679 else
2680 {
2681 /* Add important candidates to the related_cands bitmaps. */
2682 for (i = 0; i < n_iv_uses (data); i++)
2683 bitmap_ior_into (iv_use (data, i)->related_cands,
2684 data->important_candidates);
2685 }
2686 }
2687
2688 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2689 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2690 we allocate a simple list for every use. */
2691
2692 static void
2693 alloc_use_cost_map (struct ivopts_data *data)
2694 {
2695 unsigned i, size, s;
2696
2697 for (i = 0; i < n_iv_uses (data); i++)
2698 {
2699 struct iv_use *use = iv_use (data, i);
2700
2701 if (data->consider_all_candidates)
2702 size = n_iv_cands (data);
2703 else
2704 {
2705 s = bitmap_count_bits (use->related_cands);
2706
2707 /* Round up to a power of two, so that taking the modulo by it is fast. */
2708 size = s ? (1 << ceil_log2 (s)) : 1;
2709 }
2710
2711 use->n_map_members = size;
2712 use->cost_map = XCNEWVEC (struct cost_pair, size);
2713 }
2714 }
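
Illustrative sketch, not part of the source file: rounding the map size up to a power of two lets set_use_iv_cost reduce an id to a slot with a single mask. A standalone check of the identity, assuming GCC's __builtin_clz and s >= 2:

#include <stdio.h>

int main (void)
{
  unsigned s = 5;                                      /* related candidates */
  unsigned size = 1u << (32 - __builtin_clz (s - 1));  /* ceil to pow2: 8 */
  for (unsigned id = 0; id < 100; id++)
    if ((id & (size - 1)) != id % size)
      return 1;
  printf ("size = %u; 'id & (size - 1)' matches 'id %% size'\n", size);
  return 0;
}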
2715
2716 /* Returns description of computation cost of expression whose runtime
2717 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
2718
2719 static comp_cost
2720 new_cost (unsigned runtime, unsigned complexity)
2721 {
2722 comp_cost cost;
2723
2724 cost.cost = runtime;
2725 cost.complexity = complexity;
2726
2727 return cost;
2728 }
2729
2730 /* Adds costs COST1 and COST2. */
2731
2732 static comp_cost
2733 add_costs (comp_cost cost1, comp_cost cost2)
2734 {
2735 cost1.cost += cost2.cost;
2736 cost1.complexity += cost2.complexity;
2737
2738 return cost1;
2739 }
2740 /* Subtracts COST2 from COST1. */
2741
2742 static comp_cost
2743 sub_costs (comp_cost cost1, comp_cost cost2)
2744 {
2745 cost1.cost -= cost2.cost;
2746 cost1.complexity -= cost2.complexity;
2747
2748 return cost1;
2749 }
2750
2751 /* Returns a negative number if COST1 < COST2, a positive number if
2752 COST1 > COST2, and 0 if COST1 = COST2. */
2753
2754 static int
2755 compare_costs (comp_cost cost1, comp_cost cost2)
2756 {
2757 if (cost1.cost == cost2.cost)
2758 return cost1.complexity - cost2.complexity;
2759
2760 return cost1.cost - cost2.cost;
2761 }
2762
2763 /* Returns true if COST is infinite. */
2764
2765 static bool
2766 infinite_cost_p (comp_cost cost)
2767 {
2768 return cost.cost == INFTY;
2769 }
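
Illustrative sketch, not part of the source file: the comparison above is effectively lexicographic -- runtime cost decides first, complexity breaks ties:

#include <stdio.h>

struct cc { int cost; int complexity; };

static int
cmp (struct cc a, struct cc b)   /* Mirrors compare_costs.  */
{
  if (a.cost == b.cost)
    return a.complexity - b.complexity;
  return a.cost - b.cost;
}

int main (void)
{
  struct cc a = { 4, 1 }, b = { 4, 3 }, c = { 3, 9 };
  printf ("%d %d\n", cmp (a, b) < 0, cmp (c, a) < 0);   /* Prints "1 1".  */
  return 0;
}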
2770
2771 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2772 on invariants DEPENDS_ON and that the value used in expressing it
2773 is VALUE, and in case of iv elimination the comparison operator is COMP. */
2774
2775 static void
2776 set_use_iv_cost (struct ivopts_data *data,
2777 struct iv_use *use, struct iv_cand *cand,
2778 comp_cost cost, bitmap depends_on, tree value,
2779 enum tree_code comp, int inv_expr_id)
2780 {
2781 unsigned i, s;
2782
2783 if (infinite_cost_p (cost))
2784 {
2785 BITMAP_FREE (depends_on);
2786 return;
2787 }
2788
2789 if (data->consider_all_candidates)
2790 {
2791 use->cost_map[cand->id].cand = cand;
2792 use->cost_map[cand->id].cost = cost;
2793 use->cost_map[cand->id].depends_on = depends_on;
2794 use->cost_map[cand->id].value = value;
2795 use->cost_map[cand->id].comp = comp;
2796 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2797 return;
2798 }
2799
2800 /* n_map_members is a power of two, so this computes modulo. */
2801 s = cand->id & (use->n_map_members - 1);
2802 for (i = s; i < use->n_map_members; i++)
2803 if (!use->cost_map[i].cand)
2804 goto found;
2805 for (i = 0; i < s; i++)
2806 if (!use->cost_map[i].cand)
2807 goto found;
2808
2809 gcc_unreachable ();
2810
2811 found:
2812 use->cost_map[i].cand = cand;
2813 use->cost_map[i].cost = cost;
2814 use->cost_map[i].depends_on = depends_on;
2815 use->cost_map[i].value = value;
2816 use->cost_map[i].comp = comp;
2817 use->cost_map[i].inv_expr_id = inv_expr_id;
2818 }
2819
2820 /* Gets cost of (USE, CANDIDATE) pair. */
2821
2822 static struct cost_pair *
2823 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2824 struct iv_cand *cand)
2825 {
2826 unsigned i, s;
2827 struct cost_pair *ret;
2828
2829 if (!cand)
2830 return NULL;
2831
2832 if (data->consider_all_candidates)
2833 {
2834 ret = use->cost_map + cand->id;
2835 if (!ret->cand)
2836 return NULL;
2837
2838 return ret;
2839 }
2840
2841 /* n_map_members is a power of two, so this computes modulo. */
2842 s = cand->id & (use->n_map_members - 1);
2843 for (i = s; i < use->n_map_members; i++)
2844 if (use->cost_map[i].cand == cand)
2845 return use->cost_map + i;
2846 else if (use->cost_map[i].cand == NULL)
2847 return NULL;
2848 for (i = 0; i < s; i++)
2849 if (use->cost_map[i].cand == cand)
2850 return use->cost_map + i;
2851 else if (use->cost_map[i].cand == NULL)
2852 return NULL;
2853
2854 return NULL;
2855 }
2856
2857 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
2858 static rtx
2859 produce_memory_decl_rtl (tree obj, int *regno)
2860 {
2861 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2862 machine_mode address_mode = targetm.addr_space.address_mode (as);
2863 rtx x;
2864
2865 gcc_assert (obj);
2866 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2867 {
2868 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2869 x = gen_rtx_SYMBOL_REF (address_mode, name);
2870 SET_SYMBOL_REF_DECL (x, obj);
2871 x = gen_rtx_MEM (DECL_MODE (obj), x);
2872 set_mem_addr_space (x, as);
2873 targetm.encode_section_info (obj, x, true);
2874 }
2875 else
2876 {
2877 x = gen_raw_REG (address_mode, (*regno)++);
2878 x = gen_rtx_MEM (DECL_MODE (obj), x);
2879 set_mem_addr_space (x, as);
2880 }
2881
2882 return x;
2883 }
2884
2885 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
2886 walk_tree. DATA contains the actual fake register number. */
2887
2888 static tree
2889 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2890 {
2891 tree obj = NULL_TREE;
2892 rtx x = NULL_RTX;
2893 int *regno = (int *) data;
2894
2895 switch (TREE_CODE (*expr_p))
2896 {
2897 case ADDR_EXPR:
2898 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2899 handled_component_p (*expr_p);
2900 expr_p = &TREE_OPERAND (*expr_p, 0))
2901 continue;
2902 obj = *expr_p;
2903 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
2904 x = produce_memory_decl_rtl (obj, regno);
2905 break;
2906
2907 case SSA_NAME:
2908 *ws = 0;
2909 obj = SSA_NAME_VAR (*expr_p);
2910 /* Defer handling of anonymous SSA_NAMEs to the expander. */
2911 if (!obj)
2912 return NULL_TREE;
2913 if (!DECL_RTL_SET_P (obj))
2914 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2915 break;
2916
2917 case VAR_DECL:
2918 case PARM_DECL:
2919 case RESULT_DECL:
2920 *ws = 0;
2921 obj = *expr_p;
2922
2923 if (DECL_RTL_SET_P (obj))
2924 break;
2925
2926 if (DECL_MODE (obj) == BLKmode)
2927 x = produce_memory_decl_rtl (obj, regno);
2928 else
2929 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2930
2931 break;
2932
2933 default:
2934 break;
2935 }
2936
2937 if (x)
2938 {
2939 decl_rtl_to_reset.safe_push (obj);
2940 SET_DECL_RTL (obj, x);
2941 }
2942
2943 return NULL_TREE;
2944 }
2945
2946 /* Determines cost of the computation of EXPR. */
2947
2948 static unsigned
2949 computation_cost (tree expr, bool speed)
2950 {
2951 rtx_insn *seq;
2952 rtx rslt;
2953 tree type = TREE_TYPE (expr);
2954 unsigned cost;
2955 /* Avoid using hard regs in ways which may be unsupported. */
2956 int regno = LAST_VIRTUAL_REGISTER + 1;
2957 struct cgraph_node *node = cgraph_node::get (current_function_decl);
2958 enum node_frequency real_frequency = node->frequency;
2959
2960 node->frequency = NODE_FREQUENCY_NORMAL;
2961 crtl->maybe_hot_insn_p = speed;
2962 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2963 start_sequence ();
2964 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2965 seq = get_insns ();
2966 end_sequence ();
2967 default_rtl_profile ();
2968 node->frequency = real_frequency;
2969
2970 cost = seq_cost (seq, speed);
2971 if (MEM_P (rslt))
2972 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2973 TYPE_ADDR_SPACE (type), speed);
2974 else if (!REG_P (rslt))
2975 cost += set_src_cost (rslt, speed);
2976
2977 return cost;
2978 }
2979
2980 /* Returns the variable containing the value of candidate CAND at statement STMT. */
2981
2982 static tree
2983 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2984 {
2985 if (stmt_after_increment (loop, cand, stmt))
2986 return cand->var_after;
2987 else
2988 return cand->var_before;
2989 }
2990
2991 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2992 same precision that is at least as wide as the precision of TYPE, stores
2993 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2994 type of A and B. */
2995
2996 static tree
2997 determine_common_wider_type (tree *a, tree *b)
2998 {
2999 tree wider_type = NULL;
3000 tree suba, subb;
3001 tree atype = TREE_TYPE (*a);
3002
3003 if (CONVERT_EXPR_P (*a))
3004 {
3005 suba = TREE_OPERAND (*a, 0);
3006 wider_type = TREE_TYPE (suba);
3007 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3008 return atype;
3009 }
3010 else
3011 return atype;
3012
3013 if (CONVERT_EXPR_P (*b))
3014 {
3015 subb = TREE_OPERAND (*b, 0);
3016 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3017 return atype;
3018 }
3019 else
3020 return atype;
3021
3022 *a = suba;
3023 *b = subb;
3024 return wider_type;
3025 }
3026
3027 /* Determines the expression by which USE is expressed from induction variable
3028 CAND at statement AT in LOOP. The expression is stored in a decomposed
3029 form into AFF. Returns false if USE cannot be expressed using CAND. */
3030
3031 static bool
3032 get_computation_aff (struct loop *loop,
3033 struct iv_use *use, struct iv_cand *cand, gimple at,
3034 struct aff_tree *aff)
3035 {
3036 tree ubase = use->iv->base;
3037 tree ustep = use->iv->step;
3038 tree cbase = cand->iv->base;
3039 tree cstep = cand->iv->step, cstep_common;
3040 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3041 tree common_type, var;
3042 tree uutype;
3043 aff_tree cbase_aff, var_aff;
3044 widest_int rat;
3045
3046 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3047 {
3048 /* We do not have enough precision to express the values of the use. */
3049 return false;
3050 }
3051
3052 var = var_at_stmt (loop, cand, at);
3053 uutype = unsigned_type_for (utype);
3054
3055 /* If the conversion is not noop, perform it. */
3056 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3057 {
3058 cstep = fold_convert (uutype, cstep);
3059 cbase = fold_convert (uutype, cbase);
3060 var = fold_convert (uutype, var);
3061 }
3062
3063 if (!constant_multiple_of (ustep, cstep, &rat))
3064 return false;
3065
3066 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3067 type, we achieve better folding by computing their difference in this
3068 wider type, and cast the result to UUTYPE. We do not need to worry about
3069 overflows, as all the arithmetic will in the end be performed in UUTYPE
3070 anyway. */
3071 common_type = determine_common_wider_type (&ubase, &cbase);
3072
3073 /* use = ubase - ratio * cbase + ratio * var. */
3074 tree_to_aff_combination (ubase, common_type, aff);
3075 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3076 tree_to_aff_combination (var, uutype, &var_aff);
3077
3078 /* We need to shift the value if we are after the increment. */
3079 if (stmt_after_increment (loop, cand, at))
3080 {
3081 aff_tree cstep_aff;
3082
3083 if (common_type != uutype)
3084 cstep_common = fold_convert (common_type, cstep);
3085 else
3086 cstep_common = cstep;
3087
3088 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3089 aff_combination_add (&cbase_aff, &cstep_aff);
3090 }
3091
3092 aff_combination_scale (&cbase_aff, -rat);
3093 aff_combination_add (aff, &cbase_aff);
3094 if (common_type != uutype)
3095 aff_combination_convert (aff, uutype);
3096
3097 aff_combination_scale (&var_aff, rat);
3098 aff_combination_add (aff, &var_aff);
3099
3100 return true;
3101 }
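
Illustrative sketch, not part of the source file: a numeric check of the identity used above, use = ubase - ratio * cbase + ratio * var, where ratio = ustep / cstep (all values hypothetical):

#include <stdio.h>

int main (void)
{
  long ubase = 100, ustep = 8;    /* Use iv: 100 + 8*i.  */
  long cbase = 4, cstep = 4;      /* Candidate iv: 4 + 4*i.  */
  long ratio = ustep / cstep;     /* constant_multiple_of would find 2.  */
  for (long i = 0; i < 10; i++)
    {
      long var = cbase + i * cstep;
      long use = ubase + i * ustep;
      if (use != ubase - ratio * cbase + ratio * var)
        return 1;
    }
  printf ("identity holds\n");
  return 0;
}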
3102
3103 /* Return the type of USE. */
3104
3105 static tree
3106 get_use_type (struct iv_use *use)
3107 {
3108 tree base_type = TREE_TYPE (use->iv->base);
3109 tree type;
3110
3111 if (use->type == USE_ADDRESS)
3112 {
3113 /* The base_type may be a void pointer. Create a pointer type based on
3114 the mem_ref instead. */
3115 type = build_pointer_type (TREE_TYPE (*use->op_p));
3116 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3117 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3118 }
3119 else
3120 type = base_type;
3121
3122 return type;
3123 }
3124
3125 /* Determines the expression by which USE is expressed from induction variable
3126 CAND at statement AT in LOOP. The computation is unshared. */
3127
3128 static tree
3129 get_computation_at (struct loop *loop,
3130 struct iv_use *use, struct iv_cand *cand, gimple at)
3131 {
3132 aff_tree aff;
3133 tree type = get_use_type (use);
3134
3135 if (!get_computation_aff (loop, use, cand, at, &aff))
3136 return NULL_TREE;
3137 unshare_aff_combination (&aff);
3138 return fold_convert (type, aff_combination_to_tree (&aff));
3139 }
3140
3141 /* Determines the expression by which USE is expressed from induction variable
3142 CAND in LOOP. The computation is unshared. */
3143
3144 static tree
3145 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3146 {
3147 return get_computation_at (loop, use, cand, use->stmt);
3148 }
3149
3150 /* Adjust the cost COST for being in loop setup rather than loop body.
3151 If we're optimizing for space, the loop setup overhead is constant;
3152 if we're optimizing for speed, amortize it over the per-iteration cost. */
3153 static unsigned
3154 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3155 {
3156 if (cost == INFTY)
3157 return cost;
3158 else if (optimize_loop_for_speed_p (data->current_loop))
3159 return cost / avg_loop_niter (data->current_loop);
3160 else
3161 return cost;
3162 }
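
Illustrative sketch, not part of the source file: the amortization in numbers -- a one-time setup cost matters less the more iterations it is spread over (values hypothetical):

#include <stdio.h>

int main (void)
{
  unsigned setup = 40, avg_niter = 10;
  /* Optimizing for speed: charge setup / avg_niter per iteration.
     Optimizing for size: the setup code is emitted once regardless.  */
  printf ("speed: %u per iteration; size: %u flat\n",
          setup / avg_niter, setup);
  return 0;
}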
3163
3164 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3165 validity for a memory reference accessing memory of mode MODE in
3166 address space AS. */
3167
3168
3169 bool
3170 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3171 addr_space_t as)
3172 {
3173 #define MAX_RATIO 128
3174 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3175 static vec<sbitmap> valid_mult_list;
3176 sbitmap valid_mult;
3177
3178 if (data_index >= valid_mult_list.length ())
3179 valid_mult_list.safe_grow_cleared (data_index + 1);
3180
3181 valid_mult = valid_mult_list[data_index];
3182 if (!valid_mult)
3183 {
3184 machine_mode address_mode = targetm.addr_space.address_mode (as);
3185 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3186 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3187 rtx addr, scaled;
3188 HOST_WIDE_INT i;
3189
3190 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3191 bitmap_clear (valid_mult);
3192 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3193 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3194 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3195 {
3196 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3197 if (memory_address_addr_space_p (mode, addr, as)
3198 || memory_address_addr_space_p (mode, scaled, as))
3199 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3200 }
3201
3202 if (dump_file && (dump_flags & TDF_DETAILS))
3203 {
3204 fprintf (dump_file, " allowed multipliers:");
3205 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3206 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3207 fprintf (dump_file, " %d", (int) i);
3208 fprintf (dump_file, "\n");
3209 fprintf (dump_file, "\n");
3210 }
3211
3212 valid_mult_list[data_index] = valid_mult;
3213 }
3214
3215 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3216 return false;
3217
3218 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3219 }
3220
3221 /* Returns the cost of an address of the shape symbol + var + OFFSET + RATIO * index.
3222 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3223 variable is omitted. Compute the cost for a memory reference that accesses
3224 a memory location of mode MEM_MODE in address space AS.
3225
3226 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3227 size of MEM_MODE / RATIO) is available. To make this determination, we
3228 look at the size of the increment to be made, which is given in CSTEP.
3229 CSTEP may be zero if the step is unknown.
3230 STMT_AFTER_INC is true iff the statement we're looking at is after the
3231 increment of the original biv.
3232
3233 TODO -- there must be some better way. All this is quite crude. */
3234
3235 enum ainc_type
3236 {
3237 AINC_PRE_INC, /* Pre increment. */
3238 AINC_PRE_DEC, /* Pre decrement. */
3239 AINC_POST_INC, /* Post increment. */
3240 AINC_POST_DEC, /* Post decrement. */
3241 AINC_NONE /* Also the number of auto increment types. */
3242 };
3243
3244 typedef struct address_cost_data_s
3245 {
3246 HOST_WIDE_INT min_offset, max_offset;
3247 unsigned costs[2][2][2][2];
3248 unsigned ainc_costs[AINC_NONE];
3249 } *address_cost_data;
3250
3251
3252 static comp_cost
3253 get_address_cost (bool symbol_present, bool var_present,
3254 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3255 HOST_WIDE_INT cstep, machine_mode mem_mode,
3256 addr_space_t as, bool speed,
3257 bool stmt_after_inc, bool *may_autoinc)
3258 {
3259 machine_mode address_mode = targetm.addr_space.address_mode (as);
3260 static vec<address_cost_data> address_cost_data_list;
3261 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3262 address_cost_data data;
3263 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3264 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3265 unsigned cost, acost, complexity;
3266 enum ainc_type autoinc_type;
3267 bool offset_p, ratio_p, autoinc;
3268 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3269 unsigned HOST_WIDE_INT mask;
3270 unsigned bits;
3271
3272 if (data_index >= address_cost_data_list.length ())
3273 address_cost_data_list.safe_grow_cleared (data_index + 1);
3274
3275 data = address_cost_data_list[data_index];
3276 if (!data)
3277 {
3278 HOST_WIDE_INT i;
3279 HOST_WIDE_INT rat, off = 0;
3280 int old_cse_not_expected, width;
3281 unsigned sym_p, var_p, off_p, rat_p, add_c;
3282 rtx_insn *seq;
3283 rtx addr, base;
3284 rtx reg0, reg1;
3285
3286 data = (address_cost_data) xcalloc (1, sizeof (*data));
3287
3288 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3289
3290 width = GET_MODE_BITSIZE (address_mode) - 1;
3291 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3292 width = HOST_BITS_PER_WIDE_INT - 1;
3293 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3294
3295 for (i = width; i >= 0; i--)
3296 {
3297 off = -((unsigned HOST_WIDE_INT) 1 << i);
3298 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3299 if (memory_address_addr_space_p (mem_mode, addr, as))
3300 break;
3301 }
3302 data->min_offset = (i == -1? 0 : off);
3303
3304 for (i = width; i >= 0; i--)
3305 {
3306 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3307 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3308 if (memory_address_addr_space_p (mem_mode, addr, as))
3309 break;
3310 /* For some targets, like ARM Thumb-1, the offset should be naturally
3311 aligned. Try an aligned offset if address_mode is not QImode. */
3312 off = (address_mode == QImode)
3313 ? 0
3314 : ((unsigned HOST_WIDE_INT) 1 << i)
3315 - GET_MODE_SIZE (address_mode);
3316 if (off > 0)
3317 {
3318 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3319 if (memory_address_addr_space_p (mem_mode, addr, as))
3320 break;
3321 }
3322 }
3323 if (i == -1)
3324 off = 0;
3325 data->max_offset = off;
3326
3327 if (dump_file && (dump_flags & TDF_DETAILS))
3328 {
3329 fprintf (dump_file, "get_address_cost:\n");
3330 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3331 GET_MODE_NAME (mem_mode),
3332 data->min_offset);
3333 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3334 GET_MODE_NAME (mem_mode),
3335 data->max_offset);
3336 }
3337
3338 rat = 1;
3339 for (i = 2; i <= MAX_RATIO; i++)
3340 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3341 {
3342 rat = i;
3343 break;
3344 }
3345
3346 /* Compute the cost of various addressing modes. */
3347 acost = 0;
3348 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3349 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3350
3351 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3352 || USE_STORE_PRE_DECREMENT (mem_mode))
3353 {
3354 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3355 has_predec[mem_mode]
3356 = memory_address_addr_space_p (mem_mode, addr, as);
3357
3358 if (has_predec[mem_mode])
3359 data->ainc_costs[AINC_PRE_DEC]
3360 = address_cost (addr, mem_mode, as, speed);
3361 }
3362 if (USE_LOAD_POST_DECREMENT (mem_mode)
3363 || USE_STORE_POST_DECREMENT (mem_mode))
3364 {
3365 addr = gen_rtx_POST_DEC (address_mode, reg0);
3366 has_postdec[mem_mode]
3367 = memory_address_addr_space_p (mem_mode, addr, as);
3368
3369 if (has_postdec[mem_mode])
3370 data->ainc_costs[AINC_POST_DEC]
3371 = address_cost (addr, mem_mode, as, speed);
3372 }
3373 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3374 || USE_STORE_PRE_INCREMENT (mem_mode))
3375 {
3376 addr = gen_rtx_PRE_INC (address_mode, reg0);
3377 has_preinc[mem_mode]
3378 = memory_address_addr_space_p (mem_mode, addr, as);
3379
3380 if (has_preinc[mem_mode])
3381 data->ainc_costs[AINC_PRE_INC]
3382 = address_cost (addr, mem_mode, as, speed);
3383 }
3384 if (USE_LOAD_POST_INCREMENT (mem_mode)
3385 || USE_STORE_POST_INCREMENT (mem_mode))
3386 {
3387 addr = gen_rtx_POST_INC (address_mode, reg0);
3388 has_postinc[mem_mode]
3389 = memory_address_addr_space_p (mem_mode, addr, as);
3390
3391 if (has_postinc[mem_mode])
3392 data->ainc_costs[AINC_POST_INC]
3393 = address_cost (addr, mem_mode, as, speed);
3394 }
3395 for (i = 0; i < 16; i++)
3396 {
3397 sym_p = i & 1;
3398 var_p = (i >> 1) & 1;
3399 off_p = (i >> 2) & 1;
3400 rat_p = (i >> 3) & 1;
3401
3402 addr = reg0;
3403 if (rat_p)
3404 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3405 gen_int_mode (rat, address_mode));
3406
3407 if (var_p)
3408 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3409
3410 if (sym_p)
3411 {
3412 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3413 /* ??? We can run into trouble with some backends by presenting
3414 it with symbols which haven't been properly passed through
3415 targetm.encode_section_info. By setting the local bit, we
3416 enhance the probability of things working. */
3417 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3418
3419 if (off_p)
3420 base = gen_rtx_fmt_e (CONST, address_mode,
3421 gen_rtx_fmt_ee
3422 (PLUS, address_mode, base,
3423 gen_int_mode (off, address_mode)));
3424 }
3425 else if (off_p)
3426 base = gen_int_mode (off, address_mode);
3427 else
3428 base = NULL_RTX;
3429
3430 if (base)
3431 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3432
3433 start_sequence ();
3434 /* To avoid splitting addressing modes, pretend that no cse will
3435 follow. */
3436 old_cse_not_expected = cse_not_expected;
3437 cse_not_expected = true;
3438 addr = memory_address_addr_space (mem_mode, addr, as);
3439 cse_not_expected = old_cse_not_expected;
3440 seq = get_insns ();
3441 end_sequence ();
3442
3443 acost = seq_cost (seq, speed);
3444 acost += address_cost (addr, mem_mode, as, speed);
3445
3446 if (!acost)
3447 acost = 1;
3448 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3449 }
3450
3451 /* On some targets, it is quite expensive to load a symbol into a register,
3452 which makes addresses that contain symbols look much more expensive.
3453 However, the symbol will have to be loaded in any case before the
3454 loop (and quite likely we have it in a register already), so it does not
3455 make much sense to penalize them too heavily. So make some final
3456 tweaks for the SYMBOL_PRESENT modes:
3457
3458 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3459 var is cheaper, use this mode with small penalty.
3460 If VAR_PRESENT is true, try whether the mode with
3461 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3462 if this is the case, use it. */
3463 add_c = add_cost (speed, address_mode);
3464 for (i = 0; i < 8; i++)
3465 {
3466 var_p = i & 1;
3467 off_p = (i >> 1) & 1;
3468 rat_p = (i >> 2) & 1;
3469
3470 acost = data->costs[0][1][off_p][rat_p] + 1;
3471 if (var_p)
3472 acost += add_c;
3473
3474 if (acost < data->costs[1][var_p][off_p][rat_p])
3475 data->costs[1][var_p][off_p][rat_p] = acost;
3476 }
3477
3478 if (dump_file && (dump_flags & TDF_DETAILS))
3479 {
3480 fprintf (dump_file, "Address costs:\n");
3481
3482 for (i = 0; i < 16; i++)
3483 {
3484 sym_p = i & 1;
3485 var_p = (i >> 1) & 1;
3486 off_p = (i >> 2) & 1;
3487 rat_p = (i >> 3) & 1;
3488
3489 fprintf (dump_file, " ");
3490 if (sym_p)
3491 fprintf (dump_file, "sym + ");
3492 if (var_p)
3493 fprintf (dump_file, "var + ");
3494 if (off_p)
3495 fprintf (dump_file, "cst + ");
3496 if (rat_p)
3497 fprintf (dump_file, "rat * ");
3498
3499 acost = data->costs[sym_p][var_p][off_p][rat_p];
3500 fprintf (dump_file, "index costs %d\n", acost);
3501 }
3502 if (has_predec[mem_mode] || has_postdec[mem_mode]
3503 || has_preinc[mem_mode] || has_postinc[mem_mode])
3504 fprintf (dump_file, " May include autoinc/dec\n");
3505 fprintf (dump_file, "\n");
3506 }
3507
3508 address_cost_data_list[data_index] = data;
3509 }
3510
3511 bits = GET_MODE_BITSIZE (address_mode);
3512 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3513 offset &= mask;
3514 if ((offset >> (bits - 1) & 1))
3515 offset |= ~mask;
3516 s_offset = offset;
3517
3518 autoinc = false;
3519 autoinc_type = AINC_NONE;
3520 msize = GET_MODE_SIZE (mem_mode);
3521 autoinc_offset = offset;
3522 if (stmt_after_inc)
3523 autoinc_offset += ratio * cstep;
3524 if (symbol_present || var_present || ratio != 1)
3525 autoinc = false;
3526 else
3527 {
3528 if (has_postinc[mem_mode] && autoinc_offset == 0
3529 && msize == cstep)
3530 autoinc_type = AINC_POST_INC;
3531 else if (has_postdec[mem_mode] && autoinc_offset == 0
3532 && msize == -cstep)
3533 autoinc_type = AINC_POST_DEC;
3534 else if (has_preinc[mem_mode] && autoinc_offset == msize
3535 && msize == cstep)
3536 autoinc_type = AINC_PRE_INC;
3537 else if (has_predec[mem_mode] && autoinc_offset == -msize
3538 && msize == -cstep)
3539 autoinc_type = AINC_PRE_DEC;
3540
3541 if (autoinc_type != AINC_NONE)
3542 autoinc = true;
3543 }
3544
3545 cost = 0;
3546 offset_p = (s_offset != 0
3547 && data->min_offset <= s_offset
3548 && s_offset <= data->max_offset);
3549 ratio_p = (ratio != 1
3550 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3551
3552 if (ratio != 1 && !ratio_p)
3553 cost += mult_by_coeff_cost (ratio, address_mode, speed);
3554
3555 if (s_offset && !offset_p && !symbol_present)
3556 cost += add_cost (speed, address_mode);
3557
3558 if (may_autoinc)
3559 *may_autoinc = autoinc;
3560 if (autoinc)
3561 acost = data->ainc_costs[autoinc_type];
3562 else
3563 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3564 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3565 return new_cost (cost + acost, complexity);
3566 }
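
Illustrative sketch, not part of the source file: the mask manipulation near the end of the function sign-extends OFFSET from the width of the address mode; shifting in two steps keeps the mask computation defined even when bits equals the host word width:

#include <stdio.h>

int main (void)
{
  unsigned bits = 16;                                      /* Pretend width.  */
  unsigned long long mask = ~(~0ULL << (bits - 1) << 1);   /* 0xffff.  */
  unsigned long long offset = 0xfff0;                      /* -16 in 16 bits.  */
  offset &= mask;
  if ((offset >> (bits - 1)) & 1)                          /* Sign bit set.  */
    offset |= ~mask;                                       /* Extend it.  */
  printf ("%lld\n", (long long) offset);                   /* Prints -16.  */
  return 0;
}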
3567
3568 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is
3569 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3570 calculating the operands of EXPR. Returns true if successful, and returns
3571 the cost in COST. */
3572
3573 static bool
3574 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
3575 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3576 {
3577 comp_cost res;
3578 tree op1 = TREE_OPERAND (expr, 1);
3579 tree cst = TREE_OPERAND (mult, 1);
3580 tree multop = TREE_OPERAND (mult, 0);
3581 int m = exact_log2 (int_cst_value (cst));
3582 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3583 int sa_cost;
3584 bool equal_p = false;
3585
3586 if (!(m >= 0 && m < maxm))
3587 return false;
3588
3589 if (operand_equal_p (op1, mult, 0))
3590 equal_p = true;
3591
3592 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3593 ? shiftadd_cost (speed, mode, m)
3594 : (equal_p
3595 ? shiftsub1_cost (speed, mode, m)
3596 : shiftsub0_cost (speed, mode, m)));
3597 res = new_cost (sa_cost, 0);
3598 res = add_costs (res, equal_p ? cost0 : cost1);
3599
3600 STRIP_NOPS (multop);
3601 if (!is_gimple_val (multop))
3602 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3603
3604 *cost = res;
3605 return true;
3606 }
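
Illustrative sketch, not part of the source file: the rewrite being costed -- when the multiplier is a power of two (m = exact_log2 of the constant), a multiply-add becomes a shift-add:

#include <stdio.h>

int main (void)
{
  long a = 7, b = 5;
  int m = 3;                      /* cst == 1 << m == 8.  */
  long plain = b + a * 8;         /* PLUS_EXPR with a MULT_EXPR operand.  */
  long shiftadd = b + (a << m);   /* The cheaper shift-add form.  */
  printf ("%ld %ld\n", plain, shiftadd);
  return plain != shiftadd;
}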
3607
3608 /* Estimates cost of forcing expression EXPR into a variable. */
3609
3610 static comp_cost
3611 force_expr_to_var_cost (tree expr, bool speed)
3612 {
3613 static bool costs_initialized = false;
3614 static unsigned integer_cost [2];
3615 static unsigned symbol_cost [2];
3616 static unsigned address_cost [2];
3617 tree op0, op1;
3618 comp_cost cost0, cost1, cost;
3619 machine_mode mode;
3620
3621 if (!costs_initialized)
3622 {
3623 tree type = build_pointer_type (integer_type_node);
3624 tree var, addr;
3625 rtx x;
3626 int i;
3627
3628 var = create_tmp_var_raw (integer_type_node, "test_var");
3629 TREE_STATIC (var) = 1;
3630 x = produce_memory_decl_rtl (var, NULL);
3631 SET_DECL_RTL (var, x);
3632
3633 addr = build1 (ADDR_EXPR, type, var);
3634
3635
3636 for (i = 0; i < 2; i++)
3637 {
3638 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3639 2000), i);
3640
3641 symbol_cost[i] = computation_cost (addr, i) + 1;
3642
3643 address_cost[i]
3644 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3645 if (dump_file && (dump_flags & TDF_DETAILS))
3646 {
3647 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3648 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3649 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3650 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3651 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3652 fprintf (dump_file, "\n");
3653 }
3654 }
3655
3656 costs_initialized = true;
3657 }
3658
3659 STRIP_NOPS (expr);
3660
3661 if (SSA_VAR_P (expr))
3662 return no_cost;
3663
3664 if (is_gimple_min_invariant (expr))
3665 {
3666 if (TREE_CODE (expr) == INTEGER_CST)
3667 return new_cost (integer_cost [speed], 0);
3668
3669 if (TREE_CODE (expr) == ADDR_EXPR)
3670 {
3671 tree obj = TREE_OPERAND (expr, 0);
3672
3673 if (TREE_CODE (obj) == VAR_DECL
3674 || TREE_CODE (obj) == PARM_DECL
3675 || TREE_CODE (obj) == RESULT_DECL)
3676 return new_cost (symbol_cost [speed], 0);
3677 }
3678
3679 return new_cost (address_cost [speed], 0);
3680 }
3681
3682 switch (TREE_CODE (expr))
3683 {
3684 case POINTER_PLUS_EXPR:
3685 case PLUS_EXPR:
3686 case MINUS_EXPR:
3687 case MULT_EXPR:
3688 op0 = TREE_OPERAND (expr, 0);
3689 op1 = TREE_OPERAND (expr, 1);
3690 STRIP_NOPS (op0);
3691 STRIP_NOPS (op1);
3692 break;
3693
3694 CASE_CONVERT:
3695 case NEGATE_EXPR:
3696 op0 = TREE_OPERAND (expr, 0);
3697 STRIP_NOPS (op0);
3698 op1 = NULL_TREE;
3699 break;
3700
3701 default:
3702 /* Just an arbitrary value, FIXME. */
3703 return new_cost (target_spill_cost[speed], 0);
3704 }
3705
3706 if (op0 == NULL_TREE
3707 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
3708 cost0 = no_cost;
3709 else
3710 cost0 = force_expr_to_var_cost (op0, speed);
3711
3712 if (op1 == NULL_TREE
3713 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
3714 cost1 = no_cost;
3715 else
3716 cost1 = force_expr_to_var_cost (op1, speed);
3717
3718 mode = TYPE_MODE (TREE_TYPE (expr));
3719 switch (TREE_CODE (expr))
3720 {
3721 case POINTER_PLUS_EXPR:
3722 case PLUS_EXPR:
3723 case MINUS_EXPR:
3724 case NEGATE_EXPR:
3725 cost = new_cost (add_cost (speed, mode), 0);
3726 if (TREE_CODE (expr) != NEGATE_EXPR)
3727 {
3728 tree mult = NULL_TREE;
3729 comp_cost sa_cost;
3730 if (TREE_CODE (op1) == MULT_EXPR)
3731 mult = op1;
3732 else if (TREE_CODE (op0) == MULT_EXPR)
3733 mult = op0;
3734
3735 if (mult != NULL_TREE
3736 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3737 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
3738 speed, &sa_cost))
3739 return sa_cost;
3740 }
3741 break;
3742
3743 CASE_CONVERT:
3744 {
3745 tree inner_mode, outer_mode;
3746 outer_mode = TREE_TYPE (expr);
3747 inner_mode = TREE_TYPE (op0);
3748 cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
3749 TYPE_MODE (inner_mode), speed), 0);
3750 }
3751 break;
3752
3753 case MULT_EXPR:
3754 if (cst_and_fits_in_hwi (op0))
3755 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
3756 mode, speed), 0);
3757 else if (cst_and_fits_in_hwi (op1))
3758 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
3759 mode, speed), 0);
3760 else
3761 return new_cost (target_spill_cost [speed], 0);
3762 break;
3763
3764 default:
3765 gcc_unreachable ();
3766 }
3767
3768 cost = add_costs (cost, cost0);
3769 cost = add_costs (cost, cost1);
3770
3771 /* Bound the cost by target_spill_cost. The parts of complicated
3772 computations often are either loop invariant or at least can
3773 be shared between several iv uses, so letting this grow without
3774 limits would not give reasonable results. */
3775 if (cost.cost > (int) target_spill_cost [speed])
3776 cost.cost = target_spill_cost [speed];
3777
3778 return cost;
3779 }
3780
3781 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3782 invariants the computation depends on. */
3783
3784 static comp_cost
3785 force_var_cost (struct ivopts_data *data,
3786 tree expr, bitmap *depends_on)
3787 {
3788 if (depends_on)
3789 {
3790 fd_ivopts_data = data;
3791 walk_tree (&expr, find_depends, depends_on, NULL);
3792 }
3793
3794 return force_expr_to_var_cost (expr, data->speed);
3795 }
3796
3797 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3798 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3799 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3800 invariants the computation depends on. */
3801
3802 static comp_cost
3803 split_address_cost (struct ivopts_data *data,
3804 tree addr, bool *symbol_present, bool *var_present,
3805 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3806 {
3807 tree core;
3808 HOST_WIDE_INT bitsize;
3809 HOST_WIDE_INT bitpos;
3810 tree toffset;
3811 machine_mode mode;
3812 int unsignedp, volatilep;
3813
3814 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3815 &unsignedp, &volatilep, false);
3816
3817 if (toffset != 0
3818 || bitpos % BITS_PER_UNIT != 0
3819 || TREE_CODE (core) != VAR_DECL)
3820 {
3821 *symbol_present = false;
3822 *var_present = true;
3823 fd_ivopts_data = data;
3824 walk_tree (&addr, find_depends, depends_on, NULL);
3825 return new_cost (target_spill_cost[data->speed], 0);
3826 }
3827
3828 *offset += bitpos / BITS_PER_UNIT;
3829 if (TREE_STATIC (core)
3830 || DECL_EXTERNAL (core))
3831 {
3832 *symbol_present = true;
3833 *var_present = false;
3834 return no_cost;
3835 }
3836
3837 *symbol_present = false;
3838 *var_present = true;
3839 return no_cost;
3840 }
3841
3842 /* Estimates cost of expressing difference of addresses E1 - E2 as
3843 var + symbol + offset. The value of offset is added to OFFSET,
3844 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3845 part is missing. DEPENDS_ON is a set of the invariants the computation
3846 depends on. */
3847
3848 static comp_cost
3849 ptr_difference_cost (struct ivopts_data *data,
3850 tree e1, tree e2, bool *symbol_present, bool *var_present,
3851 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3852 {
3853 HOST_WIDE_INT diff = 0;
3854 aff_tree aff_e1, aff_e2;
3855 tree type;
3856
3857 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3858
3859 if (ptr_difference_const (e1, e2, &diff))
3860 {
3861 *offset += diff;
3862 *symbol_present = false;
3863 *var_present = false;
3864 return no_cost;
3865 }
3866
3867 if (integer_zerop (e2))
3868 return split_address_cost (data, TREE_OPERAND (e1, 0),
3869 symbol_present, var_present, offset, depends_on);
3870
3871 *symbol_present = false;
3872 *var_present = true;
3873
3874 type = signed_type_for (TREE_TYPE (e1));
3875 tree_to_aff_combination (e1, type, &aff_e1);
3876 tree_to_aff_combination (e2, type, &aff_e2);
3877 aff_combination_scale (&aff_e2, -1);
3878 aff_combination_add (&aff_e1, &aff_e2);
3879
3880 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3881 }
3882
3883 /* Estimates cost of expressing difference E1 - E2 as
3884 var + symbol + offset. The value of offset is added to OFFSET,
3885 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3886 part is missing. DEPENDS_ON is a set of the invariants the computation
3887 depends on. */
3888
3889 static comp_cost
3890 difference_cost (struct ivopts_data *data,
3891 tree e1, tree e2, bool *symbol_present, bool *var_present,
3892 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3893 {
3894 machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3895 unsigned HOST_WIDE_INT off1, off2;
3896 aff_tree aff_e1, aff_e2;
3897 tree type;
3898
3899 e1 = strip_offset (e1, &off1);
3900 e2 = strip_offset (e2, &off2);
3901 *offset += off1 - off2;
3902
3903 STRIP_NOPS (e1);
3904 STRIP_NOPS (e2);
3905
3906 if (TREE_CODE (e1) == ADDR_EXPR)
3907 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3908 offset, depends_on);
3909 *symbol_present = false;
3910
3911 if (operand_equal_p (e1, e2, 0))
3912 {
3913 *var_present = false;
3914 return no_cost;
3915 }
3916
3917 *var_present = true;
3918
3919 if (integer_zerop (e2))
3920 return force_var_cost (data, e1, depends_on);
3921
3922 if (integer_zerop (e1))
3923 {
3924 comp_cost cost = force_var_cost (data, e2, depends_on);
3925 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
3926 return cost;
3927 }
3928
3929 type = signed_type_for (TREE_TYPE (e1));
3930 tree_to_aff_combination (e1, type, &aff_e1);
3931 tree_to_aff_combination (e2, type, &aff_e2);
3932 aff_combination_scale (&aff_e2, -1);
3933 aff_combination_add (&aff_e1, &aff_e2);
3934
3935 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3936 }
3937
3938 /* Returns true if AFF1 and AFF2 are identical. */
3939
3940 static bool
3941 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3942 {
3943 unsigned i;
3944
3945 if (aff1->n != aff2->n)
3946 return false;
3947
3948 for (i = 0; i < aff1->n; i++)
3949 {
3950 if (aff1->elts[i].coef != aff2->elts[i].coef)
3951 return false;
3952
3953 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3954 return false;
3955 }
3956 return true;
3957 }
3958
3959 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
3960
3961 static int
3962 get_expr_id (struct ivopts_data *data, tree expr)
3963 {
3964 struct iv_inv_expr_ent ent;
3965 struct iv_inv_expr_ent **slot;
3966
3967 ent.expr = expr;
3968 ent.hash = iterative_hash_expr (expr, 0);
3969 slot = data->inv_expr_tab->find_slot (&ent, INSERT);
3970 if (*slot)
3971 return (*slot)->id;
3972
3973 *slot = XNEW (struct iv_inv_expr_ent);
3974 (*slot)->expr = expr;
3975 (*slot)->hash = ent.hash;
3976 (*slot)->id = data->inv_expr_id++;
3977 return (*slot)->id;
3978 }
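/* This is a simple hash-consing scheme: structurally identical invariant
   expressions receive one shared id, so the later register-pressure
   accounting (see iv_ca_set_cp) counts each distinct invariant only
   once, however many uses refer to it.  For example (hypothetically),
   two uses that both need the invariant n + 1 record the same
   inv_expr_id in their cost pairs.  */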
3979
3980 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3981 requires a new compiler-generated temporary. Returns -1 otherwise.
3982 ADDRESS_P is a flag indicating if the expression is for address
3983 computation. */
3984
3985 static int
3986 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3987 tree cbase, HOST_WIDE_INT ratio,
3988 bool address_p)
3989 {
3990 aff_tree ubase_aff, cbase_aff;
3991 tree expr, ub, cb;
3992
3993 STRIP_NOPS (ubase);
3994 STRIP_NOPS (cbase);
3995 ub = ubase;
3996 cb = cbase;
3997
3998 if ((TREE_CODE (ubase) == INTEGER_CST)
3999 && (TREE_CODE (cbase) == INTEGER_CST))
4000 return -1;
4001
4002 /* Strips the constant part. */
4003 if (TREE_CODE (ubase) == PLUS_EXPR
4004 || TREE_CODE (ubase) == MINUS_EXPR
4005 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4006 {
4007 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4008 ubase = TREE_OPERAND (ubase, 0);
4009 }
4010
4011 /* Strips the constant part. */
4012 if (TREE_CODE (cbase) == PLUS_EXPR
4013 || TREE_CODE (cbase) == MINUS_EXPR
4014 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4015 {
4016 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4017 cbase = TREE_OPERAND (cbase, 0);
4018 }
4019
4020 if (address_p)
4021 {
4022 if (((TREE_CODE (ubase) == SSA_NAME)
4023 || (TREE_CODE (ubase) == ADDR_EXPR
4024 && is_gimple_min_invariant (ubase)))
4025 && (TREE_CODE (cbase) == INTEGER_CST))
4026 return -1;
4027
4028 if (((TREE_CODE (cbase) == SSA_NAME)
4029 || (TREE_CODE (cbase) == ADDR_EXPR
4030 && is_gimple_min_invariant (cbase)))
4031 && (TREE_CODE (ubase) == INTEGER_CST))
4032 return -1;
4033 }
4034
4035 if (ratio == 1)
4036 {
4037 if (operand_equal_p (ubase, cbase, 0))
4038 return -1;
4039
4040 if (TREE_CODE (ubase) == ADDR_EXPR
4041 && TREE_CODE (cbase) == ADDR_EXPR)
4042 {
4043 tree usym, csym;
4044
4045 usym = TREE_OPERAND (ubase, 0);
4046 csym = TREE_OPERAND (cbase, 0);
4047 if (TREE_CODE (usym) == ARRAY_REF)
4048 {
4049 tree ind = TREE_OPERAND (usym, 1);
4050 if (TREE_CODE (ind) == INTEGER_CST
4051 && tree_fits_shwi_p (ind)
4052 && tree_to_shwi (ind) == 0)
4053 usym = TREE_OPERAND (usym, 0);
4054 }
4055 if (TREE_CODE (csym) == ARRAY_REF)
4056 {
4057 tree ind = TREE_OPERAND (csym, 1);
4058 if (TREE_CODE (ind) == INTEGER_CST
4059 && tree_fits_shwi_p (ind)
4060 && tree_to_shwi (ind) == 0)
4061 csym = TREE_OPERAND (csym, 0);
4062 }
4063 if (operand_equal_p (usym, csym, 0))
4064 return -1;
4065 }
4066 /* Now do a more complex comparison. */
4067 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4068 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4069 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4070 return -1;
4071 }
4072
4073 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4074 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4075
4076 aff_combination_scale (&cbase_aff, -1 * ratio);
4077 aff_combination_add (&ubase_aff, &cbase_aff);
4078 expr = aff_combination_to_tree (&ubase_aff);
4079 return get_expr_id (data, expr);
4080 }
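/* Hypothetical examples of the above: with ubase = p + 4, cbase = p and
   ratio == 1, stripping the constant part leaves equal operands, so no
   temporary is needed and -1 is returned.  With ubase = p, cbase = q and
   ratio == 2, the loop invariant p - 2*q must live in a temporary, so
   its id from get_expr_id is returned.  */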
4081
4082
4083
4084 /* Determines the cost of the computation by which USE is expressed
4085 from induction variable CAND. If ADDRESS_P is true, we just need
4086 to create an address from it, otherwise we want to get it into
4087 a register. A set of invariants we depend on is stored in
4088 DEPENDS_ON. AT is the statement at which the value is computed.
4089 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4090 addressing is likely. */
4091
4092 static comp_cost
4093 get_computation_cost_at (struct ivopts_data *data,
4094 struct iv_use *use, struct iv_cand *cand,
4095 bool address_p, bitmap *depends_on, gimple at,
4096 bool *can_autoinc,
4097 int *inv_expr_id)
4098 {
4099 tree ubase = use->iv->base, ustep = use->iv->step;
4100 tree cbase, cstep;
4101 tree utype = TREE_TYPE (ubase), ctype;
4102 unsigned HOST_WIDE_INT cstepi, offset = 0;
4103 HOST_WIDE_INT ratio, aratio;
4104 bool var_present, symbol_present, stmt_is_after_inc;
4105 comp_cost cost;
4106 widest_int rat;
4107 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4108 machine_mode mem_mode = (address_p
4109 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4110 : VOIDmode);
4111
4112 *depends_on = NULL;
4113
4114 /* Only consider real candidates. */
4115 if (!cand->iv)
4116 return infinite_cost;
4117
4118 cbase = cand->iv->base;
4119 cstep = cand->iv->step;
4120 ctype = TREE_TYPE (cbase);
4121
4122 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4123 {
4124 /* We do not have a precision to express the values of use. */
4125 return infinite_cost;
4126 }
4127
4128 if (address_p
4129 || (use->iv->base_object
4130 && cand->iv->base_object
4131 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4132 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4133 {
4134 /* Do not try to express the address of an object with a computation
4135 based on the address of a different object. This may cause problems
4136 in rtl level alias analysis (which does not expect this to happen,
4137 as this is illegal in C), and would be unlikely to be useful
4138 anyway. */
4139 if (use->iv->base_object
4140 && cand->iv->base_object
4141 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4142 return infinite_cost;
4143 }
4144
4145 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4146 {
4147 /* TODO -- add direct handling of this case. */
4148 goto fallback;
4149 }
4150
4151 /* CSTEPI is removed from the offset in case the statement is after the
4152 increment. If the step is not constant, we use zero instead.
4153 This is a bit imprecise (there is the extra addition), but
4154 redundancy elimination is likely to transform the code so that
4155 it uses the value of the variable before the increment anyway,
4156 so the estimate is not that unrealistic. */
4157 if (cst_and_fits_in_hwi (cstep))
4158 cstepi = int_cst_value (cstep);
4159 else
4160 cstepi = 0;
4161
4162 if (!constant_multiple_of (ustep, cstep, &rat))
4163 return infinite_cost;
4164
4165 if (wi::fits_shwi_p (rat))
4166 ratio = rat.to_shwi ();
4167 else
4168 return infinite_cost;
4169
4170 STRIP_NOPS (cbase);
4171 ctype = TREE_TYPE (cbase);
4172
4173 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4174
4175 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4176 or ratio == 1, it is better to handle this like
4177
4178 ubase - ratio * cbase + ratio * var
4179
4180 (this also holds in the case ratio == -1, TODO). */
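/* For instance (hypothetical numbers): ubase = 40, cbase = 8 and
   ratio = 4 give use = 40 + 4 * (var - 8) = (40 - 4*8) + 4*var
   = 8 + 4*var, so a constant cbase folds entirely into OFFSET in
   the first branch below.  */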
4181
4182 if (cst_and_fits_in_hwi (cbase))
4183 {
4184 offset = - ratio * int_cst_value (cbase);
4185 cost = difference_cost (data,
4186 ubase, build_int_cst (utype, 0),
4187 &symbol_present, &var_present, &offset,
4188 depends_on);
4189 cost.cost /= avg_loop_niter (data->current_loop);
4190 }
4191 else if (ratio == 1)
4192 {
4193 tree real_cbase = cbase;
4194
4195 /* Check to see if any adjustment is needed. */
4196 if (cstepi == 0 && stmt_is_after_inc)
4197 {
4198 aff_tree real_cbase_aff;
4199 aff_tree cstep_aff;
4200
4201 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4202 &real_cbase_aff);
4203 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4204
4205 aff_combination_add (&real_cbase_aff, &cstep_aff);
4206 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4207 }
4208
4209 cost = difference_cost (data,
4210 ubase, real_cbase,
4211 &symbol_present, &var_present, &offset,
4212 depends_on);
4213 cost.cost /= avg_loop_niter (data->current_loop);
4214 }
4215 else if (address_p
4216 && !POINTER_TYPE_P (ctype)
4217 && multiplier_allowed_in_address_p
4218 (ratio, mem_mode,
4219 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4220 {
4221 cbase
4222 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4223 cost = difference_cost (data,
4224 ubase, cbase,
4225 &symbol_present, &var_present, &offset,
4226 depends_on);
4227 cost.cost /= avg_loop_niter (data->current_loop);
4228 }
4229 else
4230 {
4231 cost = force_var_cost (data, cbase, depends_on);
4232 cost = add_costs (cost,
4233 difference_cost (data,
4234 ubase, build_int_cst (utype, 0),
4235 &symbol_present, &var_present,
4236 &offset, depends_on));
4237 cost.cost /= avg_loop_niter (data->current_loop);
4238 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4239 }
4240
4241 if (inv_expr_id)
4242 {
4243 *inv_expr_id =
4244 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4245 /* Clear depends on. */
4246 if (*inv_expr_id != -1 && depends_on && *depends_on)
4247 bitmap_clear (*depends_on);
4248 }
4249
4250 /* If we are after the increment, the value of the candidate is higher by
4251 one iteration. */
4252 if (stmt_is_after_inc)
4253 offset -= ratio * cstepi;
4254
4255 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4256 (symbol/var1/const parts may be omitted). If we are looking for an
4257 address, find the cost of addressing this. */
4258 if (address_p)
4259 return add_costs (cost,
4260 get_address_cost (symbol_present, var_present,
4261 offset, ratio, cstepi,
4262 mem_mode,
4263 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4264 speed, stmt_is_after_inc,
4265 can_autoinc));
4266
4267 /* Otherwise estimate the costs for computing the expression. */
4268 if (!symbol_present && !var_present && !offset)
4269 {
4270 if (ratio != 1)
4271 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4272 return cost;
4273 }
4274
4275 /* Symbol + offset should be compile-time computable, so consider that they
4276 are added once to the variable, if present. */
4277 if (var_present && (symbol_present || offset))
4278 cost.cost += adjust_setup_cost (data,
4279 add_cost (speed, TYPE_MODE (ctype)));
4280
4281 /* Having an offset does not affect the runtime cost if it is added to the
4282 symbol, but it increases complexity. */
4283 if (offset)
4284 cost.complexity++;
4285
4286 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4287
4288 aratio = ratio > 0 ? ratio : -ratio;
4289 if (aratio != 1)
4290 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4291 return cost;
4292
4293 fallback:
4294 if (can_autoinc)
4295 *can_autoinc = false;
4296
4297 {
4298 /* Just get the expression, expand it and measure the cost. */
4299 tree comp = get_computation_at (data->current_loop, use, cand, at);
4300
4301 if (!comp)
4302 return infinite_cost;
4303
4304 if (address_p)
4305 comp = build_simple_mem_ref (comp);
4306
4307 return new_cost (computation_cost (comp, speed), 0);
4308 }
4309 }
4310
4311 /* Determines the cost of the computation by which USE is expressed
4312 from induction variable CAND. If ADDRESS_P is true, we just need
4313 to create an address from it, otherwise we want to get it into
4314 a register. A set of invariants we depend on is stored in
4315 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4316 autoinc addressing is likely. */
4317
4318 static comp_cost
4319 get_computation_cost (struct ivopts_data *data,
4320 struct iv_use *use, struct iv_cand *cand,
4321 bool address_p, bitmap *depends_on,
4322 bool *can_autoinc, int *inv_expr_id)
4323 {
4324 return get_computation_cost_at (data,
4325 use, cand, address_p, depends_on, use->stmt,
4326 can_autoinc, inv_expr_id);
4327 }
4328
4329 /* Determines cost of basing replacement of USE on CAND in a generic
4330 expression. */
4331
4332 static bool
4333 determine_use_iv_cost_generic (struct ivopts_data *data,
4334 struct iv_use *use, struct iv_cand *cand)
4335 {
4336 bitmap depends_on;
4337 comp_cost cost;
4338 int inv_expr_id = -1;
4339
4340 /* The simple case first -- if we need to express the value of the preserved
4341 original biv, the cost is 0. This also prevents us from counting the
4342 cost of the increment twice -- once at this use and once in the cost of
4343 the candidate. */
4344 if (cand->pos == IP_ORIGINAL
4345 && cand->incremented_at == use->stmt)
4346 {
4347 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4348 ERROR_MARK, -1);
4349 return true;
4350 }
4351
4352 cost = get_computation_cost (data, use, cand, false, &depends_on,
4353 NULL, &inv_expr_id);
4354
4355 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4356 inv_expr_id);
4357
4358 return !infinite_cost_p (cost);
4359 }
4360
4361 /* Determines cost of basing replacement of USE on CAND in an address. */
4362
4363 static bool
4364 determine_use_iv_cost_address (struct ivopts_data *data,
4365 struct iv_use *use, struct iv_cand *cand)
4366 {
4367 bitmap depends_on;
4368 bool can_autoinc;
4369 int inv_expr_id = -1;
4370 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4371 &can_autoinc, &inv_expr_id);
4372
4373 if (cand->ainc_use == use)
4374 {
4375 if (can_autoinc)
4376 cost.cost -= cand->cost_step;
4377 /* If we generated the candidate solely for exploiting autoincrement
4378 opportunities, and it turns out it can't be used, set the cost to
4379 infinity to make sure we ignore it. */
4380 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4381 cost = infinite_cost;
4382 }
4383 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4384 inv_expr_id);
4385
4386 return !infinite_cost_p (cost);
4387 }
4388
4389 /* Computes value of candidate CAND at position AT in iteration NITER, and
4390 stores it to VAL. */
4391
4392 static void
4393 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4394 aff_tree *val)
4395 {
4396 aff_tree step, delta, nit;
4397 struct iv *iv = cand->iv;
4398 tree type = TREE_TYPE (iv->base);
4399 tree steptype = type;
4400 if (POINTER_TYPE_P (type))
4401 steptype = sizetype;
4402 steptype = unsigned_type_for (type);
4403
4404 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4405 aff_combination_convert (&step, steptype);
4406 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4407 aff_combination_convert (&nit, steptype);
4408 aff_combination_mult (&nit, &step, &delta);
4409 if (stmt_after_increment (loop, cand, at))
4410 aff_combination_add (&delta, &step);
4411
4412 tree_to_aff_combination (iv->base, type, val);
4413 if (!POINTER_TYPE_P (type))
4414 aff_combination_convert (val, steptype);
4415 aff_combination_add (val, &delta);
4416 }
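/* A hypothetical worked example: for an iv with base 10 and step 3 and
   NITER = 5, the value is 10 + 3*5 = 25 when AT precedes the increment,
   and one extra step, 10 + 3*6 = 28, when AT follows it.  */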
4417
4418 /* Returns the period of induction variable IV. */
4419
4420 static tree
4421 iv_period (struct iv *iv)
4422 {
4423 tree step = iv->step, period, type;
4424 tree pow2div;
4425
4426 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4427
4428 type = unsigned_type_for (TREE_TYPE (step));
4429 /* The period of the iv is lcm (step, type_range)/step - 1,
4430 i.e., N*type_range/step - 1. Since type_range is a power
4431 of two, N == (step >> num_of_ending_zeros_binary (step)),
4432 so the final result is
4433
4434 (type_range >> num_of_ending_zeros_binary (step)) - 1
4435
4436 */
4437 pow2div = num_ending_zeros (step);
4438
4439 period = build_low_bits_mask (type,
4440 (TYPE_PRECISION (type)
4441 - tree_to_uhwi (pow2div)));
4442
4443 return period;
4444 }
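/* A minimal standalone sketch (not part of this pass) of the formula
   above, for an unsigned PREC-bit type with a nonzero constant STEP,
   assuming PREC - tz < 64; the helper name is hypothetical.  */
#if 0
#include <stdint.h>

static uint64_t
sketch_iv_period (uint64_t step, unsigned prec)
{
  unsigned tz = __builtin_ctzll (step);  /* num_ending_zeros (step).  */
  /* Mask of the low (prec - tz) bits, i.e. (type_range >> tz) - 1.  */
  return ((uint64_t) 1 << (prec - tz)) - 1;
}

/* E.g. sketch_iv_period (4, 8) == 63: an 8-bit iv stepping by 4 only
   revisits its starting value after 64 increments.  */
#endif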
4445
4446 /* Returns the comparison operator used when eliminating the iv USE. */
4447
4448 static enum tree_code
4449 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4450 {
4451 struct loop *loop = data->current_loop;
4452 basic_block ex_bb;
4453 edge exit;
4454
4455 ex_bb = gimple_bb (use->stmt);
4456 exit = EDGE_SUCC (ex_bb, 0);
4457 if (flow_bb_inside_loop_p (loop, exit->dest))
4458 exit = EDGE_SUCC (ex_bb, 1);
4459
4460 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4461 }
4462
4463 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4464 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4465 calculation is performed in a non-wrapping type.
4466
4467 TODO: More generally, we could test for the situation that
4468 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4469 This would require knowing the sign of OFFSET. */
4470
4471 static bool
4472 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4473 {
4474 enum tree_code code;
4475 tree e1, e2;
4476 aff_tree aff_e1, aff_e2, aff_offset;
4477
4478 if (!nowrap_type_p (TREE_TYPE (base)))
4479 return false;
4480
4481 base = expand_simple_operations (base);
4482
4483 if (TREE_CODE (base) == SSA_NAME)
4484 {
4485 gimple stmt = SSA_NAME_DEF_STMT (base);
4486
4487 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4488 return false;
4489
4490 code = gimple_assign_rhs_code (stmt);
4491 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4492 return false;
4493
4494 e1 = gimple_assign_rhs1 (stmt);
4495 e2 = gimple_assign_rhs2 (stmt);
4496 }
4497 else
4498 {
4499 code = TREE_CODE (base);
4500 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4501 return false;
4502 e1 = TREE_OPERAND (base, 0);
4503 e2 = TREE_OPERAND (base, 1);
4504 }
4505
4506 /* Use affine expansion as deeper inspection to prove the equality. */
4507 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4508 &aff_e2, &data->name_expansion_cache);
4509 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4510 &aff_offset, &data->name_expansion_cache);
4511 aff_combination_scale (&aff_offset, -1);
4512 switch (code)
4513 {
4514 case PLUS_EXPR:
4515 aff_combination_add (&aff_e2, &aff_offset);
4516 if (aff_combination_zero_p (&aff_e2))
4517 return true;
4518
4519 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4520 &aff_e1, &data->name_expansion_cache);
4521 aff_combination_add (&aff_e1, &aff_offset);
4522 return aff_combination_zero_p (&aff_e1);
4523
4524 case POINTER_PLUS_EXPR:
4525 aff_combination_add (&aff_e2, &aff_offset);
4526 return aff_combination_zero_p (&aff_e2);
4527
4528 default:
4529 return false;
4530 }
4531 }
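/* A hypothetical example: for a pointer BASE = q + 16 (pointer
   arithmetic does not wrap) and OFFSET = 16, the POINTER_PLUS_EXPR case
   cancels the expanded second operand against -OFFSET, proving that
   BASE - OFFSET == q cannot overflow, so true is returned.  */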
4532
4533 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4534 comparison with CAND. NITER describes the number of iterations of
4535 the loops. If successful, the comparison in COMP_P is altered accordingly.
4536
4537 We aim to handle the following situation:
4538
4539 sometype *base, *p;
4540 int a, b, i;
4541
4542 i = a;
4543 p = p_0 = base + a;
4544
4545 do
4546 {
4547 bla (*p);
4548 p++;
4549 i++;
4550 }
4551 while (i < b);
4552
4553 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4554 We aim to optimize this to
4555
4556 p = p_0 = base + a;
4557 do
4558 {
4559 bla (*p);
4560 p++;
4561 }
4562 while (p < p_0 - a + b);
4563
4564 This preserves correctness, since the pointer arithmetic does not
4565 overflow. More precisely:
4566
4567 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4568 overflow in computing it or the values of p.
4569 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4570 overflow. To prove this, we use the fact that p_0 = base + a. */
4571
4572 static bool
4573 iv_elimination_compare_lt (struct ivopts_data *data,
4574 struct iv_cand *cand, enum tree_code *comp_p,
4575 struct tree_niter_desc *niter)
4576 {
4577 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4578 struct aff_tree nit, tmpa, tmpb;
4579 enum tree_code comp;
4580 HOST_WIDE_INT step;
4581
4582 /* We need to know that the candidate induction variable does not overflow.
4583 While more complex analysis may be used to prove this, for now just
4584 check that the variable appears in the original program and that it
4585 is computed in a type that guarantees no overflows. */
4586 cand_type = TREE_TYPE (cand->iv->base);
4587 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4588 return false;
4589
4590 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4591 the calculation of the BOUND could overflow, making the comparison
4592 invalid. */
4593 if (!data->loop_single_exit_p)
4594 return false;
4595
4596 /* We need to be able to decide whether the candidate is increasing or decreasing
4597 in order to choose the right comparison operator. */
4598 if (!cst_and_fits_in_hwi (cand->iv->step))
4599 return false;
4600 step = int_cst_value (cand->iv->step);
4601
4602 /* Check that the number of iterations matches the expected pattern:
4603 a + 1 > b ? 0 : b - a - 1. */
4604 mbz = niter->may_be_zero;
4605 if (TREE_CODE (mbz) == GT_EXPR)
4606 {
4607 /* Handle a + 1 > b. */
4608 tree op0 = TREE_OPERAND (mbz, 0);
4609 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4610 {
4611 a = TREE_OPERAND (op0, 0);
4612 b = TREE_OPERAND (mbz, 1);
4613 }
4614 else
4615 return false;
4616 }
4617 else if (TREE_CODE (mbz) == LT_EXPR)
4618 {
4619 tree op1 = TREE_OPERAND (mbz, 1);
4620
4621 /* Handle b < a + 1. */
4622 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4623 {
4624 a = TREE_OPERAND (op1, 0);
4625 b = TREE_OPERAND (mbz, 0);
4626 }
4627 else
4628 return false;
4629 }
4630 else
4631 return false;
4632
4633 /* Expected number of iterations is B - A - 1. Check that it matches
4634 the actual number, i.e., that B - A - NITER = 1. */
4635 tree_to_aff_combination (niter->niter, nit_type, &nit);
4636 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4637 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4638 aff_combination_scale (&nit, -1);
4639 aff_combination_scale (&tmpa, -1);
4640 aff_combination_add (&tmpb, &tmpa);
4641 aff_combination_add (&tmpb, &nit);
4642 if (tmpb.n != 0 || tmpb.offset != 1)
4643 return false;
4644
4645 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4646 overflow. */
4647 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4648 cand->iv->step,
4649 fold_convert (TREE_TYPE (cand->iv->step), a));
4650 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
4651 return false;
4652
4653 /* Determine the new comparison operator. */
4654 comp = step < 0 ? GT_EXPR : LT_EXPR;
4655 if (*comp_p == NE_EXPR)
4656 *comp_p = comp;
4657 else if (*comp_p == EQ_EXPR)
4658 *comp_p = invert_tree_comparison (comp, false);
4659 else
4660 gcc_unreachable ();
4661
4662 return true;
4663 }
4664
4665 /* Check whether it is possible to express the condition in USE by comparison
4666 of candidate CAND. If so, store the value compared with to BOUND, and the
4667 comparison operator to COMP. */
4668
4669 static bool
4670 may_eliminate_iv (struct ivopts_data *data,
4671 struct iv_use *use, struct iv_cand *cand, tree *bound,
4672 enum tree_code *comp)
4673 {
4674 basic_block ex_bb;
4675 edge exit;
4676 tree period;
4677 struct loop *loop = data->current_loop;
4678 aff_tree bnd;
4679 struct tree_niter_desc *desc = NULL;
4680
4681 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4682 return false;
4683
4684 /* For now this works only for exits that dominate the loop latch.
4685 TODO: extend to other conditions inside loop body. */
4686 ex_bb = gimple_bb (use->stmt);
4687 if (use->stmt != last_stmt (ex_bb)
4688 || gimple_code (use->stmt) != GIMPLE_COND
4689 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4690 return false;
4691
4692 exit = EDGE_SUCC (ex_bb, 0);
4693 if (flow_bb_inside_loop_p (loop, exit->dest))
4694 exit = EDGE_SUCC (ex_bb, 1);
4695 if (flow_bb_inside_loop_p (loop, exit->dest))
4696 return false;
4697
4698 desc = niter_for_exit (data, exit);
4699 if (!desc)
4700 return false;
4701
4702 /* Determine whether we can use the variable to test the exit condition.
4703 This is the case iff the period of the induction variable is greater
4704 than the number of iterations for which the exit condition is true. */
4705 period = iv_period (cand->iv);
4706
4707 /* If the number of iterations is constant, compare against it directly. */
4708 if (TREE_CODE (desc->niter) == INTEGER_CST)
4709 {
4710 /* See cand_value_at. */
4711 if (stmt_after_increment (loop, cand, use->stmt))
4712 {
4713 if (!tree_int_cst_lt (desc->niter, period))
4714 return false;
4715 }
4716 else
4717 {
4718 if (tree_int_cst_lt (period, desc->niter))
4719 return false;
4720 }
4721 }
4722
4723 /* If not, and if this is the only possible exit of the loop, see whether
4724 we can get a conservative estimate on the number of iterations of the
4725 entire loop and compare against that instead. */
4726 else
4727 {
4728 widest_int period_value, max_niter;
4729
4730 max_niter = desc->max;
4731 if (stmt_after_increment (loop, cand, use->stmt))
4732 max_niter += 1;
4733 period_value = wi::to_widest (period);
4734 if (wi::gtu_p (max_niter, period_value))
4735 {
4736 /* See if we can take advantage of inferred loop bound information. */
4737 if (data->loop_single_exit_p)
4738 {
4739 if (!max_loop_iterations (loop, &max_niter))
4740 return false;
4741 /* The loop bound is already adjusted by adding 1. */
4742 if (wi::gtu_p (max_niter, period_value))
4743 return false;
4744 }
4745 else
4746 return false;
4747 }
4748 }
4749
4750 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4751
4752 *bound = fold_convert (TREE_TYPE (cand->iv->base),
4753 aff_combination_to_tree (&bnd));
4754 *comp = iv_elimination_compare (data, use);
4755
4756 /* It is unlikely that computing the number of iterations using division
4757 would be more profitable than keeping the original induction variable. */
4758 if (expression_expensive_p (*bound))
4759 return false;
4760
4761 /* Sometimes it is possible to handle the situation that the number of
4762 iterations may be zero unless additional assumptions hold, by using <
4763 instead of != in the exit condition.
4764
4765 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4766 base the exit condition on it. However, that is often too
4767 expensive. */
4768 if (!integer_zerop (desc->may_be_zero))
4769 return iv_elimination_compare_lt (data, cand, comp, desc);
4770
4771 return true;
4772 }
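/* An illustrative sketch (hypothetical source) of the replacement this
   enables, assuming the loop is known to iterate n > 0 times.  */
#if 0
static void
sketch (int *a, int n)
{
  int i;
  int *p;

  /* Before: the exit tests the counter iv i.  */
  for (i = 0; i < n; i++)
    a[i] = 0;

  /* After iv elimination: the counter is gone and the exit compares
     the pointer candidate against its value in iteration n, as
     computed by cand_value_at.  */
  p = a;
  do
    *p++ = 0;
  while (p != a + n);
}
#endif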
4773
4774 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
4775 be copied, if it is used in the loop body and DATA->body_includes_call. */
4776
4777 static int
4778 parm_decl_cost (struct ivopts_data *data, tree bound)
4779 {
4780 tree sbound = bound;
4781 STRIP_NOPS (sbound);
4782
4783 if (TREE_CODE (sbound) == SSA_NAME
4784 && SSA_NAME_IS_DEFAULT_DEF (sbound)
4785 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4786 && data->body_includes_call)
4787 return COSTS_N_INSNS (1);
4788
4789 return 0;
4790 }
4791
4792 /* Determines cost of basing replacement of USE on CAND in a condition. */
4793
4794 static bool
4795 determine_use_iv_cost_condition (struct ivopts_data *data,
4796 struct iv_use *use, struct iv_cand *cand)
4797 {
4798 tree bound = NULL_TREE;
4799 struct iv *cmp_iv;
4800 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4801 comp_cost elim_cost, express_cost, cost, bound_cost;
4802 bool ok;
4803 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4804 tree *control_var, *bound_cst;
4805 enum tree_code comp = ERROR_MARK;
4806
4807 /* Only consider real candidates. */
4808 if (!cand->iv)
4809 {
4810 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4811 ERROR_MARK, -1);
4812 return false;
4813 }
4814
4815 /* Try iv elimination. */
4816 if (may_eliminate_iv (data, use, cand, &bound, &comp))
4817 {
4818 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4819 if (elim_cost.cost == 0)
4820 elim_cost.cost = parm_decl_cost (data, bound);
4821 else if (TREE_CODE (bound) == INTEGER_CST)
4822 elim_cost.cost = 0;
4823 /* If we replace a loop condition 'i < n' with 'p < base + n',
4824 depends_on_elim will have 'base' and 'n' set, which implies
4825 that both 'base' and 'n' will be live during the loop. More likely,
4826 'base + n' will be loop invariant, resulting in only one live value
4827 during the loop. So in that case we clear depends_on_elim and set
4828 elim_inv_expr_id instead. */
4829 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4830 {
4831 elim_inv_expr_id = get_expr_id (data, bound);
4832 bitmap_clear (depends_on_elim);
4833 }
4834 /* The bound is a loop invariant, so it will only be computed
4835 once. */
4836 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4837 }
4838 else
4839 elim_cost = infinite_cost;
4840
4841 /* Try expressing the original giv. If it is compared with an invariant,
4842 note that we cannot get rid of it. */
4843 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4844 NULL, &cmp_iv);
4845 gcc_assert (ok);
4846
4847 /* When the condition is a comparison of the candidate IV against
4848 zero, prefer this IV.
4849
4850 TODO: The constant that we're subtracting from the cost should
4851 be target-dependent. This information should be added to the
4852 target costs for each backend. */
4853 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4854 && integer_zerop (*bound_cst)
4855 && (operand_equal_p (*control_var, cand->var_after, 0)
4856 || operand_equal_p (*control_var, cand->var_before, 0)))
4857 elim_cost.cost -= 1;
4858
4859 express_cost = get_computation_cost (data, use, cand, false,
4860 &depends_on_express, NULL,
4861 &express_inv_expr_id);
4862 fd_ivopts_data = data;
4863 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4864
4865 /* Count the cost of the original bound as well. */
4866 bound_cost = force_var_cost (data, *bound_cst, NULL);
4867 if (bound_cost.cost == 0)
4868 bound_cost.cost = parm_decl_cost (data, *bound_cst);
4869 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
4870 bound_cost.cost = 0;
4871 express_cost.cost += bound_cost.cost;
4872
4873 /* Choose the better approach, preferring the eliminated IV. */
4874 if (compare_costs (elim_cost, express_cost) <= 0)
4875 {
4876 cost = elim_cost;
4877 depends_on = depends_on_elim;
4878 depends_on_elim = NULL;
4879 inv_expr_id = elim_inv_expr_id;
4880 }
4881 else
4882 {
4883 cost = express_cost;
4884 depends_on = depends_on_express;
4885 depends_on_express = NULL;
4886 bound = NULL_TREE;
4887 comp = ERROR_MARK;
4888 inv_expr_id = express_inv_expr_id;
4889 }
4890
4891 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
4892
4893 if (depends_on_elim)
4894 BITMAP_FREE (depends_on_elim);
4895 if (depends_on_express)
4896 BITMAP_FREE (depends_on_express);
4897
4898 return !infinite_cost_p (cost);
4899 }
4900
4901 /* Determines cost of basing replacement of USE on CAND. Returns false
4902 if USE cannot be based on CAND. */
4903
4904 static bool
4905 determine_use_iv_cost (struct ivopts_data *data,
4906 struct iv_use *use, struct iv_cand *cand)
4907 {
4908 switch (use->type)
4909 {
4910 case USE_NONLINEAR_EXPR:
4911 return determine_use_iv_cost_generic (data, use, cand);
4912
4913 case USE_ADDRESS:
4914 return determine_use_iv_cost_address (data, use, cand);
4915
4916 case USE_COMPARE:
4917 return determine_use_iv_cost_condition (data, use, cand);
4918
4919 default:
4920 gcc_unreachable ();
4921 }
4922 }
4923
4924 /* Return true if get_computation_cost indicates that autoincrement is
4925 a possibility for the pair of USE and CAND, false otherwise. */
4926
4927 static bool
4928 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4929 struct iv_cand *cand)
4930 {
4931 bitmap depends_on;
4932 bool can_autoinc;
4933 comp_cost cost;
4934
4935 if (use->type != USE_ADDRESS)
4936 return false;
4937
4938 cost = get_computation_cost (data, use, cand, true, &depends_on,
4939 &can_autoinc, NULL);
4940
4941 BITMAP_FREE (depends_on);
4942
4943 return !infinite_cost_p (cost) && can_autoinc;
4944 }
4945
4946 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4947 use that allows autoincrement, and set their AINC_USE if possible. */
4948
4949 static void
4950 set_autoinc_for_original_candidates (struct ivopts_data *data)
4951 {
4952 unsigned i, j;
4953
4954 for (i = 0; i < n_iv_cands (data); i++)
4955 {
4956 struct iv_cand *cand = iv_cand (data, i);
4957 struct iv_use *closest_before = NULL;
4958 struct iv_use *closest_after = NULL;
4959 if (cand->pos != IP_ORIGINAL)
4960 continue;
4961
4962 for (j = 0; j < n_iv_uses (data); j++)
4963 {
4964 struct iv_use *use = iv_use (data, j);
4965 unsigned uid = gimple_uid (use->stmt);
4966
4967 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
4968 continue;
4969
4970 if (uid < gimple_uid (cand->incremented_at)
4971 && (closest_before == NULL
4972 || uid > gimple_uid (closest_before->stmt)))
4973 closest_before = use;
4974
4975 if (uid > gimple_uid (cand->incremented_at)
4976 && (closest_after == NULL
4977 || uid < gimple_uid (closest_after->stmt)))
4978 closest_after = use;
4979 }
4980
4981 if (closest_before != NULL
4982 && autoinc_possible_for_pair (data, closest_before, cand))
4983 cand->ainc_use = closest_before;
4984 else if (closest_after != NULL
4985 && autoinc_possible_for_pair (data, closest_after, cand))
4986 cand->ainc_use = closest_after;
4987 }
4988 }
4989
4990 /* Finds the candidates for the induction variables. */
4991
4992 static void
4993 find_iv_candidates (struct ivopts_data *data)
4994 {
4995 /* Add commonly used ivs. */
4996 add_standard_iv_candidates (data);
4997
4998 /* Add old induction variables. */
4999 add_old_ivs_candidates (data);
5000
5001 /* Add induction variables derived from uses. */
5002 add_derived_ivs_candidates (data);
5003
5004 set_autoinc_for_original_candidates (data);
5005
5006 /* Record the important candidates. */
5007 record_important_candidates (data);
5008 }
5009
5010 /* Determines costs of basing the use of the iv on an iv candidate. */
5011
5012 static void
5013 determine_use_iv_costs (struct ivopts_data *data)
5014 {
5015 unsigned i, j;
5016 struct iv_use *use;
5017 struct iv_cand *cand;
5018 bitmap to_clear = BITMAP_ALLOC (NULL);
5019
5020 alloc_use_cost_map (data);
5021
5022 for (i = 0; i < n_iv_uses (data); i++)
5023 {
5024 use = iv_use (data, i);
5025
5026 if (data->consider_all_candidates)
5027 {
5028 for (j = 0; j < n_iv_cands (data); j++)
5029 {
5030 cand = iv_cand (data, j);
5031 determine_use_iv_cost (data, use, cand);
5032 }
5033 }
5034 else
5035 {
5036 bitmap_iterator bi;
5037
5038 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5039 {
5040 cand = iv_cand (data, j);
5041 if (!determine_use_iv_cost (data, use, cand))
5042 bitmap_set_bit (to_clear, j);
5043 }
5044
5045 /* Remove the candidates for which the cost is infinite from
5046 the list of related candidates. */
5047 bitmap_and_compl_into (use->related_cands, to_clear);
5048 bitmap_clear (to_clear);
5049 }
5050 }
5051
5052 BITMAP_FREE (to_clear);
5053
5054 if (dump_file && (dump_flags & TDF_DETAILS))
5055 {
5056 fprintf (dump_file, "Use-candidate costs:\n");
5057
5058 for (i = 0; i < n_iv_uses (data); i++)
5059 {
5060 use = iv_use (data, i);
5061
5062 fprintf (dump_file, "Use %d:\n", i);
5063 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5064 for (j = 0; j < use->n_map_members; j++)
5065 {
5066 if (!use->cost_map[j].cand
5067 || infinite_cost_p (use->cost_map[j].cost))
5068 continue;
5069
5070 fprintf (dump_file, " %d\t%d\t%d\t",
5071 use->cost_map[j].cand->id,
5072 use->cost_map[j].cost.cost,
5073 use->cost_map[j].cost.complexity);
5074 if (use->cost_map[j].depends_on)
5075 bitmap_print (dump_file,
5076 use->cost_map[j].depends_on, "","");
5077 if (use->cost_map[j].inv_expr_id != -1)
5078 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5079 fprintf (dump_file, "\n");
5080 }
5081
5082 fprintf (dump_file, "\n");
5083 }
5084 fprintf (dump_file, "\n");
5085 }
5086 }
5087
5088 /* Determines cost of the candidate CAND. */
5089
5090 static void
5091 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5092 {
5093 comp_cost cost_base;
5094 unsigned cost, cost_step;
5095 tree base;
5096
5097 if (!cand->iv)
5098 {
5099 cand->cost = 0;
5100 return;
5101 }
5102
5103 /* There are two costs associated with the candidate -- its increment
5104 and its initialization. The second is almost negligible for any loop
5105 that rolls enough, so we give it only very little weight. */
5106
5107 base = cand->iv->base;
5108 cost_base = force_var_cost (data, base, NULL);
5109 /* It will be exceptional that the iv register happens to be initialized with
5110 the proper value at no cost. In general, there will at least be a regcopy
5111 or a const set. */
5112 if (cost_base.cost == 0)
5113 cost_base.cost = COSTS_N_INSNS (1);
5114 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5115
5116 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5117
5118 /* Prefer the original ivs unless we may gain something by replacing them.
5119 The reason is to make debugging simpler, so this is not relevant for
5120 artificial ivs created by other optimization passes. */
5121 if (cand->pos != IP_ORIGINAL
5122 || !SSA_NAME_VAR (cand->var_before)
5123 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5124 cost++;
5125
5126 /* Prefer not to insert statements into the latch unless there are some
5127 already (so that we do not create unnecessary jumps). */
5128 if (cand->pos == IP_END
5129 && empty_block_p (ip_end_pos (data->current_loop)))
5130 cost++;
5131
5132 cand->cost = cost;
5133 cand->cost_step = cost_step;
5134 }
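/* Hypothetical worked example: with cost_base.cost = 4 paid once before
   the loop and cost_step = 1 paid every iteration, a loop that rolls
   often enough amortizes the setup to nearly nothing, so cand->cost is
   dominated by cost_step; the cost++ adjustments above then act as tie
   breakers that favor original ivs and avoid filling an empty latch.  */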
5135
5136 /* Determines costs of computation of the candidates. */
5137
5138 static void
5139 determine_iv_costs (struct ivopts_data *data)
5140 {
5141 unsigned i;
5142
5143 if (dump_file && (dump_flags & TDF_DETAILS))
5144 {
5145 fprintf (dump_file, "Candidate costs:\n");
5146 fprintf (dump_file, " cand\tcost\n");
5147 }
5148
5149 for (i = 0; i < n_iv_cands (data); i++)
5150 {
5151 struct iv_cand *cand = iv_cand (data, i);
5152
5153 determine_iv_cost (data, cand);
5154
5155 if (dump_file && (dump_flags & TDF_DETAILS))
5156 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5157 }
5158
5159 if (dump_file && (dump_flags & TDF_DETAILS))
5160 fprintf (dump_file, "\n");
5161 }
5162
5163 /* Calculates cost for having SIZE induction variables. */
5164
5165 static unsigned
5166 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5167 {
5168 /* We add size to the cost, so that we prefer eliminating ivs
5169 if possible. */
5170 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5171 data->body_includes_call);
5172 }
5173
5174 /* For each size of the induction variable set determine the penalty. */
5175
5176 static void
5177 determine_set_costs (struct ivopts_data *data)
5178 {
5179 unsigned j, n;
5180 gimple phi;
5181 gimple_stmt_iterator psi;
5182 tree op;
5183 struct loop *loop = data->current_loop;
5184 bitmap_iterator bi;
5185
5186 if (dump_file && (dump_flags & TDF_DETAILS))
5187 {
5188 fprintf (dump_file, "Global costs:\n");
5189 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5190 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5191 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5192 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5193 }
5194
5195 n = 0;
5196 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5197 {
5198 phi = gsi_stmt (psi);
5199 op = PHI_RESULT (phi);
5200
5201 if (virtual_operand_p (op))
5202 continue;
5203
5204 if (get_iv (data, op))
5205 continue;
5206
5207 n++;
5208 }
5209
5210 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5211 {
5212 struct version_info *info = ver_info (data, j);
5213
5214 if (info->inv_id && info->has_nonlin_use)
5215 n++;
5216 }
5217
5218 data->regs_used = n;
5219 if (dump_file && (dump_flags & TDF_DETAILS))
5220 fprintf (dump_file, " regs_used %d\n", n);
5221
5222 if (dump_file && (dump_flags & TDF_DETAILS))
5223 {
5224 fprintf (dump_file, " cost for size:\n");
5225 fprintf (dump_file, " ivs\tcost\n");
5226 for (j = 0; j <= 2 * target_avail_regs; j++)
5227 fprintf (dump_file, " %d\t%d\n", j,
5228 ivopts_global_cost_for_size (data, j));
5229 fprintf (dump_file, "\n");
5230 }
5231 }
5232
5233 /* Returns true if A is a cheaper cost pair than B. */
5234
5235 static bool
5236 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5237 {
5238 int cmp;
5239
5240 if (!a)
5241 return false;
5242
5243 if (!b)
5244 return true;
5245
5246 cmp = compare_costs (a->cost, b->cost);
5247 if (cmp < 0)
5248 return true;
5249
5250 if (cmp > 0)
5251 return false;
5252
5253 /* In case the use costs are the same, prefer the cheaper candidate. */
5254 if (a->cand->cost < b->cand->cost)
5255 return true;
5256
5257 return false;
5258 }
5259
5260
5261 /* Returns the candidate by which USE is expressed in IVS. */
5262
5263 static struct cost_pair *
5264 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5265 {
5266 return ivs->cand_for_use[use->id];
5267 }
5268
5269 /* Computes the cost field of IVS structure. */
5270
5271 static void
5272 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5273 {
5274 comp_cost cost = ivs->cand_use_cost;
5275
5276 cost.cost += ivs->cand_cost;
5277
5278 cost.cost += ivopts_global_cost_for_size (data,
5279 ivs->n_regs + ivs->num_used_inv_expr);
5280
5281 ivs->cost = cost;
5282 }
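/* Hypothetical worked example: three uses costing 2, 1 and 3, candidates
   with increment costs 4 and 5, and a global size cost of 6 for
   n_regs + num_used_inv_expr registers give
   cost = (2 + 1 + 3) + (4 + 5) + 6 = 21.  */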
5283
5284 /* Remove invariants in set INVS from set IVS. */
5285
5286 static void
5287 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5288 {
5289 bitmap_iterator bi;
5290 unsigned iid;
5291
5292 if (!invs)
5293 return;
5294
5295 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5296 {
5297 ivs->n_invariant_uses[iid]--;
5298 if (ivs->n_invariant_uses[iid] == 0)
5299 ivs->n_regs--;
5300 }
5301 }
5302
5303 /* Set USE not to be expressed by any candidate in IVS. */
5304
5305 static void
5306 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5307 struct iv_use *use)
5308 {
5309 unsigned uid = use->id, cid;
5310 struct cost_pair *cp;
5311
5312 cp = ivs->cand_for_use[uid];
5313 if (!cp)
5314 return;
5315 cid = cp->cand->id;
5316
5317 ivs->bad_uses++;
5318 ivs->cand_for_use[uid] = NULL;
5319 ivs->n_cand_uses[cid]--;
5320
5321 if (ivs->n_cand_uses[cid] == 0)
5322 {
5323 bitmap_clear_bit (ivs->cands, cid);
5324 /* Do not count the pseudocandidates. */
5325 if (cp->cand->iv)
5326 ivs->n_regs--;
5327 ivs->n_cands--;
5328 ivs->cand_cost -= cp->cand->cost;
5329
5330 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5331 }
5332
5333 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5334
5335 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5336
5337 if (cp->inv_expr_id != -1)
5338 {
5339 ivs->used_inv_expr[cp->inv_expr_id]--;
5340 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5341 ivs->num_used_inv_expr--;
5342 }
5343 iv_ca_recount_cost (data, ivs);
5344 }
5345
5346 /* Add invariants in set INVS to set IVS. */
5347
5348 static void
5349 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5350 {
5351 bitmap_iterator bi;
5352 unsigned iid;
5353
5354 if (!invs)
5355 return;
5356
5357 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5358 {
5359 ivs->n_invariant_uses[iid]++;
5360 if (ivs->n_invariant_uses[iid] == 1)
5361 ivs->n_regs++;
5362 }
5363 }
5364
5365 /* Set cost pair for USE in set IVS to CP. */
5366
5367 static void
5368 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5369 struct iv_use *use, struct cost_pair *cp)
5370 {
5371 unsigned uid = use->id, cid;
5372
5373 if (ivs->cand_for_use[uid] == cp)
5374 return;
5375
5376 if (ivs->cand_for_use[uid])
5377 iv_ca_set_no_cp (data, ivs, use);
5378
5379 if (cp)
5380 {
5381 cid = cp->cand->id;
5382
5383 ivs->bad_uses--;
5384 ivs->cand_for_use[uid] = cp;
5385 ivs->n_cand_uses[cid]++;
5386 if (ivs->n_cand_uses[cid] == 1)
5387 {
5388 bitmap_set_bit (ivs->cands, cid);
5389 /* Do not count the pseudocandidates. */
5390 if (cp->cand->iv)
5391 ivs->n_regs++;
5392 ivs->n_cands++;
5393 ivs->cand_cost += cp->cand->cost;
5394
5395 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5396 }
5397
5398 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5399 iv_ca_set_add_invariants (ivs, cp->depends_on);
5400
5401 if (cp->inv_expr_id != -1)
5402 {
5403 ivs->used_inv_expr[cp->inv_expr_id]++;
5404 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5405 ivs->num_used_inv_expr++;
5406 }
5407 iv_ca_recount_cost (data, ivs);
5408 }
5409 }
5410
5411 /* Extend set IVS by expressing USE by some of the candidates in it
5412 if possible. Consider all important candidates if candidates in
5413 set IVS don't give any result. */
5414
5415 static void
5416 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5417 struct iv_use *use)
5418 {
5419 struct cost_pair *best_cp = NULL, *cp;
5420 bitmap_iterator bi;
5421 unsigned i;
5422 struct iv_cand *cand;
5423
5424 gcc_assert (ivs->upto >= use->id);
5425 ivs->upto++;
5426 ivs->bad_uses++;
5427
5428 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5429 {
5430 cand = iv_cand (data, i);
5431 cp = get_use_iv_cost (data, use, cand);
5432 if (cheaper_cost_pair (cp, best_cp))
5433 best_cp = cp;
5434 }
5435
5436 if (best_cp == NULL)
5437 {
5438 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5439 {
5440 cand = iv_cand (data, i);
5441 cp = get_use_iv_cost (data, use, cand);
5442 if (cheaper_cost_pair (cp, best_cp))
5443 best_cp = cp;
5444 }
5445 }
5446
5447 iv_ca_set_cp (data, ivs, use, best_cp);
5448 }
5449
5450 /* Get cost for assignment IVS. */
5451
5452 static comp_cost
5453 iv_ca_cost (struct iv_ca *ivs)
5454 {
5455 /* This was a conditional expression but it triggered a bug in
5456 Sun C 5.5. */
5457 if (ivs->bad_uses)
5458 return infinite_cost;
5459 else
5460 return ivs->cost;
5461 }
5462
5463 /* Returns true if all dependences of CP are among invariants in IVS. */
5464
5465 static bool
5466 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5467 {
5468 unsigned i;
5469 bitmap_iterator bi;
5470
5471 if (!cp->depends_on)
5472 return true;
5473
5474 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5475 {
5476 if (ivs->n_invariant_uses[i] == 0)
5477 return false;
5478 }
5479
5480 return true;
5481 }
5482
5483 /* Creates a change expressing USE by NEW_CP instead of OLD_CP and chains
5484 it before NEXT_CHANGE. */
5485
5486 static struct iv_ca_delta *
5487 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5488 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5489 {
5490 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5491
5492 change->use = use;
5493 change->old_cp = old_cp;
5494 change->new_cp = new_cp;
5495 change->next_change = next_change;
5496
5497 return change;
5498 }
5499
5500 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5501 are rewritten. */
5502
5503 static struct iv_ca_delta *
5504 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5505 {
5506 struct iv_ca_delta *last;
5507
5508 if (!l2)
5509 return l1;
5510
5511 if (!l1)
5512 return l2;
5513
5514 for (last = l1; last->next_change; last = last->next_change)
5515 continue;
5516 last->next_change = l2;
5517
5518 return l1;
5519 }
5520
5521 /* Reverse the list of changes DELTA, forming the inverse to it. */
5522
5523 static struct iv_ca_delta *
5524 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5525 {
5526 struct iv_ca_delta *act, *next, *prev = NULL;
5527 struct cost_pair *tmp;
5528
5529 for (act = delta; act; act = next)
5530 {
5531 next = act->next_change;
5532 act->next_change = prev;
5533 prev = act;
5534
5535 tmp = act->old_cp;
5536 act->old_cp = act->new_cp;
5537 act->new_cp = tmp;
5538 }
5539
5540 return prev;
5541 }
5542
5543 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5544 reverted instead. */
5545
5546 static void
5547 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5548 struct iv_ca_delta *delta, bool forward)
5549 {
5550 struct cost_pair *from, *to;
5551 struct iv_ca_delta *act;
5552
5553 if (!forward)
5554 delta = iv_ca_delta_reverse (delta);
5555
5556 for (act = delta; act; act = act->next_change)
5557 {
5558 from = act->old_cp;
5559 to = act->new_cp;
5560 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5561 iv_ca_set_cp (data, ivs, act->use, to);
5562 }
5563
5564 if (!forward)
5565 iv_ca_delta_reverse (delta);
5566 }
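/* The deltas make trial moves cheap: the usual pattern (see iv_ca_extend
   and iv_ca_narrow below) applies a delta, reads the recomputed cost,
   and rolls the delta back:

     iv_ca_delta_commit (data, ivs, delta, true);
     cost = iv_ca_cost (ivs);
     iv_ca_delta_commit (data, ivs, delta, false);  */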
5567
5568 /* Returns true if CAND is used in IVS. */
5569
5570 static bool
5571 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5572 {
5573 return ivs->n_cand_uses[cand->id] > 0;
5574 }
5575
5576 /* Returns number of induction variable candidates in the set IVS. */
5577
5578 static unsigned
5579 iv_ca_n_cands (struct iv_ca *ivs)
5580 {
5581 return ivs->n_cands;
5582 }
5583
5584 /* Free the list of changes DELTA. */
5585
5586 static void
5587 iv_ca_delta_free (struct iv_ca_delta **delta)
5588 {
5589 struct iv_ca_delta *act, *next;
5590
5591 for (act = *delta; act; act = next)
5592 {
5593 next = act->next_change;
5594 free (act);
5595 }
5596
5597 *delta = NULL;
5598 }
5599
5600 /* Allocates a new iv candidate assignment. */
5601
5602 static struct iv_ca *
5603 iv_ca_new (struct ivopts_data *data)
5604 {
5605 struct iv_ca *nw = XNEW (struct iv_ca);
5606
5607 nw->upto = 0;
5608 nw->bad_uses = 0;
5609 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5610 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5611 nw->cands = BITMAP_ALLOC (NULL);
5612 nw->n_cands = 0;
5613 nw->n_regs = 0;
5614 nw->cand_use_cost = no_cost;
5615 nw->cand_cost = 0;
5616 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5617 nw->cost = no_cost;
5618 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5619 nw->num_used_inv_expr = 0;
5620
5621 return nw;
5622 }
5623
5624 /* Free memory occupied by the set IVS. */
5625
5626 static void
5627 iv_ca_free (struct iv_ca **ivs)
5628 {
5629 free ((*ivs)->cand_for_use);
5630 free ((*ivs)->n_cand_uses);
5631 BITMAP_FREE ((*ivs)->cands);
5632 free ((*ivs)->n_invariant_uses);
5633 free ((*ivs)->used_inv_expr);
5634 free (*ivs);
5635 *ivs = NULL;
5636 }
5637
5638 /* Dumps IVS to FILE. */
5639
5640 static void
5641 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5642 {
5643 const char *pref = " invariants ";
5644 unsigned i;
5645 comp_cost cost = iv_ca_cost (ivs);
5646
5647 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5648 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5649 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5650 bitmap_print (file, ivs->cands, " candidates: ","\n");
5651
5652 for (i = 0; i < ivs->upto; i++)
5653 {
5654 struct iv_use *use = iv_use (data, i);
5655 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5656 if (cp)
5657 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5658 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5659 else
5660 fprintf (file, " use:%d --> ??\n", use->id);
5661 }
5662
5663 for (i = 1; i <= data->max_inv_id; i++)
5664 if (ivs->n_invariant_uses[i])
5665 {
5666 fprintf (file, "%s%d", pref, i);
5667 pref = ", ";
5668 }
5669 fprintf (file, "\n\n");
5670 }
5671
5672 /* Try changing the candidate in IVS to CAND for each use. Return the cost
5673 of the new set, and store the differences in DELTA. The number of induction
5674 variables in the new set is stored in N_IVS. MIN_NCAND is a flag; when true,
5675 the function tries to find a solution with minimal iv candidates. */
5676
5677 static comp_cost
5678 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5679 struct iv_cand *cand, struct iv_ca_delta **delta,
5680 unsigned *n_ivs, bool min_ncand)
5681 {
5682 unsigned i;
5683 comp_cost cost;
5684 struct iv_use *use;
5685 struct cost_pair *old_cp, *new_cp;
5686
5687 *delta = NULL;
5688 for (i = 0; i < ivs->upto; i++)
5689 {
5690 use = iv_use (data, i);
5691 old_cp = iv_ca_cand_for_use (ivs, use);
5692
5693 if (old_cp
5694 && old_cp->cand == cand)
5695 continue;
5696
5697 new_cp = get_use_iv_cost (data, use, cand);
5698 if (!new_cp)
5699 continue;
5700
5701 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5702 continue;
5703
5704 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5705 continue;
5706
5707 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5708 }
5709
5710 iv_ca_delta_commit (data, ivs, *delta, true);
5711 cost = iv_ca_cost (ivs);
5712 if (n_ivs)
5713 *n_ivs = iv_ca_n_cands (ivs);
5714 iv_ca_delta_commit (data, ivs, *delta, false);
5715
5716 return cost;
5717 }
5718
5719 /* Try narrowing set IVS by removing CAND. Return the cost of
5720 the new set and store the differences in DELTA. START is
5721 the candidate with which we start narrowing. */
5722
5723 static comp_cost
5724 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5725 struct iv_cand *cand, struct iv_cand *start,
5726 struct iv_ca_delta **delta)
5727 {
5728 unsigned i, ci;
5729 struct iv_use *use;
5730 struct cost_pair *old_cp, *new_cp, *cp;
5731 bitmap_iterator bi;
5732 struct iv_cand *cnd;
5733 comp_cost cost, best_cost, acost;
5734
5735 *delta = NULL;
5736 for (i = 0; i < n_iv_uses (data); i++)
5737 {
5738 use = iv_use (data, i);
5739
5740 old_cp = iv_ca_cand_for_use (ivs, use);
5741 if (old_cp->cand != cand)
5742 continue;
5743
5744 best_cost = iv_ca_cost (ivs);
5745 /* Start narrowing with START. */
5746 new_cp = get_use_iv_cost (data, use, start);
5747
5748 if (data->consider_all_candidates)
5749 {
5750 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5751 {
5752 if (ci == cand->id || (start && ci == start->id))
5753 continue;
5754
5755 cnd = iv_cand (data, ci);
5756
5757 cp = get_use_iv_cost (data, use, cnd);
5758 if (!cp)
5759 continue;
5760
5761 iv_ca_set_cp (data, ivs, use, cp);
5762 acost = iv_ca_cost (ivs);
5763
5764 if (compare_costs (acost, best_cost) < 0)
5765 {
5766 best_cost = acost;
5767 new_cp = cp;
5768 }
5769 }
5770 }
5771 else
5772 {
5773 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5774 {
5775 if (ci == cand->id || (start && ci == start->id))
5776 continue;
5777
5778 cnd = iv_cand (data, ci);
5779
5780 cp = get_use_iv_cost (data, use, cnd);
5781 if (!cp)
5782 continue;
5783
5784 iv_ca_set_cp (data, ivs, use, cp);
5785 acost = iv_ca_cost (ivs);
5786
5787 if (compare_costs (acost, best_cost) < 0)
5788 {
5789 best_cost = acost;
5790 new_cp = cp;
5791 }
5792 }
5793 }
5794 /* Restore to old cp for use. */
5795 iv_ca_set_cp (data, ivs, use, old_cp);
5796
5797 if (!new_cp)
5798 {
5799 iv_ca_delta_free (delta);
5800 return infinite_cost;
5801 }
5802
5803 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5804 }
5805
5806 iv_ca_delta_commit (data, ivs, *delta, true);
5807 cost = iv_ca_cost (ivs);
5808 iv_ca_delta_commit (data, ivs, *delta, false);
5809
5810 return cost;
5811 }
5812
5813 /* Try optimizing the set of candidates IVS by removing candidates other
5814 than EXCEPT_CAND from it. Return the cost of the new set, and store the
5815 differences in DELTA. */
5816
5817 static comp_cost
5818 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5819 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5820 {
5821 bitmap_iterator bi;
5822 struct iv_ca_delta *act_delta, *best_delta;
5823 unsigned i;
5824 comp_cost best_cost, acost;
5825 struct iv_cand *cand;
5826
5827 best_delta = NULL;
5828 best_cost = iv_ca_cost (ivs);
5829
5830 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5831 {
5832 cand = iv_cand (data, i);
5833
5834 if (cand == except_cand)
5835 continue;
5836
5837 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
5838
5839 if (compare_costs (acost, best_cost) < 0)
5840 {
5841 best_cost = acost;
5842 iv_ca_delta_free (&best_delta);
5843 best_delta = act_delta;
5844 }
5845 else
5846 iv_ca_delta_free (&act_delta);
5847 }
5848
5849 if (!best_delta)
5850 {
5851 *delta = NULL;
5852 return best_cost;
5853 }
5854
5855 /* Recurse to possibly remove other unnecessary ivs. */
5856 iv_ca_delta_commit (data, ivs, best_delta, true);
5857 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5858 iv_ca_delta_commit (data, ivs, best_delta, false);
5859 *delta = iv_ca_delta_join (best_delta, *delta);
5860 return best_cost;
5861 }
5862
5863 /* Tries to extend the set IVS in the best possible way in order
5864 to express the USE. If ORIGINALP is true, prefer candidates from
5865 the original set of IVs, otherwise favor important candidates not
5866 based on any memory object. */

static bool
try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
                  struct iv_use *use, bool originalp)
{
  comp_cost best_cost, act_cost;
  unsigned i;
  bitmap_iterator bi;
  struct iv_cand *cand;
  struct iv_ca_delta *best_delta = NULL, *act_delta;
  struct cost_pair *cp;

  iv_ca_add_use (data, ivs, use);
  best_cost = iv_ca_cost (ivs);
  cp = iv_ca_cand_for_use (ivs, use);
  if (cp)
    {
      best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
      iv_ca_set_no_cp (data, ivs, use);
    }

  /* If ORIGINALP is true, try to find the original IV for the use.
     Otherwise, first try important candidates not based on any memory
     object.  Only if this fails do we try the specific ones.  The
     rationale: in loops with many variables the best choice is often to
     use just one generic biv.  If we added many ivs specific to the uses
     here, the optimization algorithm would later be likely to get stuck
     in a local minimum, causing us to create too many ivs.  Going from
     few ivs to more seems more likely to succeed -- starting from few
     ivs, replacing an expensive use by a specific iv should always be a
     win.  */
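  /* For instance (an illustrative sketch, not drawn from any particular
     testcase): in a loop such as

        for (i = 0; i < n; i++)
          {
            a[i] = ...;
            b[i] = ...;
          }

     expressing both array addresses from the single counter i is usually
     cheaper than giving each array its own pointer induction.  */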
  EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (originalp && cand->pos != IP_ORIGINAL)
        continue;

      if (!originalp && cand->iv->base_object != NULL_TREE)
        continue;

      if (iv_ca_cand_used_p (ivs, cand))
        continue;

      cp = get_use_iv_cost (data, use, cand);
      if (!cp)
        continue;

      iv_ca_set_cp (data, ivs, use, cp);
      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
                               true);
      iv_ca_set_no_cp (data, ivs, use);
      act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);

      if (compare_costs (act_cost, best_cost) < 0)
        {
          best_cost = act_cost;

          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (infinite_cost_p (best_cost))
    {
      for (i = 0; i < use->n_map_members; i++)
        {
          cp = use->cost_map + i;
          cand = cp->cand;
          if (!cand)
            continue;

          /* Already tried this.  */
          if (cand->important)
            {
              if (originalp && cand->pos == IP_ORIGINAL)
                continue;
              if (!originalp && cand->iv->base_object == NULL_TREE)
                continue;
            }

          if (iv_ca_cand_used_p (ivs, cand))
            continue;

          act_delta = NULL;
          iv_ca_set_cp (data, ivs, use, cp);
          act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
          iv_ca_set_no_cp (data, ivs, use);
          act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
                                       cp, act_delta);

          if (compare_costs (act_cost, best_cost) < 0)
            {
              best_cost = act_cost;

              if (best_delta)
                iv_ca_delta_free (&best_delta);
              best_delta = act_delta;
            }
          else
            iv_ca_delta_free (&act_delta);
        }
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  iv_ca_delta_free (&best_delta);

  return !infinite_cost_p (best_cost);
}

/* Finds an initial assignment of candidates to uses.  */

static struct iv_ca *
get_initial_solution (struct ivopts_data *data, bool originalp)
{
  struct iv_ca *ivs = iv_ca_new (data);
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
      {
        iv_ca_free (&ivs);
        return NULL;
      }

  return ivs;
}

/* Tries to improve the set of induction variables IVS.  */
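
/* An informal outline of the greedy step below (commentary only):

     best = cost (IVS);
     for each candidate C not yet in IVS:
       evaluate cost (IVS + {C}), pruning ivs that C makes redundant;
       remember the cheapest such change;
     if no extension helped, fall back to pruning alone;
     commit the best change found, if any.  */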

static bool
try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
{
  unsigned i, n_ivs;
  comp_cost acost, best_cost = iv_ca_cost (ivs);
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;

  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
        continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
      if (!act_delta)
        continue;

      /* If we successfully added the candidate and the set is small enough,
         try optimizing it by removing other candidates.  */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
        {
          iv_ca_delta_commit (data, ivs, act_delta, true);
          acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
          iv_ca_delta_commit (data, ivs, act_delta, false);
          act_delta = iv_ca_delta_join (act_delta, tmp_delta);
        }

      if (compare_costs (acost, best_cost) < 0)
        {
          best_cost = acost;
          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* Try removing the candidates from the set instead.  */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      /* Nothing more we can do.  */
      if (!best_delta)
        return false;
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
  iv_ca_delta_free (&best_delta);
  return true;
}

/* Attempts to find the optimal set of induction variables.  We use a
   simple greedy heuristic: we try to replace at most one candidate in
   the selected solution and remove the unused ivs while this improves
   the cost.  */

static struct iv_ca *
find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
{
  struct iv_ca *set;

  /* Get the initial solution.  */
  set = get_initial_solution (data, originalp);
  if (!set)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
      return NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial set of candidates:\n");
      iv_ca_dump (data, dump_file, set);
    }

  while (try_improve_iv_set (data, set))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Improved to:\n");
          iv_ca_dump (data, dump_file, set);
        }
    }

  return set;
}

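/* Finds the best iv set by running find_optimal_iv_set_1 with both
   strategies, keeping the cheaper of the two resulting sets and
   recording the selected candidate for each use.  */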
static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  unsigned i;
  struct iv_ca *set, *origset;
  struct iv_use *use;
  comp_cost cost, origcost;

  /* Determine the cost using a strategy that starts with the original
     IVs, then try again using a strategy that prefers candidates not
     based on any IVs.  */
  origset = find_optimal_iv_set_1 (data, true);
  set = find_optimal_iv_set_1 (data, false);

  if (!origset && !set)
    return NULL;

  origcost = origset ? iv_ca_cost (origset) : infinite_cost;
  cost = set ? iv_ca_cost (set) : infinite_cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
               origcost.cost, origcost.complexity);
      fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
               cost.cost, cost.complexity);
    }

  /* Choose the one with the best cost.  */
  if (compare_costs (origcost, cost) <= 0)
    {
      if (set)
        iv_ca_free (&set);
      set = origset;
    }
  else if (origset)
    iv_ca_free (&origset);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      use->selected = iv_ca_cand_for_use (set, use)->cand;
    }

  return set;
}

/* Creates a new induction variable corresponding to CAND.  */

static void
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
{
  gimple_stmt_iterator incr_pos;
  tree base;
  bool after = false;

  if (!cand->iv)
    return;

  switch (cand->pos)
    {
    case IP_NORMAL:
      incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
      break;

    case IP_END:
      incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
      after = true;
      break;

    case IP_AFTER_USE:
      after = true;
      /* fall through */
    case IP_BEFORE_USE:
      incr_pos = gsi_for_stmt (cand->incremented_at);
      break;

    case IP_ORIGINAL:
      /* Mark that the iv is preserved.  */
      name_info (data, cand->var_before)->preserve_biv = true;
      name_info (data, cand->var_after)->preserve_biv = true;

      /* Rewrite the increment so that it uses var_before directly.  */
      find_interesting_uses_op (data, cand->var_after)->selected = cand;
      return;
    }

  gimple_add_tmp_var (cand->var_before);

  base = unshare_expr (cand->iv->base);

  create_iv (base, unshare_expr (cand->iv->step),
             cand->var_before, data->current_loop,
             &incr_pos, after, &cand->var_before, &cand->var_after);
}

/* Creates new induction variables described in SET.  */

static void
create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
{
  unsigned i;
  struct iv_cand *cand;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);
      create_new_iv (data, cand);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\nSelected IV set: \n");
      EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
        {
          cand = iv_cand (data, i);
          dump_cand (dump_file, cand);
        }
      fprintf (dump_file, "\n");
    }
}

/* Rewrites USE (definition of iv used in a nonlinear expression)
   using candidate CAND.  */

static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
                            struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree op, tgt;
  gimple ass;
  gimple_stmt_iterator bsi;

  /* An important special case -- if we are asked to express value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back).  */
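  /* E.g. (an illustrative sketch): for an original biv incremented by

         i_after = i_before + 1;

     a request to express i_after via the candidate that preserves
     i_before can leave the statement untouched, provided the other
     operand is invariant in the loop (checked below).  */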
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      enum tree_code stmt_code;

      gcc_assert (is_gimple_assign (use->stmt));
      gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);

      /* Check whether we may leave the computation unchanged.
         This is the case only if it does not rely on other
         computations in the loop -- otherwise, the computation
         we rely upon may be removed in remove_unused_ivs,
         thus leading to ICE.  */
      stmt_code = gimple_assign_rhs_code (use->stmt);
      if (stmt_code == PLUS_EXPR
          || stmt_code == MINUS_EXPR
          || stmt_code == POINTER_PLUS_EXPR)
        {
          if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
            op = gimple_assign_rhs2 (use->stmt);
          else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
            op = gimple_assign_rhs1 (use->stmt);
          else
            op = NULL_TREE;
        }
      else
        op = NULL_TREE;

      if (op && expr_invariant_in_loop_p (data->current_loop, op))
        return;
    }

  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  switch (gimple_code (use->stmt))
    {
    case GIMPLE_PHI:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
        return;

      bsi = gsi_after_labels (gimple_bb (use->stmt));
      break;

    case GIMPLE_ASSIGN:
      tgt = gimple_assign_lhs (use->stmt);
      bsi = gsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  if (!valid_gimple_rhs_p (comp)
      || (gimple_code (use->stmt) != GIMPLE_PHI
          /* We can't allow re-allocating the stmt as it might be pointed
             to still.  */
          && (get_gimple_rhs_num_ops (TREE_CODE (comp))
              >= gimple_num_ops (gsi_stmt (bsi)))))
    {
      comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
                                       true, GSI_SAME_STMT);
      if (POINTER_TYPE_P (TREE_TYPE (tgt)))
        {
          duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
          /* As this isn't a plain copy we have to reset alignment
             information.  */
          if (SSA_NAME_PTR_INFO (comp))
            mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
        }
    }

  if (gimple_code (use->stmt) == GIMPLE_PHI)
    {
      ass = gimple_build_assign (tgt, comp);
      gsi_insert_before (&bsi, ass, GSI_SAME_STMT);

      bsi = gsi_for_stmt (use->stmt);
      remove_phi_node (&bsi, false);
    }
  else
    {
      gimple_assign_set_rhs_from_tree (&bsi, comp);
      use->stmt = gsi_stmt (bsi);
    }
}

/* Performs a peephole optimization to reorder the iv update statement with
   a mem ref to enable instruction combining in later phases.  The mem ref
   uses the iv value before the update, so the reordering transformation
   requires adjustment of the offset.  CAND is the selected IV_CAND.

   Example:

     t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
     iv2 = iv1 + 1;

     if (t < val)      (1)
       goto L;
     goto Head;

   Directly propagating t over to (1) would introduce an overlapping live
   range and thus increase register pressure.  This peephole transforms it
   into:

     iv2 = iv1 + 1;
     t = MEM_REF (base, iv2, 8, 8);
     if (t < val)
       goto L;
     goto Head;
*/

static void
adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
{
  tree var_after;
  gimple iv_update, stmt;
  basic_block bb;
  gimple_stmt_iterator gsi, gsi_iv;

  if (cand->pos != IP_NORMAL)
    return;

  var_after = cand->var_after;
  iv_update = SSA_NAME_DEF_STMT (var_after);

  bb = gimple_bb (iv_update);
  gsi = gsi_last_nondebug_bb (bb);
  stmt = gsi_stmt (gsi);

  /* Only handle conditional statements for now.  */
  if (gimple_code (stmt) != GIMPLE_COND)
    return;

  gsi_prev_nondebug (&gsi);
  stmt = gsi_stmt (gsi);
  if (stmt != iv_update)
    return;

  gsi_prev_nondebug (&gsi);
  if (gsi_end_p (gsi))
    return;

  stmt = gsi_stmt (gsi);
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return;

  if (stmt != use->stmt)
    return;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Reordering \n");
      print_gimple_stmt (dump_file, iv_update, 0, 0);
      print_gimple_stmt (dump_file, use->stmt, 0, 0);
      fprintf (dump_file, "\n");
    }

  gsi = gsi_for_stmt (use->stmt);
  gsi_iv = gsi_for_stmt (iv_update);
  gsi_move_before (&gsi_iv, &gsi);

  cand->pos = IP_BEFORE_USE;
  cand->incremented_at = use->stmt;
}

/* Rewrites USE (address that is an iv) using candidate CAND.  */

static void
rewrite_use_address (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  tree base_hint = NULL_TREE;
  tree ref, iv;
  bool ok;

  adjust_iv_update_pos (cand, use);
  ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  /* To avoid undefined overflow problems, all IV candidates use unsigned
     integer types.  The drawback is that this makes it impossible for
     create_mem_ref to distinguish an IV that is based on a memory object
     from one that represents simply an offset.

     To work around this problem, we pass a hint to create_mem_ref that
     indicates which variable (if any) in aff is an IV based on a memory
     object.  Note that we only consider the candidate.  If this is not
     based on an object, the base of the reference is in some subexpression
     of the use -- but these will use pointer types, so they are recognized
     by the create_mem_ref heuristics anyway.  */
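  /* An illustrative sketch: for a use such as *(p + (sizetype) i * 4)
     expressed by a candidate based on the pointer p, the hint lets
     create_mem_ref build a reference of the form MEM[base: p, index: iv]
     rather than folding the whole combination into a plain offset; the
     exact shape of the resulting MEM_REF depends on the target's
     addressing modes.  */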
  if (cand->iv->base_object)
    base_hint = var_at_stmt (data->current_loop, cand, use->stmt);

  iv = var_at_stmt (data->current_loop, cand, use->stmt);
  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
                        reference_alias_ptr_type (*use->op_p),
                        iv, base_hint, data->speed);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}

/* Rewrites USE (the condition such that one of the arguments is an iv) using
   candidate CAND.  */
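
/* For example (the SSA names below are made up for illustration), an
   exit test

     if (i_4 < n_7)

   may be rewritten into a comparison of the chosen candidate against a
   bound precomputed in the loop preheader,

     if (ivtmp_12 != bound_13)

   whenever iv elimination found such a bound (cp->value below).  */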

static void
rewrite_use_compare (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, *var_p, op, bound;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
  bool ok;

  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);
      gimple_seq stmts;

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Replacing exit test: ");
          print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
        }
      compare = cp->comp;
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
      if (stmts)
        gsi_insert_seq_on_edge_immediate (
                loop_preheader_edge (data->current_loop),
                stmts);

      gimple_cond_set_lhs (use->stmt, var);
      gimple_cond_set_code (use->stmt, compare);
      gimple_cond_set_rhs (use->stmt, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
  gcc_assert (ok);

  *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
                                     true, GSI_SAME_STMT);
}

/* Rewrites USE using candidate CAND.  */

static void
rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      rewrite_use_nonlinear_expr (data, use, cand);
      break;

    case USE_ADDRESS:
      rewrite_use_address (data, use, cand);
      break;

    case USE_COMPARE:
      rewrite_use_compare (data, use, cand);
      break;

    default:
      gcc_unreachable ();
    }

  update_stmt (use->stmt);
}

/* Rewrite the uses using the selected induction variables.  */

static void
rewrite_uses (struct ivopts_data *data)
{
  unsigned i;
  struct iv_cand *cand;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      cand = use->selected;
      gcc_assert (cand);

      rewrite_use (data, use, cand);
    }
}

/* Removes the ivs that are not used after rewriting.  */
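
/* A note on the debug-info handling below (commentary only): an
   otherwise-dead iv that still appears in debug bind statements is first
   re-expressed in terms of a surviving candidate, so that releasing its
   SSA name does not degrade the generated debug information.  */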

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;
  bitmap toremove = BITMAP_ALLOC (NULL);

  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
          && !integer_zerop (info->iv->step)
          && !info->inv_id
          && !info->iv->have_use_for
          && !info->preserve_biv)
        {
          bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));

          tree def = info->iv->ssa_name;

          if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
            {
              imm_use_iterator imm_iter;
              use_operand_p use_p;
              gimple stmt;
              int count = 0;

              FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
                {
                  if (!gimple_debug_bind_p (stmt))
                    continue;

                  /* We just want to determine whether to do nothing
                     (count == 0), to substitute the computed
                     expression into a single use of the SSA DEF by
                     itself (count == 1), or to use a debug temp
                     because the SSA DEF is used multiple times or as
                     part of a larger expression (count > 1).  */
                  count++;
                  if (gimple_debug_bind_get_value (stmt) != def)
                    count++;

                  if (count > 1)
                    BREAK_FROM_IMM_USE_STMT (imm_iter);
                }

              if (!count)
                continue;

              struct iv_use dummy_use;
              struct iv_cand *best_cand = NULL, *cand;
              unsigned i, best_pref = 0, cand_pref;

              memset (&dummy_use, 0, sizeof (dummy_use));
              dummy_use.iv = info->iv;
              for (i = 0; i < n_iv_uses (data) && i < 64; i++)
                {
                  cand = iv_use (data, i)->selected;
                  if (cand == best_cand)
                    continue;
                  cand_pref = operand_equal_p (cand->iv->step,
                                               info->iv->step, 0)
                              ? 4 : 0;
                  cand_pref
                    += TYPE_MODE (TREE_TYPE (cand->iv->base))
                       == TYPE_MODE (TREE_TYPE (info->iv->base))
                       ? 2 : 0;
                  cand_pref
                    += TREE_CODE (cand->iv->base) == INTEGER_CST
                       ? 1 : 0;
                  if (best_cand == NULL || best_pref < cand_pref)
                    {
                      best_cand = cand;
                      best_pref = cand_pref;
                    }
                }

              if (!best_cand)
                continue;

              tree comp = get_computation_at (data->current_loop,
                                              &dummy_use, best_cand,
                                              SSA_NAME_DEF_STMT (def));
              if (!comp)
                continue;

              if (count > 1)
                {
                  tree vexpr = make_node (DEBUG_EXPR_DECL);
                  DECL_ARTIFICIAL (vexpr) = 1;
                  TREE_TYPE (vexpr) = TREE_TYPE (comp);
                  if (SSA_NAME_VAR (def))
                    DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
                  else
                    DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
                  gimple def_temp = gimple_build_debug_bind (vexpr, comp, NULL);
                  gimple_stmt_iterator gsi;

                  if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
                    gsi = gsi_after_labels (gimple_bb
                                            (SSA_NAME_DEF_STMT (def)));
                  else
                    gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));

                  gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
                  comp = vexpr;
                }

              FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
                {
                  if (!gimple_debug_bind_p (stmt))
                    continue;

                  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
                    SET_USE (use_p, comp);

                  update_stmt (stmt);
                }
            }
        }
    }

  release_defs_bitset (toremove);

  BITMAP_FREE (toremove);
}

/* Frees memory occupied by struct tree_niter_desc in *VALUE.  Callback
   for hash_map::traverse.  */

bool
free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
{
  free (value);
  return true;
}

/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      data->niters->traverse<void *, free_tree_niter_desc> (NULL);
      delete data->niters;
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
        if (use->cost_map[j].depends_on)
          BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  data->iv_uses.truncate (0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      free (cand->iv);
      if (cand->depends_on)
        BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  data->iv_candidates.truncate (0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info,
                                     data->version_info_size);
    }

  data->max_inv_id = 0;

  FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
    SET_DECL_RTL (obj, NULL_RTX);

  decl_rtl_to_reset.truncate (0);

  data->inv_expr_tab->empty ();
  data->inv_expr_id = 0;
}

/* Finalizes the data structures used by the induction variable
   optimization pass.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  decl_rtl_to_reset.release ();
  data->iv_uses.release ();
  data->iv_candidates.release ();
  delete data->inv_expr_tab;
  data->inv_expr_tab = NULL;
  free_affine_expand_cache (&data->name_expansion_cache);
}

/* Returns true if the loop body BODY includes any function calls.  */

static bool
loop_body_includes_call (basic_block *body, unsigned num_nodes)
{
  gimple_stmt_iterator gsi;
  unsigned i;

  for (i = 0; i < num_nodes; i++)
    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      {
        gimple stmt = gsi_stmt (gsi);
        if (is_gimple_call (stmt)
            && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
          return true;
      }
  return false;
}

/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit = single_dom_exit (loop);
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      if (exit)
        {
          fprintf (dump_file, "  single exit %d -> %d, exit condition ",
                   exit->src->index, exit->dest->index);
          print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
          fprintf (dump_file, "\n");
        }

      fprintf (dump_file, "\n");
    }

  body = get_loop_body (loop);
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

  data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);

  /* For each ssa name determines whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_use_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}

/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}