gcc/tree-ssa-loop-ivopts.c
1 /* Induction variable optimizations.
2 Copyright (C) 2003-2016 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24    coalescing and induction variable elimination.  It proceeds in the
25    following steps:
26
27 1) The interesting uses of induction variables are found. This includes
28
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
32
33 2) Candidates for the induction variables are found. This includes
34
35 -- old induction variables
36 -- the variables defined by expressions derived from the "interesting
37 uses" above
38
39    3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
40 cost function assigns a cost to sets of induction variables and consists
41 of three parts:
42
43 -- The use costs. Each of the interesting uses chooses the best induction
44 variable in the set and adds its cost to the sum. The cost reflects
45 the time spent on modifying the induction variables value to be usable
46 for the given purpose (adding base and offset for arrays, etc.).
47 -- The variable costs. Each of the variables has a cost assigned that
48 reflects the costs associated with incrementing the value of the
49 variable. The original variables are somewhat preferred.
50 -- The set cost. Depending on the size of the set, extra cost may be
51 added to reflect register pressure.
52
53 All the costs are defined in a machine-specific way, using the target
54 hooks and machine descriptions to determine them.
55
56 4) The trees are transformed to use the new variables, the dead code is
57 removed.
58
59    All of this is done loop by loop.  Doing it globally is theoretically
60    possible; it might give better performance and enable us to decide
61    costs more precisely, but getting all the interactions right would be
62    complicated.  */
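
/* As an illustrative sketch only (not part of the pass), consider a loop
   such as

     for (i = 0; i < n; i++)
       a[i] = b[i];

   After induction variable optimization the index may be strength-reduced
   into pointer induction variables, conceptually

     for (p = &a[0], q = &b[0]; p != &a[n]; p++, q++)
       *p = *q;

   with the original counter eliminated if it has no other uses.  The exact
   form chosen depends on the target's addressing modes and the cost model
   below.  */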
63
64 #include "config.h"
65 #include "system.h"
66 #include "coretypes.h"
67 #include "backend.h"
68 #include "rtl.h"
69 #include "tree.h"
70 #include "gimple.h"
71 #include "cfghooks.h"
72 #include "tree-pass.h"
73 #include "tm_p.h"
74 #include "ssa.h"
75 #include "expmed.h"
76 #include "insn-config.h"
77 #include "emit-rtl.h"
78 #include "recog.h"
79 #include "cgraph.h"
80 #include "gimple-pretty-print.h"
81 #include "alias.h"
82 #include "fold-const.h"
83 #include "stor-layout.h"
84 #include "tree-eh.h"
85 #include "gimplify.h"
86 #include "gimple-iterator.h"
87 #include "gimplify-me.h"
88 #include "tree-cfg.h"
89 #include "tree-ssa-loop-ivopts.h"
90 #include "tree-ssa-loop-manip.h"
91 #include "tree-ssa-loop-niter.h"
92 #include "tree-ssa-loop.h"
93 #include "explow.h"
94 #include "expr.h"
95 #include "tree-dfa.h"
96 #include "tree-ssa.h"
97 #include "cfgloop.h"
98 #include "tree-scalar-evolution.h"
99 #include "params.h"
100 #include "tree-affine.h"
101 #include "tree-ssa-propagate.h"
102 #include "tree-ssa-address.h"
103 #include "builtins.h"
104 #include "tree-vectorizer.h"
105
106 /* FIXME: Expressions are expanded to RTL in this pass to determine the
107 cost of different addressing modes. This should be moved to a TBD
108 interface between the GIMPLE and RTL worlds. */
109
110 /* The infinite cost. */
111 #define INFTY 10000000
112
113 #define AVG_LOOP_NITER(LOOP) 5
114
115 /* Returns the expected number of loop iterations for LOOP.
116 The average trip count is computed from profile data if it
117 exists. */
118
119 static inline HOST_WIDE_INT
120 avg_loop_niter (struct loop *loop)
121 {
122 HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
123 if (niter == -1)
124 return AVG_LOOP_NITER (loop);
125
126 return niter;
127 }
128
129 /* Representation of the induction variable. */
130 struct iv
131 {
132 tree base; /* Initial value of the iv. */
133 tree base_object; /* A memory object to which the induction variable points. */
134 tree step; /* Step of the iv (constant only). */
135 tree ssa_name; /* The ssa name with the value. */
136 unsigned use_id; /* The id of the use recorded for this iv, valid if HAVE_USE_FOR is true. */
137 bool biv_p; /* Is it a biv? */
138 bool have_use_for; /* Do we already have a use for it? */
139 bool no_overflow; /* True if the iv doesn't overflow. */
140 bool have_address_use;/* For biv, indicate if it's used in any address
141 type use. */
142 };
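
/* For example (a sketch, not produced by the code), in a loop
   "for (i = 0; i < n; i++) ... a[i] ...", the ssa name holding i would get
   an iv with base 0, step 1 and biv_p set, while the address &a[i] would be
   described by an iv with base &a[0], step sizeof (a[0]) and base_object
   &a.  */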
143
144 /* Per-ssa version information (induction variable descriptions, etc.). */
145 struct version_info
146 {
147 tree name; /* The ssa name. */
148 struct iv *iv; /* Induction variable description. */
149 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
150 an expression that is not an induction variable. */
151 bool preserve_biv; /* For the original biv, whether to preserve it. */
152 unsigned inv_id; /* Id of an invariant. */
153 };
154
155 /* Types of uses. */
156 enum use_type
157 {
158 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
159 USE_ADDRESS, /* Use in an address. */
160 USE_COMPARE /* Use is a compare. */
161 };
162
163 /* Cost of a computation. */
164 struct comp_cost
165 {
166 int cost; /* The runtime cost. */
167 unsigned complexity; /* The estimate of the complexity of the code for
168 the computation (in no concrete units --
169 complexity field should be larger for more
170 complex expressions and addressing modes). */
171 };
172
173 static const comp_cost no_cost = {0, 0};
174 static const comp_cost infinite_cost = {INFTY, INFTY};
175
176 /* The candidate - cost pair. */
177 struct cost_pair
178 {
179 struct iv_cand *cand; /* The candidate. */
180 comp_cost cost; /* The cost. */
181 bitmap depends_on; /* The list of invariants that have to be
182 preserved. */
183 tree value; /* For final value elimination, the expression for
184 the final value of the iv. For iv elimination,
185 the new bound to compare with. */
186 enum tree_code comp; /* For iv elimination, the comparison. */
187 int inv_expr_id; /* Loop invariant expression id. */
188 };
189
190 /* Use. */
191 struct iv_use
192 {
193 unsigned id; /* The id of the use. */
194 unsigned sub_id; /* The id of the sub use. */
195 enum use_type type; /* Type of the use. */
196 struct iv *iv; /* The induction variable it is based on. */
197 gimple *stmt; /* Statement in that it occurs. */
198 tree *op_p; /* The place where it occurs. */
199 bitmap related_cands; /* The set of "related" iv candidates, plus the common
200 important ones. */
201
202 unsigned n_map_members; /* Number of candidates in the cost_map list. */
203 struct cost_pair *cost_map;
204 /* The costs with respect to the iv candidates. */
205
206 struct iv_cand *selected;
207 /* The selected candidate. */
208
209 struct iv_use *next; /* The next sub use. */
210 tree addr_base; /* Base address with const offset stripped. */
211 unsigned HOST_WIDE_INT addr_offset;
212 /* Const offset stripped from base address. */
213 };
214
215 /* The position where the iv is computed. */
216 enum iv_position
217 {
218 IP_NORMAL, /* At the end, just before the exit condition. */
219 IP_END, /* At the end of the latch block. */
220 IP_BEFORE_USE, /* Immediately before a specific use. */
221 IP_AFTER_USE, /* Immediately after a specific use. */
222 IP_ORIGINAL /* The original biv. */
223 };
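
/* Schematically (illustration only), for a loop with a body, a latch block
   and an exit test, IP_NORMAL places the increment in the exit test's block
   just before the exit condition, IP_END places it at the end of the latch,
   and IP_BEFORE_USE/IP_AFTER_USE place it next to a particular use.  */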
224
225 /* The induction variable candidate. */
226 struct iv_cand
227 {
228 unsigned id; /* The number of the candidate. */
229 bool important; /* Whether this is an "important" candidate, i.e. such
230 that it should be considered by all uses. */
231 ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */
232 gimple *incremented_at;/* For original biv, the statement where it is
233 incremented. */
234 tree var_before; /* The variable used for it before increment. */
235 tree var_after; /* The variable used for it after increment. */
236 struct iv *iv; /* The value of the candidate. NULL for
237 "pseudocandidate" used to indicate the possibility
238 to replace the final value of an iv by direct
239 computation of the value. */
240 unsigned cost; /* Cost of the candidate. */
241 unsigned cost_step; /* Cost of the candidate's increment operation. */
242 struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
243 where it is incremented. */
244 bitmap depends_on; /* The list of invariants that are used in step of the
245 biv. */
246 struct iv *orig_iv; /* The original iv if this cand is added from biv with
247 smaller type. */
248 };
249
250 /* Hashtable entry for common candidate derived from iv uses. */
251 struct iv_common_cand
252 {
253 tree base;
254 tree step;
255 /* IV uses from which this common candidate is derived. */
256 auto_vec<iv_use *> uses;
257 hashval_t hash;
258 };
259
260 /* Hashtable helpers. */
261
262 struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
263 {
264 static inline hashval_t hash (const iv_common_cand *);
265 static inline bool equal (const iv_common_cand *, const iv_common_cand *);
266 };
267
268 /* Hash function for possible common candidates. */
269
270 inline hashval_t
271 iv_common_cand_hasher::hash (const iv_common_cand *ccand)
272 {
273 return ccand->hash;
274 }
275
276 /* Hash table equality function for common candidates. */
277
278 inline bool
279 iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
280 const iv_common_cand *ccand2)
281 {
282 return (ccand1->hash == ccand2->hash
283 && operand_equal_p (ccand1->base, ccand2->base, 0)
284 && operand_equal_p (ccand1->step, ccand2->step, 0)
285 && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
286 == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
287 }
288
289 /* Loop invariant expression hashtable entry. */
290 struct iv_inv_expr_ent
291 {
292 tree expr;
293 int id;
294 hashval_t hash;
295 };
296
297 /* Hashtable helpers. */
298
299 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
300 {
301 static inline hashval_t hash (const iv_inv_expr_ent *);
302 static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
303 };
304
305 /* Hash function for loop invariant expressions. */
306
307 inline hashval_t
308 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
309 {
310 return expr->hash;
311 }
312
313 /* Hash table equality function for expressions. */
314
315 inline bool
316 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
317 const iv_inv_expr_ent *expr2)
318 {
319 return expr1->hash == expr2->hash
320 && operand_equal_p (expr1->expr, expr2->expr, 0);
321 }
322
323 struct ivopts_data
324 {
325 /* The currently optimized loop. */
326 struct loop *current_loop;
327 source_location loop_loc;
328
329 /* Numbers of iterations for all exits of the current loop. */
330 hash_map<edge, tree_niter_desc *> *niters;
331
332 /* Number of registers used in it. */
333 unsigned regs_used;
334
335 /* The size of version_info array allocated. */
336 unsigned version_info_size;
337
338 /* The array of information for the ssa names. */
339 struct version_info *version_info;
340
341 /* The hashtable of loop invariant expressions created
342 by ivopt. */
343 hash_table<iv_inv_expr_hasher> *inv_expr_tab;
344
345 /* Loop invariant expression id. */
346 int inv_expr_id;
347
348 /* The bitmap of indices in version_info whose value was changed. */
349 bitmap relevant;
350
351 /* The uses of induction variables. */
352 vec<iv_use *> iv_uses;
353
354 /* The candidates. */
355 vec<iv_cand *> iv_candidates;
356
357 /* A bitmap of important candidates. */
358 bitmap important_candidates;
359
360 /* Cache used by tree_to_aff_combination_expand. */
361 hash_map<tree, name_expansion *> *name_expansion_cache;
362
363 /* The hashtable of common candidates derived from iv uses. */
364 hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
365
366 /* The common candidates. */
367 vec<iv_common_cand *> iv_common_cands;
368
369 /* The maximum invariant id. */
370 unsigned max_inv_id;
371
372 /* Number of no_overflow BIVs which are not used in memory address. */
373 unsigned bivs_not_used_in_addr;
374
375 /* Obstack for iv structure. */
376 struct obstack iv_obstack;
377
378   /* Whether to consider all candidates when replacing a use (if false, only
379      related and important candidates are considered). */
380 bool consider_all_candidates;
381
382 /* Are we optimizing for speed? */
383 bool speed;
384
385 /* Whether the loop body includes any function calls. */
386 bool body_includes_call;
387
388 /* Whether the loop body can only be exited via single exit. */
389 bool loop_single_exit_p;
390 };
391
392 /* An assignment of iv candidates to uses. */
393
394 struct iv_ca
395 {
396 /* The number of uses covered by the assignment. */
397 unsigned upto;
398
399 /* Number of uses that cannot be expressed by the candidates in the set. */
400 unsigned bad_uses;
401
402 /* Candidate assigned to a use, together with the related costs. */
403 struct cost_pair **cand_for_use;
404
405 /* Number of times each candidate is used. */
406 unsigned *n_cand_uses;
407
408 /* The candidates used. */
409 bitmap cands;
410
411 /* The number of candidates in the set. */
412 unsigned n_cands;
413
414 /* Total number of registers needed. */
415 unsigned n_regs;
416
417 /* Total cost of expressing uses. */
418 comp_cost cand_use_cost;
419
420 /* Total cost of candidates. */
421 unsigned cand_cost;
422
423 /* Number of times each invariant is used. */
424 unsigned *n_invariant_uses;
425
426 /* The array holding the number of uses of each loop
427 invariant expressions created by ivopt. */
428 unsigned *used_inv_expr;
429
430 /* The number of created loop invariants. */
431 unsigned num_used_inv_expr;
432
433 /* Total cost of the assignment. */
434 comp_cost cost;
435 };
436
437 /* Difference of two iv candidate assignments. */
438
439 struct iv_ca_delta
440 {
441 /* Changed use. */
442 struct iv_use *use;
443
444 /* An old assignment (for rollback purposes). */
445 struct cost_pair *old_cp;
446
447 /* A new assignment. */
448 struct cost_pair *new_cp;
449
450 /* Next change in the list. */
451 struct iv_ca_delta *next_change;
452 };
453
454 /* Bound on number of candidates below which all candidates are considered. */
455
456 #define CONSIDER_ALL_CANDIDATES_BOUND \
457 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
458
459 /* If there are more iv occurrences, we just give up (it is quite unlikely that
460 optimizing such a loop would help, and it would take ages). */
461
462 #define MAX_CONSIDERED_USES \
463 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
464
465 /* If there are at most this number of ivs in the set, always try removing
466    unnecessary ivs from the set. */
467
468 #define ALWAYS_PRUNE_CAND_SET_BOUND \
469 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
470
471 /* The list of trees for which the decl_rtl field must be reset is stored
472    here. */
473
474 static vec<tree> decl_rtl_to_reset;
475
476 static comp_cost force_expr_to_var_cost (tree, bool);
477
478 /* Number of uses recorded in DATA. */
479
480 static inline unsigned
481 n_iv_uses (struct ivopts_data *data)
482 {
483 return data->iv_uses.length ();
484 }
485
486 /* Ith use recorded in DATA. */
487
488 static inline struct iv_use *
489 iv_use (struct ivopts_data *data, unsigned i)
490 {
491 return data->iv_uses[i];
492 }
493
494 /* Number of candidates recorded in DATA. */
495
496 static inline unsigned
497 n_iv_cands (struct ivopts_data *data)
498 {
499 return data->iv_candidates.length ();
500 }
501
502 /* Ith candidate recorded in DATA. */
503
504 static inline struct iv_cand *
505 iv_cand (struct ivopts_data *data, unsigned i)
506 {
507 return data->iv_candidates[i];
508 }
509
510 /* The single loop exit if it dominates the latch, NULL otherwise. */
511
512 edge
513 single_dom_exit (struct loop *loop)
514 {
515 edge exit = single_exit (loop);
516
517 if (!exit)
518 return NULL;
519
520 if (!just_once_each_iteration_p (loop, exit->src))
521 return NULL;
522
523 return exit;
524 }
525
526 /* Dumps information about the induction variable IV to FILE. */
527
528 void
529 dump_iv (FILE *file, struct iv *iv, bool dump_name)
530 {
531 if (iv->ssa_name && dump_name)
532 {
533 fprintf (file, "ssa name ");
534 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
535 fprintf (file, "\n");
536 }
537
538 fprintf (file, " type ");
539 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
540 fprintf (file, "\n");
541
542 if (iv->step)
543 {
544 fprintf (file, " base ");
545 print_generic_expr (file, iv->base, TDF_SLIM);
546 fprintf (file, "\n");
547
548 fprintf (file, " step ");
549 print_generic_expr (file, iv->step, TDF_SLIM);
550 fprintf (file, "\n");
551 }
552 else
553 {
554 fprintf (file, " invariant ");
555 print_generic_expr (file, iv->base, TDF_SLIM);
556 fprintf (file, "\n");
557 }
558
559 if (iv->base_object)
560 {
561 fprintf (file, " base object ");
562 print_generic_expr (file, iv->base_object, TDF_SLIM);
563 fprintf (file, "\n");
564 }
565
566 if (iv->biv_p)
567 fprintf (file, " is a biv\n");
568
569 if (iv->no_overflow)
570 fprintf (file, " iv doesn't overflow w.r.t. loop niter\n");
571 }
572
573 /* Dumps information about the USE to FILE. */
574
575 void
576 dump_use (FILE *file, struct iv_use *use)
577 {
578 fprintf (file, "use %d", use->id);
579 if (use->sub_id)
580 fprintf (file, ".%d", use->sub_id);
581
582 fprintf (file, "\n");
583
584 switch (use->type)
585 {
586 case USE_NONLINEAR_EXPR:
587 fprintf (file, " generic\n");
588 break;
589
590 case USE_ADDRESS:
591 fprintf (file, " address\n");
592 break;
593
594 case USE_COMPARE:
595 fprintf (file, " compare\n");
596 break;
597
598 default:
599 gcc_unreachable ();
600 }
601
602 fprintf (file, " in statement ");
603 print_gimple_stmt (file, use->stmt, 0, 0);
604 fprintf (file, "\n");
605
606 fprintf (file, " at position ");
607 if (use->op_p)
608 print_generic_expr (file, *use->op_p, TDF_SLIM);
609 fprintf (file, "\n");
610
611 dump_iv (file, use->iv, false);
612
613 if (use->related_cands)
614 {
615 fprintf (file, " related candidates ");
616 dump_bitmap (file, use->related_cands);
617 }
618 }
619
620 /* Dumps information about the uses to FILE. */
621
622 void
623 dump_uses (FILE *file, struct ivopts_data *data)
624 {
625 unsigned i;
626 struct iv_use *use;
627
628 for (i = 0; i < n_iv_uses (data); i++)
629 {
630 use = iv_use (data, i);
631 do
632 {
633 dump_use (file, use);
634 use = use->next;
635 }
636 while (use);
637 fprintf (file, "\n");
638 }
639 }
640
641 /* Dumps information about induction variable candidate CAND to FILE. */
642
643 void
644 dump_cand (FILE *file, struct iv_cand *cand)
645 {
646 struct iv *iv = cand->iv;
647
648 fprintf (file, "candidate %d%s\n",
649 cand->id, cand->important ? " (important)" : "");
650
651 if (cand->depends_on)
652 {
653 fprintf (file, " depends on ");
654 dump_bitmap (file, cand->depends_on);
655 }
656
657 if (!iv)
658 {
659 fprintf (file, " final value replacement\n");
660 return;
661 }
662
663 if (cand->var_before)
664 {
665 fprintf (file, " var_before ");
666 print_generic_expr (file, cand->var_before, TDF_SLIM);
667 fprintf (file, "\n");
668 }
669 if (cand->var_after)
670 {
671 fprintf (file, " var_after ");
672 print_generic_expr (file, cand->var_after, TDF_SLIM);
673 fprintf (file, "\n");
674 }
675
676 switch (cand->pos)
677 {
678 case IP_NORMAL:
679 fprintf (file, " incremented before exit test\n");
680 break;
681
682 case IP_BEFORE_USE:
683 fprintf (file, " incremented before use %d\n", cand->ainc_use->id);
684 break;
685
686 case IP_AFTER_USE:
687 fprintf (file, " incremented after use %d\n", cand->ainc_use->id);
688 break;
689
690 case IP_END:
691 fprintf (file, " incremented at end\n");
692 break;
693
694 case IP_ORIGINAL:
695 fprintf (file, " original biv\n");
696 break;
697 }
698
699 dump_iv (file, iv, false);
700 }
701
702 /* Returns the info for ssa version VER. */
703
704 static inline struct version_info *
705 ver_info (struct ivopts_data *data, unsigned ver)
706 {
707 return data->version_info + ver;
708 }
709
710 /* Returns the info for ssa name NAME. */
711
712 static inline struct version_info *
713 name_info (struct ivopts_data *data, tree name)
714 {
715 return ver_info (data, SSA_NAME_VERSION (name));
716 }
717
718 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
719 emitted in LOOP. */
720
721 static bool
722 stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt)
723 {
724 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
725
726 gcc_assert (bb);
727
728 if (sbb == loop->latch)
729 return true;
730
731 if (sbb != bb)
732 return false;
733
734 return stmt == last_stmt (bb);
735 }
736
737 /* Returns true if STMT is after the place where the original induction
738 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
739 if the positions are identical. */
740
741 static bool
742 stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
743 {
744 basic_block cand_bb = gimple_bb (cand->incremented_at);
745 basic_block stmt_bb = gimple_bb (stmt);
746
747 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
748 return false;
749
750 if (stmt_bb != cand_bb)
751 return true;
752
753 if (true_if_equal
754 && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
755 return true;
756 return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
757 }
758
759 /* Returns true if STMT is after the place where the induction variable
760 CAND is incremented in LOOP. */
761
762 static bool
763 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt)
764 {
765 switch (cand->pos)
766 {
767 case IP_END:
768 return false;
769
770 case IP_NORMAL:
771 return stmt_after_ip_normal_pos (loop, stmt);
772
773 case IP_ORIGINAL:
774 case IP_AFTER_USE:
775 return stmt_after_inc_pos (cand, stmt, false);
776
777 case IP_BEFORE_USE:
778 return stmt_after_inc_pos (cand, stmt, true);
779
780 default:
781 gcc_unreachable ();
782 }
783 }
784
785 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
786
787 static bool
788 abnormal_ssa_name_p (tree exp)
789 {
790 if (!exp)
791 return false;
792
793 if (TREE_CODE (exp) != SSA_NAME)
794 return false;
795
796 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
797 }
798
799 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
800 abnormal phi node. Callback for for_each_index. */
801
802 static bool
803 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
804 void *data ATTRIBUTE_UNUSED)
805 {
806 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
807 {
808 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
809 return false;
810 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
811 return false;
812 }
813
814 return !abnormal_ssa_name_p (*index);
815 }
816
817 /* Returns true if EXPR contains a ssa name that occurs in an
818 abnormal phi node. */
819
820 bool
821 contains_abnormal_ssa_name_p (tree expr)
822 {
823 enum tree_code code;
824 enum tree_code_class codeclass;
825
826 if (!expr)
827 return false;
828
829 code = TREE_CODE (expr);
830 codeclass = TREE_CODE_CLASS (code);
831
832 if (code == SSA_NAME)
833 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
834
835 if (code == INTEGER_CST
836 || is_gimple_min_invariant (expr))
837 return false;
838
839 if (code == ADDR_EXPR)
840 return !for_each_index (&TREE_OPERAND (expr, 0),
841 idx_contains_abnormal_ssa_name_p,
842 NULL);
843
844 if (code == COND_EXPR)
845 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
846 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
847 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
848
849 switch (codeclass)
850 {
851 case tcc_binary:
852 case tcc_comparison:
853 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
854 return true;
855
856 /* Fallthru. */
857 case tcc_unary:
858 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
859 return true;
860
861 break;
862
863 default:
864 gcc_unreachable ();
865 }
866
867 return false;
868 }
869
870 /* Returns the structure describing number of iterations determined from
871 EXIT of DATA->current_loop, or NULL if something goes wrong. */
872
873 static struct tree_niter_desc *
874 niter_for_exit (struct ivopts_data *data, edge exit)
875 {
876 struct tree_niter_desc *desc;
877 tree_niter_desc **slot;
878
879 if (!data->niters)
880 {
881 data->niters = new hash_map<edge, tree_niter_desc *>;
882 slot = NULL;
883 }
884 else
885 slot = data->niters->get (exit);
886
887 if (!slot)
888 {
889 /* Try to determine number of iterations. We cannot safely work with ssa
890 names that appear in phi nodes on abnormal edges, so that we do not
891 create overlapping life ranges for them (PR 27283). */
892 desc = XNEW (struct tree_niter_desc);
893 if (!number_of_iterations_exit (data->current_loop,
894 exit, desc, true)
895 || contains_abnormal_ssa_name_p (desc->niter))
896 {
897 XDELETE (desc);
898 desc = NULL;
899 }
900 data->niters->put (exit, desc);
901 }
902 else
903 desc = *slot;
904
905 return desc;
906 }
907
908 /* Returns the structure describing number of iterations determined from
909 single dominating exit of DATA->current_loop, or NULL if something
910 goes wrong. */
911
912 static struct tree_niter_desc *
913 niter_for_single_dom_exit (struct ivopts_data *data)
914 {
915 edge exit = single_dom_exit (data->current_loop);
916
917 if (!exit)
918 return NULL;
919
920 return niter_for_exit (data, exit);
921 }
922
923 /* Initializes data structures used by the iv optimization pass, stored
924 in DATA. */
925
926 static void
927 tree_ssa_iv_optimize_init (struct ivopts_data *data)
928 {
929 data->version_info_size = 2 * num_ssa_names;
930 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
931 data->relevant = BITMAP_ALLOC (NULL);
932 data->important_candidates = BITMAP_ALLOC (NULL);
933 data->max_inv_id = 0;
934 data->niters = NULL;
935 data->iv_uses.create (20);
936 data->iv_candidates.create (20);
937 data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
938 data->inv_expr_id = 0;
939 data->name_expansion_cache = NULL;
940 data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
941 data->iv_common_cands.create (20);
942 decl_rtl_to_reset.create (20);
943 gcc_obstack_init (&data->iv_obstack);
944 }
945
946 /* Returns the memory object to which EXPR points. In case we are able to
947    determine that it does not point to any such object, NULL is returned. */
948
949 static tree
950 determine_base_object (tree expr)
951 {
952 enum tree_code code = TREE_CODE (expr);
953 tree base, obj;
954
955 /* If this is a pointer casted to any type, we need to determine
956 the base object for the pointer; so handle conversions before
957 throwing away non-pointer expressions. */
958 if (CONVERT_EXPR_P (expr))
959 return determine_base_object (TREE_OPERAND (expr, 0));
960
961 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
962 return NULL_TREE;
963
964 switch (code)
965 {
966 case INTEGER_CST:
967 return NULL_TREE;
968
969 case ADDR_EXPR:
970 obj = TREE_OPERAND (expr, 0);
971 base = get_base_address (obj);
972
973 if (!base)
974 return expr;
975
976 if (TREE_CODE (base) == MEM_REF)
977 return determine_base_object (TREE_OPERAND (base, 0));
978
979 return fold_convert (ptr_type_node,
980 build_fold_addr_expr (base));
981
982 case POINTER_PLUS_EXPR:
983 return determine_base_object (TREE_OPERAND (expr, 0));
984
985 case PLUS_EXPR:
986 case MINUS_EXPR:
987 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
988 gcc_unreachable ();
989
990 default:
991 return fold_convert (ptr_type_node, expr);
992 }
993 }
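
/* For instance (an illustrative sketch), for EXPR of the form &a[i] this
   returns &a, for a POINTER_PLUS_EXPR it recurses into the pointer operand,
   and for a non-pointer expression it returns NULL_TREE.  */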
994
995 /* Return true if address expression with non-DECL_P operand appears
996 in EXPR. */
997
998 static bool
999 contain_complex_addr_expr (tree expr)
1000 {
1001 bool res = false;
1002
1003 STRIP_NOPS (expr);
1004 switch (TREE_CODE (expr))
1005 {
1006 case POINTER_PLUS_EXPR:
1007 case PLUS_EXPR:
1008 case MINUS_EXPR:
1009 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
1010 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
1011 break;
1012
1013 case ADDR_EXPR:
1014 return (!DECL_P (TREE_OPERAND (expr, 0)));
1015
1016 default:
1017 return false;
1018 }
1019
1020 return res;
1021 }
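
/* For example (illustration only), &a[i] and &p->f contain a complex address
   expression (the ADDR_EXPR operand is not a DECL), while plain &a does
   not.  */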
1022
1023 /* Allocates an induction variable with given initial value BASE and step STEP
1024    for the loop described by DATA. NO_OVERFLOW implies the iv doesn't overflow. */
1025
1026 static struct iv *
1027 alloc_iv (struct ivopts_data *data, tree base, tree step,
1028 bool no_overflow = false)
1029 {
1030 tree expr = base;
1031 struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1032 sizeof (struct iv));
1033 gcc_assert (step != NULL_TREE);
1034
1035 /* Lower address expression in base except ones with DECL_P as operand.
1036 By doing this:
1037 1) More accurate cost can be computed for address expressions;
1038 2) Duplicate candidates won't be created for bases in different
1039 forms, like &a[0] and &a. */
1040 STRIP_NOPS (expr);
1041 if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1042 || contain_complex_addr_expr (expr))
1043 {
1044 aff_tree comb;
1045 tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
1046 base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1047 }
1048
1049 iv->base = base;
1050 iv->base_object = determine_base_object (base);
1051 iv->step = step;
1052 iv->biv_p = false;
1053 iv->have_use_for = false;
1054 iv->use_id = 0;
1055 iv->ssa_name = NULL_TREE;
1056 iv->no_overflow = no_overflow;
1057 iv->have_address_use = false;
1058
1059 return iv;
1060 }
1061
1062 /* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
1063 doesn't overflow. */
1064
1065 static void
1066 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1067 bool no_overflow)
1068 {
1069 struct version_info *info = name_info (data, iv);
1070
1071 gcc_assert (!info->iv);
1072
1073 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1074 info->iv = alloc_iv (data, base, step, no_overflow);
1075 info->iv->ssa_name = iv;
1076 }
1077
1078 /* Finds induction variable declaration for VAR. */
1079
1080 static struct iv *
1081 get_iv (struct ivopts_data *data, tree var)
1082 {
1083 basic_block bb;
1084 tree type = TREE_TYPE (var);
1085
1086 if (!POINTER_TYPE_P (type)
1087 && !INTEGRAL_TYPE_P (type))
1088 return NULL;
1089
1090 if (!name_info (data, var)->iv)
1091 {
1092 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1093
1094 if (!bb
1095 || !flow_bb_inside_loop_p (data->current_loop, bb))
1096 set_iv (data, var, var, build_int_cst (type, 0), true);
1097 }
1098
1099 return name_info (data, var)->iv;
1100 }
1101
1102 /* Return the first non-invariant ssa var found in EXPR. */
1103
1104 static tree
1105 extract_single_var_from_expr (tree expr)
1106 {
1107 int i, n;
1108 tree tmp;
1109 enum tree_code code;
1110
1111 if (!expr || is_gimple_min_invariant (expr))
1112 return NULL;
1113
1114 code = TREE_CODE (expr);
1115 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1116 {
1117 n = TREE_OPERAND_LENGTH (expr);
1118 for (i = 0; i < n; i++)
1119 {
1120 tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1121
1122 if (tmp)
1123 return tmp;
1124 }
1125 }
1126 return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1127 }
1128
1129 /* Finds basic ivs. */
1130
1131 static bool
1132 find_bivs (struct ivopts_data *data)
1133 {
1134 gphi *phi;
1135 affine_iv iv;
1136 tree step, type, base, stop;
1137 bool found = false;
1138 struct loop *loop = data->current_loop;
1139 gphi_iterator psi;
1140
1141 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1142 {
1143 phi = psi.phi ();
1144
1145 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1146 continue;
1147
1148 if (virtual_operand_p (PHI_RESULT (phi)))
1149 continue;
1150
1151 if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1152 continue;
1153
1154 if (integer_zerop (iv.step))
1155 continue;
1156
1157 step = iv.step;
1158 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1159 /* Stop expanding the iv base at the first ssa var referred to by the
1160 iv step. Ideally we would stop at any ssa var, but because that is
1161 expensive and such cases are unusual, we only do it for the first one.
1162
1163 See PR64705 for the rationale. */
1164 stop = extract_single_var_from_expr (step);
1165 base = expand_simple_operations (base, stop);
1166 if (contains_abnormal_ssa_name_p (base)
1167 || contains_abnormal_ssa_name_p (step))
1168 continue;
1169
1170 type = TREE_TYPE (PHI_RESULT (phi));
1171 base = fold_convert (type, base);
1172 if (step)
1173 {
1174 if (POINTER_TYPE_P (type))
1175 step = convert_to_ptrofftype (step);
1176 else
1177 step = fold_convert (type, step);
1178 }
1179
1180 set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1181 found = true;
1182 }
1183
1184 return found;
1185 }
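
/* For illustration (the names are made up), for a header phi like

     i_1 = PHI <0(preheader), i_7(latch)>
     ...
     i_7 = i_1 + 1;

   this records an iv for i_1 with base 0 and step 1; mark_bivs below then
   flags both i_1 and i_7 as bivs.  */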
1186
1187 /* Marks basic ivs. */
1188
1189 static void
1190 mark_bivs (struct ivopts_data *data)
1191 {
1192 gphi *phi;
1193 gimple *def;
1194 tree var;
1195 struct iv *iv, *incr_iv;
1196 struct loop *loop = data->current_loop;
1197 basic_block incr_bb;
1198 gphi_iterator psi;
1199
1200 data->bivs_not_used_in_addr = 0;
1201 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1202 {
1203 phi = psi.phi ();
1204
1205 iv = get_iv (data, PHI_RESULT (phi));
1206 if (!iv)
1207 continue;
1208
1209 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1210 def = SSA_NAME_DEF_STMT (var);
1211 /* Don't mark an iv peeled from another one as a biv. */
1212 if (def
1213 && gimple_code (def) == GIMPLE_PHI
1214 && gimple_bb (def) == loop->header)
1215 continue;
1216
1217 incr_iv = get_iv (data, var);
1218 if (!incr_iv)
1219 continue;
1220
1221 /* If the increment is in the subloop, ignore it. */
1222 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1223 if (incr_bb->loop_father != data->current_loop
1224 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1225 continue;
1226
1227 iv->biv_p = true;
1228 incr_iv->biv_p = true;
1229 if (iv->no_overflow)
1230 data->bivs_not_used_in_addr++;
1231 if (incr_iv->no_overflow)
1232 data->bivs_not_used_in_addr++;
1233 }
1234 }
1235
1236 /* Checks whether STMT defines a linear induction variable and stores its
1237 parameters to IV. */
1238
1239 static bool
1240 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1241 {
1242 tree lhs, stop;
1243 struct loop *loop = data->current_loop;
1244
1245 iv->base = NULL_TREE;
1246 iv->step = NULL_TREE;
1247
1248 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1249 return false;
1250
1251 lhs = gimple_assign_lhs (stmt);
1252 if (TREE_CODE (lhs) != SSA_NAME)
1253 return false;
1254
1255 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1256 return false;
1257
1258 /* Stop expanding the iv base at the first ssa var referred to by the
1259    iv step. Ideally we would stop at any ssa var, but because that is
1260    expensive and such cases are unusual, we only do it for the first one.
1261
1262    See PR64705 for the rationale. */
1263 stop = extract_single_var_from_expr (iv->step);
1264 iv->base = expand_simple_operations (iv->base, stop);
1265 if (contains_abnormal_ssa_name_p (iv->base)
1266 || contains_abnormal_ssa_name_p (iv->step))
1267 return false;
1268
1269 /* If STMT could throw, then do not consider STMT as defining a GIV.
1270    While this will suppress optimizations, we cannot safely delete this
1271    GIV and its associated statements, even if they appear to be unused. */
1272 if (stmt_could_throw_p (stmt))
1273 return false;
1274
1275 return true;
1276 }
1277
1278 /* Finds general ivs in statement STMT. */
1279
1280 static void
1281 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1282 {
1283 affine_iv iv;
1284
1285 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1286 return;
1287
1288 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1289 }
1290
1291 /* Finds general ivs in basic block BB. */
1292
1293 static void
1294 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1295 {
1296 gimple_stmt_iterator bsi;
1297
1298 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1299 find_givs_in_stmt (data, gsi_stmt (bsi));
1300 }
1301
1302 /* Finds general ivs. */
1303
1304 static void
1305 find_givs (struct ivopts_data *data)
1306 {
1307 struct loop *loop = data->current_loop;
1308 basic_block *body = get_loop_body_in_dom_order (loop);
1309 unsigned i;
1310
1311 for (i = 0; i < loop->num_nodes; i++)
1312 find_givs_in_bb (data, body[i]);
1313 free (body);
1314 }
1315
1316 /* For each ssa name defined in LOOP determines whether it is an induction
1317 variable and if so, its initial value and step. */
1318
1319 static bool
1320 find_induction_variables (struct ivopts_data *data)
1321 {
1322 unsigned i;
1323 bitmap_iterator bi;
1324
1325 if (!find_bivs (data))
1326 return false;
1327
1328 find_givs (data);
1329 mark_bivs (data);
1330
1331 if (dump_file && (dump_flags & TDF_DETAILS))
1332 {
1333 struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1334
1335 if (niter)
1336 {
1337 fprintf (dump_file, " number of iterations ");
1338 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1339 if (!integer_zerop (niter->may_be_zero))
1340 {
1341 fprintf (dump_file, "; zero if ");
1342 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1343 }
1344 fprintf (dump_file, "\n\n");
1345 };
1346
1347 fprintf (dump_file, "Induction variables:\n\n");
1348
1349 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1350 {
1351 if (ver_info (data, i)->iv)
1352 dump_iv (dump_file, ver_info (data, i)->iv, true);
1353 }
1354 }
1355
1356 return true;
1357 }
1358
1359 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.
1360 For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1361 is the const offset stripped from IV base. For uses of other types,
1362 ADDR_BASE and ADDR_OFFSET are zero by default. */
1363
1364 static struct iv_use *
1365 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1366 gimple *stmt, enum use_type use_type, tree addr_base = NULL,
1367 unsigned HOST_WIDE_INT addr_offset = 0)
1368 {
1369 struct iv_use *use = XCNEW (struct iv_use);
1370
1371 use->id = n_iv_uses (data);
1372 use->sub_id = 0;
1373 use->type = use_type;
1374 use->iv = iv;
1375 use->stmt = stmt;
1376 use->op_p = use_p;
1377 use->related_cands = BITMAP_ALLOC (NULL);
1378 use->next = NULL;
1379 use->addr_base = addr_base;
1380 use->addr_offset = addr_offset;
1381
1382 data->iv_uses.safe_push (use);
1383
1384 return use;
1385 }
1386
1387 /* Records a sub use of type USE_TYPE at *USE_P in STMT whose value is IV.
1388 The sub use is recorded under the one whose use id is ID_GROUP. */
1389
1390 static struct iv_use *
1391 record_sub_use (struct ivopts_data *data, tree *use_p,
1392 struct iv *iv, gimple *stmt, enum use_type use_type,
1393 tree addr_base, unsigned HOST_WIDE_INT addr_offset,
1394 unsigned int id_group)
1395 {
1396 struct iv_use *use = XCNEW (struct iv_use);
1397 struct iv_use *group = iv_use (data, id_group);
1398
1399 use->id = group->id;
1400 use->sub_id = 0;
1401 use->type = use_type;
1402 use->iv = iv;
1403 use->stmt = stmt;
1404 use->op_p = use_p;
1405 use->related_cands = NULL;
1406 use->addr_base = addr_base;
1407 use->addr_offset = addr_offset;
1408
1409 /* Sub use list is maintained in offset ascending order. */
1410 if (addr_offset <= group->addr_offset)
1411 {
1412 use->related_cands = group->related_cands;
1413 group->related_cands = NULL;
1414 use->next = group;
1415 data->iv_uses[id_group] = use;
1416 }
1417 else
1418 {
1419 struct iv_use *pre;
1420 do
1421 {
1422 pre = group;
1423 group = group->next;
1424 }
1425 while (group && addr_offset > group->addr_offset);
1426 use->next = pre->next;
1427 pre->next = use;
1428 }
1429
1430 return use;
1431 }
1432
1433 /* Checks whether OP is a loop-level invariant and if so, records it.
1434 NONLINEAR_USE is true if the invariant is used in a way we do not
1435 handle specially. */
1436
1437 static void
1438 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1439 {
1440 basic_block bb;
1441 struct version_info *info;
1442
1443 if (TREE_CODE (op) != SSA_NAME
1444 || virtual_operand_p (op))
1445 return;
1446
1447 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1448 if (bb
1449 && flow_bb_inside_loop_p (data->current_loop, bb))
1450 return;
1451
1452 info = name_info (data, op);
1453 info->name = op;
1454 info->has_nonlin_use |= nonlinear_use;
1455 if (!info->inv_id)
1456 info->inv_id = ++data->max_inv_id;
1457 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1458 }
1459
1460 /* Checks whether the use OP is interesting and if so, records it. */
1461
1462 static struct iv_use *
1463 find_interesting_uses_op (struct ivopts_data *data, tree op)
1464 {
1465 struct iv *iv;
1466 gimple *stmt;
1467 struct iv_use *use;
1468
1469 if (TREE_CODE (op) != SSA_NAME)
1470 return NULL;
1471
1472 iv = get_iv (data, op);
1473 if (!iv)
1474 return NULL;
1475
1476 if (iv->have_use_for)
1477 {
1478 use = iv_use (data, iv->use_id);
1479
1480 gcc_assert (use->type == USE_NONLINEAR_EXPR);
1481 return use;
1482 }
1483
1484 if (integer_zerop (iv->step))
1485 {
1486 record_invariant (data, op, true);
1487 return NULL;
1488 }
1489 iv->have_use_for = true;
1490
1491 stmt = SSA_NAME_DEF_STMT (op);
1492 gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1493 || is_gimple_assign (stmt));
1494
1495 use = record_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR);
1496 iv->use_id = use->id;
1497
1498 return use;
1499 }
1500
1501 /* Given a condition in statement STMT, checks whether it is a compare
1502 of an induction variable and an invariant. If this is the case,
1503 CONTROL_VAR is set to location of the iv, BOUND to the location of
1504 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1505 induction variable descriptions, and true is returned. If this is not
1506 the case, CONTROL_VAR and BOUND are set to the arguments of the
1507 condition and false is returned. */
1508
1509 static bool
1510 extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1511 tree **control_var, tree **bound,
1512 struct iv **iv_var, struct iv **iv_bound)
1513 {
1514 /* The objects returned when COND has constant operands. */
1515 static struct iv const_iv;
1516 static tree zero;
1517 tree *op0 = &zero, *op1 = &zero;
1518 struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1519 bool ret = false;
1520
1521 if (gimple_code (stmt) == GIMPLE_COND)
1522 {
1523 gcond *cond_stmt = as_a <gcond *> (stmt);
1524 op0 = gimple_cond_lhs_ptr (cond_stmt);
1525 op1 = gimple_cond_rhs_ptr (cond_stmt);
1526 }
1527 else
1528 {
1529 op0 = gimple_assign_rhs1_ptr (stmt);
1530 op1 = gimple_assign_rhs2_ptr (stmt);
1531 }
1532
1533 zero = integer_zero_node;
1534 const_iv.step = integer_zero_node;
1535
1536 if (TREE_CODE (*op0) == SSA_NAME)
1537 iv0 = get_iv (data, *op0);
1538 if (TREE_CODE (*op1) == SSA_NAME)
1539 iv1 = get_iv (data, *op1);
1540
1541 /* Exactly one of the compared values must be an iv, and the other one must
1542 be an invariant. */
1543 if (!iv0 || !iv1)
1544 goto end;
1545
1546 if (integer_zerop (iv0->step))
1547 {
1548 /* Control variable may be on the other side. */
1549 std::swap (op0, op1);
1550 std::swap (iv0, iv1);
1551 }
1552 ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1553
1554 end:
1555 if (control_var)
1556 *control_var = op0;
1557 if (iv_var)
1558 *iv_var = iv0;
1559 if (bound)
1560 *bound = op1;
1561 if (iv_bound)
1562 *iv_bound = iv1;
1563
1564 return ret;
1565 }
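
/* For instance (illustration only), for a condition "if (i_1 < n_2)" where
   i_1 is an iv and n_2 is loop invariant, *CONTROL_VAR points to i_1,
   *BOUND points to n_2, and true is returned; if neither operand is an iv
   with a nonzero step, false is returned instead.  */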
1566
1567 /* Checks whether the condition in STMT is interesting and if so,
1568 records it. */
1569
1570 static void
1571 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1572 {
1573 tree *var_p, *bound_p;
1574 struct iv *var_iv;
1575
1576 if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1577 {
1578 find_interesting_uses_op (data, *var_p);
1579 find_interesting_uses_op (data, *bound_p);
1580 return;
1581 }
1582
1583 record_use (data, NULL, var_iv, stmt, USE_COMPARE);
1584 }
1585
1586 /* Returns the outermost loop, relative to the loop LOOP, in which EXPR is
1587    obviously invariant, i.e. such that all of EXPR's operands are defined
1588    outside of the returned loop. Returns NULL if EXPR is not even obviously
1589    invariant in LOOP. */
1590
1591 struct loop *
1592 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1593 {
1594 basic_block def_bb;
1595 unsigned i, len;
1596
1597 if (is_gimple_min_invariant (expr))
1598 return current_loops->tree_root;
1599
1600 if (TREE_CODE (expr) == SSA_NAME)
1601 {
1602 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1603 if (def_bb)
1604 {
1605 if (flow_bb_inside_loop_p (loop, def_bb))
1606 return NULL;
1607 return superloop_at_depth (loop,
1608 loop_depth (def_bb->loop_father) + 1);
1609 }
1610
1611 return current_loops->tree_root;
1612 }
1613
1614 if (!EXPR_P (expr))
1615 return NULL;
1616
1617 unsigned maxdepth = 0;
1618 len = TREE_OPERAND_LENGTH (expr);
1619 for (i = 0; i < len; i++)
1620 {
1621 struct loop *ivloop;
1622 if (!TREE_OPERAND (expr, i))
1623 continue;
1624
1625 ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1626 if (!ivloop)
1627 return NULL;
1628 maxdepth = MAX (maxdepth, loop_depth (ivloop));
1629 }
1630
1631 return superloop_at_depth (loop, maxdepth);
1632 }
1633
1634 /* Returns true if expression EXPR is obviously invariant in LOOP,
1635 i.e. if all its operands are defined outside of the LOOP. LOOP
1636 should not be the function body. */
1637
1638 bool
1639 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1640 {
1641 basic_block def_bb;
1642 unsigned i, len;
1643
1644 gcc_assert (loop_depth (loop) > 0);
1645
1646 if (is_gimple_min_invariant (expr))
1647 return true;
1648
1649 if (TREE_CODE (expr) == SSA_NAME)
1650 {
1651 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1652 if (def_bb
1653 && flow_bb_inside_loop_p (loop, def_bb))
1654 return false;
1655
1656 return true;
1657 }
1658
1659 if (!EXPR_P (expr))
1660 return false;
1661
1662 len = TREE_OPERAND_LENGTH (expr);
1663 for (i = 0; i < len; i++)
1664 if (TREE_OPERAND (expr, i)
1665 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1666 return false;
1667
1668 return true;
1669 }
1670
1671 /* Given an expression EXPR that computes inductive values with respect
1672    to the loop recorded in DATA, return the biv from which EXPR is derived,
1673    by tracing definition chains of ssa variables in EXPR. */
1674
1675 static struct iv*
1676 find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1677 {
1678 struct iv *iv;
1679 unsigned i, n;
1680 tree e2, e1;
1681 enum tree_code code;
1682 gimple *stmt;
1683
1684 if (expr == NULL_TREE)
1685 return NULL;
1686
1687 if (is_gimple_min_invariant (expr))
1688 return NULL;
1689
1690 code = TREE_CODE (expr);
1691 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1692 {
1693 n = TREE_OPERAND_LENGTH (expr);
1694 for (i = 0; i < n; i++)
1695 {
1696 iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1697 if (iv)
1698 return iv;
1699 }
1700 }
1701
1702 /* Stop if it's not ssa name. */
1703 if (code != SSA_NAME)
1704 return NULL;
1705
1706 iv = get_iv (data, expr);
1707 if (!iv || integer_zerop (iv->step))
1708 return NULL;
1709 else if (iv->biv_p)
1710 return iv;
1711
1712 stmt = SSA_NAME_DEF_STMT (expr);
1713 if (gphi *phi = dyn_cast <gphi *> (stmt))
1714 {
1715 ssa_op_iter iter;
1716 use_operand_p use_p;
1717
1718 if (virtual_operand_p (gimple_phi_result (phi)))
1719 return NULL;
1720
1721 FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1722 {
1723 tree use = USE_FROM_PTR (use_p);
1724 iv = find_deriving_biv_for_expr (data, use);
1725 if (iv)
1726 return iv;
1727 }
1728 return NULL;
1729 }
1730 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1731 return NULL;
1732
1733 e1 = gimple_assign_rhs1 (stmt);
1734 code = gimple_assign_rhs_code (stmt);
1735 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1736 return find_deriving_biv_for_expr (data, e1);
1737
1738 switch (code)
1739 {
1740 case MULT_EXPR:
1741 case PLUS_EXPR:
1742 case MINUS_EXPR:
1743 case POINTER_PLUS_EXPR:
1744 /* Increments, decrements and multiplications by a constant
1745 are simple. */
1746 e2 = gimple_assign_rhs2 (stmt);
1747 iv = find_deriving_biv_for_expr (data, e2);
1748 if (iv)
1749 return iv;
1750
1751 /* Fallthru. */
1752 CASE_CONVERT:
1753 /* Casts are simple. */
1754 return find_deriving_biv_for_expr (data, e1);
1755
1756 default:
1757 break;
1758 }
1759
1760 return NULL;
1761 }
1762
1763 /* Record that BIV, together with any biv that is its predecessor or
1764    successor (differs from it by one step), is used in an address type use. */
1765
1766 static void
1767 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1768 {
1769 unsigned i;
1770 tree type, base_1, base_2;
1771 bitmap_iterator bi;
1772
1773 if (!biv || !biv->biv_p || integer_zerop (biv->step)
1774 || biv->have_address_use || !biv->no_overflow)
1775 return;
1776
1777 type = TREE_TYPE (biv->base);
1778 if (!INTEGRAL_TYPE_P (type))
1779 return;
1780
1781 biv->have_address_use = true;
1782 data->bivs_not_used_in_addr--;
1783 base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
1784 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1785 {
1786 struct iv *iv = ver_info (data, i)->iv;
1787
1788 if (!iv || !iv->biv_p || integer_zerop (iv->step)
1789 || iv->have_address_use || !iv->no_overflow)
1790 continue;
1791
1792 if (type != TREE_TYPE (iv->base)
1793 || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
1794 continue;
1795
1796 if (!operand_equal_p (biv->step, iv->step, 0))
1797 continue;
1798
1799 base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
1800 if (operand_equal_p (base_1, iv->base, 0)
1801 || operand_equal_p (base_2, biv->base, 0))
1802 {
1803 iv->have_address_use = true;
1804 data->bivs_not_used_in_addr--;
1805 }
1806 }
1807 }
1808
1809 /* Cumulates the steps of indices into DATA and replaces their values with the
1810 initial ones. Returns false when the value of the index cannot be determined.
1811 Callback for for_each_index. */
1812
1813 struct ifs_ivopts_data
1814 {
1815 struct ivopts_data *ivopts_data;
1816 gimple *stmt;
1817 tree step;
1818 };
1819
1820 static bool
1821 idx_find_step (tree base, tree *idx, void *data)
1822 {
1823 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1824 struct iv *iv;
1825 bool use_overflow_semantics = false;
1826 tree step, iv_base, iv_step, lbound, off;
1827 struct loop *loop = dta->ivopts_data->current_loop;
1828
1829 /* If base is a component ref, require that the offset of the reference
1830 be invariant. */
1831 if (TREE_CODE (base) == COMPONENT_REF)
1832 {
1833 off = component_ref_field_offset (base);
1834 return expr_invariant_in_loop_p (loop, off);
1835 }
1836
1837 /* If base is array, first check whether we will be able to move the
1838 reference out of the loop (in order to take its address in strength
1839 reduction). In order for this to work we need both lower bound
1840 and step to be loop invariants. */
1841 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1842 {
1843 /* Moreover, for a range, the size needs to be invariant as well. */
1844 if (TREE_CODE (base) == ARRAY_RANGE_REF
1845 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1846 return false;
1847
1848 step = array_ref_element_size (base);
1849 lbound = array_ref_low_bound (base);
1850
1851 if (!expr_invariant_in_loop_p (loop, step)
1852 || !expr_invariant_in_loop_p (loop, lbound))
1853 return false;
1854 }
1855
1856 if (TREE_CODE (*idx) != SSA_NAME)
1857 return true;
1858
1859 iv = get_iv (dta->ivopts_data, *idx);
1860 if (!iv)
1861 return false;
1862
1863 /* XXX For a base of *D42 with iv->base being &x[0], we produce
1864    *&x[0], which is not folded and does not trigger the
1865    ARRAY_REF path below. */
1866 *idx = iv->base;
1867
1868 if (integer_zerop (iv->step))
1869 return true;
1870
1871 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1872 {
1873 step = array_ref_element_size (base);
1874
1875 /* We only handle addresses whose step is an integer constant. */
1876 if (TREE_CODE (step) != INTEGER_CST)
1877 return false;
1878 }
1879 else
1880 /* The step for pointer arithmetic is already 1 byte. */
1881 step = size_one_node;
1882
1883 iv_base = iv->base;
1884 iv_step = iv->step;
1885 if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
1886 use_overflow_semantics = true;
1887
1888 if (!convert_affine_scev (dta->ivopts_data->current_loop,
1889 sizetype, &iv_base, &iv_step, dta->stmt,
1890 use_overflow_semantics))
1891 {
1892 /* The index might wrap. */
1893 return false;
1894 }
1895
1896 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1897 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1898
1899 if (dta->ivopts_data->bivs_not_used_in_addr)
1900 {
1901 if (!iv->biv_p)
1902 iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
1903
1904 record_biv_for_address_use (dta->ivopts_data, iv);
1905 }
1906 return true;
1907 }
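
/* As a sketch (not actual output), for a reference a[i_1] where i_1 is an iv
   with step 1 and the element size of a is 4, the index is replaced by the
   iv's base and 4 is added to DTA->step; the accumulated step then describes
   how the whole address advances per iteration.  */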
1908
1909 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1910 object is passed to it in DATA. */
1911
1912 static bool
1913 idx_record_use (tree base, tree *idx,
1914 void *vdata)
1915 {
1916 struct ivopts_data *data = (struct ivopts_data *) vdata;
1917 find_interesting_uses_op (data, *idx);
1918 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1919 {
1920 find_interesting_uses_op (data, array_ref_element_size (base));
1921 find_interesting_uses_op (data, array_ref_low_bound (base));
1922 }
1923 return true;
1924 }
1925
1926 /* If we can prove that TOP = cst * BOT for some constant cst,
1927 store cst to MUL and return true. Otherwise return false.
1928 The returned value is always sign-extended, regardless of the
1929 signedness of TOP and BOT. */
1930
1931 static bool
1932 constant_multiple_of (tree top, tree bot, widest_int *mul)
1933 {
1934 tree mby;
1935 enum tree_code code;
1936 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1937 widest_int res, p0, p1;
1938
1939 STRIP_NOPS (top);
1940 STRIP_NOPS (bot);
1941
1942 if (operand_equal_p (top, bot, 0))
1943 {
1944 *mul = 1;
1945 return true;
1946 }
1947
1948 code = TREE_CODE (top);
1949 switch (code)
1950 {
1951 case MULT_EXPR:
1952 mby = TREE_OPERAND (top, 1);
1953 if (TREE_CODE (mby) != INTEGER_CST)
1954 return false;
1955
1956 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1957 return false;
1958
1959 *mul = wi::sext (res * wi::to_widest (mby), precision);
1960 return true;
1961
1962 case PLUS_EXPR:
1963 case MINUS_EXPR:
1964 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1965 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1966 return false;
1967
1968 if (code == MINUS_EXPR)
1969 p1 = -p1;
1970 *mul = wi::sext (p0 + p1, precision);
1971 return true;
1972
1973 case INTEGER_CST:
1974 if (TREE_CODE (bot) != INTEGER_CST)
1975 return false;
1976
1977 p0 = widest_int::from (top, SIGNED);
1978 p1 = widest_int::from (bot, SIGNED);
1979 if (p1 == 0)
1980 return false;
1981 *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
1982 return res == 0;
1983
1984 default:
1985 return false;
1986 }
1987 }
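
/* For example (illustration only), for TOP = i_1 * 8 + i_1 * 4 and
   BOT = i_1 this stores 12 to *MUL and returns true, while for
   TOP = i_1 + 1 and BOT = i_1 it returns false.  */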
1988
1989 /* Return true if memory reference REF with step STEP may be unaligned. */
1990
1991 static bool
1992 may_be_unaligned_p (tree ref, tree step)
1993 {
1994 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1995 thus they are not misaligned. */
1996 if (TREE_CODE (ref) == TARGET_MEM_REF)
1997 return false;
1998
1999 unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2000 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2001 align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2002
2003 unsigned HOST_WIDE_INT bitpos;
2004 unsigned int ref_align;
2005 get_object_alignment_1 (ref, &ref_align, &bitpos);
2006 if (ref_align < align
2007 || (bitpos % align) != 0
2008 || (bitpos % BITS_PER_UNIT) != 0)
2009 return true;
2010
2011 unsigned int trailing_zeros = tree_ctz (step);
2012 if (trailing_zeros < HOST_BITS_PER_INT
2013 && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2014 return true;
2015
2016 return false;
2017 }
2018
2019 /* Return true if EXPR may be non-addressable. */
2020
2021 bool
2022 may_be_nonaddressable_p (tree expr)
2023 {
2024 switch (TREE_CODE (expr))
2025 {
2026 case TARGET_MEM_REF:
2027 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2028 target, thus they are always addressable. */
2029 return false;
2030
2031 case MEM_REF:
2032 /* Likewise for MEM_REFs, modulo the storage order. */
2033 return REF_REVERSE_STORAGE_ORDER (expr);
2034
2035 case BIT_FIELD_REF:
2036 if (REF_REVERSE_STORAGE_ORDER (expr))
2037 return true;
2038 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2039
2040 case COMPONENT_REF:
2041 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2042 return true;
2043 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2044 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2045
2046 case ARRAY_REF:
2047 case ARRAY_RANGE_REF:
2048 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2049 return true;
2050 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2051
2052 case VIEW_CONVERT_EXPR:
2053 /* This kind of view-conversion may wrap non-addressable objects
2054 and make them look addressable. After some processing the
2055 non-addressability may be uncovered again, causing ADDR_EXPRs
2056 of inappropriate objects to be built. */
2057 if (is_gimple_reg (TREE_OPERAND (expr, 0))
2058 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2059 return true;
2060 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2061
2062 CASE_CONVERT:
2063 return true;
2064
2065 default:
2066 break;
2067 }
2068
2069 return false;
2070 }
2071
2072 static tree
2073 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);
2074
2075 /* Record a use of type USE_TYPE at *USE_P in STMT whose value is IV.
2076 If there is an existing use with the same stripped iv base and step,
2077 this function records this one as a sub use of that one; otherwise it records
2078 it as a normal one. */
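/* For instance, address uses a[i] and a[i + 1] in the same loop share the
   stripped base and the step, so the second one is recorded as a sub use
   of the first (illustrative example).  */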
2079
2080 static struct iv_use *
2081 record_group_use (struct ivopts_data *data, tree *use_p,
2082 struct iv *iv, gimple *stmt, enum use_type use_type)
2083 {
2084 unsigned int i;
2085 struct iv_use *use;
2086 tree addr_base;
2087 unsigned HOST_WIDE_INT addr_offset;
2088
2089 /* Only support sub uses for address type uses, that is, those with a base
2090 object. */
2091 if (!iv->base_object)
2092 return record_use (data, use_p, iv, stmt, use_type);
2093
2094 addr_base = strip_offset (iv->base, &addr_offset);
2095 for (i = 0; i < n_iv_uses (data); i++)
2096 {
2097 use = iv_use (data, i);
2098 if (use->type != USE_ADDRESS || !use->iv->base_object)
2099 continue;
2100
2101 /* Check if it has the same stripped base and step. */
2102 if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
2103 && operand_equal_p (iv->step, use->iv->step, 0)
2104 && operand_equal_p (addr_base, use->addr_base, 0))
2105 break;
2106 }
2107
2108 if (i == n_iv_uses (data))
2109 return record_use (data, use_p, iv, stmt,
2110 use_type, addr_base, addr_offset);
2111 else
2112 return record_sub_use (data, use_p, iv, stmt,
2113 use_type, addr_base, addr_offset, i);
2114 }
2115
2116 /* Finds addresses in *OP_P inside STMT. */
2117
2118 static void
2119 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2120 tree *op_p)
2121 {
2122 tree base = *op_p, step = size_zero_node;
2123 struct iv *civ;
2124 struct ifs_ivopts_data ifs_ivopts_data;
2125
2126 /* Do not play with volatile memory references. A bit too conservative,
2127 perhaps, but safe. */
2128 if (gimple_has_volatile_ops (stmt))
2129 goto fail;
2130
2131 /* Ignore bitfields for now. Not really something terribly complicated
2132 to handle. TODO. */
2133 if (TREE_CODE (base) == BIT_FIELD_REF)
2134 goto fail;
2135
2136 base = unshare_expr (base);
2137
2138 if (TREE_CODE (base) == TARGET_MEM_REF)
2139 {
2140 tree type = build_pointer_type (TREE_TYPE (base));
2141 tree astep;
2142
2143 if (TMR_BASE (base)
2144 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2145 {
2146 civ = get_iv (data, TMR_BASE (base));
2147 if (!civ)
2148 goto fail;
2149
2150 TMR_BASE (base) = civ->base;
2151 step = civ->step;
2152 }
2153 if (TMR_INDEX2 (base)
2154 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2155 {
2156 civ = get_iv (data, TMR_INDEX2 (base));
2157 if (!civ)
2158 goto fail;
2159
2160 TMR_INDEX2 (base) = civ->base;
2161 step = civ->step;
2162 }
2163 if (TMR_INDEX (base)
2164 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2165 {
2166 civ = get_iv (data, TMR_INDEX (base));
2167 if (!civ)
2168 goto fail;
2169
2170 TMR_INDEX (base) = civ->base;
2171 astep = civ->step;
2172
2173 if (astep)
2174 {
2175 if (TMR_STEP (base))
2176 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2177
2178 step = fold_build2 (PLUS_EXPR, type, step, astep);
2179 }
2180 }
2181
2182 if (integer_zerop (step))
2183 goto fail;
2184 base = tree_mem_ref_addr (type, base);
2185 }
2186 else
2187 {
2188 ifs_ivopts_data.ivopts_data = data;
2189 ifs_ivopts_data.stmt = stmt;
2190 ifs_ivopts_data.step = size_zero_node;
2191 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2192 || integer_zerop (ifs_ivopts_data.step))
2193 goto fail;
2194 step = ifs_ivopts_data.step;
2195
2196 /* Check that the base expression is addressable. This needs
2197 to be done after substituting bases of IVs into it. */
2198 if (may_be_nonaddressable_p (base))
2199 goto fail;
2200
2201 /* Moreover, on strict alignment platforms, check that it is
2202 sufficiently aligned. */
2203 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2204 goto fail;
2205
2206 base = build_fold_addr_expr (base);
2207
2208 /* Substituting bases of IVs into the base expression might
2209 have caused folding opportunities. */
2210 if (TREE_CODE (base) == ADDR_EXPR)
2211 {
2212 tree *ref = &TREE_OPERAND (base, 0);
2213 while (handled_component_p (*ref))
2214 ref = &TREE_OPERAND (*ref, 0);
2215 if (TREE_CODE (*ref) == MEM_REF)
2216 {
2217 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2218 TREE_OPERAND (*ref, 0),
2219 TREE_OPERAND (*ref, 1));
2220 if (tem)
2221 *ref = tem;
2222 }
2223 }
2224 }
2225
2226 civ = alloc_iv (data, base, step);
2227 record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
2228 return;
2229
2230 fail:
2231 for_each_index (op_p, idx_record_use, data);
2232 }
2233
2234 /* Finds and records invariants used in STMT. */
2235
2236 static void
2237 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2238 {
2239 ssa_op_iter iter;
2240 use_operand_p use_p;
2241 tree op;
2242
2243 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2244 {
2245 op = USE_FROM_PTR (use_p);
2246 record_invariant (data, op, false);
2247 }
2248 }
2249
2250 /* Finds interesting uses of induction variables in the statement STMT. */
2251
2252 static void
2253 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2254 {
2255 struct iv *iv;
2256 tree op, *lhs, *rhs;
2257 ssa_op_iter iter;
2258 use_operand_p use_p;
2259 enum tree_code code;
2260
2261 find_invariants_stmt (data, stmt);
2262
2263 if (gimple_code (stmt) == GIMPLE_COND)
2264 {
2265 find_interesting_uses_cond (data, stmt);
2266 return;
2267 }
2268
2269 if (is_gimple_assign (stmt))
2270 {
2271 lhs = gimple_assign_lhs_ptr (stmt);
2272 rhs = gimple_assign_rhs1_ptr (stmt);
2273
2274 if (TREE_CODE (*lhs) == SSA_NAME)
2275 {
2276 /* If the statement defines an induction variable, the uses are not
2277 interesting by themselves. */
2278
2279 iv = get_iv (data, *lhs);
2280
2281 if (iv && !integer_zerop (iv->step))
2282 return;
2283 }
2284
2285 code = gimple_assign_rhs_code (stmt);
2286 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2287 && (REFERENCE_CLASS_P (*rhs)
2288 || is_gimple_val (*rhs)))
2289 {
2290 if (REFERENCE_CLASS_P (*rhs))
2291 find_interesting_uses_address (data, stmt, rhs);
2292 else
2293 find_interesting_uses_op (data, *rhs);
2294
2295 if (REFERENCE_CLASS_P (*lhs))
2296 find_interesting_uses_address (data, stmt, lhs);
2297 return;
2298 }
2299 else if (TREE_CODE_CLASS (code) == tcc_comparison)
2300 {
2301 find_interesting_uses_cond (data, stmt);
2302 return;
2303 }
2304
2305 /* TODO -- we should also handle address uses of type
2306
2307 memory = call (whatever);
2308
2309 and
2310
2311 call (memory). */
2312 }
2313
2314 if (gimple_code (stmt) == GIMPLE_PHI
2315 && gimple_bb (stmt) == data->current_loop->header)
2316 {
2317 iv = get_iv (data, PHI_RESULT (stmt));
2318
2319 if (iv && !integer_zerop (iv->step))
2320 return;
2321 }
2322
2323 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2324 {
2325 op = USE_FROM_PTR (use_p);
2326
2327 if (TREE_CODE (op) != SSA_NAME)
2328 continue;
2329
2330 iv = get_iv (data, op);
2331 if (!iv)
2332 continue;
2333
2334 find_interesting_uses_op (data, op);
2335 }
2336 }
2337
2338 /* Finds interesting uses of induction variables outside of loops
2339 on loop exit edge EXIT. */
2340
2341 static void
2342 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2343 {
2344 gphi *phi;
2345 gphi_iterator psi;
2346 tree def;
2347
2348 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2349 {
2350 phi = psi.phi ();
2351 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2352 if (!virtual_operand_p (def))
2353 find_interesting_uses_op (data, def);
2354 }
2355 }
2356
2357 /* Finds uses of the induction variables that are interesting. */
2358
2359 static void
2360 find_interesting_uses (struct ivopts_data *data)
2361 {
2362 basic_block bb;
2363 gimple_stmt_iterator bsi;
2364 basic_block *body = get_loop_body (data->current_loop);
2365 unsigned i;
2366 struct version_info *info;
2367 edge e;
2368
2369 if (dump_file && (dump_flags & TDF_DETAILS))
2370 fprintf (dump_file, "Uses:\n\n");
2371
2372 for (i = 0; i < data->current_loop->num_nodes; i++)
2373 {
2374 edge_iterator ei;
2375 bb = body[i];
2376
2377 FOR_EACH_EDGE (e, ei, bb->succs)
2378 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2379 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2380 find_interesting_uses_outside (data, e);
2381
2382 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2383 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2384 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2385 if (!is_gimple_debug (gsi_stmt (bsi)))
2386 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2387 }
2388
2389 if (dump_file && (dump_flags & TDF_DETAILS))
2390 {
2391 bitmap_iterator bi;
2392
2393 fprintf (dump_file, "\n");
2394
2395 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2396 {
2397 info = ver_info (data, i);
2398 if (info->inv_id)
2399 {
2400 fprintf (dump_file, " ");
2401 print_generic_expr (dump_file, info->name, TDF_SLIM);
2402 fprintf (dump_file, " is invariant (%d)%s\n",
2403 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
2404 }
2405 }
2406
2407 fprintf (dump_file, "\n");
2408 }
2409
2410 free (body);
2411 }
2412
2413 /* Compute the maximum offset supported by the [base + offset] addressing
2414 mode for the memory reference represented by USE. */
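/* For example, on a target whose [base + offset] addresses accept 12-bit
   signed displacements, the probing below settles on 2047 (illustrative;
   the actual value is queried from the target).  */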
2415
2416 static HOST_WIDE_INT
2417 compute_max_addr_offset (struct iv_use *use)
2418 {
2419 int width;
2420 rtx reg, addr;
2421 HOST_WIDE_INT i, off;
2422 unsigned list_index, num;
2423 addr_space_t as;
2424 machine_mode mem_mode, addr_mode;
2425 static vec<HOST_WIDE_INT> max_offset_list;
2426
2427 as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2428 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2429
2430 num = max_offset_list.length ();
2431 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2432 if (list_index >= num)
2433 {
2434 max_offset_list.safe_grow (list_index + MAX_MACHINE_MODE);
2435 for (; num < max_offset_list.length (); num++)
2436 max_offset_list[num] = -1;
2437 }
2438
2439 off = max_offset_list[list_index];
2440 if (off != -1)
2441 return off;
2442
2443 addr_mode = targetm.addr_space.address_mode (as);
2444 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2445 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2446
2447 width = GET_MODE_BITSIZE (addr_mode) - 1;
2448 if (width > (HOST_BITS_PER_WIDE_INT - 1))
2449 width = HOST_BITS_PER_WIDE_INT - 1;
2450
2451 for (i = width; i > 0; i--)
2452 {
2453 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
2454 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2455 if (memory_address_addr_space_p (mem_mode, addr, as))
2456 break;
2457
2458 /* For some strict-alignment targets, the offset must be naturally
2459 aligned. Try an aligned offset if mem_mode is not QImode. */
2460 off = ((unsigned HOST_WIDE_INT) 1 << i);
2461 if (off > GET_MODE_SIZE (mem_mode) && mem_mode != QImode)
2462 {
2463 off -= GET_MODE_SIZE (mem_mode);
2464 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2465 if (memory_address_addr_space_p (mem_mode, addr, as))
2466 break;
2467 }
2468 }
2469 if (i == 0)
2470 off = 0;
2471
2472 max_offset_list[list_index] = off;
2473 return off;
2474 }
2475
2476 /* Check if all small groups should be split. Return true if and
2477 only if:
2478
2479 1) At least one group contains two uses with different offsets.
2480 2) No group contains more than two uses with different offsets.
2481
2482 Return false otherwise. We want to split such groups because:
2483
2484 1) Small groups don't have much benefit and may interfere with
2485 general candidate selection.
2486 2) The problem size with only small groups is usually small, and the
2487 general algorithm can handle it well.
2488
2489 TODO -- Above claim may not hold when auto increment is supported. */
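/* As an illustration, groups with offsets {0, 0, 4} and {8, 8} would be
   split (one group has exactly two distinct offsets, none has more), while
   the presence of a group with offsets {0, 4, 8} prevents splitting.  */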
2490
2491 static bool
2492 split_all_small_groups (struct ivopts_data *data)
2493 {
2494 bool split_p = false;
2495 unsigned int i, n, distinct;
2496 struct iv_use *pre, *use;
2497
2498 n = n_iv_uses (data);
2499 for (i = 0; i < n; i++)
2500 {
2501 use = iv_use (data, i);
2502 if (!use->next)
2503 continue;
2504
2505 distinct = 1;
2506 gcc_assert (use->type == USE_ADDRESS);
2507 for (pre = use, use = use->next; use; pre = use, use = use->next)
2508 {
2509 if (pre->addr_offset != use->addr_offset)
2510 distinct++;
2511
2512 if (distinct > 2)
2513 return false;
2514 }
2515 if (distinct == 2)
2516 split_p = true;
2517 }
2518
2519 return split_p;
2520 }
2521
2522 /* For each group of address type uses, this function further groups
2523 these uses according to the maximum offset supported by the target's
2524 [base + offset] addressing mode. */
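/* For instance, with a maximum supported offset of 4095, uses at offsets
   0, 8 and 5000 against the same base end up as two groups: {0, 8} and
   {5000} (illustrative example).  */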
2525
2526 static void
2527 group_address_uses (struct ivopts_data *data)
2528 {
2529 HOST_WIDE_INT max_offset = -1;
2530 unsigned int i, n, sub_id;
2531 struct iv_use *pre, *use;
2532 unsigned HOST_WIDE_INT addr_offset_first;
2533
2534 /* Reset max offset to split all small groups. */
2535 if (split_all_small_groups (data))
2536 max_offset = 0;
2537
2538 n = n_iv_uses (data);
2539 for (i = 0; i < n; i++)
2540 {
2541 use = iv_use (data, i);
2542 if (!use->next)
2543 continue;
2544
2545 gcc_assert (use->type == USE_ADDRESS);
2546 if (max_offset != 0)
2547 max_offset = compute_max_addr_offset (use);
2548
2549 while (use)
2550 {
2551 sub_id = 0;
2552 addr_offset_first = use->addr_offset;
2553 /* Only uses whose offset relative to the first use fits in the
2554 offset part can be grouped together. */
2555 for (pre = use, use = use->next;
2556 use && (use->addr_offset - addr_offset_first
2557 <= (unsigned HOST_WIDE_INT) max_offset);
2558 pre = use, use = use->next)
2559 {
2560 use->id = pre->id;
2561 use->sub_id = ++sub_id;
2562 }
2563
2564 /* Break the list and create a new group. */
2565 if (use)
2566 {
2567 pre->next = NULL;
2568 use->id = n_iv_uses (data);
2569 use->related_cands = BITMAP_ALLOC (NULL);
2570 data->iv_uses.safe_push (use);
2571 }
2572 }
2573 }
2574
2575 if (dump_file && (dump_flags & TDF_DETAILS))
2576 dump_uses (dump_file, data);
2577 }
2578
2579 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2580 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2581 we are at the top-level of the processed address. */
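/* For instance, for EXPR = &a[4] with 'int a[10]' (4-byte int), the
   strip_offset wrapper below returns the address of the array and sets
   *OFFSET to 16 (illustrative example).  */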
2582
2583 static tree
2584 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2585 HOST_WIDE_INT *offset)
2586 {
2587 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2588 enum tree_code code;
2589 tree type, orig_type = TREE_TYPE (expr);
2590 HOST_WIDE_INT off0, off1, st;
2591 tree orig_expr = expr;
2592
2593 STRIP_NOPS (expr);
2594
2595 type = TREE_TYPE (expr);
2596 code = TREE_CODE (expr);
2597 *offset = 0;
2598
2599 switch (code)
2600 {
2601 case INTEGER_CST:
2602 if (!cst_and_fits_in_hwi (expr)
2603 || integer_zerop (expr))
2604 return orig_expr;
2605
2606 *offset = int_cst_value (expr);
2607 return build_int_cst (orig_type, 0);
2608
2609 case POINTER_PLUS_EXPR:
2610 case PLUS_EXPR:
2611 case MINUS_EXPR:
2612 op0 = TREE_OPERAND (expr, 0);
2613 op1 = TREE_OPERAND (expr, 1);
2614
2615 op0 = strip_offset_1 (op0, false, false, &off0);
2616 op1 = strip_offset_1 (op1, false, false, &off1);
2617
2618 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2619 if (op0 == TREE_OPERAND (expr, 0)
2620 && op1 == TREE_OPERAND (expr, 1))
2621 return orig_expr;
2622
2623 if (integer_zerop (op1))
2624 expr = op0;
2625 else if (integer_zerop (op0))
2626 {
2627 if (code == MINUS_EXPR)
2628 expr = fold_build1 (NEGATE_EXPR, type, op1);
2629 else
2630 expr = op1;
2631 }
2632 else
2633 expr = fold_build2 (code, type, op0, op1);
2634
2635 return fold_convert (orig_type, expr);
2636
2637 case MULT_EXPR:
2638 op1 = TREE_OPERAND (expr, 1);
2639 if (!cst_and_fits_in_hwi (op1))
2640 return orig_expr;
2641
2642 op0 = TREE_OPERAND (expr, 0);
2643 op0 = strip_offset_1 (op0, false, false, &off0);
2644 if (op0 == TREE_OPERAND (expr, 0))
2645 return orig_expr;
2646
2647 *offset = off0 * int_cst_value (op1);
2648 if (integer_zerop (op0))
2649 expr = op0;
2650 else
2651 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2652
2653 return fold_convert (orig_type, expr);
2654
2655 case ARRAY_REF:
2656 case ARRAY_RANGE_REF:
2657 if (!inside_addr)
2658 return orig_expr;
2659
2660 step = array_ref_element_size (expr);
2661 if (!cst_and_fits_in_hwi (step))
2662 break;
2663
2664 st = int_cst_value (step);
2665 op1 = TREE_OPERAND (expr, 1);
2666 op1 = strip_offset_1 (op1, false, false, &off1);
2667 *offset = off1 * st;
2668
2669 if (top_compref
2670 && integer_zerop (op1))
2671 {
2672 /* Strip the component reference completely. */
2673 op0 = TREE_OPERAND (expr, 0);
2674 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2675 *offset += off0;
2676 return op0;
2677 }
2678 break;
2679
2680 case COMPONENT_REF:
2681 {
2682 tree field;
2683
2684 if (!inside_addr)
2685 return orig_expr;
2686
2687 tmp = component_ref_field_offset (expr);
2688 field = TREE_OPERAND (expr, 1);
2689 if (top_compref
2690 && cst_and_fits_in_hwi (tmp)
2691 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2692 {
2693 HOST_WIDE_INT boffset, abs_off;
2694
2695 /* Strip the component reference completely. */
2696 op0 = TREE_OPERAND (expr, 0);
2697 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2698 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2699 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2700 if (boffset < 0)
2701 abs_off = -abs_off;
2702
2703 *offset = off0 + int_cst_value (tmp) + abs_off;
2704 return op0;
2705 }
2706 }
2707 break;
2708
2709 case ADDR_EXPR:
2710 op0 = TREE_OPERAND (expr, 0);
2711 op0 = strip_offset_1 (op0, true, true, &off0);
2712 *offset += off0;
2713
2714 if (op0 == TREE_OPERAND (expr, 0))
2715 return orig_expr;
2716
2717 expr = build_fold_addr_expr (op0);
2718 return fold_convert (orig_type, expr);
2719
2720 case MEM_REF:
2721 /* ??? Offset operand? */
2722 inside_addr = false;
2723 break;
2724
2725 default:
2726 return orig_expr;
2727 }
2728
2729 /* Default handling of expressions for which we want to recurse into
2730 the first operand. */
2731 op0 = TREE_OPERAND (expr, 0);
2732 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2733 *offset += off0;
2734
2735 if (op0 == TREE_OPERAND (expr, 0)
2736 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2737 return orig_expr;
2738
2739 expr = copy_node (expr);
2740 TREE_OPERAND (expr, 0) = op0;
2741 if (op1)
2742 TREE_OPERAND (expr, 1) = op1;
2743
2744 /* Inside an address, we might strip the top-level component references,
2745 thus changing the type of the expression. Handling of ADDR_EXPR
2746 will fix that. */
2747 expr = fold_convert (orig_type, expr);
2748
2749 return expr;
2750 }
2751
2752 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2753
2754 static tree
2755 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2756 {
2757 HOST_WIDE_INT off;
2758 tree core = strip_offset_1 (expr, false, false, &off);
2759 *offset = off;
2760 return core;
2761 }
2762
2763 /* Returns a variant of TYPE that can be used as a base for different uses.
2764 We return an unsigned type with the same precision, which avoids problems
2765 with overflows. */
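/* For instance, 'int' maps to 'unsigned int', while a pointer type maps
   to an unsigned integer type of the same precision (both illustrative of
   the unsigned_type_for calls below).  */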
2766
2767 static tree
2768 generic_type_for (tree type)
2769 {
2770 if (POINTER_TYPE_P (type))
2771 return unsigned_type_for (type);
2772
2773 if (TYPE_UNSIGNED (type))
2774 return type;
2775
2776 return unsigned_type_for (type);
2777 }
2778
2779 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2780 the bitmap in which we should store them. */
2781
2782 static struct ivopts_data *fd_ivopts_data;
2783 static tree
2784 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2785 {
2786 bitmap *depends_on = (bitmap *) data;
2787 struct version_info *info;
2788
2789 if (TREE_CODE (*expr_p) != SSA_NAME)
2790 return NULL_TREE;
2791 info = name_info (fd_ivopts_data, *expr_p);
2792
2793 if (!info->inv_id || info->has_nonlin_use)
2794 return NULL_TREE;
2795
2796 if (!*depends_on)
2797 *depends_on = BITMAP_ALLOC (NULL);
2798 bitmap_set_bit (*depends_on, info->inv_id);
2799
2800 return NULL_TREE;
2801 }
2802
2803 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2804 position to POS. If USE is not NULL, the candidate is set as related to
2805 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2806 replacement of the final value of the iv by a direct computation. */
2807
2808 static struct iv_cand *
2809 add_candidate_1 (struct ivopts_data *data,
2810 tree base, tree step, bool important, enum iv_position pos,
2811 struct iv_use *use, gimple *incremented_at,
2812 struct iv *orig_iv = NULL)
2813 {
2814 unsigned i;
2815 struct iv_cand *cand = NULL;
2816 tree type, orig_type;
2817
2818 /* For non-original variables, make sure their values are computed in a type
2819 that does not invoke undefined behavior on overflows (since in general,
2820 we cannot prove that these induction variables are non-wrapping). */
2821 if (pos != IP_ORIGINAL)
2822 {
2823 orig_type = TREE_TYPE (base);
2824 type = generic_type_for (orig_type);
2825 if (type != orig_type)
2826 {
2827 base = fold_convert (type, base);
2828 step = fold_convert (type, step);
2829 }
2830 }
2831
2832 for (i = 0; i < n_iv_cands (data); i++)
2833 {
2834 cand = iv_cand (data, i);
2835
2836 if (cand->pos != pos)
2837 continue;
2838
2839 if (cand->incremented_at != incremented_at
2840 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2841 && cand->ainc_use != use))
2842 continue;
2843
2844 if (!cand->iv)
2845 {
2846 if (!base && !step)
2847 break;
2848
2849 continue;
2850 }
2851
2852 if (!base && !step)
2853 continue;
2854
2855 if (operand_equal_p (base, cand->iv->base, 0)
2856 && operand_equal_p (step, cand->iv->step, 0)
2857 && (TYPE_PRECISION (TREE_TYPE (base))
2858 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2859 break;
2860 }
2861
2862 if (i == n_iv_cands (data))
2863 {
2864 cand = XCNEW (struct iv_cand);
2865 cand->id = i;
2866
2867 if (!base && !step)
2868 cand->iv = NULL;
2869 else
2870 cand->iv = alloc_iv (data, base, step);
2871
2872 cand->pos = pos;
2873 if (pos != IP_ORIGINAL && cand->iv)
2874 {
2875 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2876 cand->var_after = cand->var_before;
2877 }
2878 cand->important = important;
2879 cand->incremented_at = incremented_at;
2880 data->iv_candidates.safe_push (cand);
2881
2882 if (step
2883 && TREE_CODE (step) != INTEGER_CST)
2884 {
2885 fd_ivopts_data = data;
2886 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2887 }
2888
2889 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2890 cand->ainc_use = use;
2891 else
2892 cand->ainc_use = NULL;
2893
2894 cand->orig_iv = orig_iv;
2895 if (dump_file && (dump_flags & TDF_DETAILS))
2896 dump_cand (dump_file, cand);
2897 }
2898
2899 if (important && !cand->important)
2900 {
2901 cand->important = true;
2902 if (dump_file && (dump_flags & TDF_DETAILS))
2903 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2904 }
2905
2906 if (use)
2907 {
2908 bitmap_set_bit (use->related_cands, i);
2909 if (dump_file && (dump_flags & TDF_DETAILS))
2910 fprintf (dump_file, "Candidate %d is related to use %d\n",
2911 cand->id, use->id);
2912 }
2913
2914 return cand;
2915 }
2916
2917 /* Returns true if incrementing the induction variable at the end of the LOOP
2918 is allowed.
2919
2920 The purpose is to avoid splitting the latch edge with a biv increment, thus
2921 creating a jump, possibly confusing other optimization passes and leaving
2922 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2923 is not available (so we do not have a better alternative), or if the latch
2924 edge is already nonempty. */
2925
2926 static bool
2927 allow_ip_end_pos_p (struct loop *loop)
2928 {
2929 if (!ip_normal_pos (loop))
2930 return true;
2931
2932 if (!empty_block_p (ip_end_pos (loop)))
2933 return true;
2934
2935 return false;
2936 }
2937
2938 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2939 Important field is set to IMPORTANT. */
2940
2941 static void
2942 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2943 bool important, struct iv_use *use)
2944 {
2945 basic_block use_bb = gimple_bb (use->stmt);
2946 machine_mode mem_mode;
2947 unsigned HOST_WIDE_INT cstepi;
2948
2949 /* If we insert the increment in any position other than the standard
2950 ones, we must ensure that it is incremented once per iteration.
2951 It must not be in an inner nested loop, or one side of an if
2952 statement. */
2953 if (use_bb->loop_father != data->current_loop
2954 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2955 || stmt_could_throw_p (use->stmt)
2956 || !cst_and_fits_in_hwi (step))
2957 return;
2958
2959 cstepi = int_cst_value (step);
2960
2961 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2962 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2963 || USE_STORE_PRE_INCREMENT (mem_mode))
2964 && GET_MODE_SIZE (mem_mode) == cstepi)
2965 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2966 || USE_STORE_PRE_DECREMENT (mem_mode))
2967 && GET_MODE_SIZE (mem_mode) == -cstepi))
2968 {
2969 enum tree_code code = MINUS_EXPR;
2970 tree new_base;
2971 tree new_step = step;
2972
2973 if (POINTER_TYPE_P (TREE_TYPE (base)))
2974 {
2975 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2976 code = POINTER_PLUS_EXPR;
2977 }
2978 else
2979 new_step = fold_convert (TREE_TYPE (base), new_step);
2980 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2981 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2982 use->stmt);
2983 }
2984 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2985 || USE_STORE_POST_INCREMENT (mem_mode))
2986 && GET_MODE_SIZE (mem_mode) == cstepi)
2987 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2988 || USE_STORE_POST_DECREMENT (mem_mode))
2989 && GET_MODE_SIZE (mem_mode) == -cstepi))
2990 {
2991 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2992 use->stmt);
2993 }
2994 }
2995
2996 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2997 position to POS. If USE is not NULL, the candidate is set as related to
2998 it. The candidate computation is scheduled before exit condition and at
2999 the end of loop. */
3000
3001 static void
3002 add_candidate (struct ivopts_data *data,
3003 tree base, tree step, bool important, struct iv_use *use,
3004 struct iv *orig_iv = NULL)
3005 {
3006 gcc_assert (use == NULL || use->sub_id == 0);
3007
3008 if (ip_normal_pos (data->current_loop))
3009 add_candidate_1 (data, base, step, important,
3010 IP_NORMAL, use, NULL, orig_iv);
3011 if (ip_end_pos (data->current_loop)
3012 && allow_ip_end_pos_p (data->current_loop))
3013 add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3014 }
3015
3016 /* Adds standard iv candidates. */
3017
3018 static void
3019 add_standard_iv_candidates (struct ivopts_data *data)
3020 {
3021 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3022
3023 /* The same for `long' if it is wider than `int' and still fast enough. */
3024 if (TYPE_PRECISION
3025 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3026 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3027 add_candidate (data, build_int_cst (long_integer_type_node, 0),
3028 build_int_cst (long_integer_type_node, 1), true, NULL);
3029
3030 /* And for `long long' if it is wider than `long' and still fast enough. */
3031 if (TYPE_PRECISION
3032 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3033 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3034 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3035 build_int_cst (long_long_integer_type_node, 1), true, NULL);
3036 }
3037
3038
3039 /* Adds candidates based on the old induction variable IV. */
3040
3041 static void
3042 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3043 {
3044 gimple *phi;
3045 tree def;
3046 struct iv_cand *cand;
3047
3048 /* Check if this biv is used in an address type use. */
3049 if (iv->no_overflow && iv->have_address_use
3050 && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3051 && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3052 {
3053 tree base = fold_convert (sizetype, iv->base);
3054 tree step = fold_convert (sizetype, iv->step);
3055
3056 /* Add iv cand of same precision as index part in TARGET_MEM_REF. */
3057 add_candidate (data, base, step, true, NULL, iv);
3058 /* Add iv cand of the original type only if it has nonlinear use. */
3059 if (iv->have_use_for)
3060 add_candidate (data, iv->base, iv->step, true, NULL);
3061 }
3062 else
3063 add_candidate (data, iv->base, iv->step, true, NULL);
3064
3065 /* The same, but with initial value zero. */
3066 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3067 add_candidate (data, size_int (0), iv->step, true, NULL);
3068 else
3069 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3070 iv->step, true, NULL);
3071
3072 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3073 if (gimple_code (phi) == GIMPLE_PHI)
3074 {
3075 /* Additionally record the possibility of leaving the original iv
3076 untouched. */
3077 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3078 /* Don't add candidate if it's from another PHI node because
3079 it's an affine iv appearing in the form of PEELED_CHREC. */
3080 phi = SSA_NAME_DEF_STMT (def);
3081 if (gimple_code (phi) != GIMPLE_PHI)
3082 {
3083 cand = add_candidate_1 (data,
3084 iv->base, iv->step, true, IP_ORIGINAL, NULL,
3085 SSA_NAME_DEF_STMT (def));
3086 cand->var_before = iv->ssa_name;
3087 cand->var_after = def;
3088 }
3089 else
3090 gcc_assert (gimple_bb (phi) == data->current_loop->header);
3091 }
3092 }
3093
3094 /* Adds candidates based on the old induction variables. */
3095
3096 static void
3097 add_iv_candidate_for_bivs (struct ivopts_data *data)
3098 {
3099 unsigned i;
3100 struct iv *iv;
3101 bitmap_iterator bi;
3102
3103 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3104 {
3105 iv = ver_info (data, i)->iv;
3106 if (iv && iv->biv_p && !integer_zerop (iv->step))
3107 add_iv_candidate_for_biv (data, iv);
3108 }
3109 }
3110
3111 /* Record common candidate {BASE, STEP} derived from USE in hashtable. */
3112
3113 static void
3114 record_common_cand (struct ivopts_data *data, tree base,
3115 tree step, struct iv_use *use)
3116 {
3117 struct iv_common_cand ent;
3118 struct iv_common_cand **slot;
3119
3120 gcc_assert (use != NULL);
3121
3122 ent.base = base;
3123 ent.step = step;
3124 ent.hash = iterative_hash_expr (base, 0);
3125 ent.hash = iterative_hash_expr (step, ent.hash);
3126
3127 slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3128 if (*slot == NULL)
3129 {
3130 *slot = new iv_common_cand ();
3131 (*slot)->base = base;
3132 (*slot)->step = step;
3133 (*slot)->uses.create (8);
3134 (*slot)->hash = ent.hash;
3135 data->iv_common_cands.safe_push ((*slot));
3136 }
3137 (*slot)->uses.safe_push (use);
3138 return;
3139 }
3140
3141 /* Comparison function used to sort common candidates. */
3142
3143 static int
3144 common_cand_cmp (const void *p1, const void *p2)
3145 {
3146 unsigned n1, n2;
3147 const struct iv_common_cand *const *const ccand1
3148 = (const struct iv_common_cand *const *)p1;
3149 const struct iv_common_cand *const *const ccand2
3150 = (const struct iv_common_cand *const *)p2;
3151
3152 n1 = (*ccand1)->uses.length ();
3153 n2 = (*ccand2)->uses.length ();
3154 return n2 - n1;
3155 }
3156
3157 /* Adds IV candidates based on the common candidates recorded. */
3158
3159 static void
3160 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3161 {
3162 unsigned i, j;
3163 struct iv_cand *cand_1, *cand_2;
3164
3165 data->iv_common_cands.qsort (common_cand_cmp);
3166 for (i = 0; i < data->iv_common_cands.length (); i++)
3167 {
3168 struct iv_common_cand *ptr = data->iv_common_cands[i];
3169
3170 /* Only add IV candidate if it's derived from multiple uses. */
3171 if (ptr->uses.length () <= 1)
3172 break;
3173
3174 cand_1 = NULL;
3175 cand_2 = NULL;
3176 if (ip_normal_pos (data->current_loop))
3177 cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3178 false, IP_NORMAL, NULL, NULL);
3179
3180 if (ip_end_pos (data->current_loop)
3181 && allow_ip_end_pos_p (data->current_loop))
3182 cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3183 false, IP_END, NULL, NULL);
3184
3185 /* Bind deriving uses and the new candidates. */
3186 for (j = 0; j < ptr->uses.length (); j++)
3187 {
3188 struct iv_use *use = ptr->uses[j];
3189 if (cand_1)
3190 bitmap_set_bit (use->related_cands, cand_1->id);
3191 if (cand_2)
3192 bitmap_set_bit (use->related_cands, cand_2->id);
3193 }
3194 }
3195
3196 /* Release data since it is useless from this point. */
3197 data->iv_common_cand_tab->empty ();
3198 data->iv_common_cands.truncate (0);
3199 }
3200
3201 /* Adds candidates based on the value of USE's iv. */
3202
3203 static void
3204 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3205 {
3206 unsigned HOST_WIDE_INT offset;
3207 tree base;
3208 tree basetype;
3209 struct iv *iv = use->iv;
3210
3211 add_candidate (data, iv->base, iv->step, false, use);
3212
3213 /* Record common candidate for use in case it can be shared by others. */
3214 record_common_cand (data, iv->base, iv->step, use);
3215
3216 /* Record common candidate with initial value zero. */
3217 basetype = TREE_TYPE (iv->base);
3218 if (POINTER_TYPE_P (basetype))
3219 basetype = sizetype;
3220 record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3221
3222 /* Record common candidate with constant offset stripped in base. */
3223 {
3224 base = strip_offset (iv->base, &offset);
3225 if (offset || base != iv->base)
3226 record_common_cand (data, base, iv->step, use);
3227 }
3228
3229 /* Record common candidate with base_object removed in base. */
3230 if (iv->base_object != NULL)
3231 {
3232 unsigned i;
3233 aff_tree aff_base;
3234 tree step, base_object = iv->base_object;
3235
3236 base = iv->base;
3237 step = iv->step;
3238 STRIP_NOPS (base);
3239 STRIP_NOPS (step);
3240 STRIP_NOPS (base_object);
3241 tree_to_aff_combination (base, TREE_TYPE (base), &aff_base);
3242 for (i = 0; i < aff_base.n; i++)
3243 {
3244 if (aff_base.elts[i].coef != 1)
3245 continue;
3246
3247 if (operand_equal_p (aff_base.elts[i].val, base_object, 0))
3248 break;
3249 }
3250 if (i < aff_base.n)
3251 {
3252 aff_combination_remove_elt (&aff_base, i);
3253 base = aff_combination_to_tree (&aff_base);
3254 basetype = TREE_TYPE (base);
3255 if (POINTER_TYPE_P (basetype))
3256 basetype = sizetype;
3257
3258 step = fold_convert (basetype, step);
3259 record_common_cand (data, base, step, use);
3260 /* Also record common candidate with offset stripped. */
3261 base = strip_offset (base, &offset);
3262 if (offset)
3263 record_common_cand (data, base, step, use);
3264 }
3265 }
3266
3267 /* Finally, add auto-increment candidates. Make such variables
3268 important since other iv uses with the same base object may be based
3269 on them. */
3270 if (use != NULL && use->type == USE_ADDRESS)
3271 add_autoinc_candidates (data, iv->base, iv->step, true, use);
3272 }
3273
3274 /* Adds candidates based on the uses. */
3275
3276 static void
3277 add_iv_candidate_for_uses (struct ivopts_data *data)
3278 {
3279 unsigned i;
3280
3281 for (i = 0; i < n_iv_uses (data); i++)
3282 {
3283 struct iv_use *use = iv_use (data, i);
3284
3285 if (!use)
3286 continue;
3287
3288 switch (use->type)
3289 {
3290 case USE_NONLINEAR_EXPR:
3291 case USE_COMPARE:
3292 case USE_ADDRESS:
3293 /* Just add the ivs based on the value of the iv used here. */
3294 add_iv_candidate_for_use (data, use);
3295 break;
3296
3297 default:
3298 gcc_unreachable ();
3299 }
3300 }
3301 add_iv_candidate_derived_from_uses (data);
3302 }
3303
3304 /* Record important candidates and add them to related_cands bitmaps. */
3305
3306 static void
3307 record_important_candidates (struct ivopts_data *data)
3308 {
3309 unsigned i;
3310 struct iv_use *use;
3311
3312 for (i = 0; i < n_iv_cands (data); i++)
3313 {
3314 struct iv_cand *cand = iv_cand (data, i);
3315
3316 if (cand->important)
3317 bitmap_set_bit (data->important_candidates, i);
3318 }
3319
3320 data->consider_all_candidates = (n_iv_cands (data)
3321 <= CONSIDER_ALL_CANDIDATES_BOUND);
3322
3323 /* Add important candidates to uses' related_cands bitmaps. */
3324 for (i = 0; i < n_iv_uses (data); i++)
3325 {
3326 use = iv_use (data, i);
3327 bitmap_ior_into (use->related_cands, data->important_candidates);
3328 }
3329 }
3330
3331 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3332 If consider_all_candidates is true, we use a two-dimensional array, otherwise
3333 we allocate a simple list to every use. */
3334
3335 static void
3336 alloc_use_cost_map (struct ivopts_data *data)
3337 {
3338 unsigned i, size, s;
3339
3340 for (i = 0; i < n_iv_uses (data); i++)
3341 {
3342 struct iv_use *use = iv_use (data, i);
3343
3344 if (data->consider_all_candidates)
3345 size = n_iv_cands (data);
3346 else
3347 {
3348 s = bitmap_count_bits (use->related_cands);
3349
3350 /* Round up to the nearest power of two, so that taking the modulo is fast. */
3351 size = s ? (1 << ceil_log2 (s)) : 1;
3352 }
3353
3354 use->n_map_members = size;
3355 use->cost_map = XCNEWVEC (struct cost_pair, size);
3356 }
3357 }
3358
3359 /* Returns a description of the computation cost of an expression whose
3360 runtime cost is RUNTIME and whose complexity is COMPLEXITY. */
3361
3362 static comp_cost
3363 new_cost (unsigned runtime, unsigned complexity)
3364 {
3365 comp_cost cost;
3366
3367 cost.cost = runtime;
3368 cost.complexity = complexity;
3369
3370 return cost;
3371 }
3372
3373 /* Returns true if COST is infinite. */
3374
3375 static bool
3376 infinite_cost_p (comp_cost cost)
3377 {
3378 return cost.cost == INFTY;
3379 }
3380
3381 /* Adds costs COST1 and COST2. */
3382
3383 static comp_cost
3384 add_costs (comp_cost cost1, comp_cost cost2)
3385 {
3386 if (infinite_cost_p (cost1) || infinite_cost_p (cost2))
3387 return infinite_cost;
3388
3389 cost1.cost += cost2.cost;
3390 cost1.complexity += cost2.complexity;
3391
3392 return cost1;
3393 }
3394 /* Subtracts costs COST1 and COST2. */
3395
3396 static comp_cost
3397 sub_costs (comp_cost cost1, comp_cost cost2)
3398 {
3399 cost1.cost -= cost2.cost;
3400 cost1.complexity -= cost2.complexity;
3401
3402 return cost1;
3403 }
3404
3405 /* Returns a negative number if COST1 < COST2, a positive number if
3406 COST1 > COST2, and 0 if COST1 = COST2. */
3407
3408 static int
3409 compare_costs (comp_cost cost1, comp_cost cost2)
3410 {
3411 if (cost1.cost == cost2.cost)
3412 return cost1.complexity - cost2.complexity;
3413
3414 return cost1.cost - cost2.cost;
3415 }
3416
3417 /* Sets the cost of the (USE, CANDIDATE) pair to COST and records that it
3418 depends on invariants DEPENDS_ON, that the value used in expressing it
3419 is VALUE, and that in case of iv elimination the comparison operator is COMP. */
3420
3421 static void
3422 set_use_iv_cost (struct ivopts_data *data,
3423 struct iv_use *use, struct iv_cand *cand,
3424 comp_cost cost, bitmap depends_on, tree value,
3425 enum tree_code comp, int inv_expr_id)
3426 {
3427 unsigned i, s;
3428
3429 if (infinite_cost_p (cost))
3430 {
3431 BITMAP_FREE (depends_on);
3432 return;
3433 }
3434
3435 if (data->consider_all_candidates)
3436 {
3437 use->cost_map[cand->id].cand = cand;
3438 use->cost_map[cand->id].cost = cost;
3439 use->cost_map[cand->id].depends_on = depends_on;
3440 use->cost_map[cand->id].value = value;
3441 use->cost_map[cand->id].comp = comp;
3442 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
3443 return;
3444 }
3445
3446 /* n_map_members is a power of two, so this computes modulo. */
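/* E.g. with n_map_members == 8, a candidate with id 11 starts probing
   at slot 11 & 7 == 3 (illustrative numbers).  */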
3447 s = cand->id & (use->n_map_members - 1);
3448 for (i = s; i < use->n_map_members; i++)
3449 if (!use->cost_map[i].cand)
3450 goto found;
3451 for (i = 0; i < s; i++)
3452 if (!use->cost_map[i].cand)
3453 goto found;
3454
3455 gcc_unreachable ();
3456
3457 found:
3458 use->cost_map[i].cand = cand;
3459 use->cost_map[i].cost = cost;
3460 use->cost_map[i].depends_on = depends_on;
3461 use->cost_map[i].value = value;
3462 use->cost_map[i].comp = comp;
3463 use->cost_map[i].inv_expr_id = inv_expr_id;
3464 }
3465
3466 /* Gets cost of (USE, CANDIDATE) pair. */
3467
3468 static struct cost_pair *
3469 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
3470 struct iv_cand *cand)
3471 {
3472 unsigned i, s;
3473 struct cost_pair *ret;
3474
3475 if (!cand)
3476 return NULL;
3477
3478 if (data->consider_all_candidates)
3479 {
3480 ret = use->cost_map + cand->id;
3481 if (!ret->cand)
3482 return NULL;
3483
3484 return ret;
3485 }
3486
3487 /* n_map_members is a power of two, so this computes modulo. */
3488 s = cand->id & (use->n_map_members - 1);
3489 for (i = s; i < use->n_map_members; i++)
3490 if (use->cost_map[i].cand == cand)
3491 return use->cost_map + i;
3492 else if (use->cost_map[i].cand == NULL)
3493 return NULL;
3494 for (i = 0; i < s; i++)
3495 if (use->cost_map[i].cand == cand)
3496 return use->cost_map + i;
3497 else if (use->cost_map[i].cand == NULL)
3498 return NULL;
3499
3500 return NULL;
3501 }
3502
3503 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
3504 static rtx
3505 produce_memory_decl_rtl (tree obj, int *regno)
3506 {
3507 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3508 machine_mode address_mode = targetm.addr_space.address_mode (as);
3509 rtx x;
3510
3511 gcc_assert (obj);
3512 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3513 {
3514 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3515 x = gen_rtx_SYMBOL_REF (address_mode, name);
3516 SET_SYMBOL_REF_DECL (x, obj);
3517 x = gen_rtx_MEM (DECL_MODE (obj), x);
3518 set_mem_addr_space (x, as);
3519 targetm.encode_section_info (obj, x, true);
3520 }
3521 else
3522 {
3523 x = gen_raw_REG (address_mode, (*regno)++);
3524 x = gen_rtx_MEM (DECL_MODE (obj), x);
3525 set_mem_addr_space (x, as);
3526 }
3527
3528 return x;
3529 }
3530
3531 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
3532 walk_tree. DATA contains the actual fake register number. */
3533
3534 static tree
3535 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3536 {
3537 tree obj = NULL_TREE;
3538 rtx x = NULL_RTX;
3539 int *regno = (int *) data;
3540
3541 switch (TREE_CODE (*expr_p))
3542 {
3543 case ADDR_EXPR:
3544 for (expr_p = &TREE_OPERAND (*expr_p, 0);
3545 handled_component_p (*expr_p);
3546 expr_p = &TREE_OPERAND (*expr_p, 0))
3547 continue;
3548 obj = *expr_p;
3549 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3550 x = produce_memory_decl_rtl (obj, regno);
3551 break;
3552
3553 case SSA_NAME:
3554 *ws = 0;
3555 obj = SSA_NAME_VAR (*expr_p);
3556 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3557 if (!obj)
3558 return NULL_TREE;
3559 if (!DECL_RTL_SET_P (obj))
3560 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3561 break;
3562
3563 case VAR_DECL:
3564 case PARM_DECL:
3565 case RESULT_DECL:
3566 *ws = 0;
3567 obj = *expr_p;
3568
3569 if (DECL_RTL_SET_P (obj))
3570 break;
3571
3572 if (DECL_MODE (obj) == BLKmode)
3573 x = produce_memory_decl_rtl (obj, regno);
3574 else
3575 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3576
3577 break;
3578
3579 default:
3580 break;
3581 }
3582
3583 if (x)
3584 {
3585 decl_rtl_to_reset.safe_push (obj);
3586 SET_DECL_RTL (obj, x);
3587 }
3588
3589 return NULL_TREE;
3590 }
3591
3592 /* Determines cost of the computation of EXPR. */
3593
3594 static unsigned
3595 computation_cost (tree expr, bool speed)
3596 {
3597 rtx_insn *seq;
3598 rtx rslt;
3599 tree type = TREE_TYPE (expr);
3600 unsigned cost;
3601 /* Avoid using hard regs in ways which may be unsupported. */
3602 int regno = LAST_VIRTUAL_REGISTER + 1;
3603 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3604 enum node_frequency real_frequency = node->frequency;
3605
3606 node->frequency = NODE_FREQUENCY_NORMAL;
3607 crtl->maybe_hot_insn_p = speed;
3608 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3609 start_sequence ();
3610 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3611 seq = get_insns ();
3612 end_sequence ();
3613 default_rtl_profile ();
3614 node->frequency = real_frequency;
3615
3616 cost = seq_cost (seq, speed);
3617 if (MEM_P (rslt))
3618 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3619 TYPE_ADDR_SPACE (type), speed);
3620 else if (!REG_P (rslt))
3621 cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3622
3623 return cost;
3624 }
3625
3626 /* Returns variable containing the value of candidate CAND at statement AT. */
3627
3628 static tree
3629 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt)
3630 {
3631 if (stmt_after_increment (loop, cand, stmt))
3632 return cand->var_after;
3633 else
3634 return cand->var_before;
3635 }
3636
3637 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3638 same precision, which is at least as wide as the precision of TYPE, stores
3639 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3640 type of A and B. */
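/* For example, if *A is (unsigned int) X and *B is (unsigned int) Y with
   X and Y both of a 64-bit type, X and Y are stored back to *A and *B and
   the 64-bit type is returned (illustrative example).  */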
3641
3642 static tree
3643 determine_common_wider_type (tree *a, tree *b)
3644 {
3645 tree wider_type = NULL;
3646 tree suba, subb;
3647 tree atype = TREE_TYPE (*a);
3648
3649 if (CONVERT_EXPR_P (*a))
3650 {
3651 suba = TREE_OPERAND (*a, 0);
3652 wider_type = TREE_TYPE (suba);
3653 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3654 return atype;
3655 }
3656 else
3657 return atype;
3658
3659 if (CONVERT_EXPR_P (*b))
3660 {
3661 subb = TREE_OPERAND (*b, 0);
3662 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3663 return atype;
3664 }
3665 else
3666 return atype;
3667
3668 *a = suba;
3669 *b = subb;
3670 return wider_type;
3671 }
3672
3673 /* Determines the expression by which USE is expressed from induction variable
3674 CAND at statement AT in LOOP. The expression is stored in a decomposed
3675 form into AFF. Returns false if USE cannot be expressed using CAND. */
3676
3677 static bool
3678 get_computation_aff (struct loop *loop,
3679 struct iv_use *use, struct iv_cand *cand, gimple *at,
3680 struct aff_tree *aff)
3681 {
3682 tree ubase = use->iv->base;
3683 tree ustep = use->iv->step;
3684 tree cbase = cand->iv->base;
3685 tree cstep = cand->iv->step, cstep_common;
3686 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3687 tree common_type, var;
3688 tree uutype;
3689 aff_tree cbase_aff, var_aff;
3690 widest_int rat;
3691
3692 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3693 {
3694 /* We do not have enough precision to express the values of the use. */
3695 return false;
3696 }
3697
3698 var = var_at_stmt (loop, cand, at);
3699 uutype = unsigned_type_for (utype);
3700
3701 /* If the conversion is not noop, perform it. */
3702 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3703 {
3704 if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3705 && (CONVERT_EXPR_P (cstep) || TREE_CODE (cstep) == INTEGER_CST))
3706 {
3707 tree inner_base, inner_step, inner_type;
3708 inner_base = TREE_OPERAND (cbase, 0);
3709 if (CONVERT_EXPR_P (cstep))
3710 inner_step = TREE_OPERAND (cstep, 0);
3711 else
3712 inner_step = cstep;
3713
3714 inner_type = TREE_TYPE (inner_base);
3715 /* If the candidate is added from a biv whose type is smaller than
3716 ctype, we know both the candidate and the biv won't overflow.
3717 In this case, it's safe to skip the conversion in the candidate.
3718 As an example, (unsigned short)((unsigned long)A) is equal to
3719 (unsigned short)A, if A has a type no larger than short. */
3720 if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3721 {
3722 cbase = inner_base;
3723 cstep = inner_step;
3724 }
3725 }
3726 cstep = fold_convert (uutype, cstep);
3727 cbase = fold_convert (uutype, cbase);
3728 var = fold_convert (uutype, var);
3729 }
3730
3731 if (!constant_multiple_of (ustep, cstep, &rat))
3732 return false;
3733
3734 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3735 type, we achieve better folding by computing their difference in this
3736 wider type, and casting the result to UUTYPE. We do not need to worry about
3737 overflows, as all the arithmetic will in the end be performed in UUTYPE
3738 anyway. */
3739 common_type = determine_common_wider_type (&ubase, &cbase);
3740
3741 /* use = ubase - ratio * cbase + ratio * var. */
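  /* (Derivation: the use has value UBASE + I * USTEP and the candidate
     CBASE + I * CSTEP, with USTEP = RAT * CSTEP. Before the candidate's
     increment, VAR equals CBASE + I * CSTEP, so I * USTEP = RAT * (VAR - CBASE)
     and the use value is UBASE + RAT * (VAR - CBASE); the adjustment just
     below accounts for the case where AT is after the increment.)  */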
3742 tree_to_aff_combination (ubase, common_type, aff);
3743 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3744 tree_to_aff_combination (var, uutype, &var_aff);
3745
3746 /* We need to shift the value if we are after the increment. */
3747 if (stmt_after_increment (loop, cand, at))
3748 {
3749 aff_tree cstep_aff;
3750
3751 if (common_type != uutype)
3752 cstep_common = fold_convert (common_type, cstep);
3753 else
3754 cstep_common = cstep;
3755
3756 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3757 aff_combination_add (&cbase_aff, &cstep_aff);
3758 }
3759
3760 aff_combination_scale (&cbase_aff, -rat);
3761 aff_combination_add (aff, &cbase_aff);
3762 if (common_type != uutype)
3763 aff_combination_convert (aff, uutype);
3764
3765 aff_combination_scale (&var_aff, rat);
3766 aff_combination_add (aff, &var_aff);
3767
3768 return true;
3769 }
3770
3771 /* Return the type of USE. */
3772
3773 static tree
3774 get_use_type (struct iv_use *use)
3775 {
3776 tree base_type = TREE_TYPE (use->iv->base);
3777 tree type;
3778
3779 if (use->type == USE_ADDRESS)
3780 {
3781 /* The base_type may be a void pointer. Create a pointer type based on
3782 the mem_ref instead. */
3783 type = build_pointer_type (TREE_TYPE (*use->op_p));
3784 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3785 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3786 }
3787 else
3788 type = base_type;
3789
3790 return type;
3791 }
3792
3793 /* Determines the expression by which USE is expressed from induction variable
3794 CAND at statement AT in LOOP. The computation is unshared. */
3795
3796 static tree
3797 get_computation_at (struct loop *loop,
3798 struct iv_use *use, struct iv_cand *cand, gimple *at)
3799 {
3800 aff_tree aff;
3801 tree type = get_use_type (use);
3802
3803 if (!get_computation_aff (loop, use, cand, at, &aff))
3804 return NULL_TREE;
3805 unshare_aff_combination (&aff);
3806 return fold_convert (type, aff_combination_to_tree (&aff));
3807 }
3808
3809 /* Determines the expression by which USE is expressed from induction variable
3810 CAND in LOOP. The computation is unshared. */
3811
3812 static tree
3813 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3814 {
3815 return get_computation_at (loop, use, cand, use->stmt);
3816 }
3817
3818 /* Adjust the cost COST for being in loop setup rather than loop body.
3819 If we're optimizing for space, the loop setup overhead is constant;
3820 if we're optimizing for speed, amortize it over the per-iteration cost. */
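/* E.g. a setup cost of 10 in a loop averaging 5 iterations contributes
   10 / 5 = 2 when optimizing for speed, but the full 10 when optimizing
   for size (illustrative numbers).  */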
3821 static unsigned
3822 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3823 {
3824 if (cost == INFTY)
3825 return cost;
3826 else if (optimize_loop_for_speed_p (data->current_loop))
3827 return cost / avg_loop_niter (data->current_loop);
3828 else
3829 return cost;
3830 }
3831
3832 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3833 validity for a memory reference accessing memory of mode MODE in
3834 address space AS. */
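/* On a target with scaled-index addressing such as x86, for instance,
   typically only ratios like 1, 2, 4 and 8 are valid (illustrative; the
   actual set is probed from the target below and cached per mode).  */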
3835
3836
3837 bool
3838 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3839 addr_space_t as)
3840 {
3841 #define MAX_RATIO 128
3842 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3843 static vec<sbitmap> valid_mult_list;
3844 sbitmap valid_mult;
3845
3846 if (data_index >= valid_mult_list.length ())
3847 valid_mult_list.safe_grow_cleared (data_index + 1);
3848
3849 valid_mult = valid_mult_list[data_index];
3850 if (!valid_mult)
3851 {
3852 machine_mode address_mode = targetm.addr_space.address_mode (as);
3853 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3854 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3855 rtx addr, scaled;
3856 HOST_WIDE_INT i;
3857
3858 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3859 bitmap_clear (valid_mult);
3860 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3861 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3862 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3863 {
3864 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3865 if (memory_address_addr_space_p (mode, addr, as)
3866 || memory_address_addr_space_p (mode, scaled, as))
3867 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3868 }
3869
3870 if (dump_file && (dump_flags & TDF_DETAILS))
3871 {
3872 fprintf (dump_file, " allowed multipliers:");
3873 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3874 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3875 fprintf (dump_file, " %d", (int) i);
3876 fprintf (dump_file, "\n");
3877 fprintf (dump_file, "\n");
3878 }
3879
3880 valid_mult_list[data_index] = valid_mult;
3881 }
3882
3883 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3884 return false;
3885
3886 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3887 }
3888
3889 /* Returns the cost of an address of the form symbol + var + OFFSET + RATIO * index.
3890 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3891 variable is omitted. Compute the cost for a memory reference that accesses
3892 a memory location of mode MEM_MODE in address space AS.
3893
3894 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3895 size of MEM_MODE / RATIO) is available. To make this determination, we
3896 look at the size of the increment to be made, which is given in CSTEP.
3897 CSTEP may be zero if the step is unknown.
3898 STMT_AFTER_INC is true iff the statement we're looking at is after the
3899 increment of the original biv.
3900
3901 TODO -- there must be some better way. This all is quite crude. */
3902
3903 enum ainc_type
3904 {
3905 AINC_PRE_INC, /* Pre increment. */
3906 AINC_PRE_DEC, /* Pre decrement. */
3907 AINC_POST_INC, /* Post increment. */
3908 AINC_POST_DEC, /* Post decrement. */
3909 AINC_NONE /* Also the number of auto increment types. */
3910 };
3911
3912 struct address_cost_data
3913 {
3914 HOST_WIDE_INT min_offset, max_offset;
3915 unsigned costs[2][2][2][2];
3916 unsigned ainc_costs[AINC_NONE];
3917 };
3918
3919
3920 static comp_cost
3921 get_address_cost (bool symbol_present, bool var_present,
3922 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3923 HOST_WIDE_INT cstep, machine_mode mem_mode,
3924 addr_space_t as, bool speed,
3925 bool stmt_after_inc, bool *may_autoinc)
3926 {
3927 machine_mode address_mode = targetm.addr_space.address_mode (as);
3928 static vec<address_cost_data *> address_cost_data_list;
3929 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3930 address_cost_data *data;
3931 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3932 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3933 unsigned cost, acost, complexity;
3934 enum ainc_type autoinc_type;
3935 bool offset_p, ratio_p, autoinc;
3936 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3937 unsigned HOST_WIDE_INT mask;
3938 unsigned bits;
3939
3940 if (data_index >= address_cost_data_list.length ())
3941 address_cost_data_list.safe_grow_cleared (data_index + 1);
3942
3943 data = address_cost_data_list[data_index];
3944 if (!data)
3945 {
3946 HOST_WIDE_INT i;
3947 HOST_WIDE_INT rat, off = 0;
3948 int old_cse_not_expected, width;
3949 unsigned sym_p, var_p, off_p, rat_p, add_c;
3950 rtx_insn *seq;
3951 rtx addr, base;
3952 rtx reg0, reg1;
3953
3954 data = (address_cost_data *) xcalloc (1, sizeof (*data));
3955
3956 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3957
3958 width = GET_MODE_BITSIZE (address_mode) - 1;
3959 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3960 width = HOST_BITS_PER_WIDE_INT - 1;
3961 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3962
3963 for (i = width; i >= 0; i--)
3964 {
3965 off = -((unsigned HOST_WIDE_INT) 1 << i);
3966 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3967 if (memory_address_addr_space_p (mem_mode, addr, as))
3968 break;
3969 }
3970 data->min_offset = (i == -1? 0 : off);
3971
3972 for (i = width; i >= 0; i--)
3973 {
3974 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3975 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3976 if (memory_address_addr_space_p (mem_mode, addr, as))
3977 break;
3978 /* For some strict-alignment targets, the offset must be naturally
3979 aligned. Try an aligned offset if mem_mode is not QImode. */
3980 off = mem_mode != QImode
3981 ? ((unsigned HOST_WIDE_INT) 1 << i)
3982 - GET_MODE_SIZE (mem_mode)
3983 : 0;
3984 if (off > 0)
3985 {
3986 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3987 if (memory_address_addr_space_p (mem_mode, addr, as))
3988 break;
3989 }
3990 }
3991 if (i == -1)
3992 off = 0;
3993 data->max_offset = off;
3994
3995 if (dump_file && (dump_flags & TDF_DETAILS))
3996 {
3997 fprintf (dump_file, "get_address_cost:\n");
3998 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3999 GET_MODE_NAME (mem_mode),
4000 data->min_offset);
4001 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
4002 GET_MODE_NAME (mem_mode),
4003 data->max_offset);
4004 }
4005
4006 rat = 1;
4007 for (i = 2; i <= MAX_RATIO; i++)
4008 if (multiplier_allowed_in_address_p (i, mem_mode, as))
4009 {
4010 rat = i;
4011 break;
4012 }
4013
4014 /* Compute the cost of various addressing modes. */
4015 acost = 0;
4016 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
4017 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
4018
4019 if (USE_LOAD_PRE_DECREMENT (mem_mode)
4020 || USE_STORE_PRE_DECREMENT (mem_mode))
4021 {
4022 addr = gen_rtx_PRE_DEC (address_mode, reg0);
4023 has_predec[mem_mode]
4024 = memory_address_addr_space_p (mem_mode, addr, as);
4025
4026 if (has_predec[mem_mode])
4027 data->ainc_costs[AINC_PRE_DEC]
4028 = address_cost (addr, mem_mode, as, speed);
4029 }
4030 if (USE_LOAD_POST_DECREMENT (mem_mode)
4031 || USE_STORE_POST_DECREMENT (mem_mode))
4032 {
4033 addr = gen_rtx_POST_DEC (address_mode, reg0);
4034 has_postdec[mem_mode]
4035 = memory_address_addr_space_p (mem_mode, addr, as);
4036
4037 if (has_postdec[mem_mode])
4038 data->ainc_costs[AINC_POST_DEC]
4039 = address_cost (addr, mem_mode, as, speed);
4040 }
4041 if (USE_LOAD_PRE_INCREMENT (mem_mode)
4042 || USE_STORE_PRE_INCREMENT (mem_mode))
4043 {
4044 addr = gen_rtx_PRE_INC (address_mode, reg0);
4045 has_preinc[mem_mode]
4046 = memory_address_addr_space_p (mem_mode, addr, as);
4047
4048 if (has_preinc[mem_mode])
4049 data->ainc_costs[AINC_PRE_INC]
4050 = address_cost (addr, mem_mode, as, speed);
4051 }
4052 if (USE_LOAD_POST_INCREMENT (mem_mode)
4053 || USE_STORE_POST_INCREMENT (mem_mode))
4054 {
4055 addr = gen_rtx_POST_INC (address_mode, reg0);
4056 has_postinc[mem_mode]
4057 = memory_address_addr_space_p (mem_mode, addr, as);
4058
4059 if (has_postinc[mem_mode])
4060 data->ainc_costs[AINC_POST_INC]
4061 = address_cost (addr, mem_mode, as, speed);
4062 }
4063 for (i = 0; i < 16; i++)
4064 {
4065 sym_p = i & 1;
4066 var_p = (i >> 1) & 1;
4067 off_p = (i >> 2) & 1;
4068 rat_p = (i >> 3) & 1;
4069
4070 addr = reg0;
4071 if (rat_p)
4072 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
4073 gen_int_mode (rat, address_mode));
4074
4075 if (var_p)
4076 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
4077
4078 if (sym_p)
4079 {
4080 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
4081 /* ??? We can run into trouble with some backends by presenting
4082 it with symbols which haven't been properly passed through
4083 targetm.encode_section_info. By setting the local bit, we
4084 enhance the probability of things working. */
4085 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
4086
4087 if (off_p)
4088 base = gen_rtx_fmt_e (CONST, address_mode,
4089 gen_rtx_fmt_ee
4090 (PLUS, address_mode, base,
4091 gen_int_mode (off, address_mode)));
4092 }
4093 else if (off_p)
4094 base = gen_int_mode (off, address_mode);
4095 else
4096 base = NULL_RTX;
4097
4098 if (base)
4099 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
4100
4101 start_sequence ();
4102 /* To avoid splitting addressing modes, pretend that no cse will
4103 follow. */
4104 old_cse_not_expected = cse_not_expected;
4105 cse_not_expected = true;
4106 addr = memory_address_addr_space (mem_mode, addr, as);
4107 cse_not_expected = old_cse_not_expected;
4108 seq = get_insns ();
4109 end_sequence ();
4110
4111 acost = seq_cost (seq, speed);
4112 acost += address_cost (addr, mem_mode, as, speed);
4113
4114 if (!acost)
4115 acost = 1;
4116 data->costs[sym_p][var_p][off_p][rat_p] = acost;
4117 }
4118
4119 /* On some targets, it is quite expensive to load a symbol into a register,
4120 which makes addresses that contain symbols look much more expensive.
4121 However, the symbol will have to be loaded in any case before the
4122 loop (and quite likely we have it in a register already), so it does not
4123 make much sense to penalize such addresses too heavily. So make some
4124 final tweaks for the SYMBOL_PRESENT modes:
4125
4126 If VAR_PRESENT is false, and the mode obtained by changing symbol to
4127 var is cheaper, use this mode with small penalty.
4128 If VAR_PRESENT is true, try whether the mode with
4129 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
4130 if this is the case, use it. */
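/* Illustration with hypothetical numbers: if the sym + cst form was
   measured at 12 above but the var + cst form at 4, the loop below caps
   the symbolic entry at 4 + 1 = 5; when a variable is present as well,
   add_c is charged on top for the extra addition before comparing.  */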
4131 add_c = add_cost (speed, address_mode);
4132 for (i = 0; i < 8; i++)
4133 {
4134 var_p = i & 1;
4135 off_p = (i >> 1) & 1;
4136 rat_p = (i >> 2) & 1;
4137
4138 acost = data->costs[0][1][off_p][rat_p] + 1;
4139 if (var_p)
4140 acost += add_c;
4141
4142 if (acost < data->costs[1][var_p][off_p][rat_p])
4143 data->costs[1][var_p][off_p][rat_p] = acost;
4144 }
4145
4146 if (dump_file && (dump_flags & TDF_DETAILS))
4147 {
4148 fprintf (dump_file, "Address costs:\n");
4149
4150 for (i = 0; i < 16; i++)
4151 {
4152 sym_p = i & 1;
4153 var_p = (i >> 1) & 1;
4154 off_p = (i >> 2) & 1;
4155 rat_p = (i >> 3) & 1;
4156
4157 fprintf (dump_file, " ");
4158 if (sym_p)
4159 fprintf (dump_file, "sym + ");
4160 if (var_p)
4161 fprintf (dump_file, "var + ");
4162 if (off_p)
4163 fprintf (dump_file, "cst + ");
4164 if (rat_p)
4165 fprintf (dump_file, "rat * ");
4166
4167 acost = data->costs[sym_p][var_p][off_p][rat_p];
4168 fprintf (dump_file, "index costs %d\n", acost);
4169 }
4170 if (has_predec[mem_mode] || has_postdec[mem_mode]
4171 || has_preinc[mem_mode] || has_postinc[mem_mode])
4172 fprintf (dump_file, " May include autoinc/dec\n");
4173 fprintf (dump_file, "\n");
4174 }
4175
4176 address_cost_data_list[data_index] = data;
4177 }
4178
4179 bits = GET_MODE_BITSIZE (address_mode);
4180 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
4181 offset &= mask;
4182 if ((offset >> (bits - 1) & 1))
4183 offset |= ~mask;
4184 s_offset = offset;
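/* Worked example (illustrative): with a 32-bit address mode on a host with
   64-bit HOST_WIDE_INT, mask is 0xffffffff, so an incoming OFFSET of
   0xfffffffc keeps only its low 32 bits and, because bit 31 is set, is
   sign-extended back to -4 before being checked against
   data->min_offset/max_offset below.  */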
4185
4186 autoinc = false;
4187 autoinc_type = AINC_NONE;
4188 msize = GET_MODE_SIZE (mem_mode);
4189 autoinc_offset = offset;
4190 if (stmt_after_inc)
4191 autoinc_offset += ratio * cstep;
4192 if (symbol_present || var_present || ratio != 1)
4193 autoinc = false;
4194 else
4195 {
4196 if (has_postinc[mem_mode] && autoinc_offset == 0
4197 && msize == cstep)
4198 autoinc_type = AINC_POST_INC;
4199 else if (has_postdec[mem_mode] && autoinc_offset == 0
4200 && msize == -cstep)
4201 autoinc_type = AINC_POST_DEC;
4202 else if (has_preinc[mem_mode] && autoinc_offset == msize
4203 && msize == cstep)
4204 autoinc_type = AINC_PRE_INC;
4205 else if (has_predec[mem_mode] && autoinc_offset == -msize
4206 && msize == -cstep)
4207 autoinc_type = AINC_PRE_DEC;
4208
4209 if (autoinc_type != AINC_NONE)
4210 autoinc = true;
4211 }
4212
4213 cost = 0;
4214 offset_p = (s_offset != 0
4215 && data->min_offset <= s_offset
4216 && s_offset <= data->max_offset);
4217 ratio_p = (ratio != 1
4218 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
4219
4220 if (ratio != 1 && !ratio_p)
4221 cost += mult_by_coeff_cost (ratio, address_mode, speed);
4222
4223 if (s_offset && !offset_p && !symbol_present)
4224 cost += add_cost (speed, address_mode);
4225
4226 if (may_autoinc)
4227 *may_autoinc = autoinc;
4228 if (autoinc)
4229 acost = data->ainc_costs[autoinc_type];
4230 else
4231 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
4232 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
4233 return new_cost (cost + acost, complexity);
4234 }
4235
4236 /* Calculate the SPEED or size cost of the shiftadd EXPR in MODE.  MULT is
4237 the EXPR operand holding the multiplication that can become the shift.
4238 COST0 and COST1 are the costs for calculating the operands of EXPR.
4239 Returns true if successful, and returns the cost in COST. */
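/* Example of the expressions handled here (hypothetical, assuming the target
   provides a shift-and-add instruction):

       long f (long a, long b) { return a + b * 8; }

   MULT is the  b * 8  operand, m = exact_log2 (8) = 3, and the routine picks
   the cheaper of shiftadd_cost (a single "a + (b << 3)" insn) and
   add_cost + shift_cost (a separate shift followed by an add).  */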
4240
4241 static bool
4242 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
4243 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4244 {
4245 comp_cost res;
4246 tree op1 = TREE_OPERAND (expr, 1);
4247 tree cst = TREE_OPERAND (mult, 1);
4248 tree multop = TREE_OPERAND (mult, 0);
4249 int m = exact_log2 (int_cst_value (cst));
4250 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4251 int as_cost, sa_cost;
4252 bool mult_in_op1;
4253
4254 if (!(m >= 0 && m < maxm))
4255 return false;
4256
4257 STRIP_NOPS (op1);
4258 mult_in_op1 = operand_equal_p (op1, mult, 0);
4259
4260 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4261
4262 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4263 use that in preference to a shift insn followed by an add insn. */
4264 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4265 ? shiftadd_cost (speed, mode, m)
4266 : (mult_in_op1
4267 ? shiftsub1_cost (speed, mode, m)
4268 : shiftsub0_cost (speed, mode, m)));
4269
4270 res = new_cost (MIN (as_cost, sa_cost), 0);
4271 res = add_costs (res, mult_in_op1 ? cost0 : cost1);
4272
4273 STRIP_NOPS (multop);
4274 if (!is_gimple_val (multop))
4275 res = add_costs (res, force_expr_to_var_cost (multop, speed));
4276
4277 *cost = res;
4278 return true;
4279 }
4280
4281 /* Estimates cost of forcing expression EXPR into a variable. */
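/* Sketch of the recursion below (hypothetical SSA names): for
   EXPR = a + b * 4 with 'a' and 'b' SSA names, the leaf operands cost
   nothing, the multiplication contributes mult_by_coeff_cost (4, ...), and
   the outer PLUS_EXPR contributes add_cost -- unless get_shiftadd_cost
   above finds a cheaper shift-and-add form.  The total is finally capped
   by target_spill_cost.  */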
4282
4283 static comp_cost
4284 force_expr_to_var_cost (tree expr, bool speed)
4285 {
4286 static bool costs_initialized = false;
4287 static unsigned integer_cost [2];
4288 static unsigned symbol_cost [2];
4289 static unsigned address_cost [2];
4290 tree op0, op1;
4291 comp_cost cost0, cost1, cost;
4292 machine_mode mode;
4293
4294 if (!costs_initialized)
4295 {
4296 tree type = build_pointer_type (integer_type_node);
4297 tree var, addr;
4298 rtx x;
4299 int i;
4300
4301 var = create_tmp_var_raw (integer_type_node, "test_var");
4302 TREE_STATIC (var) = 1;
4303 x = produce_memory_decl_rtl (var, NULL);
4304 SET_DECL_RTL (var, x);
4305
4306 addr = build1 (ADDR_EXPR, type, var);
4307
4308
4309 for (i = 0; i < 2; i++)
4310 {
4311 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4312 2000), i);
4313
4314 symbol_cost[i] = computation_cost (addr, i) + 1;
4315
4316 address_cost[i]
4317 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4318 if (dump_file && (dump_flags & TDF_DETAILS))
4319 {
4320 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4321 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
4322 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
4323 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
4324 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
4325 fprintf (dump_file, "\n");
4326 }
4327 }
4328
4329 costs_initialized = true;
4330 }
4331
4332 STRIP_NOPS (expr);
4333
4334 if (SSA_VAR_P (expr))
4335 return no_cost;
4336
4337 if (is_gimple_min_invariant (expr))
4338 {
4339 if (TREE_CODE (expr) == INTEGER_CST)
4340 return new_cost (integer_cost [speed], 0);
4341
4342 if (TREE_CODE (expr) == ADDR_EXPR)
4343 {
4344 tree obj = TREE_OPERAND (expr, 0);
4345
4346 if (TREE_CODE (obj) == VAR_DECL
4347 || TREE_CODE (obj) == PARM_DECL
4348 || TREE_CODE (obj) == RESULT_DECL)
4349 return new_cost (symbol_cost [speed], 0);
4350 }
4351
4352 return new_cost (address_cost [speed], 0);
4353 }
4354
4355 switch (TREE_CODE (expr))
4356 {
4357 case POINTER_PLUS_EXPR:
4358 case PLUS_EXPR:
4359 case MINUS_EXPR:
4360 case MULT_EXPR:
4361 op0 = TREE_OPERAND (expr, 0);
4362 op1 = TREE_OPERAND (expr, 1);
4363 STRIP_NOPS (op0);
4364 STRIP_NOPS (op1);
4365 break;
4366
4367 CASE_CONVERT:
4368 case NEGATE_EXPR:
4369 op0 = TREE_OPERAND (expr, 0);
4370 STRIP_NOPS (op0);
4371 op1 = NULL_TREE;
4372 break;
4373
4374 default:
4375 /* Just an arbitrary value, FIXME. */
4376 return new_cost (target_spill_cost[speed], 0);
4377 }
4378
4379 if (op0 == NULL_TREE
4380 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4381 cost0 = no_cost;
4382 else
4383 cost0 = force_expr_to_var_cost (op0, speed);
4384
4385 if (op1 == NULL_TREE
4386 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4387 cost1 = no_cost;
4388 else
4389 cost1 = force_expr_to_var_cost (op1, speed);
4390
4391 mode = TYPE_MODE (TREE_TYPE (expr));
4392 switch (TREE_CODE (expr))
4393 {
4394 case POINTER_PLUS_EXPR:
4395 case PLUS_EXPR:
4396 case MINUS_EXPR:
4397 case NEGATE_EXPR:
4398 cost = new_cost (add_cost (speed, mode), 0);
4399 if (TREE_CODE (expr) != NEGATE_EXPR)
4400 {
4401 tree mult = NULL_TREE;
4402 comp_cost sa_cost;
4403 if (TREE_CODE (op1) == MULT_EXPR)
4404 mult = op1;
4405 else if (TREE_CODE (op0) == MULT_EXPR)
4406 mult = op0;
4407
4408 if (mult != NULL_TREE
4409 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4410 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
4411 speed, &sa_cost))
4412 return sa_cost;
4413 }
4414 break;
4415
4416 CASE_CONVERT:
4417 {
4418 tree inner_mode, outer_mode;
4419 outer_mode = TREE_TYPE (expr);
4420 inner_mode = TREE_TYPE (op0);
4421 cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
4422 TYPE_MODE (inner_mode), speed), 0);
4423 }
4424 break;
4425
4426 case MULT_EXPR:
4427 if (cst_and_fits_in_hwi (op0))
4428 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
4429 mode, speed), 0);
4430 else if (cst_and_fits_in_hwi (op1))
4431 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
4432 mode, speed), 0);
4433 else
4434 return new_cost (target_spill_cost [speed], 0);
4435 break;
4436
4437 default:
4438 gcc_unreachable ();
4439 }
4440
4441 cost = add_costs (cost, cost0);
4442 cost = add_costs (cost, cost1);
4443
4444 /* Bound the cost by target_spill_cost. The parts of complicated
4445 computations often are either loop invariant or at least can
4446 be shared between several iv uses, so letting this grow without
4447 limits would not give reasonable results. */
4448 if (cost.cost > (int) target_spill_cost [speed])
4449 cost.cost = target_spill_cost [speed];
4450
4451 return cost;
4452 }
4453
4454 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
4455 invariants the computation depends on. */
4456
4457 static comp_cost
4458 force_var_cost (struct ivopts_data *data,
4459 tree expr, bitmap *depends_on)
4460 {
4461 if (depends_on)
4462 {
4463 fd_ivopts_data = data;
4464 walk_tree (&expr, find_depends, depends_on, NULL);
4465 }
4466
4467 return force_expr_to_var_cost (expr, data->speed);
4468 }
4469
4470 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
4471 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
4472 to false if the corresponding part is missing. DEPENDS_ON is a set of the
4473 invariants the computation depends on. */
4474
4475 static comp_cost
4476 split_address_cost (struct ivopts_data *data,
4477 tree addr, bool *symbol_present, bool *var_present,
4478 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4479 {
4480 tree core;
4481 HOST_WIDE_INT bitsize;
4482 HOST_WIDE_INT bitpos;
4483 tree toffset;
4484 machine_mode mode;
4485 int unsignedp, reversep, volatilep;
4486
4487 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
4488 &unsignedp, &reversep, &volatilep, false);
4489
4490 if (toffset != 0
4491 || bitpos % BITS_PER_UNIT != 0
4492 || reversep
4493 || TREE_CODE (core) != VAR_DECL)
4494 {
4495 *symbol_present = false;
4496 *var_present = true;
4497 fd_ivopts_data = data;
4498 if (depends_on)
4499 walk_tree (&addr, find_depends, depends_on, NULL);
4500
4501 return new_cost (target_spill_cost[data->speed], 0);
4502 }
4503
4504 *offset += bitpos / BITS_PER_UNIT;
4505 if (TREE_STATIC (core)
4506 || DECL_EXTERNAL (core))
4507 {
4508 *symbol_present = true;
4509 *var_present = false;
4510 return no_cost;
4511 }
4512
4513 *symbol_present = false;
4514 *var_present = true;
4515 return no_cost;
4516 }
4517
4518 /* Estimates cost of expressing difference of addresses E1 - E2 as
4519 var + symbol + offset. The value of offset is added to OFFSET,
4520 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4521 part is missing. DEPENDS_ON is a set of the invariants the computation
4522 depends on. */
4523
4524 static comp_cost
4525 ptr_difference_cost (struct ivopts_data *data,
4526 tree e1, tree e2, bool *symbol_present, bool *var_present,
4527 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4528 {
4529 HOST_WIDE_INT diff = 0;
4530 aff_tree aff_e1, aff_e2;
4531 tree type;
4532
4533 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
4534
4535 if (ptr_difference_const (e1, e2, &diff))
4536 {
4537 *offset += diff;
4538 *symbol_present = false;
4539 *var_present = false;
4540 return no_cost;
4541 }
4542
4543 if (integer_zerop (e2))
4544 return split_address_cost (data, TREE_OPERAND (e1, 0),
4545 symbol_present, var_present, offset, depends_on);
4546
4547 *symbol_present = false;
4548 *var_present = true;
4549
4550 type = signed_type_for (TREE_TYPE (e1));
4551 tree_to_aff_combination (e1, type, &aff_e1);
4552 tree_to_aff_combination (e2, type, &aff_e2);
4553 aff_combination_scale (&aff_e2, -1);
4554 aff_combination_add (&aff_e1, &aff_e2);
4555
4556 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4557 }
4558
4559 /* Estimates cost of expressing difference E1 - E2 as
4560 var + symbol + offset. The value of offset is added to OFFSET,
4561 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4562 part is missing. DEPENDS_ON is a set of the invariants the computation
4563 depends on. */
4564
4565 static comp_cost
4566 difference_cost (struct ivopts_data *data,
4567 tree e1, tree e2, bool *symbol_present, bool *var_present,
4568 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4569 {
4570 machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
4571 unsigned HOST_WIDE_INT off1, off2;
4572 aff_tree aff_e1, aff_e2;
4573 tree type;
4574
4575 e1 = strip_offset (e1, &off1);
4576 e2 = strip_offset (e2, &off2);
4577 *offset += off1 - off2;
4578
4579 STRIP_NOPS (e1);
4580 STRIP_NOPS (e2);
4581
4582 if (TREE_CODE (e1) == ADDR_EXPR)
4583 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
4584 offset, depends_on);
4585 *symbol_present = false;
4586
4587 if (operand_equal_p (e1, e2, 0))
4588 {
4589 *var_present = false;
4590 return no_cost;
4591 }
4592
4593 *var_present = true;
4594
4595 if (integer_zerop (e2))
4596 return force_var_cost (data, e1, depends_on);
4597
4598 if (integer_zerop (e1))
4599 {
4600 comp_cost cost = force_var_cost (data, e2, depends_on);
4601 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
4602 return cost;
4603 }
4604
4605 type = signed_type_for (TREE_TYPE (e1));
4606 tree_to_aff_combination (e1, type, &aff_e1);
4607 tree_to_aff_combination (e2, type, &aff_e2);
4608 aff_combination_scale (&aff_e2, -1);
4609 aff_combination_add (&aff_e1, &aff_e2);
4610
4611 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4612 }
4613
4614 /* Returns true if AFF1 and AFF2 are identical. */
4615
4616 static bool
4617 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4618 {
4619 unsigned i;
4620
4621 if (aff1->n != aff2->n)
4622 return false;
4623
4624 for (i = 0; i < aff1->n; i++)
4625 {
4626 if (aff1->elts[i].coef != aff2->elts[i].coef)
4627 return false;
4628
4629 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4630 return false;
4631 }
4632 return true;
4633 }
4634
4635 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
4636
4637 static int
4638 get_expr_id (struct ivopts_data *data, tree expr)
4639 {
4640 struct iv_inv_expr_ent ent;
4641 struct iv_inv_expr_ent **slot;
4642
4643 ent.expr = expr;
4644 ent.hash = iterative_hash_expr (expr, 0);
4645 slot = data->inv_expr_tab->find_slot (&ent, INSERT);
4646 if (*slot)
4647 return (*slot)->id;
4648
4649 *slot = XNEW (struct iv_inv_expr_ent);
4650 (*slot)->expr = expr;
4651 (*slot)->hash = ent.hash;
4652 (*slot)->id = data->inv_expr_id++;
4653 return (*slot)->id;
4654 }
4655
4656 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
4657 requires a new compiler generated temporary. Returns -1 otherwise.
4658 ADDRESS_P is a flag indicating if the expression is for address
4659 computation. */
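/* Example (hypothetical SSA names): with ubase = p_0 + n, cbase = p_0 and
   RATIO = 1, the bases are not equal and their affine difference is the
   loop invariant n, so an id is assigned (or reused) for it; if ubase and
   cbase were both p_0, the function would return -1 and no temporary would
   be counted.  */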
4660
4661 static int
4662 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
4663 tree cbase, HOST_WIDE_INT ratio,
4664 bool address_p)
4665 {
4666 aff_tree ubase_aff, cbase_aff;
4667 tree expr, ub, cb;
4668
4669 STRIP_NOPS (ubase);
4670 STRIP_NOPS (cbase);
4671 ub = ubase;
4672 cb = cbase;
4673
4674 if ((TREE_CODE (ubase) == INTEGER_CST)
4675 && (TREE_CODE (cbase) == INTEGER_CST))
4676 return -1;
4677
4678 /* Strips the constant part. */
4679 if (TREE_CODE (ubase) == PLUS_EXPR
4680 || TREE_CODE (ubase) == MINUS_EXPR
4681 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4682 {
4683 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4684 ubase = TREE_OPERAND (ubase, 0);
4685 }
4686
4687 /* Strips the constant part. */
4688 if (TREE_CODE (cbase) == PLUS_EXPR
4689 || TREE_CODE (cbase) == MINUS_EXPR
4690 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4691 {
4692 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4693 cbase = TREE_OPERAND (cbase, 0);
4694 }
4695
4696 if (address_p)
4697 {
4698 if (((TREE_CODE (ubase) == SSA_NAME)
4699 || (TREE_CODE (ubase) == ADDR_EXPR
4700 && is_gimple_min_invariant (ubase)))
4701 && (TREE_CODE (cbase) == INTEGER_CST))
4702 return -1;
4703
4704 if (((TREE_CODE (cbase) == SSA_NAME)
4705 || (TREE_CODE (cbase) == ADDR_EXPR
4706 && is_gimple_min_invariant (cbase)))
4707 && (TREE_CODE (ubase) == INTEGER_CST))
4708 return -1;
4709 }
4710
4711 if (ratio == 1)
4712 {
4713 if (operand_equal_p (ubase, cbase, 0))
4714 return -1;
4715
4716 if (TREE_CODE (ubase) == ADDR_EXPR
4717 && TREE_CODE (cbase) == ADDR_EXPR)
4718 {
4719 tree usym, csym;
4720
4721 usym = TREE_OPERAND (ubase, 0);
4722 csym = TREE_OPERAND (cbase, 0);
4723 if (TREE_CODE (usym) == ARRAY_REF)
4724 {
4725 tree ind = TREE_OPERAND (usym, 1);
4726 if (TREE_CODE (ind) == INTEGER_CST
4727 && tree_fits_shwi_p (ind)
4728 && tree_to_shwi (ind) == 0)
4729 usym = TREE_OPERAND (usym, 0);
4730 }
4731 if (TREE_CODE (csym) == ARRAY_REF)
4732 {
4733 tree ind = TREE_OPERAND (csym, 1);
4734 if (TREE_CODE (ind) == INTEGER_CST
4735 && tree_fits_shwi_p (ind)
4736 && tree_to_shwi (ind) == 0)
4737 csym = TREE_OPERAND (csym, 0);
4738 }
4739 if (operand_equal_p (usym, csym, 0))
4740 return -1;
4741 }
4742 /* Now do a more complex comparison. */
4743 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4744 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4745 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4746 return -1;
4747 }
4748
4749 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4750 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4751
4752 aff_combination_scale (&cbase_aff, -1 * ratio);
4753 aff_combination_add (&ubase_aff, &cbase_aff);
4754 expr = aff_combination_to_tree (&ubase_aff);
4755 return get_expr_id (data, expr);
4756 }
4757
4758
4759
4760 /* Determines the cost of the computation by which USE is expressed
4761 from induction variable CAND. If ADDRESS_P is true, we just need
4762 to create an address from it, otherwise we want to get it into a
4763 register. A set of invariants we depend on is stored in
4764 DEPENDS_ON. AT is the statement at which the value is computed.
4765 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4766 addressing is likely. */
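/* Sketch (hypothetical IVs): if the use walks  q = p + 4 * i  (ubase = p,
   ustep = 4) and the candidate is  i  itself (cbase = 0, cstep = 1), then
   ratio = 4 and the use is rewritten as  ubase - 4 * cbase + 4 * cand
   = p + 4 * cand, so the cost charged below is roughly
   difference_cost (p, 0) plus the cost of the "+ 4 * cand" part (an
   addressing-mode cost for address uses, a multiply-add otherwise).  */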
4767
4768 static comp_cost
4769 get_computation_cost_at (struct ivopts_data *data,
4770 struct iv_use *use, struct iv_cand *cand,
4771 bool address_p, bitmap *depends_on, gimple *at,
4772 bool *can_autoinc,
4773 int *inv_expr_id)
4774 {
4775 tree ubase = use->iv->base, ustep = use->iv->step;
4776 tree cbase, cstep;
4777 tree utype = TREE_TYPE (ubase), ctype;
4778 unsigned HOST_WIDE_INT cstepi, offset = 0;
4779 HOST_WIDE_INT ratio, aratio;
4780 bool var_present, symbol_present, stmt_is_after_inc;
4781 comp_cost cost;
4782 widest_int rat;
4783 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4784 machine_mode mem_mode = (address_p
4785 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4786 : VOIDmode);
4787
4788 if (depends_on)
4789 *depends_on = NULL;
4790
4791 /* Only consider real candidates. */
4792 if (!cand->iv)
4793 return infinite_cost;
4794
4795 cbase = cand->iv->base;
4796 cstep = cand->iv->step;
4797 ctype = TREE_TYPE (cbase);
4798
4799 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4800 {
4801 /* We do not have a precision to express the values of use. */
4802 return infinite_cost;
4803 }
4804
4805 if (address_p
4806 || (use->iv->base_object
4807 && cand->iv->base_object
4808 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4809 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4810 {
4811 /* Do not try to express the address of an object with a computation
4812 based on the address of a different object. This may cause problems
4813 in RTL-level alias analysis (which does not expect this to happen,
4814 as it is illegal in C), and would be unlikely to be useful
4815 anyway. */
4816 if (use->iv->base_object
4817 && cand->iv->base_object
4818 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4819 return infinite_cost;
4820 }
4821
4822 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4823 {
4824 /* TODO -- add direct handling of this case. */
4825 goto fallback;
4826 }
4827
4828 /* CSTEPI is removed from the offset in case the statement is after the
4829 increment. If the step is not constant, we use zero instead.
4830 This is a bit imprecise (there is the extra addition), but
4831 redundancy elimination is likely to transform the code so that
4832 it uses the value of the variable before the increment anyway,
4833 so this is not too unrealistic. */
4834 if (cst_and_fits_in_hwi (cstep))
4835 cstepi = int_cst_value (cstep);
4836 else
4837 cstepi = 0;
4838
4839 if (!constant_multiple_of (ustep, cstep, &rat))
4840 return infinite_cost;
4841
4842 if (wi::fits_shwi_p (rat))
4843 ratio = rat.to_shwi ();
4844 else
4845 return infinite_cost;
4846
4847 STRIP_NOPS (cbase);
4848 ctype = TREE_TYPE (cbase);
4849
4850 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4851
4852 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4853 or ratio == 1, it is better to handle this like
4854
4855 ubase - ratio * cbase + ratio * var
4856
4857 (this also holds in the case ratio == -1; handling that is a TODO). */
4858
4859 if (cst_and_fits_in_hwi (cbase))
4860 {
4861 offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
4862 cost = difference_cost (data,
4863 ubase, build_int_cst (utype, 0),
4864 &symbol_present, &var_present, &offset,
4865 depends_on);
4866 cost.cost /= avg_loop_niter (data->current_loop);
4867 }
4868 else if (ratio == 1)
4869 {
4870 tree real_cbase = cbase;
4871
4872 /* Check to see if any adjustment is needed. */
4873 if (cstepi == 0 && stmt_is_after_inc)
4874 {
4875 aff_tree real_cbase_aff;
4876 aff_tree cstep_aff;
4877
4878 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4879 &real_cbase_aff);
4880 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4881
4882 aff_combination_add (&real_cbase_aff, &cstep_aff);
4883 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4884 }
4885
4886 cost = difference_cost (data,
4887 ubase, real_cbase,
4888 &symbol_present, &var_present, &offset,
4889 depends_on);
4890 cost.cost /= avg_loop_niter (data->current_loop);
4891 }
4892 else if (address_p
4893 && !POINTER_TYPE_P (ctype)
4894 && multiplier_allowed_in_address_p
4895 (ratio, mem_mode,
4896 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4897 {
4898 if (cstepi == 0 && stmt_is_after_inc)
4899 {
4900 if (POINTER_TYPE_P (ctype))
4901 cbase = fold_build2 (POINTER_PLUS_EXPR, ctype, cbase, cstep);
4902 else
4903 cbase = fold_build2 (PLUS_EXPR, ctype, cbase, cstep);
4904 }
4905 cbase
4906 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4907 cost = difference_cost (data,
4908 ubase, cbase,
4909 &symbol_present, &var_present, &offset,
4910 depends_on);
4911 cost.cost /= avg_loop_niter (data->current_loop);
4912 }
4913 else
4914 {
4915 cost = force_var_cost (data, cbase, depends_on);
4916 cost = add_costs (cost,
4917 difference_cost (data,
4918 ubase, build_int_cst (utype, 0),
4919 &symbol_present, &var_present,
4920 &offset, depends_on));
4921 cost.cost /= avg_loop_niter (data->current_loop);
4922 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4923 }
4924
4925 /* The set of invariants depended on by the sub-use has already been
4926 computed for the first use in the group. */
4927 if (use->sub_id)
4928 {
4929 cost.cost = 0;
4930 if (depends_on && *depends_on)
4931 bitmap_clear (*depends_on);
4932 }
4933 else if (inv_expr_id)
4934 {
4935 *inv_expr_id =
4936 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4937 /* Clear depends on. */
4938 if (*inv_expr_id != -1 && depends_on && *depends_on)
4939 bitmap_clear (*depends_on);
4940 }
4941
4942 /* If we are after the increment, the value of the candidate is higher by
4943 one iteration. */
4944 if (stmt_is_after_inc)
4945 offset -= ratio * cstepi;
4946
4947 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4948 (symbol/var1/const parts may be omitted). If we are looking for an
4949 address, find the cost of addressing this. */
4950 if (address_p)
4951 return add_costs (cost,
4952 get_address_cost (symbol_present, var_present,
4953 offset, ratio, cstepi,
4954 mem_mode,
4955 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4956 speed, stmt_is_after_inc,
4957 can_autoinc));
4958
4959 /* Otherwise estimate the costs for computing the expression. */
4960 if (!symbol_present && !var_present && !offset)
4961 {
4962 if (ratio != 1)
4963 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4964 return cost;
4965 }
4966
4967 /* Symbol + offset should be compile-time computable so consider that they
4968 are added once to the variable, if present. */
4969 if (var_present && (symbol_present || offset))
4970 cost.cost += adjust_setup_cost (data,
4971 add_cost (speed, TYPE_MODE (ctype)));
4972
4973 /* Having offset does not affect runtime cost in case it is added to
4974 symbol, but it increases complexity. */
4975 if (offset)
4976 cost.complexity++;
4977
4978 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4979
4980 aratio = ratio > 0 ? ratio : -ratio;
4981 if (aratio != 1)
4982 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4983 return cost;
4984
4985 fallback:
4986 if (can_autoinc)
4987 *can_autoinc = false;
4988
4989 {
4990 /* Just get the expression, expand it and measure the cost. */
4991 tree comp = get_computation_at (data->current_loop, use, cand, at);
4992
4993 if (!comp)
4994 return infinite_cost;
4995
4996 if (address_p)
4997 comp = build_simple_mem_ref (comp);
4998
4999 return new_cost (computation_cost (comp, speed), 0);
5000 }
5001 }
5002
5003 /* Determines the cost of the computation by which USE is expressed
5004 from induction variable CAND. If ADDRESS_P is true, we just need
5005 to create an address from it, otherwise we want to get it into a
5006 register. A set of invariants we depend on is stored in
5007 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
5008 autoinc addressing is likely. */
5009
5010 static comp_cost
5011 get_computation_cost (struct ivopts_data *data,
5012 struct iv_use *use, struct iv_cand *cand,
5013 bool address_p, bitmap *depends_on,
5014 bool *can_autoinc, int *inv_expr_id)
5015 {
5016 return get_computation_cost_at (data,
5017 use, cand, address_p, depends_on, use->stmt,
5018 can_autoinc, inv_expr_id);
5019 }
5020
5021 /* Determines cost of basing replacement of USE on CAND in a generic
5022 expression. */
5023
5024 static bool
5025 determine_use_iv_cost_generic (struct ivopts_data *data,
5026 struct iv_use *use, struct iv_cand *cand)
5027 {
5028 bitmap depends_on;
5029 comp_cost cost;
5030 int inv_expr_id = -1;
5031
5032 /* The simple case first -- if we need to express the value of the
5033 preserved original biv, the cost is 0. This also prevents us from
5034 counting the cost of the increment twice -- once at this use and once
5035 in the cost of the candidate. */
5036 if (cand->pos == IP_ORIGINAL
5037 && cand->incremented_at == use->stmt)
5038 {
5039 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
5040 ERROR_MARK, -1);
5041 return true;
5042 }
5043
5044 cost = get_computation_cost (data, use, cand, false, &depends_on,
5045 NULL, &inv_expr_id);
5046
5047 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
5048 inv_expr_id);
5049
5050 return !infinite_cost_p (cost);
5051 }
5052
5053 /* Determines cost of basing replacement of USE on CAND in an address. */
5054
5055 static bool
5056 determine_use_iv_cost_address (struct ivopts_data *data,
5057 struct iv_use *use, struct iv_cand *cand)
5058 {
5059 bitmap depends_on;
5060 bool can_autoinc;
5061 int inv_expr_id = -1;
5062 struct iv_use *sub_use;
5063 comp_cost sub_cost;
5064 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
5065 &can_autoinc, &inv_expr_id);
5066
5067 if (cand->ainc_use == use)
5068 {
5069 if (can_autoinc)
5070 cost.cost -= cand->cost_step;
5071 /* If we generated the candidate solely for exploiting autoincrement
5072 opportunities, and it turns out it can't be used, set the cost to
5073 infinity to make sure we ignore it. */
5074 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
5075 cost = infinite_cost;
5076 }
5077 for (sub_use = use->next;
5078 sub_use && !infinite_cost_p (cost);
5079 sub_use = sub_use->next)
5080 {
5081 sub_cost = get_computation_cost (data, sub_use, cand, true, NULL,
5082 &can_autoinc, NULL);
5083 cost = add_costs (cost, sub_cost);
5084 }
5085
5086 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
5087 inv_expr_id);
5088
5089 return !infinite_cost_p (cost);
5090 }
5091
5092 /* Computes value of candidate CAND at position AT in iteration NITER, and
5093 stores it to VAL. */
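/* In affine form the value computed below is

       VAL = base + (NITER + (stmt_after_increment (...) ? 1 : 0)) * step,

   e.g. (hypothetical values) a candidate with base 0 and step 4, evaluated
   after the increment with NITER = 9, yields 40.  */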
5094
5095 static void
5096 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter,
5097 aff_tree *val)
5098 {
5099 aff_tree step, delta, nit;
5100 struct iv *iv = cand->iv;
5101 tree type = TREE_TYPE (iv->base);
5102 tree steptype = unsigned_type_for (type);
5106
5107 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
5108 aff_combination_convert (&step, steptype);
5109 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
5110 aff_combination_convert (&nit, steptype);
5111 aff_combination_mult (&nit, &step, &delta);
5112 if (stmt_after_increment (loop, cand, at))
5113 aff_combination_add (&delta, &step);
5114
5115 tree_to_aff_combination (iv->base, type, val);
5116 if (!POINTER_TYPE_P (type))
5117 aff_combination_convert (val, steptype);
5118 aff_combination_add (val, &delta);
5119 }
5120
5121 /* Returns period of induction variable iv. */
5122
5123 static tree
5124 iv_period (struct iv *iv)
5125 {
5126 tree step = iv->step, period, type;
5127 tree pow2div;
5128
5129 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
5130
5131 type = unsigned_type_for (TREE_TYPE (step));
5132 /* The period of the iv is lcm (step, type_range) / step - 1,
5133 i.e., N * type_range / step - 1. Since the type range is a power
5134 of two, N == step >> num_of_ending_zeros_binary (step),
5135 so the final result is
5136
5137 (type_range >> num_of_ending_zeros_binary (step)) - 1
5138
5139 */
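/* Example: for a 32-bit unsigned type and step 8, num_ending_zeros gives 3,
   so the period is (2^32 >> 3) - 1 = 0x1fffffff -- up to that many
   increments can be taken before the iv wraps back to a value already
   seen.  */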
5140 pow2div = num_ending_zeros (step);
5141
5142 period = build_low_bits_mask (type,
5143 (TYPE_PRECISION (type)
5144 - tree_to_uhwi (pow2div)));
5145
5146 return period;
5147 }
5148
5149 /* Returns the comparison operator used when eliminating the iv USE. */
5150
5151 static enum tree_code
5152 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
5153 {
5154 struct loop *loop = data->current_loop;
5155 basic_block ex_bb;
5156 edge exit;
5157
5158 ex_bb = gimple_bb (use->stmt);
5159 exit = EDGE_SUCC (ex_bb, 0);
5160 if (flow_bb_inside_loop_p (loop, exit->dest))
5161 exit = EDGE_SUCC (ex_bb, 1);
5162
5163 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
5164 }
5165
5166 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
5167 we only detect the situation that BASE = SOMETHING + OFFSET, where the
5168 calculation is performed in non-wrapping type.
5169
5170 TODO: More generally, we could test for the situation that
5171 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
5172 This would require knowing the sign of OFFSET. */
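/* Example (hypothetical SSA names): for BASE = p_0 where p_0 = q + n is
   computed in a non-wrapping (pointer or overflow-undefined) type and
   OFFSET = n, the affine expansion of the addend cancels against -OFFSET,
   so BASE - OFFSET = q provably does not overflow and true is returned.  */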
5173
5174 static bool
5175 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
5176 {
5177 enum tree_code code;
5178 tree e1, e2;
5179 aff_tree aff_e1, aff_e2, aff_offset;
5180
5181 if (!nowrap_type_p (TREE_TYPE (base)))
5182 return false;
5183
5184 base = expand_simple_operations (base);
5185
5186 if (TREE_CODE (base) == SSA_NAME)
5187 {
5188 gimple *stmt = SSA_NAME_DEF_STMT (base);
5189
5190 if (gimple_code (stmt) != GIMPLE_ASSIGN)
5191 return false;
5192
5193 code = gimple_assign_rhs_code (stmt);
5194 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5195 return false;
5196
5197 e1 = gimple_assign_rhs1 (stmt);
5198 e2 = gimple_assign_rhs2 (stmt);
5199 }
5200 else
5201 {
5202 code = TREE_CODE (base);
5203 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5204 return false;
5205 e1 = TREE_OPERAND (base, 0);
5206 e2 = TREE_OPERAND (base, 1);
5207 }
5208
5209 /* Use affine expansion as deeper inspection to prove the equality. */
5210 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
5211 &aff_e2, &data->name_expansion_cache);
5212 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
5213 &aff_offset, &data->name_expansion_cache);
5214 aff_combination_scale (&aff_offset, -1);
5215 switch (code)
5216 {
5217 case PLUS_EXPR:
5218 aff_combination_add (&aff_e2, &aff_offset);
5219 if (aff_combination_zero_p (&aff_e2))
5220 return true;
5221
5222 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
5223 &aff_e1, &data->name_expansion_cache);
5224 aff_combination_add (&aff_e1, &aff_offset);
5225 return aff_combination_zero_p (&aff_e1);
5226
5227 case POINTER_PLUS_EXPR:
5228 aff_combination_add (&aff_e2, &aff_offset);
5229 return aff_combination_zero_p (&aff_e2);
5230
5231 default:
5232 return false;
5233 }
5234 }
5235
5236 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
5237 comparison with CAND. NITER describes the number of iterations of
5238 the loops. If successful, the comparison in COMP_P is altered accordingly.
5239
5240 We aim to handle the following situation:
5241
5242 sometype *base, *p;
5243 int a, b, i;
5244
5245 i = a;
5246 p = p_0 = base + a;
5247
5248 do
5249 {
5250 bla (*p);
5251 p++;
5252 i++;
5253 }
5254 while (i < b);
5255
5256 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
5257 We aim to optimize this to
5258
5259 p = p_0 = base + a;
5260 do
5261 {
5262 bla (*p);
5263 p++;
5264 }
5265 while (p < p_0 - a + b);
5266
5267 This preserves the correctness, since the pointer arithmetics does not
5268 overflow. More precisely:
5269
5270 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
5271 overflow in computing it or the values of p.
5272 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
5273 overflow. To prove this, we use the fact that p_0 = base + a. */
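/* Concrete instance of the example above (hypothetical values): with a = 2
   and b = 7, NITER is b - a - 1 = 4 (the body executes five times), p starts
   at p_0 = base + 2, and the rewritten exit test is
   p < p_0 - a + b = base + 7, the first address past the last element
   visited.  Since a + 1 <= b, case 1) applies and the bound itself cannot
   overflow.  */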
5274
5275 static bool
5276 iv_elimination_compare_lt (struct ivopts_data *data,
5277 struct iv_cand *cand, enum tree_code *comp_p,
5278 struct tree_niter_desc *niter)
5279 {
5280 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
5281 struct aff_tree nit, tmpa, tmpb;
5282 enum tree_code comp;
5283 HOST_WIDE_INT step;
5284
5285 /* We need to know that the candidate induction variable does not overflow.
5286 While more complex analysis may be used to prove this, for now just
5287 check that the variable appears in the original program and that it
5288 is computed in a type that guarantees no overflows. */
5289 cand_type = TREE_TYPE (cand->iv->base);
5290 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
5291 return false;
5292
5293 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5294 the calculation of the BOUND could overflow, making the comparison
5295 invalid. */
5296 if (!data->loop_single_exit_p)
5297 return false;
5298
5299 /* We need to be able to decide whether candidate is increasing or decreasing
5300 in order to choose the right comparison operator. */
5301 if (!cst_and_fits_in_hwi (cand->iv->step))
5302 return false;
5303 step = int_cst_value (cand->iv->step);
5304
5305 /* Check that the number of iterations matches the expected pattern:
5306 a + 1 > b ? 0 : b - a - 1. */
5307 mbz = niter->may_be_zero;
5308 if (TREE_CODE (mbz) == GT_EXPR)
5309 {
5310 /* Handle a + 1 > b. */
5311 tree op0 = TREE_OPERAND (mbz, 0);
5312 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5313 {
5314 a = TREE_OPERAND (op0, 0);
5315 b = TREE_OPERAND (mbz, 1);
5316 }
5317 else
5318 return false;
5319 }
5320 else if (TREE_CODE (mbz) == LT_EXPR)
5321 {
5322 tree op1 = TREE_OPERAND (mbz, 1);
5323
5324 /* Handle b < a + 1. */
5325 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5326 {
5327 a = TREE_OPERAND (op1, 0);
5328 b = TREE_OPERAND (mbz, 0);
5329 }
5330 else
5331 return false;
5332 }
5333 else
5334 return false;
5335
5336 /* Expected number of iterations is B - A - 1. Check that it matches
5337 the actual number, i.e., that B - A - NITER = 1. */
5338 tree_to_aff_combination (niter->niter, nit_type, &nit);
5339 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5340 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5341 aff_combination_scale (&nit, -1);
5342 aff_combination_scale (&tmpa, -1);
5343 aff_combination_add (&tmpb, &tmpa);
5344 aff_combination_add (&tmpb, &nit);
5345 if (tmpb.n != 0 || tmpb.offset != 1)
5346 return false;
5347
5348 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5349 overflow. */
5350 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5351 cand->iv->step,
5352 fold_convert (TREE_TYPE (cand->iv->step), a));
5353 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5354 return false;
5355
5356 /* Determine the new comparison operator. */
5357 comp = step < 0 ? GT_EXPR : LT_EXPR;
5358 if (*comp_p == NE_EXPR)
5359 *comp_p = comp;
5360 else if (*comp_p == EQ_EXPR)
5361 *comp_p = invert_tree_comparison (comp, false);
5362 else
5363 gcc_unreachable ();
5364
5365 return true;
5366 }
5367
5368 /* Check whether it is possible to express the condition in USE by comparison
5369 of candidate CAND. If so, store the value compared with to BOUND, and the
5370 comparison operator to COMP. */
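/* Example: elimination requires the candidate's period to cover the
   iteration count.  A hypothetical 8-bit candidate with step 1 has period
   255, so it cannot replace the exit test of a loop known to iterate 300
   times, whereas a pointer-wide candidate normally can.  */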
5371
5372 static bool
5373 may_eliminate_iv (struct ivopts_data *data,
5374 struct iv_use *use, struct iv_cand *cand, tree *bound,
5375 enum tree_code *comp)
5376 {
5377 basic_block ex_bb;
5378 edge exit;
5379 tree period;
5380 struct loop *loop = data->current_loop;
5381 aff_tree bnd;
5382 struct tree_niter_desc *desc = NULL;
5383
5384 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5385 return false;
5386
5387 /* For now this works only for exits that dominate the loop latch.
5388 TODO: extend to other conditions inside the loop body. */
5389 ex_bb = gimple_bb (use->stmt);
5390 if (use->stmt != last_stmt (ex_bb)
5391 || gimple_code (use->stmt) != GIMPLE_COND
5392 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5393 return false;
5394
5395 exit = EDGE_SUCC (ex_bb, 0);
5396 if (flow_bb_inside_loop_p (loop, exit->dest))
5397 exit = EDGE_SUCC (ex_bb, 1);
5398 if (flow_bb_inside_loop_p (loop, exit->dest))
5399 return false;
5400
5401 desc = niter_for_exit (data, exit);
5402 if (!desc)
5403 return false;
5404
5405 /* Determine whether we can use the variable to test the exit condition.
5406 This is the case iff the period of the induction variable is greater
5407 than the number of iterations for which the exit condition is true. */
5408 period = iv_period (cand->iv);
5409
5410 /* If the number of iterations is constant, compare against it directly. */
5411 if (TREE_CODE (desc->niter) == INTEGER_CST)
5412 {
5413 /* See cand_value_at. */
5414 if (stmt_after_increment (loop, cand, use->stmt))
5415 {
5416 if (!tree_int_cst_lt (desc->niter, period))
5417 return false;
5418 }
5419 else
5420 {
5421 if (tree_int_cst_lt (period, desc->niter))
5422 return false;
5423 }
5424 }
5425
5426 /* If not, and if this is the only possible exit of the loop, see whether
5427 we can get a conservative estimate on the number of iterations of the
5428 entire loop and compare against that instead. */
5429 else
5430 {
5431 widest_int period_value, max_niter;
5432
5433 max_niter = desc->max;
5434 if (stmt_after_increment (loop, cand, use->stmt))
5435 max_niter += 1;
5436 period_value = wi::to_widest (period);
5437 if (wi::gtu_p (max_niter, period_value))
5438 {
5439 /* See if we can take advantage of inferred loop bound information. */
5440 if (data->loop_single_exit_p)
5441 {
5442 if (!max_loop_iterations (loop, &max_niter))
5443 return false;
5444 /* The loop bound is already adjusted by adding 1. */
5445 if (wi::gtu_p (max_niter, period_value))
5446 return false;
5447 }
5448 else
5449 return false;
5450 }
5451 }
5452
5453 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5454
5455 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5456 aff_combination_to_tree (&bnd));
5457 *comp = iv_elimination_compare (data, use);
5458
5459 /* It is unlikely that computing the number of iterations using division
5460 would be more profitable than keeping the original induction variable. */
5461 if (expression_expensive_p (*bound))
5462 return false;
5463
5464 /* Sometimes, it is possible to handle the situation that the number of
5465 iterations may be zero unless additional assumptions hold, by using <
5466 instead of != in the exit condition.
5467
5468 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5469 base the exit condition on it. However, that is often too
5470 expensive. */
5471 if (!integer_zerop (desc->may_be_zero))
5472 return iv_elimination_compare_lt (data, cand, comp, desc);
5473
5474 return true;
5475 }
5476
5477 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5478 be copied, if it is used in the loop body and DATA->body_includes_call. */
5479
5480 static int
5481 parm_decl_cost (struct ivopts_data *data, tree bound)
5482 {
5483 tree sbound = bound;
5484 STRIP_NOPS (sbound);
5485
5486 if (TREE_CODE (sbound) == SSA_NAME
5487 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5488 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5489 && data->body_includes_call)
5490 return COSTS_N_INSNS (1);
5491
5492 return 0;
5493 }
5494
5495 /* Determines cost of basing replacement of USE on CAND in a condition. */
5496
5497 static bool
5498 determine_use_iv_cost_condition (struct ivopts_data *data,
5499 struct iv_use *use, struct iv_cand *cand)
5500 {
5501 tree bound = NULL_TREE;
5502 struct iv *cmp_iv;
5503 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
5504 comp_cost elim_cost, express_cost, cost, bound_cost;
5505 bool ok;
5506 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
5507 tree *control_var, *bound_cst;
5508 enum tree_code comp = ERROR_MARK;
5509
5510 /* Only consider real candidates. */
5511 if (!cand->iv)
5512 {
5513 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
5514 ERROR_MARK, -1);
5515 return false;
5516 }
5517
5518 /* Try iv elimination. */
5519 if (may_eliminate_iv (data, use, cand, &bound, &comp))
5520 {
5521 elim_cost = force_var_cost (data, bound, &depends_on_elim);
5522 if (elim_cost.cost == 0)
5523 elim_cost.cost = parm_decl_cost (data, bound);
5524 else if (TREE_CODE (bound) == INTEGER_CST)
5525 elim_cost.cost = 0;
5526 /* If we replace a loop condition 'i < n' with 'p < base + n',
5527 depends_on_elim will have 'base' and 'n' set, which implies
5528 that both 'base' and 'n' will be live during the loop. More likely,
5529 'base + n' will be loop invariant, resulting in only one live value
5530 during the loop. So in that case we clear depends_on_elim and set
5531 elim_inv_expr_id instead. */
5532 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
5533 {
5534 elim_inv_expr_id = get_expr_id (data, bound);
5535 bitmap_clear (depends_on_elim);
5536 }
5537 /* The bound is a loop invariant, so it will be only computed
5538 once. */
5539 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5540 }
5541 else
5542 elim_cost = infinite_cost;
5543
5544 /* Try expressing the original giv. If it is compared with an invariant,
5545 note that we cannot get rid of it. */
5546 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
5547 NULL, &cmp_iv);
5548 gcc_assert (ok);
5549
5550 /* When the condition is a comparison of the candidate IV against
5551 zero, prefer this IV.
5552
5553 TODO: The constant that we're subtracting from the cost should
5554 be target-dependent. This information should be added to the
5555 target costs for each backend. */
5556 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
5557 && integer_zerop (*bound_cst)
5558 && (operand_equal_p (*control_var, cand->var_after, 0)
5559 || operand_equal_p (*control_var, cand->var_before, 0)))
5560 elim_cost.cost -= 1;
5561
5562 express_cost = get_computation_cost (data, use, cand, false,
5563 &depends_on_express, NULL,
5564 &express_inv_expr_id);
5565 fd_ivopts_data = data;
5566 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
5567
5568 /* Count the cost of the original bound as well. */
5569 bound_cost = force_var_cost (data, *bound_cst, NULL);
5570 if (bound_cost.cost == 0)
5571 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5572 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5573 bound_cost.cost = 0;
5574 express_cost.cost += bound_cost.cost;
5575
5576 /* Choose the better approach, preferring the eliminated IV. */
5577 if (compare_costs (elim_cost, express_cost) <= 0)
5578 {
5579 cost = elim_cost;
5580 depends_on = depends_on_elim;
5581 depends_on_elim = NULL;
5582 inv_expr_id = elim_inv_expr_id;
5583 }
5584 else
5585 {
5586 cost = express_cost;
5587 depends_on = depends_on_express;
5588 depends_on_express = NULL;
5589 bound = NULL_TREE;
5590 comp = ERROR_MARK;
5591 inv_expr_id = express_inv_expr_id;
5592 }
5593
5594 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
5595
5596 if (depends_on_elim)
5597 BITMAP_FREE (depends_on_elim);
5598 if (depends_on_express)
5599 BITMAP_FREE (depends_on_express);
5600
5601 return !infinite_cost_p (cost);
5602 }
5603
5604 /* Determines cost of basing replacement of USE on CAND. Returns false
5605 if USE cannot be based on CAND. */
5606
5607 static bool
5608 determine_use_iv_cost (struct ivopts_data *data,
5609 struct iv_use *use, struct iv_cand *cand)
5610 {
5611 switch (use->type)
5612 {
5613 case USE_NONLINEAR_EXPR:
5614 return determine_use_iv_cost_generic (data, use, cand);
5615
5616 case USE_ADDRESS:
5617 return determine_use_iv_cost_address (data, use, cand);
5618
5619 case USE_COMPARE:
5620 return determine_use_iv_cost_condition (data, use, cand);
5621
5622 default:
5623 gcc_unreachable ();
5624 }
5625 }
5626
5627 /* Return true if get_computation_cost indicates that autoincrement is
5628 a possibility for the pair of USE and CAND, false otherwise. */
5629
5630 static bool
5631 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5632 struct iv_cand *cand)
5633 {
5634 bitmap depends_on;
5635 bool can_autoinc;
5636 comp_cost cost;
5637
5638 if (use->type != USE_ADDRESS)
5639 return false;
5640
5641 cost = get_computation_cost (data, use, cand, true, &depends_on,
5642 &can_autoinc, NULL);
5643
5644 BITMAP_FREE (depends_on);
5645
5646 return !infinite_cost_p (cost) && can_autoinc;
5647 }
5648
5649 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5650 use that allows autoincrement, and set their AINC_USE if possible. */
5651
5652 static void
5653 set_autoinc_for_original_candidates (struct ivopts_data *data)
5654 {
5655 unsigned i, j;
5656
5657 for (i = 0; i < n_iv_cands (data); i++)
5658 {
5659 struct iv_cand *cand = iv_cand (data, i);
5660 struct iv_use *closest_before = NULL;
5661 struct iv_use *closest_after = NULL;
5662 if (cand->pos != IP_ORIGINAL)
5663 continue;
5664
5665 for (j = 0; j < n_iv_uses (data); j++)
5666 {
5667 struct iv_use *use = iv_use (data, j);
5668 unsigned uid = gimple_uid (use->stmt);
5669
5670 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5671 continue;
5672
5673 if (uid < gimple_uid (cand->incremented_at)
5674 && (closest_before == NULL
5675 || uid > gimple_uid (closest_before->stmt)))
5676 closest_before = use;
5677
5678 if (uid > gimple_uid (cand->incremented_at)
5679 && (closest_after == NULL
5680 || uid < gimple_uid (closest_after->stmt)))
5681 closest_after = use;
5682 }
5683
5684 if (closest_before != NULL
5685 && autoinc_possible_for_pair (data, closest_before, cand))
5686 cand->ainc_use = closest_before;
5687 else if (closest_after != NULL
5688 && autoinc_possible_for_pair (data, closest_after, cand))
5689 cand->ainc_use = closest_after;
5690 }
5691 }
5692
5693 /* Finds the candidates for the induction variables. */
5694
5695 static void
5696 find_iv_candidates (struct ivopts_data *data)
5697 {
5698 /* Add commonly used ivs. */
5699 add_standard_iv_candidates (data);
5700
5701 /* Add old induction variables. */
5702 add_iv_candidate_for_bivs (data);
5703
5704 /* Add induction variables derived from uses. */
5705 add_iv_candidate_for_uses (data);
5706
5707 set_autoinc_for_original_candidates (data);
5708
5709 /* Record the important candidates. */
5710 record_important_candidates (data);
5711 }
5712
5713 /* Determines costs of basing the use of the iv on an iv candidate. */
5714
5715 static void
5716 determine_use_iv_costs (struct ivopts_data *data)
5717 {
5718 unsigned i, j;
5719 struct iv_use *use;
5720 struct iv_cand *cand;
5721 bitmap to_clear = BITMAP_ALLOC (NULL);
5722
5723 alloc_use_cost_map (data);
5724
5725 for (i = 0; i < n_iv_uses (data); i++)
5726 {
5727 use = iv_use (data, i);
5728
5729 if (data->consider_all_candidates)
5730 {
5731 for (j = 0; j < n_iv_cands (data); j++)
5732 {
5733 cand = iv_cand (data, j);
5734 determine_use_iv_cost (data, use, cand);
5735 }
5736 }
5737 else
5738 {
5739 bitmap_iterator bi;
5740
5741 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5742 {
5743 cand = iv_cand (data, j);
5744 if (!determine_use_iv_cost (data, use, cand))
5745 bitmap_set_bit (to_clear, j);
5746 }
5747
5748 /* Remove the candidates for which the cost is infinite from
5749 the list of related candidates. */
5750 bitmap_and_compl_into (use->related_cands, to_clear);
5751 bitmap_clear (to_clear);
5752 }
5753 }
5754
5755 BITMAP_FREE (to_clear);
5756
5757 if (dump_file && (dump_flags & TDF_DETAILS))
5758 {
5759 fprintf (dump_file, "Use-candidate costs:\n");
5760
5761 for (i = 0; i < n_iv_uses (data); i++)
5762 {
5763 use = iv_use (data, i);
5764
5765 fprintf (dump_file, "Use %d:\n", i);
5766 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5767 for (j = 0; j < use->n_map_members; j++)
5768 {
5769 if (!use->cost_map[j].cand
5770 || infinite_cost_p (use->cost_map[j].cost))
5771 continue;
5772
5773 fprintf (dump_file, " %d\t%d\t%d\t",
5774 use->cost_map[j].cand->id,
5775 use->cost_map[j].cost.cost,
5776 use->cost_map[j].cost.complexity);
5777 if (use->cost_map[j].depends_on)
5778 bitmap_print (dump_file,
5779 use->cost_map[j].depends_on, "","");
5780 if (use->cost_map[j].inv_expr_id != -1)
5781 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5782 fprintf (dump_file, "\n");
5783 }
5784
5785 fprintf (dump_file, "\n");
5786 }
5787 fprintf (dump_file, "\n");
5788 }
5789 }
5790
5791 /* Determines cost of the candidate CAND. */
5792
5793 static void
5794 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5795 {
5796 comp_cost cost_base;
5797 unsigned cost, cost_step;
5798 tree base;
5799
5800 if (!cand->iv)
5801 {
5802 cand->cost = 0;
5803 return;
5804 }
5805
5806 /* There are two costs associated with the candidate -- its increment
5807 and its initialization. The latter is almost negligible for any loop
5808 that iterates enough times, so we take it into account only slightly. */
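  /* A hypothetical illustration (invented numbers, not from any target):
     if the step costs 1 insn-equivalent per iteration and the
     initialization costs 4, a loop expected to iterate about 100 times
     amortizes the setup to a negligible fraction, so the candidate's cost
     is dominated by the per-iteration increment; that is why the setup
     cost is passed through adjust_setup_cost below.  */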
5809
5810 base = cand->iv->base;
5811 cost_base = force_var_cost (data, base, NULL);
5812 /* It is rare that the iv register happens to be initialized with the
5813 proper value at no cost. In general, there will at least be a regcopy
5814 or a const set. */
5815 if (cost_base.cost == 0)
5816 cost_base.cost = COSTS_N_INSNS (1);
5817 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5818
5819 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5820
5821 /* Prefer the original ivs unless we may gain something by replacing them.
5822 The reason is to make debugging simpler; so this is not relevant for
5823 artificial ivs created by other optimization passes. */
5824 if (cand->pos != IP_ORIGINAL
5825 || !SSA_NAME_VAR (cand->var_before)
5826 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5827 cost++;
5828
5829 /* Prefer not to insert statements into the latch unless there are some
5830 already (so that we do not create unnecessary jumps). */
5831 if (cand->pos == IP_END
5832 && empty_block_p (ip_end_pos (data->current_loop)))
5833 cost++;
5834
5835 cand->cost = cost;
5836 cand->cost_step = cost_step;
5837 }
5838
5839 /* Determines costs of computation of the candidates. */
5840
5841 static void
5842 determine_iv_costs (struct ivopts_data *data)
5843 {
5844 unsigned i;
5845
5846 if (dump_file && (dump_flags & TDF_DETAILS))
5847 {
5848 fprintf (dump_file, "Candidate costs:\n");
5849 fprintf (dump_file, " cand\tcost\n");
5850 }
5851
5852 for (i = 0; i < n_iv_cands (data); i++)
5853 {
5854 struct iv_cand *cand = iv_cand (data, i);
5855
5856 determine_iv_cost (data, cand);
5857
5858 if (dump_file && (dump_flags & TDF_DETAILS))
5859 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5860 }
5861
5862 if (dump_file && (dump_flags & TDF_DETAILS))
5863 fprintf (dump_file, "\n");
5864 }
5865
5866 /* Calculates cost for having SIZE induction variables. */
5867
5868 static unsigned
5869 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5870 {
5871 /* We add size to the cost, so that we prefer eliminating ivs
5872 if possible. */
5873 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5874 data->body_includes_call);
5875 }
5876
5877 /* For each size of the induction variable set, determine the penalty. */
5878
5879 static void
5880 determine_set_costs (struct ivopts_data *data)
5881 {
5882 unsigned j, n;
5883 gphi *phi;
5884 gphi_iterator psi;
5885 tree op;
5886 struct loop *loop = data->current_loop;
5887 bitmap_iterator bi;
5888
5889 if (dump_file && (dump_flags & TDF_DETAILS))
5890 {
5891 fprintf (dump_file, "Global costs:\n");
5892 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5893 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5894 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5895 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5896 }
5897
5898 n = 0;
5899 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5900 {
5901 phi = psi.phi ();
5902 op = PHI_RESULT (phi);
5903
5904 if (virtual_operand_p (op))
5905 continue;
5906
5907 if (get_iv (data, op))
5908 continue;
5909
5910 n++;
5911 }
5912
5913 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5914 {
5915 struct version_info *info = ver_info (data, j);
5916
5917 if (info->inv_id && info->has_nonlin_use)
5918 n++;
5919 }
5920
5921 data->regs_used = n;
5922 if (dump_file && (dump_flags & TDF_DETAILS))
5923 fprintf (dump_file, " regs_used %d\n", n);
5924
5925 if (dump_file && (dump_flags & TDF_DETAILS))
5926 {
5927 fprintf (dump_file, " cost for size:\n");
5928 fprintf (dump_file, " ivs\tcost\n");
5929 for (j = 0; j <= 2 * target_avail_regs; j++)
5930 fprintf (dump_file, " %d\t%d\n", j,
5931 ivopts_global_cost_for_size (data, j));
5932 fprintf (dump_file, "\n");
5933 }
5934 }
5935
5936 /* Returns true if A is a cheaper cost pair than B. */
5937
5938 static bool
5939 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5940 {
5941 int cmp;
5942
5943 if (!a)
5944 return false;
5945
5946 if (!b)
5947 return true;
5948
5949 cmp = compare_costs (a->cost, b->cost);
5950 if (cmp < 0)
5951 return true;
5952
5953 if (cmp > 0)
5954 return false;
5955
5956 /* In case the costs are the same, prefer the cheaper candidate. */
5957 if (a->cand->cost < b->cand->cost)
5958 return true;
5959
5960 return false;
5961 }
5962
5963
5964 /* Returns the candidate by which USE is expressed in IVS. */
5965
5966 static struct cost_pair *
5967 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5968 {
5969 return ivs->cand_for_use[use->id];
5970 }
5971
5972 /* Computes the cost field of IVS structure. */
5973
5974 static void
5975 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5976 {
5977 comp_cost cost = ivs->cand_use_cost;
5978
5979 cost.cost += ivs->cand_cost;
5980
5981 cost.cost += ivopts_global_cost_for_size (data,
5982 ivs->n_regs + ivs->num_used_inv_expr);
5983
5984 ivs->cost = cost;
5985 }
5986
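/* For reference, the cost recomputed above decomposes as

     cost = cand_use_cost                   (sum of per-use costs)
          + cand_cost                       (sum of per-candidate increment costs)
          + ivopts_global_cost_for_size (n_regs + num_used_inv_expr)

   where the last term models register pressure.  This is an informal
   restatement of iv_ca_recount_cost above, not an additional invariant.  */
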
5987 /* Remove invariants in set INVS from set IVS. */
5988
5989 static void
5990 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5991 {
5992 bitmap_iterator bi;
5993 unsigned iid;
5994
5995 if (!invs)
5996 return;
5997
5998 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5999 {
6000 ivs->n_invariant_uses[iid]--;
6001 if (ivs->n_invariant_uses[iid] == 0)
6002 ivs->n_regs--;
6003 }
6004 }
6005
6006 /* Set USE not to be expressed by any candidate in IVS. */
6007
6008 static void
6009 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
6010 struct iv_use *use)
6011 {
6012 unsigned uid = use->id, cid;
6013 struct cost_pair *cp;
6014
6015 cp = ivs->cand_for_use[uid];
6016 if (!cp)
6017 return;
6018 cid = cp->cand->id;
6019
6020 ivs->bad_uses++;
6021 ivs->cand_for_use[uid] = NULL;
6022 ivs->n_cand_uses[cid]--;
6023
6024 if (ivs->n_cand_uses[cid] == 0)
6025 {
6026 bitmap_clear_bit (ivs->cands, cid);
6027 /* Do not count the pseudocandidates. */
6028 if (cp->cand->iv)
6029 ivs->n_regs--;
6030 ivs->n_cands--;
6031 ivs->cand_cost -= cp->cand->cost;
6032
6033 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
6034 }
6035
6036 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
6037
6038 iv_ca_set_remove_invariants (ivs, cp->depends_on);
6039
6040 if (cp->inv_expr_id != -1)
6041 {
6042 ivs->used_inv_expr[cp->inv_expr_id]--;
6043 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
6044 ivs->num_used_inv_expr--;
6045 }
6046 iv_ca_recount_cost (data, ivs);
6047 }
6048
6049 /* Add invariants in set INVS to set IVS. */
6050
6051 static void
6052 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
6053 {
6054 bitmap_iterator bi;
6055 unsigned iid;
6056
6057 if (!invs)
6058 return;
6059
6060 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6061 {
6062 ivs->n_invariant_uses[iid]++;
6063 if (ivs->n_invariant_uses[iid] == 1)
6064 ivs->n_regs++;
6065 }
6066 }
6067
6068 /* Set cost pair for USE in set IVS to CP. */
6069
6070 static void
6071 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
6072 struct iv_use *use, struct cost_pair *cp)
6073 {
6074 unsigned uid = use->id, cid;
6075
6076 if (ivs->cand_for_use[uid] == cp)
6077 return;
6078
6079 if (ivs->cand_for_use[uid])
6080 iv_ca_set_no_cp (data, ivs, use);
6081
6082 if (cp)
6083 {
6084 cid = cp->cand->id;
6085
6086 ivs->bad_uses--;
6087 ivs->cand_for_use[uid] = cp;
6088 ivs->n_cand_uses[cid]++;
6089 if (ivs->n_cand_uses[cid] == 1)
6090 {
6091 bitmap_set_bit (ivs->cands, cid);
6092 /* Do not count the pseudocandidates. */
6093 if (cp->cand->iv)
6094 ivs->n_regs++;
6095 ivs->n_cands++;
6096 ivs->cand_cost += cp->cand->cost;
6097
6098 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
6099 }
6100
6101 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
6102 iv_ca_set_add_invariants (ivs, cp->depends_on);
6103
6104 if (cp->inv_expr_id != -1)
6105 {
6106 ivs->used_inv_expr[cp->inv_expr_id]++;
6107 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
6108 ivs->num_used_inv_expr++;
6109 }
6110 iv_ca_recount_cost (data, ivs);
6111 }
6112 }
6113
6114 /* Extend set IVS by expressing USE by some of the candidates in it
6115 if possible. Consider all important candidates if candidates in
6116 set IVS don't give any result. */
6117
6118 static void
6119 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
6120 struct iv_use *use)
6121 {
6122 struct cost_pair *best_cp = NULL, *cp;
6123 bitmap_iterator bi;
6124 unsigned i;
6125 struct iv_cand *cand;
6126
6127 gcc_assert (ivs->upto >= use->id);
6128 ivs->upto++;
6129 ivs->bad_uses++;
6130
6131 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6132 {
6133 cand = iv_cand (data, i);
6134 cp = get_use_iv_cost (data, use, cand);
6135 if (cheaper_cost_pair (cp, best_cp))
6136 best_cp = cp;
6137 }
6138
6139 if (best_cp == NULL)
6140 {
6141 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6142 {
6143 cand = iv_cand (data, i);
6144 cp = get_use_iv_cost (data, use, cand);
6145 if (cheaper_cost_pair (cp, best_cp))
6146 best_cp = cp;
6147 }
6148 }
6149
6150 iv_ca_set_cp (data, ivs, use, best_cp);
6151 }
6152
6153 /* Get cost for assignment IVS. */
6154
6155 static comp_cost
6156 iv_ca_cost (struct iv_ca *ivs)
6157 {
6158 /* This was a conditional expression but it triggered a bug in
6159 Sun C 5.5. */
6160 if (ivs->bad_uses)
6161 return infinite_cost;
6162 else
6163 return ivs->cost;
6164 }
6165
6166 /* Returns true if all dependences of CP are among invariants in IVS. */
6167
6168 static bool
6169 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
6170 {
6171 unsigned i;
6172 bitmap_iterator bi;
6173
6174 if (!cp->depends_on)
6175 return true;
6176
6177 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
6178 {
6179 if (ivs->n_invariant_uses[i] == 0)
6180 return false;
6181 }
6182
6183 return true;
6184 }
6185
6186 /* Creates a change record for expressing USE by NEW_CP instead of OLD_CP
6187 and chains it before NEXT_CHANGE. */
6188
6189 static struct iv_ca_delta *
6190 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
6191 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
6192 {
6193 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
6194
6195 change->use = use;
6196 change->old_cp = old_cp;
6197 change->new_cp = new_cp;
6198 change->next_change = next_change;
6199
6200 return change;
6201 }
6202
6203 /* Joins two lists of changes L1 and L2. Destructive -- old lists
6204 are rewritten. */
6205
6206 static struct iv_ca_delta *
6207 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6208 {
6209 struct iv_ca_delta *last;
6210
6211 if (!l2)
6212 return l1;
6213
6214 if (!l1)
6215 return l2;
6216
6217 for (last = l1; last->next_change; last = last->next_change)
6218 continue;
6219 last->next_change = l2;
6220
6221 return l1;
6222 }
6223
6224 /* Reverse the list of changes DELTA, forming the inverse to it. */
6225
6226 static struct iv_ca_delta *
6227 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6228 {
6229 struct iv_ca_delta *act, *next, *prev = NULL;
6230
6231 for (act = delta; act; act = next)
6232 {
6233 next = act->next_change;
6234 act->next_change = prev;
6235 prev = act;
6236
6237 std::swap (act->old_cp, act->new_cp);
6238 }
6239
6240 return prev;
6241 }
6242
6243 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
6244 reverted instead. */
6245
6246 static void
6247 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
6248 struct iv_ca_delta *delta, bool forward)
6249 {
6250 struct cost_pair *from, *to;
6251 struct iv_ca_delta *act;
6252
6253 if (!forward)
6254 delta = iv_ca_delta_reverse (delta);
6255
6256 for (act = delta; act; act = act->next_change)
6257 {
6258 from = act->old_cp;
6259 to = act->new_cp;
6260 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
6261 iv_ca_set_cp (data, ivs, act->use, to);
6262 }
6263
6264 if (!forward)
6265 iv_ca_delta_reverse (delta);
6266 }
6267
6268 /* Returns true if CAND is used in IVS. */
6269
6270 static bool
6271 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
6272 {
6273 return ivs->n_cand_uses[cand->id] > 0;
6274 }
6275
6276 /* Returns number of induction variable candidates in the set IVS. */
6277
6278 static unsigned
6279 iv_ca_n_cands (struct iv_ca *ivs)
6280 {
6281 return ivs->n_cands;
6282 }
6283
6284 /* Free the list of changes DELTA. */
6285
6286 static void
6287 iv_ca_delta_free (struct iv_ca_delta **delta)
6288 {
6289 struct iv_ca_delta *act, *next;
6290
6291 for (act = *delta; act; act = next)
6292 {
6293 next = act->next_change;
6294 free (act);
6295 }
6296
6297 *delta = NULL;
6298 }
6299
6300 /* Allocates a new iv candidate assignment. */
6301
6302 static struct iv_ca *
6303 iv_ca_new (struct ivopts_data *data)
6304 {
6305 struct iv_ca *nw = XNEW (struct iv_ca);
6306
6307 nw->upto = 0;
6308 nw->bad_uses = 0;
6309 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
6310 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
6311 nw->cands = BITMAP_ALLOC (NULL);
6312 nw->n_cands = 0;
6313 nw->n_regs = 0;
6314 nw->cand_use_cost = no_cost;
6315 nw->cand_cost = 0;
6316 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
6317 nw->cost = no_cost;
6318 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
6319 nw->num_used_inv_expr = 0;
6320
6321 return nw;
6322 }
6323
6324 /* Free memory occupied by the set IVS. */
6325
6326 static void
6327 iv_ca_free (struct iv_ca **ivs)
6328 {
6329 free ((*ivs)->cand_for_use);
6330 free ((*ivs)->n_cand_uses);
6331 BITMAP_FREE ((*ivs)->cands);
6332 free ((*ivs)->n_invariant_uses);
6333 free ((*ivs)->used_inv_expr);
6334 free (*ivs);
6335 *ivs = NULL;
6336 }
6337
6338 /* Dumps IVS to FILE. */
6339
6340 static void
6341 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
6342 {
6343 const char *pref = " invariants ";
6344 unsigned i;
6345 comp_cost cost = iv_ca_cost (ivs);
6346
6347 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
6348 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
6349 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
6350 bitmap_print (file, ivs->cands, " candidates: ","\n");
6351
6352 for (i = 0; i < ivs->upto; i++)
6353 {
6354 struct iv_use *use = iv_use (data, i);
6355 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
6356 if (cp)
6357 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
6358 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
6359 else
6360 fprintf (file, " use:%d --> ??\n", use->id);
6361 }
6362
6363 for (i = 1; i <= data->max_inv_id; i++)
6364 if (ivs->n_invariant_uses[i])
6365 {
6366 fprintf (file, "%s%d", pref, i);
6367 pref = ", ";
6368 }
6369 fprintf (file, "\n\n");
6370 }
6371
6372 /* Try changing the candidate in IVS to CAND for each use. Return the cost
6373 of the new set, and store the differences in DELTA. The number of induction
6374 variables in the new set is stored in N_IVS. MIN_NCAND is a flag; when it is
6375 true, the function tries to find a solution with minimal iv candidates. */
6376
6377 static comp_cost
6378 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6379 struct iv_cand *cand, struct iv_ca_delta **delta,
6380 unsigned *n_ivs, bool min_ncand)
6381 {
6382 unsigned i;
6383 comp_cost cost;
6384 struct iv_use *use;
6385 struct cost_pair *old_cp, *new_cp;
6386
6387 *delta = NULL;
6388 for (i = 0; i < ivs->upto; i++)
6389 {
6390 use = iv_use (data, i);
6391 old_cp = iv_ca_cand_for_use (ivs, use);
6392
6393 if (old_cp
6394 && old_cp->cand == cand)
6395 continue;
6396
6397 new_cp = get_use_iv_cost (data, use, cand);
6398 if (!new_cp)
6399 continue;
6400
6401 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
6402 continue;
6403
6404 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
6405 continue;
6406
6407 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6408 }
6409
6410 iv_ca_delta_commit (data, ivs, *delta, true);
6411 cost = iv_ca_cost (ivs);
6412 if (n_ivs)
6413 *n_ivs = iv_ca_n_cands (ivs);
6414 iv_ca_delta_commit (data, ivs, *delta, false);
6415
6416 return cost;
6417 }
6418
6419 /* Try narrowing set IVS by removing CAND. Return the cost of
6420 the new set and store the differences in DELTA. START is
6421 the candidate with which we start narrowing. */
6422
6423 static comp_cost
6424 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6425 struct iv_cand *cand, struct iv_cand *start,
6426 struct iv_ca_delta **delta)
6427 {
6428 unsigned i, ci;
6429 struct iv_use *use;
6430 struct cost_pair *old_cp, *new_cp, *cp;
6431 bitmap_iterator bi;
6432 struct iv_cand *cnd;
6433 comp_cost cost, best_cost, acost;
6434
6435 *delta = NULL;
6436 for (i = 0; i < n_iv_uses (data); i++)
6437 {
6438 use = iv_use (data, i);
6439
6440 old_cp = iv_ca_cand_for_use (ivs, use);
6441 if (old_cp->cand != cand)
6442 continue;
6443
6444 best_cost = iv_ca_cost (ivs);
6445 /* Start narrowing with START. */
6446 new_cp = get_use_iv_cost (data, use, start);
6447
6448 if (data->consider_all_candidates)
6449 {
6450 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6451 {
6452 if (ci == cand->id || (start && ci == start->id))
6453 continue;
6454
6455 cnd = iv_cand (data, ci);
6456
6457 cp = get_use_iv_cost (data, use, cnd);
6458 if (!cp)
6459 continue;
6460
6461 iv_ca_set_cp (data, ivs, use, cp);
6462 acost = iv_ca_cost (ivs);
6463
6464 if (compare_costs (acost, best_cost) < 0)
6465 {
6466 best_cost = acost;
6467 new_cp = cp;
6468 }
6469 }
6470 }
6471 else
6472 {
6473 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
6474 {
6475 if (ci == cand->id || (start && ci == start->id))
6476 continue;
6477
6478 cnd = iv_cand (data, ci);
6479
6480 cp = get_use_iv_cost (data, use, cnd);
6481 if (!cp)
6482 continue;
6483
6484 iv_ca_set_cp (data, ivs, use, cp);
6485 acost = iv_ca_cost (ivs);
6486
6487 if (compare_costs (acost, best_cost) < 0)
6488 {
6489 best_cost = acost;
6490 new_cp = cp;
6491 }
6492 }
6493 }
6494 /* Restore the old cp for this use. */
6495 iv_ca_set_cp (data, ivs, use, old_cp);
6496
6497 if (!new_cp)
6498 {
6499 iv_ca_delta_free (delta);
6500 return infinite_cost;
6501 }
6502
6503 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6504 }
6505
6506 iv_ca_delta_commit (data, ivs, *delta, true);
6507 cost = iv_ca_cost (ivs);
6508 iv_ca_delta_commit (data, ivs, *delta, false);
6509
6510 return cost;
6511 }
6512
6513 /* Try optimizing the set of candidates IVS by removing candidates other
6514 than EXCEPT_CAND from it. Return the cost of the new set, and store the
6515 differences in DELTA. */
6516
6517 static comp_cost
6518 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6519 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6520 {
6521 bitmap_iterator bi;
6522 struct iv_ca_delta *act_delta, *best_delta;
6523 unsigned i;
6524 comp_cost best_cost, acost;
6525 struct iv_cand *cand;
6526
6527 best_delta = NULL;
6528 best_cost = iv_ca_cost (ivs);
6529
6530 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6531 {
6532 cand = iv_cand (data, i);
6533
6534 if (cand == except_cand)
6535 continue;
6536
6537 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6538
6539 if (compare_costs (acost, best_cost) < 0)
6540 {
6541 best_cost = acost;
6542 iv_ca_delta_free (&best_delta);
6543 best_delta = act_delta;
6544 }
6545 else
6546 iv_ca_delta_free (&act_delta);
6547 }
6548
6549 if (!best_delta)
6550 {
6551 *delta = NULL;
6552 return best_cost;
6553 }
6554
6555 /* Recurse to possibly remove other unnecessary ivs. */
6556 iv_ca_delta_commit (data, ivs, best_delta, true);
6557 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6558 iv_ca_delta_commit (data, ivs, best_delta, false);
6559 *delta = iv_ca_delta_join (best_delta, *delta);
6560 return best_cost;
6561 }
6562
6563 /* Check if CAND_IDX is a candidate other than OLD_CAND with a
6564 cheaper local cost for USE than BEST_CP. If so, return a pointer to
6565 the corresponding cost_pair; otherwise just return BEST_CP. */
6566
6567 static struct cost_pair*
6568 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_use *use,
6569 unsigned int cand_idx, struct iv_cand *old_cand,
6570 struct cost_pair *best_cp)
6571 {
6572 struct iv_cand *cand;
6573 struct cost_pair *cp;
6574
6575 gcc_assert (old_cand != NULL && best_cp != NULL);
6576 if (cand_idx == old_cand->id)
6577 return best_cp;
6578
6579 cand = iv_cand (data, cand_idx);
6580 cp = get_use_iv_cost (data, use, cand);
6581 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6582 return cp;
6583
6584 return best_cp;
6585 }
6586
6587 /* Try breaking the local optimal fixed-point for IVS by replacing candidates
6588 that are used by more than one iv use. For each of those candidates,
6589 this function tries to represent the iv uses under that candidate using
6590 other candidates with lower local cost, then tries to prune the new set.
6591 If the new set has a lower cost, it returns the new cost after recording
6592 the candidate replacements in the list DELTA. */
6593
6594 static comp_cost
6595 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6596 struct iv_ca_delta **delta)
6597 {
6598 bitmap_iterator bi, bj;
6599 unsigned int i, j, k;
6600 struct iv_use *use;
6601 struct iv_cand *cand;
6602 comp_cost orig_cost, acost;
6603 struct iv_ca_delta *act_delta, *tmp_delta;
6604 struct cost_pair *old_cp, *best_cp = NULL;
6605
6606 *delta = NULL;
6607 orig_cost = iv_ca_cost (ivs);
6608
6609 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6610 {
6611 if (ivs->n_cand_uses[i] == 1
6612 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6613 continue;
6614
6615 cand = iv_cand (data, i);
6616
6617 act_delta = NULL;
6618 /* Represent uses under current candidate using other ones with
6619 lower local cost. */
6620 for (j = 0; j < ivs->upto; j++)
6621 {
6622 use = iv_use (data, j);
6623 old_cp = iv_ca_cand_for_use (ivs, use);
6624
6625 if (old_cp->cand != cand)
6626 continue;
6627
6628 best_cp = old_cp;
6629 if (data->consider_all_candidates)
6630 for (k = 0; k < n_iv_cands (data); k++)
6631 best_cp = cheaper_cost_with_cand (data, use, k,
6632 old_cp->cand, best_cp);
6633 else
6634 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, k, bj)
6635 best_cp = cheaper_cost_with_cand (data, use, k,
6636 old_cp->cand, best_cp);
6637
6638 if (best_cp == old_cp)
6639 continue;
6640
6641 act_delta = iv_ca_delta_add (use, old_cp, best_cp, act_delta);
6642 }
6643 /* No need for further pruning. */
6644 if (!act_delta)
6645 continue;
6646
6647 /* Prune the new candidate set. */
6648 iv_ca_delta_commit (data, ivs, act_delta, true);
6649 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6650 iv_ca_delta_commit (data, ivs, act_delta, false);
6651 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6652
6653 if (compare_costs (acost, orig_cost) < 0)
6654 {
6655 *delta = act_delta;
6656 return acost;
6657 }
6658 else
6659 iv_ca_delta_free (&act_delta);
6660 }
6661
6662 return orig_cost;
6663 }
6664
6665 /* Tries to extend the set IVS in the best possible way in order
6666 to express USE. If ORIGINALP is true, prefer candidates from
6667 the original set of IVs, otherwise favor important candidates not
6668 based on any memory object. */
6669
6670 static bool
6671 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6672 struct iv_use *use, bool originalp)
6673 {
6674 comp_cost best_cost, act_cost;
6675 unsigned i;
6676 bitmap_iterator bi;
6677 struct iv_cand *cand;
6678 struct iv_ca_delta *best_delta = NULL, *act_delta;
6679 struct cost_pair *cp;
6680
6681 iv_ca_add_use (data, ivs, use);
6682 best_cost = iv_ca_cost (ivs);
6683 cp = iv_ca_cand_for_use (ivs, use);
6684 if (cp)
6685 {
6686 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
6687 iv_ca_set_no_cp (data, ivs, use);
6688 }
6689
6690 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6691 first try important candidates not based on any memory object. Only if
6692 this fails, try the specific ones. Rationale -- in loops with many
6693 variables the best choice often is to use just one generic biv. If we
6694 added here many ivs specific to the uses, the optimization algorithm later
6695 would be likely to get stuck in a local minimum, thus causing us to create
6696 too many ivs. The approach from few ivs to more seems more likely to be
6697 successful -- starting from few ivs, replacing an expensive use by a
6698 specific iv should always be a win. */
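  /* Hypothetical illustration of the rationale above: for a loop whose
     uses are a[i], b[i] and c[i], starting from the single generic biv i
     and only later considering use-specific pointers such as &a[i] tends
     to keep a single iv, whereas seeding every use with its own specific
     iv could lock the search into a three-iv local minimum.  */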
6699 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, i, bi)
6700 {
6701 cand = iv_cand (data, i);
6702
6703 if (originalp && cand->pos !=IP_ORIGINAL)
6704 continue;
6705
6706 if (!originalp && cand->iv->base_object != NULL_TREE)
6707 continue;
6708
6709 if (iv_ca_cand_used_p (ivs, cand))
6710 continue;
6711
6712 cp = get_use_iv_cost (data, use, cand);
6713 if (!cp)
6714 continue;
6715
6716 iv_ca_set_cp (data, ivs, use, cp);
6717 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6718 true);
6719 iv_ca_set_no_cp (data, ivs, use);
6720 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
6721
6722 if (compare_costs (act_cost, best_cost) < 0)
6723 {
6724 best_cost = act_cost;
6725
6726 iv_ca_delta_free (&best_delta);
6727 best_delta = act_delta;
6728 }
6729 else
6730 iv_ca_delta_free (&act_delta);
6731 }
6732
6733 if (infinite_cost_p (best_cost))
6734 {
6735 for (i = 0; i < use->n_map_members; i++)
6736 {
6737 cp = use->cost_map + i;
6738 cand = cp->cand;
6739 if (!cand)
6740 continue;
6741
6742 /* Already tried this. */
6743 if (cand->important)
6744 {
6745 if (originalp && cand->pos == IP_ORIGINAL)
6746 continue;
6747 if (!originalp && cand->iv->base_object == NULL_TREE)
6748 continue;
6749 }
6750
6751 if (iv_ca_cand_used_p (ivs, cand))
6752 continue;
6753
6754 act_delta = NULL;
6755 iv_ca_set_cp (data, ivs, use, cp);
6756 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6757 iv_ca_set_no_cp (data, ivs, use);
6758 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
6759 cp, act_delta);
6760
6761 if (compare_costs (act_cost, best_cost) < 0)
6762 {
6763 best_cost = act_cost;
6764
6765 if (best_delta)
6766 iv_ca_delta_free (&best_delta);
6767 best_delta = act_delta;
6768 }
6769 else
6770 iv_ca_delta_free (&act_delta);
6771 }
6772 }
6773
6774 iv_ca_delta_commit (data, ivs, best_delta, true);
6775 iv_ca_delta_free (&best_delta);
6776
6777 return !infinite_cost_p (best_cost);
6778 }
6779
6780 /* Finds an initial assignment of candidates to uses. */
6781
6782 static struct iv_ca *
6783 get_initial_solution (struct ivopts_data *data, bool originalp)
6784 {
6785 struct iv_ca *ivs = iv_ca_new (data);
6786 unsigned i;
6787
6788 for (i = 0; i < n_iv_uses (data); i++)
6789 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
6790 {
6791 iv_ca_free (&ivs);
6792 return NULL;
6793 }
6794
6795 return ivs;
6796 }
6797
6798 /* Tries to improve the set of induction variables IVS. TRY_REPLACE_P
6799 points to a bool variable; if it is true, this function tries to break
6800 a local optimal fixed-point by replacing candidates in IVS. */
6801
6802 static bool
6803 try_improve_iv_set (struct ivopts_data *data,
6804 struct iv_ca *ivs, bool *try_replace_p)
6805 {
6806 unsigned i, n_ivs;
6807 comp_cost acost, best_cost = iv_ca_cost (ivs);
6808 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6809 struct iv_cand *cand;
6810
6811 /* Try extending the set of induction variables by one. */
6812 for (i = 0; i < n_iv_cands (data); i++)
6813 {
6814 cand = iv_cand (data, i);
6815
6816 if (iv_ca_cand_used_p (ivs, cand))
6817 continue;
6818
6819 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6820 if (!act_delta)
6821 continue;
6822
6823 /* If we successfully added the candidate and the set is small enough,
6824 try optimizing it by removing other candidates. */
6825 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6826 {
6827 iv_ca_delta_commit (data, ivs, act_delta, true);
6828 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6829 iv_ca_delta_commit (data, ivs, act_delta, false);
6830 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6831 }
6832
6833 if (compare_costs (acost, best_cost) < 0)
6834 {
6835 best_cost = acost;
6836 iv_ca_delta_free (&best_delta);
6837 best_delta = act_delta;
6838 }
6839 else
6840 iv_ca_delta_free (&act_delta);
6841 }
6842
6843 if (!best_delta)
6844 {
6845 /* Try removing the candidates from the set instead. */
6846 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6847
6848 if (!best_delta && *try_replace_p)
6849 {
6850 *try_replace_p = false;
6851 /* So far the candidate selection algorithm tends to choose fewer IVs,
6852 so that it can handle cases in which loops have many variables
6853 but the best choice is often to use only one general biv. One
6854 weakness is that it cannot handle the opposite cases, in which different
6855 candidates should be chosen with respect to each use. To solve
6856 that problem, we replace candidates in the manner described in the
6857 comments of iv_ca_replace, thus giving the general algorithm a chance
6858 to break the local optimal fixed-point in these cases. */
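	  /* Hypothetical example: if use A is much cheaper with candidate X
	     and use B with candidate Y, but the search so far settled on a
	     single candidate Z for both, neither extending nor pruning the
	     set improves the cost; replacing Z as described in
	     iv_ca_replace can.  */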
6859 best_cost = iv_ca_replace (data, ivs, &best_delta);
6860 }
6861
6862 if (!best_delta)
6863 return false;
6864 }
6865
6866 iv_ca_delta_commit (data, ivs, best_delta, true);
6867 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6868 iv_ca_delta_free (&best_delta);
6869 return true;
6870 }
6871
6872 /* Attempts to find the optimal set of induction variables. We use a simple
6873 greedy heuristic -- we try to replace at most one candidate in the selected
6874 solution and remove the unused ivs while this improves the cost. */
6875
6876 static struct iv_ca *
6877 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6878 {
6879 struct iv_ca *set;
6880 bool try_replace_p = true;
6881
6882 /* Get the initial solution. */
6883 set = get_initial_solution (data, originalp);
6884 if (!set)
6885 {
6886 if (dump_file && (dump_flags & TDF_DETAILS))
6887 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6888 return NULL;
6889 }
6890
6891 if (dump_file && (dump_flags & TDF_DETAILS))
6892 {
6893 fprintf (dump_file, "Initial set of candidates:\n");
6894 iv_ca_dump (data, dump_file, set);
6895 }
6896
6897 while (try_improve_iv_set (data, set, &try_replace_p))
6898 {
6899 if (dump_file && (dump_flags & TDF_DETAILS))
6900 {
6901 fprintf (dump_file, "Improved to:\n");
6902 iv_ca_dump (data, dump_file, set);
6903 }
6904 }
6905
6906 return set;
6907 }
6908
6909 static struct iv_ca *
6910 find_optimal_iv_set (struct ivopts_data *data)
6911 {
6912 unsigned i;
6913 struct iv_ca *set, *origset;
6914 struct iv_use *use;
6915 comp_cost cost, origcost;
6916
6917 /* Determine the cost based on a strategy that starts with the original IVs,
6918 and try again using a strategy that prefers candidates not based
6919 on any memory object. */
6920 origset = find_optimal_iv_set_1 (data, true);
6921 set = find_optimal_iv_set_1 (data, false);
6922
6923 if (!origset && !set)
6924 return NULL;
6925
6926 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6927 cost = set ? iv_ca_cost (set) : infinite_cost;
6928
6929 if (dump_file && (dump_flags & TDF_DETAILS))
6930 {
6931 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6932 origcost.cost, origcost.complexity);
6933 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6934 cost.cost, cost.complexity);
6935 }
6936
6937 /* Choose the one with the best cost. */
6938 if (compare_costs (origcost, cost) <= 0)
6939 {
6940 if (set)
6941 iv_ca_free (&set);
6942 set = origset;
6943 }
6944 else if (origset)
6945 iv_ca_free (&origset);
6946
6947 for (i = 0; i < n_iv_uses (data); i++)
6948 {
6949 use = iv_use (data, i);
6950 use->selected = iv_ca_cand_for_use (set, use)->cand;
6951 }
6952
6953 return set;
6954 }
6955
6956 /* Creates a new induction variable corresponding to CAND. */
6957
6958 static void
6959 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6960 {
6961 gimple_stmt_iterator incr_pos;
6962 tree base;
6963 bool after = false;
6964
6965 if (!cand->iv)
6966 return;
6967
6968 switch (cand->pos)
6969 {
6970 case IP_NORMAL:
6971 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6972 break;
6973
6974 case IP_END:
6975 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6976 after = true;
6977 break;
6978
6979 case IP_AFTER_USE:
6980 after = true;
6981 /* fall through */
6982 case IP_BEFORE_USE:
6983 incr_pos = gsi_for_stmt (cand->incremented_at);
6984 break;
6985
6986 case IP_ORIGINAL:
6987 /* Mark that the iv is preserved. */
6988 name_info (data, cand->var_before)->preserve_biv = true;
6989 name_info (data, cand->var_after)->preserve_biv = true;
6990
6991 /* Rewrite the increment so that it uses var_before directly. */
6992 find_interesting_uses_op (data, cand->var_after)->selected = cand;
6993 return;
6994 }
6995
6996 gimple_add_tmp_var (cand->var_before);
6997
6998 base = unshare_expr (cand->iv->base);
6999
7000 create_iv (base, unshare_expr (cand->iv->step),
7001 cand->var_before, data->current_loop,
7002 &incr_pos, after, &cand->var_before, &cand->var_after);
7003 }
7004
7005 /* Creates new induction variables described in SET. */
7006
7007 static void
7008 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
7009 {
7010 unsigned i;
7011 struct iv_cand *cand;
7012 bitmap_iterator bi;
7013
7014 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7015 {
7016 cand = iv_cand (data, i);
7017 create_new_iv (data, cand);
7018 }
7019
7020 if (dump_file && (dump_flags & TDF_DETAILS))
7021 {
7022 fprintf (dump_file, "Selected IV set for loop %d",
7023 data->current_loop->num);
7024 if (data->loop_loc != UNKNOWN_LOCATION)
7025 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7026 LOCATION_LINE (data->loop_loc));
7027 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
7028 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7029 {
7030 cand = iv_cand (data, i);
7031 dump_cand (dump_file, cand);
7032 }
7033 fprintf (dump_file, "\n");
7034 }
7035 }
7036
7037 /* Rewrites USE (definition of iv used in a nonlinear expression)
7038 using candidate CAND. */
7039
7040 static void
7041 rewrite_use_nonlinear_expr (struct ivopts_data *data,
7042 struct iv_use *use, struct iv_cand *cand)
7043 {
7044 tree comp;
7045 tree op, tgt;
7046 gassign *ass;
7047 gimple_stmt_iterator bsi;
7048
7049 /* An important special case -- if we are asked to express value of
7050 the original iv by itself, just exit; there is no need to
7051 introduce a new computation (that might also need casting the
7052 variable to unsigned and back). */
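  /* E.g. (illustrative only): if USE->stmt is the original biv increment
     i_2 = i_1 + 1 and CAND is the IP_ORIGINAL candidate incremented at
     this very statement, the statement already computes the desired
     value and can be left untouched.  */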
7053 if (cand->pos == IP_ORIGINAL
7054 && cand->incremented_at == use->stmt)
7055 {
7056 enum tree_code stmt_code;
7057
7058 gcc_assert (is_gimple_assign (use->stmt));
7059 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
7060
7061 /* Check whether we may leave the computation unchanged.
7062 This is the case only if it does not rely on other
7063 computations in the loop -- otherwise, the computation
7064 we rely upon may be removed in remove_unused_ivs,
7065 thus leading to an ICE. */
7066 stmt_code = gimple_assign_rhs_code (use->stmt);
7067 if (stmt_code == PLUS_EXPR
7068 || stmt_code == MINUS_EXPR
7069 || stmt_code == POINTER_PLUS_EXPR)
7070 {
7071 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
7072 op = gimple_assign_rhs2 (use->stmt);
7073 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
7074 op = gimple_assign_rhs1 (use->stmt);
7075 else
7076 op = NULL_TREE;
7077 }
7078 else
7079 op = NULL_TREE;
7080
7081 if (op && expr_invariant_in_loop_p (data->current_loop, op))
7082 return;
7083 }
7084
7085 comp = get_computation (data->current_loop, use, cand);
7086 gcc_assert (comp != NULL_TREE);
7087
7088 switch (gimple_code (use->stmt))
7089 {
7090 case GIMPLE_PHI:
7091 tgt = PHI_RESULT (use->stmt);
7092
7093 /* If we should keep the biv, do not replace it. */
7094 if (name_info (data, tgt)->preserve_biv)
7095 return;
7096
7097 bsi = gsi_after_labels (gimple_bb (use->stmt));
7098 break;
7099
7100 case GIMPLE_ASSIGN:
7101 tgt = gimple_assign_lhs (use->stmt);
7102 bsi = gsi_for_stmt (use->stmt);
7103 break;
7104
7105 default:
7106 gcc_unreachable ();
7107 }
7108
7109 if (!valid_gimple_rhs_p (comp)
7110 || (gimple_code (use->stmt) != GIMPLE_PHI
7111 /* We can't allow re-allocating the stmt as it might be pointed
7112 to still. */
7113 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
7114 >= gimple_num_ops (gsi_stmt (bsi)))))
7115 {
7116 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
7117 true, GSI_SAME_STMT);
7118 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
7119 {
7120 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
7121 /* As this isn't a plain copy we have to reset alignment
7122 information. */
7123 if (SSA_NAME_PTR_INFO (comp))
7124 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
7125 }
7126 }
7127
7128 if (gimple_code (use->stmt) == GIMPLE_PHI)
7129 {
7130 ass = gimple_build_assign (tgt, comp);
7131 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
7132
7133 bsi = gsi_for_stmt (use->stmt);
7134 remove_phi_node (&bsi, false);
7135 }
7136 else
7137 {
7138 gimple_assign_set_rhs_from_tree (&bsi, comp);
7139 use->stmt = gsi_stmt (bsi);
7140 }
7141 }
7142
7143 /* Performs a peephole optimization to reorder the iv update statement with
7144 a mem ref to enable instruction combining in later phases. The mem ref uses
7145 the iv value before the update, so the reordering transformation requires
7146 adjustment of the offset. CAND is the selected IV_CAND.
7147
7148 Example:
7149
7150 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
7151 iv2 = iv1 + 1;
7152
7153 if (t < val) (1)
7154 goto L;
7155 goto Head;
7156
7157
7158 Directly propagating t over to (1) will introduce an overlapping live range,
7159 thus increasing register pressure. This peephole transforms it into:
7160
7161
7162 iv2 = iv1 + 1;
7163 t = MEM_REF (base, iv2, 8, 8);
7164 if (t < val)
7165 goto L;
7166 goto Head;
7167 */
7168
7169 static void
7170 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7171 {
7172 tree var_after;
7173 gimple *iv_update, *stmt;
7174 basic_block bb;
7175 gimple_stmt_iterator gsi, gsi_iv;
7176
7177 if (cand->pos != IP_NORMAL)
7178 return;
7179
7180 var_after = cand->var_after;
7181 iv_update = SSA_NAME_DEF_STMT (var_after);
7182
7183 bb = gimple_bb (iv_update);
7184 gsi = gsi_last_nondebug_bb (bb);
7185 stmt = gsi_stmt (gsi);
7186
7187 /* Only handle conditional statements for now. */
7188 if (gimple_code (stmt) != GIMPLE_COND)
7189 return;
7190
7191 gsi_prev_nondebug (&gsi);
7192 stmt = gsi_stmt (gsi);
7193 if (stmt != iv_update)
7194 return;
7195
7196 gsi_prev_nondebug (&gsi);
7197 if (gsi_end_p (gsi))
7198 return;
7199
7200 stmt = gsi_stmt (gsi);
7201 if (gimple_code (stmt) != GIMPLE_ASSIGN)
7202 return;
7203
7204 if (stmt != use->stmt)
7205 return;
7206
7207 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7208 return;
7209
7210 if (dump_file && (dump_flags & TDF_DETAILS))
7211 {
7212 fprintf (dump_file, "Reordering \n");
7213 print_gimple_stmt (dump_file, iv_update, 0, 0);
7214 print_gimple_stmt (dump_file, use->stmt, 0, 0);
7215 fprintf (dump_file, "\n");
7216 }
7217
7218 gsi = gsi_for_stmt (use->stmt);
7219 gsi_iv = gsi_for_stmt (iv_update);
7220 gsi_move_before (&gsi_iv, &gsi);
7221
7222 cand->pos = IP_BEFORE_USE;
7223 cand->incremented_at = use->stmt;
7224 }
7225
7226 /* Rewrites USE (address that is an iv) using candidate CAND. */
7227
7228 static void
7229 rewrite_use_address_1 (struct ivopts_data *data,
7230 struct iv_use *use, struct iv_cand *cand)
7231 {
7232 aff_tree aff;
7233 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7234 tree base_hint = NULL_TREE;
7235 tree ref, iv;
7236 bool ok;
7237
7238 adjust_iv_update_pos (cand, use);
7239 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
7240 gcc_assert (ok);
7241 unshare_aff_combination (&aff);
7242
7243 /* To avoid undefined overflow problems, all IV candidates use unsigned
7244 integer types. The drawback is that this makes it impossible for
7245 create_mem_ref to distinguish an IV that is based on a memory object
7246 from one that represents simply an offset.
7247
7248 To work around this problem, we pass a hint to create_mem_ref that
7249 indicates which variable (if any) in aff is an IV based on a memory
7250 object. Note that we only consider the candidate. If this is not
7251 based on an object, the base of the reference is in some subexpression
7252 of the use -- but these will use pointer types, so they are recognized
7253 by the create_mem_ref heuristics anyway. */
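  /* Illustrative (hypothetical) case: for a candidate whose iv is based on
     array A, the candidate's value at USE->stmt is passed as BASE_HINT so
     that create_mem_ref can keep A as the base of the memory reference
     rather than folding it into an integer offset.  */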
7254 if (cand->iv->base_object)
7255 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
7256
7257 iv = var_at_stmt (data->current_loop, cand, use->stmt);
7258 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
7259 reference_alias_ptr_type (*use->op_p),
7260 iv, base_hint, data->speed);
7261 copy_ref_info (ref, *use->op_p);
7262 *use->op_p = ref;
7263 }
7264
7265 /* Rewrites USE (address that is an iv) using candidate CAND. If it's the
7266 first use of a group, rewrites sub uses in the group too. */
7267
7268 static void
7269 rewrite_use_address (struct ivopts_data *data,
7270 struct iv_use *use, struct iv_cand *cand)
7271 {
7272 struct iv_use *next;
7273
7274 gcc_assert (use->sub_id == 0);
7275 rewrite_use_address_1 (data, use, cand);
7276 update_stmt (use->stmt);
7277
7278 for (next = use->next; next != NULL; next = next->next)
7279 {
7280 rewrite_use_address_1 (data, next, cand);
7281 update_stmt (next->stmt);
7282 }
7283
7284 return;
7285 }
7286
7287 /* Rewrites USE (the condition in which one of the arguments is an iv) using
7288 candidate CAND. */
7289
7290 static void
7291 rewrite_use_compare (struct ivopts_data *data,
7292 struct iv_use *use, struct iv_cand *cand)
7293 {
7294 tree comp, *var_p, op, bound;
7295 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7296 enum tree_code compare;
7297 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
7298 bool ok;
7299
7300 bound = cp->value;
7301 if (bound)
7302 {
7303 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7304 tree var_type = TREE_TYPE (var);
7305 gimple_seq stmts;
7306
7307 if (dump_file && (dump_flags & TDF_DETAILS))
7308 {
7309 fprintf (dump_file, "Replacing exit test: ");
7310 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7311 }
7312 compare = cp->comp;
7313 bound = unshare_expr (fold_convert (var_type, bound));
7314 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7315 if (stmts)
7316 gsi_insert_seq_on_edge_immediate (
7317 loop_preheader_edge (data->current_loop),
7318 stmts);
7319
7320 gcond *cond_stmt = as_a <gcond *> (use->stmt);
7321 gimple_cond_set_lhs (cond_stmt, var);
7322 gimple_cond_set_code (cond_stmt, compare);
7323 gimple_cond_set_rhs (cond_stmt, op);
7324 return;
7325 }
7326
7327 /* The induction variable elimination failed; just express the original
7328 giv. */
7329 comp = get_computation (data->current_loop, use, cand);
7330 gcc_assert (comp != NULL_TREE);
7331
7332 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
7333 gcc_assert (ok);
7334
7335 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
7336 true, GSI_SAME_STMT);
7337 }
7338
7339 /* Rewrites USE using candidate CAND. */
7340
7341 static void
7342 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
7343 {
7344 switch (use->type)
7345 {
7346 case USE_NONLINEAR_EXPR:
7347 rewrite_use_nonlinear_expr (data, use, cand);
7348 break;
7349
7350 case USE_ADDRESS:
7351 rewrite_use_address (data, use, cand);
7352 break;
7353
7354 case USE_COMPARE:
7355 rewrite_use_compare (data, use, cand);
7356 break;
7357
7358 default:
7359 gcc_unreachable ();
7360 }
7361
7362 update_stmt (use->stmt);
7363 }
7364
7365 /* Rewrite the uses using the selected induction variables. */
7366
7367 static void
7368 rewrite_uses (struct ivopts_data *data)
7369 {
7370 unsigned i;
7371 struct iv_cand *cand;
7372 struct iv_use *use;
7373
7374 for (i = 0; i < n_iv_uses (data); i++)
7375 {
7376 use = iv_use (data, i);
7377 cand = use->selected;
7378 gcc_assert (cand);
7379
7380 rewrite_use (data, use, cand);
7381 }
7382 }
7383
7384 /* Removes the ivs that are not used after rewriting. */
7385
7386 static void
7387 remove_unused_ivs (struct ivopts_data *data)
7388 {
7389 unsigned j;
7390 bitmap_iterator bi;
7391 bitmap toremove = BITMAP_ALLOC (NULL);
7392
7393 /* Figure out an order in which to release SSA DEFs so that we don't
7394 release something that we'd have to propagate into a debug stmt
7395 afterwards. */
7396 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7397 {
7398 struct version_info *info;
7399
7400 info = ver_info (data, j);
7401 if (info->iv
7402 && !integer_zerop (info->iv->step)
7403 && !info->inv_id
7404 && !info->iv->have_use_for
7405 && !info->preserve_biv)
7406 {
7407 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7408
7409 tree def = info->iv->ssa_name;
7410
7411 if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
7412 {
7413 imm_use_iterator imm_iter;
7414 use_operand_p use_p;
7415 gimple *stmt;
7416 int count = 0;
7417
7418 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7419 {
7420 if (!gimple_debug_bind_p (stmt))
7421 continue;
7422
7423 /* We just want to determine whether to do nothing
7424 (count == 0), to substitute the computed
7425 expression into a single use of the SSA DEF by
7426 itself (count == 1), or to use a debug temp
7427 because the SSA DEF is used multiple times or as
7428 part of a larger expression (count > 1). */
7429 count++;
7430 if (gimple_debug_bind_get_value (stmt) != def)
7431 count++;
7432
7433 if (count > 1)
7434 BREAK_FROM_IMM_USE_STMT (imm_iter);
7435 }
7436
7437 if (!count)
7438 continue;
7439
7440 struct iv_use dummy_use;
7441 struct iv_cand *best_cand = NULL, *cand;
7442 unsigned i, best_pref = 0, cand_pref;
7443
7444 memset (&dummy_use, 0, sizeof (dummy_use));
7445 dummy_use.iv = info->iv;
7446 for (i = 0; i < n_iv_uses (data) && i < 64; i++)
7447 {
7448 cand = iv_use (data, i)->selected;
7449 if (cand == best_cand)
7450 continue;
7451 cand_pref = operand_equal_p (cand->iv->step,
7452 info->iv->step, 0)
7453 ? 4 : 0;
7454 cand_pref
7455 += TYPE_MODE (TREE_TYPE (cand->iv->base))
7456 == TYPE_MODE (TREE_TYPE (info->iv->base))
7457 ? 2 : 0;
7458 cand_pref
7459 += TREE_CODE (cand->iv->base) == INTEGER_CST
7460 ? 1 : 0;
7461 if (best_cand == NULL || best_pref < cand_pref)
7462 {
7463 best_cand = cand;
7464 best_pref = cand_pref;
7465 }
7466 }
7467
7468 if (!best_cand)
7469 continue;
7470
7471 tree comp = get_computation_at (data->current_loop,
7472 &dummy_use, best_cand,
7473 SSA_NAME_DEF_STMT (def));
7474 if (!comp)
7475 continue;
7476
7477 if (count > 1)
7478 {
7479 tree vexpr = make_node (DEBUG_EXPR_DECL);
7480 DECL_ARTIFICIAL (vexpr) = 1;
7481 TREE_TYPE (vexpr) = TREE_TYPE (comp);
7482 if (SSA_NAME_VAR (def))
7483 DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
7484 else
7485 DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
7486 gdebug *def_temp
7487 = gimple_build_debug_bind (vexpr, comp, NULL);
7488 gimple_stmt_iterator gsi;
7489
7490 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7491 gsi = gsi_after_labels (gimple_bb
7492 (SSA_NAME_DEF_STMT (def)));
7493 else
7494 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7495
7496 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7497 comp = vexpr;
7498 }
7499
7500 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7501 {
7502 if (!gimple_debug_bind_p (stmt))
7503 continue;
7504
7505 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7506 SET_USE (use_p, comp);
7507
7508 update_stmt (stmt);
7509 }
7510 }
7511 }
7512 }
7513
7514 release_defs_bitset (toremove);
7515
7516 BITMAP_FREE (toremove);
7517 }
7518
7519 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7520 for hash_map::traverse. */
7521
7522 bool
7523 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7524 {
7525 free (value);
7526 return true;
7527 }
7528
7529 /* Frees data allocated by the optimization of a single loop. */
7530
7531 static void
7532 free_loop_data (struct ivopts_data *data)
7533 {
7534 unsigned i, j;
7535 bitmap_iterator bi;
7536 tree obj;
7537
7538 if (data->niters)
7539 {
7540 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7541 delete data->niters;
7542 data->niters = NULL;
7543 }
7544
7545 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7546 {
7547 struct version_info *info;
7548
7549 info = ver_info (data, i);
7550 info->iv = NULL;
7551 info->has_nonlin_use = false;
7552 info->preserve_biv = false;
7553 info->inv_id = 0;
7554 }
7555 bitmap_clear (data->relevant);
7556 bitmap_clear (data->important_candidates);
7557
7558 for (i = 0; i < n_iv_uses (data); i++)
7559 {
7560 struct iv_use *use = iv_use (data, i);
7561 struct iv_use *pre = use, *sub = use->next;
7562
7563 while (sub)
7564 {
7565 gcc_assert (sub->related_cands == NULL);
7566 gcc_assert (sub->n_map_members == 0 && sub->cost_map == NULL);
7567
7568 pre = sub;
7569 sub = sub->next;
7570 free (pre);
7571 }
7572
7573 BITMAP_FREE (use->related_cands);
7574 for (j = 0; j < use->n_map_members; j++)
7575 if (use->cost_map[j].depends_on)
7576 BITMAP_FREE (use->cost_map[j].depends_on);
7577 free (use->cost_map);
7578 free (use);
7579 }
7580 data->iv_uses.truncate (0);
7581
7582 for (i = 0; i < n_iv_cands (data); i++)
7583 {
7584 struct iv_cand *cand = iv_cand (data, i);
7585
7586 if (cand->depends_on)
7587 BITMAP_FREE (cand->depends_on);
7588 free (cand);
7589 }
7590 data->iv_candidates.truncate (0);
7591
7592 if (data->version_info_size < num_ssa_names)
7593 {
7594 data->version_info_size = 2 * num_ssa_names;
7595 free (data->version_info);
7596 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7597 }
7598
7599 data->max_inv_id = 0;
7600
7601 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7602 SET_DECL_RTL (obj, NULL_RTX);
7603
7604 decl_rtl_to_reset.truncate (0);
7605
7606 data->inv_expr_tab->empty ();
7607 data->inv_expr_id = 0;
7608
7609 data->iv_common_cand_tab->empty ();
7610 data->iv_common_cands.truncate (0);
7611 }
7612
7613 /* Finalizes the data structures used by the iv optimization pass. */
7615
7616 static void
7617 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7618 {
7619 free_loop_data (data);
7620 free (data->version_info);
7621 BITMAP_FREE (data->relevant);
7622 BITMAP_FREE (data->important_candidates);
7623
7624 decl_rtl_to_reset.release ();
7625 data->iv_uses.release ();
7626 data->iv_candidates.release ();
7627 delete data->inv_expr_tab;
7628 data->inv_expr_tab = NULL;
7629 free_affine_expand_cache (&data->name_expansion_cache);
7630 delete data->iv_common_cand_tab;
7631 data->iv_common_cand_tab = NULL;
7632 data->iv_common_cands.release ();
7633 obstack_free (&data->iv_obstack, NULL);
7634 }
7635
7636 /* Returns true if the loop body BODY includes any function calls. */
7637
7638 static bool
7639 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7640 {
7641 gimple_stmt_iterator gsi;
7642 unsigned i;
7643
7644 for (i = 0; i < num_nodes; i++)
7645 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7646 {
7647 gimple *stmt = gsi_stmt (gsi);
7648 if (is_gimple_call (stmt)
7649 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7650 return true;
7651 }
7652 return false;
7653 }
7654
7655 /* Optimizes the LOOP. Returns true if anything changed. */
7656
7657 static bool
7658 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
7659 {
7660 bool changed = false;
7661 struct iv_ca *iv_ca;
7662 edge exit = single_dom_exit (loop);
7663 basic_block *body;
7664
7665 gcc_assert (!data->niters);
7666 data->current_loop = loop;
7667 data->loop_loc = find_loop_location (loop);
7668 data->speed = optimize_loop_for_speed_p (loop);
7669
7670 if (dump_file && (dump_flags & TDF_DETAILS))
7671 {
7672 fprintf (dump_file, "Processing loop %d", loop->num);
7673 if (data->loop_loc != UNKNOWN_LOCATION)
7674 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7675 LOCATION_LINE (data->loop_loc));
7676 fprintf (dump_file, "\n");
7677
7678 if (exit)
7679 {
7680 fprintf (dump_file, " single exit %d -> %d, exit condition ",
7681 exit->src->index, exit->dest->index);
7682 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7683 fprintf (dump_file, "\n");
7684 }
7685
7686 fprintf (dump_file, "\n");
7687 }
7688
7689 body = get_loop_body (loop);
7690 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7691 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7692 free (body);
7693
7694 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7695
7696 /* For each ssa name, determine whether it behaves as an induction variable
7697 in some loop. */
7698 if (!find_induction_variables (data))
7699 goto finish;
7700
7701 /* Finds interesting uses (item 1). */
7702 find_interesting_uses (data);
7703 group_address_uses (data);
7704 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
7705 goto finish;
7706
7707 /* Finds candidates for the induction variables (item 2). */
7708 find_iv_candidates (data);
7709
7710 /* Calculates the costs (item 3, part 1). */
7711 determine_iv_costs (data);
7712 determine_use_iv_costs (data);
7713 determine_set_costs (data);
7714
7715 /* Find the optimal set of induction variables (item 3, part 2). */
7716 iv_ca = find_optimal_iv_set (data);
7717 if (!iv_ca)
7718 goto finish;
7719 changed = true;
7720
7721 /* Create the new induction variables (item 4, part 1). */
7722 create_new_ivs (data, iv_ca);
7723 iv_ca_free (&iv_ca);
7724
7725 /* Rewrite the uses (item 4, part 2). */
7726 rewrite_uses (data);
7727
7728 /* Remove the ivs that are unused after rewriting. */
7729 remove_unused_ivs (data);
7730
7731 /* We have changed the structure of induction variables; it might happen
7732 that definitions in the scev database refer to some of them that were
7733 eliminated. */
7734 scev_reset ();
7735
7736 finish:
7737 free_loop_data (data);
7738
7739 return changed;
7740 }
7741
7742 /* Main entry point. Optimizes induction variables in loops. */
7743
7744 void
7745 tree_ssa_iv_optimize (void)
7746 {
7747 struct loop *loop;
7748 struct ivopts_data data;
7749
7750 tree_ssa_iv_optimize_init (&data);
7751
7752 /* Optimize the loops starting with the innermost ones. */
7753 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7754 {
7755 if (dump_file && (dump_flags & TDF_DETAILS))
7756 flow_loop_dump (loop, dump_file, NULL, 1);
7757
7758 tree_ssa_iv_optimize_loop (&data, loop);
7759 }
7760
7761 tree_ssa_iv_optimize_finalize (&data);
7762 }