Multiple exit loop handling in ivopts. Regression tested on x86-64/linux
[gcc.git] / gcc / tree-ssa-loop-ivopts.c
1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 3, or (at your option) any
10 later version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
26 following steps:
27
28 1) The interesting uses of induction variables are found. This includes
29
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
33
34 2) Candidates for the induction variables are found. This includes
35
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
38 uses" above
39
40 3) The optimal (w.r.t. a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
42 of three parts:
43
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variable's value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
53
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
56
57 4) The trees are transformed to use the new variables, the dead code is
58 removed.
59
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible; it might give better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
64
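/* As an illustration of the transformation this pass aims for (a
   hypothetical example, not taken from this file): given

     void f (int *a, int n)
     {
       int i;
       for (i = 0; i < n; i++)
         a[i] = 0;
     }

   the loop has a single biv `i', used both in the address `a[i]' and in
   the exit test `i < n'.  Selecting a pointer candidate conceptually
   rewrites the loop to

     int *p = a, *end = a + n;
     for (; p != end; p++)
       *p = 0;

   which strength-reduces the address computation and allows `i' itself
   to be eliminated.  */
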
65 #include "config.h"
66 #include "system.h"
67 #include "coretypes.h"
68 #include "tm.h"
69 #include "tree.h"
70 #include "tm_p.h"
71 #include "basic-block.h"
72 #include "output.h"
73 #include "tree-pretty-print.h"
74 #include "gimple-pretty-print.h"
75 #include "tree-flow.h"
76 #include "tree-dump.h"
77 #include "timevar.h"
78 #include "cfgloop.h"
79 #include "tree-pass.h"
80 #include "ggc.h"
81 #include "insn-config.h"
82 #include "recog.h"
83 #include "pointer-set.h"
84 #include "hashtab.h"
85 #include "tree-chrec.h"
86 #include "tree-scalar-evolution.h"
88 #include "params.h"
89 #include "langhooks.h"
90 #include "tree-affine.h"
91 #include "target.h"
92 #include "tree-ssa-propagate.h"
93
94 /* FIXME: Expressions are expanded to RTL in this pass to determine the
95 cost of different addressing modes. This should be moved to a TBD
96 interface between the GIMPLE and RTL worlds. */
97 #include "expr.h"
98
99 /* The infinite cost. */
100 #define INFTY 10000000
101
102 /* The expected number of loop iterations. TODO -- use profiling instead of
103 this. */
104 #define AVG_LOOP_NITER(LOOP) 5
105
106
107 /* Representation of the induction variable. */
108 struct iv
109 {
110 tree base; /* Initial value of the iv. */
111 tree base_object; /* A memory object to which the induction variable points. */
112 tree step; /* Step of the iv (constant only). */
113 tree ssa_name; /* The ssa name with the value. */
114 bool biv_p; /* Is it a biv? */
115 bool have_use_for; /* Do we already have a use for it? */
116 unsigned use_id; /* If so, the identifier of that use. */
117 };
118
119 /* Per-ssa version information (induction variable descriptions, etc.). */
120 struct version_info
121 {
122 tree name; /* The ssa name. */
123 struct iv *iv; /* Induction variable description. */
124 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
125 an expression that is not an induction variable. */
126 bool preserve_biv; /* For the original biv, whether to preserve it. */
127 unsigned inv_id; /* Id of an invariant. */
128 };
129
130 /* Types of uses. */
131 enum use_type
132 {
133 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
134 USE_ADDRESS, /* Use in an address. */
135 USE_COMPARE /* Use is a compare. */
136 };
137
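/* For illustration (hypothetical source, not part of this pass): in

     for (i = 0; i < n; i++)
       sum += a[i];

   the occurrence of `i' inside `a[i]' gives rise to a USE_ADDRESS, the
   exit test `i < n' to a USE_COMPARE, and a statement such as
   `x = i * 3;' in the loop body would give rise to a
   USE_NONLINEAR_EXPR.  */
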
138 /* Cost of a computation. */
139 typedef struct
140 {
141 int cost; /* The runtime cost. */
142 unsigned complexity; /* The estimate of the complexity of the code for
143 the computation (in no concrete units --
144 complexity field should be larger for more
145 complex expressions and addressing modes). */
146 } comp_cost;
147
148 static const comp_cost zero_cost = {0, 0};
149 static const comp_cost infinite_cost = {INFTY, INFTY};
150
151 /* The candidate - cost pair. */
152 struct cost_pair
153 {
154 struct iv_cand *cand; /* The candidate. */
155 comp_cost cost; /* The cost. */
156 bitmap depends_on; /* The list of invariants that have to be
157 preserved. */
158 tree value; /* For final value elimination, the expression for
159 the final value of the iv. For iv elimination,
160 the new bound to compare with. */
161 };
162
163 /* Use. */
164 struct iv_use
165 {
166 unsigned id; /* The id of the use. */
167 enum use_type type; /* Type of the use. */
168 struct iv *iv; /* The induction variable it is based on. */
169 gimple stmt; /* Statement in that it occurs. */
170 tree *op_p; /* The place where it occurs. */
171 bitmap related_cands; /* The set of "related" iv candidates, plus the common
172 important ones. */
173
174 unsigned n_map_members; /* Number of candidates in the cost_map list. */
175 struct cost_pair *cost_map;
176 /* The costs w.r.t. the iv candidates. */
177
178 struct iv_cand *selected;
179 /* The selected candidate. */
180 };
181
182 /* The position where the iv is computed. */
183 enum iv_position
184 {
185 IP_NORMAL, /* At the end, just before the exit condition. */
186 IP_END, /* At the end of the latch block. */
187 IP_BEFORE_USE, /* Immediately before a specific use. */
188 IP_AFTER_USE, /* Immediately after a specific use. */
189 IP_ORIGINAL /* The original biv. */
190 };
191
192 /* The induction variable candidate. */
193 struct iv_cand
194 {
195 unsigned id; /* The number of the candidate. */
196 bool important; /* Whether this is an "important" candidate, i.e. such
197 that it should be considered by all uses. */
198 ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */
199 gimple incremented_at;/* For original biv, the statement where it is
200 incremented. */
201 tree var_before; /* The variable used for it before increment. */
202 tree var_after; /* The variable used for it after increment. */
203 struct iv *iv; /* The value of the candidate. NULL for
204 "pseudocandidate" used to indicate the possibility
205 to replace the final value of an iv by direct
206 computation of the value. */
207 unsigned cost; /* Cost of the candidate. */
208 unsigned cost_step; /* Cost of the candidate's increment operation. */
209 struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
210 where it is incremented. */
211 bitmap depends_on; /* The list of invariants that are used in step of the
212 biv. */
213 };
214
215 /* The data used by the induction variable optimizations. */
216
217 typedef struct iv_use *iv_use_p;
218 DEF_VEC_P(iv_use_p);
219 DEF_VEC_ALLOC_P(iv_use_p,heap);
220
221 typedef struct iv_cand *iv_cand_p;
222 DEF_VEC_P(iv_cand_p);
223 DEF_VEC_ALLOC_P(iv_cand_p,heap);
224
225 struct ivopts_data
226 {
227 /* The currently optimized loop. */
228 struct loop *current_loop;
229
230 /* Numbers of iterations for all exits of the current loop. */
231 struct pointer_map_t *niters;
232
233 /* Number of registers used in it. */
234 unsigned regs_used;
235
236 /* The size of version_info array allocated. */
237 unsigned version_info_size;
238
239 /* The array of information for the ssa names. */
240 struct version_info *version_info;
241
242 /* The bitmap of indices in version_info whose value was changed. */
243 bitmap relevant;
244
245 /* The uses of induction variables. */
246 VEC(iv_use_p,heap) *iv_uses;
247
248 /* The candidates. */
249 VEC(iv_cand_p,heap) *iv_candidates;
250
251 /* A bitmap of important candidates. */
252 bitmap important_candidates;
253
254 /* The maximum invariant id. */
255 unsigned max_inv_id;
256
257 /* Whether to consider just related and important candidates when replacing a
258 use. */
259 bool consider_all_candidates;
260
261 /* Are we optimizing for speed? */
262 bool speed;
263
264 /* Whether the loop body includes any function calls. */
265 bool body_includes_call;
266 };
267
268 /* An assignment of iv candidates to uses. */
269
270 struct iv_ca
271 {
272 /* The number of uses covered by the assignment. */
273 unsigned upto;
274
275 /* Number of uses that cannot be expressed by the candidates in the set. */
276 unsigned bad_uses;
277
278 /* Candidate assigned to a use, together with the related costs. */
279 struct cost_pair **cand_for_use;
280
281 /* Number of times each candidate is used. */
282 unsigned *n_cand_uses;
283
284 /* The candidates used. */
285 bitmap cands;
286
287 /* The number of candidates in the set. */
288 unsigned n_cands;
289
290 /* Total number of registers needed. */
291 unsigned n_regs;
292
293 /* Total cost of expressing uses. */
294 comp_cost cand_use_cost;
295
296 /* Total cost of candidates. */
297 unsigned cand_cost;
298
299 /* Number of times each invariant is used. */
300 unsigned *n_invariant_uses;
301
302 /* Total cost of the assignment. */
303 comp_cost cost;
304 };
305
306 /* Difference of two iv candidate assignments. */
307
308 struct iv_ca_delta
309 {
310 /* Changed use. */
311 struct iv_use *use;
312
313 /* An old assignment (for rollback purposes). */
314 struct cost_pair *old_cp;
315
316 /* A new assignment. */
317 struct cost_pair *new_cp;
318
319 /* Next change in the list. */
320 struct iv_ca_delta *next_change;
321 };
322
323 /* Bound on the number of candidates below which all candidates are considered. */
324
325 #define CONSIDER_ALL_CANDIDATES_BOUND \
326 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
327
328 /* If there are more iv occurrences, we just give up (it is quite unlikely that
329 optimizing such a loop would help, and it would take ages). */
330
331 #define MAX_CONSIDERED_USES \
332 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
333
334 /* If there are at most this number of ivs in the set, always try removing
335 unnecessary ivs from the set. */
336
337 #define ALWAYS_PRUNE_CAND_SET_BOUND \
338 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
339
340 /* The list of trees for which the decl_rtl field must be reset is stored
341 here. */
342
343 static VEC(tree,heap) *decl_rtl_to_reset;
344
345 /* Number of uses recorded in DATA. */
346
347 static inline unsigned
348 n_iv_uses (struct ivopts_data *data)
349 {
350 return VEC_length (iv_use_p, data->iv_uses);
351 }
352
353 /* Ith use recorded in DATA. */
354
355 static inline struct iv_use *
356 iv_use (struct ivopts_data *data, unsigned i)
357 {
358 return VEC_index (iv_use_p, data->iv_uses, i);
359 }
360
361 /* Number of candidates recorded in DATA. */
362
363 static inline unsigned
364 n_iv_cands (struct ivopts_data *data)
365 {
366 return VEC_length (iv_cand_p, data->iv_candidates);
367 }
368
369 /* Ith candidate recorded in DATA. */
370
371 static inline struct iv_cand *
372 iv_cand (struct ivopts_data *data, unsigned i)
373 {
374 return VEC_index (iv_cand_p, data->iv_candidates, i);
375 }
376
377 /* The single loop exit if it dominates the latch, NULL otherwise. */
378
379 edge
380 single_dom_exit (struct loop *loop)
381 {
382 edge exit = single_exit (loop);
383
384 if (!exit)
385 return NULL;
386
387 if (!just_once_each_iteration_p (loop, exit->src))
388 return NULL;
389
390 return exit;
391 }
392
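/* Illustration (hypothetical loops): in

     while (i < n) { i++; }

   the exit test executes on every iteration and dominates the latch, so
   that exit edge is returned.  If the only way out of the loop is a
   `break' hidden in one branch of an `if' inside the body, the exit does
   not dominate the latch and NULL is returned even though the exit is
   unique.  */
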
393 /* Dumps information about the induction variable IV to FILE. */
394
395 extern void dump_iv (FILE *, struct iv *);
396 void
397 dump_iv (FILE *file, struct iv *iv)
398 {
399 if (iv->ssa_name)
400 {
401 fprintf (file, "ssa name ");
402 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
403 fprintf (file, "\n");
404 }
405
406 fprintf (file, " type ");
407 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
408 fprintf (file, "\n");
409
410 if (iv->step)
411 {
412 fprintf (file, " base ");
413 print_generic_expr (file, iv->base, TDF_SLIM);
414 fprintf (file, "\n");
415
416 fprintf (file, " step ");
417 print_generic_expr (file, iv->step, TDF_SLIM);
418 fprintf (file, "\n");
419 }
420 else
421 {
422 fprintf (file, " invariant ");
423 print_generic_expr (file, iv->base, TDF_SLIM);
424 fprintf (file, "\n");
425 }
426
427 if (iv->base_object)
428 {
429 fprintf (file, " base object ");
430 print_generic_expr (file, iv->base_object, TDF_SLIM);
431 fprintf (file, "\n");
432 }
433
434 if (iv->biv_p)
435 fprintf (file, " is a biv\n");
436 }
437
438 /* Dumps information about the USE to FILE. */
439
440 extern void dump_use (FILE *, struct iv_use *);
441 void
442 dump_use (FILE *file, struct iv_use *use)
443 {
444 fprintf (file, "use %d\n", use->id);
445
446 switch (use->type)
447 {
448 case USE_NONLINEAR_EXPR:
449 fprintf (file, " generic\n");
450 break;
451
452 case USE_ADDRESS:
453 fprintf (file, " address\n");
454 break;
455
456 case USE_COMPARE:
457 fprintf (file, " compare\n");
458 break;
459
460 default:
461 gcc_unreachable ();
462 }
463
464 fprintf (file, " in statement ");
465 print_gimple_stmt (file, use->stmt, 0, 0);
466 fprintf (file, "\n");
467
468 fprintf (file, " at position ");
469 if (use->op_p)
470 print_generic_expr (file, *use->op_p, TDF_SLIM);
471 fprintf (file, "\n");
472
473 dump_iv (file, use->iv);
474
475 if (use->related_cands)
476 {
477 fprintf (file, " related candidates ");
478 dump_bitmap (file, use->related_cands);
479 }
480 }
481
482 /* Dumps information about the uses to FILE. */
483
484 extern void dump_uses (FILE *, struct ivopts_data *);
485 void
486 dump_uses (FILE *file, struct ivopts_data *data)
487 {
488 unsigned i;
489 struct iv_use *use;
490
491 for (i = 0; i < n_iv_uses (data); i++)
492 {
493 use = iv_use (data, i);
494
495 dump_use (file, use);
496 fprintf (file, "\n");
497 }
498 }
499
500 /* Dumps information about induction variable candidate CAND to FILE. */
501
502 extern void dump_cand (FILE *, struct iv_cand *);
503 void
504 dump_cand (FILE *file, struct iv_cand *cand)
505 {
506 struct iv *iv = cand->iv;
507
508 fprintf (file, "candidate %d%s\n",
509 cand->id, cand->important ? " (important)" : "");
510
511 if (cand->depends_on)
512 {
513 fprintf (file, " depends on ");
514 dump_bitmap (file, cand->depends_on);
515 }
516
517 if (!iv)
518 {
519 fprintf (file, " final value replacement\n");
520 return;
521 }
522
523 switch (cand->pos)
524 {
525 case IP_NORMAL:
526 fprintf (file, " incremented before exit test\n");
527 break;
528
529 case IP_BEFORE_USE:
530 fprintf (file, " incremented before use %d\n", cand->ainc_use->id);
531 break;
532
533 case IP_AFTER_USE:
534 fprintf (file, " incremented after use %d\n", cand->ainc_use->id);
535 break;
536
537 case IP_END:
538 fprintf (file, " incremented at end\n");
539 break;
540
541 case IP_ORIGINAL:
542 fprintf (file, " original biv\n");
543 break;
544 }
545
546 dump_iv (file, iv);
547 }
548
549 /* Returns the info for ssa version VER. */
550
551 static inline struct version_info *
552 ver_info (struct ivopts_data *data, unsigned ver)
553 {
554 return data->version_info + ver;
555 }
556
557 /* Returns the info for ssa name NAME. */
558
559 static inline struct version_info *
560 name_info (struct ivopts_data *data, tree name)
561 {
562 return ver_info (data, SSA_NAME_VERSION (name));
563 }
564
565 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
566 emitted in LOOP. */
567
568 static bool
569 stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
570 {
571 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
572
573 gcc_assert (bb);
574
575 if (sbb == loop->latch)
576 return true;
577
578 if (sbb != bb)
579 return false;
580
581 return stmt == last_stmt (bb);
582 }
583
584 /* Returns true if STMT is after the place where the original induction
585 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
586 if the positions are identical. */
587
588 static bool
589 stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
590 {
591 basic_block cand_bb = gimple_bb (cand->incremented_at);
592 basic_block stmt_bb = gimple_bb (stmt);
593
594 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
595 return false;
596
597 if (stmt_bb != cand_bb)
598 return true;
599
600 if (true_if_equal
601 && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
602 return true;
603 return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
604 }
605
606 /* Returns true if STMT is after the place where the induction variable
607 CAND is incremented in LOOP. */
608
609 static bool
610 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
611 {
612 switch (cand->pos)
613 {
614 case IP_END:
615 return false;
616
617 case IP_NORMAL:
618 return stmt_after_ip_normal_pos (loop, stmt);
619
620 case IP_ORIGINAL:
621 case IP_AFTER_USE:
622 return stmt_after_inc_pos (cand, stmt, false);
623
624 case IP_BEFORE_USE:
625 return stmt_after_inc_pos (cand, stmt, true);
626
627 default:
628 gcc_unreachable ();
629 }
630 }
631
632 /* Returns true if EXP is an ssa name that occurs in an abnormal phi node. */
633
634 static bool
635 abnormal_ssa_name_p (tree exp)
636 {
637 if (!exp)
638 return false;
639
640 if (TREE_CODE (exp) != SSA_NAME)
641 return false;
642
643 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
644 }
645
646 /* Returns false if BASE or INDEX contains an ssa name that occurs in an
647 abnormal phi node. Callback for for_each_index. */
648
649 static bool
650 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
651 void *data ATTRIBUTE_UNUSED)
652 {
653 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
654 {
655 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
656 return false;
657 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
658 return false;
659 }
660
661 return !abnormal_ssa_name_p (*index);
662 }
663
664 /* Returns true if EXPR contains an ssa name that occurs in an
665 abnormal phi node. */
666
667 bool
668 contains_abnormal_ssa_name_p (tree expr)
669 {
670 enum tree_code code;
671 enum tree_code_class codeclass;
672
673 if (!expr)
674 return false;
675
676 code = TREE_CODE (expr);
677 codeclass = TREE_CODE_CLASS (code);
678
679 if (code == SSA_NAME)
680 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
681
682 if (code == INTEGER_CST
683 || is_gimple_min_invariant (expr))
684 return false;
685
686 if (code == ADDR_EXPR)
687 return !for_each_index (&TREE_OPERAND (expr, 0),
688 idx_contains_abnormal_ssa_name_p,
689 NULL);
690
691 if (code == COND_EXPR)
692 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
693 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
694 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
695
696 switch (codeclass)
697 {
698 case tcc_binary:
699 case tcc_comparison:
700 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
701 return true;
702
703 /* Fallthru. */
704 case tcc_unary:
705 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
706 return true;
707
708 break;
709
710 default:
711 gcc_unreachable ();
712 }
713
714 return false;
715 }
716
717 /* Returns the tree describing the number of iterations determined from
718 EXIT of DATA->current_loop, or NULL if something goes wrong. */
719
720 static tree
721 niter_for_exit (struct ivopts_data *data, edge exit,
722 struct tree_niter_desc **desc_p)
723 {
724 struct tree_niter_desc* desc = NULL;
725 tree niter;
726 void **slot;
727
728 if (!data->niters)
729 {
730 data->niters = pointer_map_create ();
731 slot = NULL;
732 }
733 else
734 slot = pointer_map_contains (data->niters, exit);
735
736 if (!slot)
737 {
738 /* Try to determine number of iterations. We must know it
739 unconditionally (i.e., without possibility of # of iterations
740 being zero). Also, we cannot safely work with ssa names that
741 appear in phi nodes on abnormal edges, so that we do not create
742 overlapping life ranges for them (PR 27283). */
743 desc = XNEW (struct tree_niter_desc);
744 if (number_of_iterations_exit (data->current_loop,
745 exit, desc, true)
746 && integer_zerop (desc->may_be_zero)
747 && !contains_abnormal_ssa_name_p (desc->niter))
748 niter = desc->niter;
749 else
750 niter = NULL_TREE;
751
752 desc->niter = niter;
753 slot = pointer_map_insert (data->niters, exit);
754 *slot = desc;
755 }
756 else
757 niter = ((struct tree_niter_desc *) *slot)->niter;
758
759 if (desc_p)
760 *desc_p = (struct tree_niter_desc *) *slot;
761 return niter;
762 }
763
764 /* Returns the tree describing the number of iterations determined from
765 single dominating exit of DATA->current_loop, or NULL if something
766 goes wrong. */
767
768 static tree
769 niter_for_single_dom_exit (struct ivopts_data *data)
770 {
771 edge exit = single_dom_exit (data->current_loop);
772
773 if (!exit)
774 return NULL;
775
776 return niter_for_exit (data, exit, NULL);
777 }
778
779 /* Initializes data structures used by the iv optimization pass, stored
780 in DATA. */
781
782 static void
783 tree_ssa_iv_optimize_init (struct ivopts_data *data)
784 {
785 data->version_info_size = 2 * num_ssa_names;
786 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
787 data->relevant = BITMAP_ALLOC (NULL);
788 data->important_candidates = BITMAP_ALLOC (NULL);
789 data->max_inv_id = 0;
790 data->niters = NULL;
791 data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
792 data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
793 decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
794 }
795
796 /* Returns the memory object to which EXPR points. In case we are able to
797 determine that it does not point to any such object, NULL is returned. */
798
799 static tree
800 determine_base_object (tree expr)
801 {
802 enum tree_code code = TREE_CODE (expr);
803 tree base, obj;
804
805 /* If this is a pointer casted to any type, we need to determine
806 the base object for the pointer; so handle conversions before
807 throwing away non-pointer expressions. */
808 if (CONVERT_EXPR_P (expr))
809 return determine_base_object (TREE_OPERAND (expr, 0));
810
811 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
812 return NULL_TREE;
813
814 switch (code)
815 {
816 case INTEGER_CST:
817 return NULL_TREE;
818
819 case ADDR_EXPR:
820 obj = TREE_OPERAND (expr, 0);
821 base = get_base_address (obj);
822
823 if (!base)
824 return expr;
825
826 if (TREE_CODE (base) == MEM_REF)
827 return determine_base_object (TREE_OPERAND (base, 0));
828
829 return fold_convert (ptr_type_node,
830 build_fold_addr_expr (base));
831
832 case POINTER_PLUS_EXPR:
833 return determine_base_object (TREE_OPERAND (expr, 0));
834
835 case PLUS_EXPR:
836 case MINUS_EXPR:
837 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
838 gcc_unreachable ();
839
840 default:
841 return fold_convert (ptr_type_node, expr);
842 }
843 }
844
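/* Examples (illustrative only): for `&a[i]' where `a' is a declared
   array, the base object is `&a'; for `p + 4 * i' (a POINTER_PLUS_EXPR)
   it is the base object of `p'; and for an integer constant cast to a
   pointer, NULL_TREE is returned since it points to no object.  */
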
845 /* Allocates an induction variable with the given initial value BASE and
846 step STEP. */
847
848 static struct iv *
849 alloc_iv (tree base, tree step)
850 {
851 struct iv *iv = XCNEW (struct iv);
852 gcc_assert (step != NULL_TREE);
853
854 iv->base = base;
855 iv->base_object = determine_base_object (base);
856 iv->step = step;
857 iv->biv_p = false;
858 iv->have_use_for = false;
859 iv->use_id = 0;
860 iv->ssa_name = NULL_TREE;
861
862 return iv;
863 }
864
865 /* Sets STEP and BASE for induction variable IV. */
866
867 static void
868 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
869 {
870 struct version_info *info = name_info (data, iv);
871
872 gcc_assert (!info->iv);
873
874 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
875 info->iv = alloc_iv (base, step);
876 info->iv->ssa_name = iv;
877 }
878
879 /* Finds induction variable declaration for VAR. */
880
881 static struct iv *
882 get_iv (struct ivopts_data *data, tree var)
883 {
884 basic_block bb;
885 tree type = TREE_TYPE (var);
886
887 if (!POINTER_TYPE_P (type)
888 && !INTEGRAL_TYPE_P (type))
889 return NULL;
890
891 if (!name_info (data, var)->iv)
892 {
893 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
894
895 if (!bb
896 || !flow_bb_inside_loop_p (data->current_loop, bb))
897 set_iv (data, var, var, build_int_cst (type, 0));
898 }
899
900 return name_info (data, var)->iv;
901 }
902
903 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
904 not define a simple affine biv with nonzero step. */
905
906 static tree
907 determine_biv_step (gimple phi)
908 {
909 struct loop *loop = gimple_bb (phi)->loop_father;
910 tree name = PHI_RESULT (phi);
911 affine_iv iv;
912
913 if (!is_gimple_reg (name))
914 return NULL_TREE;
915
916 if (!simple_iv (loop, loop, name, &iv, true))
917 return NULL_TREE;
918
919 return integer_zerop (iv.step) ? NULL_TREE : iv.step;
920 }
921
922 /* Finds basic ivs. */
923
924 static bool
925 find_bivs (struct ivopts_data *data)
926 {
927 gimple phi;
928 tree step, type, base;
929 bool found = false;
930 struct loop *loop = data->current_loop;
931 gimple_stmt_iterator psi;
932
933 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
934 {
935 phi = gsi_stmt (psi);
936
937 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
938 continue;
939
940 step = determine_biv_step (phi);
941 if (!step)
942 continue;
943
944 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
945 base = expand_simple_operations (base);
946 if (contains_abnormal_ssa_name_p (base)
947 || contains_abnormal_ssa_name_p (step))
948 continue;
949
950 type = TREE_TYPE (PHI_RESULT (phi));
951 base = fold_convert (type, base);
952 if (step)
953 {
954 if (POINTER_TYPE_P (type))
955 step = fold_convert (sizetype, step);
956 else
957 step = fold_convert (type, step);
958 }
959
960 set_iv (data, PHI_RESULT (phi), base, step);
961 found = true;
962 }
963
964 return found;
965 }
966
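/* A sketch of the SSA form this matches (ssa names invented for
   illustration): with the loop header containing

     i_1 = PHI <0 (preheader), i_2 (latch)>

   and the body containing `i_2 = i_1 + 1;', the phi result `i_1' is
   recorded as a biv with base 0 and step 1.  */
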
967 /* Marks basic ivs. */
968
969 static void
970 mark_bivs (struct ivopts_data *data)
971 {
972 gimple phi;
973 tree var;
974 struct iv *iv, *incr_iv;
975 struct loop *loop = data->current_loop;
976 basic_block incr_bb;
977 gimple_stmt_iterator psi;
978
979 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
980 {
981 phi = gsi_stmt (psi);
982
983 iv = get_iv (data, PHI_RESULT (phi));
984 if (!iv)
985 continue;
986
987 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
988 incr_iv = get_iv (data, var);
989 if (!incr_iv)
990 continue;
991
992 /* If the increment occurs in a subloop, ignore it. */
993 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
994 if (incr_bb->loop_father != data->current_loop
995 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
996 continue;
997
998 iv->biv_p = true;
999 incr_iv->biv_p = true;
1000 }
1001 }
1002
1003 /* Checks whether STMT defines a linear induction variable and stores its
1004 parameters to IV. */
1005
1006 static bool
1007 find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
1008 {
1009 tree lhs;
1010 struct loop *loop = data->current_loop;
1011
1012 iv->base = NULL_TREE;
1013 iv->step = NULL_TREE;
1014
1015 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1016 return false;
1017
1018 lhs = gimple_assign_lhs (stmt);
1019 if (TREE_CODE (lhs) != SSA_NAME)
1020 return false;
1021
1022 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1023 return false;
1024 iv->base = expand_simple_operations (iv->base);
1025
1026 if (contains_abnormal_ssa_name_p (iv->base)
1027 || contains_abnormal_ssa_name_p (iv->step))
1028 return false;
1029
1030 return true;
1031 }
1032
1033 /* Finds general ivs in statement STMT. */
1034
1035 static void
1036 find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
1037 {
1038 affine_iv iv;
1039
1040 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1041 return;
1042
1043 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
1044 }
1045
1046 /* Finds general ivs in basic block BB. */
1047
1048 static void
1049 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1050 {
1051 gimple_stmt_iterator bsi;
1052
1053 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1054 find_givs_in_stmt (data, gsi_stmt (bsi));
1055 }
1056
1057 /* Finds general ivs. */
1058
1059 static void
1060 find_givs (struct ivopts_data *data)
1061 {
1062 struct loop *loop = data->current_loop;
1063 basic_block *body = get_loop_body_in_dom_order (loop);
1064 unsigned i;
1065
1066 for (i = 0; i < loop->num_nodes; i++)
1067 find_givs_in_bb (data, body[i]);
1068 free (body);
1069 }
1070
1071 /* For each ssa name defined in LOOP determines whether it is an induction
1072 variable and if so, its initial value and step. */
1073
1074 static bool
1075 find_induction_variables (struct ivopts_data *data)
1076 {
1077 unsigned i;
1078 bitmap_iterator bi;
1079
1080 if (!find_bivs (data))
1081 return false;
1082
1083 find_givs (data);
1084 mark_bivs (data);
1085
1086 if (dump_file && (dump_flags & TDF_DETAILS))
1087 {
1088 tree niter = niter_for_single_dom_exit (data);
1089
1090 if (niter)
1091 {
1092 fprintf (dump_file, " number of iterations ");
1093 print_generic_expr (dump_file, niter, TDF_SLIM);
1094 fprintf (dump_file, "\n\n");
1095 }
1096
1097 fprintf (dump_file, "Induction variables:\n\n");
1098
1099 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1100 {
1101 if (ver_info (data, i)->iv)
1102 dump_iv (dump_file, ver_info (data, i)->iv);
1103 }
1104 }
1105
1106 return true;
1107 }
1108
1109 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1110
1111 static struct iv_use *
1112 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1113 gimple stmt, enum use_type use_type)
1114 {
1115 struct iv_use *use = XCNEW (struct iv_use);
1116
1117 use->id = n_iv_uses (data);
1118 use->type = use_type;
1119 use->iv = iv;
1120 use->stmt = stmt;
1121 use->op_p = use_p;
1122 use->related_cands = BITMAP_ALLOC (NULL);
1123
1124 /* Reset the ssa name so that it is not shown in the dumps, if it was not
1125 reset by the caller. */
1126 iv->ssa_name = NULL_TREE;
1127
1128 if (dump_file && (dump_flags & TDF_DETAILS))
1129 dump_use (dump_file, use);
1130
1131 VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1132
1133 return use;
1134 }
1135
1136 /* Checks whether OP is a loop-level invariant and if so, records it.
1137 NONLINEAR_USE is true if the invariant is used in a way we do not
1138 handle specially. */
1139
1140 static void
1141 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1142 {
1143 basic_block bb;
1144 struct version_info *info;
1145
1146 if (TREE_CODE (op) != SSA_NAME
1147 || !is_gimple_reg (op))
1148 return;
1149
1150 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1151 if (bb
1152 && flow_bb_inside_loop_p (data->current_loop, bb))
1153 return;
1154
1155 info = name_info (data, op);
1156 info->name = op;
1157 info->has_nonlin_use |= nonlinear_use;
1158 if (!info->inv_id)
1159 info->inv_id = ++data->max_inv_id;
1160 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1161 }
1162
1163 /* Checks whether the use OP is interesting and if so, records it. */
1164
1165 static struct iv_use *
1166 find_interesting_uses_op (struct ivopts_data *data, tree op)
1167 {
1168 struct iv *iv;
1169 struct iv *civ;
1170 gimple stmt;
1171 struct iv_use *use;
1172
1173 if (TREE_CODE (op) != SSA_NAME)
1174 return NULL;
1175
1176 iv = get_iv (data, op);
1177 if (!iv)
1178 return NULL;
1179
1180 if (iv->have_use_for)
1181 {
1182 use = iv_use (data, iv->use_id);
1183
1184 gcc_assert (use->type == USE_NONLINEAR_EXPR);
1185 return use;
1186 }
1187
1188 if (integer_zerop (iv->step))
1189 {
1190 record_invariant (data, op, true);
1191 return NULL;
1192 }
1193 iv->have_use_for = true;
1194
1195 civ = XNEW (struct iv);
1196 *civ = *iv;
1197
1198 stmt = SSA_NAME_DEF_STMT (op);
1199 gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1200 || is_gimple_assign (stmt));
1201
1202 use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1203 iv->use_id = use->id;
1204
1205 return use;
1206 }
1207
1208 /* Given a condition in statement STMT, checks whether it is a compare
1209 of an induction variable and an invariant. If this is the case,
1210 CONTROL_VAR is set to location of the iv, BOUND to the location of
1211 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1212 induction variable descriptions, and true is returned. If this is not
1213 the case, CONTROL_VAR and BOUND are set to the arguments of the
1214 condition and false is returned. */
1215
1216 static bool
1217 extract_cond_operands (struct ivopts_data *data, gimple stmt,
1218 tree **control_var, tree **bound,
1219 struct iv **iv_var, struct iv **iv_bound)
1220 {
1221 /* The objects returned when COND has constant operands. */
1222 static struct iv const_iv;
1223 static tree zero;
1224 tree *op0 = &zero, *op1 = &zero, *tmp_op;
1225 struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1226 bool ret = false;
1227
1228 if (gimple_code (stmt) == GIMPLE_COND)
1229 {
1230 op0 = gimple_cond_lhs_ptr (stmt);
1231 op1 = gimple_cond_rhs_ptr (stmt);
1232 }
1233 else
1234 {
1235 op0 = gimple_assign_rhs1_ptr (stmt);
1236 op1 = gimple_assign_rhs2_ptr (stmt);
1237 }
1238
1239 zero = integer_zero_node;
1240 const_iv.step = integer_zero_node;
1241
1242 if (TREE_CODE (*op0) == SSA_NAME)
1243 iv0 = get_iv (data, *op0);
1244 if (TREE_CODE (*op1) == SSA_NAME)
1245 iv1 = get_iv (data, *op1);
1246
1247 /* Exactly one of the compared values must be an iv, and the other one must
1248 be an invariant. */
1249 if (!iv0 || !iv1)
1250 goto end;
1251
1252 if (integer_zerop (iv0->step))
1253 {
1254 /* Control variable may be on the other side. */
1255 tmp_op = op0; op0 = op1; op1 = tmp_op;
1256 tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1257 }
1258 ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1259
1260 end:
1261 if (control_var)
1262 *control_var = op0;
1263 if (iv_var)
1264 *iv_var = iv0;
1265 if (bound)
1266 *bound = op1;
1267 if (iv_bound)
1268 *iv_bound = iv1;
1269
1270 return ret;
1271 }
1272
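/* For instance (hypothetical GIMPLE): given `if (i_1 < n_2)' where `i_1'
   is an iv with nonzero step and `n_2' is loop invariant, *CONTROL_VAR is
   set to point to `i_1', *BOUND to `n_2', and true is returned.  If both
   operands were invariant (zero step), false would be returned with the
   operands still stored.  */
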
1273 /* Checks whether the condition in STMT is interesting and if so,
1274 records it. */
1275
1276 static void
1277 find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
1278 {
1279 tree *var_p, *bound_p;
1280 struct iv *var_iv, *civ;
1281
1282 if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1283 {
1284 find_interesting_uses_op (data, *var_p);
1285 find_interesting_uses_op (data, *bound_p);
1286 return;
1287 }
1288
1289 civ = XNEW (struct iv);
1290 *civ = *var_iv;
1291 record_use (data, NULL, civ, stmt, USE_COMPARE);
1292 }
1293
1294 /* Returns true if expression EXPR is obviously invariant in LOOP,
1295 i.e. if all its operands are defined outside of the LOOP. LOOP
1296 should not be the function body. */
1297
1298 bool
1299 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1300 {
1301 basic_block def_bb;
1302 unsigned i, len;
1303
1304 gcc_assert (loop_depth (loop) > 0);
1305
1306 if (is_gimple_min_invariant (expr))
1307 return true;
1308
1309 if (TREE_CODE (expr) == SSA_NAME)
1310 {
1311 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1312 if (def_bb
1313 && flow_bb_inside_loop_p (loop, def_bb))
1314 return false;
1315
1316 return true;
1317 }
1318
1319 if (!EXPR_P (expr))
1320 return false;
1321
1322 len = TREE_OPERAND_LENGTH (expr);
1323 for (i = 0; i < len; i++)
1324 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1325 return false;
1326
1327 return true;
1328 }
1329
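/* E.g. (illustrative): if `n_1' is defined before the loop, both `n_1'
   and `n_1 * 4 + 8' are invariant in it, while any expression using an
   ssa name defined inside the loop is not.  */
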
1330 /* Returns true if statement STMT is obviously invariant in LOOP,
1331 i.e. if all its operands on the RHS are defined outside of the LOOP.
1332 LOOP should not be the function body. */
1333
1334 bool
1335 stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
1336 {
1337 unsigned i;
1338 tree lhs;
1339
1340 gcc_assert (loop_depth (loop) > 0);
1341
1342 lhs = gimple_get_lhs (stmt);
1343 for (i = 0; i < gimple_num_ops (stmt); i++)
1344 {
1345 tree op = gimple_op (stmt, i);
1346 if (op != lhs && !expr_invariant_in_loop_p (loop, op))
1347 return false;
1348 }
1349
1350 return true;
1351 }
1352
1353 /* Accumulates the steps of indices into DATA and replaces their values with the
1354 initial ones. Returns false when the value of the index cannot be determined.
1355 Callback for for_each_index. */
1356
1357 struct ifs_ivopts_data
1358 {
1359 struct ivopts_data *ivopts_data;
1360 gimple stmt;
1361 tree step;
1362 };
1363
1364 static bool
1365 idx_find_step (tree base, tree *idx, void *data)
1366 {
1367 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1368 struct iv *iv;
1369 tree step, iv_base, iv_step, lbound, off;
1370 struct loop *loop = dta->ivopts_data->current_loop;
1371
1372 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF)
1373 return false;
1374
1375 /* If base is a component ref, require that the offset of the reference
1376 be invariant. */
1377 if (TREE_CODE (base) == COMPONENT_REF)
1378 {
1379 off = component_ref_field_offset (base);
1380 return expr_invariant_in_loop_p (loop, off);
1381 }
1382
1383 /* If base is an array reference, first check whether we will be able to move the
1384 reference out of the loop (in order to take its address in strength
1385 reduction). In order for this to work we need both lower bound
1386 and step to be loop invariants. */
1387 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1388 {
1389 /* Moreover, for a range, the size needs to be invariant as well. */
1390 if (TREE_CODE (base) == ARRAY_RANGE_REF
1391 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1392 return false;
1393
1394 step = array_ref_element_size (base);
1395 lbound = array_ref_low_bound (base);
1396
1397 if (!expr_invariant_in_loop_p (loop, step)
1398 || !expr_invariant_in_loop_p (loop, lbound))
1399 return false;
1400 }
1401
1402 if (TREE_CODE (*idx) != SSA_NAME)
1403 return true;
1404
1405 iv = get_iv (dta->ivopts_data, *idx);
1406 if (!iv)
1407 return false;
1408
1409 /* XXX For a base of *D42 with iv->base being &x[0], we produce
1410 *&x[0], which is not folded and does not trigger the
1411 ARRAY_REF path below. */
1412 *idx = iv->base;
1413
1414 if (integer_zerop (iv->step))
1415 return true;
1416
1417 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1418 {
1419 step = array_ref_element_size (base);
1420
1421 /* We only handle addresses whose step is an integer constant. */
1422 if (TREE_CODE (step) != INTEGER_CST)
1423 return false;
1424 }
1425 else
1426 /* The step for pointer arithmetic is already 1 byte. */
1427 step = size_one_node;
1428
1429 iv_base = iv->base;
1430 iv_step = iv->step;
1431 if (!convert_affine_scev (dta->ivopts_data->current_loop,
1432 sizetype, &iv_base, &iv_step, dta->stmt,
1433 false))
1434 {
1435 /* The index might wrap. */
1436 return false;
1437 }
1438
1439 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1440 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1441
1442 return true;
1443 }
1444
1445 /* Records a use in index IDX. Callback for for_each_index. Ivopts data
1446 object is passed to it in DATA. */
1447
1448 static bool
1449 idx_record_use (tree base, tree *idx,
1450 void *vdata)
1451 {
1452 struct ivopts_data *data = (struct ivopts_data *) vdata;
1453 find_interesting_uses_op (data, *idx);
1454 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1455 {
1456 find_interesting_uses_op (data, array_ref_element_size (base));
1457 find_interesting_uses_op (data, array_ref_low_bound (base));
1458 }
1459 return true;
1460 }
1461
1462 /* If we can prove that TOP = cst * BOT for some constant cst,
1463 store cst to MUL and return true. Otherwise return false.
1464 The returned value is always sign-extended, regardless of the
1465 signedness of TOP and BOT. */
1466
1467 static bool
1468 constant_multiple_of (tree top, tree bot, double_int *mul)
1469 {
1470 tree mby;
1471 enum tree_code code;
1472 double_int res, p0, p1;
1473 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1474
1475 STRIP_NOPS (top);
1476 STRIP_NOPS (bot);
1477
1478 if (operand_equal_p (top, bot, 0))
1479 {
1480 *mul = double_int_one;
1481 return true;
1482 }
1483
1484 code = TREE_CODE (top);
1485 switch (code)
1486 {
1487 case MULT_EXPR:
1488 mby = TREE_OPERAND (top, 1);
1489 if (TREE_CODE (mby) != INTEGER_CST)
1490 return false;
1491
1492 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1493 return false;
1494
1495 *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
1496 precision);
1497 return true;
1498
1499 case PLUS_EXPR:
1500 case MINUS_EXPR:
1501 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1502 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1503 return false;
1504
1505 if (code == MINUS_EXPR)
1506 p1 = double_int_neg (p1);
1507 *mul = double_int_sext (double_int_add (p0, p1), precision);
1508 return true;
1509
1510 case INTEGER_CST:
1511 if (TREE_CODE (bot) != INTEGER_CST)
1512 return false;
1513
1514 p0 = double_int_sext (tree_to_double_int (top), precision);
1515 p1 = double_int_sext (tree_to_double_int (bot), precision);
1516 if (double_int_zero_p (p1))
1517 return false;
1518 *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
1519 precision);
1520 return double_int_zero_p (res);
1521
1522 default:
1523 return false;
1524 }
1525 }
1526
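/* Worked examples (illustrative): constant_multiple_of (x_1 * 12, x_1,
   &mul) sets MUL to 12; constant_multiple_of (8, 4, &mul) sets MUL to 2;
   and constant_multiple_of (10, 4, &mul) returns false because the
   division leaves a remainder.  */
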
1527 /* Returns true if memory reference REF with step STEP may be unaligned. */
1528
1529 static bool
1530 may_be_unaligned_p (tree ref, tree step)
1531 {
1532 tree base;
1533 tree base_type;
1534 HOST_WIDE_INT bitsize;
1535 HOST_WIDE_INT bitpos;
1536 tree toffset;
1537 enum machine_mode mode;
1538 int unsignedp, volatilep;
1539 unsigned base_align;
1540
1541 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1542 thus they are not misaligned. */
1543 if (TREE_CODE (ref) == TARGET_MEM_REF)
1544 return false;
1545
1546 /* The test below is basically a copy of what expr.c:normal_inner_ref
1547 does to check whether the object must be loaded by parts when
1548 STRICT_ALIGNMENT is true. */
1549 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1550 &unsignedp, &volatilep, true);
1551 base_type = TREE_TYPE (base);
1552 base_align = TYPE_ALIGN (base_type);
1553
1554 if (mode != BLKmode)
1555 {
1556 unsigned mode_align = GET_MODE_ALIGNMENT (mode);
1557
1558 if (base_align < mode_align
1559 || (bitpos % mode_align) != 0
1560 || (bitpos % BITS_PER_UNIT) != 0)
1561 return true;
1562
1563 if (toffset
1564 && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
1565 return true;
1566
1567 if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
1568 return true;
1569 }
1570
1571 return false;
1572 }
1573
1574 /* Return true if EXPR may be non-addressable. */
1575
1576 static bool
1577 may_be_nonaddressable_p (tree expr)
1578 {
1579 switch (TREE_CODE (expr))
1580 {
1581 case TARGET_MEM_REF:
1582 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1583 target, thus they are always addressable. */
1584 return false;
1585
1586 case COMPONENT_REF:
1587 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1588 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1589
1590 case VIEW_CONVERT_EXPR:
1591 /* This kind of view-conversions may wrap non-addressable objects
1592 and make them look addressable. After some processing the
1593 non-addressability may be uncovered again, causing ADDR_EXPRs
1594 of inappropriate objects to be built. */
1595 if (is_gimple_reg (TREE_OPERAND (expr, 0))
1596 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
1597 return true;
1598
1599 /* ... fall through ... */
1600
1601 case ARRAY_REF:
1602 case ARRAY_RANGE_REF:
1603 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1604
1605 CASE_CONVERT:
1606 return true;
1607
1608 default:
1609 break;
1610 }
1611
1612 return false;
1613 }
1614
1615 /* Finds addresses in *OP_P inside STMT. */
1616
1617 static void
1618 find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
1619 {
1620 tree base = *op_p, step = size_zero_node;
1621 struct iv *civ;
1622 struct ifs_ivopts_data ifs_ivopts_data;
1623
1624 /* Do not play with volatile memory references. A bit too conservative,
1625 perhaps, but safe. */
1626 if (gimple_has_volatile_ops (stmt))
1627 goto fail;
1628
1629 /* Ignore bitfields for now. Not really something terribly complicated
1630 to handle. TODO. */
1631 if (TREE_CODE (base) == BIT_FIELD_REF)
1632 goto fail;
1633
1634 base = unshare_expr (base);
1635
1636 if (TREE_CODE (base) == TARGET_MEM_REF)
1637 {
1638 tree type = build_pointer_type (TREE_TYPE (base));
1639 tree astep;
1640
1641 if (TMR_BASE (base)
1642 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1643 {
1644 civ = get_iv (data, TMR_BASE (base));
1645 if (!civ)
1646 goto fail;
1647
1648 TMR_BASE (base) = civ->base;
1649 step = civ->step;
1650 }
1651 if (TMR_INDEX (base)
1652 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1653 {
1654 civ = get_iv (data, TMR_INDEX (base));
1655 if (!civ)
1656 goto fail;
1657
1658 TMR_INDEX (base) = civ->base;
1659 astep = civ->step;
1660
1661 if (astep)
1662 {
1663 if (TMR_STEP (base))
1664 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1665
1666 step = fold_build2 (PLUS_EXPR, type, step, astep);
1667 }
1668 }
1669
1670 if (integer_zerop (step))
1671 goto fail;
1672 base = tree_mem_ref_addr (type, base);
1673 }
1674 else
1675 {
1676 ifs_ivopts_data.ivopts_data = data;
1677 ifs_ivopts_data.stmt = stmt;
1678 ifs_ivopts_data.step = size_zero_node;
1679 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1680 || integer_zerop (ifs_ivopts_data.step))
1681 goto fail;
1682 step = ifs_ivopts_data.step;
1683
1684 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1685
1686 /* Check that the base expression is addressable. This needs
1687 to be done after substituting bases of IVs into it. */
1688 if (may_be_nonaddressable_p (base))
1689 goto fail;
1690
1691 /* Moreover, on strict alignment platforms, check that it is
1692 sufficiently aligned. */
1693 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1694 goto fail;
1695
1696 base = build_fold_addr_expr (base);
1697
1698 /* Substituting bases of IVs into the base expression might
1699 have caused folding opportunities. */
1700 if (TREE_CODE (base) == ADDR_EXPR)
1701 {
1702 tree *ref = &TREE_OPERAND (base, 0);
1703 while (handled_component_p (*ref))
1704 ref = &TREE_OPERAND (*ref, 0);
1705 if (TREE_CODE (*ref) == MEM_REF)
1706 {
1707 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
1708 TREE_OPERAND (*ref, 0),
1709 TREE_OPERAND (*ref, 1));
1710 if (tem)
1711 *ref = tem;
1712 }
1713 }
1714 }
1715
1716 civ = alloc_iv (base, step);
1717 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1718 return;
1719
1720 fail:
1721 for_each_index (op_p, idx_record_use, data);
1722 }
1723
1724 /* Finds and records invariants used in STMT. */
1725
1726 static void
1727 find_invariants_stmt (struct ivopts_data *data, gimple stmt)
1728 {
1729 ssa_op_iter iter;
1730 use_operand_p use_p;
1731 tree op;
1732
1733 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1734 {
1735 op = USE_FROM_PTR (use_p);
1736 record_invariant (data, op, false);
1737 }
1738 }
1739
1740 /* Finds interesting uses of induction variables in the statement STMT. */
1741
1742 static void
1743 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
1744 {
1745 struct iv *iv;
1746 tree op, *lhs, *rhs;
1747 ssa_op_iter iter;
1748 use_operand_p use_p;
1749 enum tree_code code;
1750
1751 find_invariants_stmt (data, stmt);
1752
1753 if (gimple_code (stmt) == GIMPLE_COND)
1754 {
1755 find_interesting_uses_cond (data, stmt);
1756 return;
1757 }
1758
1759 if (is_gimple_assign (stmt))
1760 {
1761 lhs = gimple_assign_lhs_ptr (stmt);
1762 rhs = gimple_assign_rhs1_ptr (stmt);
1763
1764 if (TREE_CODE (*lhs) == SSA_NAME)
1765 {
1766 /* If the statement defines an induction variable, the uses are not
1767 interesting by themselves. */
1768
1769 iv = get_iv (data, *lhs);
1770
1771 if (iv && !integer_zerop (iv->step))
1772 return;
1773 }
1774
1775 code = gimple_assign_rhs_code (stmt);
1776 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
1777 && (REFERENCE_CLASS_P (*rhs)
1778 || is_gimple_val (*rhs)))
1779 {
1780 if (REFERENCE_CLASS_P (*rhs))
1781 find_interesting_uses_address (data, stmt, rhs);
1782 else
1783 find_interesting_uses_op (data, *rhs);
1784
1785 if (REFERENCE_CLASS_P (*lhs))
1786 find_interesting_uses_address (data, stmt, lhs);
1787 return;
1788 }
1789 else if (TREE_CODE_CLASS (code) == tcc_comparison)
1790 {
1791 find_interesting_uses_cond (data, stmt);
1792 return;
1793 }
1794
1795 /* TODO -- we should also handle address uses of type
1796
1797 memory = call (whatever);
1798
1799 and
1800
1801 call (memory). */
1802 }
1803
1804 if (gimple_code (stmt) == GIMPLE_PHI
1805 && gimple_bb (stmt) == data->current_loop->header)
1806 {
1807 iv = get_iv (data, PHI_RESULT (stmt));
1808
1809 if (iv && !integer_zerop (iv->step))
1810 return;
1811 }
1812
1813 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1814 {
1815 op = USE_FROM_PTR (use_p);
1816
1817 if (TREE_CODE (op) != SSA_NAME)
1818 continue;
1819
1820 iv = get_iv (data, op);
1821 if (!iv)
1822 continue;
1823
1824 find_interesting_uses_op (data, op);
1825 }
1826 }
1827
1828 /* Finds interesting uses of induction variables outside of loops
1829 on loop exit edge EXIT. */
1830
1831 static void
1832 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1833 {
1834 gimple phi;
1835 gimple_stmt_iterator psi;
1836 tree def;
1837
1838 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
1839 {
1840 phi = gsi_stmt (psi);
1841 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1842 if (is_gimple_reg (def))
1843 find_interesting_uses_op (data, def);
1844 }
1845 }
1846
1847 /* Finds uses of the induction variables that are interesting. */
1848
1849 static void
1850 find_interesting_uses (struct ivopts_data *data)
1851 {
1852 basic_block bb;
1853 gimple_stmt_iterator bsi;
1854 basic_block *body = get_loop_body (data->current_loop);
1855 unsigned i;
1856 struct version_info *info;
1857 edge e;
1858
1859 if (dump_file && (dump_flags & TDF_DETAILS))
1860 fprintf (dump_file, "Uses:\n\n");
1861
1862 for (i = 0; i < data->current_loop->num_nodes; i++)
1863 {
1864 edge_iterator ei;
1865 bb = body[i];
1866
1867 FOR_EACH_EDGE (e, ei, bb->succs)
1868 if (e->dest != EXIT_BLOCK_PTR
1869 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1870 find_interesting_uses_outside (data, e);
1871
1872 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1873 find_interesting_uses_stmt (data, gsi_stmt (bsi));
1874 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1875 if (!is_gimple_debug (gsi_stmt (bsi)))
1876 find_interesting_uses_stmt (data, gsi_stmt (bsi));
1877 }
1878
1879 if (dump_file && (dump_flags & TDF_DETAILS))
1880 {
1881 bitmap_iterator bi;
1882
1883 fprintf (dump_file, "\n");
1884
1885 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1886 {
1887 info = ver_info (data, i);
1888 if (info->inv_id)
1889 {
1890 fprintf (dump_file, " ");
1891 print_generic_expr (dump_file, info->name, TDF_SLIM);
1892 fprintf (dump_file, " is invariant (%d)%s\n",
1893 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1894 }
1895 }
1896
1897 fprintf (dump_file, "\n");
1898 }
1899
1900 free (body);
1901 }
1902
1903 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1904 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1905 we are at the top-level of the processed address. */
1906
1907 static tree
1908 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1909 unsigned HOST_WIDE_INT *offset)
1910 {
1911 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1912 enum tree_code code;
1913 tree type, orig_type = TREE_TYPE (expr);
1914 unsigned HOST_WIDE_INT off0, off1, st;
1915 tree orig_expr = expr;
1916
1917 STRIP_NOPS (expr);
1918
1919 type = TREE_TYPE (expr);
1920 code = TREE_CODE (expr);
1921 *offset = 0;
1922
1923 switch (code)
1924 {
1925 case INTEGER_CST:
1926 if (!cst_and_fits_in_hwi (expr)
1927 || integer_zerop (expr))
1928 return orig_expr;
1929
1930 *offset = int_cst_value (expr);
1931 return build_int_cst (orig_type, 0);
1932
1933 case POINTER_PLUS_EXPR:
1934 case PLUS_EXPR:
1935 case MINUS_EXPR:
1936 op0 = TREE_OPERAND (expr, 0);
1937 op1 = TREE_OPERAND (expr, 1);
1938
1939 op0 = strip_offset_1 (op0, false, false, &off0);
1940 op1 = strip_offset_1 (op1, false, false, &off1);
1941
1942 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
1943 if (op0 == TREE_OPERAND (expr, 0)
1944 && op1 == TREE_OPERAND (expr, 1))
1945 return orig_expr;
1946
1947 if (integer_zerop (op1))
1948 expr = op0;
1949 else if (integer_zerop (op0))
1950 {
1951 if (code == MINUS_EXPR)
1952 expr = fold_build1 (NEGATE_EXPR, type, op1);
1953 else
1954 expr = op1;
1955 }
1956 else
1957 expr = fold_build2 (code, type, op0, op1);
1958
1959 return fold_convert (orig_type, expr);
1960
1961 case MULT_EXPR:
1962 op1 = TREE_OPERAND (expr, 1);
1963 if (!cst_and_fits_in_hwi (op1))
1964 return orig_expr;
1965
1966 op0 = TREE_OPERAND (expr, 0);
1967 op0 = strip_offset_1 (op0, false, false, &off0);
1968 if (op0 == TREE_OPERAND (expr, 0))
1969 return orig_expr;
1970
1971 *offset = off0 * int_cst_value (op1);
1972 if (integer_zerop (op0))
1973 expr = op0;
1974 else
1975 expr = fold_build2 (MULT_EXPR, type, op0, op1);
1976
1977 return fold_convert (orig_type, expr);
1978
1979 case ARRAY_REF:
1980 case ARRAY_RANGE_REF:
1981 if (!inside_addr)
1982 return orig_expr;
1983
1984 step = array_ref_element_size (expr);
1985 if (!cst_and_fits_in_hwi (step))
1986 break;
1987
1988 st = int_cst_value (step);
1989 op1 = TREE_OPERAND (expr, 1);
1990 op1 = strip_offset_1 (op1, false, false, &off1);
1991 *offset = off1 * st;
1992
1993 if (top_compref
1994 && integer_zerop (op1))
1995 {
1996 /* Strip the component reference completely. */
1997 op0 = TREE_OPERAND (expr, 0);
1998 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1999 *offset += off0;
2000 return op0;
2001 }
2002 break;
2003
2004 case COMPONENT_REF:
2005 if (!inside_addr)
2006 return orig_expr;
2007
2008 tmp = component_ref_field_offset (expr);
2009 if (top_compref
2010 && cst_and_fits_in_hwi (tmp))
2011 {
2012 /* Strip the component reference completely. */
2013 op0 = TREE_OPERAND (expr, 0);
2014 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2015 *offset = off0 + int_cst_value (tmp);
2016 return op0;
2017 }
2018 break;
2019
2020 case ADDR_EXPR:
2021 op0 = TREE_OPERAND (expr, 0);
2022 op0 = strip_offset_1 (op0, true, true, &off0);
2023 *offset += off0;
2024
2025 if (op0 == TREE_OPERAND (expr, 0))
2026 return orig_expr;
2027
2028 expr = build_fold_addr_expr (op0);
2029 return fold_convert (orig_type, expr);
2030
2031 case MEM_REF:
2032 /* ??? Offset operand? */
2033 inside_addr = false;
2034 break;
2035
2036 default:
2037 return orig_expr;
2038 }
2039
2040 /* Default handling of expressions for which we want to recurse into
2041 the first operand. */
2042 op0 = TREE_OPERAND (expr, 0);
2043 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2044 *offset += off0;
2045
2046 if (op0 == TREE_OPERAND (expr, 0)
2047 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2048 return orig_expr;
2049
2050 expr = copy_node (expr);
2051 TREE_OPERAND (expr, 0) = op0;
2052 if (op1)
2053 TREE_OPERAND (expr, 1) = op1;
2054
2055 /* Inside address, we might strip the top level component references,
2056 thus changing type of the expression. Handling of ADDR_EXPR
2057 will fix that. */
2058 expr = fold_convert (orig_type, expr);
2059
2060 return expr;
2061 }
2062
2063 /* Strips constant offsets from EXPR and stores them to OFFSET. */
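/* Illustrative examples (the exact trees depend on the front end):
   strip_offset (p + 16, &off) returns p with off == 16, and
   strip_offset (&a[i + 2], &off) returns &a[i] with
   off == 2 * sizeof (*a).  */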
2064
2065 static tree
2066 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2067 {
2068 return strip_offset_1 (expr, false, false, offset);
2069 }
2070
2071 /* Returns a variant of TYPE that can be used as a base for different uses.
2072 We return an unsigned type with the same precision, which avoids problems
2073 with overflows. */
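/* For instance (illustrative), 'int' maps to 'unsigned int', and a
   pointer type maps to the unsigned integer type of the same width.  */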
2074
2075 static tree
2076 generic_type_for (tree type)
2077 {
2078 if (POINTER_TYPE_P (type))
2079 return unsigned_type_for (type);
2080
2081 if (TYPE_UNSIGNED (type))
2082 return type;
2083
2084 return unsigned_type_for (type);
2085 }
2086
2087 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2088 the bitmap in which we should store them. */
2089
2090 static struct ivopts_data *fd_ivopts_data;
2091 static tree
2092 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2093 {
2094 bitmap *depends_on = (bitmap *) data;
2095 struct version_info *info;
2096
2097 if (TREE_CODE (*expr_p) != SSA_NAME)
2098 return NULL_TREE;
2099 info = name_info (fd_ivopts_data, *expr_p);
2100
2101 if (!info->inv_id || info->has_nonlin_use)
2102 return NULL_TREE;
2103
2104 if (!*depends_on)
2105 *depends_on = BITMAP_ALLOC (NULL);
2106 bitmap_set_bit (*depends_on, info->inv_id);
2107
2108 return NULL_TREE;
2109 }
2110
2111 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2112 position to POS. If USE is not NULL, the candidate is set as related to
2113 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2114 replacement of the final value of the iv by a direct computation. */
2115
2116 static struct iv_cand *
2117 add_candidate_1 (struct ivopts_data *data,
2118 tree base, tree step, bool important, enum iv_position pos,
2119 struct iv_use *use, gimple incremented_at)
2120 {
2121 unsigned i;
2122 struct iv_cand *cand = NULL;
2123 tree type, orig_type;
2124
2125 if (base)
2126 {
2127 orig_type = TREE_TYPE (base);
2128 type = generic_type_for (orig_type);
2129 if (type != orig_type)
2130 {
2131 base = fold_convert (type, base);
2132 step = fold_convert (type, step);
2133 }
2134 }
2135
2136 for (i = 0; i < n_iv_cands (data); i++)
2137 {
2138 cand = iv_cand (data, i);
2139
2140 if (cand->pos != pos)
2141 continue;
2142
2143 if (cand->incremented_at != incremented_at
2144 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2145 && cand->ainc_use != use))
2146 continue;
2147
2148 if (!cand->iv)
2149 {
2150 if (!base && !step)
2151 break;
2152
2153 continue;
2154 }
2155
2156 if (!base && !step)
2157 continue;
2158
2159 if (operand_equal_p (base, cand->iv->base, 0)
2160 && operand_equal_p (step, cand->iv->step, 0))
2161 break;
2162 }
2163
2164 if (i == n_iv_cands (data))
2165 {
2166 cand = XCNEW (struct iv_cand);
2167 cand->id = i;
2168
2169 if (!base && !step)
2170 cand->iv = NULL;
2171 else
2172 cand->iv = alloc_iv (base, step);
2173
2174 cand->pos = pos;
2175 if (pos != IP_ORIGINAL && cand->iv)
2176 {
2177 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2178 cand->var_after = cand->var_before;
2179 }
2180 cand->important = important;
2181 cand->incremented_at = incremented_at;
2182 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2183
2184 if (step
2185 && TREE_CODE (step) != INTEGER_CST)
2186 {
2187 fd_ivopts_data = data;
2188 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2189 }
2190
2191 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2192 cand->ainc_use = use;
2193 else
2194 cand->ainc_use = NULL;
2195
2196 if (dump_file && (dump_flags & TDF_DETAILS))
2197 dump_cand (dump_file, cand);
2198 }
2199
2200 if (important && !cand->important)
2201 {
2202 cand->important = true;
2203 if (dump_file && (dump_flags & TDF_DETAILS))
2204 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2205 }
2206
2207 if (use)
2208 {
2209 bitmap_set_bit (use->related_cands, i);
2210 if (dump_file && (dump_flags & TDF_DETAILS))
2211 fprintf (dump_file, "Candidate %d is related to use %d\n",
2212 cand->id, use->id);
2213 }
2214
2215 return cand;
2216 }
2217
2218 /* Returns true if incrementing the induction variable at the end of the LOOP
2219 is allowed.
2220
2221 The purpose is to avoid splitting the latch edge with a biv increment,
2222 thus creating a jump, possibly confusing other optimization passes and
2223 leaving less freedom to the scheduler. So we allow IP_END_POS only if
2224 IP_NORMAL_POS is not available (so we do not have a better alternative),
2225 or if the latch edge is already nonempty. */
2226
2227 static bool
2228 allow_ip_end_pos_p (struct loop *loop)
2229 {
2230 if (!ip_normal_pos (loop))
2231 return true;
2232
2233 if (!empty_block_p (ip_end_pos (loop)))
2234 return true;
2235
2236 return false;
2237 }
2238
2239 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2240 Important field is set to IMPORTANT. */
2241
2242 static void
2243 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2244 bool important, struct iv_use *use)
2245 {
2246 basic_block use_bb = gimple_bb (use->stmt);
2247 enum machine_mode mem_mode;
2248 unsigned HOST_WIDE_INT cstepi;
2249
2250 /* If we insert the increment in any position other than the standard
2251 ones, we must ensure that it is incremented once per iteration.
2252 It must not be in an inner nested loop, or one side of an if
2253 statement. */
2254 if (use_bb->loop_father != data->current_loop
2255 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2256 || stmt_could_throw_p (use->stmt)
2257 || !cst_and_fits_in_hwi (step))
2258 return;
2259
2260 cstepi = int_cst_value (step);
2261
2262 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2263 if ((HAVE_PRE_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2264 || (HAVE_PRE_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2265 {
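/* The candidate starts at BASE - STEP, so that after the increment
   inserted just before the use it has the value BASE + STEP * i that
   the use expects, matching a pre-increment or pre-decrement access.  */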
2266 enum tree_code code = MINUS_EXPR;
2267 tree new_base;
2268 tree new_step = step;
2269
2270 if (POINTER_TYPE_P (TREE_TYPE (base)))
2271 {
2272 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2273 code = POINTER_PLUS_EXPR;
2274 }
2275 else
2276 new_step = fold_convert (TREE_TYPE (base), new_step);
2277 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2278 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2279 use->stmt);
2280 }
2281 if ((HAVE_POST_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2282 || (HAVE_POST_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2283 {
2284 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2285 use->stmt);
2286 }
2287 }
2288
2289 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2290 position to POS. If USE is not NULL, the candidate is set as related to
2291 it. The candidate computation is scheduled on all available positions. */
2292
2293 static void
2294 add_candidate (struct ivopts_data *data,
2295 tree base, tree step, bool important, struct iv_use *use)
2296 {
2297 if (ip_normal_pos (data->current_loop))
2298 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2299 if (ip_end_pos (data->current_loop)
2300 && allow_ip_end_pos_p (data->current_loop))
2301 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2302
2303 if (use != NULL && use->type == USE_ADDRESS)
2304 add_autoinc_candidates (data, base, step, important, use);
2305 }
2306
2307 /* Add a standard "0 + 1 * iteration" iv candidate for a
2308 type with SIZE bits. */
2309
2310 static void
2311 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2312 unsigned int size)
2313 {
2314 tree type = lang_hooks.types.type_for_size (size, true);
2315 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2316 true, NULL);
2317 }
2318
2319 /* Adds standard iv candidates. */
2320
2321 static void
2322 add_standard_iv_candidates (struct ivopts_data *data)
2323 {
2324 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2325
2326 /* The same for a double-integer type if it is still fast enough. */
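/* E.g., on a target with 64-bit words and 32-bit int, this also adds
   a 64-bit candidate (illustrative).  */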
2327 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2328 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2329 }
2330
2331
2332 /* Adds candidates based on the old induction variable IV. */
2333
2334 static void
2335 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2336 {
2337 gimple phi;
2338 tree def;
2339 struct iv_cand *cand;
2340
2341 add_candidate (data, iv->base, iv->step, true, NULL);
2342
2343 /* The same, but with initial value zero. */
2344 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2345 add_candidate (data, size_int (0), iv->step, true, NULL);
2346 else
2347 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2348 iv->step, true, NULL);
2349
2350 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2351 if (gimple_code (phi) == GIMPLE_PHI)
2352 {
2353 /* Additionally record the possibility of leaving the original iv
2354 untouched. */
2355 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2356 cand = add_candidate_1 (data,
2357 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2358 SSA_NAME_DEF_STMT (def));
2359 cand->var_before = iv->ssa_name;
2360 cand->var_after = def;
2361 }
2362 }
2363
2364 /* Adds candidates based on the old induction variables. */
2365
2366 static void
2367 add_old_ivs_candidates (struct ivopts_data *data)
2368 {
2369 unsigned i;
2370 struct iv *iv;
2371 bitmap_iterator bi;
2372
2373 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2374 {
2375 iv = ver_info (data, i)->iv;
2376 if (iv && iv->biv_p && !integer_zerop (iv->step))
2377 add_old_iv_candidates (data, iv);
2378 }
2379 }
2380
2381 /* Adds candidates based on the value of the induction variable IV and USE. */
2382
2383 static void
2384 add_iv_value_candidates (struct ivopts_data *data,
2385 struct iv *iv, struct iv_use *use)
2386 {
2387 unsigned HOST_WIDE_INT offset;
2388 tree base;
2389 tree basetype;
2390
2391 add_candidate (data, iv->base, iv->step, false, use);
2392
2393 /* The same, but with initial value zero. Make such a variable important,
2394 since it is generic enough that possibly many uses may be based
2395 on it. */
2396 basetype = TREE_TYPE (iv->base);
2397 if (POINTER_TYPE_P (basetype))
2398 basetype = sizetype;
2399 add_candidate (data, build_int_cst (basetype, 0),
2400 iv->step, true, use);
2401
2402 /* Third, try removing the constant offset. Make sure to even
2403 add a candidate for &a[0] vs. (T *)&a. */
2404 base = strip_offset (iv->base, &offset);
2405 if (offset
2406 || base != iv->base)
2407 add_candidate (data, base, iv->step, false, use);
2408 }
2409
2410 /* Adds candidates based on the uses. */
2411
2412 static void
2413 add_derived_ivs_candidates (struct ivopts_data *data)
2414 {
2415 unsigned i;
2416
2417 for (i = 0; i < n_iv_uses (data); i++)
2418 {
2419 struct iv_use *use = iv_use (data, i);
2420
2421 if (!use)
2422 continue;
2423
2424 switch (use->type)
2425 {
2426 case USE_NONLINEAR_EXPR:
2427 case USE_COMPARE:
2428 case USE_ADDRESS:
2429 /* Just add the ivs based on the value of the iv used here. */
2430 add_iv_value_candidates (data, use->iv, use);
2431 break;
2432
2433 default:
2434 gcc_unreachable ();
2435 }
2436 }
2437 }
2438
2439 /* Record important candidates and add them to related_cands bitmaps
2440 if needed. */
2441
2442 static void
2443 record_important_candidates (struct ivopts_data *data)
2444 {
2445 unsigned i;
2446 struct iv_use *use;
2447
2448 for (i = 0; i < n_iv_cands (data); i++)
2449 {
2450 struct iv_cand *cand = iv_cand (data, i);
2451
2452 if (cand->important)
2453 bitmap_set_bit (data->important_candidates, i);
2454 }
2455
2456 data->consider_all_candidates = (n_iv_cands (data)
2457 <= CONSIDER_ALL_CANDIDATES_BOUND);
2458
2459 if (data->consider_all_candidates)
2460 {
2461 /* We will not need "related_cands" bitmaps in this case,
2462 so release them to decrease peak memory consumption. */
2463 for (i = 0; i < n_iv_uses (data); i++)
2464 {
2465 use = iv_use (data, i);
2466 BITMAP_FREE (use->related_cands);
2467 }
2468 }
2469 else
2470 {
2471 /* Add important candidates to the related_cands bitmaps. */
2472 for (i = 0; i < n_iv_uses (data); i++)
2473 bitmap_ior_into (iv_use (data, i)->related_cands,
2474 data->important_candidates);
2475 }
2476 }
2477
2478 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2479 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2480 we allocate a simple list to every use. */
2481
2482 static void
2483 alloc_use_cost_map (struct ivopts_data *data)
2484 {
2485 unsigned i, size, s, j;
2486
2487 for (i = 0; i < n_iv_uses (data); i++)
2488 {
2489 struct iv_use *use = iv_use (data, i);
2490 bitmap_iterator bi;
2491
2492 if (data->consider_all_candidates)
2493 size = n_iv_cands (data);
2494 else
2495 {
2496 s = 0;
2497 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2498 {
2499 s++;
2500 }
2501
2502 /* Round up to a power of two, so that taking the remainder modulo it is fast. */
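/* E.g., s == 5 yields size == 8.  */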
2503 for (size = 1; size < s; size <<= 1)
2504 continue;
2505 }
2506
2507 use->n_map_members = size;
2508 use->cost_map = XCNEWVEC (struct cost_pair, size);
2509 }
2510 }
2511
2512 /* Returns description of computation cost of expression whose runtime
2513 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
2514
2515 static comp_cost
2516 new_cost (unsigned runtime, unsigned complexity)
2517 {
2518 comp_cost cost;
2519
2520 cost.cost = runtime;
2521 cost.complexity = complexity;
2522
2523 return cost;
2524 }
2525
2526 /* Adds costs COST1 and COST2. */
2527
2528 static comp_cost
2529 add_costs (comp_cost cost1, comp_cost cost2)
2530 {
2531 cost1.cost += cost2.cost;
2532 cost1.complexity += cost2.complexity;
2533
2534 return cost1;
2535 }
2536 /* Subtracts cost COST2 from COST1. */
2537
2538 static comp_cost
2539 sub_costs (comp_cost cost1, comp_cost cost2)
2540 {
2541 cost1.cost -= cost2.cost;
2542 cost1.complexity -= cost2.complexity;
2543
2544 return cost1;
2545 }
2546
2547 /* Returns a negative number if COST1 < COST2, a positive number if
2548 COST1 > COST2, and 0 if COST1 = COST2. */
2549
2550 static int
2551 compare_costs (comp_cost cost1, comp_cost cost2)
2552 {
2553 if (cost1.cost == cost2.cost)
2554 return cost1.complexity - cost2.complexity;
2555
2556 return cost1.cost - cost2.cost;
2557 }
2558
2559 /* Returns true if COST is infinite. */
2560
2561 static bool
2562 infinite_cost_p (comp_cost cost)
2563 {
2564 return cost.cost == INFTY;
2565 }
2566
2567 /* Sets cost of (USE, CANDIDATE) pair to COST and records that it depends
2568 on invariants DEPENDS_ON and that the value used in expressing it
2569 is VALUE. */
2570
2571 static void
2572 set_use_iv_cost (struct ivopts_data *data,
2573 struct iv_use *use, struct iv_cand *cand,
2574 comp_cost cost, bitmap depends_on, tree value)
2575 {
2576 unsigned i, s;
2577
2578 if (infinite_cost_p (cost))
2579 {
2580 BITMAP_FREE (depends_on);
2581 return;
2582 }
2583
2584 if (data->consider_all_candidates)
2585 {
2586 use->cost_map[cand->id].cand = cand;
2587 use->cost_map[cand->id].cost = cost;
2588 use->cost_map[cand->id].depends_on = depends_on;
2589 use->cost_map[cand->id].value = value;
2590 return;
2591 }
2592
2593 /* n_map_members is a power of two, so this computes modulo. */
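/* E.g., with n_map_members == 8, a candidate with id 11 starts probing
   at slot 11 & 7 == 3 and continues linearly from there.  */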
2594 s = cand->id & (use->n_map_members - 1);
2595 for (i = s; i < use->n_map_members; i++)
2596 if (!use->cost_map[i].cand)
2597 goto found;
2598 for (i = 0; i < s; i++)
2599 if (!use->cost_map[i].cand)
2600 goto found;
2601
2602 gcc_unreachable ();
2603
2604 found:
2605 use->cost_map[i].cand = cand;
2606 use->cost_map[i].cost = cost;
2607 use->cost_map[i].depends_on = depends_on;
2608 use->cost_map[i].value = value;
2609 }
2610
2611 /* Gets cost of (USE, CANDIDATE) pair. */
2612
2613 static struct cost_pair *
2614 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2615 struct iv_cand *cand)
2616 {
2617 unsigned i, s;
2618 struct cost_pair *ret;
2619
2620 if (!cand)
2621 return NULL;
2622
2623 if (data->consider_all_candidates)
2624 {
2625 ret = use->cost_map + cand->id;
2626 if (!ret->cand)
2627 return NULL;
2628
2629 return ret;
2630 }
2631
2632 /* n_map_members is a power of two, so this computes modulo. */
2633 s = cand->id & (use->n_map_members - 1);
2634 for (i = s; i < use->n_map_members; i++)
2635 if (use->cost_map[i].cand == cand)
2636 return use->cost_map + i;
2637
2638 for (i = 0; i < s; i++)
2639 if (use->cost_map[i].cand == cand)
2640 return use->cost_map + i;
2641
2642 return NULL;
2643 }
2644
2645 /* Returns an estimate of the cost of computing SEQ. */
2646
2647 static unsigned
2648 seq_cost (rtx seq, bool speed)
2649 {
2650 unsigned cost = 0;
2651 rtx set;
2652
2653 for (; seq; seq = NEXT_INSN (seq))
2654 {
2655 set = single_set (seq);
2656 if (set)
2657 cost += rtx_cost (set, SET, speed);
2658 else
2659 cost++;
2660 }
2661
2662 return cost;
2663 }
2664
2665 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
2666 static rtx
2667 produce_memory_decl_rtl (tree obj, int *regno)
2668 {
2669 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2670 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2671 rtx x;
2672
2673 gcc_assert (obj);
2674 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2675 {
2676 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2677 x = gen_rtx_SYMBOL_REF (address_mode, name);
2678 SET_SYMBOL_REF_DECL (x, obj);
2679 x = gen_rtx_MEM (DECL_MODE (obj), x);
2680 set_mem_addr_space (x, as);
2681 targetm.encode_section_info (obj, x, true);
2682 }
2683 else
2684 {
2685 x = gen_raw_REG (address_mode, (*regno)++);
2686 x = gen_rtx_MEM (DECL_MODE (obj), x);
2687 set_mem_addr_space (x, as);
2688 }
2689
2690 return x;
2691 }
2692
2693 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
2694 walk_tree. DATA contains the actual fake register number. */
2695
2696 static tree
2697 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2698 {
2699 tree obj = NULL_TREE;
2700 rtx x = NULL_RTX;
2701 int *regno = (int *) data;
2702
2703 switch (TREE_CODE (*expr_p))
2704 {
2705 case ADDR_EXPR:
2706 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2707 handled_component_p (*expr_p);
2708 expr_p = &TREE_OPERAND (*expr_p, 0))
2709 continue;
2710 obj = *expr_p;
2711 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2712 x = produce_memory_decl_rtl (obj, regno);
2713 break;
2714
2715 case SSA_NAME:
2716 *ws = 0;
2717 obj = SSA_NAME_VAR (*expr_p);
2718 if (!DECL_RTL_SET_P (obj))
2719 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2720 break;
2721
2722 case VAR_DECL:
2723 case PARM_DECL:
2724 case RESULT_DECL:
2725 *ws = 0;
2726 obj = *expr_p;
2727
2728 if (DECL_RTL_SET_P (obj))
2729 break;
2730
2731 if (DECL_MODE (obj) == BLKmode)
2732 x = produce_memory_decl_rtl (obj, regno);
2733 else
2734 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2735
2736 break;
2737
2738 default:
2739 break;
2740 }
2741
2742 if (x)
2743 {
2744 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2745 SET_DECL_RTL (obj, x);
2746 }
2747
2748 return NULL_TREE;
2749 }
2750
2751 /* Determines cost of the computation of EXPR. */
2752
2753 static unsigned
2754 computation_cost (tree expr, bool speed)
2755 {
2756 rtx seq, rslt;
2757 tree type = TREE_TYPE (expr);
2758 unsigned cost;
2759 /* Avoid using hard regs in ways which may be unsupported. */
2760 int regno = LAST_VIRTUAL_REGISTER + 1;
2761 struct cgraph_node *node = cgraph_node (current_function_decl);
2762 enum node_frequency real_frequency = node->frequency;
2763
2764 node->frequency = NODE_FREQUENCY_NORMAL;
2765 crtl->maybe_hot_insn_p = speed;
2766 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2767 start_sequence ();
2768 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2769 seq = get_insns ();
2770 end_sequence ();
2771 default_rtl_profile ();
2772 node->frequency = real_frequency;
2773
2774 cost = seq_cost (seq, speed);
2775 if (MEM_P (rslt))
2776 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2777 TYPE_ADDR_SPACE (type), speed);
2778
2779 return cost;
2780 }
2781
2782 /* Returns variable containing the value of candidate CAND at statement AT. */
2783
2784 static tree
2785 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2786 {
2787 if (stmt_after_increment (loop, cand, stmt))
2788 return cand->var_after;
2789 else
2790 return cand->var_before;
2791 }
2792
2793 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2794 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
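/* For example (illustrative), for a constant of a 16-bit type with
   value 0x8000 this returns 1.  */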
2795
2796 int
2797 tree_int_cst_sign_bit (const_tree t)
2798 {
2799 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2800 unsigned HOST_WIDE_INT w;
2801
2802 if (bitno < HOST_BITS_PER_WIDE_INT)
2803 w = TREE_INT_CST_LOW (t);
2804 else
2805 {
2806 w = TREE_INT_CST_HIGH (t);
2807 bitno -= HOST_BITS_PER_WIDE_INT;
2808 }
2809
2810 return (w >> bitno) & 1;
2811 }
2812
2813 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2814 same precision that is at least as wide as the precision of TYPE, stores
2815 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2816 type of A and B. */
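/* For instance (illustrative), if A is (unsigned int) x and B is
   (unsigned int) y with 64-bit x and y, the conversions are stripped
   and the 64-bit type of x is returned.  */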
2817
2818 static tree
2819 determine_common_wider_type (tree *a, tree *b)
2820 {
2821 tree wider_type = NULL;
2822 tree suba, subb;
2823 tree atype = TREE_TYPE (*a);
2824
2825 if (CONVERT_EXPR_P (*a))
2826 {
2827 suba = TREE_OPERAND (*a, 0);
2828 wider_type = TREE_TYPE (suba);
2829 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2830 return atype;
2831 }
2832 else
2833 return atype;
2834
2835 if (CONVERT_EXPR_P (*b))
2836 {
2837 subb = TREE_OPERAND (*b, 0);
2838 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2839 return atype;
2840 }
2841 else
2842 return atype;
2843
2844 *a = suba;
2845 *b = subb;
2846 return wider_type;
2847 }
2848
2849 /* Determines the expression by which USE is expressed from induction variable
2850 CAND at statement AT in LOOP. The expression is stored in a decomposed
2851 form into AFF. Returns false if USE cannot be expressed using CAND. */
2852
2853 static bool
2854 get_computation_aff (struct loop *loop,
2855 struct iv_use *use, struct iv_cand *cand, gimple at,
2856 struct affine_tree_combination *aff)
2857 {
2858 tree ubase = use->iv->base;
2859 tree ustep = use->iv->step;
2860 tree cbase = cand->iv->base;
2861 tree cstep = cand->iv->step, cstep_common;
2862 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2863 tree common_type, var;
2864 tree uutype;
2865 aff_tree cbase_aff, var_aff;
2866 double_int rat;
2867
2868 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2869 {
2870 /* We do not have a precision to express the values of use. */
2871 return false;
2872 }
2873
2874 var = var_at_stmt (loop, cand, at);
2875 uutype = unsigned_type_for (utype);
2876
2877 /* If the conversion is not noop, perform it. */
2878 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2879 {
2880 cstep = fold_convert (uutype, cstep);
2881 cbase = fold_convert (uutype, cbase);
2882 var = fold_convert (uutype, var);
2883 }
2884
2885 if (!constant_multiple_of (ustep, cstep, &rat))
2886 return false;
2887
2888 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2889 type, we achieve better folding by computing their difference in this
2890 wider type, and cast the result to UUTYPE. We do not need to worry about
2891 overflows, as all the arithmetic will in the end be performed in UUTYPE
2892 anyway. */
2893 common_type = determine_common_wider_type (&ubase, &cbase);
2894
2895 /* use = ubase - ratio * cbase + ratio * var. */
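/* This is use = ubase + ratio * (var - cbase) rearranged, with
   ratio = ustep / cstep as computed by constant_multiple_of above.  */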
2896 tree_to_aff_combination (ubase, common_type, aff);
2897 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2898 tree_to_aff_combination (var, uutype, &var_aff);
2899
2900 /* We need to shift the value if we are after the increment. */
2901 if (stmt_after_increment (loop, cand, at))
2902 {
2903 aff_tree cstep_aff;
2904
2905 if (common_type != uutype)
2906 cstep_common = fold_convert (common_type, cstep);
2907 else
2908 cstep_common = cstep;
2909
2910 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2911 aff_combination_add (&cbase_aff, &cstep_aff);
2912 }
2913
2914 aff_combination_scale (&cbase_aff, double_int_neg (rat));
2915 aff_combination_add (aff, &cbase_aff);
2916 if (common_type != uutype)
2917 aff_combination_convert (aff, uutype);
2918
2919 aff_combination_scale (&var_aff, rat);
2920 aff_combination_add (aff, &var_aff);
2921
2922 return true;
2923 }
2924
2925 /* Determines the expression by which USE is expressed from induction variable
2926 CAND at statement AT in LOOP. The computation is unshared. */
2927
2928 static tree
2929 get_computation_at (struct loop *loop,
2930 struct iv_use *use, struct iv_cand *cand, gimple at)
2931 {
2932 aff_tree aff;
2933 tree type = TREE_TYPE (use->iv->base);
2934
2935 if (!get_computation_aff (loop, use, cand, at, &aff))
2936 return NULL_TREE;
2937 unshare_aff_combination (&aff);
2938 return fold_convert (type, aff_combination_to_tree (&aff));
2939 }
2940
2941 /* Determines the expression by which USE is expressed from induction variable
2942 CAND in LOOP. The computation is unshared. */
2943
2944 static tree
2945 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2946 {
2947 return get_computation_at (loop, use, cand, use->stmt);
2948 }
2949
2950 /* Adjust the cost COST for being in loop setup rather than loop body.
2951 If we're optimizing for space, the loop setup overhead is constant;
2952 if we're optimizing for speed, amortize it over the per-iteration cost. */
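/* E.g., if AVG_LOOP_NITER evaluates to 5, a setup cost of 10 is
   accounted as 2 when optimizing for speed (illustrative values).  */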
2953 static unsigned
2954 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
2955 {
2956 if (cost == INFTY)
2957 return cost;
2958 else if (optimize_loop_for_speed_p (data->current_loop))
2959 return cost / AVG_LOOP_NITER (data->current_loop);
2960 else
2961 return cost;
2962 }
2963
2964 /* Returns cost of addition in MODE. */
2965
2966 static unsigned
2967 add_cost (enum machine_mode mode, bool speed)
2968 {
2969 static unsigned costs[NUM_MACHINE_MODES];
2970 rtx seq;
2971 unsigned cost;
2972
2973 if (costs[mode])
2974 return costs[mode];
2975
2976 start_sequence ();
2977 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2978 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2979 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
2980 NULL_RTX);
2981 seq = get_insns ();
2982 end_sequence ();
2983
2984 cost = seq_cost (seq, speed);
2985 if (!cost)
2986 cost = 1;
2987
2988 costs[mode] = cost;
2989
2990 if (dump_file && (dump_flags & TDF_DETAILS))
2991 fprintf (dump_file, "Addition in %s costs %d\n",
2992 GET_MODE_NAME (mode), cost);
2993 return cost;
2994 }
2995
2996 /* Entry in a hashtable of already known costs for multiplication. */
2997 struct mbc_entry
2998 {
2999 HOST_WIDE_INT cst; /* The constant to multiply by. */
3000 enum machine_mode mode; /* In mode. */
3001 unsigned cost; /* The cost. */
3002 };
3003
3004 /* Computes the hash value for ENTRY. */
3005
3006 static hashval_t
3007 mbc_entry_hash (const void *entry)
3008 {
3009 const struct mbc_entry *e = (const struct mbc_entry *) entry;
3010
3011 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3012 }
3013
3014 /* Compares the hash table entries ENTRY1 and ENTRY2. */
3015
3016 static int
3017 mbc_entry_eq (const void *entry1, const void *entry2)
3018 {
3019 const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
3020 const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
3021
3022 return (e1->mode == e2->mode
3023 && e1->cst == e2->cst);
3024 }
3025
3026 /* Returns cost of multiplication by constant CST in MODE. */
3027
3028 unsigned
3029 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
3030 {
3031 static htab_t costs;
3032 struct mbc_entry **cached, act;
3033 rtx seq;
3034 unsigned cost;
3035
3036 if (!costs)
3037 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3038
3039 act.mode = mode;
3040 act.cst = cst;
3041 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3042 if (*cached)
3043 return (*cached)->cost;
3044
3045 *cached = XNEW (struct mbc_entry);
3046 (*cached)->mode = mode;
3047 (*cached)->cst = cst;
3048
3049 start_sequence ();
3050 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3051 gen_int_mode (cst, mode), NULL_RTX, 0);
3052 seq = get_insns ();
3053 end_sequence ();
3054
3055 cost = seq_cost (seq, speed);
3056
3057 if (dump_file && (dump_flags & TDF_DETAILS))
3058 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3059 (int) cst, GET_MODE_NAME (mode), cost);
3060
3061 (*cached)->cost = cost;
3062
3063 return cost;
3064 }
3065
3066 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3067 validity for a memory reference accessing memory of mode MODE in
3068 address space AS. */
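/* For instance, on x86 targets the allowed ratios are typically
   1, 2, 4 and 8 (illustrative).  */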
3069
3070 DEF_VEC_P (sbitmap);
3071 DEF_VEC_ALLOC_P (sbitmap, heap);
3072
3073 bool
3074 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3075 addr_space_t as)
3076 {
3077 #define MAX_RATIO 128
3078 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3079 static VEC (sbitmap, heap) *valid_mult_list;
3080 sbitmap valid_mult;
3081
3082 if (data_index >= VEC_length (sbitmap, valid_mult_list))
3083 VEC_safe_grow_cleared (sbitmap, heap, valid_mult_list, data_index + 1);
3084
3085 valid_mult = VEC_index (sbitmap, valid_mult_list, data_index);
3086 if (!valid_mult)
3087 {
3088 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3089 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3090 rtx addr;
3091 HOST_WIDE_INT i;
3092
3093 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3094 sbitmap_zero (valid_mult);
3095 addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3096 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3097 {
3098 XEXP (addr, 1) = gen_int_mode (i, address_mode);
3099 if (memory_address_addr_space_p (mode, addr, as))
3100 SET_BIT (valid_mult, i + MAX_RATIO);
3101 }
3102
3103 if (dump_file && (dump_flags & TDF_DETAILS))
3104 {
3105 fprintf (dump_file, " allowed multipliers:");
3106 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3107 if (TEST_BIT (valid_mult, i + MAX_RATIO))
3108 fprintf (dump_file, " %d", (int) i);
3109 fprintf (dump_file, "\n");
3110 fprintf (dump_file, "\n");
3111 }
3112
3113 VEC_replace (sbitmap, valid_mult_list, data_index, valid_mult);
3114 }
3115
3116 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3117 return false;
3118
3119 return TEST_BIT (valid_mult, ratio + MAX_RATIO);
3120 }
3121
3122 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3123 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3124 variable is omitted. Compute the cost for a memory reference that accesses
3125 a memory location of mode MEM_MODE in address space AS.
3126
3127 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3128 size of MEM_MODE / RATIO) is available. To make this determination, we
3129 look at the size of the increment to be made, which is given in CSTEP.
3130 CSTEP may be zero if the step is unknown.
3131 STMT_AFTER_INC is true iff the statement we're looking at is after the
3132 increment of the original biv.
3133
3134 TODO -- there must be some better way. This all is quite crude. */
3135
3136 typedef struct
3137 {
3138 HOST_WIDE_INT min_offset, max_offset;
3139 unsigned costs[2][2][2][2];
3140 } *address_cost_data;
3141
3142 DEF_VEC_P (address_cost_data);
3143 DEF_VEC_ALLOC_P (address_cost_data, heap);
3144
3145 static comp_cost
3146 get_address_cost (bool symbol_present, bool var_present,
3147 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3148 HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3149 addr_space_t as, bool speed,
3150 bool stmt_after_inc, bool *may_autoinc)
3151 {
3152 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3153 static VEC(address_cost_data, heap) *address_cost_data_list;
3154 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3155 address_cost_data data;
3156 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3157 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3158 unsigned cost, acost, complexity;
3159 bool offset_p, ratio_p, autoinc;
3160 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3161 unsigned HOST_WIDE_INT mask;
3162 unsigned bits;
3163
3164 if (data_index >= VEC_length (address_cost_data, address_cost_data_list))
3165 VEC_safe_grow_cleared (address_cost_data, heap, address_cost_data_list,
3166 data_index + 1);
3167
3168 data = VEC_index (address_cost_data, address_cost_data_list, data_index);
3169 if (!data)
3170 {
3171 HOST_WIDE_INT i;
3172 HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
3173 HOST_WIDE_INT rat, off;
3174 int old_cse_not_expected;
3175 unsigned sym_p, var_p, off_p, rat_p, add_c;
3176 rtx seq, addr, base;
3177 rtx reg0, reg1;
3178
3179 data = (address_cost_data) xcalloc (1, sizeof (*data));
3180
3181 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3182
3183 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3184 for (i = start; i <= 1 << 20; i <<= 1)
3185 {
3186 XEXP (addr, 1) = gen_int_mode (i, address_mode);
3187 if (!memory_address_addr_space_p (mem_mode, addr, as))
3188 break;
3189 }
3190 data->max_offset = i == start ? 0 : i >> 1;
3191 off = data->max_offset;
3192
3193 for (i = start; i <= 1 << 20; i <<= 1)
3194 {
3195 XEXP (addr, 1) = gen_int_mode (-i, address_mode);
3196 if (!memory_address_addr_space_p (mem_mode, addr, as))
3197 break;
3198 }
3199 data->min_offset = i == start ? 0 : -(i >> 1);
3200
3201 if (dump_file && (dump_flags & TDF_DETAILS))
3202 {
3203 fprintf (dump_file, "get_address_cost:\n");
3204 fprintf (dump_file, " min offset %s %d\n",
3205 GET_MODE_NAME (mem_mode),
3206 (int) data->min_offset);
3207 fprintf (dump_file, " max offset %s %d\n",
3208 GET_MODE_NAME (mem_mode),
3209 (int) data->max_offset);
3210 }
3211
3212 rat = 1;
3213 for (i = 2; i <= MAX_RATIO; i++)
3214 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3215 {
3216 rat = i;
3217 break;
3218 }
3219
3220 /* Compute the cost of various addressing modes. */
3221 acost = 0;
3222 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3223 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3224
3225 if (HAVE_PRE_DECREMENT)
3226 {
3227 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3228 has_predec[mem_mode]
3229 = memory_address_addr_space_p (mem_mode, addr, as);
3230 }
3231 if (HAVE_POST_DECREMENT)
3232 {
3233 addr = gen_rtx_POST_DEC (address_mode, reg0);
3234 has_postdec[mem_mode]
3235 = memory_address_addr_space_p (mem_mode, addr, as);
3236 }
3237 if (HAVE_PRE_INCREMENT)
3238 {
3239 addr = gen_rtx_PRE_INC (address_mode, reg0);
3240 has_preinc[mem_mode]
3241 = memory_address_addr_space_p (mem_mode, addr, as);
3242 }
3243 if (HAVE_POST_INCREMENT)
3244 {
3245 addr = gen_rtx_POST_INC (address_mode, reg0);
3246 has_postinc[mem_mode]
3247 = memory_address_addr_space_p (mem_mode, addr, as);
3248 }
3249 for (i = 0; i < 16; i++)
3250 {
3251 sym_p = i & 1;
3252 var_p = (i >> 1) & 1;
3253 off_p = (i >> 2) & 1;
3254 rat_p = (i >> 3) & 1;
3255
3256 addr = reg0;
3257 if (rat_p)
3258 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3259 gen_int_mode (rat, address_mode));
3260
3261 if (var_p)
3262 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3263
3264 if (sym_p)
3265 {
3266 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3267 /* ??? We can run into trouble with some backends by presenting
3268 it with symbols which haven't been properly passed through
3269 targetm.encode_section_info. By setting the local bit, we
3270 enhance the probability of things working. */
3271 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3272
3273 if (off_p)
3274 base = gen_rtx_fmt_e (CONST, address_mode,
3275 gen_rtx_fmt_ee
3276 (PLUS, address_mode, base,
3277 gen_int_mode (off, address_mode)));
3278 }
3279 else if (off_p)
3280 base = gen_int_mode (off, address_mode);
3281 else
3282 base = NULL_RTX;
3283
3284 if (base)
3285 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3286
3287 start_sequence ();
3288 /* To avoid splitting addressing modes, pretend that no cse will
3289 follow. */
3290 old_cse_not_expected = cse_not_expected;
3291 cse_not_expected = true;
3292 addr = memory_address_addr_space (mem_mode, addr, as);
3293 cse_not_expected = old_cse_not_expected;
3294 seq = get_insns ();
3295 end_sequence ();
3296
3297 acost = seq_cost (seq, speed);
3298 acost += address_cost (addr, mem_mode, as, speed);
3299
3300 if (!acost)
3301 acost = 1;
3302 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3303 }
3304
3305 /* On some targets, it is quite expensive to load a symbol into a register,
3306 which makes addresses that contain symbols look much more expensive.
3307 However, the symbol will have to be loaded in any case before the
3308 loop (and quite likely we have it in a register already), so it does not
3309 make much sense to penalize them too heavily. So make some final
3310 tweaks for the SYMBOL_PRESENT modes:
3311
3312 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3313 var is cheaper, use this mode with small penalty.
3314 If VAR_PRESENT is true, try whether the mode with
3315 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3316 if this is the case, use it. */
3317 add_c = add_cost (address_mode, speed);
3318 for (i = 0; i < 8; i++)
3319 {
3320 var_p = i & 1;
3321 off_p = (i >> 1) & 1;
3322 rat_p = (i >> 2) & 1;
3323
3324 acost = data->costs[0][1][off_p][rat_p] + 1;
3325 if (var_p)
3326 acost += add_c;
3327
3328 if (acost < data->costs[1][var_p][off_p][rat_p])
3329 data->costs[1][var_p][off_p][rat_p] = acost;
3330 }
3331
3332 if (dump_file && (dump_flags & TDF_DETAILS))
3333 {
3334 fprintf (dump_file, "Address costs:\n");
3335
3336 for (i = 0; i < 16; i++)
3337 {
3338 sym_p = i & 1;
3339 var_p = (i >> 1) & 1;
3340 off_p = (i >> 2) & 1;
3341 rat_p = (i >> 3) & 1;
3342
3343 fprintf (dump_file, " ");
3344 if (sym_p)
3345 fprintf (dump_file, "sym + ");
3346 if (var_p)
3347 fprintf (dump_file, "var + ");
3348 if (off_p)
3349 fprintf (dump_file, "cst + ");
3350 if (rat_p)
3351 fprintf (dump_file, "rat * ");
3352
3353 acost = data->costs[sym_p][var_p][off_p][rat_p];
3354 fprintf (dump_file, "index costs %d\n", acost);
3355 }
3356 if (has_predec[mem_mode] || has_postdec[mem_mode]
3357 || has_preinc[mem_mode] || has_postinc[mem_mode])
3358 fprintf (dump_file, " May include autoinc/dec\n");
3359 fprintf (dump_file, "\n");
3360 }
3361
3362 VEC_replace (address_cost_data, address_cost_data_list,
3363 data_index, data);
3364 }
3365
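/* Sign-extend OFFSET to the width of the address mode; e.g., with a
   32-bit address mode, 0xfffffff0 becomes -16 (illustrative).  */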
3366 bits = GET_MODE_BITSIZE (address_mode);
3367 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3368 offset &= mask;
3369 if ((offset >> (bits - 1) & 1))
3370 offset |= ~mask;
3371 s_offset = offset;
3372
3373 autoinc = false;
3374 msize = GET_MODE_SIZE (mem_mode);
3375 autoinc_offset = offset;
3376 if (stmt_after_inc)
3377 autoinc_offset += ratio * cstep;
3378 if (symbol_present || var_present || ratio != 1)
3379 autoinc = false;
3380 else if ((has_postinc[mem_mode] && autoinc_offset == 0
3381 && msize == cstep)
3382 || (has_postdec[mem_mode] && autoinc_offset == 0
3383 && msize == -cstep)
3384 || (has_preinc[mem_mode] && autoinc_offset == msize
3385 && msize == cstep)
3386 || (has_predec[mem_mode] && autoinc_offset == -msize
3387 && msize == -cstep))
3388 autoinc = true;
3389
3390 cost = 0;
3391 offset_p = (s_offset != 0
3392 && data->min_offset <= s_offset
3393 && s_offset <= data->max_offset);
3394 ratio_p = (ratio != 1
3395 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3396
3397 if (ratio != 1 && !ratio_p)
3398 cost += multiply_by_cost (ratio, address_mode, speed);
3399
3400 if (s_offset && !offset_p && !symbol_present)
3401 cost += add_cost (address_mode, speed);
3402
3403 if (may_autoinc)
3404 *may_autoinc = autoinc;
3405 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3406 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3407 return new_cost (cost + acost, complexity);
3408 }
3409
3410 /* Estimates cost of forcing expression EXPR into a variable. */
3411
3412 static comp_cost
3413 force_expr_to_var_cost (tree expr, bool speed)
3414 {
3415 static bool costs_initialized = false;
3416 static unsigned integer_cost [2];
3417 static unsigned symbol_cost [2];
3418 static unsigned address_cost [2];
3419 tree op0, op1;
3420 comp_cost cost0, cost1, cost;
3421 enum machine_mode mode;
3422
3423 if (!costs_initialized)
3424 {
3425 tree type = build_pointer_type (integer_type_node);
3426 tree var, addr;
3427 rtx x;
3428 int i;
3429
3430 var = create_tmp_var_raw (integer_type_node, "test_var");
3431 TREE_STATIC (var) = 1;
3432 x = produce_memory_decl_rtl (var, NULL);
3433 SET_DECL_RTL (var, x);
3434
3435 addr = build1 (ADDR_EXPR, type, var);
3436
3437
3438 for (i = 0; i < 2; i++)
3439 {
3440 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3441 2000), i);
3442
3443 symbol_cost[i] = computation_cost (addr, i) + 1;
3444
3445 address_cost[i]
3446 = computation_cost (build2 (POINTER_PLUS_EXPR, type,
3447 addr,
3448 build_int_cst (sizetype, 2000)), i) + 1;
3449 if (dump_file && (dump_flags & TDF_DETAILS))
3450 {
3451 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3452 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3453 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3454 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3455 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3456 fprintf (dump_file, "\n");
3457 }
3458 }
3459
3460 costs_initialized = true;
3461 }
3462
3463 STRIP_NOPS (expr);
3464
3465 if (SSA_VAR_P (expr))
3466 return zero_cost;
3467
3468 if (is_gimple_min_invariant (expr))
3469 {
3470 if (TREE_CODE (expr) == INTEGER_CST)
3471 return new_cost (integer_cost [speed], 0);
3472
3473 if (TREE_CODE (expr) == ADDR_EXPR)
3474 {
3475 tree obj = TREE_OPERAND (expr, 0);
3476
3477 if (TREE_CODE (obj) == VAR_DECL
3478 || TREE_CODE (obj) == PARM_DECL
3479 || TREE_CODE (obj) == RESULT_DECL)
3480 return new_cost (symbol_cost [speed], 0);
3481 }
3482
3483 return new_cost (address_cost [speed], 0);
3484 }
3485
3486 switch (TREE_CODE (expr))
3487 {
3488 case POINTER_PLUS_EXPR:
3489 case PLUS_EXPR:
3490 case MINUS_EXPR:
3491 case MULT_EXPR:
3492 op0 = TREE_OPERAND (expr, 0);
3493 op1 = TREE_OPERAND (expr, 1);
3494 STRIP_NOPS (op0);
3495 STRIP_NOPS (op1);
3496
3497 if (is_gimple_val (op0))
3498 cost0 = zero_cost;
3499 else
3500 cost0 = force_expr_to_var_cost (op0, speed);
3501
3502 if (is_gimple_val (op1))
3503 cost1 = zero_cost;
3504 else
3505 cost1 = force_expr_to_var_cost (op1, speed);
3506
3507 break;
3508
3509 case NEGATE_EXPR:
3510 op0 = TREE_OPERAND (expr, 0);
3511 STRIP_NOPS (op0);
3512 op1 = NULL_TREE;
3513
3514 if (is_gimple_val (op0))
3515 cost0 = zero_cost;
3516 else
3517 cost0 = force_expr_to_var_cost (op0, speed);
3518
3519 cost1 = zero_cost;
3520 break;
3521
3522 default:
3523 /* Just an arbitrary value, FIXME. */
3524 return new_cost (target_spill_cost[speed], 0);
3525 }
3526
3527 mode = TYPE_MODE (TREE_TYPE (expr));
3528 switch (TREE_CODE (expr))
3529 {
3530 case POINTER_PLUS_EXPR:
3531 case PLUS_EXPR:
3532 case MINUS_EXPR:
3533 case NEGATE_EXPR:
3534 cost = new_cost (add_cost (mode, speed), 0);
3535 break;
3536
3537 case MULT_EXPR:
3538 if (cst_and_fits_in_hwi (op0))
3539 cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0);
3540 else if (cst_and_fits_in_hwi (op1))
3541 cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0);
3542 else
3543 return new_cost (target_spill_cost [speed], 0);
3544 break;
3545
3546 default:
3547 gcc_unreachable ();
3548 }
3549
3550 cost = add_costs (cost, cost0);
3551 cost = add_costs (cost, cost1);
3552
3553 /* Bound the cost by target_spill_cost. The parts of complicated
3554 computations often are either loop invariant or at least can
3555 be shared between several iv uses, so letting this grow without
3556 limits would not give reasonable results. */
3557 if (cost.cost > (int) target_spill_cost [speed])
3558 cost.cost = target_spill_cost [speed];
3559
3560 return cost;
3561 }
3562
3563 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3564 invariants the computation depends on. */
3565
3566 static comp_cost
3567 force_var_cost (struct ivopts_data *data,
3568 tree expr, bitmap *depends_on)
3569 {
3570 if (depends_on)
3571 {
3572 fd_ivopts_data = data;
3573 walk_tree (&expr, find_depends, depends_on, NULL);
3574 }
3575
3576 return force_expr_to_var_cost (expr, data->speed);
3577 }
3578
3579 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3580 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3581 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3582 invariants the computation depends on. */
3583
3584 static comp_cost
3585 split_address_cost (struct ivopts_data *data,
3586 tree addr, bool *symbol_present, bool *var_present,
3587 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3588 {
3589 tree core;
3590 HOST_WIDE_INT bitsize;
3591 HOST_WIDE_INT bitpos;
3592 tree toffset;
3593 enum machine_mode mode;
3594 int unsignedp, volatilep;
3595
3596 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3597 &unsignedp, &volatilep, false);
3598
3599 if (toffset != 0
3600 || bitpos % BITS_PER_UNIT != 0
3601 || TREE_CODE (core) != VAR_DECL)
3602 {
3603 *symbol_present = false;
3604 *var_present = true;
3605 fd_ivopts_data = data;
3606 walk_tree (&addr, find_depends, depends_on, NULL);
3607 return new_cost (target_spill_cost[data->speed], 0);
3608 }
3609
3610 *offset += bitpos / BITS_PER_UNIT;
3611 if (TREE_STATIC (core)
3612 || DECL_EXTERNAL (core))
3613 {
3614 *symbol_present = true;
3615 *var_present = false;
3616 return zero_cost;
3617 }
3618
3619 *symbol_present = false;
3620 *var_present = true;
3621 return zero_cost;
3622 }
3623
3624 /* Estimates cost of expressing difference of addresses E1 - E2 as
3625 var + symbol + offset. The value of offset is added to OFFSET,
3626 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3627 part is missing. DEPENDS_ON is a set of the invariants the computation
3628 depends on. */
3629
3630 static comp_cost
3631 ptr_difference_cost (struct ivopts_data *data,
3632 tree e1, tree e2, bool *symbol_present, bool *var_present,
3633 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3634 {
3635 HOST_WIDE_INT diff = 0;
3636 aff_tree aff_e1, aff_e2;
3637 tree type;
3638
3639 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3640
3641 if (ptr_difference_const (e1, e2, &diff))
3642 {
3643 *offset += diff;
3644 *symbol_present = false;
3645 *var_present = false;
3646 return zero_cost;
3647 }
3648
3649 if (integer_zerop (e2))
3650 return split_address_cost (data, TREE_OPERAND (e1, 0),
3651 symbol_present, var_present, offset, depends_on);
3652
3653 *symbol_present = false;
3654 *var_present = true;
3655
3656 type = signed_type_for (TREE_TYPE (e1));
3657 tree_to_aff_combination (e1, type, &aff_e1);
3658 tree_to_aff_combination (e2, type, &aff_e2);
3659 aff_combination_scale (&aff_e2, double_int_minus_one);
3660 aff_combination_add (&aff_e1, &aff_e2);
3661
3662 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3663 }
3664
3665 /* Estimates cost of expressing difference E1 - E2 as
3666 var + symbol + offset. The value of offset is added to OFFSET,
3667 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3668 part is missing. DEPENDS_ON is a set of the invariants the computation
3669 depends on. */
3670
3671 static comp_cost
3672 difference_cost (struct ivopts_data *data,
3673 tree e1, tree e2, bool *symbol_present, bool *var_present,
3674 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3675 {
3676 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3677 unsigned HOST_WIDE_INT off1, off2;
3678 aff_tree aff_e1, aff_e2;
3679 tree type;
3680
3681 e1 = strip_offset (e1, &off1);
3682 e2 = strip_offset (e2, &off2);
3683 *offset += off1 - off2;
3684
3685 STRIP_NOPS (e1);
3686 STRIP_NOPS (e2);
3687
3688 if (TREE_CODE (e1) == ADDR_EXPR)
3689 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3690 offset, depends_on);
3691 *symbol_present = false;
3692
3693 if (operand_equal_p (e1, e2, 0))
3694 {
3695 *var_present = false;
3696 return zero_cost;
3697 }
3698
3699 *var_present = true;
3700
3701 if (integer_zerop (e2))
3702 return force_var_cost (data, e1, depends_on);
3703
3704 if (integer_zerop (e1))
3705 {
3706 comp_cost cost = force_var_cost (data, e2, depends_on);
3707 cost.cost += multiply_by_cost (-1, mode, data->speed);
3708 return cost;
3709 }
3710
3711 type = signed_type_for (TREE_TYPE (e1));
3712 tree_to_aff_combination (e1, type, &aff_e1);
3713 tree_to_aff_combination (e2, type, &aff_e2);
3714 aff_combination_scale (&aff_e2, double_int_minus_one);
3715 aff_combination_add (&aff_e1, &aff_e2);
3716
3717 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3718 }
3719
3720 /* Determines the cost of the computation by which USE is expressed
3721 from induction variable CAND. If ADDRESS_P is true, we just need
3722 to create an address from it, otherwise we want to get it into a
3723 register. A set of invariants we depend on is stored in
3724 DEPENDS_ON. AT is the statement at which the value is computed.
3725 If CAN_AUTOINC is nonnull, use it to record whether autoinc
3726 addressing is likely. */
3727
3728 static comp_cost
3729 get_computation_cost_at (struct ivopts_data *data,
3730 struct iv_use *use, struct iv_cand *cand,
3731 bool address_p, bitmap *depends_on, gimple at,
3732 bool *can_autoinc)
3733 {
3734 tree ubase = use->iv->base, ustep = use->iv->step;
3735 tree cbase, cstep;
3736 tree utype = TREE_TYPE (ubase), ctype;
3737 unsigned HOST_WIDE_INT cstepi, offset = 0;
3738 HOST_WIDE_INT ratio, aratio;
3739 bool var_present, symbol_present, stmt_is_after_inc;
3740 comp_cost cost;
3741 double_int rat;
3742 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
3743
3744 *depends_on = NULL;
3745
3746 /* Only consider real candidates. */
3747 if (!cand->iv)
3748 return infinite_cost;
3749
3750 cbase = cand->iv->base;
3751 cstep = cand->iv->step;
3752 ctype = TREE_TYPE (cbase);
3753
3754 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3755 {
3756 /* We do not have a precision to express the values of use. */
3757 return infinite_cost;
3758 }
3759
3760 if (address_p)
3761 {
3762 /* Do not try to express address of an object with computation based
3763 on address of a different object. This may cause problems in rtl
3764 level alias analysis (that does not expect this to be happening,
3765 as this is illegal in C), and would be unlikely to be useful
3766 anyway. */
3767 if (use->iv->base_object
3768 && cand->iv->base_object
3769 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3770 return infinite_cost;
3771 }
3772
3773 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3774 {
3775 /* TODO -- add direct handling of this case. */
3776 goto fallback;
3777 }
3778
3779 /* CSTEPI is removed from the offset in case the statement is after the
3780 increment. If the step is not constant, we use zero instead.
3781 This is a bit imprecise (there is the extra addition), but
3782 redundancy elimination is likely to transform the code so that
3783 it uses the value of the variable before the increment anyway,
3784 so this is not too unrealistic. */
3785 if (cst_and_fits_in_hwi (cstep))
3786 cstepi = int_cst_value (cstep);
3787 else
3788 cstepi = 0;
3789
3790 if (!constant_multiple_of (ustep, cstep, &rat))
3791 return infinite_cost;
3792
3793 if (double_int_fits_in_shwi_p (rat))
3794 ratio = double_int_to_shwi (rat);
3795 else
3796 return infinite_cost;
3797
3798 STRIP_NOPS (cbase);
3799 ctype = TREE_TYPE (cbase);
3800
3801 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3802 or ratio == 1, it is better to handle this like
3803
3804 ubase - ratio * cbase + ratio * var
3805
3806 (this also holds in the case ratio == -1, TODO). */
3807
3808 if (cst_and_fits_in_hwi (cbase))
3809 {
3810 offset = - ratio * int_cst_value (cbase);
3811 cost = difference_cost (data,
3812 ubase, build_int_cst (utype, 0),
3813 &symbol_present, &var_present, &offset,
3814 depends_on);
3815 }
3816 else if (ratio == 1)
3817 {
3818 cost = difference_cost (data,
3819 ubase, cbase,
3820 &symbol_present, &var_present, &offset,
3821 depends_on);
3822 }
3823 else if (address_p
3824 && !POINTER_TYPE_P (ctype)
3825 && multiplier_allowed_in_address_p
3826 (ratio, TYPE_MODE (TREE_TYPE (utype)),
3827 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
3828 {
3829 cbase
3830 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
3831 cost = difference_cost (data,
3832 ubase, cbase,
3833 &symbol_present, &var_present, &offset,
3834 depends_on);
3835 }
3836 else
3837 {
3838 cost = force_var_cost (data, cbase, depends_on);
3839 cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
3840 cost = add_costs (cost,
3841 difference_cost (data,
3842 ubase, build_int_cst (utype, 0),
3843 &symbol_present, &var_present,
3844 &offset, depends_on));
3845 }
3846
3847 /* If we are after the increment, the value of the candidate is higher by
3848 one iteration. */
3849 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
3850 if (stmt_is_after_inc)
3851 offset -= ratio * cstepi;
3852
3853 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3854 (symbol/var1/const parts may be omitted). If we are looking for an
3855 address, find the cost of addressing this. */
3856 if (address_p)
3857 return add_costs (cost,
3858 get_address_cost (symbol_present, var_present,
3859 offset, ratio, cstepi,
3860 TYPE_MODE (TREE_TYPE (utype)),
3861 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
3862 speed, stmt_is_after_inc,
3863 can_autoinc));
3864
3865 /* Otherwise estimate the costs for computing the expression. */
3866 if (!symbol_present && !var_present && !offset)
3867 {
3868 if (ratio != 1)
3869 cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype), speed);
3870 return cost;
3871 }
3872
3873 /* Symbol + offset should be compile-time computable, so consider that they
3874 are added once to the variable, if present. */
3875 if (var_present && (symbol_present || offset))
3876 cost.cost += adjust_setup_cost (data,
3877 add_cost (TYPE_MODE (ctype), speed));
3878
3879 /* Having an offset does not affect the runtime cost in case it is added
3880 to the symbol, but it increases complexity. */
3881 if (offset)
3882 cost.complexity++;
3883
3884 cost.cost += add_cost (TYPE_MODE (ctype), speed);
3885
3886 aratio = ratio > 0 ? ratio : -ratio;
3887 if (aratio != 1)
3888 cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed);
3889 return cost;
3890
3891 fallback:
3892 if (can_autoinc)
3893 *can_autoinc = false;
3894
3895 {
3896 /* Just get the expression, expand it and measure the cost. */
3897 tree comp = get_computation_at (data->current_loop, use, cand, at);
3898
3899 if (!comp)
3900 return infinite_cost;
3901
3902 if (address_p)
3903 comp = build_simple_mem_ref (comp);
3904
3905 return new_cost (computation_cost (comp, speed), 0);
3906 }
3907 }
3908
3909 /* Determines the cost of the computation by which USE is expressed
3910 from induction variable CAND. If ADDRESS_P is true, we just need
3911 to create an address from it, otherwise we want to get it into a
3912 register. A set of invariants we depend on is stored in
3913 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
3914 autoinc addressing is likely. */
3915
3916 static comp_cost
3917 get_computation_cost (struct ivopts_data *data,
3918 struct iv_use *use, struct iv_cand *cand,
3919 bool address_p, bitmap *depends_on, bool *can_autoinc)
3920 {
3921 return get_computation_cost_at (data,
3922 use, cand, address_p, depends_on, use->stmt,
3923 can_autoinc);
3924 }
3925
3926 /* Determines cost of basing replacement of USE on CAND in a generic
3927 expression. */
3928
3929 static bool
3930 determine_use_iv_cost_generic (struct ivopts_data *data,
3931 struct iv_use *use, struct iv_cand *cand)
3932 {
3933 bitmap depends_on;
3934 comp_cost cost;
3935
3936 /* The simple case first -- if we need to express value of the preserved
3937 original biv, the cost is 0. This also prevents us from counting the
3938 cost of increment twice -- once at this use and once in the cost of
3939 the candidate. */
3940 if (cand->pos == IP_ORIGINAL
3941 && cand->incremented_at == use->stmt)
3942 {
3943 set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE);
3944 return true;
3945 }
3946
3947 cost = get_computation_cost (data, use, cand, false, &depends_on, NULL);
3948 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3949
3950 return !infinite_cost_p (cost);
3951 }
3952
3953 /* Determines cost of basing replacement of USE on CAND in an address. */
3954
3955 static bool
3956 determine_use_iv_cost_address (struct ivopts_data *data,
3957 struct iv_use *use, struct iv_cand *cand)
3958 {
3959 bitmap depends_on;
3960 bool can_autoinc;
3961 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
3962 &can_autoinc);
3963
3964 if (cand->ainc_use == use)
3965 {
3966 if (can_autoinc)
3967 cost.cost -= cand->cost_step;
3968 /* If we generated the candidate solely for exploiting autoincrement
3969 opportunities, and it turns out it can't be used, set the cost to
3970 infinity to make sure we ignore it. */
3971 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
3972 cost = infinite_cost;
3973 }
3974 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3975
3976 return !infinite_cost_p (cost);
3977 }
3978
3979 /* Computes value of candidate CAND at position AT in iteration NITER, and
3980 stores it to VAL. */
3981
3982 static void
3983 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
3984 aff_tree *val)
3985 {
3986 aff_tree step, delta, nit;
3987 struct iv *iv = cand->iv;
3988 tree type = TREE_TYPE (iv->base);
3989 tree steptype = type;
3990 if (POINTER_TYPE_P (type))
3991 steptype = sizetype;
3992
3993 tree_to_aff_combination (iv->step, steptype, &step);
3994 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
3995 aff_combination_convert (&nit, steptype);
3996 aff_combination_mult (&nit, &step, &delta);
3997 if (stmt_after_increment (loop, cand, at))
3998 aff_combination_add (&delta, &step);
3999
4000 tree_to_aff_combination (iv->base, type, val);
4001 aff_combination_add (val, &delta);
4002 }
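/* E.g. (illustrative values): for a candidate with base 16 and step 4,
the value at iteration NITER is 16 + 4 * NITER when AT precedes the
increment, and 16 + 4 * (NITER + 1) after it -- the extra step added
above when stmt_after_increment returns true. */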
4003
4004 /* Returns period of induction variable iv. */
4005
4006 static tree
4007 iv_period (struct iv *iv)
4008 {
4009 tree step = iv->step, period, type;
4010 tree pow2div;
4011
4012 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4013
4014 type = unsigned_type_for (TREE_TYPE (step));
4015 /* Period of the iv is lcm (step, type_range)/step -1,
4016 i.e., N*type_range/step - 1. Since type range is power
4017 of two, N == step >> num_of_ending_zeros_binary (step),
4018 so the final result is
4019
4020 (type_range >> num_of_ending_zeros_binary (step)) - 1
4021
4022 */
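/* Worked example (illustrative numbers): in a 32-bit unsigned type with
step 12 = 0b1100, num_ending_zeros is 2, so the period is
(1 << 30) - 1 -- the candidate revisits a value only after that many
increments. */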
4023 pow2div = num_ending_zeros (step);
4024
4025 period = build_low_bits_mask (type,
4026 (TYPE_PRECISION (type)
4027 - tree_low_cst (pow2div, 1)));
4028
4029 return period;
4030 }
4031
4032 /* Returns the comparison operator used when eliminating the iv USE. */
4033
4034 static enum tree_code
4035 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4036 {
4037 struct loop *loop = data->current_loop;
4038 basic_block ex_bb;
4039 edge exit;
4040
4041 ex_bb = gimple_bb (use->stmt);
4042 exit = EDGE_SUCC (ex_bb, 0);
4043 if (flow_bb_inside_loop_p (loop, exit->dest))
4044 exit = EDGE_SUCC (ex_bb, 1);
4045
4046 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4047 }
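/* For instance, if the exit is taken on the true edge of the condition,
the eliminated test is rewritten as "var == bound" (EQ_EXPR); if the
loop instead continues on the true edge, NE_EXPR is used. */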
4048
4049 /* Check whether it is possible to express the condition in USE by a comparison
4050 of candidate CAND. If so, store the value it is compared with to BOUND. */
4051
4052 static bool
4053 may_eliminate_iv (struct ivopts_data *data,
4054 struct iv_use *use, struct iv_cand *cand, tree *bound)
4055 {
4056 basic_block ex_bb;
4057 edge exit;
4058 tree nit, period;
4059 struct loop *loop = data->current_loop;
4060 aff_tree bnd;
4061 struct tree_niter_desc *desc = NULL;
4062
4063 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4064 return false;
4065
4066 /* For now works only for exits that dominate the loop latch.
4067 TODO: extend to other conditions inside loop body. */
4068 ex_bb = gimple_bb (use->stmt);
4069 if (use->stmt != last_stmt (ex_bb)
4070 || gimple_code (use->stmt) != GIMPLE_COND
4071 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4072 return false;
4073
4074 exit = EDGE_SUCC (ex_bb, 0);
4075 if (flow_bb_inside_loop_p (loop, exit->dest))
4076 exit = EDGE_SUCC (ex_bb, 1);
4077 if (flow_bb_inside_loop_p (loop, exit->dest))
4078 return false;
4079
4080 nit = niter_for_exit (data, exit, &desc);
4081 if (!nit)
4082 return false;
4083
4084 /* Determine whether we can use the variable to test the exit condition.
4085 This is the case iff the period of the induction variable is greater
4086 than the number of iterations for which the exit condition is true. */
4087 period = iv_period (cand->iv);
4088
4089 /* If the number of iterations is constant, compare against it directly. */
4090 if (TREE_CODE (nit) == INTEGER_CST)
4091 {
4092 /* See cand_value_at. */
4093 if (stmt_after_increment (loop, cand, use->stmt))
4094 {
4095 if (!tree_int_cst_lt (nit, period))
4096 return false;
4097 }
4098 else
4099 {
4100 if (tree_int_cst_lt (period, nit))
4101 return false;
4102 }
4103 }
4104
4105 /* If not, and if this is the only possible exit of the loop, see whether
4106 we can get a conservative estimate on the number of iterations of the
4107 entire loop and compare against that instead. */
4108 else
4109 {
4110 double_int period_value, max_niter;
4111
4112 max_niter = desc->max;
4113 if (stmt_after_increment (loop, cand, use->stmt))
4114 max_niter = double_int_add (max_niter, double_int_one);
4115 period_value = tree_to_double_int (period);
4116 if (double_int_ucmp (max_niter, period_value) > 0)
4117 {
4118 /* See if we can take advantage of inferred loop bound information. */
4119 if (loop_only_exit_p (loop, exit))
4120 {
4121 if (!estimated_loop_iterations (loop, true, &max_niter))
4122 return false;
4123 /* The loop bound is already adjusted by adding 1. */
4124 if (double_int_ucmp (max_niter, period_value) > 0)
4125 return false;
4126 }
4127 else
4128 return false;
4129 }
4130 }
4131
4132 cand_value_at (loop, cand, use->stmt, nit, &bnd);
4133
4134 *bound = aff_combination_to_tree (&bnd);
4135 /* It is unlikely that computing the number of iterations using division
4136 would be more profitable than keeping the original induction variable. */
4137 if (expression_expensive_p (*bound))
4138 return false;
4139 return true;
4140 }
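/* Example with assumed numbers: a candidate with a 16-bit unsigned iv
of step 1 has period 65535. An exit known to be taken after at most
1000 iterations therefore passes the test above, and the condition can
be replaced by an equality check against the value computed by
cand_value_at for niter == 1000. */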
4141
4142
4143 /* Determines cost of basing replacement of USE on CAND in a condition. */
4144
4145 static bool
4146 determine_use_iv_cost_condition (struct ivopts_data *data,
4147 struct iv_use *use, struct iv_cand *cand)
4148 {
4149 tree bound = NULL_TREE;
4150 struct iv *cmp_iv;
4151 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4152 comp_cost elim_cost, express_cost, cost;
4153 bool ok;
4154 tree *control_var, *bound_cst;
4155
4156 /* Only consider real candidates. */
4157 if (!cand->iv)
4158 {
4159 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE);
4160 return false;
4161 }
4162
4163 /* Try iv elimination. */
4164 if (may_eliminate_iv (data, use, cand, &bound))
4165 {
4166 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4167 /* The bound is a loop invariant, so it will only be computed
4168 once. */
4169 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4170 }
4171 else
4172 elim_cost = infinite_cost;
4173
4174 /* Try expressing the original giv. If it is compared with an invariant,
4175 note that we cannot get rid of it. */
4176 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4177 NULL, &cmp_iv);
4178 gcc_assert (ok);
4179
4180 /* When the condition is a comparison of the candidate IV against
4181 zero, prefer this IV.
4182
4183 TODO: The constant that we're subtracting from the cost should
4184 be target-dependent. This information should be added to the
4185 target costs for each backend. */
4186 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4187 && integer_zerop (*bound_cst)
4188 && (operand_equal_p (*control_var, cand->var_after, 0)
4189 || operand_equal_p (*control_var, cand->var_before, 0)))
4190 elim_cost.cost -= 1;
4191
4192 express_cost = get_computation_cost (data, use, cand, false,
4193 &depends_on_express, NULL);
4194 fd_ivopts_data = data;
4195 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4196
4197 /* Choose the better approach, preferring the eliminated IV. */
4198 if (compare_costs (elim_cost, express_cost) <= 0)
4199 {
4200 cost = elim_cost;
4201 depends_on = depends_on_elim;
4202 depends_on_elim = NULL;
4203 }
4204 else
4205 {
4206 cost = express_cost;
4207 depends_on = depends_on_express;
4208 depends_on_express = NULL;
4209 bound = NULL_TREE;
4210 }
4211
4212 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
4213
4214 if (depends_on_elim)
4215 BITMAP_FREE (depends_on_elim);
4216 if (depends_on_express)
4217 BITMAP_FREE (depends_on_express);
4218
4219 return !infinite_cost_p (cost);
4220 }
4221
4222 /* Determines cost of basing replacement of USE on CAND. Returns false
4223 if USE cannot be based on CAND. */
4224
4225 static bool
4226 determine_use_iv_cost (struct ivopts_data *data,
4227 struct iv_use *use, struct iv_cand *cand)
4228 {
4229 switch (use->type)
4230 {
4231 case USE_NONLINEAR_EXPR:
4232 return determine_use_iv_cost_generic (data, use, cand);
4233
4234 case USE_ADDRESS:
4235 return determine_use_iv_cost_address (data, use, cand);
4236
4237 case USE_COMPARE:
4238 return determine_use_iv_cost_condition (data, use, cand);
4239
4240 default:
4241 gcc_unreachable ();
4242 }
4243 }
4244
4245 /* Return true if get_computation_cost indicates that autoincrement is
4246 a possibility for the pair of USE and CAND, false otherwise. */
4247
4248 static bool
4249 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4250 struct iv_cand *cand)
4251 {
4252 bitmap depends_on;
4253 bool can_autoinc;
4254 comp_cost cost;
4255
4256 if (use->type != USE_ADDRESS)
4257 return false;
4258
4259 cost = get_computation_cost (data, use, cand, true, &depends_on,
4260 &can_autoinc);
4261
4262 BITMAP_FREE (depends_on);
4263
4264 return !infinite_cost_p (cost) && can_autoinc;
4265 }
4266
4267 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4268 use that allows autoincrement, and set their AINC_USE if possible. */
4269
4270 static void
4271 set_autoinc_for_original_candidates (struct ivopts_data *data)
4272 {
4273 unsigned i, j;
4274
4275 for (i = 0; i < n_iv_cands (data); i++)
4276 {
4277 struct iv_cand *cand = iv_cand (data, i);
4278 struct iv_use *closest = NULL;
4279 if (cand->pos != IP_ORIGINAL)
4280 continue;
4281 for (j = 0; j < n_iv_uses (data); j++)
4282 {
4283 struct iv_use *use = iv_use (data, j);
4284 unsigned uid = gimple_uid (use->stmt);
4285 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
4286 || uid > gimple_uid (cand->incremented_at))
4287 continue;
4288 if (closest == NULL || uid > gimple_uid (closest->stmt))
4289 closest = use;
4290 }
4291 if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
4292 continue;
4293 cand->ainc_use = closest;
4294 }
4295 }
4296
4297 /* Finds the candidates for the induction variables. */
4298
4299 static void
4300 find_iv_candidates (struct ivopts_data *data)
4301 {
4302 /* Add commonly used ivs. */
4303 add_standard_iv_candidates (data);
4304
4305 /* Add old induction variables. */
4306 add_old_ivs_candidates (data);
4307
4308 /* Add induction variables derived from uses. */
4309 add_derived_ivs_candidates (data);
4310
4311 set_autoinc_for_original_candidates (data);
4312
4313 /* Record the important candidates. */
4314 record_important_candidates (data);
4315 }
4316
4317 /* Determines costs of basing the use of the iv on an iv candidate. */
4318
4319 static void
4320 determine_use_iv_costs (struct ivopts_data *data)
4321 {
4322 unsigned i, j;
4323 struct iv_use *use;
4324 struct iv_cand *cand;
4325 bitmap to_clear = BITMAP_ALLOC (NULL);
4326
4327 alloc_use_cost_map (data);
4328
4329 for (i = 0; i < n_iv_uses (data); i++)
4330 {
4331 use = iv_use (data, i);
4332
4333 if (data->consider_all_candidates)
4334 {
4335 for (j = 0; j < n_iv_cands (data); j++)
4336 {
4337 cand = iv_cand (data, j);
4338 determine_use_iv_cost (data, use, cand);
4339 }
4340 }
4341 else
4342 {
4343 bitmap_iterator bi;
4344
4345 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4346 {
4347 cand = iv_cand (data, j);
4348 if (!determine_use_iv_cost (data, use, cand))
4349 bitmap_set_bit (to_clear, j);
4350 }
4351
4352 /* Remove the candidates for which the cost is infinite from
4353 the list of related candidates. */
4354 bitmap_and_compl_into (use->related_cands, to_clear);
4355 bitmap_clear (to_clear);
4356 }
4357 }
4358
4359 BITMAP_FREE (to_clear);
4360
4361 if (dump_file && (dump_flags & TDF_DETAILS))
4362 {
4363 fprintf (dump_file, "Use-candidate costs:\n");
4364
4365 for (i = 0; i < n_iv_uses (data); i++)
4366 {
4367 use = iv_use (data, i);
4368
4369 fprintf (dump_file, "Use %d:\n", i);
4370 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
4371 for (j = 0; j < use->n_map_members; j++)
4372 {
4373 if (!use->cost_map[j].cand
4374 || infinite_cost_p (use->cost_map[j].cost))
4375 continue;
4376
4377 fprintf (dump_file, " %d\t%d\t%d\t",
4378 use->cost_map[j].cand->id,
4379 use->cost_map[j].cost.cost,
4380 use->cost_map[j].cost.complexity);
4381 if (use->cost_map[j].depends_on)
4382 bitmap_print (dump_file,
4383 use->cost_map[j].depends_on, "","");
4384 fprintf (dump_file, "\n");
4385 }
4386
4387 fprintf (dump_file, "\n");
4388 }
4389 fprintf (dump_file, "\n");
4390 }
4391 }
4392
4393 /* Determines cost of the candidate CAND. */
4394
4395 static void
4396 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4397 {
4398 comp_cost cost_base;
4399 unsigned cost, cost_step;
4400 tree base;
4401
4402 if (!cand->iv)
4403 {
4404 cand->cost = 0;
4405 return;
4406 }
4407
4408 /* There are two costs associated with the candidate -- its increment
4409 and its initialization. The second is almost negligible for any loop
4410 that rolls enough, so we take it into account only a little. */
4411
4412 base = cand->iv->base;
4413 cost_base = force_var_cost (data, base, NULL);
4414 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
4415
4416 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
4417
4418 /* Prefer the original ivs unless we may gain something by replacing them.
4419 The reason is to make debugging simpler; this is not relevant for
4420 artificial ivs created by other optimization passes. */
4421 if (cand->pos != IP_ORIGINAL
4422 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4423 cost++;
4424
4425 /* Prefer not to insert statements into the latch unless there are some
4426 already (so that we do not create unnecessary jumps). */
4427 if (cand->pos == IP_END
4428 && empty_block_p (ip_end_pos (data->current_loop)))
4429 cost++;
4430
4431 cand->cost = cost;
4432 cand->cost_step = cost_step;
4433 }
4434
4435 /* Determines costs of computation of the candidates. */
4436
4437 static void
4438 determine_iv_costs (struct ivopts_data *data)
4439 {
4440 unsigned i;
4441
4442 if (dump_file && (dump_flags & TDF_DETAILS))
4443 {
4444 fprintf (dump_file, "Candidate costs:\n");
4445 fprintf (dump_file, " cand\tcost\n");
4446 }
4447
4448 for (i = 0; i < n_iv_cands (data); i++)
4449 {
4450 struct iv_cand *cand = iv_cand (data, i);
4451
4452 determine_iv_cost (data, cand);
4453
4454 if (dump_file && (dump_flags & TDF_DETAILS))
4455 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4456 }
4457
4458 if (dump_file && (dump_flags & TDF_DETAILS))
4459 fprintf (dump_file, "\n");
4460 }
4461
4462 /* Calculates cost for having SIZE induction variables. */
4463
4464 static unsigned
4465 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4466 {
4467 /* We add size to the cost, so that we prefer eliminating ivs
4468 if possible. */
4469 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
4470 data->body_includes_call);
4471 }
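/* E.g., assuming estimate_reg_pressure_cost returns 0 while SIZE plus
DATA->regs_used still fits in the available registers, the penalty
reduces to SIZE itself, which is what breaks ties in favor of smaller
iv sets. */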
4472
4473 /* For each size of the induction variable set determine the penalty. */
4474
4475 static void
4476 determine_set_costs (struct ivopts_data *data)
4477 {
4478 unsigned j, n;
4479 gimple phi;
4480 gimple_stmt_iterator psi;
4481 tree op;
4482 struct loop *loop = data->current_loop;
4483 bitmap_iterator bi;
4484
4485 if (dump_file && (dump_flags & TDF_DETAILS))
4486 {
4487 fprintf (dump_file, "Global costs:\n");
4488 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
4489 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
4490 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
4491 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
4492 }
4493
4494 n = 0;
4495 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
4496 {
4497 phi = gsi_stmt (psi);
4498 op = PHI_RESULT (phi);
4499
4500 if (!is_gimple_reg (op))
4501 continue;
4502
4503 if (get_iv (data, op))
4504 continue;
4505
4506 n++;
4507 }
4508
4509 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4510 {
4511 struct version_info *info = ver_info (data, j);
4512
4513 if (info->inv_id && info->has_nonlin_use)
4514 n++;
4515 }
4516
4517 data->regs_used = n;
4518 if (dump_file && (dump_flags & TDF_DETAILS))
4519 fprintf (dump_file, " regs_used %d\n", n);
4520
4521 if (dump_file && (dump_flags & TDF_DETAILS))
4522 {
4523 fprintf (dump_file, " cost for size:\n");
4524 fprintf (dump_file, " ivs\tcost\n");
4525 for (j = 0; j <= 2 * target_avail_regs; j++)
4526 fprintf (dump_file, " %d\t%d\n", j,
4527 ivopts_global_cost_for_size (data, j));
4528 fprintf (dump_file, "\n");
4529 }
4530 }
4531
4532 /* Returns true if A is a cheaper cost pair than B. */
4533
4534 static bool
4535 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4536 {
4537 int cmp;
4538
4539 if (!a)
4540 return false;
4541
4542 if (!b)
4543 return true;
4544
4545 cmp = compare_costs (a->cost, b->cost);
4546 if (cmp < 0)
4547 return true;
4548
4549 if (cmp > 0)
4550 return false;
4551
4552 /* In case the costs are the same, prefer the cheaper candidate. */
4553 if (a->cand->cost < b->cand->cost)
4554 return true;
4555
4556 return false;
4557 }
4558
4559 /* Computes the cost field of IVS structure. */
4560
4561 static void
4562 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4563 {
4564 comp_cost cost = ivs->cand_use_cost;
4565 cost.cost += ivs->cand_cost;
4566 cost.cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4567
4568 ivs->cost = cost;
4569 }
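/* The total is therefore

use costs of the selected pairs + candidate costs + size penalty,

with the complexity component coming only from the use costs. */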
4570
4571 /* Remove invariants in set INVS from set IVS. */
4572
4573 static void
4574 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4575 {
4576 bitmap_iterator bi;
4577 unsigned iid;
4578
4579 if (!invs)
4580 return;
4581
4582 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4583 {
4584 ivs->n_invariant_uses[iid]--;
4585 if (ivs->n_invariant_uses[iid] == 0)
4586 ivs->n_regs--;
4587 }
4588 }
4589
4590 /* Set USE not to be expressed by any candidate in IVS. */
4591
4592 static void
4593 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4594 struct iv_use *use)
4595 {
4596 unsigned uid = use->id, cid;
4597 struct cost_pair *cp;
4598
4599 cp = ivs->cand_for_use[uid];
4600 if (!cp)
4601 return;
4602 cid = cp->cand->id;
4603
4604 ivs->bad_uses++;
4605 ivs->cand_for_use[uid] = NULL;
4606 ivs->n_cand_uses[cid]--;
4607
4608 if (ivs->n_cand_uses[cid] == 0)
4609 {
4610 bitmap_clear_bit (ivs->cands, cid);
4611 /* Do not count the pseudocandidates. */
4612 if (cp->cand->iv)
4613 ivs->n_regs--;
4614 ivs->n_cands--;
4615 ivs->cand_cost -= cp->cand->cost;
4616
4617 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4618 }
4619
4620 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
4621
4622 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4623 iv_ca_recount_cost (data, ivs);
4624 }
4625
4626 /* Add invariants in set INVS to set IVS. */
4627
4628 static void
4629 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4630 {
4631 bitmap_iterator bi;
4632 unsigned iid;
4633
4634 if (!invs)
4635 return;
4636
4637 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4638 {
4639 ivs->n_invariant_uses[iid]++;
4640 if (ivs->n_invariant_uses[iid] == 1)
4641 ivs->n_regs++;
4642 }
4643 }
4644
4645 /* Set cost pair for USE in set IVS to CP. */
4646
4647 static void
4648 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4649 struct iv_use *use, struct cost_pair *cp)
4650 {
4651 unsigned uid = use->id, cid;
4652
4653 if (ivs->cand_for_use[uid] == cp)
4654 return;
4655
4656 if (ivs->cand_for_use[uid])
4657 iv_ca_set_no_cp (data, ivs, use);
4658
4659 if (cp)
4660 {
4661 cid = cp->cand->id;
4662
4663 ivs->bad_uses--;
4664 ivs->cand_for_use[uid] = cp;
4665 ivs->n_cand_uses[cid]++;
4666 if (ivs->n_cand_uses[cid] == 1)
4667 {
4668 bitmap_set_bit (ivs->cands, cid);
4669 /* Do not count the pseudocandidates. */
4670 if (cp->cand->iv)
4671 ivs->n_regs++;
4672 ivs->n_cands++;
4673 ivs->cand_cost += cp->cand->cost;
4674
4675 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4676 }
4677
4678 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
4679 iv_ca_set_add_invariants (ivs, cp->depends_on);
4680 iv_ca_recount_cost (data, ivs);
4681 }
4682 }
4683
4684 /* Extend set IVS by expressing USE by some of the candidates in it
4685 if possible. */
4686
4687 static void
4688 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4689 struct iv_use *use)
4690 {
4691 struct cost_pair *best_cp = NULL, *cp;
4692 bitmap_iterator bi;
4693 unsigned i;
4694
4695 gcc_assert (ivs->upto >= use->id);
4696
4697 if (ivs->upto == use->id)
4698 {
4699 ivs->upto++;
4700 ivs->bad_uses++;
4701 }
4702
4703 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4704 {
4705 cp = get_use_iv_cost (data, use, iv_cand (data, i));
4706
4707 if (cheaper_cost_pair (cp, best_cp))
4708 best_cp = cp;
4709 }
4710
4711 iv_ca_set_cp (data, ivs, use, best_cp);
4712 }
4713
4714 /* Get cost for assignment IVS. */
4715
4716 static comp_cost
4717 iv_ca_cost (struct iv_ca *ivs)
4718 {
4719 /* This was a conditional expression but it triggered a bug in
4720 Sun C 5.5. */
4721 if (ivs->bad_uses)
4722 return infinite_cost;
4723 else
4724 return ivs->cost;
4725 }
4726
4727 /* Returns true if all dependences of CP are among invariants in IVS. */
4728
4729 static bool
4730 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4731 {
4732 unsigned i;
4733 bitmap_iterator bi;
4734
4735 if (!cp->depends_on)
4736 return true;
4737
4738 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4739 {
4740 if (ivs->n_invariant_uses[i] == 0)
4741 return false;
4742 }
4743
4744 return true;
4745 }
4746
4747 /* Creates a change expressing USE by NEW_CP instead of OLD_CP, and chains
4748 it before NEXT_CHANGE. */
4749
4750 static struct iv_ca_delta *
4751 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4752 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4753 {
4754 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4755
4756 change->use = use;
4757 change->old_cp = old_cp;
4758 change->new_cp = new_cp;
4759 change->next_change = next_change;
4760
4761 return change;
4762 }
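/* A delta built this way is a singly-linked list of (USE, OLD_CP,
NEW_CP) records; iv_ca_delta_commit walks it to apply the swaps, and
iv_ca_delta_reverse below turns the list into its own inverse so the
same walk can undo them. */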
4763
4764 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4765 are rewritten. */
4766
4767 static struct iv_ca_delta *
4768 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4769 {
4770 struct iv_ca_delta *last;
4771
4772 if (!l2)
4773 return l1;
4774
4775 if (!l1)
4776 return l2;
4777
4778 for (last = l1; last->next_change; last = last->next_change)
4779 continue;
4780 last->next_change = l2;
4781
4782 return l1;
4783 }
4784
4785 /* Returns the cost pair by which USE is expressed in IVS. */
4786
4787 static struct cost_pair *
4788 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4789 {
4790 return ivs->cand_for_use[use->id];
4791 }
4792
4793 /* Reverse the list of changes DELTA, forming the inverse to it. */
4794
4795 static struct iv_ca_delta *
4796 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4797 {
4798 struct iv_ca_delta *act, *next, *prev = NULL;
4799 struct cost_pair *tmp;
4800
4801 for (act = delta; act; act = next)
4802 {
4803 next = act->next_change;
4804 act->next_change = prev;
4805 prev = act;
4806
4807 tmp = act->old_cp;
4808 act->old_cp = act->new_cp;
4809 act->new_cp = tmp;
4810 }
4811
4812 return prev;
4813 }
4814
4815 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4816 reverted instead. */
4817
4818 static void
4819 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4820 struct iv_ca_delta *delta, bool forward)
4821 {
4822 struct cost_pair *from, *to;
4823 struct iv_ca_delta *act;
4824
4825 if (!forward)
4826 delta = iv_ca_delta_reverse (delta);
4827
4828 for (act = delta; act; act = act->next_change)
4829 {
4830 from = act->old_cp;
4831 to = act->new_cp;
4832 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4833 iv_ca_set_cp (data, ivs, act->use, to);
4834 }
4835
4836 if (!forward)
4837 iv_ca_delta_reverse (delta);
4838 }
4839
4840 /* Returns true if CAND is used in IVS. */
4841
4842 static bool
4843 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4844 {
4845 return ivs->n_cand_uses[cand->id] > 0;
4846 }
4847
4848 /* Returns number of induction variable candidates in the set IVS. */
4849
4850 static unsigned
4851 iv_ca_n_cands (struct iv_ca *ivs)
4852 {
4853 return ivs->n_cands;
4854 }
4855
4856 /* Free the list of changes DELTA. */
4857
4858 static void
4859 iv_ca_delta_free (struct iv_ca_delta **delta)
4860 {
4861 struct iv_ca_delta *act, *next;
4862
4863 for (act = *delta; act; act = next)
4864 {
4865 next = act->next_change;
4866 free (act);
4867 }
4868
4869 *delta = NULL;
4870 }
4871
4872 /* Allocates a new iv candidate assignment. */
4873
4874 static struct iv_ca *
4875 iv_ca_new (struct ivopts_data *data)
4876 {
4877 struct iv_ca *nw = XNEW (struct iv_ca);
4878
4879 nw->upto = 0;
4880 nw->bad_uses = 0;
4881 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4882 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4883 nw->cands = BITMAP_ALLOC (NULL);
4884 nw->n_cands = 0;
4885 nw->n_regs = 0;
4886 nw->cand_use_cost = zero_cost;
4887 nw->cand_cost = 0;
4888 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4889 nw->cost = zero_cost;
4890
4891 return nw;
4892 }
4893
4894 /* Free memory occupied by the set IVS. */
4895
4896 static void
4897 iv_ca_free (struct iv_ca **ivs)
4898 {
4899 free ((*ivs)->cand_for_use);
4900 free ((*ivs)->n_cand_uses);
4901 BITMAP_FREE ((*ivs)->cands);
4902 free ((*ivs)->n_invariant_uses);
4903 free (*ivs);
4904 *ivs = NULL;
4905 }
4906
4907 /* Dumps IVS to FILE. */
4908
4909 static void
4910 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4911 {
4912 const char *pref = " invariants ";
4913 unsigned i;
4914 comp_cost cost = iv_ca_cost (ivs);
4915
4916 fprintf (file, " cost %d (complexity %d)\n", cost.cost, cost.complexity);
4917 bitmap_print (file, ivs->cands, " candidates ","\n");
4918
4919 for (i = 1; i <= data->max_inv_id; i++)
4920 if (ivs->n_invariant_uses[i])
4921 {
4922 fprintf (file, "%s%d", pref, i);
4923 pref = ", ";
4924 }
4925 fprintf (file, "\n");
4926 }
4927
4928 /* Try changing the candidate in IVS to CAND for each use. Return the cost
4929 of the new set, and store the differences in DELTA. The number of
4930 induction variables in the new set is stored in N_IVS. */
4931
4932 static comp_cost
4933 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4934 struct iv_cand *cand, struct iv_ca_delta **delta,
4935 unsigned *n_ivs)
4936 {
4937 unsigned i;
4938 comp_cost cost;
4939 struct iv_use *use;
4940 struct cost_pair *old_cp, *new_cp;
4941
4942 *delta = NULL;
4943 for (i = 0; i < ivs->upto; i++)
4944 {
4945 use = iv_use (data, i);
4946 old_cp = iv_ca_cand_for_use (ivs, use);
4947
4948 if (old_cp
4949 && old_cp->cand == cand)
4950 continue;
4951
4952 new_cp = get_use_iv_cost (data, use, cand);
4953 if (!new_cp)
4954 continue;
4955
4956 if (!iv_ca_has_deps (ivs, new_cp))
4957 continue;
4958
4959 if (!cheaper_cost_pair (new_cp, old_cp))
4960 continue;
4961
4962 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4963 }
4964
4965 iv_ca_delta_commit (data, ivs, *delta, true);
4966 cost = iv_ca_cost (ivs);
4967 if (n_ivs)
4968 *n_ivs = iv_ca_n_cands (ivs);
4969 iv_ca_delta_commit (data, ivs, *delta, false);
4970
4971 return cost;
4972 }
4973
4974 /* Try narrowing set IVS by removing CAND. Return the cost of
4975 the new set and store the differences in DELTA. */
4976
4977 static comp_cost
4978 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4979 struct iv_cand *cand, struct iv_ca_delta **delta)
4980 {
4981 unsigned i, ci;
4982 struct iv_use *use;
4983 struct cost_pair *old_cp, *new_cp, *cp;
4984 bitmap_iterator bi;
4985 struct iv_cand *cnd;
4986 comp_cost cost;
4987
4988 *delta = NULL;
4989 for (i = 0; i < n_iv_uses (data); i++)
4990 {
4991 use = iv_use (data, i);
4992
4993 old_cp = iv_ca_cand_for_use (ivs, use);
4994 if (old_cp->cand != cand)
4995 continue;
4996
4997 new_cp = NULL;
4998
4999 if (data->consider_all_candidates)
5000 {
5001 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5002 {
5003 if (ci == cand->id)
5004 continue;
5005
5006 cnd = iv_cand (data, ci);
5007
5008 cp = get_use_iv_cost (data, use, cnd);
5009 if (!cp)
5010 continue;
5011 if (!iv_ca_has_deps (ivs, cp))
5012 continue;
5013
5014 if (!cheaper_cost_pair (cp, new_cp))
5015 continue;
5016
5017 new_cp = cp;
5018 }
5019 }
5020 else
5021 {
5022 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5023 {
5024 if (ci == cand->id)
5025 continue;
5026
5027 cnd = iv_cand (data, ci);
5028
5029 cp = get_use_iv_cost (data, use, cnd);
5030 if (!cp)
5031 continue;
5032 if (!iv_ca_has_deps (ivs, cp))
5033 continue;
5034
5035 if (!cheaper_cost_pair (cp, new_cp))
5036 continue;
5037
5038 new_cp = cp;
5039 }
5040 }
5041
5042 if (!new_cp)
5043 {
5044 iv_ca_delta_free (delta);
5045 return infinite_cost;
5046 }
5047
5048 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5049 }
5050
5051 iv_ca_delta_commit (data, ivs, *delta, true);
5052 cost = iv_ca_cost (ivs);
5053 iv_ca_delta_commit (data, ivs, *delta, false);
5054
5055 return cost;
5056 }
5057
5058 /* Try optimizing the set of candidates IVS by removing candidates other
5059 than EXCEPT_CAND from it. Return the cost of the new set, and store the
5060 differences in DELTA. */
5061
5062 static comp_cost
5063 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5064 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5065 {
5066 bitmap_iterator bi;
5067 struct iv_ca_delta *act_delta, *best_delta;
5068 unsigned i;
5069 comp_cost best_cost, acost;
5070 struct iv_cand *cand;
5071
5072 best_delta = NULL;
5073 best_cost = iv_ca_cost (ivs);
5074
5075 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5076 {
5077 cand = iv_cand (data, i);
5078
5079 if (cand == except_cand)
5080 continue;
5081
5082 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5083
5084 if (compare_costs (acost, best_cost) < 0)
5085 {
5086 best_cost = acost;
5087 iv_ca_delta_free (&best_delta);
5088 best_delta = act_delta;
5089 }
5090 else
5091 iv_ca_delta_free (&act_delta);
5092 }
5093
5094 if (!best_delta)
5095 {
5096 *delta = NULL;
5097 return best_cost;
5098 }
5099
5100 /* Recurse to possibly remove other unnecessary ivs. */
5101 iv_ca_delta_commit (data, ivs, best_delta, true);
5102 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5103 iv_ca_delta_commit (data, ivs, best_delta, false);
5104 *delta = iv_ca_delta_join (best_delta, *delta);
5105 return best_cost;
5106 }
5107
5108 /* Tries to extend the set IVS in the best possible way in order
5109 to express the USE. If ORIGINALP is true, prefer candidates from
5110 the original set of IVs, otherwise favor important candidates not
5111 based on any memory object. */
5112
5113 static bool
5114 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5115 struct iv_use *use, bool originalp)
5116 {
5117 comp_cost best_cost, act_cost;
5118 unsigned i;
5119 bitmap_iterator bi;
5120 struct iv_cand *cand;
5121 struct iv_ca_delta *best_delta = NULL, *act_delta;
5122 struct cost_pair *cp;
5123
5124 iv_ca_add_use (data, ivs, use);
5125 best_cost = iv_ca_cost (ivs);
5126
5127 cp = iv_ca_cand_for_use (ivs, use);
5128 if (cp)
5129 {
5130 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5131 iv_ca_set_no_cp (data, ivs, use);
5132 }
5133
5134 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
5135 first try important candidates not based on any memory object. Only if
5136 this fails, try the specific ones. Rationale -- in loops with many
5137 variables the best choice often is to use just one generic biv. If we
5138 added here many ivs specific to the uses, the optimization algorithm later
5139 would be likely to get stuck in a local minimum, thus causing us to create
5140 too many ivs. The approach from few ivs to more seems more likely to be
5141 successful -- starting from few ivs, replacing an expensive use by a
5142 specific iv should always be a win. */
5143 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5144 {
5145 cand = iv_cand (data, i);
5146
5147 if (originalp && cand->pos != IP_ORIGINAL)
5148 continue;
5149
5150 if (!originalp && cand->iv->base_object != NULL_TREE)
5151 continue;
5152
5153 if (iv_ca_cand_used_p (ivs, cand))
5154 continue;
5155
5156 cp = get_use_iv_cost (data, use, cand);
5157 if (!cp)
5158 continue;
5159
5160 iv_ca_set_cp (data, ivs, use, cp);
5161 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5162 iv_ca_set_no_cp (data, ivs, use);
5163 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5164
5165 if (compare_costs (act_cost, best_cost) < 0)
5166 {
5167 best_cost = act_cost;
5168
5169 iv_ca_delta_free (&best_delta);
5170 best_delta = act_delta;
5171 }
5172 else
5173 iv_ca_delta_free (&act_delta);
5174 }
5175
5176 if (infinite_cost_p (best_cost))
5177 {
5178 for (i = 0; i < use->n_map_members; i++)
5179 {
5180 cp = use->cost_map + i;
5181 cand = cp->cand;
5182 if (!cand)
5183 continue;
5184
5185 /* Already tried this. */
5186 if (cand->important)
5187 {
5188 if (originalp && cand->pos == IP_ORIGINAL)
5189 continue;
5190 if (!originalp && cand->iv->base_object == NULL_TREE)
5191 continue;
5192 }
5193
5194 if (iv_ca_cand_used_p (ivs, cand))
5195 continue;
5196
5197 act_delta = NULL;
5198 iv_ca_set_cp (data, ivs, use, cp);
5199 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5200 iv_ca_set_no_cp (data, ivs, use);
5201 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5202 cp, act_delta);
5203
5204 if (compare_costs (act_cost, best_cost) < 0)
5205 {
5206 best_cost = act_cost;
5207
5208 if (best_delta)
5209 iv_ca_delta_free (&best_delta);
5210 best_delta = act_delta;
5211 }
5212 else
5213 iv_ca_delta_free (&act_delta);
5214 }
5215 }
5216
5217 iv_ca_delta_commit (data, ivs, best_delta, true);
5218 iv_ca_delta_free (&best_delta);
5219
5220 return !infinite_cost_p (best_cost);
5221 }
5222
5223 /* Finds an initial assignment of candidates to uses. */
5224
5225 static struct iv_ca *
5226 get_initial_solution (struct ivopts_data *data, bool originalp)
5227 {
5228 struct iv_ca *ivs = iv_ca_new (data);
5229 unsigned i;
5230
5231 for (i = 0; i < n_iv_uses (data); i++)
5232 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
5233 {
5234 iv_ca_free (&ivs);
5235 return NULL;
5236 }
5237
5238 return ivs;
5239 }
5240
5241 /* Tries to improve the set of induction variables IVS. */
5242
5243 static bool
5244 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5245 {
5246 unsigned i, n_ivs;
5247 comp_cost acost, best_cost = iv_ca_cost (ivs);
5248 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5249 struct iv_cand *cand;
5250
5251 /* Try extending the set of induction variables by one. */
5252 for (i = 0; i < n_iv_cands (data); i++)
5253 {
5254 cand = iv_cand (data, i);
5255
5256 if (iv_ca_cand_used_p (ivs, cand))
5257 continue;
5258
5259 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
5260 if (!act_delta)
5261 continue;
5262
5263 /* If we successfully added the candidate and the set is small enough,
5264 try optimizing it by removing other candidates. */
5265 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5266 {
5267 iv_ca_delta_commit (data, ivs, act_delta, true);
5268 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5269 iv_ca_delta_commit (data, ivs, act_delta, false);
5270 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5271 }
5272
5273 if (compare_costs (acost, best_cost) < 0)
5274 {
5275 best_cost = acost;
5276 iv_ca_delta_free (&best_delta);
5277 best_delta = act_delta;
5278 }
5279 else
5280 iv_ca_delta_free (&act_delta);
5281 }
5282
5283 if (!best_delta)
5284 {
5285 /* Try removing the candidates from the set instead. */
5286 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5287
5288 /* Nothing more we can do. */
5289 if (!best_delta)
5290 return false;
5291 }
5292
5293 iv_ca_delta_commit (data, ivs, best_delta, true);
5294 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
5295 iv_ca_delta_free (&best_delta);
5296 return true;
5297 }
5298
5299 /* Attempts to find the optimal set of induction variables. We use a simple
5300 greedy heuristic -- we try to replace at most one candidate in the selected
5301 solution and remove the unused ivs while this improves the cost. */
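/* Schematically (a sketch of the control flow below):

set = get_initial_solution (data, originalp);
while (try_improve_iv_set (data, set))
;

Each successful round strictly decreases the cost (extending the set
by one candidate and/or pruning it), so the iteration terminates. */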
5302
5303 static struct iv_ca *
5304 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
5305 {
5306 struct iv_ca *set;
5307
5308 /* Get the initial solution. */
5309 set = get_initial_solution (data, originalp);
5310 if (!set)
5311 {
5312 if (dump_file && (dump_flags & TDF_DETAILS))
5313 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5314 return NULL;
5315 }
5316
5317 if (dump_file && (dump_flags & TDF_DETAILS))
5318 {
5319 fprintf (dump_file, "Initial set of candidates:\n");
5320 iv_ca_dump (data, dump_file, set);
5321 }
5322
5323 while (try_improve_iv_set (data, set))
5324 {
5325 if (dump_file && (dump_flags & TDF_DETAILS))
5326 {
5327 fprintf (dump_file, "Improved to:\n");
5328 iv_ca_dump (data, dump_file, set);
5329 }
5330 }
5331
5332 return set;
5333 }
5334
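/* Attempts to find the optimal set of induction variables, trying both
the strategy that prefers the original ivs and the one that prefers
generic candidates, and returning whichever result is cheaper. */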
5335 static struct iv_ca *
5336 find_optimal_iv_set (struct ivopts_data *data)
5337 {
5338 unsigned i;
5339 struct iv_ca *set, *origset;
5340 struct iv_use *use;
5341 comp_cost cost, origcost;
5342
5343 /* Determine the cost based on a strategy that starts with original IVs,
5344 and try again using a strategy that prefers candidates not based
5345 on any memory object. */
5346 origset = find_optimal_iv_set_1 (data, true);
5347 set = find_optimal_iv_set_1 (data, false);
5348
5349 if (!origset && !set)
5350 return NULL;
5351
5352 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
5353 cost = set ? iv_ca_cost (set) : infinite_cost;
5354
5355 if (dump_file && (dump_flags & TDF_DETAILS))
5356 {
5357 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
5358 origcost.cost, origcost.complexity);
5359 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
5360 cost.cost, cost.complexity);
5361 }
5362
5363 /* Choose the one with the best cost. */
5364 if (compare_costs (origcost, cost) <= 0)
5365 {
5366 if (set)
5367 iv_ca_free (&set);
5368 set = origset;
5369 }
5370 else if (origset)
5371 iv_ca_free (&origset);
5372
5373 for (i = 0; i < n_iv_uses (data); i++)
5374 {
5375 use = iv_use (data, i);
5376 use->selected = iv_ca_cand_for_use (set, use)->cand;
5377 }
5378
5379 return set;
5380 }
5381
5382 /* Creates a new induction variable corresponding to CAND. */
5383
5384 static void
5385 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5386 {
5387 gimple_stmt_iterator incr_pos;
5388 tree base;
5389 bool after = false;
5390
5391 if (!cand->iv)
5392 return;
5393
5394 switch (cand->pos)
5395 {
5396 case IP_NORMAL:
5397 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
5398 break;
5399
5400 case IP_END:
5401 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
5402 after = true;
5403 break;
5404
5405 case IP_AFTER_USE:
5406 after = true;
5407 /* fall through */
5408 case IP_BEFORE_USE:
5409 incr_pos = gsi_for_stmt (cand->incremented_at);
5410 break;
5411
5412 case IP_ORIGINAL:
5413 /* Mark that the iv is preserved. */
5414 name_info (data, cand->var_before)->preserve_biv = true;
5415 name_info (data, cand->var_after)->preserve_biv = true;
5416
5417 /* Rewrite the increment so that it uses var_before directly. */
5418 find_interesting_uses_op (data, cand->var_after)->selected = cand;
5419
5420 return;
5421 }
5422
5423 gimple_add_tmp_var (cand->var_before);
5424 add_referenced_var (cand->var_before);
5425
5426 base = unshare_expr (cand->iv->base);
5427
5428 create_iv (base, unshare_expr (cand->iv->step),
5429 cand->var_before, data->current_loop,
5430 &incr_pos, after, &cand->var_before, &cand->var_after);
5431 }
5432
5433 /* Creates new induction variables described in SET. */
5434
5435 static void
5436 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5437 {
5438 unsigned i;
5439 struct iv_cand *cand;
5440 bitmap_iterator bi;
5441
5442 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5443 {
5444 cand = iv_cand (data, i);
5445 create_new_iv (data, cand);
5446 }
5447 }
5448
5449
5450 /* Rewrites USE (definition of iv used in a nonlinear expression)
5451 using candidate CAND. */
5452
5453 static void
5454 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5455 struct iv_use *use, struct iv_cand *cand)
5456 {
5457 tree comp;
5458 tree op, tgt;
5459 gimple ass;
5460 gimple_stmt_iterator bsi;
5461
5462 /* An important special case -- if we are asked to express the value of
5463 the original iv by itself, just exit; there is no need to
5464 introduce a new computation (that might also need casting the
5465 variable to unsigned and back). */
5466 if (cand->pos == IP_ORIGINAL
5467 && cand->incremented_at == use->stmt)
5468 {
5469 tree step, ctype, utype;
5470 enum tree_code incr_code = PLUS_EXPR, old_code;
5471
5472 gcc_assert (is_gimple_assign (use->stmt));
5473 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
5474
5475 step = cand->iv->step;
5476 ctype = TREE_TYPE (step);
5477 utype = TREE_TYPE (cand->var_after);
5478 if (TREE_CODE (step) == NEGATE_EXPR)
5479 {
5480 incr_code = MINUS_EXPR;
5481 step = TREE_OPERAND (step, 0);
5482 }
5483
5484 /* Check whether we may leave the computation unchanged.
5485 This is the case only if it does not rely on other
5486 computations in the loop -- otherwise, the computation
5487 we rely upon may be removed in remove_unused_ivs,
5488 thus leading to an ICE. */
5489 old_code = gimple_assign_rhs_code (use->stmt);
5490 if (old_code == PLUS_EXPR
5491 || old_code == MINUS_EXPR
5492 || old_code == POINTER_PLUS_EXPR)
5493 {
5494 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
5495 op = gimple_assign_rhs2 (use->stmt);
5496 else if (old_code != MINUS_EXPR
5497 && gimple_assign_rhs2 (use->stmt) == cand->var_before)
5498 op = gimple_assign_rhs1 (use->stmt);
5499 else
5500 op = NULL_TREE;
5501 }
5502 else
5503 op = NULL_TREE;
5504
5505 if (op
5506 && (TREE_CODE (op) == INTEGER_CST
5507 || operand_equal_p (op, step, 0)))
5508 return;
5509
5510 /* Otherwise, add the necessary computations to express
5511 the iv. */
5512 op = fold_convert (ctype, cand->var_before);
5513 comp = fold_convert (utype,
5514 build2 (incr_code, ctype, op,
5515 unshare_expr (step)));
5516 }
5517 else
5518 {
5519 comp = get_computation (data->current_loop, use, cand);
5520 gcc_assert (comp != NULL_TREE);
5521 }
5522
5523 switch (gimple_code (use->stmt))
5524 {
5525 case GIMPLE_PHI:
5526 tgt = PHI_RESULT (use->stmt);
5527
5528 /* If we should keep the biv, do not replace it. */
5529 if (name_info (data, tgt)->preserve_biv)
5530 return;
5531
5532 bsi = gsi_after_labels (gimple_bb (use->stmt));
5533 break;
5534
5535 case GIMPLE_ASSIGN:
5536 tgt = gimple_assign_lhs (use->stmt);
5537 bsi = gsi_for_stmt (use->stmt);
5538 break;
5539
5540 default:
5541 gcc_unreachable ();
5542 }
5543
5544 if (!valid_gimple_rhs_p (comp)
5545 || (gimple_code (use->stmt) != GIMPLE_PHI
5546 /* We can't allow re-allocating the stmt as it might still be
5547 pointed to. */
5548 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
5549 >= gimple_num_ops (gsi_stmt (bsi)))))
5550 {
5551 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
5552 true, GSI_SAME_STMT);
5553 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
5554 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
5555 }
5556
5557 if (gimple_code (use->stmt) == GIMPLE_PHI)
5558 {
5559 ass = gimple_build_assign (tgt, comp);
5560 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
5561
5562 bsi = gsi_for_stmt (use->stmt);
5563 remove_phi_node (&bsi, false);
5564 }
5565 else
5566 {
5567 gimple_assign_set_rhs_from_tree (&bsi, comp);
5568 use->stmt = gsi_stmt (bsi);
5569 }
5570 }
5571
5572 /* Replaces the ssa name in index IDX by its basic variable. Callback for
5573 for_each_index. */
5574
5575 static bool
5576 idx_remove_ssa_names (tree base, tree *idx,
5577 void *data ATTRIBUTE_UNUSED)
5578 {
5579 tree *op;
5580
5581 if (TREE_CODE (*idx) == SSA_NAME)
5582 *idx = SSA_NAME_VAR (*idx);
5583
5584 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
5585 {
5586 op = &TREE_OPERAND (base, 2);
5587 if (*op
5588 && TREE_CODE (*op) == SSA_NAME)
5589 *op = SSA_NAME_VAR (*op);
5590 op = &TREE_OPERAND (base, 3);
5591 if (*op
5592 && TREE_CODE (*op) == SSA_NAME)
5593 *op = SSA_NAME_VAR (*op);
5594 }
5595
5596 return true;
5597 }
5598
5599 /* Unshares REF and replaces ssa names inside it by their basic variables. */
5600
5601 static tree
5602 unshare_and_remove_ssa_names (tree ref)
5603 {
5604 ref = unshare_expr (ref);
5605 for_each_index (&ref, idx_remove_ssa_names, NULL);
5606
5607 return ref;
5608 }
5609
5610 /* Copies the reference information from OLD_REF to NEW_REF. */
5611
5612 static void
5613 copy_ref_info (tree new_ref, tree old_ref)
5614 {
5615 tree new_ptr_base = NULL_TREE;
5616
5617 if (TREE_CODE (old_ref) == TARGET_MEM_REF
5618 && TREE_CODE (new_ref) == TARGET_MEM_REF)
5619 TMR_ORIGINAL (new_ref) = TMR_ORIGINAL (old_ref);
5620 else if (TREE_CODE (new_ref) == TARGET_MEM_REF)
5621 TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5622
5623 TREE_SIDE_EFFECTS (new_ref) = TREE_SIDE_EFFECTS (old_ref);
5624 TREE_THIS_VOLATILE (new_ref) = TREE_THIS_VOLATILE (old_ref);
5625
5626 if (TREE_CODE (new_ref) == TARGET_MEM_REF)
5627 new_ptr_base = TMR_BASE (new_ref);
5628 else if (TREE_CODE (new_ref) == MEM_REF)
5629 new_ptr_base = TREE_OPERAND (new_ref, 0);
5630
5631 /* We can transfer points-to information from an old pointer
5632 or decl base to the new one. */
5633 if (new_ptr_base
5634 && TREE_CODE (new_ptr_base) == SSA_NAME
5635 && POINTER_TYPE_P (TREE_TYPE (new_ptr_base))
5636 && !SSA_NAME_PTR_INFO (new_ptr_base))
5637 {
5638 tree base = get_base_address (old_ref);
5639 if (!base)
5640 ;
5641 else if ((INDIRECT_REF_P (base)
5642 || TREE_CODE (base) == MEM_REF)
5643 && TREE_CODE (TREE_OPERAND (base, 0)) == SSA_NAME)
5644 duplicate_ssa_name_ptr_info
5645 (new_ptr_base, SSA_NAME_PTR_INFO (TREE_OPERAND (base, 0)));
5646 else if (TREE_CODE (base) == VAR_DECL
5647 || TREE_CODE (base) == PARM_DECL
5648 || TREE_CODE (base) == RESULT_DECL)
5649 {
5650 struct ptr_info_def *pi = get_ptr_info (new_ptr_base);
5651 pt_solution_set_var (&pi->pt, base);
5652 }
5653 }
5654 }
5655
5656 /* Rewrites USE (address that is an iv) using candidate CAND. */
5657
5658 static void
5659 rewrite_use_address (struct ivopts_data *data,
5660 struct iv_use *use, struct iv_cand *cand)
5661 {
5662 aff_tree aff;
5663 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
5664 tree base_hint = NULL_TREE;
5665 tree ref;
5666 bool ok;
5667
5668 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5669 gcc_assert (ok);
5670 unshare_aff_combination (&aff);
5671
5672 /* To avoid undefined overflow problems, all IV candidates use unsigned
5673 integer types. The drawback is that this makes it impossible for
5674 create_mem_ref to distinguish an IV that is based on a memory object
5675 from one that represents simply an offset.
5676
5677 To work around this problem, we pass a hint to create_mem_ref that
5678 indicates which variable (if any) in aff is an IV based on a memory
5679 object. Note that we only consider the candidate. If this is not
5680 based on an object, the base of the reference is in some subexpression
5681 of the use -- but these will use pointer types, so they are recognized
5682 by the create_mem_ref heuristics anyway. */
5683 if (cand->iv->base_object)
5684 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
5685
5686 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p),
5687 reference_alias_ptr_type (*use->op_p),
5688 &aff, base_hint, data->speed);
5689 copy_ref_info (ref, *use->op_p);
5690 *use->op_p = ref;
5691 }
5692
5693 /* Rewrites USE (a condition in which one of the arguments is an iv)
5694 using candidate CAND. */
5695
5696 static void
5697 rewrite_use_compare (struct ivopts_data *data,
5698 struct iv_use *use, struct iv_cand *cand)
5699 {
5700 tree comp, *var_p, op, bound;
5701 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
5702 enum tree_code compare;
5703 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5704 bool ok;
5705
5706 bound = cp->value;
5707 if (bound)
5708 {
5709 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5710 tree var_type = TREE_TYPE (var);
5711 gimple_seq stmts;
5712
5713 compare = iv_elimination_compare (data, use);
5714 bound = unshare_expr (fold_convert (var_type, bound));
5715 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
5716 if (stmts)
5717 gsi_insert_seq_on_edge_immediate (
5718 loop_preheader_edge (data->current_loop),
5719 stmts);
5720
5721 gimple_cond_set_lhs (use->stmt, var);
5722 gimple_cond_set_code (use->stmt, compare);
5723 gimple_cond_set_rhs (use->stmt, op);
5724 return;
5725 }
5726
5727 /* The induction variable elimination failed; just express the original
5728 giv. */
5729 comp = get_computation (data->current_loop, use, cand);
5730 gcc_assert (comp != NULL_TREE);
5731
5732 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
5733 gcc_assert (ok);
5734
5735 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
5736 true, GSI_SAME_STMT);
5737 }
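/* E.g. (illustrative): a use "if (i < n)" for which iv elimination
succeeded is rewritten as "if (ivtmp == bound)" (or with NE_EXPR, as
chosen by iv_elimination_compare), with the bound computation inserted
once on the preheader edge. */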
5738
5739 /* Rewrites USE using candidate CAND. */
5740
5741 static void
5742 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
5743 {
5744 switch (use->type)
5745 {
5746 case USE_NONLINEAR_EXPR:
5747 rewrite_use_nonlinear_expr (data, use, cand);
5748 break;
5749
5750 case USE_ADDRESS:
5751 rewrite_use_address (data, use, cand);
5752 break;
5753
5754 case USE_COMPARE:
5755 rewrite_use_compare (data, use, cand);
5756 break;
5757
5758 default:
5759 gcc_unreachable ();
5760 }
5761
5762 update_stmt (use->stmt);
5763 }
5764
5765 /* Rewrite the uses using the selected induction variables. */
5766
5767 static void
5768 rewrite_uses (struct ivopts_data *data)
5769 {
5770 unsigned i;
5771 struct iv_cand *cand;
5772 struct iv_use *use;
5773
5774 for (i = 0; i < n_iv_uses (data); i++)
5775 {
5776 use = iv_use (data, i);
5777 cand = use->selected;
5778 gcc_assert (cand);
5779
5780 rewrite_use (data, use, cand);
5781 }
5782 }
5783
5784 /* Removes the ivs that are not used after rewriting. */
5785
5786 static void
5787 remove_unused_ivs (struct ivopts_data *data)
5788 {
5789 unsigned j;
5790 bitmap_iterator bi;
5791 bitmap toremove = BITMAP_ALLOC (NULL);
5792
5793 /* Figure out an order in which to release SSA DEFs so that we don't
5794 release something that we'd have to propagate into a debug stmt
5795 afterwards. */
5796 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5797 {
5798 struct version_info *info;
5799
5800 info = ver_info (data, j);
5801 if (info->iv
5802 && !integer_zerop (info->iv->step)
5803 && !info->inv_id
5804 && !info->iv->have_use_for
5805 && !info->preserve_biv)
5806 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
5807 }
5808
5809 release_defs_bitset (toremove);
5810
5811 BITMAP_FREE (toremove);
5812 }
5813
5814 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
5815 for pointer_map_traverse. */
5816
5817 static bool
5818 free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
5819 void *data ATTRIBUTE_UNUSED)
5820 {
5821 struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;
5822
5823 free (niter);
5824 return true;
5825 }
5826
5827 /* Frees data allocated by the optimization of a single loop. */
5828
5829 static void
5830 free_loop_data (struct ivopts_data *data)
5831 {
5832 unsigned i, j;
5833 bitmap_iterator bi;
5834 tree obj;
5835
5836 if (data->niters)
5837 {
5838 pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
5839 pointer_map_destroy (data->niters);
5840 data->niters = NULL;
5841 }
5842
5843 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5844 {
5845 struct version_info *info;
5846
5847 info = ver_info (data, i);
5848 if (info->iv)
5849 free (info->iv);
5850 info->iv = NULL;
5851 info->has_nonlin_use = false;
5852 info->preserve_biv = false;
5853 info->inv_id = 0;
5854 }
5855 bitmap_clear (data->relevant);
5856 bitmap_clear (data->important_candidates);
5857
5858 for (i = 0; i < n_iv_uses (data); i++)
5859 {
5860 struct iv_use *use = iv_use (data, i);
5861
5862 free (use->iv);
5863 BITMAP_FREE (use->related_cands);
5864 for (j = 0; j < use->n_map_members; j++)
5865 if (use->cost_map[j].depends_on)
5866 BITMAP_FREE (use->cost_map[j].depends_on);
5867 free (use->cost_map);
5868 free (use);
5869 }
5870 VEC_truncate (iv_use_p, data->iv_uses, 0);
5871
5872 for (i = 0; i < n_iv_cands (data); i++)
5873 {
5874 struct iv_cand *cand = iv_cand (data, i);
5875
5876 if (cand->iv)
5877 free (cand->iv);
5878 if (cand->depends_on)
5879 BITMAP_FREE (cand->depends_on);
5880 free (cand);
5881 }
5882 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5883
5884 if (data->version_info_size < num_ssa_names)
5885 {
5886 data->version_info_size = 2 * num_ssa_names;
5887 free (data->version_info);
5888 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5889 }
5890
5891 data->max_inv_id = 0;
5892
5893 for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5894 SET_DECL_RTL (obj, NULL_RTX);
5895
5896 VEC_truncate (tree, decl_rtl_to_reset, 0);
5897 }
5898
5899 /* Finalizes the data structures DATA used by the iv optimization pass. */
5901
5902 static void
5903 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
5904 {
5905 free_loop_data (data);
5906 free (data->version_info);
5907 BITMAP_FREE (data->relevant);
5908 BITMAP_FREE (data->important_candidates);
5909
5910 VEC_free (tree, heap, decl_rtl_to_reset);
5911 VEC_free (iv_use_p, heap, data->iv_uses);
5912 VEC_free (iv_cand_p, heap, data->iv_candidates);
5913 }
5914
5915 /* Returns true if the loop body BODY (with NUM_NODES blocks) includes any function calls other than inexpensive builtins. */
5916
5917 static bool
5918 loop_body_includes_call (basic_block *body, unsigned num_nodes)
5919 {
5920 gimple_stmt_iterator gsi;
5921 unsigned i;
5922
5923 for (i = 0; i < num_nodes; i++)
5924 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
5925 {
5926 gimple stmt = gsi_stmt (gsi);
5927 if (is_gimple_call (stmt)
5928 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
5929 return true;
5930 }
5931 return false;
5932 }
5933
5934 /* Optimizes the LOOP. Returns true if anything changed. */
5935
5936 static bool
5937 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5938 {
5939 bool changed = false;
5940 struct iv_ca *iv_ca;
5941 edge exit;
5942 basic_block *body;
5943
5944 gcc_assert (!data->niters);
5945 data->current_loop = loop;
5946 data->speed = optimize_loop_for_speed_p (loop);
5947
5948 if (dump_file && (dump_flags & TDF_DETAILS))
5949 {
5950 fprintf (dump_file, "Processing loop %d\n", loop->num);
5951
5952 exit = single_dom_exit (loop);
5953 if (exit)
5954 {
5955 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5956 exit->src->index, exit->dest->index);
5957 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
5958 fprintf (dump_file, "\n");
5959 }
5960
5961 fprintf (dump_file, "\n");
5962 }
5963
5964 body = get_loop_body (loop);
5965 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
5966 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
5967 free (body);
5968
5969 /* For each ssa name, determine whether it behaves as an induction
5970 variable in the current loop. */
5971 if (!find_induction_variables (data))
5972 goto finish;
5973
5974 /* Finds interesting uses (item 1). */
5975 find_interesting_uses (data);
5976 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5977 goto finish;
5978
5979 /* Finds candidates for the induction variables (item 2). */
5980 find_iv_candidates (data);
5981
5982 /* Calculates the costs (item 3, part 1). */
5983 determine_iv_costs (data);
5984 determine_use_iv_costs (data);
5985 determine_set_costs (data);
5986
5987 /* Find the optimal set of induction variables (item 3, part 2). */
5988 iv_ca = find_optimal_iv_set (data);
5989 if (!iv_ca)
5990 goto finish;
5991 changed = true;
5992
5993 /* Create the new induction variables (item 4, part 1). */
5994 create_new_ivs (data, iv_ca);
5995 iv_ca_free (&iv_ca);
5996
5997 /* Rewrite the uses (item 4, part 2). */
5998 rewrite_uses (data);
5999
6000 /* Remove the ivs that are unused after rewriting. */
6001 remove_unused_ivs (data);
6002
6003 /* We have changed the structure of induction variables; it might happen
6004 that definitions in the scev database refer to some of them that were
6005 eliminated. */
6006 scev_reset ();
6007
6008 finish:
6009 free_loop_data (data);
6010
6011 return changed;
6012 }
6013
6014 /* Main entry point. Optimizes induction variables in loops. */
6015
6016 void
6017 tree_ssa_iv_optimize (void)
6018 {
6019 struct loop *loop;
6020 struct ivopts_data data;
6021 loop_iterator li;
6022
6023 tree_ssa_iv_optimize_init (&data);
6024
6025 /* Optimize the loops starting with the innermost ones. */
6026 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
6027 {
6028 if (dump_file && (dump_flags & TDF_DETAILS))
6029 flow_loop_dump (loop, dump_file, NULL, 1);
6030
6031 tree_ssa_iv_optimize_loop (&data, loop);
6032 }
6033
6034 tree_ssa_iv_optimize_finalize (&data);
6035 }