IA MCU psABI support: changes to libraries
[gcc.git] / gcc / gimple-ssa-strength-reduction.c
1 /* Straight-line strength reduction.
2 Copyright (C) 2012-2015 Free Software Foundation, Inc.
3 Contributed by Bill Schmidt, IBM <wschmidt@linux.ibm.com>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 /* There are many algorithms for performing strength reduction on
22 loops. This is not one of them. IVOPTS handles strength reduction
23 of induction variables just fine. This pass is intended to pick
24 up the crumbs it leaves behind, by considering opportunities for
25 strength reduction along dominator paths.
26
27 Strength reduction addresses explicit multiplies, and certain
28 multiplies implicit in addressing expressions. It would also be
29 possible to apply strength reduction to divisions and modulos,
30 but such opportunities are relatively uncommon.
31
32 Strength reduction is also currently restricted to integer operations.
33 If desired, it could be extended to floating-point operations under
34 control of something like -funsafe-math-optimizations. */
35
36 #include "config.h"
37 #include "system.h"
38 #include "coretypes.h"
39 #include "alias.h"
40 #include "symtab.h"
41 #include "options.h"
42 #include "tree.h"
43 #include "fold-const.h"
44 #include "predict.h"
45 #include "tm.h"
46 #include "hard-reg-set.h"
47 #include "function.h"
48 #include "dominance.h"
49 #include "cfg.h"
50 #include "basic-block.h"
51 #include "tree-ssa-alias.h"
52 #include "internal-fn.h"
53 #include "gimple-expr.h"
54 #include "gimple.h"
55 #include "gimple-iterator.h"
56 #include "gimplify-me.h"
57 #include "stor-layout.h"
58 #include "rtl.h"
59 #include "flags.h"
60 #include "insn-config.h"
61 #include "expmed.h"
62 #include "dojump.h"
63 #include "explow.h"
64 #include "calls.h"
65 #include "emit-rtl.h"
66 #include "varasm.h"
67 #include "stmt.h"
68 #include "expr.h"
69 #include "tree-pass.h"
70 #include "cfgloop.h"
71 #include "gimple-pretty-print.h"
72 #include "gimple-ssa.h"
73 #include "tree-cfg.h"
74 #include "tree-phinodes.h"
75 #include "ssa-iterators.h"
76 #include "stringpool.h"
77 #include "tree-ssanames.h"
78 #include "domwalk.h"
79 #include "params.h"
80 #include "tree-ssa-address.h"
81 #include "tree-affine.h"
82 #include "wide-int-print.h"
83 #include "builtins.h"
84 \f
85 /* Information about a strength reduction candidate. Each statement
86 in the candidate table represents an expression of one of the
87 following forms (the special case of CAND_REF will be described
88 later):
89
90 (CAND_MULT) S1: X = (B + i) * S
91 (CAND_ADD) S1: X = B + (i * S)
92
93 Here X and B are SSA names, i is an integer constant, and S is
94 either an SSA name or a constant. We call B the "base," i the
95 "index", and S the "stride."
96
97 Any statement S0 that dominates S1 and is of the form:
98
99 (CAND_MULT) S0: Y = (B + i') * S
100 (CAND_ADD) S0: Y = B + (i' * S)
101
102 is called a "basis" for S1. In both cases, S1 may be replaced by
103
104 S1': X = Y + (i - i') * S,
105
106 where (i - i') * S is folded to the extent possible.
107
108 All gimple statements are visited in dominator order, and each
109 statement that may contribute to one of the forms of S1 above is
110 given at least one entry in the candidate table. Such statements
111 include addition, pointer addition, subtraction, multiplication,
112 negation, copies, and nontrivial type casts. If a statement may
113 represent more than one expression of the forms of S1 above,
114 multiple "interpretations" are stored in the table and chained
115 together. Examples:
116
117 * An add of two SSA names may treat either operand as the base.
118 * A multiply of two SSA names, likewise.
119 * A copy or cast may be thought of as either a CAND_MULT with
120 i = 0 and S = 1, or as a CAND_ADD with i = 0 or S = 0.
121
122 Candidate records are allocated from an obstack. They are addressed
123 both from a hash table keyed on S1, and from a vector of candidate
124 pointers arranged in predominator order.
125
126 Opportunity note
127 ----------------
128 Currently we don't recognize:
129
130 S0: Y = (S * i') - B
131 S1: X = (S * i) - B
132
133 as a strength reduction opportunity, even though this S1 would
134 also be replaceable by the S1' above. This can be added if it
135 comes up in practice.
136
137 Strength reduction in addressing
138 --------------------------------
139 There is another kind of candidate known as CAND_REF. A CAND_REF
140 describes a statement containing a memory reference having
141 complex addressing that might benefit from strength reduction.
142 Specifically, we are interested in references for which
143 get_inner_reference returns a base address, offset, and bitpos as
144 follows:
145
146 base: MEM_REF (T1, C1)
147 offset: MULT_EXPR (PLUS_EXPR (T2, C2), C3)
148 bitpos: C4 * BITS_PER_UNIT
149
150 Here T1 and T2 are arbitrary trees, and C1, C2, C3, C4 are
151 arbitrary integer constants. Note that C2 may be zero, in which
152 case the offset will be MULT_EXPR (T2, C3).
153
154 When this pattern is recognized, the original memory reference
155 can be replaced with:
156
157 MEM_REF (POINTER_PLUS_EXPR (T1, MULT_EXPR (T2, C3)),
158 C1 + (C2 * C3) + C4)
159
160 which distributes the multiply to allow constant folding. When
161 two or more addressing expressions can be represented by MEM_REFs
162 of this form, differing only in the constants C1, C2, and C4,
163 making this substitution produces more efficient addressing during
164 the RTL phases. When there are not at least two expressions with
165 the same values of T1, T2, and C3, there is nothing to be gained
166 by the replacement.
167
168 Strength reduction of CAND_REFs uses the same infrastructure as
169 that used by CAND_MULTs and CAND_ADDs. We record T1 in the base (B)
170 field, MULT_EXPR (T2, C3) in the stride (S) field, and
171 C1 + (C2 * C3) + C4 in the index (i) field. A basis for a CAND_REF
172 is thus another CAND_REF with the same B and S values. When at
173 least two CAND_REFs are chained together using the basis relation,
174 each of them is replaced as above, resulting in improved code
175 generation for addressing.
176
177 Conditional candidates
178 ======================
179
180 Conditional candidates are best illustrated with an example.
181 Consider the code sequence:
182
183 (1) x_0 = ...;
184 (2) a_0 = x_0 * 5; MULT (B: x_0; i: 0; S: 5)
185 if (...)
186 (3) x_1 = x_0 + 1; ADD (B: x_0, i: 1; S: 1)
187 (4) x_2 = PHI <x_0, x_1>; PHI (B: x_0, i: 0, S: 1)
188 (5) x_3 = x_2 + 1; ADD (B: x_2, i: 1, S: 1)
189 (6) a_1 = x_3 * 5; MULT (B: x_2, i: 1; S: 5)
190
191 Here strength reduction is complicated by the uncertain value of x_2.
192 A legitimate transformation is:
193
194 (1) x_0 = ...;
195 (2) a_0 = x_0 * 5;
196 if (...)
197 {
198 (3) [x_1 = x_0 + 1;]
199 (3a) t_1 = a_0 + 5;
200 }
201 (4) [x_2 = PHI <x_0, x_1>;]
202 (4a) t_2 = PHI <a_0, t_1>;
203 (5) [x_3 = x_2 + 1;]
204 (6r) a_1 = t_2 + 5;
205
206 where the bracketed instructions may go dead.
207
208 To recognize this opportunity, we have to observe that statement (6)
209 has a "hidden basis" (2). The hidden basis is unlike a normal basis
210 in that the statement and the hidden basis have different base SSA
211 names (x_2 and x_0, respectively). The relationship is established
212 when a statement's base name (x_2) is defined by a phi statement (4),
213 each argument of which (x_0, x_1) has an identical "derived base name."
214 If the argument is defined by a candidate (as x_1 is by (3)) that is a
215 CAND_ADD having a stride of 1, the derived base name of the argument is
216 the base name of the candidate (x_0). Otherwise, the argument itself
217 is its derived base name (as is the case with argument x_0).
218
219 The hidden basis for statement (6) is the nearest dominating candidate
220 whose base name is the derived base name (x_0) of the feeding phi (4),
221 and whose stride is identical to that of the statement. We can then
222 create the new "phi basis" (4a) and feeding adds along incoming arcs (3a),
223 allowing the final replacement of (6) by the strength-reduced (6r).
224
225 To facilitate this, a new kind of candidate (CAND_PHI) is introduced.
226 A CAND_PHI is not a candidate for replacement, but is maintained in the
227 candidate table to ease discovery of hidden bases. Any phi statement
228 whose arguments share a common derived base name is entered into the
229 table with the derived base name, an (arbitrary) index of zero, and a
230 stride of 1. A statement with a hidden basis can then be detected by
231 simply looking up its feeding phi definition in the candidate table,
232 extracting the derived base name, and searching for a basis in the
233 usual manner after substituting the derived base name.
234
235 Note that the transformation is only valid when the original phi and
236 the statements that define the phi's arguments are all at the same
237 position in the loop hierarchy. */
238
239
240 /* Index into the candidate vector, offset by 1. VECs are zero-based,
241 while cand_idx's are one-based, with zero indicating null. */
242 typedef unsigned cand_idx;
243
244 /* The kind of candidate. */
245 enum cand_kind
246 {
247 CAND_MULT,
248 CAND_ADD,
249 CAND_REF,
250 CAND_PHI
251 };
252
/* One interpretation of a candidate statement, in one of the forms
   described in the module commentary:  X = (B + i) * S  (CAND_MULT),
   X = B + (i * S)  (CAND_ADD), a memory reference (CAND_REF), or a
   phi used to track derived base names (CAND_PHI).  */
struct slsr_cand_d
{
  /* The candidate statement S1.  */
  gimple cand_stmt;

  /* The base expression B: often an SSA name, but not always.  */
  tree base_expr;

  /* The stride S.  */
  tree stride;

  /* The index constant i.  */
  widest_int index;

  /* The type of the candidate.  This is normally the type of base_expr,
     but casts may have occurred when combining feeding instructions.
     A candidate can only be a basis for candidates of the same final type.
     (For CAND_REFs, this is the type to be used for operand 1 of the
     replacement MEM_REF.)  */
  tree cand_type;

  /* The kind of candidate (CAND_MULT, etc.).  */
  enum cand_kind kind;

  /* Index of this candidate in the candidate vector (one-based).  */
  cand_idx cand_num;

  /* Index of the next candidate record for the same statement.
     A statement may be useful in more than one way (e.g., due to
     commutativity).  So we can have multiple "interpretations"
     of a statement.  Zero terminates the chain.  */
  cand_idx next_interp;

  /* Index of the basis statement S0, if any, in the candidate vector.
     Zero indicates that no basis has been found.  */
  cand_idx basis;

  /* First candidate for which this candidate is a basis, if one exists.  */
  cand_idx dependent;

  /* Next candidate having the same basis as this one.  */
  cand_idx sibling;

  /* If this is a conditional candidate, the CAND_PHI candidate
     that defines the base SSA name B.  */
  cand_idx def_phi;

  /* Savings that can be expected from eliminating dead code if this
     candidate is replaced.  */
  int dead_savings;
};
303
304 typedef struct slsr_cand_d slsr_cand, *slsr_cand_t;
305 typedef const struct slsr_cand_d *const_slsr_cand_t;
306
307 /* Pointers to candidates are chained together as part of a mapping
308 from base expressions to the candidates that use them. */
309
struct cand_chain_d
{
  /* Base expression for the chain of candidates: often, but not
     always, an SSA name.  */
  tree base_expr;

  /* Pointer to a candidate.  */
  slsr_cand_t cand;

  /* Chain pointer; null terminates the chain.  */
  struct cand_chain_d *next;

};
323
324 typedef struct cand_chain_d cand_chain, *cand_chain_t;
325 typedef const struct cand_chain_d *const_cand_chain_t;
326
327 /* Information about a unique "increment" associated with candidates
328 having an SSA name for a stride. An increment is the difference
329 between the index of the candidate and the index of its basis,
330 i.e., (i - i') as discussed in the module commentary.
331
332 When we are not going to generate address arithmetic we treat
333 increments that differ only in sign as the same, allowing sharing
334 of the cost of initializers. The absolute value of the increment
335 is stored in the incr_info. */
336
struct incr_info_d
{
  /* The increment that relates a candidate to its basis, i.e.,
     (i - i') as described in the module commentary.  */
  widest_int incr;

  /* How many times the increment occurs in the candidate tree.  */
  unsigned count;

  /* Cost of replacing candidates using this increment.  Negative and
     zero costs indicate replacement should be performed.  */
  int cost;

  /* If this increment is profitable but is not -1, 0, or 1, it requires
     an initializer T_0 = stride * incr to be found or introduced in the
     nearest common dominator of all candidates.  This field holds T_0
     for subsequent use.  */
  tree initializer;

  /* If the initializer was found to already exist, this is the block
     where it was found.  */
  basic_block init_bb;
};
359
360 typedef struct incr_info_d incr_info, *incr_info_t;
361
362 /* Candidates are maintained in a vector. If candidate X dominates
363 candidate Y, then X appears before Y in the vector; but the
364 converse does not necessarily hold. */
365 static vec<slsr_cand_t> cand_vec;
366
367 enum cost_consts
368 {
369 COST_NEUTRAL = 0,
370 COST_INFINITE = 1000
371 };
372
373 enum stride_status
374 {
375 UNKNOWN_STRIDE = 0,
376 KNOWN_STRIDE = 1
377 };
378
379 enum phi_adjust_status
380 {
381 NOT_PHI_ADJUST = 0,
382 PHI_ADJUST = 1
383 };
384
385 enum count_phis_status
386 {
387 DONT_COUNT_PHIS = 0,
388 COUNT_PHIS = 1
389 };
390
391 /* Pointer map embodying a mapping from statements to candidates. */
392 static hash_map<gimple, slsr_cand_t> *stmt_cand_map;
393
394 /* Obstack for candidates. */
395 static struct obstack cand_obstack;
396
397 /* Obstack for candidate chains. */
398 static struct obstack chain_obstack;
399
400 /* An array INCR_VEC of incr_infos is used during analysis of related
401 candidates having an SSA name for a stride. INCR_VEC_LEN describes
402 its current length. MAX_INCR_VEC_LEN is used to avoid costly
403 pathological cases. */
404 static incr_info_t incr_vec;
405 static unsigned incr_vec_len;
406 const int MAX_INCR_VEC_LEN = 16;
407
408 /* For a chain of candidates with unknown stride, indicates whether or not
409 we must generate pointer arithmetic when replacing statements. */
410 static bool address_arithmetic_p;
411
412 /* Forward function declarations. */
413 static slsr_cand_t base_cand_from_table (tree);
414 static tree introduce_cast_before_cand (slsr_cand_t, tree, tree);
415 static bool legal_cast_p_1 (tree, tree);
416 \f
417 /* Produce a pointer to the IDX'th candidate in the candidate vector. */
418
static slsr_cand_t
lookup_cand (cand_idx idx)
{
  /* Candidate numbers are one-based, with zero meaning "none"; the
     vector is zero-based.  IDX must be nonzero here.  */
  return cand_vec[idx - 1];
}
424
425 /* Helper for hashing a candidate chain header. */
426
struct cand_chain_hasher : nofree_ptr_hash <cand_chain>
{
  /* Hashing and equality consider only the chain's base expression.  */
  static inline hashval_t hash (const cand_chain *);
  static inline bool equal (const cand_chain *, const cand_chain *);
};
432
433 inline hashval_t
434 cand_chain_hasher::hash (const cand_chain *p)
435 {
436 tree base_expr = p->base_expr;
437 return iterative_hash_expr (base_expr, 0);
438 }
439
440 inline bool
441 cand_chain_hasher::equal (const cand_chain *chain1, const cand_chain *chain2)
442 {
443 return operand_equal_p (chain1->base_expr, chain2->base_expr, 0);
444 }
445
446 /* Hash table embodying a mapping from base exprs to chains of candidates. */
447 static hash_table<cand_chain_hasher> *base_cand_map;
448 \f
449 /* Pointer map used by tree_to_aff_combination_expand. */
450 static hash_map<tree, name_expansion *> *name_expansions;
451 /* Pointer map embodying a mapping from bases to alternative bases. */
452 static hash_map<tree, tree> *alt_base_map;
453
454 /* Given BASE, use the tree affine combination facilities to
455 find the underlying tree expression for BASE, with any
456 immediate offset excluded.
457
458 N.B. we should eliminate this backtracking with better forward
459 analysis in a future release. */
460
461 static tree
462 get_alternative_base (tree base)
463 {
464 tree *result = alt_base_map->get (base);
465
466 if (result == NULL)
467 {
468 tree expr;
469 aff_tree aff;
470
471 tree_to_aff_combination_expand (base, TREE_TYPE (base),
472 &aff, &name_expansions);
473 aff.offset = 0;
474 expr = aff_combination_to_tree (&aff);
475
476 gcc_assert (!alt_base_map->put (base, base == expr ? NULL : expr));
477
478 return expr == base ? NULL : expr;
479 }
480
481 return *result;
482 }
483
484 /* Look in the candidate table for a CAND_PHI that defines BASE and
485 return it if found; otherwise return NULL. */
486
487 static cand_idx
488 find_phi_def (tree base)
489 {
490 slsr_cand_t c;
491
492 if (TREE_CODE (base) != SSA_NAME)
493 return 0;
494
495 c = base_cand_from_table (base);
496
497 if (!c || c->kind != CAND_PHI)
498 return 0;
499
500 return c->cand_num;
501 }
502
503 /* Helper routine for find_basis_for_candidate. May be called twice:
504 once for the candidate's base expr, and optionally again either for
505 the candidate's phi definition or for a CAND_REF's alternative base
506 expression. */
507
508 static slsr_cand_t
509 find_basis_for_base_expr (slsr_cand_t c, tree base_expr)
510 {
511 cand_chain mapping_key;
512 cand_chain_t chain;
513 slsr_cand_t basis = NULL;
514
515 // Limit potential of N^2 behavior for long candidate chains.
516 int iters = 0;
517 int max_iters = PARAM_VALUE (PARAM_MAX_SLSR_CANDIDATE_SCAN);
518
519 mapping_key.base_expr = base_expr;
520 chain = base_cand_map->find (&mapping_key);
521
522 for (; chain && iters < max_iters; chain = chain->next, ++iters)
523 {
524 slsr_cand_t one_basis = chain->cand;
525
526 if (one_basis->kind != c->kind
527 || one_basis->cand_stmt == c->cand_stmt
528 || !operand_equal_p (one_basis->stride, c->stride, 0)
529 || !types_compatible_p (one_basis->cand_type, c->cand_type)
530 || !dominated_by_p (CDI_DOMINATORS,
531 gimple_bb (c->cand_stmt),
532 gimple_bb (one_basis->cand_stmt)))
533 continue;
534
535 if (!basis || basis->cand_num < one_basis->cand_num)
536 basis = one_basis;
537 }
538
539 return basis;
540 }
541
542 /* Use the base expr from candidate C to look for possible candidates
543 that can serve as a basis for C. Each potential basis must also
544 appear in a block that dominates the candidate statement and have
545 the same stride and type. If more than one possible basis exists,
546 the one with highest index in the vector is chosen; this will be
547 the most immediately dominating basis. */
548
static int
find_basis_for_candidate (slsr_cand_t c)
{
  slsr_cand_t basis = find_basis_for_base_expr (c, c->base_expr);

  /* If a candidate doesn't have a basis using its base expression,
     it may have a basis hidden by one or more intervening phis.
     Retry the search using the derived base name recorded in the
     feeding CAND_PHI.  */
  if (!basis && c->def_phi)
    {
      basic_block basis_bb, phi_bb;
      slsr_cand_t phi_cand = lookup_cand (c->def_phi);
      basis = find_basis_for_base_expr (c, phi_cand->base_expr);

      if (basis)
	{
	  /* A hidden basis must dominate the phi-definition of the
	     candidate's base name.  */
	  phi_bb = gimple_bb (phi_cand->cand_stmt);
	  basis_bb = gimple_bb (basis->cand_stmt);

	  if (phi_bb == basis_bb
	      || !dominated_by_p (CDI_DOMINATORS, phi_bb, basis_bb))
	    {
	      basis = NULL;
	      c->basis = 0;
	    }

	  /* If we found a hidden basis, estimate additional dead-code
	     savings if the phi and its feeding statements can be removed.  */
	  if (basis && has_single_use (gimple_phi_result (phi_cand->cand_stmt)))
	    c->dead_savings += phi_cand->dead_savings;
	}
    }

  /* As a last resort for a CAND_REF, try the alternative base (the
     underlying expression with any immediate offset stripped).  */
  if (flag_expensive_optimizations && !basis && c->kind == CAND_REF)
    {
      tree alt_base_expr = get_alternative_base (c->base_expr);
      if (alt_base_expr)
	basis = find_basis_for_base_expr (c, alt_base_expr);
    }

  if (basis)
    {
      /* Link C onto the head of the basis's dependent list.  */
      c->sibling = basis->dependent;
      basis->dependent = c->cand_num;
      return basis->cand_num;
    }

  return 0;
}
599
600 /* Record a mapping from BASE to C, indicating that C may potentially serve
601 as a basis using that base expression. BASE may be the same as
602    C->BASE_EXPR; alternatively BASE can be a different tree that shares the
603    underlying expression of C->BASE_EXPR. */
604
605 static void
606 record_potential_basis (slsr_cand_t c, tree base)
607 {
608 cand_chain_t node;
609 cand_chain **slot;
610
611 gcc_assert (base);
612
613 node = (cand_chain_t) obstack_alloc (&chain_obstack, sizeof (cand_chain));
614 node->base_expr = base;
615 node->cand = c;
616 node->next = NULL;
617 slot = base_cand_map->find_slot (node, INSERT);
618
619 if (*slot)
620 {
621 cand_chain_t head = (cand_chain_t) (*slot);
622 node->next = head->next;
623 head->next = node;
624 }
625 else
626 *slot = node;
627 }
628
629 /* Allocate storage for a new candidate and initialize its fields.
630 Attempt to find a basis for the candidate.
631
632 For CAND_REF, an alternative base may also be recorded and used
633 to find a basis. This helps cases where the expression hidden
634 behind BASE (which is usually an SSA_NAME) has immediate offset,
635 e.g.
636
637 a2[i][j] = 1;
638 a2[i + 20][j] = 2; */
639
static slsr_cand_t
alloc_cand_and_find_basis (enum cand_kind kind, gimple gs, tree base,
			   const widest_int &index, tree stride, tree ctype,
			   unsigned savings)
{
  slsr_cand_t c = (slsr_cand_t) obstack_alloc (&cand_obstack,
					       sizeof (slsr_cand));
  c->cand_stmt = gs;
  c->base_expr = base;
  c->stride = stride;
  c->index = index;
  c->cand_type = ctype;
  c->kind = kind;
  /* Candidate numbers are one-based; zero means "none".  */
  c->cand_num = cand_vec.length () + 1;
  c->next_interp = 0;
  c->dependent = 0;
  c->sibling = 0;
  /* Only multiplies look for a conditional (hidden) basis through a
     feeding phi; see the "Conditional candidates" commentary above.  */
  c->def_phi = kind == CAND_MULT ? find_phi_def (base) : 0;
  c->dead_savings = savings;

  cand_vec.safe_push (c);

  /* A CAND_PHI is never itself replaced, so never needs a basis.  */
  if (kind == CAND_PHI)
    c->basis = 0;
  else
    c->basis = find_basis_for_candidate (c);

  record_potential_basis (c, base);
  /* Also record a CAND_REF under its alternative base, so later
     references that differ only by an immediate offset can still
     find it as a basis.  */
  if (flag_expensive_optimizations && kind == CAND_REF)
    {
      tree alt_base = get_alternative_base (base);
      if (alt_base)
	record_potential_basis (c, alt_base);
    }

  return c;
}
677
678 /* Determine the target cost of statement GS when compiling according
679 to SPEED. */
680
681 static int
682 stmt_cost (gimple gs, bool speed)
683 {
684 tree lhs, rhs1, rhs2;
685 machine_mode lhs_mode;
686
687 gcc_assert (is_gimple_assign (gs));
688 lhs = gimple_assign_lhs (gs);
689 rhs1 = gimple_assign_rhs1 (gs);
690 lhs_mode = TYPE_MODE (TREE_TYPE (lhs));
691
692 switch (gimple_assign_rhs_code (gs))
693 {
694 case MULT_EXPR:
695 rhs2 = gimple_assign_rhs2 (gs);
696
697 if (tree_fits_shwi_p (rhs2))
698 return mult_by_coeff_cost (tree_to_shwi (rhs2), lhs_mode, speed);
699
700 gcc_assert (TREE_CODE (rhs1) != INTEGER_CST);
701 return mul_cost (speed, lhs_mode);
702
703 case PLUS_EXPR:
704 case POINTER_PLUS_EXPR:
705 case MINUS_EXPR:
706 return add_cost (speed, lhs_mode);
707
708 case NEGATE_EXPR:
709 return neg_cost (speed, lhs_mode);
710
711 CASE_CONVERT:
712 return convert_cost (lhs_mode, TYPE_MODE (TREE_TYPE (rhs1)), speed);
713
714 /* Note that we don't assign costs to copies that in most cases
715 will go away. */
716 default:
717 ;
718 }
719
720 gcc_unreachable ();
721 return 0;
722 }
723
724 /* Look up the defining statement for BASE_IN and return a pointer
725 to its candidate in the candidate table, if any; otherwise NULL.
726 Only CAND_ADD and CAND_MULT candidates are returned. */
727
728 static slsr_cand_t
729 base_cand_from_table (tree base_in)
730 {
731 slsr_cand_t *result;
732
733 gimple def = SSA_NAME_DEF_STMT (base_in);
734 if (!def)
735 return (slsr_cand_t) NULL;
736
737 result = stmt_cand_map->get (def);
738
739 if (result && (*result)->kind != CAND_REF)
740 return *result;
741
742 return (slsr_cand_t) NULL;
743 }
744
745 /* Add an entry to the statement-to-candidate mapping. */
746
747 static void
748 add_cand_for_stmt (gimple gs, slsr_cand_t c)
749 {
750 gcc_assert (!stmt_cand_map->put (gs, c));
751 }
752 \f
753 /* Given PHI which contains a phi statement, determine whether it
754 satisfies all the requirements of a phi candidate. If so, create
755 a candidate. Note that a CAND_PHI never has a basis itself, but
756 is used to help find a basis for subsequent candidates. */
757
758 static void
759 slsr_process_phi (gphi *phi, bool speed)
760 {
761 unsigned i;
762 tree arg0_base = NULL_TREE, base_type;
763 slsr_cand_t c;
764 struct loop *cand_loop = gimple_bb (phi)->loop_father;
765 unsigned savings = 0;
766
767 /* A CAND_PHI requires each of its arguments to have the same
768 derived base name. (See the module header commentary for a
769 definition of derived base names.) Furthermore, all feeding
770 definitions must be in the same position in the loop hierarchy
771 as PHI. */
772
773 for (i = 0; i < gimple_phi_num_args (phi); i++)
774 {
775 slsr_cand_t arg_cand;
776 tree arg = gimple_phi_arg_def (phi, i);
777 tree derived_base_name = NULL_TREE;
778 gimple arg_stmt = NULL;
779 basic_block arg_bb = NULL;
780
781 if (TREE_CODE (arg) != SSA_NAME)
782 return;
783
784 arg_cand = base_cand_from_table (arg);
785
786 if (arg_cand)
787 {
788 while (arg_cand->kind != CAND_ADD && arg_cand->kind != CAND_PHI)
789 {
790 if (!arg_cand->next_interp)
791 return;
792
793 arg_cand = lookup_cand (arg_cand->next_interp);
794 }
795
796 if (!integer_onep (arg_cand->stride))
797 return;
798
799 derived_base_name = arg_cand->base_expr;
800 arg_stmt = arg_cand->cand_stmt;
801 arg_bb = gimple_bb (arg_stmt);
802
803 /* Gather potential dead code savings if the phi statement
804 can be removed later on. */
805 if (has_single_use (arg))
806 {
807 if (gimple_code (arg_stmt) == GIMPLE_PHI)
808 savings += arg_cand->dead_savings;
809 else
810 savings += stmt_cost (arg_stmt, speed);
811 }
812 }
813 else
814 {
815 derived_base_name = arg;
816
817 if (SSA_NAME_IS_DEFAULT_DEF (arg))
818 arg_bb = single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun));
819 else
820 gimple_bb (SSA_NAME_DEF_STMT (arg));
821 }
822
823 if (!arg_bb || arg_bb->loop_father != cand_loop)
824 return;
825
826 if (i == 0)
827 arg0_base = derived_base_name;
828 else if (!operand_equal_p (derived_base_name, arg0_base, 0))
829 return;
830 }
831
832 /* Create the candidate. "alloc_cand_and_find_basis" is named
833 misleadingly for this case, as no basis will be sought for a
834 CAND_PHI. */
835 base_type = TREE_TYPE (arg0_base);
836
837 c = alloc_cand_and_find_basis (CAND_PHI, phi, arg0_base,
838 0, integer_one_node, base_type, savings);
839
840 /* Add the candidate to the statement-candidate mapping. */
841 add_cand_for_stmt (phi, c);
842 }
843
844 /* Given PBASE which is a pointer to tree, look up the defining
845 statement for it and check whether the candidate is in the
846 form of:
847
848 X = B + (1 * S), S is integer constant
849 X = B + (i * S), S is integer one
850
851    If so, set PBASE to the candidate's base_expr and return the
852    widest_int (i * S).
853    Otherwise, just return a widest_int of zero. */
854
static widest_int
backtrace_base_for_ref (tree *pbase)
{
  tree base_in = *pbase;
  slsr_cand_t base_cand;

  /* N.B. STRIP_NOPS modifies BASE_IN in place.  */
  STRIP_NOPS (base_in);

  /* Strip off widening conversion(s) to handle cases where
     e.g. 'B' is widened from an 'int' in order to calculate
     a 64-bit address.  */
  if (CONVERT_EXPR_P (base_in)
      && legal_cast_p_1 (base_in, TREE_OPERAND (base_in, 0)))
    base_in = get_unwidened (base_in, NULL_TREE);

  if (TREE_CODE (base_in) != SSA_NAME)
    return 0;

  base_cand = base_cand_from_table (base_in);

  /* Walk all interpretations of the defining statement, looking for
     a CAND_ADD whose index-times-stride is a compile-time constant.  */
  while (base_cand && base_cand->kind != CAND_PHI)
    {
      if (base_cand->kind == CAND_ADD
	  && base_cand->index == 1
	  && TREE_CODE (base_cand->stride) == INTEGER_CST)
	{
	  /* X = B + (1 * S), S is integer constant.  */
	  *pbase = base_cand->base_expr;
	  return wi::to_widest (base_cand->stride);
	}
      else if (base_cand->kind == CAND_ADD
	       && TREE_CODE (base_cand->stride) == INTEGER_CST
	       && integer_onep (base_cand->stride))
	{
	  /* X = B + (i * S), S is integer one.  */
	  *pbase = base_cand->base_expr;
	  return base_cand->index;
	}

      if (base_cand->next_interp)
	base_cand = lookup_cand (base_cand->next_interp);
      else
	base_cand = NULL;
    }

  /* No match; leave *PBASE unchanged.  */
  return 0;
}
902
903 /* Look for the following pattern:
904
905 *PBASE: MEM_REF (T1, C1)
906
907 *POFFSET: MULT_EXPR (T2, C3) [C2 is zero]
908 or
909 MULT_EXPR (PLUS_EXPR (T2, C2), C3)
910 or
911 MULT_EXPR (MINUS_EXPR (T2, -C2), C3)
912
913 *PINDEX: C4 * BITS_PER_UNIT
914
915 If not present, leave the input values unchanged and return FALSE.
916 Otherwise, modify the input values as follows and return TRUE:
917
918 *PBASE: T1
919 *POFFSET: MULT_EXPR (T2, C3)
920 *PINDEX: C1 + (C2 * C3) + C4
921
922 When T2 is recorded by a CAND_ADD in the form of (T2' + C5), it
923 will be further restructured to:
924
925 *PBASE: T1
926 *POFFSET: MULT_EXPR (T2', C3)
927 *PINDEX: C1 + (C2 * C3) + C4 + (C5 * C3) */
928
929 static bool
930 restructure_reference (tree *pbase, tree *poffset, widest_int *pindex,
931 tree *ptype)
932 {
933 tree base = *pbase, offset = *poffset;
934 widest_int index = *pindex;
935 tree mult_op0, t1, t2, type;
936 widest_int c1, c2, c3, c4, c5;
937
938 if (!base
939 || !offset
940 || TREE_CODE (base) != MEM_REF
941 || TREE_CODE (offset) != MULT_EXPR
942 || TREE_CODE (TREE_OPERAND (offset, 1)) != INTEGER_CST
943 || wi::umod_floor (index, BITS_PER_UNIT) != 0)
944 return false;
945
946 t1 = TREE_OPERAND (base, 0);
947 c1 = widest_int::from (mem_ref_offset (base), SIGNED);
948 type = TREE_TYPE (TREE_OPERAND (base, 1));
949
950 mult_op0 = TREE_OPERAND (offset, 0);
951 c3 = wi::to_widest (TREE_OPERAND (offset, 1));
952
953 if (TREE_CODE (mult_op0) == PLUS_EXPR)
954
955 if (TREE_CODE (TREE_OPERAND (mult_op0, 1)) == INTEGER_CST)
956 {
957 t2 = TREE_OPERAND (mult_op0, 0);
958 c2 = wi::to_widest (TREE_OPERAND (mult_op0, 1));
959 }
960 else
961 return false;
962
963 else if (TREE_CODE (mult_op0) == MINUS_EXPR)
964
965 if (TREE_CODE (TREE_OPERAND (mult_op0, 1)) == INTEGER_CST)
966 {
967 t2 = TREE_OPERAND (mult_op0, 0);
968 c2 = -wi::to_widest (TREE_OPERAND (mult_op0, 1));
969 }
970 else
971 return false;
972
973 else
974 {
975 t2 = mult_op0;
976 c2 = 0;
977 }
978
979 c4 = wi::lrshift (index, LOG2_BITS_PER_UNIT);
980 c5 = backtrace_base_for_ref (&t2);
981
982 *pbase = t1;
983 *poffset = fold_build2 (MULT_EXPR, sizetype, fold_convert (sizetype, t2),
984 wide_int_to_tree (sizetype, c3));
985 *pindex = c1 + c2 * c3 + c4 + c5 * c3;
986 *ptype = type;
987
988 return true;
989 }
990
991 /* Given GS which contains a data reference, create a CAND_REF entry in
992 the candidate table and attempt to find a basis. */
993
static void
slsr_process_ref (gimple gs)
{
  tree ref_expr, base, offset, type;
  HOST_WIDE_INT bitsize, bitpos;
  machine_mode mode;
  int unsignedp, volatilep;
  slsr_cand_t c;

  /* The memory reference is on the LHS for a store, else on the RHS.  */
  if (gimple_vdef (gs))
    ref_expr = gimple_assign_lhs (gs);
  else
    ref_expr = gimple_assign_rhs1 (gs);

  /* Skip bit-field references, whose addressing cannot safely be
     restructured.  */
  if (!handled_component_p (ref_expr)
      || TREE_CODE (ref_expr) == BIT_FIELD_REF
      || (TREE_CODE (ref_expr) == COMPONENT_REF
	  && DECL_BIT_FIELD (TREE_OPERAND (ref_expr, 1))))
    return;

  base = get_inner_reference (ref_expr, &bitsize, &bitpos, &offset, &mode,
			      &unsignedp, &volatilep, false);
  widest_int index = bitpos;

  /* Only references matching the pattern documented above
     restructure_reference become CAND_REF candidates.  */
  if (!restructure_reference (&base, &offset, &index, &type))
    return;

  c = alloc_cand_and_find_basis (CAND_REF, gs, base, index, offset,
				 type, 0);

  /* Add the candidate to the statement-candidate mapping.  */
  add_cand_for_stmt (gs, c);
}
1027
1028 /* Create a candidate entry for a statement GS, where GS multiplies
1029 two SSA names BASE_IN and STRIDE_IN. Propagate any known information
1030 about the two SSA names into the new candidate. Return the new
1031 candidate. */
1032
1033 static slsr_cand_t
1034 create_mul_ssa_cand (gimple gs, tree base_in, tree stride_in, bool speed)
1035 {
1036 tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
1037 widest_int index;
1038 unsigned savings = 0;
1039 slsr_cand_t c;
1040 slsr_cand_t base_cand = base_cand_from_table (base_in);
1041
1042 /* Look at all interpretations of the base candidate, if necessary,
1043 to find information to propagate into this candidate. */
1044 while (base_cand && !base && base_cand->kind != CAND_PHI)
1045 {
1046
1047 if (base_cand->kind == CAND_MULT && integer_onep (base_cand->stride))
1048 {
1049 /* Y = (B + i') * 1
1050 X = Y * Z
1051 ================
1052 X = (B + i') * Z */
1053 base = base_cand->base_expr;
1054 index = base_cand->index;
1055 stride = stride_in;
1056 ctype = base_cand->cand_type;
     /* If BASE_IN dies with this statement, replacing it also
        recoups the cost of its defining statement.  */
1057 if (has_single_use (base_in))
1058 savings = (base_cand->dead_savings
1059 + stmt_cost (base_cand->cand_stmt, speed));
1060 }
1061 else if (base_cand->kind == CAND_ADD
1062 && TREE_CODE (base_cand->stride) == INTEGER_CST)
1063 {
1064 /* Y = B + (i' * S), S constant
1065 X = Y * Z
1066 ============================
1067 X = B + ((i' * S) * Z) */
1068 base = base_cand->base_expr;
1069 index = base_cand->index * wi::to_widest (base_cand->stride);
1070 stride = stride_in;
1071 ctype = base_cand->cand_type;
1072 if (has_single_use (base_in))
1073 savings = (base_cand->dead_savings
1074 + stmt_cost (base_cand->cand_stmt, speed));
1075 }
1076
     /* Advance to the next interpretation of the same defining
        statement, if any remain.  */
1077 if (base_cand->next_interp)
1078 base_cand = lookup_cand (base_cand->next_interp)
1079 else
1080 base_cand = NULL;
1081 }
1082
1083 if (!base)
1084 {
1085 /* No interpretations had anything useful to propagate, so
1086 produce X = (Y + 0) * Z. */
1087 base = base_in;
1088 index = 0;
1089 stride = stride_in;
1090 ctype = TREE_TYPE (base_in);
1091 }
1092
1093 c = alloc_cand_and_find_basis (CAND_MULT, gs, base, index, stride,
1094 ctype, savings);
1095 return c;
1096 }
1097
1098 /* Create a candidate entry for a statement GS, where GS multiplies
1099 SSA name BASE_IN by constant STRIDE_IN. Propagate any known
1100 information about BASE_IN into the new candidate. Return the new
1101 candidate. */
1102
1103 static slsr_cand_t
1104 create_mul_imm_cand (gimple gs, tree base_in, tree stride_in, bool speed)
1105 {
1106 tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
1107 widest_int index, temp;
1108 unsigned savings = 0;
1109 slsr_cand_t c;
1110 slsr_cand_t base_cand = base_cand_from_table (base_in);
1111
1112 /* Look at all interpretations of the base candidate, if necessary,
1113 to find information to propagate into this candidate. */
1114 while (base_cand && !base && base_cand->kind != CAND_PHI)
1115 {
1116 if (base_cand->kind == CAND_MULT
1117 && TREE_CODE (base_cand->stride) == INTEGER_CST)
1118 {
1119 /* Y = (B + i') * S, S constant
1120 X = Y * c
1121 ============================
1122 X = (B + i') * (S * c) */
     /* Combine the two constant strides only if their product fits
        the stride's type; otherwise this interpretation is skipped
        and the loop tries the next one.  */
1123 temp = wi::to_widest (base_cand->stride) * wi::to_widest (stride_in);
1124 if (wi::fits_to_tree_p (temp, TREE_TYPE (stride_in)))
1125 {
1126 base = base_cand->base_expr;
1127 index = base_cand->index;
1128 stride = wide_int_to_tree (TREE_TYPE (stride_in), temp);
1129 ctype = base_cand->cand_type;
1130 if (has_single_use (base_in))
1131 savings = (base_cand->dead_savings
1132 + stmt_cost (base_cand->cand_stmt, speed));
1133 }
1134 }
1135 else if (base_cand->kind == CAND_ADD && integer_onep (base_cand->stride))
1136 {
1137 /* Y = B + (i' * 1)
1138 X = Y * c
1139 ===========================
1140 X = (B + i') * c */
1141 base = base_cand->base_expr;
1142 index = base_cand->index;
1143 stride = stride_in;
1144 ctype = base_cand->cand_type;
1145 if (has_single_use (base_in))
1146 savings = (base_cand->dead_savings
1147 + stmt_cost (base_cand->cand_stmt, speed));
1148 }
1149 else if (base_cand->kind == CAND_ADD
1150 && base_cand->index == 1
1151 && TREE_CODE (base_cand->stride) == INTEGER_CST)
1152 {
1153 /* Y = B + (1 * S), S constant
1154 X = Y * c
1155 ===========================
1156 X = (B + S) * c */
     /* The constant stride S folds into the index of the new
        candidate, whose stride becomes the immediate c.  */
1157 base = base_cand->base_expr;
1158 index = wi::to_widest (base_cand->stride);
1159 stride = stride_in;
1160 ctype = base_cand->cand_type;
1161 if (has_single_use (base_in))
1162 savings = (base_cand->dead_savings
1163 + stmt_cost (base_cand->cand_stmt, speed));
1164 }
1165
1166 if (base_cand->next_interp)
1167 base_cand = lookup_cand (base_cand->next_interp);
1168 else
1169 base_cand = NULL;
1170 }
1171
1172 if (!base)
1173 {
1174 /* No interpretations had anything useful to propagate, so
1175 produce X = (Y + 0) * c. */
1176 base = base_in;
1177 index = 0;
1178 stride = stride_in;
1179 ctype = TREE_TYPE (base_in);
1180 }
1181
1182 c = alloc_cand_and_find_basis (CAND_MULT, gs, base, index, stride,
1183 ctype, savings);
1184 return c;
1185 }
1186
1187 /* Given GS which is a multiply of scalar integers, make an appropriate
1188 entry in the candidate table. If this is a multiply of two SSA names,
1189 create two CAND_MULT interpretations and attempt to find a basis for
1190 each of them. Otherwise, create a single CAND_MULT and attempt to
1191 find a basis. */
1192
1193 static void
1194 slsr_process_mul (gimple gs, tree rhs1, tree rhs2, bool speed)
1195 {
1196 slsr_cand_t c, c2;
1197
1198 /* If this is a multiply of an SSA name with itself, it is highly
1199 unlikely that we will get a strength reduction opportunity, so
1200 don't record it as a candidate. This simplifies the logic for
1201 finding a basis, so if this is removed that must be considered. */
1202 if (rhs1 == rhs2)
1203 return;
1204
1205 if (TREE_CODE (rhs2) == SSA_NAME)
1206 {
1207 /* Record an interpretation of this statement in the candidate table
1208 assuming RHS1 is the base expression and RHS2 is the stride. */
1209 c = create_mul_ssa_cand (gs, rhs1, rhs2, speed);
1210
1211 /* Add the first interpretation to the statement-candidate mapping. */
1212 add_cand_for_stmt (gs, c);
1213
1214 /* Record another interpretation of this statement assuming RHS1
1215 is the stride and RHS2 is the base expression. */
1216 c2 = create_mul_ssa_cand (gs, rhs2, rhs1, speed);
1217 c->next_interp = c2->cand_num;
1218 }
1219 else
1220 {
1221 /* Record an interpretation for the multiply-immediate. */
1222 c = create_mul_imm_cand (gs, rhs1, rhs2, speed);
1223
1224 /* Add the interpretation to the statement-candidate mapping. */
1225 add_cand_for_stmt (gs, c);
1226 }
1227 }
1228
1229 /* Create a candidate entry for a statement GS, where GS adds two
1230 SSA names BASE_IN and ADDEND_IN if SUBTRACT_P is false, and
1231 subtracts ADDEND_IN from BASE_IN otherwise. Propagate any known
1232 information about the two SSA names into the new candidate.
1233 Return the new candidate. */
1234
1235 static slsr_cand_t
1236 create_add_ssa_cand (gimple gs, tree base_in, tree addend_in,
1237 bool subtract_p, bool speed)
1238 {
1239 tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL;
1240 widest_int index;
1241 unsigned savings = 0;
1242 slsr_cand_t c;
1243 slsr_cand_t base_cand = base_cand_from_table (base_in);
1244 slsr_cand_t addend_cand = base_cand_from_table (addend_in);
1245
1246 /* The most useful transformation is a multiply-immediate feeding
1247 an add or subtract. Look for that first. */
1248 while (addend_cand && !base && addend_cand->kind != CAND_PHI)
1249 {
1250 if (addend_cand->kind == CAND_MULT
1251 && addend_cand->index == 0
1252 && TREE_CODE (addend_cand->stride) == INTEGER_CST)
1253 {
1254 /* Z = (B + 0) * S, S constant
1255 X = Y +/- Z
1256 ===========================
1257 X = Y + ((+/-1 * S) * B) */
     /* The multiply's constant stride becomes the index of the new
        CAND_ADD, negated for a subtract.  */
1258 base = base_in;
1259 index = wi::to_widest (addend_cand->stride);
1260 if (subtract_p)
1261 index = -index;
1262 stride = addend_cand->base_expr;
1263 ctype = TREE_TYPE (base_in);
1264 if (has_single_use (addend_in))
1265 savings = (addend_cand->dead_savings
1266 + stmt_cost (addend_cand->cand_stmt, speed));
1267 }
1268
1269 if (addend_cand->next_interp)
1270 addend_cand = lookup_cand (addend_cand->next_interp);
1271 else
1272 addend_cand = NULL;
1273 }
1274
1275 while (base_cand && !base && base_cand->kind != CAND_PHI)
1276 {
1277 if (base_cand->kind == CAND_ADD
1278 && (base_cand->index == 0
1279 || operand_equal_p (base_cand->stride,
1280 integer_zero_node, 0)))
1281 {
1282 /* Y = B + (i' * S), i' * S = 0
1283 X = Y +/- Z
1284 ============================
1285 X = B + (+/-1 * Z) */
1286 base = base_cand->base_expr;
1287 index = subtract_p ? -1 : 1;
1288 stride = addend_in;
1289 ctype = base_cand->cand_type;
1290 if (has_single_use (base_in))
1291 savings = (base_cand->dead_savings
1292 + stmt_cost (base_cand->cand_stmt, speed));
1293 }
1294 else if (subtract_p)
1295 {
     /* NOTE(review): this inner loop re-walks the interpretations of
        ADDEND_IN with the same CAND_MULT pattern the first loop above
        already checks (which also negates the index for subtract_p),
        so it appears redundant — confirm before simplifying.  */
1296 slsr_cand_t subtrahend_cand = base_cand_from_table (addend_in);
1297
1298 while (subtrahend_cand && !base && subtrahend_cand->kind != CAND_PHI)
1299 {
1300 if (subtrahend_cand->kind == CAND_MULT
1301 && subtrahend_cand->index == 0
1302 && TREE_CODE (subtrahend_cand->stride) == INTEGER_CST)
1303 {
1304 /* Z = (B + 0) * S, S constant
1305 X = Y - Z
1306 ===========================
1307 Value: X = Y + ((-1 * S) * B) */
1308 base = base_in;
1309 index = wi::to_widest (subtrahend_cand->stride);
1310 index = -index;
1311 stride = subtrahend_cand->base_expr;
1312 ctype = TREE_TYPE (base_in);
1313 if (has_single_use (addend_in))
1314 savings = (subtrahend_cand->dead_savings
1315 + stmt_cost (subtrahend_cand->cand_stmt, speed));
1316 }
1317
1318 if (subtrahend_cand->next_interp)
1319 subtrahend_cand = lookup_cand (subtrahend_cand->next_interp);
1320 else
1321 subtrahend_cand = NULL;
1322 }
1323 }
1324
1325 if (base_cand->next_interp)
1326 base_cand = lookup_cand (base_cand->next_interp);
1327 else
1328 base_cand = NULL;
1329 }
1330
1331 if (!base)
1332 {
1333 /* No interpretations had anything useful to propagate, so
1334 produce X = Y + (1 * Z). */
1335 base = base_in;
1336 index = subtract_p ? -1 : 1;
1337 stride = addend_in;
1338 ctype = TREE_TYPE (base_in);
1339 }
1340
1341 c = alloc_cand_and_find_basis (CAND_ADD, gs, base, index, stride,
1342 ctype, savings);
1343 return c;
1344 }
1345
1346 /* Create a candidate entry for a statement GS, where GS adds SSA
1347 name BASE_IN to constant INDEX_IN. Propagate any known information
1348 about BASE_IN into the new candidate. Return the new candidate. */
1349
1350 static slsr_cand_t
1351 create_add_imm_cand (gimple gs, tree base_in, const widest_int &index_in,
1352 bool speed)
1353 {
1354 enum cand_kind kind = CAND_ADD;
1355 tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
1356 widest_int index, multiple;
1357 unsigned savings = 0;
1358 slsr_cand_t c;
1359 slsr_cand_t base_cand = base_cand_from_table (base_in);
1360
1361 while (base_cand && !base && base_cand->kind != CAND_PHI)
1362 {
1363 signop sign = TYPE_SIGN (TREE_TYPE (base_cand->stride));
1364
     /* The constant addend can be folded into the base candidate's
        index only when it is an exact multiple of the stride S;
        MULTIPLE then receives the quotient k.  */
1365 if (TREE_CODE (base_cand->stride) == INTEGER_CST
1366 && wi::multiple_of_p (index_in, wi::to_widest (base_cand->stride),
1367 sign, &multiple))
1368 {
1369 /* Y = (B + i') * S, S constant, c = kS for some integer k
1370 X = Y + c
1371 ============================
1372 X = (B + (i'+ k)) * S
1373 OR
1374 Y = B + (i' * S), S constant, c = kS for some integer k
1375 X = Y + c
1376 ============================
1377 X = (B + (i'+ k)) * S */
     /* The new candidate keeps the base candidate's kind, which may
        be CAND_ADD or CAND_MULT.  */
1378 kind = base_cand->kind;
1379 base = base_cand->base_expr;
1380 index = base_cand->index + multiple;
1381 stride = base_cand->stride;
1382 ctype = base_cand->cand_type;
1383 if (has_single_use (base_in))
1384 savings = (base_cand->dead_savings
1385 + stmt_cost (base_cand->cand_stmt, speed));
1386 }
1387
1388 if (base_cand->next_interp)
1389 base_cand = lookup_cand (base_cand->next_interp);
1390 else
1391 base_cand = NULL;
1392 }
1393
1394 if (!base)
1395 {
1396 /* No interpretations had anything useful to propagate, so
1397 produce X = Y + (c * 1). */
1398 kind = CAND_ADD;
1399 base = base_in;
1400 index = index_in;
1401 stride = integer_one_node;
1402 ctype = TREE_TYPE (base_in);
1403 }
1404
1405 c = alloc_cand_and_find_basis (kind, gs, base, index, stride,
1406 ctype, savings);
1407 return c;
1408 }
1409
1410 /* Given GS which is an add or subtract of scalar integers or pointers,
1411 make at least one appropriate entry in the candidate table. */
1412
1413 static void
1414 slsr_process_add (gimple gs, tree rhs1, tree rhs2, bool speed)
1415 {
1416 bool subtract_p = gimple_assign_rhs_code (gs) == MINUS_EXPR;
1417 slsr_cand_t c = NULL, c2;
1418
1419 if (TREE_CODE (rhs2) == SSA_NAME)
1420 {
1421 /* First record an interpretation assuming RHS1 is the base expression
1422 and RHS2 is the stride. But it doesn't make sense for the
1423 stride to be a pointer, so don't record a candidate in that case. */
1424 if (!POINTER_TYPE_P (TREE_TYPE (rhs2)))
1425 {
1426 c = create_add_ssa_cand (gs, rhs1, rhs2, subtract_p, speed);
1427
1428 /* Add the first interpretation to the statement-candidate
1429 mapping. */
1430 add_cand_for_stmt (gs, c);
1431 }
1432
1433 /* If the two RHS operands are identical, or this is a subtract,
1434 we're done. */
1435 if (operand_equal_p (rhs1, rhs2, 0) || subtract_p)
1436 return;
1437
1438 /* Otherwise, record another interpretation assuming RHS2 is the
1439 base expression and RHS1 is the stride, again provided that the
1440 stride is not a pointer. */
1441 if (!POINTER_TYPE_P (TREE_TYPE (rhs1)))
1442 {
1443 c2 = create_add_ssa_cand (gs, rhs2, rhs1, false, speed);
1444 if (c)
1445 c->next_interp = c2->cand_num;
1446 else
1447 add_cand_for_stmt (gs, c2);
1448 }
1449 }
1450 else
1451 {
1452 /* Record an interpretation for the add-immediate. */
1453 widest_int index = wi::to_widest (rhs2);
1454 if (subtract_p)
1455 index = -index;
1456
1457 c = create_add_imm_cand (gs, rhs1, index, speed);
1458
1459 /* Add the interpretation to the statement-candidate mapping. */
1460 add_cand_for_stmt (gs, c);
1461 }
1462 }
1463
1464 /* Given GS which is a negate of a scalar integer, make an appropriate
1465 entry in the candidate table. A negate is equivalent to a multiply
1466 by -1. */
1467
1468 static void
1469 slsr_process_neg (gimple gs, tree rhs1, bool speed)
1470 {
1471 /* Record a CAND_MULT interpretation for the multiply by -1. */
1472 slsr_cand_t c = create_mul_imm_cand (gs, rhs1, integer_minus_one_node, speed);
1473
1474 /* Add the interpretation to the statement-candidate mapping. */
1475 add_cand_for_stmt (gs, c);
1476 }
1477
1478 /* Help function for legal_cast_p, operating on two trees. Checks
1479 whether it's allowable to cast from RHS to LHS. See legal_cast_p
1480 for more details. */
1481
1482 static bool
1483 legal_cast_p_1 (tree lhs, tree rhs)
1484 {
1485 tree lhs_type, rhs_type;
1486 unsigned lhs_size, rhs_size;
1487 bool lhs_wraps, rhs_wraps;
1488
1489 lhs_type = TREE_TYPE (lhs);
1490 rhs_type = TREE_TYPE (rhs);
1491 lhs_size = TYPE_PRECISION (lhs_type);
1492 rhs_size = TYPE_PRECISION (rhs_type);
1493 lhs_wraps = ANY_INTEGRAL_TYPE_P (lhs_type) && TYPE_OVERFLOW_WRAPS (lhs_type);
1494 rhs_wraps = ANY_INTEGRAL_TYPE_P (rhs_type) && TYPE_OVERFLOW_WRAPS (rhs_type);
1495
1496 if (lhs_size < rhs_size
1497 || (rhs_wraps && !lhs_wraps)
1498 || (rhs_wraps && lhs_wraps && rhs_size != lhs_size))
1499 return false;
1500
1501 return true;
1502 }
1503
1504 /* Return TRUE if GS is a statement that defines an SSA name from
1505 a conversion and is legal for us to combine with an add and multiply
1506 in the candidate table. For example, suppose we have:
1507
1508 A = B + i;
1509 C = (type) A;
1510 D = C * S;
1511
1512 Without the type-cast, we would create a CAND_MULT for D with base B,
1513 index i, and stride S. We want to record this candidate only if it
1514 is equivalent to apply the type cast following the multiply:
1515
1516 A = B + i;
1517 E = A * S;
1518 D = (type) E;
1519
1520 We will record the type with the candidate for D. This allows us
1521 to use a similar previous candidate as a basis. If we have earlier seen
1522
1523 A' = B + i';
1524 C' = (type) A';
1525 D' = C' * S;
1526
1527 we can replace D with
1528
1529 D = D' + (i - i') * S;
1530
1531 But if moving the type-cast would change semantics, we mustn't do this.
1532
1533 This is legitimate for casts from a non-wrapping integral type to
1534 any integral type of the same or larger size. It is not legitimate
1535 to convert a wrapping type to a non-wrapping type, or to a wrapping
1536 type of a different size. I.e., with a wrapping type, we must
1537 assume that the addition B + i could wrap, in which case performing
1538 the multiply before or after one of the "illegal" type casts will
1539 have different semantics. */
1540
1541 static bool
1542 legal_cast_p (gimple gs, tree rhs)
1543 {
1544 if (!is_gimple_assign (gs)
1545 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (gs)))
1546 return false;
1547
1548 return legal_cast_p_1 (gimple_assign_lhs (gs), rhs);
1549 }
1550
1551 /* Given GS which is a cast to a scalar integer type, determine whether
1552 the cast is legal for strength reduction. If so, make at least one
1553 appropriate entry in the candidate table. */
1554
1555 static void
1556 slsr_process_cast (gimple gs, tree rhs1, bool speed)
1557 {
1558 tree lhs, ctype;
1559 slsr_cand_t base_cand, c, c2;
1560 unsigned savings = 0;
1561
     /* Bail out unless moving the cast past a multiply is known to be
        semantics-preserving (see legal_cast_p).  */
1562 if (!legal_cast_p (gs, rhs1))
1563 return;
1564
1565 lhs = gimple_assign_lhs (gs);
1566 base_cand = base_cand_from_table (rhs1);
1567 ctype = TREE_TYPE (lhs);
1568
1569 if (base_cand && base_cand->kind != CAND_PHI)
1570 {
     /* One new candidate is created per interpretation of RHS1.  */
1571 while (base_cand)
1572 {
1573 /* Propagate all data from the base candidate except the type,
1574 which comes from the cast, and the base candidate's cast,
1575 which is no longer applicable. */
1576 if (has_single_use (rhs1))
1577 savings = (base_cand->dead_savings
1578 + stmt_cost (base_cand->cand_stmt, speed));
1579
1580 c = alloc_cand_and_find_basis (base_cand->kind, gs,
1581 base_cand->base_expr,
1582 base_cand->index, base_cand->stride,
1583 ctype, savings);
1584 if (base_cand->next_interp)
1585 base_cand = lookup_cand (base_cand->next_interp);
1586 else
1587 base_cand = NULL;
1588 }
1589 }
1590 else
1591 {
1592 /* If nothing is known about the RHS, create fresh CAND_ADD and
1593 CAND_MULT interpretations:
1594
1595 X = Y + (0 * 1)
1596 X = (Y + 0) * 1
1597
1598 The first of these is somewhat arbitrary, but the choice of
1599 1 for the stride simplifies the logic for propagating casts
1600 into their uses. */
1601 c = alloc_cand_and_find_basis (CAND_ADD, gs, rhs1,
1602 0, integer_one_node, ctype, 0);
1603 c2 = alloc_cand_and_find_basis (CAND_MULT, gs, rhs1,
1604 0, integer_one_node, ctype, 0);
1605 c->next_interp = c2->cand_num;
1606 }
1607
1608 /* Add the first (or only) interpretation to the statement-candidate
1609 mapping. */
1610 add_cand_for_stmt (gs, c);
1611 }
1612
1613 /* Given GS which is a copy of a scalar integer type, make at least one
1614 appropriate entry in the candidate table.
1615
1616 This interface is included for completeness, but is unnecessary
1617 if this pass immediately follows a pass that performs copy
1618 propagation, such as DOM. */
1619
1620 static void
1621 slsr_process_copy (gimple gs, tree rhs1, bool speed)
1622 {
1623 slsr_cand_t base_cand, c, c2;
1624 unsigned savings = 0;
1625
1626 base_cand = base_cand_from_table (rhs1);
1627
1628 if (base_cand && base_cand->kind != CAND_PHI)
1629 {
     /* Mirror every interpretation of RHS1 onto this statement,
        keeping the candidate's own type (unlike slsr_process_cast,
        where the cast supplies the type).  */
1630 while (base_cand)
1631 {
1632 /* Propagate all data from the base candidate. */
1633 if (has_single_use (rhs1))
1634 savings = (base_cand->dead_savings
1635 + stmt_cost (base_cand->cand_stmt, speed));
1636
1637 c = alloc_cand_and_find_basis (base_cand->kind, gs,
1638 base_cand->base_expr,
1639 base_cand->index, base_cand->stride,
1640 base_cand->cand_type, savings);
1641 if (base_cand->next_interp)
1642 base_cand = lookup_cand (base_cand->next_interp);
1643 else
1644 base_cand = NULL;
1645 }
1646 }
1647 else
1648 {
1649 /* If nothing is known about the RHS, create fresh CAND_ADD and
1650 CAND_MULT interpretations:
1651
1652 X = Y + (0 * 1)
1653 X = (Y + 0) * 1
1654
1655 The first of these is somewhat arbitrary, but the choice of
1656 1 for the stride simplifies the logic for propagating casts
1657 into their uses. */
1658 c = alloc_cand_and_find_basis (CAND_ADD, gs, rhs1,
1659 0, integer_one_node, TREE_TYPE (rhs1), 0);
1660 c2 = alloc_cand_and_find_basis (CAND_MULT, gs, rhs1,
1661 0, integer_one_node, TREE_TYPE (rhs1), 0);
1662 c->next_interp = c2->cand_num;
1663 }
1664
1665 /* Add the first (or only) interpretation to the statement-candidate
1666 mapping. */
1667 add_cand_for_stmt (gs, c);
1668 }
1669 \f
/* Dominator walker that visits blocks in dominance order and records
   strength-reduction candidates found in their PHIs and statements
   (see before_dom_children below).  */
1670 class find_candidates_dom_walker : public dom_walker
1671 {
1672 public:
1673 find_candidates_dom_walker (cdi_direction direction)
1674 : dom_walker (direction) {}
     /* Called once per basic block, in dominance order.  */
1675 virtual void before_dom_children (basic_block);
1676 };
1677
1678 /* Find strength-reduction candidates in block BB. */
1679
1680 void
1681 find_candidates_dom_walker::before_dom_children (basic_block bb)
1682 {
1683 bool speed = optimize_bb_for_speed_p (bb);
1684
     /* PHIs are processed first, then the block's statements.  */
1685 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
1686 gsi_next (&gsi))
1687 slsr_process_phi (gsi.phi (), speed);
1688
1689 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
1690 gsi_next (&gsi))
1691 {
1692 gimple gs = gsi_stmt (gsi);
1693
1694 if (gimple_vuse (gs) && gimple_assign_single_p (gs))
1695 slsr_process_ref (gs);
1696
1697 else if (is_gimple_assign (gs)
1698 && SCALAR_INT_MODE_P
1699 (TYPE_MODE (TREE_TYPE (gimple_assign_lhs (gs)))))
1700 {
1701 tree rhs1 = NULL_TREE, rhs2 = NULL_TREE;
1702
     /* First switch: fetch and sanity-check the operands.  Codes not
        listed fall into the default and leave RHS1/RHS2 NULL, in
        which case the second switch ignores the statement.  Note the
        deliberate fall-through from POINTER_PLUS_EXPR/MINUS_EXPR into
        the rhs1-only cases.  */
1703 switch (gimple_assign_rhs_code (gs))
1704 {
1705 case MULT_EXPR:
1706 case PLUS_EXPR:
1707 rhs1 = gimple_assign_rhs1 (gs);
1708 rhs2 = gimple_assign_rhs2 (gs);
1709 /* Should never happen, but currently some buggy situations
1710 in earlier phases put constants in rhs1. */
1711 if (TREE_CODE (rhs1) != SSA_NAME)
1712 continue;
1713 break;
1714
1715 /* Possible future opportunity: rhs1 of a ptr+ can be
1716 an ADDR_EXPR. */
1717 case POINTER_PLUS_EXPR:
1718 case MINUS_EXPR:
1719 rhs2 = gimple_assign_rhs2 (gs);
1720 /* Fall-through. */
1721
1722 CASE_CONVERT:
1723 case MODIFY_EXPR:
1724 case NEGATE_EXPR:
1725 rhs1 = gimple_assign_rhs1 (gs);
1726 if (TREE_CODE (rhs1) != SSA_NAME)
1727 continue;
1728 break;
1729
1730 default:
1731 ;
1732 }
1733
     /* Second switch: dispatch to the handler for each supported
        operation.  */
1734 switch (gimple_assign_rhs_code (gs))
1735 {
1736 case MULT_EXPR:
1737 slsr_process_mul (gs, rhs1, rhs2, speed);
1738 break;
1739
1740 case PLUS_EXPR:
1741 case POINTER_PLUS_EXPR:
1742 case MINUS_EXPR:
1743 slsr_process_add (gs, rhs1, rhs2, speed);
1744 break;
1745
1746 case NEGATE_EXPR:
1747 slsr_process_neg (gs, rhs1, speed);
1748 break;
1749
1750 CASE_CONVERT:
1751 slsr_process_cast (gs, rhs1, speed);
1752 break;
1753
1754 case MODIFY_EXPR:
1755 slsr_process_copy (gs, rhs1, speed);
1756 break;
1757
1758 default:
1759 ;
1760 }
1761 }
1762 }
1763 }
1764 \f
1765 /* Dump a candidate for debug. */
1766
1767 static void
1768 dump_candidate (slsr_cand_t c)
1769 {
1770 fprintf (dump_file, "%3d [%d] ", c->cand_num,
1771 gimple_bb (c->cand_stmt)->index);
1772 print_gimple_stmt (dump_file, c->cand_stmt, 0, 0);
     /* The body's layout depends on the candidate kind; each case
        prints the algebraic form (base, index, stride) it models.  */
1773 switch (c->kind)
1774 {
1775 case CAND_MULT:
1776 fputs (" MULT : (", dump_file);
1777 print_generic_expr (dump_file, c->base_expr, 0);
1778 fputs (" + ", dump_file);
1779 print_decs (c->index, dump_file);
1780 fputs (") * ", dump_file);
1781 print_generic_expr (dump_file, c->stride, 0);
1782 fputs (" : ", dump_file);
1783 break;
1784 case CAND_ADD:
1785 fputs (" ADD : ", dump_file);
1786 print_generic_expr (dump_file, c->base_expr, 0);
1787 fputs (" + (", dump_file);
1788 print_decs (c->index, dump_file);
1789 fputs (" * ", dump_file);
1790 print_generic_expr (dump_file, c->stride, 0);
1791 fputs (") : ", dump_file);
1792 break;
1793 case CAND_REF:
1794 fputs (" REF : ", dump_file);
1795 print_generic_expr (dump_file, c->base_expr, 0);
1796 fputs (" + (", dump_file);
1797 print_generic_expr (dump_file, c->stride, 0);
1798 fputs (") + ", dump_file);
1799 print_decs (c->index, dump_file);
1800 fputs (" : ", dump_file);
1801 break;
1802 case CAND_PHI:
1803 fputs (" PHI : ", dump_file);
1804 print_generic_expr (dump_file, c->base_expr, 0);
1805 fputs (" + (unknown * ", dump_file);
1806 print_generic_expr (dump_file, c->stride, 0);
1807 fputs (") : ", dump_file);
1808 break;
1809 default:
1810 gcc_unreachable ();
1811 }
1812 print_generic_expr (dump_file, c->cand_type, 0);
1813 fprintf (dump_file, "\n basis: %d dependent: %d sibling: %d\n",
1814 c->basis, c->dependent, c->sibling);
1815 fprintf (dump_file, " next-interp: %d dead-savings: %d\n",
1816 c->next_interp, c->dead_savings);
1817 if (c->def_phi)
1818 fprintf (dump_file, " phi: %d\n", c->def_phi);
1819 fputs ("\n", dump_file);
1820 }
1821
1822 /* Dump the candidate vector for debug. */
1823
1824 static void
1825 dump_cand_vec (void)
1826 {
1827 unsigned i;
1828 slsr_cand_t c;
1829
1830 fprintf (dump_file, "\nStrength reduction candidate vector:\n\n");
1831
1832 FOR_EACH_VEC_ELT (cand_vec, i, c)
1833 dump_candidate (c);
1834 }
1835
1836 /* Callback used to dump the candidate chains hash table. */
1837
1838 int
1839 ssa_base_cand_dump_callback (cand_chain **slot, void *ignored ATTRIBUTE_UNUSED)
1840 {
1841 const_cand_chain_t chain = *slot;
1842 cand_chain_t p;
1843
1844 print_generic_expr (dump_file, chain->base_expr, 0);
1845 fprintf (dump_file, " -> %d", chain->cand->cand_num);
1846
1847 for (p = chain->next; p; p = p->next)
1848 fprintf (dump_file, " -> %d", p->cand->cand_num);
1849
1850 fputs ("\n", dump_file);
1851 return 1;
1852 }
1853
1854 /* Dump the candidate chains. */
1855
1856 static void
1857 dump_cand_chains (void)
1858 {
1859 fprintf (dump_file, "\nStrength reduction candidate chains:\n\n");
1860 base_cand_map->traverse_noresize <void *, ssa_base_cand_dump_callback>
1861 (NULL);
1862 fputs ("\n", dump_file);
1863 }
1864
1865 /* Dump the increment vector for debug. */
1866
1867 static void
1868 dump_incr_vec (void)
1869 {
1870 if (dump_file && (dump_flags & TDF_DETAILS))
1871 {
1872 unsigned i;
1873
1874 fprintf (dump_file, "\nIncrement vector:\n\n");
1875
1876 for (i = 0; i < incr_vec_len; i++)
1877 {
1878 fprintf (dump_file, "%3d increment: ", i);
1879 print_decs (incr_vec[i].incr, dump_file);
1880 fprintf (dump_file, "\n count: %d", incr_vec[i].count);
1881 fprintf (dump_file, "\n cost: %d", incr_vec[i].cost);
1882 fputs ("\n initializer: ", dump_file);
1883 print_generic_expr (dump_file, incr_vec[i].initializer, 0);
1884 fputs ("\n\n", dump_file);
1885 }
1886 }
1887 }
1888 \f
1889 /* Replace *EXPR in candidate C with an equivalent strength-reduced
1890 data reference. */
1891
1892 static void
1893 replace_ref (tree *expr, slsr_cand_t c)
1894 {
1895 tree add_expr, mem_ref, acc_type = TREE_TYPE (*expr);
1896 unsigned HOST_WIDE_INT misalign;
1897 unsigned align;
1898
1899 /* Ensure the memory reference carries the minimum alignment
1900 requirement for the data type. See PR58041. */
1901 get_object_alignment_1 (*expr, &align, &misalign);
     /* The lowest set bit of the misalignment is the largest power of
        two known to divide the actual address.  */
1902 if (misalign != 0)
1903 align = (misalign & -misalign);
1904 if (align < TYPE_ALIGN (acc_type))
1905 acc_type = build_aligned_type (acc_type, align);
1906
     /* Rebuild the reference as MEM_REF (base + stride, index).  */
1907 add_expr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (c->base_expr),
1908 c->base_expr, c->stride);
1909 mem_ref = fold_build2 (MEM_REF, acc_type, add_expr,
1910 wide_int_to_tree (c->cand_type, c->index));
1911
1912 /* Gimplify the base addressing expression for the new MEM_REF tree. */
1913 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
1914 TREE_OPERAND (mem_ref, 0)
1915 = force_gimple_operand_gsi (&gsi, TREE_OPERAND (mem_ref, 0),
1916 /*simple_p=*/true, NULL,
1917 /*before=*/true, GSI_SAME_STMT);
1918 copy_ref_info (mem_ref, *expr);
1919 *expr = mem_ref;
1920 update_stmt (c->cand_stmt);
1921 }
1922
1923 /* Replace CAND_REF candidate C, each sibling of candidate C, and each
1924 dependent of candidate C with an equivalent strength-reduced data
1925 reference. */
1926
1927 static void
1928 replace_refs (slsr_cand_t c)
1929 {
1930 if (dump_file && (dump_flags & TDF_DETAILS))
1931 {
1932 fputs ("Replacing reference: ", dump_file);
1933 print_gimple_stmt (dump_file, c->cand_stmt, 0, 0);
1934 }
1935
1936 if (gimple_vdef (c->cand_stmt))
1937 {
1938 tree *lhs = gimple_assign_lhs_ptr (c->cand_stmt);
1939 replace_ref (lhs, c);
1940 }
1941 else
1942 {
1943 tree *rhs = gimple_assign_rhs1_ptr (c->cand_stmt);
1944 replace_ref (rhs, c);
1945 }
1946
1947 if (dump_file && (dump_flags & TDF_DETAILS))
1948 {
1949 fputs ("With: ", dump_file);
1950 print_gimple_stmt (dump_file, c->cand_stmt, 0, 0);
1951 fputs ("\n", dump_file);
1952 }
1953
1954 if (c->sibling)
1955 replace_refs (lookup_cand (c->sibling));
1956
1957 if (c->dependent)
1958 replace_refs (lookup_cand (c->dependent));
1959 }
1960
1961 /* Return TRUE if candidate C is dependent upon a PHI. */
1962
1963 static bool
1964 phi_dependent_cand_p (slsr_cand_t c)
1965 {
1966 /* A candidate is not necessarily dependent upon a PHI just because
1967 it has a phi definition for its base name. It may have a basis
1968 that relies upon the same phi definition, in which case the PHI
1969 is irrelevant to this candidate. */
1970 return (c->def_phi
1971 && c->basis
1972 && lookup_cand (c->basis)->def_phi != c->def_phi);
1973 }
1974
1975 /* Calculate the increment required for candidate C relative to
1976 its basis. */
1977
1978 static widest_int
1979 cand_increment (slsr_cand_t c)
1980 {
1981 slsr_cand_t basis;
1982
1983 /* If the candidate doesn't have a basis, just return its own
1984 index. This is useful in record_increments to help us find
1985 an existing initializer. Also, if the candidate's basis is
1986 hidden by a phi, then its own index will be the increment
1987 from the newly introduced phi basis. */
1988 if (!c->basis || phi_dependent_cand_p (c))
1989 return c->index;
1990
1991 basis = lookup_cand (c->basis);
1992 gcc_assert (operand_equal_p (c->base_expr, basis->base_expr, 0));
1993 return c->index - basis->index;
1994 }
1995
1996 /* Calculate the increment required for candidate C relative to
1997 its basis. If we aren't going to generate pointer arithmetic
1998 for this candidate, return the absolute value of that increment
1999 instead. */
2000
2001 static inline widest_int
2002 cand_abs_increment (slsr_cand_t c)
2003 {
2004 widest_int increment = cand_increment (c);
2005
2006 if (!address_arithmetic_p && wi::neg_p (increment))
2007 increment = -increment;
2008
2009 return increment;
2010 }
2011
2012 /* Return TRUE iff candidate C has already been replaced under
2013 another interpretation. */
2014
2015 static inline bool
2016 cand_already_replaced (slsr_cand_t c)
2017 {
2018 return (gimple_bb (c->cand_stmt) == 0);
2019 }
2020
2021 /* Common logic used by replace_unconditional_candidate and
2022 replace_conditional_candidate. */
2023
2024 static void
2025 replace_mult_candidate (slsr_cand_t c, tree basis_name, widest_int bump)
2026 {
2027 tree target_type = TREE_TYPE (gimple_assign_lhs (c->cand_stmt));
2028 enum tree_code cand_code = gimple_assign_rhs_code (c->cand_stmt);
2029
2030 /* It is highly unlikely, but possible, that the resulting
2031 bump doesn't fit in a HWI. Abandon the replacement
2032 in this case. This does not affect siblings or dependents
2033 of C. Restriction to signed HWI is conservative for unsigned
2034 types but allows for safe negation without twisted logic. */
2035 if (wi::fits_shwi_p (bump)
2036 && bump.to_shwi () != HOST_WIDE_INT_MIN
2037 /* It is not useful to replace casts, copies, or adds of
2038 an SSA name and a constant. */
2039 && cand_code != MODIFY_EXPR
2040 && !CONVERT_EXPR_CODE_P (cand_code)
2041 && cand_code != PLUS_EXPR
2042 && cand_code != POINTER_PLUS_EXPR
2043 && cand_code != MINUS_EXPR)
2044 {
2045 enum tree_code code = PLUS_EXPR;
2046 tree bump_tree;
2047 gimple stmt_to_print = NULL;
2048
2049 /* If the basis name and the candidate's LHS have incompatible
2050 types, introduce a cast. */
2051 if (!useless_type_conversion_p (target_type, TREE_TYPE (basis_name)))
2052 basis_name = introduce_cast_before_cand (c, target_type, basis_name);
2053 if (wi::neg_p (bump))
2054 {
2055 code = MINUS_EXPR;
2056 bump = -bump;
2057 }
2058
2059 bump_tree = wide_int_to_tree (target_type, bump);
2060
2061 if (dump_file && (dump_flags & TDF_DETAILS))
2062 {
2063 fputs ("Replacing: ", dump_file);
2064 print_gimple_stmt (dump_file, c->cand_stmt, 0, 0);
2065 }
2066
2067 if (bump == 0)
2068 {
2069 tree lhs = gimple_assign_lhs (c->cand_stmt);
2070 gassign *copy_stmt = gimple_build_assign (lhs, basis_name);
2071 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
2072 gimple_set_location (copy_stmt, gimple_location (c->cand_stmt));
2073 gsi_replace (&gsi, copy_stmt, false);
2074 c->cand_stmt = copy_stmt;
2075 if (dump_file && (dump_flags & TDF_DETAILS))
2076 stmt_to_print = copy_stmt;
2077 }
2078 else
2079 {
2080 tree rhs1, rhs2;
2081 if (cand_code != NEGATE_EXPR) {
2082 rhs1 = gimple_assign_rhs1 (c->cand_stmt);
2083 rhs2 = gimple_assign_rhs2 (c->cand_stmt);
2084 }
2085 if (cand_code != NEGATE_EXPR
2086 && ((operand_equal_p (rhs1, basis_name, 0)
2087 && operand_equal_p (rhs2, bump_tree, 0))
2088 || (operand_equal_p (rhs1, bump_tree, 0)
2089 && operand_equal_p (rhs2, basis_name, 0))))
2090 {
2091 if (dump_file && (dump_flags & TDF_DETAILS))
2092 {
2093 fputs ("(duplicate, not actually replacing)", dump_file);
2094 stmt_to_print = c->cand_stmt;
2095 }
2096 }
2097 else
2098 {
2099 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
2100 gimple_assign_set_rhs_with_ops (&gsi, code,
2101 basis_name, bump_tree);
2102 update_stmt (gsi_stmt (gsi));
2103 c->cand_stmt = gsi_stmt (gsi);
2104 if (dump_file && (dump_flags & TDF_DETAILS))
2105 stmt_to_print = gsi_stmt (gsi);
2106 }
2107 }
2108
2109 if (dump_file && (dump_flags & TDF_DETAILS))
2110 {
2111 fputs ("With: ", dump_file);
2112 print_gimple_stmt (dump_file, stmt_to_print, 0, 0);
2113 fputs ("\n", dump_file);
2114 }
2115 }
2116 }
2117
2118 /* Replace candidate C with an add or subtract. Note that we only
2119 operate on CAND_MULTs with known strides, so we will never generate
2120 a POINTER_PLUS_EXPR. Each candidate X = (B + i) * S is replaced by
2121 X = Y + ((i - i') * S), as described in the module commentary. The
2122 folded value ((i - i') * S) is referred to here as the "bump." */
2123
2124 static void
2125 replace_unconditional_candidate (slsr_cand_t c)
2126 {
2127 slsr_cand_t basis;
2128
2129 if (cand_already_replaced (c))
2130 return;
2131
2132 basis = lookup_cand (c->basis);
2133 widest_int bump = cand_increment (c) * wi::to_widest (c->stride);
2134
2135 replace_mult_candidate (c, gimple_assign_lhs (basis->cand_stmt), bump);
2136 }
2137 \f
2138 /* Return the index in the increment vector of the given INCREMENT,
2139 or -1 if not found. The latter can occur if more than
2140 MAX_INCR_VEC_LEN increments have been found. */
2141
2142 static inline int
2143 incr_vec_index (const widest_int &increment)
2144 {
2145 unsigned i;
2146
2147 for (i = 0; i < incr_vec_len && increment != incr_vec[i].incr; i++)
2148 ;
2149
2150 if (i < incr_vec_len)
2151 return i;
2152 else
2153 return -1;
2154 }
2155
2156 /* Create a new statement along edge E to add BASIS_NAME to the product
2157 of INCREMENT and the stride of candidate C. Create and return a new
2158 SSA name from *VAR to be used as the LHS of the new statement.
2159 KNOWN_STRIDE is true iff C's stride is a constant. */
2160
static tree
create_add_on_incoming_edge (slsr_cand_t c, tree basis_name,
			     widest_int increment, edge e, location_t loc,
			     bool known_stride)
{
  basic_block insert_bb;
  gimple_stmt_iterator gsi;
  tree lhs, basis_type;
  gassign *new_stmt;

  /* If the add candidate along this incoming edge has the same
     index as C's hidden basis, the hidden basis represents this
     edge correctly.  */
  if (increment == 0)
    return basis_name;

  basis_type = TREE_TYPE (basis_name);
  lhs = make_temp_ssa_name (basis_type, NULL, "slsr");

  if (known_stride)
    {
      /* The stride is constant, so the bump (increment * stride)
	 folds to a constant; emit LHS = BASIS_NAME +/- |bump|.  */
      tree bump_tree;
      enum tree_code code = PLUS_EXPR;
      widest_int bump = increment * wi::to_widest (c->stride);
      if (wi::neg_p (bump))
	{
	  code = MINUS_EXPR;
	  bump = -bump;
	}

      bump_tree = wide_int_to_tree (basis_type, bump);
      new_stmt = gimple_build_assign (lhs, code, basis_name, bump_tree);
    }
  else
    {
      /* Unknown stride: look the increment up in the increment vector,
	 using its magnitude when we canonicalized signs there.  */
      int i;
      bool negate_incr = (!address_arithmetic_p && wi::neg_p (increment));
      i = incr_vec_index (negate_incr ? -increment : increment);
      gcc_assert (i >= 0);

      if (incr_vec[i].initializer)
	{
	  /* A recorded initializer T_0 = stride * increment exists;
	     add or subtract it depending on the increment's sign.  */
	  enum tree_code code = negate_incr ? MINUS_EXPR : PLUS_EXPR;
	  new_stmt = gimple_build_assign (lhs, code, basis_name,
					  incr_vec[i].initializer);
	}
      else if (increment == 1)
	new_stmt = gimple_build_assign (lhs, PLUS_EXPR, basis_name, c->stride);
      else if (increment == -1)
	new_stmt = gimple_build_assign (lhs, MINUS_EXPR, basis_name,
					c->stride);
      else
	/* Increments other than -1, 0, 1 always have an initializer
	   recorded when the stride is unknown.  */
	gcc_unreachable ();
    }

  /* Insert at the end of E->src, splitting the edge first if E->src
     has multiple successors.  If the block ends in a control
     statement, insert just before it.  */
  insert_bb = single_succ_p (e->src) ? e->src : split_edge (e);
  gsi = gsi_last_bb (insert_bb);

  if (!gsi_end_p (gsi) && is_ctrl_stmt (gsi_stmt (gsi)))
    gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);
  else
    gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);

  gimple_set_location (new_stmt, loc);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Inserting in block %d: ", insert_bb->index);
      print_gimple_stmt (dump_file, new_stmt, 0, 0);
    }

  return lhs;
}
2234
2235 /* Given a candidate C with BASIS_NAME being the LHS of C's basis which
2236 is hidden by the phi node FROM_PHI, create a new phi node in the same
2237 block as FROM_PHI. The new phi is suitable for use as a basis by C,
2238 with its phi arguments representing conditional adjustments to the
2239 hidden basis along conditional incoming paths. Those adjustments are
2240 made by creating add statements (and sometimes recursively creating
2241 phis) along those incoming paths. LOC is the location to attach to
2242 the introduced statements. KNOWN_STRIDE is true iff C's stride is a
2243 constant. */
2244
2245 static tree
2246 create_phi_basis (slsr_cand_t c, gimple from_phi, tree basis_name,
2247 location_t loc, bool known_stride)
2248 {
2249 int i;
2250 tree name, phi_arg;
2251 gphi *phi;
2252 vec<tree> phi_args;
2253 slsr_cand_t basis = lookup_cand (c->basis);
2254 int nargs = gimple_phi_num_args (from_phi);
2255 basic_block phi_bb = gimple_bb (from_phi);
2256 slsr_cand_t phi_cand = base_cand_from_table (gimple_phi_result (from_phi));
2257 phi_args.create (nargs);
2258
2259 /* Process each argument of the existing phi that represents
2260 conditionally-executed add candidates. */
2261 for (i = 0; i < nargs; i++)
2262 {
2263 edge e = (*phi_bb->preds)[i];
2264 tree arg = gimple_phi_arg_def (from_phi, i);
2265 tree feeding_def;
2266
2267 /* If the phi argument is the base name of the CAND_PHI, then
2268 this incoming arc should use the hidden basis. */
2269 if (operand_equal_p (arg, phi_cand->base_expr, 0))
2270 if (basis->index == 0)
2271 feeding_def = gimple_assign_lhs (basis->cand_stmt);
2272 else
2273 {
2274 widest_int incr = -basis->index;
2275 feeding_def = create_add_on_incoming_edge (c, basis_name, incr,
2276 e, loc, known_stride);
2277 }
2278 else
2279 {
2280 gimple arg_def = SSA_NAME_DEF_STMT (arg);
2281
2282 /* If there is another phi along this incoming edge, we must
2283 process it in the same fashion to ensure that all basis
2284 adjustments are made along its incoming edges. */
2285 if (gimple_code (arg_def) == GIMPLE_PHI)
2286 feeding_def = create_phi_basis (c, arg_def, basis_name,
2287 loc, known_stride);
2288 else
2289 {
2290 slsr_cand_t arg_cand = base_cand_from_table (arg);
2291 widest_int diff = arg_cand->index - basis->index;
2292 feeding_def = create_add_on_incoming_edge (c, basis_name, diff,
2293 e, loc, known_stride);
2294 }
2295 }
2296
2297 /* Because of recursion, we need to save the arguments in a vector
2298 so we can create the PHI statement all at once. Otherwise the
2299 storage for the half-created PHI can be reclaimed. */
2300 phi_args.safe_push (feeding_def);
2301 }
2302
2303 /* Create the new phi basis. */
2304 name = make_temp_ssa_name (TREE_TYPE (basis_name), NULL, "slsr");
2305 phi = create_phi_node (name, phi_bb);
2306 SSA_NAME_DEF_STMT (name) = phi;
2307
2308 FOR_EACH_VEC_ELT (phi_args, i, phi_arg)
2309 {
2310 edge e = (*phi_bb->preds)[i];
2311 add_phi_arg (phi, phi_arg, e, loc);
2312 }
2313
2314 update_stmt (phi);
2315
2316 if (dump_file && (dump_flags & TDF_DETAILS))
2317 {
2318 fputs ("Introducing new phi basis: ", dump_file);
2319 print_gimple_stmt (dump_file, phi, 0, 0);
2320 }
2321
2322 return name;
2323 }
2324
2325 /* Given a candidate C whose basis is hidden by at least one intervening
2326 phi, introduce a matching number of new phis to represent its basis
2327 adjusted by conditional increments along possible incoming paths. Then
2328 replace C as though it were an unconditional candidate, using the new
2329 basis. */
2330
2331 static void
2332 replace_conditional_candidate (slsr_cand_t c)
2333 {
2334 tree basis_name, name;
2335 slsr_cand_t basis;
2336 location_t loc;
2337
2338 /* Look up the LHS SSA name from C's basis. This will be the
2339 RHS1 of the adds we will introduce to create new phi arguments. */
2340 basis = lookup_cand (c->basis);
2341 basis_name = gimple_assign_lhs (basis->cand_stmt);
2342
2343 /* Create a new phi statement which will represent C's true basis
2344 after the transformation is complete. */
2345 loc = gimple_location (c->cand_stmt);
2346 name = create_phi_basis (c, lookup_cand (c->def_phi)->cand_stmt,
2347 basis_name, loc, KNOWN_STRIDE);
2348 /* Replace C with an add of the new basis phi and a constant. */
2349 widest_int bump = c->index * wi::to_widest (c->stride);
2350
2351 replace_mult_candidate (c, name, bump);
2352 }
2353
2354 /* Compute the expected costs of inserting basis adjustments for
2355 candidate C with phi-definition PHI. The cost of inserting
2356 one adjustment is given by ONE_ADD_COST. If PHI has arguments
2357 which are themselves phi results, recursively calculate costs
2358 for those phis as well. */
2359
2360 static int
2361 phi_add_costs (gimple phi, slsr_cand_t c, int one_add_cost)
2362 {
2363 unsigned i;
2364 int cost = 0;
2365 slsr_cand_t phi_cand = base_cand_from_table (gimple_phi_result (phi));
2366
2367 /* If we work our way back to a phi that isn't dominated by the hidden
2368 basis, this isn't a candidate for replacement. Indicate this by
2369 returning an unreasonably high cost. It's not easy to detect
2370 these situations when determining the basis, so we defer the
2371 decision until now. */
2372 basic_block phi_bb = gimple_bb (phi);
2373 slsr_cand_t basis = lookup_cand (c->basis);
2374 basic_block basis_bb = gimple_bb (basis->cand_stmt);
2375
2376 if (phi_bb == basis_bb || !dominated_by_p (CDI_DOMINATORS, phi_bb, basis_bb))
2377 return COST_INFINITE;
2378
2379 for (i = 0; i < gimple_phi_num_args (phi); i++)
2380 {
2381 tree arg = gimple_phi_arg_def (phi, i);
2382
2383 if (arg != phi_cand->base_expr)
2384 {
2385 gimple arg_def = SSA_NAME_DEF_STMT (arg);
2386
2387 if (gimple_code (arg_def) == GIMPLE_PHI)
2388 cost += phi_add_costs (arg_def, c, one_add_cost);
2389 else
2390 {
2391 slsr_cand_t arg_cand = base_cand_from_table (arg);
2392
2393 if (arg_cand->index != c->index)
2394 cost += one_add_cost;
2395 }
2396 }
2397 }
2398
2399 return cost;
2400 }
2401
2402 /* For candidate C, each sibling of candidate C, and each dependent of
2403 candidate C, determine whether the candidate is dependent upon a
2404 phi that hides its basis. If not, replace the candidate unconditionally.
2405 Otherwise, determine whether the cost of introducing compensation code
2406 for the candidate is offset by the gains from strength reduction. If
2407 so, replace the candidate and introduce the compensation code. */
2408
2409 static void
2410 replace_uncond_cands_and_profitable_phis (slsr_cand_t c)
2411 {
2412 if (phi_dependent_cand_p (c))
2413 {
2414 if (c->kind == CAND_MULT)
2415 {
2416 /* A candidate dependent upon a phi will replace a multiply by
2417 a constant with an add, and will insert at most one add for
2418 each phi argument. Add these costs with the potential dead-code
2419 savings to determine profitability. */
2420 bool speed = optimize_bb_for_speed_p (gimple_bb (c->cand_stmt));
2421 int mult_savings = stmt_cost (c->cand_stmt, speed);
2422 gimple phi = lookup_cand (c->def_phi)->cand_stmt;
2423 tree phi_result = gimple_phi_result (phi);
2424 int one_add_cost = add_cost (speed,
2425 TYPE_MODE (TREE_TYPE (phi_result)));
2426 int add_costs = one_add_cost + phi_add_costs (phi, c, one_add_cost);
2427 int cost = add_costs - mult_savings - c->dead_savings;
2428
2429 if (dump_file && (dump_flags & TDF_DETAILS))
2430 {
2431 fprintf (dump_file, " Conditional candidate %d:\n", c->cand_num);
2432 fprintf (dump_file, " add_costs = %d\n", add_costs);
2433 fprintf (dump_file, " mult_savings = %d\n", mult_savings);
2434 fprintf (dump_file, " dead_savings = %d\n", c->dead_savings);
2435 fprintf (dump_file, " cost = %d\n", cost);
2436 if (cost <= COST_NEUTRAL)
2437 fputs (" Replacing...\n", dump_file);
2438 else
2439 fputs (" Not replaced.\n", dump_file);
2440 }
2441
2442 if (cost <= COST_NEUTRAL)
2443 replace_conditional_candidate (c);
2444 }
2445 }
2446 else
2447 replace_unconditional_candidate (c);
2448
2449 if (c->sibling)
2450 replace_uncond_cands_and_profitable_phis (lookup_cand (c->sibling));
2451
2452 if (c->dependent)
2453 replace_uncond_cands_and_profitable_phis (lookup_cand (c->dependent));
2454 }
2455 \f
2456 /* Count the number of candidates in the tree rooted at C that have
2457 not already been replaced under other interpretations. */
2458
2459 static int
2460 count_candidates (slsr_cand_t c)
2461 {
2462 unsigned count = cand_already_replaced (c) ? 0 : 1;
2463
2464 if (c->sibling)
2465 count += count_candidates (lookup_cand (c->sibling));
2466
2467 if (c->dependent)
2468 count += count_candidates (lookup_cand (c->dependent));
2469
2470 return count;
2471 }
2472
2473 /* Increase the count of INCREMENT by one in the increment vector.
2474 INCREMENT is associated with candidate C. If INCREMENT is to be
2475 conditionally executed as part of a conditional candidate replacement,
2476 IS_PHI_ADJUST is true, otherwise false. If an initializer
2477 T_0 = stride * I is provided by a candidate that dominates all
2478 candidates with the same increment, also record T_0 for subsequent use. */
2479
static void
record_increment (slsr_cand_t c, widest_int increment, bool is_phi_adjust)
{
  bool found = false;
  unsigned i;

  /* Treat increments that differ only in sign as identical so as to
     share initializers, unless we are generating pointer arithmetic.  */
  if (!address_arithmetic_p && wi::neg_p (increment))
    increment = -increment;

  /* Look for an existing entry for this increment.  */
  for (i = 0; i < incr_vec_len; i++)
    {
      if (incr_vec[i].incr == increment)
	{
	  incr_vec[i].count++;
	  found = true;

	  /* If we previously recorded an initializer that doesn't
	     dominate this candidate, it's not going to be useful to
	     us after all.  */
	  if (incr_vec[i].initializer
	      && !dominated_by_p (CDI_DOMINATORS,
				  gimple_bb (c->cand_stmt),
				  incr_vec[i].init_bb))
	    {
	      incr_vec[i].initializer = NULL_TREE;
	      incr_vec[i].init_bb = NULL;
	    }

	  break;
	}
    }

  /* Create a new entry if there is room; increments past the cap are
     simply not tracked (see incr_vec_index).  */
  if (!found && incr_vec_len < MAX_INCR_VEC_LEN - 1)
    {
      /* The first time we see an increment, create the entry for it.
	 If this is the root candidate which doesn't have a basis, set
	 the count to zero.  We're only processing it so it can possibly
	 provide an initializer for other candidates.  */
      incr_vec[incr_vec_len].incr = increment;
      incr_vec[incr_vec_len].count = c->basis || is_phi_adjust ? 1 : 0;
      incr_vec[incr_vec_len].cost = COST_INFINITE;

      /* Optimistically record the first occurrence of this increment
	 as providing an initializer (if it does); we will revise this
	 opinion later if it doesn't dominate all other occurrences.
	 Exception:  increments of -1, 0, 1 never need initializers;
	 and phi adjustments don't ever provide initializers.  */
      if (c->kind == CAND_ADD
	  && !is_phi_adjust
	  && c->index == increment
	  && (wi::gts_p (increment, 1)
	      || wi::lts_p (increment, -1))
	  && (gimple_assign_rhs_code (c->cand_stmt) == PLUS_EXPR
	      || gimple_assign_rhs_code (c->cand_stmt) == POINTER_PLUS_EXPR))
	{
	  /* T0 is the operand of the add that is not the base
	     expression, i.e. the SSA name that could serve as an
	     initializer.  */
	  tree t0 = NULL_TREE;
	  tree rhs1 = gimple_assign_rhs1 (c->cand_stmt);
	  tree rhs2 = gimple_assign_rhs2 (c->cand_stmt);
	  if (operand_equal_p (rhs1, c->base_expr, 0))
	    t0 = rhs2;
	  else if (operand_equal_p (rhs2, c->base_expr, 0))
	    t0 = rhs1;
	  /* Only record T0 if its defining statement lives in a real
	     block (e.g. not a default definition).  */
	  if (t0
	      && SSA_NAME_DEF_STMT (t0)
	      && gimple_bb (SSA_NAME_DEF_STMT (t0)))
	    {
	      incr_vec[incr_vec_len].initializer = t0;
	      incr_vec[incr_vec_len++].init_bb
		= gimple_bb (SSA_NAME_DEF_STMT (t0));
	    }
	  else
	    {
	      incr_vec[incr_vec_len].initializer = NULL_TREE;
	      incr_vec[incr_vec_len++].init_bb = NULL;
	    }
	}
      else
	{
	  incr_vec[incr_vec_len].initializer = NULL_TREE;
	  incr_vec[incr_vec_len++].init_bb = NULL;
	}
    }
}
2565
2566 /* Given phi statement PHI that hides a candidate from its BASIS, find
2567 the increments along each incoming arc (recursively handling additional
2568 phis that may be present) and record them. These increments are the
2569 difference in index between the index-adjusting statements and the
2570 index of the basis. */
2571
2572 static void
2573 record_phi_increments (slsr_cand_t basis, gimple phi)
2574 {
2575 unsigned i;
2576 slsr_cand_t phi_cand = base_cand_from_table (gimple_phi_result (phi));
2577
2578 for (i = 0; i < gimple_phi_num_args (phi); i++)
2579 {
2580 tree arg = gimple_phi_arg_def (phi, i);
2581
2582 if (!operand_equal_p (arg, phi_cand->base_expr, 0))
2583 {
2584 gimple arg_def = SSA_NAME_DEF_STMT (arg);
2585
2586 if (gimple_code (arg_def) == GIMPLE_PHI)
2587 record_phi_increments (basis, arg_def);
2588 else
2589 {
2590 slsr_cand_t arg_cand = base_cand_from_table (arg);
2591 widest_int diff = arg_cand->index - basis->index;
2592 record_increment (arg_cand, diff, PHI_ADJUST);
2593 }
2594 }
2595 }
2596 }
2597
2598 /* Determine how many times each unique increment occurs in the set
2599 of candidates rooted at C's parent, recording the data in the
2600 increment vector. For each unique increment I, if an initializer
2601 T_0 = stride * I is provided by a candidate that dominates all
2602 candidates with the same increment, also record T_0 for subsequent
2603 use. */
2604
2605 static void
2606 record_increments (slsr_cand_t c)
2607 {
2608 if (!cand_already_replaced (c))
2609 {
2610 if (!phi_dependent_cand_p (c))
2611 record_increment (c, cand_increment (c), NOT_PHI_ADJUST);
2612 else
2613 {
2614 /* A candidate with a basis hidden by a phi will have one
2615 increment for its relationship to the index represented by
2616 the phi, and potentially additional increments along each
2617 incoming edge. For the root of the dependency tree (which
2618 has no basis), process just the initial index in case it has
2619 an initializer that can be used by subsequent candidates. */
2620 record_increment (c, c->index, NOT_PHI_ADJUST);
2621
2622 if (c->basis)
2623 record_phi_increments (lookup_cand (c->basis),
2624 lookup_cand (c->def_phi)->cand_stmt);
2625 }
2626 }
2627
2628 if (c->sibling)
2629 record_increments (lookup_cand (c->sibling));
2630
2631 if (c->dependent)
2632 record_increments (lookup_cand (c->dependent));
2633 }
2634
2635 /* Add up and return the costs of introducing add statements that
2636 require the increment INCR on behalf of candidate C and phi
2637 statement PHI. Accumulate into *SAVINGS the potential savings
2638 from removing existing statements that feed PHI and have no other
2639 uses. */
2640
2641 static int
2642 phi_incr_cost (slsr_cand_t c, const widest_int &incr, gimple phi, int *savings)
2643 {
2644 unsigned i;
2645 int cost = 0;
2646 slsr_cand_t basis = lookup_cand (c->basis);
2647 slsr_cand_t phi_cand = base_cand_from_table (gimple_phi_result (phi));
2648
2649 for (i = 0; i < gimple_phi_num_args (phi); i++)
2650 {
2651 tree arg = gimple_phi_arg_def (phi, i);
2652
2653 if (!operand_equal_p (arg, phi_cand->base_expr, 0))
2654 {
2655 gimple arg_def = SSA_NAME_DEF_STMT (arg);
2656
2657 if (gimple_code (arg_def) == GIMPLE_PHI)
2658 {
2659 int feeding_savings = 0;
2660 cost += phi_incr_cost (c, incr, arg_def, &feeding_savings);
2661 if (has_single_use (gimple_phi_result (arg_def)))
2662 *savings += feeding_savings;
2663 }
2664 else
2665 {
2666 slsr_cand_t arg_cand = base_cand_from_table (arg);
2667 widest_int diff = arg_cand->index - basis->index;
2668
2669 if (incr == diff)
2670 {
2671 tree basis_lhs = gimple_assign_lhs (basis->cand_stmt);
2672 tree lhs = gimple_assign_lhs (arg_cand->cand_stmt);
2673 cost += add_cost (true, TYPE_MODE (TREE_TYPE (basis_lhs)));
2674 if (has_single_use (lhs))
2675 *savings += stmt_cost (arg_cand->cand_stmt, true);
2676 }
2677 }
2678 }
2679 }
2680
2681 return cost;
2682 }
2683
2684 /* Return the first candidate in the tree rooted at C that has not
2685 already been replaced, favoring siblings over dependents. */
2686
2687 static slsr_cand_t
2688 unreplaced_cand_in_tree (slsr_cand_t c)
2689 {
2690 if (!cand_already_replaced (c))
2691 return c;
2692
2693 if (c->sibling)
2694 {
2695 slsr_cand_t sib = unreplaced_cand_in_tree (lookup_cand (c->sibling));
2696 if (sib)
2697 return sib;
2698 }
2699
2700 if (c->dependent)
2701 {
2702 slsr_cand_t dep = unreplaced_cand_in_tree (lookup_cand (c->dependent));
2703 if (dep)
2704 return dep;
2705 }
2706
2707 return NULL;
2708 }
2709
2710 /* Return TRUE if the candidates in the tree rooted at C should be
2711 optimized for speed, else FALSE. We estimate this based on the block
2712 containing the most dominant candidate in the tree that has not yet
2713 been replaced. */
2714
2715 static bool
2716 optimize_cands_for_speed_p (slsr_cand_t c)
2717 {
2718 slsr_cand_t c2 = unreplaced_cand_in_tree (c);
2719 gcc_assert (c2);
2720 return optimize_bb_for_speed_p (gimple_bb (c2->cand_stmt));
2721 }
2722
2723 /* Add COST_IN to the lowest cost of any dependent path starting at
2724 candidate C or any of its siblings, counting only candidates along
2725 such paths with increment INCR. Assume that replacing a candidate
2726 reduces cost by REPL_SAVINGS. Also account for savings from any
2727 statements that would go dead. If COUNT_PHIS is true, include
2728 costs of introducing feeding statements for conditional candidates. */
2729
static int
lowest_cost_path (int cost_in, int repl_savings, slsr_cand_t c,
		  const widest_int &incr, bool count_phis)
{
  int local_cost, sib_cost, savings = 0;
  widest_int cand_incr = cand_abs_increment (c);

  /* Apply the replacement savings only when C actually uses INCR
     and hasn't already been replaced; dead-code savings apply in
     either unreplaced case.  */
  if (cand_already_replaced (c))
    local_cost = cost_in;
  else if (incr == cand_incr)
    local_cost = cost_in - repl_savings - c->dead_savings;
  else
    local_cost = cost_in - c->dead_savings;

  /* Conditional candidates may require feeding adds along incoming
     phi edges; add that cost, crediting statements that would go
     dead if the phi result has no other uses.  */
  if (count_phis
      && phi_dependent_cand_p (c)
      && !cand_already_replaced (c))
    {
      gimple phi = lookup_cand (c->def_phi)->cand_stmt;
      local_cost += phi_incr_cost (c, incr, phi, &savings);

      if (has_single_use (gimple_phi_result (phi)))
	local_cost -= savings;
    }

  /* Dependents extend the current path, so accumulate into it.  */
  if (c->dependent)
    local_cost = lowest_cost_path (local_cost, repl_savings,
				   lookup_cand (c->dependent), incr,
				   count_phis);

  /* Siblings start alternative paths from the incoming cost; keep
     whichever path is cheapest.  */
  if (c->sibling)
    {
      sib_cost = lowest_cost_path (cost_in, repl_savings,
				   lookup_cand (c->sibling), incr,
				   count_phis);
      local_cost = MIN (local_cost, sib_cost);
    }

  return local_cost;
}
2770
2771 /* Compute the total savings that would accrue from all replacements
2772 in the candidate tree rooted at C, counting only candidates with
2773 increment INCR. Assume that replacing a candidate reduces cost
2774 by REPL_SAVINGS. Also account for savings from statements that
2775 would go dead. */
2776
2777 static int
2778 total_savings (int repl_savings, slsr_cand_t c, const widest_int &incr,
2779 bool count_phis)
2780 {
2781 int savings = 0;
2782 widest_int cand_incr = cand_abs_increment (c);
2783
2784 if (incr == cand_incr && !cand_already_replaced (c))
2785 savings += repl_savings + c->dead_savings;
2786
2787 if (count_phis
2788 && phi_dependent_cand_p (c)
2789 && !cand_already_replaced (c))
2790 {
2791 int phi_savings = 0;
2792 gimple phi = lookup_cand (c->def_phi)->cand_stmt;
2793 savings -= phi_incr_cost (c, incr, phi, &phi_savings);
2794
2795 if (has_single_use (gimple_phi_result (phi)))
2796 savings += phi_savings;
2797 }
2798
2799 if (c->dependent)
2800 savings += total_savings (repl_savings, lookup_cand (c->dependent), incr,
2801 count_phis);
2802
2803 if (c->sibling)
2804 savings += total_savings (repl_savings, lookup_cand (c->sibling), incr,
2805 count_phis);
2806
2807 return savings;
2808 }
2809
2810 /* Use target-specific costs to determine and record which increments
2811 in the current candidate tree are profitable to replace, assuming
2812 MODE and SPEED. FIRST_DEP is the first dependent of the root of
2813 the candidate tree.
2814
2815 One slight limitation here is that we don't account for the possible
2816 introduction of casts in some cases. See replace_one_candidate for
2817 the cases where these are introduced. This should probably be cleaned
2818 up sometime. */
2819
static void
analyze_increments (slsr_cand_t first_dep, machine_mode mode, bool speed)
{
  unsigned i;

  /* Assign a cost to each recorded increment; candidates using an
     increment are replaced later only if its cost is profitable.  */
  for (i = 0; i < incr_vec_len; i++)
    {
      HOST_WIDE_INT incr = incr_vec[i].incr.to_shwi ();

      /* If somehow this increment is bigger than a HWI, we won't
	 be optimizing candidates that use it.  And if the increment
	 has a count of zero, nothing will be done with it.  */
      if (!wi::fits_shwi_p (incr_vec[i].incr) || !incr_vec[i].count)
	incr_vec[i].cost = COST_INFINITE;

      /* Increments of 0, 1, and -1 are always profitable to replace,
	 because they always replace a multiply or add with an add or
	 copy, and may cause one or more existing instructions to go
	 dead.  Exception:  -1 can't be assumed to be profitable for
	 pointer addition.  */
      else if (incr == 0
	       || incr == 1
	       || (incr == -1
		   && (gimple_assign_rhs_code (first_dep->cand_stmt)
		       != POINTER_PLUS_EXPR)))
	incr_vec[i].cost = COST_NEUTRAL;

      /* FORNOW: If we need to add an initializer, give up if a cast from
	 the candidate's type to its stride's type can lose precision.
	 This could eventually be handled better by expressly retaining the
	 result of a cast to a wider type in the stride.  Example:

           short int _1;
	   _2 = (int) _1;
	   _3 = _2 * 10;
	   _4 = x + _3;    ADD: x + (10 * _1) : int
	   _5 = _2 * 15;
	   _6 = x + _3;    ADD: x + (15 * _1) : int

	 Right now replacing _6 would cause insertion of an initializer
	 of the form "short int T = _1 * 5;" followed by a cast to
	 int, which could overflow incorrectly.  Had we recorded _2 or
	 (int)_1 as the stride, this wouldn't happen.  However, doing
         this breaks other opportunities, so this will require some
	 care.  */
      else if (!incr_vec[i].initializer
	       && TREE_CODE (first_dep->stride) != INTEGER_CST
	       && !legal_cast_p_1 (first_dep->stride,
				   gimple_assign_lhs (first_dep->cand_stmt)))

	incr_vec[i].cost = COST_INFINITE;

      /* If we need to add an initializer, make sure we don't introduce
	 a multiply by a pointer type, which can happen in certain cast
	 scenarios.  FIXME: When cleaning up these cast issues, we can
         afford to introduce the multiply provided we cast out to an
         unsigned int of appropriate size.  */
      else if (!incr_vec[i].initializer
	       && TREE_CODE (first_dep->stride) != INTEGER_CST
	       && POINTER_TYPE_P (TREE_TYPE (first_dep->stride)))

	incr_vec[i].cost = COST_INFINITE;

      /* For any other increment, if this is a multiply candidate, we
	 must introduce a temporary T and initialize it with
	 T_0 = stride * increment.  When optimizing for speed, walk the
	 candidate tree to calculate the best cost reduction along any
	 path; if it offsets the fixed cost of inserting the initializer,
	 replacing the increment is profitable.  When optimizing for
         size, instead calculate the total cost reduction from replacing
	 all candidates with this increment.  */
      else if (first_dep->kind == CAND_MULT)
	{
	  /* Cost of materializing T_0 = stride * increment, offset by
	     the per-candidate saving of an add replacing a multiply.  */
	  int cost = mult_by_coeff_cost (incr, mode, speed);
	  int repl_savings = mul_cost (speed, mode) - add_cost (speed, mode);
	  if (speed)
	    cost = lowest_cost_path (cost, repl_savings, first_dep,
				     incr_vec[i].incr, COUNT_PHIS);
	  else
	    cost -= total_savings (repl_savings, first_dep, incr_vec[i].incr,
				   COUNT_PHIS);

	  incr_vec[i].cost = cost;
	}

      /* If this is an add candidate, the initializer may already
	 exist, so only calculate the cost of the initializer if it
	 doesn't.  We are replacing one add with another here, so the
	 known replacement savings is zero.  We will account for removal
	 of dead instructions in lowest_cost_path or total_savings.  */
      else
	{
	  int cost = 0;
	  if (!incr_vec[i].initializer)
	    cost = mult_by_coeff_cost (incr, mode, speed);

	  if (speed)
	    cost = lowest_cost_path (cost, 0, first_dep, incr_vec[i].incr,
				     DONT_COUNT_PHIS);
	  else
	    cost -= total_savings (0, first_dep, incr_vec[i].incr,
				   DONT_COUNT_PHIS);

	  incr_vec[i].cost = cost;
	}
    }
}
2927
2928 /* Return the nearest common dominator of BB1 and BB2. If the blocks
2929 are identical, return the earlier of C1 and C2 in *WHERE. Otherwise,
2930 if the NCD matches BB1, return C1 in *WHERE; if the NCD matches BB2,
2931 return C2 in *WHERE; and if the NCD matches neither, return NULL in
2932 *WHERE. Note: It is possible for one of C1 and C2 to be NULL. */
2933
2934 static basic_block
2935 ncd_for_two_cands (basic_block bb1, basic_block bb2,
2936 slsr_cand_t c1, slsr_cand_t c2, slsr_cand_t *where)
2937 {
2938 basic_block ncd;
2939
2940 if (!bb1)
2941 {
2942 *where = c2;
2943 return bb2;
2944 }
2945
2946 if (!bb2)
2947 {
2948 *where = c1;
2949 return bb1;
2950 }
2951
2952 ncd = nearest_common_dominator (CDI_DOMINATORS, bb1, bb2);
2953
2954 /* If both candidates are in the same block, the earlier
2955 candidate wins. */
2956 if (bb1 == ncd && bb2 == ncd)
2957 {
2958 if (!c1 || (c2 && c2->cand_num < c1->cand_num))
2959 *where = c2;
2960 else
2961 *where = c1;
2962 }
2963
2964 /* Otherwise, if one of them produced a candidate in the
2965 dominator, that one wins. */
2966 else if (bb1 == ncd)
2967 *where = c1;
2968
2969 else if (bb2 == ncd)
2970 *where = c2;
2971
2972 /* If neither matches the dominator, neither wins. */
2973 else
2974 *where = NULL;
2975
2976 return ncd;
2977 }
2978
2979 /* Consider all candidates that feed PHI. Find the nearest common
2980 dominator of those candidates requiring the given increment INCR.
2981 Further find and return the nearest common dominator of this result
2982 with block NCD. If the returned block contains one or more of the
2983 candidates, return the earliest candidate in the block in *WHERE. */
2984
2985 static basic_block
2986 ncd_with_phi (slsr_cand_t c, const widest_int &incr, gphi *phi,
2987 basic_block ncd, slsr_cand_t *where)
2988 {
2989 unsigned i;
2990 slsr_cand_t basis = lookup_cand (c->basis);
2991 slsr_cand_t phi_cand = base_cand_from_table (gimple_phi_result (phi));
2992
2993 for (i = 0; i < gimple_phi_num_args (phi); i++)
2994 {
2995 tree arg = gimple_phi_arg_def (phi, i);
2996
2997 if (!operand_equal_p (arg, phi_cand->base_expr, 0))
2998 {
2999 gimple arg_def = SSA_NAME_DEF_STMT (arg);
3000
3001 if (gimple_code (arg_def) == GIMPLE_PHI)
3002 ncd = ncd_with_phi (c, incr, as_a <gphi *> (arg_def), ncd,
3003 where);
3004 else
3005 {
3006 slsr_cand_t arg_cand = base_cand_from_table (arg);
3007 widest_int diff = arg_cand->index - basis->index;
3008 basic_block pred = gimple_phi_arg_edge (phi, i)->src;
3009
3010 if ((incr == diff) || (!address_arithmetic_p && incr == -diff))
3011 ncd = ncd_for_two_cands (ncd, pred, *where, NULL, where);
3012 }
3013 }
3014 }
3015
3016 return ncd;
3017 }
3018
3019 /* Consider the candidate C together with any candidates that feed
3020 C's phi dependence (if any). Find and return the nearest common
3021 dominator of those candidates requiring the given increment INCR.
3022 If the returned block contains one or more of the candidates,
3023 return the earliest candidate in the block in *WHERE. */
3024
3025 static basic_block
3026 ncd_of_cand_and_phis (slsr_cand_t c, const widest_int &incr, slsr_cand_t *where)
3027 {
3028 basic_block ncd = NULL;
3029
3030 if (cand_abs_increment (c) == incr)
3031 {
3032 ncd = gimple_bb (c->cand_stmt);
3033 *where = c;
3034 }
3035
3036 if (phi_dependent_cand_p (c))
3037 ncd = ncd_with_phi (c, incr,
3038 as_a <gphi *> (lookup_cand (c->def_phi)->cand_stmt),
3039 ncd, where);
3040
3041 return ncd;
3042 }
3043
3044 /* Consider all candidates in the tree rooted at C for which INCR
3045 represents the required increment of C relative to its basis.
3046 Find and return the basic block that most nearly dominates all
3047 such candidates. If the returned block contains one or more of
3048 the candidates, return the earliest candidate in the block in
3049 *WHERE. */
3050
3051 static basic_block
3052 nearest_common_dominator_for_cands (slsr_cand_t c, const widest_int &incr,
3053 slsr_cand_t *where)
3054 {
3055 basic_block sib_ncd = NULL, dep_ncd = NULL, this_ncd = NULL, ncd;
3056 slsr_cand_t sib_where = NULL, dep_where = NULL, this_where = NULL, new_where;
3057
3058 /* First find the NCD of all siblings and dependents. */
3059 if (c->sibling)
3060 sib_ncd = nearest_common_dominator_for_cands (lookup_cand (c->sibling),
3061 incr, &sib_where);
3062 if (c->dependent)
3063 dep_ncd = nearest_common_dominator_for_cands (lookup_cand (c->dependent),
3064 incr, &dep_where);
3065 if (!sib_ncd && !dep_ncd)
3066 {
3067 new_where = NULL;
3068 ncd = NULL;
3069 }
3070 else if (sib_ncd && !dep_ncd)
3071 {
3072 new_where = sib_where;
3073 ncd = sib_ncd;
3074 }
3075 else if (dep_ncd && !sib_ncd)
3076 {
3077 new_where = dep_where;
3078 ncd = dep_ncd;
3079 }
3080 else
3081 ncd = ncd_for_two_cands (sib_ncd, dep_ncd, sib_where,
3082 dep_where, &new_where);
3083
3084 /* If the candidate's increment doesn't match the one we're interested
3085 in (and nor do any increments for feeding defs of a phi-dependence),
3086 then the result depends only on siblings and dependents. */
3087 this_ncd = ncd_of_cand_and_phis (c, incr, &this_where);
3088
3089 if (!this_ncd || cand_already_replaced (c))
3090 {
3091 *where = new_where;
3092 return ncd;
3093 }
3094
3095 /* Otherwise, compare this candidate with the result from all siblings
3096 and dependents. */
3097 ncd = ncd_for_two_cands (ncd, this_ncd, new_where, this_where, where);
3098
3099 return ncd;
3100 }
3101
3102 /* Return TRUE if the increment indexed by INDEX is profitable to replace. */
3103
3104 static inline bool
3105 profitable_increment_p (unsigned index)
3106 {
3107 return (incr_vec[index].cost <= COST_NEUTRAL);
3108 }
3109
/* For each profitable increment in the increment vector not equal to
   0 or 1 (or -1, for non-pointer arithmetic), find the nearest common
   dominator of all statements in the candidate chain rooted at C
   that require that increment, and insert an initializer
   T_0 = stride * increment at that location.  Record T_0 with the
   increment record.  */

static void
insert_initializers (slsr_cand_t c)
{
  unsigned i;

  for (i = 0; i < incr_vec_len; i++)
    {
      basic_block bb;
      slsr_cand_t where = NULL;
      gassign *init_stmt;
      tree stride_type, new_name, incr_tree;
      widest_int incr = incr_vec[i].incr;

      /* Increments of 0 and 1 (and -1 for non-pointer arithmetic) are
	 handled with a plain copy, add, or subtract of the stride and
	 never need an initializer; unprofitable increments are never
	 replaced at all.  */
      if (!profitable_increment_p (i)
	  || incr == 1
	  || (incr == -1
	      && gimple_assign_rhs_code (c->cand_stmt) != POINTER_PLUS_EXPR)
	  || incr == 0)
	continue;

      /* We may have already identified an existing initializer that
	 will suffice.  */
      if (incr_vec[i].initializer)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fputs ("Using existing initializer: ", dump_file);
	      print_gimple_stmt (dump_file,
				 SSA_NAME_DEF_STMT (incr_vec[i].initializer),
				 0, 0);
	    }
	  continue;
	}

      /* Find the block that most closely dominates all candidates
	 with this increment.  If there is at least one candidate in
	 that block, the earliest one will be returned in WHERE.  */
      bb = nearest_common_dominator_for_cands (c, incr, &where);

      /* Create a new SSA name to hold the initializer's value.  */
      stride_type = TREE_TYPE (c->stride);
      new_name = make_temp_ssa_name (stride_type, NULL, "slsr");
      incr_vec[i].initializer = new_name;

      /* Create the initializer and insert it in the latest possible
	 dominating position.  */
      incr_tree = wide_int_to_tree (stride_type, incr);
      init_stmt = gimple_build_assign (new_name, MULT_EXPR,
				       c->stride, incr_tree);
      if (where)
	{
	  /* A candidate lives in the dominating block; insert directly
	     ahead of the earliest such candidate.  */
	  gimple_stmt_iterator gsi = gsi_for_stmt (where->cand_stmt);
	  gsi_insert_before (&gsi, init_stmt, GSI_SAME_STMT);
	  gimple_set_location (init_stmt, gimple_location (where->cand_stmt));
	}
      else
	{
	  /* Otherwise place it at the end of the dominating block, but
	     ahead of any control statement that terminates the block.  */
	  gimple_stmt_iterator gsi = gsi_last_bb (bb);
	  gimple basis_stmt = lookup_cand (c->basis)->cand_stmt;

	  if (!gsi_end_p (gsi) && is_ctrl_stmt (gsi_stmt (gsi)))
	    gsi_insert_before (&gsi, init_stmt, GSI_SAME_STMT);
	  else
	    gsi_insert_after (&gsi, init_stmt, GSI_SAME_STMT);

	  /* No candidate statement here to borrow a location from, so
	     use the location of C's basis.  */
	  gimple_set_location (init_stmt, gimple_location (basis_stmt));
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fputs ("Inserting initializer: ", dump_file);
	  print_gimple_stmt (dump_file, init_stmt, 0, 0);
	}
    }
}
3192
/* Return TRUE iff all required increments for candidates feeding PHI
   are profitable to replace on behalf of candidate C.  */

static bool
all_phi_incrs_profitable (slsr_cand_t c, gimple phi)
{
  unsigned i;
  slsr_cand_t basis = lookup_cand (c->basis);
  slsr_cand_t phi_cand = base_cand_from_table (gimple_phi_result (phi));

  /* Examine every phi argument that is not simply a copy of the phi
     candidate's base expression.  */
  for (i = 0; i < gimple_phi_num_args (phi); i++)
    {
      tree arg = gimple_phi_arg_def (phi, i);

      if (!operand_equal_p (arg, phi_cand->base_expr, 0))
	{
	  gimple arg_def = SSA_NAME_DEF_STMT (arg);

	  if (gimple_code (arg_def) == GIMPLE_PHI)
	    {
	      /* A feeding phi must itself have only profitable
		 feeding candidates; check recursively.  */
	      if (!all_phi_incrs_profitable (c, arg_def))
		return false;
	    }
	  else
	    {
	      int j;
	      slsr_cand_t arg_cand = base_cand_from_table (arg);
	      widest_int increment = arg_cand->index - basis->index;

	      /* Increments are recorded as absolute values except for
		 address arithmetic, so canonicalize before lookup.  */
	      if (!address_arithmetic_p && wi::neg_p (increment))
		increment = -increment;

	      j = incr_vec_index (increment);

	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, "  Conditional candidate %d, phi: ",
			   c->cand_num);
		  print_gimple_stmt (dump_file, phi, 0, 0);
		  fputs ("  increment: ", dump_file);
		  print_decs (increment, dump_file);
		  if (j < 0)
		    fprintf (dump_file,
			     "\n  Not replaced; incr_vec overflow.\n");
		  else {
		    fprintf (dump_file, "\n  cost: %d\n", incr_vec[j].cost);
		    if (profitable_increment_p (j))
		      fputs ("  Replacing...\n", dump_file);
		    else
		      fputs ("  Not replaced.\n", dump_file);
		  }
		}

	      /* A missing incr_vec entry (vector overflowed during
		 recording) or an unprofitable increment vetoes the
		 entire replacement.  */
	      if (j < 0 || !profitable_increment_p (j))
		return false;
	    }
	}
    }

  return true;
}
3254
3255 /* Create a NOP_EXPR that copies FROM_EXPR into a new SSA name of
3256 type TO_TYPE, and insert it in front of the statement represented
3257 by candidate C. Use *NEW_VAR to create the new SSA name. Return
3258 the new SSA name. */
3259
3260 static tree
3261 introduce_cast_before_cand (slsr_cand_t c, tree to_type, tree from_expr)
3262 {
3263 tree cast_lhs;
3264 gassign *cast_stmt;
3265 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
3266
3267 cast_lhs = make_temp_ssa_name (to_type, NULL, "slsr");
3268 cast_stmt = gimple_build_assign (cast_lhs, NOP_EXPR, from_expr);
3269 gimple_set_location (cast_stmt, gimple_location (c->cand_stmt));
3270 gsi_insert_before (&gsi, cast_stmt, GSI_SAME_STMT);
3271
3272 if (dump_file && (dump_flags & TDF_DETAILS))
3273 {
3274 fputs (" Inserting: ", dump_file);
3275 print_gimple_stmt (dump_file, cast_stmt, 0, 0);
3276 }
3277
3278 return cast_lhs;
3279 }
3280
3281 /* Replace the RHS of the statement represented by candidate C with
3282 NEW_CODE, NEW_RHS1, and NEW_RHS2, provided that to do so doesn't
3283 leave C unchanged or just interchange its operands. The original
3284 operation and operands are in OLD_CODE, OLD_RHS1, and OLD_RHS2.
3285 If the replacement was made and we are doing a details dump,
3286 return the revised statement, else NULL. */
3287
3288 static gimple
3289 replace_rhs_if_not_dup (enum tree_code new_code, tree new_rhs1, tree new_rhs2,
3290 enum tree_code old_code, tree old_rhs1, tree old_rhs2,
3291 slsr_cand_t c)
3292 {
3293 if (new_code != old_code
3294 || ((!operand_equal_p (new_rhs1, old_rhs1, 0)
3295 || !operand_equal_p (new_rhs2, old_rhs2, 0))
3296 && (!operand_equal_p (new_rhs1, old_rhs2, 0)
3297 || !operand_equal_p (new_rhs2, old_rhs1, 0))))
3298 {
3299 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
3300 gimple_assign_set_rhs_with_ops (&gsi, new_code, new_rhs1, new_rhs2);
3301 update_stmt (gsi_stmt (gsi));
3302 c->cand_stmt = gsi_stmt (gsi);
3303
3304 if (dump_file && (dump_flags & TDF_DETAILS))
3305 return gsi_stmt (gsi);
3306 }
3307
3308 else if (dump_file && (dump_flags & TDF_DETAILS))
3309 fputs (" (duplicate, not actually replacing)\n", dump_file);
3310
3311 return NULL;
3312 }
3313
/* Strength-reduce the statement represented by candidate C by replacing
   it with an equivalent addition or subtraction.  I is the index into
   the increment vector identifying C's increment.  BASIS_NAME
   is the rhs1 to use in creating the add/subtract.  */

static void
replace_one_candidate (slsr_cand_t c, unsigned i, tree basis_name)
{
  gimple stmt_to_print = NULL;
  tree orig_rhs1, orig_rhs2;
  tree rhs2;
  enum tree_code orig_code, repl_code;
  widest_int cand_incr;

  orig_code = gimple_assign_rhs_code (c->cand_stmt);
  orig_rhs1 = gimple_assign_rhs1 (c->cand_stmt);
  orig_rhs2 = gimple_assign_rhs2 (c->cand_stmt);
  cand_incr = cand_increment (c);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fputs ("Replacing: ", dump_file);
      print_gimple_stmt (dump_file, c->cand_stmt, 0, 0);
      stmt_to_print = c->cand_stmt;
    }

  /* Pointer candidates must be rebuilt with POINTER_PLUS_EXPR; all
     others use an ordinary PLUS_EXPR.  */
  if (address_arithmetic_p)
    repl_code = POINTER_PLUS_EXPR;
  else
    repl_code = PLUS_EXPR;

  /* If the increment has an initializer T_0, replace the candidate
     statement with an add of the basis name and the initializer.  */
  if (incr_vec[i].initializer)
    {
      tree init_type = TREE_TYPE (incr_vec[i].initializer);
      tree orig_type = TREE_TYPE (orig_rhs2);

      /* Introduce a cast when the initializer's type doesn't match
	 the type expected by the candidate statement.  */
      if (types_compatible_p (orig_type, init_type))
	rhs2 = incr_vec[i].initializer;
      else
	rhs2 = introduce_cast_before_cand (c, orig_type,
					   incr_vec[i].initializer);

      /* When the recorded increment is the negation of C's actual
	 increment, the initializer must be subtracted, not added.
	 (Only possible for non-pointer arithmetic.)  */
      if (incr_vec[i].incr != cand_incr)
	{
	  gcc_assert (repl_code == PLUS_EXPR);
	  repl_code = MINUS_EXPR;
	}

      stmt_to_print = replace_rhs_if_not_dup (repl_code, basis_name, rhs2,
					      orig_code, orig_rhs1, orig_rhs2,
					      c);
    }

  /* Otherwise, the increment is one of -1, 0, and 1.  Replace
     with a subtract of the stride from the basis name, a copy
     from the basis name, or an add of the stride to the basis
     name, respectively.  It may be necessary to introduce a
     cast (or reuse an existing cast).  */
  else if (cand_incr == 1)
    {
      tree stride_type = TREE_TYPE (c->stride);
      tree orig_type = TREE_TYPE (orig_rhs2);

      if (types_compatible_p (orig_type, stride_type))
	rhs2 = c->stride;
      else
	rhs2 = introduce_cast_before_cand (c, orig_type, c->stride);

      stmt_to_print = replace_rhs_if_not_dup (repl_code, basis_name, rhs2,
					      orig_code, orig_rhs1, orig_rhs2,
					      c);
    }

  else if (cand_incr == -1)
    {
      tree stride_type = TREE_TYPE (c->stride);
      tree orig_type = TREE_TYPE (orig_rhs2);
      /* Increments of -1 never arise for pointer arithmetic.  */
      gcc_assert (repl_code != POINTER_PLUS_EXPR);

      if (types_compatible_p (orig_type, stride_type))
	rhs2 = c->stride;
      else
	rhs2 = introduce_cast_before_cand (c, orig_type, c->stride);

      /* Rewrite as basis - stride, unless that is already exactly
	 what the statement computes.  */
      if (orig_code != MINUS_EXPR
	  || !operand_equal_p (basis_name, orig_rhs1, 0)
	  || !operand_equal_p (rhs2, orig_rhs2, 0))
	{
	  gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
	  gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, basis_name, rhs2);
	  update_stmt (gsi_stmt (gsi));
	  c->cand_stmt = gsi_stmt (gsi);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    stmt_to_print = gsi_stmt (gsi);
	}
      else if (dump_file && (dump_flags & TDF_DETAILS))
	fputs ("  (duplicate, not actually replacing)\n", dump_file);
    }

  else if (cand_incr == 0)
    {
      tree lhs = gimple_assign_lhs (c->cand_stmt);
      tree lhs_type = TREE_TYPE (lhs);
      tree basis_type = TREE_TYPE (basis_name);

      /* Zero increment:  the candidate is just a copy of the basis,
	 possibly through a conversion.  */
      if (types_compatible_p (lhs_type, basis_type))
	{
	  gassign *copy_stmt = gimple_build_assign (lhs, basis_name);
	  gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
	  gimple_set_location (copy_stmt, gimple_location (c->cand_stmt));
	  gsi_replace (&gsi, copy_stmt, false);
	  c->cand_stmt = copy_stmt;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    stmt_to_print = copy_stmt;
	}
      else
	{
	  gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
	  gassign *cast_stmt = gimple_build_assign (lhs, NOP_EXPR, basis_name);
	  gimple_set_location (cast_stmt, gimple_location (c->cand_stmt));
	  gsi_replace (&gsi, cast_stmt, false);
	  c->cand_stmt = cast_stmt;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    stmt_to_print = cast_stmt;
	}
    }
  else
    gcc_unreachable ();

  if (dump_file && (dump_flags & TDF_DETAILS) && stmt_to_print)
    {
      fputs ("With: ", dump_file);
      print_gimple_stmt (dump_file, stmt_to_print, 0, 0);
      fputs ("\n", dump_file);
    }
}
3456
/* For each candidate in the tree rooted at C, replace it with
   an increment if such has been shown to be profitable.  */

static void
replace_profitable_candidates (slsr_cand_t c)
{
  if (!cand_already_replaced (c))
    {
      widest_int increment = cand_abs_increment (c);
      enum tree_code orig_code = gimple_assign_rhs_code (c->cand_stmt);
      int i;

      i = incr_vec_index (increment);

      /* Only process profitable increments.  Nothing useful can be done
	 to a cast or copy.  */
      if (i >= 0
	  && profitable_increment_p (i)
	  && orig_code != MODIFY_EXPR
	  && !CONVERT_EXPR_CODE_P (orig_code))
	{
	  if (phi_dependent_cand_p (c))
	    {
	      gimple phi = lookup_cand (c->def_phi)->cand_stmt;

	      /* A phi-dependent candidate is replaced only when every
		 increment needed by the candidates feeding the phi is
		 itself profitable.  */
	      if (all_phi_incrs_profitable (c, phi))
		{
		  /* Look up the LHS SSA name from C's basis.  This will be
		     the RHS1 of the adds we will introduce to create new
		     phi arguments.  */
		  slsr_cand_t basis = lookup_cand (c->basis);
		  tree basis_name = gimple_assign_lhs (basis->cand_stmt);

		  /* Create a new phi statement that will represent C's true
		     basis after the transformation is complete.  */
		  location_t loc = gimple_location (c->cand_stmt);
		  tree name = create_phi_basis (c, phi, basis_name,
						loc, UNKNOWN_STRIDE);

		  /* Replace C with an add of the new basis phi and the
		     increment.  */
		  replace_one_candidate (c, i, name);
		}
	    }
	  else
	    {
	      /* Unconditional case:  add the increment directly to the
		 basis's LHS.  */
	      slsr_cand_t basis = lookup_cand (c->basis);
	      tree basis_name = gimple_assign_lhs (basis->cand_stmt);
	      replace_one_candidate (c, i, basis_name);
	    }
	}
    }

  /* Recurse over the remainder of the candidate tree.  */
  if (c->sibling)
    replace_profitable_candidates (lookup_cand (c->sibling));

  if (c->dependent)
    replace_profitable_candidates (lookup_cand (c->dependent));
}
3516 \f
/* Analyze costs of related candidates in the candidate vector,
   and make beneficial replacements.  */

static void
analyze_candidates_and_replace (void)
{
  unsigned i;
  slsr_cand_t c;

  /* Each candidate that has a null basis and a non-null
     dependent is the root of a tree of related statements.
     Analyze each tree to determine a subset of those
     statements that can be replaced with maximum benefit.  */
  FOR_EACH_VEC_ELT (cand_vec, i, c)
    {
      slsr_cand_t first_dep;

      if (c->basis != 0 || c->dependent == 0)
	continue;

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "\nProcessing dependency tree rooted at %d.\n",
		 c->cand_num);

      first_dep = lookup_cand (c->dependent);

      /* If this is a chain of CAND_REFs, unconditionally replace
	 each of them with a strength-reduced data reference.  */
      if (c->kind == CAND_REF)
	replace_refs (c);

      /* If the common stride of all related candidates is a known
	 constant, each candidate without a phi-dependence can be
	 profitably replaced.  Each replaces a multiply by a single
	 add, with the possibility that a feeding add also goes dead.
	 A candidate with a phi-dependence is replaced only if the
	 compensation code it requires is offset by the strength
	 reduction savings.  */
      else if (TREE_CODE (c->stride) == INTEGER_CST)
	replace_uncond_cands_and_profitable_phis (first_dep);

      /* When the stride is an SSA name, it may still be profitable
	 to replace some or all of the dependent candidates, depending
	 on whether the introduced increments can be reused, or are
	 less expensive to calculate than the replaced statements.  */
      else
	{
	  machine_mode mode;
	  bool speed;

	  /* Determine whether we'll be generating pointer arithmetic
	     when replacing candidates.  */
	  address_arithmetic_p = (c->kind == CAND_ADD
				  && POINTER_TYPE_P (c->cand_type));

	  /* If all candidates have already been replaced under other
	     interpretations, nothing remains to be done.  */
	  if (!count_candidates (c))
	    continue;

	  /* Construct an array of increments for this candidate chain.
	     The array is freed at the bottom of this block, after all
	     replacements for the chain are complete.  */
	  incr_vec = XNEWVEC (incr_info, MAX_INCR_VEC_LEN);
	  incr_vec_len = 0;
	  record_increments (c);

	  /* Determine which increments are profitable to replace.  */
	  mode = TYPE_MODE (TREE_TYPE (gimple_assign_lhs (c->cand_stmt)));
	  speed = optimize_cands_for_speed_p (c);
	  analyze_increments (first_dep, mode, speed);

	  /* Insert initializers of the form T_0 = stride * increment
	     for use in profitable replacements.  */
	  insert_initializers (first_dep);
	  dump_incr_vec ();

	  /* Perform the replacements.  */
	  replace_profitable_candidates (first_dep);
	  free (incr_vec);
	}
    }
}
3598
namespace {

/* Pass descriptor for the straight-line strength reduction pass.  */

const pass_data pass_data_strength_reduction =
{
  GIMPLE_PASS, /* type */
  "slsr", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_GIMPLE_SLSR, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

/* The straight-line strength reduction pass object.  */

class pass_strength_reduction : public gimple_opt_pass
{
public:
  pass_strength_reduction (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_strength_reduction, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only when -ftree-slsr is enabled.  */
  virtual bool gate (function *) { return flag_tree_slsr; }
  virtual unsigned int execute (function *);

}; // class pass_strength_reduction

/* Main entry point for the pass:  collect candidates by walking the
   CFG in dominator order, then analyze and replace them.  */

unsigned
pass_strength_reduction::execute (function *fun)
{
  /* Create the obstack where candidates will reside.  */
  gcc_obstack_init (&cand_obstack);

  /* Allocate the candidate vector.  */
  cand_vec.create (128);

  /* Allocate the mapping from statements to candidate indices.  */
  stmt_cand_map = new hash_map<gimple, slsr_cand_t>;

  /* Create the obstack where candidate chains will reside.  */
  gcc_obstack_init (&chain_obstack);

  /* Allocate the mapping from base expressions to candidate chains.  */
  base_cand_map = new hash_table<cand_chain_hasher> (500);

  /* Allocate the mapping from bases to alternative bases.  */
  alt_base_map = new hash_map<tree, tree>;

  /* Initialize the loop optimizer.  We need to detect flow across
     back edges, and this gives us dominator information as well.  */
  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

  /* Walk the CFG in predominator order looking for strength reduction
     candidates.  */
  find_candidates_dom_walker (CDI_DOMINATORS)
    .walk (fun->cfg->x_entry_block_ptr);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      dump_cand_vec ();
      dump_cand_chains ();
    }

  /* The alternative-base mapping is needed only during candidate
     collection; release it before the replacement phase.  */
  delete alt_base_map;
  free_affine_expand_cache (&name_expansions);

  /* Analyze costs and make appropriate replacements.  */
  analyze_candidates_and_replace ();

  /* Tear down in reverse order of construction.  */
  loop_optimizer_finalize ();
  delete base_cand_map;
  base_cand_map = NULL;
  obstack_free (&chain_obstack, NULL);
  delete stmt_cand_map;
  cand_vec.release ();
  obstack_free (&cand_obstack, NULL);

  return 0;
}

} // anon namespace
3681
3682 gimple_opt_pass *
3683 make_pass_strength_reduction (gcc::context *ctxt)
3684 {
3685 return new pass_strength_reduction (ctxt);
3686 }