1 /* Straight-line strength reduction.
2 Copyright (C) 2012-2014 Free Software Foundation, Inc.
3 Contributed by Bill Schmidt, IBM <wschmidt@linux.ibm.com>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 /* There are many algorithms for performing strength reduction on
22 loops. This is not one of them. IVOPTS handles strength reduction
23 of induction variables just fine. This pass is intended to pick
24 up the crumbs it leaves behind, by considering opportunities for
25 strength reduction along dominator paths.
26
27 Strength reduction addresses explicit multiplies, and certain
28 multiplies implicit in addressing expressions. It would also be
29 possible to apply strength reduction to divisions and modulos,
30 but such opportunities are relatively uncommon.
31
32 Strength reduction is also currently restricted to integer operations.
33 If desired, it could be extended to floating-point operations under
34 control of something like -funsafe-math-optimizations. */
35
36 #include "config.h"
37 #include "system.h"
38 #include "coretypes.h"
39 #include "tree.h"
40 #include "pointer-set.h"
41 #include "hash-table.h"
42 #include "basic-block.h"
43 #include "tree-ssa-alias.h"
44 #include "internal-fn.h"
45 #include "gimple-expr.h"
46 #include "is-a.h"
47 #include "gimple.h"
48 #include "gimple-iterator.h"
49 #include "gimplify-me.h"
50 #include "stor-layout.h"
51 #include "expr.h"
52 #include "tree-pass.h"
53 #include "cfgloop.h"
54 #include "gimple-pretty-print.h"
55 #include "gimple-ssa.h"
56 #include "tree-cfg.h"
57 #include "tree-phinodes.h"
58 #include "ssa-iterators.h"
59 #include "stringpool.h"
60 #include "tree-ssanames.h"
61 #include "domwalk.h"
62 #include "expmed.h"
63 #include "params.h"
64 #include "tree-ssa-address.h"
65 #include "tree-affine.h"
66 #include "wide-int-print.h"
67 #include "builtins.h"
68 \f
69 /* Information about a strength reduction candidate. Each statement
70 in the candidate table represents an expression of one of the
71 following forms (the special case of CAND_REF will be described
72 later):
73
74 (CAND_MULT) S1: X = (B + i) * S
75 (CAND_ADD) S1: X = B + (i * S)
76
77 Here X and B are SSA names, i is an integer constant, and S is
78 either an SSA name or a constant. We call B the "base," i the
79 "index", and S the "stride."
80
81 Any statement S0 that dominates S1 and is of the form:
82
83 (CAND_MULT) S0: Y = (B + i') * S
84 (CAND_ADD) S0: Y = B + (i' * S)
85
86 is called a "basis" for S1. In both cases, S1 may be replaced by
87
88 S1': X = Y + (i - i') * S,
89
90 where (i - i') * S is folded to the extent possible.
91
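   As an illustrative sketch with concrete numbers (i' = 2, i = 7; the
   names b, s, x and y are invented for the example, not taken from a
   test case):

     S0:  y = (b + 2) * s;
     S1:  x = (b + 7) * s;

   S1 may then be rewritten as

     S1': x = y + 5 * s;

   where the multiply by 5 folds away entirely if s is a constant.
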
92 All gimple statements are visited in dominator order, and each
93 statement that may contribute to one of the forms of S1 above is
94 given at least one entry in the candidate table. Such statements
95 include addition, pointer addition, subtraction, multiplication,
96 negation, copies, and nontrivial type casts. If a statement may
97 represent more than one expression of the forms of S1 above,
98 multiple "interpretations" are stored in the table and chained
99 together. Examples:
100
101 * An add of two SSA names may treat either operand as the base.
102 * A multiply of two SSA names, likewise.
103 * A copy or cast may be thought of as either a CAND_MULT with
104 i = 0 and S = 1, or as a CAND_ADD with i = 0 or S = 0.
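     For instance (SSA names invented for illustration), x_3 = a_1 + b_2
     is entered once as a CAND_ADD with base a_1 and stride b_2, and a
     second, chained interpretation records base b_2 with stride a_1,
     so a basis can be found through either operand.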
105
106 Candidate records are allocated from an obstack. They are addressed
107 both from a hash table keyed on S1, and from a vector of candidate
108 pointers arranged in predominator order.
109
110 Opportunity note
111 ----------------
112 Currently we don't recognize:
113
114 S0: Y = (S * i') - B
115 S1: X = (S * i) - B
116
117 as a strength reduction opportunity, even though this S1 would
118 also be replaceable by the S1' above. This can be added if it
119 comes up in practice.
120
121 Strength reduction in addressing
122 --------------------------------
123 There is another kind of candidate known as CAND_REF. A CAND_REF
124 describes a statement containing a memory reference having
125 complex addressing that might benefit from strength reduction.
126 Specifically, we are interested in references for which
127 get_inner_reference returns a base address, offset, and bitpos as
128 follows:
129
130 base: MEM_REF (T1, C1)
131 offset: MULT_EXPR (PLUS_EXPR (T2, C2), C3)
132 bitpos: C4 * BITS_PER_UNIT
133
134 Here T1 and T2 are arbitrary trees, and C1, C2, C3, C4 are
135 arbitrary integer constants. Note that C2 may be zero, in which
136 case the offset will be MULT_EXPR (T2, C3).
137
138 When this pattern is recognized, the original memory reference
139 can be replaced with:
140
141 MEM_REF (POINTER_PLUS_EXPR (T1, MULT_EXPR (T2, C3)),
142 C1 + (C2 * C3) + C4)
143
144 which distributes the multiply to allow constant folding. When
145 two or more addressing expressions can be represented by MEM_REFs
146 of this form, differing only in the constants C1, C2, and C4,
147 making this substitution produces more efficient addressing during
148 the RTL phases. When there are not at least two expressions with
149 the same values of T1, T2, and C3, there is nothing to be gained
150 by the replacement.
151
152 Strength reduction of CAND_REFs uses the same infrastructure as
153 that used by CAND_MULTs and CAND_ADDs. We record T1 in the base (B)
154 field, MULT_EXPR (T2, C3) in the stride (S) field, and
155 C1 + (C2 * C3) + C4 in the index (i) field. A basis for a CAND_REF
156 is thus another CAND_REF with the same B and S values. When at
157 least two CAND_REFs are chained together using the basis relation,
158 each of them is replaced as above, resulting in improved code
159 generation for addressing.
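
   As a hedged source-level sketch (the struct and field names are
   invented, not taken from a test case):

     void f (struct s *p, int i)
     {
       p->a[i + 4] = 1;
       p->b[i + 6] = 2;
     }

   Both stores decompose to the same T1 (p), T2 (i) and C3 (the element
   size), differing only in the constants C1, C2 and C4, so the second
   reference can reuse the distributed addressing form computed for the
   first.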
160
161 Conditional candidates
162 ======================
163
164 Conditional candidates are best illustrated with an example.
165 Consider the code sequence:
166
167 (1) x_0 = ...;
168 (2) a_0 = x_0 * 5; MULT (B: x_0; i: 0; S: 5)
169 if (...)
170 (3) x_1 = x_0 + 1; ADD (B: x_0, i: 1; S: 1)
171 (4) x_2 = PHI <x_0, x_1>; PHI (B: x_0, i: 0, S: 1)
172 (5) x_3 = x_2 + 1; ADD (B: x_2, i: 1, S: 1)
173 (6) a_1 = x_3 * 5; MULT (B: x_2, i: 1; S: 5)
174
175 Here strength reduction is complicated by the uncertain value of x_2.
176 A legitimate transformation is:
177
178 (1) x_0 = ...;
179 (2) a_0 = x_0 * 5;
180 if (...)
181 {
182 (3) [x_1 = x_0 + 1;]
183 (3a) t_1 = a_0 + 5;
184 }
185 (4) [x_2 = PHI <x_0, x_1>;]
186 (4a) t_2 = PHI <a_0, t_1>;
187 (5) [x_3 = x_2 + 1;]
188 (6r) a_1 = t_2 + 5;
189
190 where the bracketed instructions may go dead.
191
192 To recognize this opportunity, we have to observe that statement (6)
193 has a "hidden basis" (2). The hidden basis is unlike a normal basis
194 in that the statement and the hidden basis have different base SSA
195 names (x_2 and x_0, respectively). The relationship is established
196 when a statement's base name (x_2) is defined by a phi statement (4),
197 each argument of which (x_0, x_1) has an identical "derived base name."
198 If the argument is defined by a candidate (as x_1 is by (3)) that is a
199 CAND_ADD having a stride of 1, the derived base name of the argument is
200 the base name of the candidate (x_0). Otherwise, the argument itself
201 is its derived base name (as is the case with argument x_0).
202
203 The hidden basis for statement (6) is the nearest dominating candidate
204 whose base name is the derived base name (x_0) of the feeding phi (4),
205 and whose stride is identical to that of the statement. We can then
206 create the new "phi basis" (4a) and feeding adds along incoming arcs (3a),
207 allowing the final replacement of (6) by the strength-reduced (6r).
208
209 To facilitate this, a new kind of candidate (CAND_PHI) is introduced.
210 A CAND_PHI is not a candidate for replacement, but is maintained in the
211 candidate table to ease discovery of hidden bases. Any phi statement
212 whose arguments share a common derived base name is entered into the
213 table with the derived base name, an (arbitrary) index of zero, and a
214 stride of 1. A statement with a hidden basis can then be detected by
215 simply looking up its feeding phi definition in the candidate table,
216 extracting the derived base name, and searching for a basis in the
217 usual manner after substituting the derived base name.
218
219 Note that the transformation is only valid when the original phi and
220 the statements that define the phi's arguments are all at the same
221 position in the loop hierarchy. */
222
223
224 /* Index into the candidate vector, offset by 1. VECs are zero-based,
225 while cand_idx's are one-based, with zero indicating null. */
226 typedef unsigned cand_idx;
227
228 /* The kind of candidate. */
229 enum cand_kind
230 {
231 CAND_MULT,
232 CAND_ADD,
233 CAND_REF,
234 CAND_PHI
235 };
236
237 struct slsr_cand_d
238 {
239 /* The candidate statement S1. */
240 gimple cand_stmt;
241
242 /* The base expression B: often an SSA name, but not always. */
243 tree base_expr;
244
245 /* The stride S. */
246 tree stride;
247
248 /* The index constant i. */
249 widest_int index;
250
251 /* The type of the candidate. This is normally the type of base_expr,
252 but casts may have occurred when combining feeding instructions.
253 A candidate can only be a basis for candidates of the same final type.
254 (For CAND_REFs, this is the type to be used for operand 1 of the
255 replacement MEM_REF.) */
256 tree cand_type;
257
258 /* The kind of candidate (CAND_MULT, etc.). */
259 enum cand_kind kind;
260
261 /* Index of this candidate in the candidate vector. */
262 cand_idx cand_num;
263
264 /* Index of the next candidate record for the same statement.
265 A statement may be useful in more than one way (e.g., due to
266 commutativity). So we can have multiple "interpretations"
267 of a statement. */
268 cand_idx next_interp;
269
270 /* Index of the basis statement S0, if any, in the candidate vector. */
271 cand_idx basis;
272
273 /* First candidate for which this candidate is a basis, if one exists. */
274 cand_idx dependent;
275
276 /* Next candidate having the same basis as this one. */
277 cand_idx sibling;
278
279 /* If this is a conditional candidate, the CAND_PHI candidate
280 that defines the base SSA name B. */
281 cand_idx def_phi;
282
283 /* Savings that can be expected from eliminating dead code if this
284 candidate is replaced. */
285 int dead_savings;
286 };
287
288 typedef struct slsr_cand_d slsr_cand, *slsr_cand_t;
289 typedef const struct slsr_cand_d *const_slsr_cand_t;
290
291 /* Pointers to candidates are chained together as part of a mapping
292 from base expressions to the candidates that use them. */
293
294 struct cand_chain_d
295 {
296 /* Base expression for the chain of candidates: often, but not
297 always, an SSA name. */
298 tree base_expr;
299
300 /* Pointer to a candidate. */
301 slsr_cand_t cand;
302
303 /* Chain pointer. */
304 struct cand_chain_d *next;
305
306 };
307
308 typedef struct cand_chain_d cand_chain, *cand_chain_t;
309 typedef const struct cand_chain_d *const_cand_chain_t;
310
311 /* Information about a unique "increment" associated with candidates
312 having an SSA name for a stride. An increment is the difference
313 between the index of the candidate and the index of its basis,
314 i.e., (i - i') as discussed in the module commentary.
315
316 When we are not going to generate address arithmetic we treat
317 increments that differ only in sign as the same, allowing sharing
318 of the cost of initializers. The absolute value of the increment
319 is stored in the incr_info. */
320
321 struct incr_info_d
322 {
323 /* The increment that relates a candidate to its basis. */
324 widest_int incr;
325
326 /* How many times the increment occurs in the candidate tree. */
327 unsigned count;
328
329 /* Cost of replacing candidates using this increment. Negative and
330 zero costs indicate replacement should be performed. */
331 int cost;
332
333 /* If this increment is profitable but is not -1, 0, or 1, it requires
334 an initializer T_0 = stride * incr to be found or introduced in the
335 nearest common dominator of all candidates. This field holds T_0
336 for subsequent use. */
337 tree initializer;
338
339 /* If the initializer was found to already exist, this is the block
340 where it was found. */
341 basic_block init_bb;
342 };
343
344 typedef struct incr_info_d incr_info, *incr_info_t;
345
346 /* Candidates are maintained in a vector. If candidate X dominates
347 candidate Y, then X appears before Y in the vector; but the
348 converse does not necessarily hold. */
349 static vec<slsr_cand_t> cand_vec;
350
351 enum cost_consts
352 {
353 COST_NEUTRAL = 0,
354 COST_INFINITE = 1000
355 };
356
357 enum stride_status
358 {
359 UNKNOWN_STRIDE = 0,
360 KNOWN_STRIDE = 1
361 };
362
363 enum phi_adjust_status
364 {
365 NOT_PHI_ADJUST = 0,
366 PHI_ADJUST = 1
367 };
368
369 enum count_phis_status
370 {
371 DONT_COUNT_PHIS = 0,
372 COUNT_PHIS = 1
373 };
374
375 /* Pointer map embodying a mapping from statements to candidates. */
376 static struct pointer_map_t *stmt_cand_map;
377
378 /* Obstack for candidates. */
379 static struct obstack cand_obstack;
380
381 /* Obstack for candidate chains. */
382 static struct obstack chain_obstack;
383
384 /* An array INCR_VEC of incr_infos is used during analysis of related
385 candidates having an SSA name for a stride. INCR_VEC_LEN describes
386 its current length. MAX_INCR_VEC_LEN is used to avoid costly
387 pathological cases. */
388 static incr_info_t incr_vec;
389 static unsigned incr_vec_len;
390 const int MAX_INCR_VEC_LEN = 16;
391
392 /* For a chain of candidates with unknown stride, indicates whether or not
393 we must generate pointer arithmetic when replacing statements. */
394 static bool address_arithmetic_p;
395
396 /* Forward function declarations. */
397 static slsr_cand_t base_cand_from_table (tree);
398 static tree introduce_cast_before_cand (slsr_cand_t, tree, tree);
399 static bool legal_cast_p_1 (tree, tree);
400 \f
401 /* Produce a pointer to the IDX'th candidate in the candidate vector. */
402
403 static slsr_cand_t
404 lookup_cand (cand_idx idx)
405 {
406 return cand_vec[idx - 1];
407 }
408
409 /* Helper for hashing a candidate chain header. */
410
411 struct cand_chain_hasher : typed_noop_remove <cand_chain>
412 {
413 typedef cand_chain value_type;
414 typedef cand_chain compare_type;
415 static inline hashval_t hash (const value_type *);
416 static inline bool equal (const value_type *, const compare_type *);
417 };
418
419 inline hashval_t
420 cand_chain_hasher::hash (const value_type *p)
421 {
422 tree base_expr = p->base_expr;
423 return iterative_hash_expr (base_expr, 0);
424 }
425
426 inline bool
427 cand_chain_hasher::equal (const value_type *chain1, const compare_type *chain2)
428 {
429 return operand_equal_p (chain1->base_expr, chain2->base_expr, 0);
430 }
431
432 /* Hash table embodying a mapping from base exprs to chains of candidates. */
433 static hash_table<cand_chain_hasher> *base_cand_map;
434 \f
435 /* Pointer map used by tree_to_aff_combination_expand. */
436 static struct pointer_map_t *name_expansions;
437 /* Pointer map embodying a mapping from bases to alternative bases. */
438 static struct pointer_map_t *alt_base_map;
439
440 /* Given BASE, use the tree affine combination facilities to
441 find the underlying tree expression for BASE, with any
442 immediate offset excluded.
443
444 N.B. we should eliminate this backtracking with better forward
445 analysis in a future release. */
446
447 static tree
448 get_alternative_base (tree base)
449 {
450 tree *result = (tree *) pointer_map_contains (alt_base_map, base);
451
452 if (result == NULL)
453 {
454 tree expr;
455 aff_tree aff;
456
457 tree_to_aff_combination_expand (base, TREE_TYPE (base),
458 &aff, &name_expansions);
459 aff.offset = 0;
460 expr = aff_combination_to_tree (&aff);
461
462 result = (tree *) pointer_map_insert (alt_base_map, base);
463 gcc_assert (!*result);
464
465 if (expr == base)
466 *result = NULL;
467 else
468 *result = expr;
469 }
470
471 return *result;
472 }
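
/* A hedged illustration of the above (the SSA name is invented): if
   BASE expands, through its affine combination, to "p_1 + 84", the
   immediate offset 84 is dropped and p_1 is returned as the
   alternative base; if the expansion is BASE itself, NULL is cached
   and returned so callers know nothing simpler is available.  */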
473
474 /* Look in the candidate table for a CAND_PHI that defines BASE and
475 return it if found; otherwise return NULL. */
476
477 static cand_idx
478 find_phi_def (tree base)
479 {
480 slsr_cand_t c;
481
482 if (TREE_CODE (base) != SSA_NAME)
483 return 0;
484
485 c = base_cand_from_table (base);
486
487 if (!c || c->kind != CAND_PHI)
488 return 0;
489
490 return c->cand_num;
491 }
492
493 /* Helper routine for find_basis_for_candidate. May be called twice:
494 once for the candidate's base expr, and optionally again either for
495 the candidate's phi definition or for a CAND_REF's alternative base
496 expression. */
497
498 static slsr_cand_t
499 find_basis_for_base_expr (slsr_cand_t c, tree base_expr)
500 {
501 cand_chain mapping_key;
502 cand_chain_t chain;
503 slsr_cand_t basis = NULL;
504
505 // Limit potential of N^2 behavior for long candidate chains.
506 int iters = 0;
507 int max_iters = PARAM_VALUE (PARAM_MAX_SLSR_CANDIDATE_SCAN);
508
509 mapping_key.base_expr = base_expr;
510 chain = base_cand_map->find (&mapping_key);
511
512 for (; chain && iters < max_iters; chain = chain->next, ++iters)
513 {
514 slsr_cand_t one_basis = chain->cand;
515
516 if (one_basis->kind != c->kind
517 || one_basis->cand_stmt == c->cand_stmt
518 || !operand_equal_p (one_basis->stride, c->stride, 0)
519 || !types_compatible_p (one_basis->cand_type, c->cand_type)
520 || !dominated_by_p (CDI_DOMINATORS,
521 gimple_bb (c->cand_stmt),
522 gimple_bb (one_basis->cand_stmt)))
523 continue;
524
525 if (!basis || basis->cand_num < one_basis->cand_num)
526 basis = one_basis;
527 }
528
529 return basis;
530 }
531
532 /* Use the base expr from candidate C to look for possible candidates
533 that can serve as a basis for C. Each potential basis must also
534 appear in a block that dominates the candidate statement and have
535 the same stride and type. If more than one possible basis exists,
536 the one with highest index in the vector is chosen; this will be
537 the most immediately dominating basis. */
538
539 static int
540 find_basis_for_candidate (slsr_cand_t c)
541 {
542 slsr_cand_t basis = find_basis_for_base_expr (c, c->base_expr);
543
544 /* If a candidate doesn't have a basis using its base expression,
545 it may have a basis hidden by one or more intervening phis. */
546 if (!basis && c->def_phi)
547 {
548 basic_block basis_bb, phi_bb;
549 slsr_cand_t phi_cand = lookup_cand (c->def_phi);
550 basis = find_basis_for_base_expr (c, phi_cand->base_expr);
551
552 if (basis)
553 {
554 /* A hidden basis must dominate the phi-definition of the
555 candidate's base name. */
556 phi_bb = gimple_bb (phi_cand->cand_stmt);
557 basis_bb = gimple_bb (basis->cand_stmt);
558
559 if (phi_bb == basis_bb
560 || !dominated_by_p (CDI_DOMINATORS, phi_bb, basis_bb))
561 {
562 basis = NULL;
563 c->basis = 0;
564 }
565
566 /* If we found a hidden basis, estimate additional dead-code
567 savings if the phi and its feeding statements can be removed. */
568 if (basis && has_single_use (gimple_phi_result (phi_cand->cand_stmt)))
569 c->dead_savings += phi_cand->dead_savings;
570 }
571 }
572
573 if (flag_expensive_optimizations && !basis && c->kind == CAND_REF)
574 {
575 tree alt_base_expr = get_alternative_base (c->base_expr);
576 if (alt_base_expr)
577 basis = find_basis_for_base_expr (c, alt_base_expr);
578 }
579
580 if (basis)
581 {
582 c->sibling = basis->dependent;
583 basis->dependent = c->cand_num;
584 return basis->cand_num;
585 }
586
587 return 0;
588 }
589
590 /* Record a mapping from BASE to C, indicating that C may potentially serve
591 as a basis using that base expression. BASE may be the same as
592    C->BASE_EXPR; alternatively BASE can be a different tree that shares the
593    underlying expression of C->BASE_EXPR.  */
594
595 static void
596 record_potential_basis (slsr_cand_t c, tree base)
597 {
598 cand_chain_t node;
599 cand_chain **slot;
600
601 gcc_assert (base);
602
603 node = (cand_chain_t) obstack_alloc (&chain_obstack, sizeof (cand_chain));
604 node->base_expr = base;
605 node->cand = c;
606 node->next = NULL;
607 slot = base_cand_map->find_slot (node, INSERT);
608
609 if (*slot)
610 {
611 cand_chain_t head = (cand_chain_t) (*slot);
612 node->next = head->next;
613 head->next = node;
614 }
615 else
616 *slot = node;
617 }
618
619 /* Allocate storage for a new candidate and initialize its fields.
620 Attempt to find a basis for the candidate.
621
622 For CAND_REF, an alternative base may also be recorded and used
623 to find a basis. This helps cases where the expression hidden
624 behind BASE (which is usually an SSA_NAME) has immediate offset,
625 e.g.
626
627 a2[i][j] = 1;
628 a2[i + 20][j] = 2; */
629
630 static slsr_cand_t
631 alloc_cand_and_find_basis (enum cand_kind kind, gimple gs, tree base,
632 const widest_int &index, tree stride, tree ctype,
633 unsigned savings)
634 {
635 slsr_cand_t c = (slsr_cand_t) obstack_alloc (&cand_obstack,
636 sizeof (slsr_cand));
637 c->cand_stmt = gs;
638 c->base_expr = base;
639 c->stride = stride;
640 c->index = index;
641 c->cand_type = ctype;
642 c->kind = kind;
643 c->cand_num = cand_vec.length () + 1;
644 c->next_interp = 0;
645 c->dependent = 0;
646 c->sibling = 0;
647 c->def_phi = kind == CAND_MULT ? find_phi_def (base) : 0;
648 c->dead_savings = savings;
649
650 cand_vec.safe_push (c);
651
652 if (kind == CAND_PHI)
653 c->basis = 0;
654 else
655 c->basis = find_basis_for_candidate (c);
656
657 record_potential_basis (c, base);
658 if (flag_expensive_optimizations && kind == CAND_REF)
659 {
660 tree alt_base = get_alternative_base (base);
661 if (alt_base)
662 record_potential_basis (c, alt_base);
663 }
664
665 return c;
666 }
667
668 /* Determine the target cost of statement GS when compiling according
669 to SPEED. */
670
671 static int
672 stmt_cost (gimple gs, bool speed)
673 {
674 tree lhs, rhs1, rhs2;
675 enum machine_mode lhs_mode;
676
677 gcc_assert (is_gimple_assign (gs));
678 lhs = gimple_assign_lhs (gs);
679 rhs1 = gimple_assign_rhs1 (gs);
680 lhs_mode = TYPE_MODE (TREE_TYPE (lhs));
681
682 switch (gimple_assign_rhs_code (gs))
683 {
684 case MULT_EXPR:
685 rhs2 = gimple_assign_rhs2 (gs);
686
687 if (tree_fits_shwi_p (rhs2))
688 return mult_by_coeff_cost (tree_to_shwi (rhs2), lhs_mode, speed);
689
690 gcc_assert (TREE_CODE (rhs1) != INTEGER_CST);
691 return mul_cost (speed, lhs_mode);
692
693 case PLUS_EXPR:
694 case POINTER_PLUS_EXPR:
695 case MINUS_EXPR:
696 return add_cost (speed, lhs_mode);
697
698 case NEGATE_EXPR:
699 return neg_cost (speed, lhs_mode);
700
701 case NOP_EXPR:
702 return convert_cost (lhs_mode, TYPE_MODE (TREE_TYPE (rhs1)), speed);
703
704 /* Note that we don't assign costs to copies that in most cases
705 will go away. */
706 default:
707 ;
708 }
709
710 gcc_unreachable ();
711 return 0;
712 }
713
714 /* Look up the defining statement for BASE_IN and return a pointer
715 to its candidate in the candidate table, if any; otherwise NULL.
716 Only CAND_ADD and CAND_MULT candidates are returned. */
717
718 static slsr_cand_t
719 base_cand_from_table (tree base_in)
720 {
721 slsr_cand_t *result;
722
723 gimple def = SSA_NAME_DEF_STMT (base_in);
724 if (!def)
725 return (slsr_cand_t) NULL;
726
727 result = (slsr_cand_t *) pointer_map_contains (stmt_cand_map, def);
728
729 if (result && (*result)->kind != CAND_REF)
730 return *result;
731
732 return (slsr_cand_t) NULL;
733 }
734
735 /* Add an entry to the statement-to-candidate mapping. */
736
737 static void
738 add_cand_for_stmt (gimple gs, slsr_cand_t c)
739 {
740 void **slot = pointer_map_insert (stmt_cand_map, gs);
741 gcc_assert (!*slot);
742 *slot = c;
743 }
744 \f
745 /* Given PHI, which is a phi statement, determine whether it
746 satisfies all the requirements of a phi candidate. If so, create
747 a candidate. Note that a CAND_PHI never has a basis itself, but
748 is used to help find a basis for subsequent candidates. */
749
750 static void
751 slsr_process_phi (gimple phi, bool speed)
752 {
753 unsigned i;
754 tree arg0_base = NULL_TREE, base_type;
755 slsr_cand_t c;
756 struct loop *cand_loop = gimple_bb (phi)->loop_father;
757 unsigned savings = 0;
758
759 /* A CAND_PHI requires each of its arguments to have the same
760 derived base name. (See the module header commentary for a
761 definition of derived base names.) Furthermore, all feeding
762 definitions must be in the same position in the loop hierarchy
763 as PHI. */
764
765 for (i = 0; i < gimple_phi_num_args (phi); i++)
766 {
767 slsr_cand_t arg_cand;
768 tree arg = gimple_phi_arg_def (phi, i);
769 tree derived_base_name = NULL_TREE;
770 gimple arg_stmt = NULL;
771 basic_block arg_bb = NULL;
772
773 if (TREE_CODE (arg) != SSA_NAME)
774 return;
775
776 arg_cand = base_cand_from_table (arg);
777
778 if (arg_cand)
779 {
780 while (arg_cand->kind != CAND_ADD && arg_cand->kind != CAND_PHI)
781 {
782 if (!arg_cand->next_interp)
783 return;
784
785 arg_cand = lookup_cand (arg_cand->next_interp);
786 }
787
788 if (!integer_onep (arg_cand->stride))
789 return;
790
791 derived_base_name = arg_cand->base_expr;
792 arg_stmt = arg_cand->cand_stmt;
793 arg_bb = gimple_bb (arg_stmt);
794
795 /* Gather potential dead code savings if the phi statement
796 can be removed later on. */
797 if (has_single_use (arg))
798 {
799 if (gimple_code (arg_stmt) == GIMPLE_PHI)
800 savings += arg_cand->dead_savings;
801 else
802 savings += stmt_cost (arg_stmt, speed);
803 }
804 }
805 else
806 {
807 derived_base_name = arg;
808
809 if (SSA_NAME_IS_DEFAULT_DEF (arg))
810 arg_bb = single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun));
811 else
812 arg_bb = gimple_bb (SSA_NAME_DEF_STMT (arg));
813 }
814
815 if (!arg_bb || arg_bb->loop_father != cand_loop)
816 return;
817
818 if (i == 0)
819 arg0_base = derived_base_name;
820 else if (!operand_equal_p (derived_base_name, arg0_base, 0))
821 return;
822 }
823
824 /* Create the candidate. "alloc_cand_and_find_basis" is named
825 misleadingly for this case, as no basis will be sought for a
826 CAND_PHI. */
827 base_type = TREE_TYPE (arg0_base);
828
829 c = alloc_cand_and_find_basis (CAND_PHI, phi, arg0_base,
830 0, integer_one_node, base_type, savings);
831
832 /* Add the candidate to the statement-candidate mapping. */
833 add_cand_for_stmt (phi, c);
834 }
835
836 /* Given PBASE which is a pointer to tree, look up the defining
837 statement for it and check whether the candidate is in the
838 form of:
839
840 X = B + (1 * S), S is integer constant
841 X = B + (i * S), S is integer one
842
843    If so, set PBASE to the candidate's base_expr and return the
844    widest_int (i * S).
845    Otherwise, just return a widest_int of zero.  */
846
847 static widest_int
848 backtrace_base_for_ref (tree *pbase)
849 {
850 tree base_in = *pbase;
851 slsr_cand_t base_cand;
852
853 STRIP_NOPS (base_in);
854
855 /* Strip off widening conversion(s) to handle cases where
856 e.g. 'B' is widened from an 'int' in order to calculate
857 a 64-bit address. */
858 if (CONVERT_EXPR_P (base_in)
859 && legal_cast_p_1 (base_in, TREE_OPERAND (base_in, 0)))
860 base_in = get_unwidened (base_in, NULL_TREE);
861
862 if (TREE_CODE (base_in) != SSA_NAME)
863 return 0;
864
865 base_cand = base_cand_from_table (base_in);
866
867 while (base_cand && base_cand->kind != CAND_PHI)
868 {
869 if (base_cand->kind == CAND_ADD
870 && base_cand->index == 1
871 && TREE_CODE (base_cand->stride) == INTEGER_CST)
872 {
873 /* X = B + (1 * S), S is integer constant. */
874 *pbase = base_cand->base_expr;
875 return wi::to_widest (base_cand->stride);
876 }
877 else if (base_cand->kind == CAND_ADD
878 && TREE_CODE (base_cand->stride) == INTEGER_CST
879 && integer_onep (base_cand->stride))
880 {
881 /* X = B + (i * S), S is integer one. */
882 *pbase = base_cand->base_expr;
883 return base_cand->index;
884 }
885
886 if (base_cand->next_interp)
887 base_cand = lookup_cand (base_cand->next_interp);
888 else
889 base_cand = NULL;
890 }
891
892 return 0;
893 }
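
/* An invented example of the backtracking above: if T2 is an SSA name
   defined by "t2_5 = t_3 + 8", and that statement was recorded as a
   CAND_ADD over t_3 with a constant increment of 8, the call rewrites
   *PBASE from t2_5 to t_3 and returns 8, letting the caller fold the
   extra 8 * C3 bytes into the MEM_REF's constant index.  */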
894
895 /* Look for the following pattern:
896
897 *PBASE: MEM_REF (T1, C1)
898
899 *POFFSET: MULT_EXPR (T2, C3) [C2 is zero]
900 or
901 MULT_EXPR (PLUS_EXPR (T2, C2), C3)
902 or
903 MULT_EXPR (MINUS_EXPR (T2, -C2), C3)
904
905 *PINDEX: C4 * BITS_PER_UNIT
906
907 If not present, leave the input values unchanged and return FALSE.
908 Otherwise, modify the input values as follows and return TRUE:
909
910 *PBASE: T1
911 *POFFSET: MULT_EXPR (T2, C3)
912 *PINDEX: C1 + (C2 * C3) + C4
913
914 When T2 is recorded by a CAND_ADD in the form of (T2' + C5), it
915 will be further restructured to:
916
917 *PBASE: T1
918 *POFFSET: MULT_EXPR (T2', C3)
919 *PINDEX: C1 + (C2 * C3) + C4 + (C5 * C3) */
920
921 static bool
922 restructure_reference (tree *pbase, tree *poffset, widest_int *pindex,
923 tree *ptype)
924 {
925 tree base = *pbase, offset = *poffset;
926 widest_int index = *pindex;
927 tree mult_op0, t1, t2, type;
928 widest_int c1, c2, c3, c4, c5;
929
930 if (!base
931 || !offset
932 || TREE_CODE (base) != MEM_REF
933 || TREE_CODE (offset) != MULT_EXPR
934 || TREE_CODE (TREE_OPERAND (offset, 1)) != INTEGER_CST
935 || wi::umod_floor (index, BITS_PER_UNIT) != 0)
936 return false;
937
938 t1 = TREE_OPERAND (base, 0);
939 c1 = widest_int::from (mem_ref_offset (base), SIGNED);
940 type = TREE_TYPE (TREE_OPERAND (base, 1));
941
942 mult_op0 = TREE_OPERAND (offset, 0);
943 c3 = wi::to_widest (TREE_OPERAND (offset, 1));
944
945 if (TREE_CODE (mult_op0) == PLUS_EXPR)
946
947 if (TREE_CODE (TREE_OPERAND (mult_op0, 1)) == INTEGER_CST)
948 {
949 t2 = TREE_OPERAND (mult_op0, 0);
950 c2 = wi::to_widest (TREE_OPERAND (mult_op0, 1));
951 }
952 else
953 return false;
954
955 else if (TREE_CODE (mult_op0) == MINUS_EXPR)
956
957 if (TREE_CODE (TREE_OPERAND (mult_op0, 1)) == INTEGER_CST)
958 {
959 t2 = TREE_OPERAND (mult_op0, 0);
960 c2 = -wi::to_widest (TREE_OPERAND (mult_op0, 1));
961 }
962 else
963 return false;
964
965 else
966 {
967 t2 = mult_op0;
968 c2 = 0;
969 }
970
971 c4 = wi::lrshift (index, LOG2_BITS_PER_UNIT);
972 c5 = backtrace_base_for_ref (&t2);
973
974 *pbase = t1;
975 *poffset = fold_build2 (MULT_EXPR, sizetype, fold_convert (sizetype, t2),
976 wide_int_to_tree (sizetype, c3));
977 *pindex = c1 + c2 * c3 + c4 + c5 * c3;
978 *ptype = type;
979
980 return true;
981 }
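
/* A worked instance with assumed constants: for
     *PBASE   = MEM_REF (T1, 4)
     *POFFSET = MULT_EXPR (PLUS_EXPR (T2, 2), 8)
     *PINDEX  = 32  (in bits, i.e. C4 = 4 bytes)
   we have C1 = 4, C2 = 2 and C3 = 8, so the routine produces
   *PBASE = T1, *POFFSET = MULT_EXPR (T2, 8) and
   *PINDEX = 4 + 2*8 + 4 = 24, plus C5 * 8 should backtrace_base_for_ref
   find a further additive constant hidden in T2.  */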
982
983 /* Given GS which contains a data reference, create a CAND_REF entry in
984 the candidate table and attempt to find a basis. */
985
986 static void
987 slsr_process_ref (gimple gs)
988 {
989 tree ref_expr, base, offset, type;
990 HOST_WIDE_INT bitsize, bitpos;
991 enum machine_mode mode;
992 int unsignedp, volatilep;
993 slsr_cand_t c;
994
995 if (gimple_vdef (gs))
996 ref_expr = gimple_assign_lhs (gs);
997 else
998 ref_expr = gimple_assign_rhs1 (gs);
999
1000 if (!handled_component_p (ref_expr)
1001 || TREE_CODE (ref_expr) == BIT_FIELD_REF
1002 || (TREE_CODE (ref_expr) == COMPONENT_REF
1003 && DECL_BIT_FIELD (TREE_OPERAND (ref_expr, 1))))
1004 return;
1005
1006 base = get_inner_reference (ref_expr, &bitsize, &bitpos, &offset, &mode,
1007 &unsignedp, &volatilep, false);
1008 widest_int index = bitpos;
1009
1010 if (!restructure_reference (&base, &offset, &index, &type))
1011 return;
1012
1013 c = alloc_cand_and_find_basis (CAND_REF, gs, base, index, offset,
1014 type, 0);
1015
1016 /* Add the candidate to the statement-candidate mapping. */
1017 add_cand_for_stmt (gs, c);
1018 }
1019
1020 /* Create a candidate entry for a statement GS, where GS multiplies
1021 two SSA names BASE_IN and STRIDE_IN. Propagate any known information
1022 about the two SSA names into the new candidate. Return the new
1023 candidate. */
1024
1025 static slsr_cand_t
1026 create_mul_ssa_cand (gimple gs, tree base_in, tree stride_in, bool speed)
1027 {
1028 tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
1029 widest_int index;
1030 unsigned savings = 0;
1031 slsr_cand_t c;
1032 slsr_cand_t base_cand = base_cand_from_table (base_in);
1033
1034 /* Look at all interpretations of the base candidate, if necessary,
1035 to find information to propagate into this candidate. */
1036 while (base_cand && !base && base_cand->kind != CAND_PHI)
1037 {
1038
1039 if (base_cand->kind == CAND_MULT && integer_onep (base_cand->stride))
1040 {
1041 /* Y = (B + i') * 1
1042 X = Y * Z
1043 ================
1044 X = (B + i') * Z */
1045 base = base_cand->base_expr;
1046 index = base_cand->index;
1047 stride = stride_in;
1048 ctype = base_cand->cand_type;
1049 if (has_single_use (base_in))
1050 savings = (base_cand->dead_savings
1051 + stmt_cost (base_cand->cand_stmt, speed));
1052 }
1053 else if (base_cand->kind == CAND_ADD
1054 && TREE_CODE (base_cand->stride) == INTEGER_CST)
1055 {
1056 /* Y = B + (i' * S), S constant
1057 X = Y * Z
1058 ============================
1059 X = B + ((i' * S) * Z) */
1060 base = base_cand->base_expr;
1061 index = base_cand->index * wi::to_widest (base_cand->stride);
1062 stride = stride_in;
1063 ctype = base_cand->cand_type;
1064 if (has_single_use (base_in))
1065 savings = (base_cand->dead_savings
1066 + stmt_cost (base_cand->cand_stmt, speed));
1067 }
1068
1069 if (base_cand->next_interp)
1070 base_cand = lookup_cand (base_cand->next_interp);
1071 else
1072 base_cand = NULL;
1073 }
1074
1075 if (!base)
1076 {
1077 /* No interpretations had anything useful to propagate, so
1078 produce X = (Y + 0) * Z. */
1079 base = base_in;
1080 index = 0;
1081 stride = stride_in;
1082 ctype = TREE_TYPE (base_in);
1083 }
1084
1085 c = alloc_cand_and_find_basis (CAND_MULT, gs, base, index, stride,
1086 ctype, savings);
1087 return c;
1088 }
1089
1090 /* Create a candidate entry for a statement GS, where GS multiplies
1091 SSA name BASE_IN by constant STRIDE_IN. Propagate any known
1092 information about BASE_IN into the new candidate. Return the new
1093 candidate. */
1094
1095 static slsr_cand_t
1096 create_mul_imm_cand (gimple gs, tree base_in, tree stride_in, bool speed)
1097 {
1098 tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
1099 widest_int index, temp;
1100 unsigned savings = 0;
1101 slsr_cand_t c;
1102 slsr_cand_t base_cand = base_cand_from_table (base_in);
1103
1104 /* Look at all interpretations of the base candidate, if necessary,
1105 to find information to propagate into this candidate. */
1106 while (base_cand && !base && base_cand->kind != CAND_PHI)
1107 {
1108 if (base_cand->kind == CAND_MULT
1109 && TREE_CODE (base_cand->stride) == INTEGER_CST)
1110 {
1111 /* Y = (B + i') * S, S constant
1112 X = Y * c
1113 ============================
1114 X = (B + i') * (S * c) */
1115 temp = wi::to_widest (base_cand->stride) * wi::to_widest (stride_in);
1116 if (wi::fits_to_tree_p (temp, TREE_TYPE (stride_in)))
1117 {
1118 base = base_cand->base_expr;
1119 index = base_cand->index;
1120 stride = wide_int_to_tree (TREE_TYPE (stride_in), temp);
1121 ctype = base_cand->cand_type;
1122 if (has_single_use (base_in))
1123 savings = (base_cand->dead_savings
1124 + stmt_cost (base_cand->cand_stmt, speed));
1125 }
1126 }
1127 else if (base_cand->kind == CAND_ADD && integer_onep (base_cand->stride))
1128 {
1129 /* Y = B + (i' * 1)
1130 X = Y * c
1131 ===========================
1132 X = (B + i') * c */
1133 base = base_cand->base_expr;
1134 index = base_cand->index;
1135 stride = stride_in;
1136 ctype = base_cand->cand_type;
1137 if (has_single_use (base_in))
1138 savings = (base_cand->dead_savings
1139 + stmt_cost (base_cand->cand_stmt, speed));
1140 }
1141 else if (base_cand->kind == CAND_ADD
1142 && base_cand->index == 1
1143 && TREE_CODE (base_cand->stride) == INTEGER_CST)
1144 {
1145 /* Y = B + (1 * S), S constant
1146 X = Y * c
1147 ===========================
1148 X = (B + S) * c */
1149 base = base_cand->base_expr;
1150 index = wi::to_widest (base_cand->stride);
1151 stride = stride_in;
1152 ctype = base_cand->cand_type;
1153 if (has_single_use (base_in))
1154 savings = (base_cand->dead_savings
1155 + stmt_cost (base_cand->cand_stmt, speed));
1156 }
1157
1158 if (base_cand->next_interp)
1159 base_cand = lookup_cand (base_cand->next_interp);
1160 else
1161 base_cand = NULL;
1162 }
1163
1164 if (!base)
1165 {
1166 /* No interpretations had anything useful to propagate, so
1167 produce X = (Y + 0) * c. */
1168 base = base_in;
1169 index = 0;
1170 stride = stride_in;
1171 ctype = TREE_TYPE (base_in);
1172 }
1173
1174 c = alloc_cand_and_find_basis (CAND_MULT, gs, base, index, stride,
1175 ctype, savings);
1176 return c;
1177 }
1178
1179 /* Given GS which is a multiply of scalar integers, make an appropriate
1180 entry in the candidate table. If this is a multiply of two SSA names,
1181 create two CAND_MULT interpretations and attempt to find a basis for
1182 each of them. Otherwise, create a single CAND_MULT and attempt to
1183 find a basis. */
1184
1185 static void
1186 slsr_process_mul (gimple gs, tree rhs1, tree rhs2, bool speed)
1187 {
1188 slsr_cand_t c, c2;
1189
1190 /* If this is a multiply of an SSA name with itself, it is highly
1191 unlikely that we will get a strength reduction opportunity, so
1192 don't record it as a candidate. This simplifies the logic for
1193 finding a basis, so if this is removed that must be considered. */
1194 if (rhs1 == rhs2)
1195 return;
1196
1197 if (TREE_CODE (rhs2) == SSA_NAME)
1198 {
1199 /* Record an interpretation of this statement in the candidate table
1200 assuming RHS1 is the base expression and RHS2 is the stride. */
1201 c = create_mul_ssa_cand (gs, rhs1, rhs2, speed);
1202
1203 /* Add the first interpretation to the statement-candidate mapping. */
1204 add_cand_for_stmt (gs, c);
1205
1206 /* Record another interpretation of this statement assuming RHS1
1207 is the stride and RHS2 is the base expression. */
1208 c2 = create_mul_ssa_cand (gs, rhs2, rhs1, speed);
1209 c->next_interp = c2->cand_num;
1210 }
1211 else
1212 {
1213 /* Record an interpretation for the multiply-immediate. */
1214 c = create_mul_imm_cand (gs, rhs1, rhs2, speed);
1215
1216 /* Add the interpretation to the statement-candidate mapping. */
1217 add_cand_for_stmt (gs, c);
1218 }
1219 }
1220
1221 /* Create a candidate entry for a statement GS, where GS adds two
1222 SSA names BASE_IN and ADDEND_IN if SUBTRACT_P is false, and
1223 subtracts ADDEND_IN from BASE_IN otherwise. Propagate any known
1224 information about the two SSA names into the new candidate.
1225 Return the new candidate. */
1226
1227 static slsr_cand_t
1228 create_add_ssa_cand (gimple gs, tree base_in, tree addend_in,
1229 bool subtract_p, bool speed)
1230 {
1231 tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL;
1232 widest_int index;
1233 unsigned savings = 0;
1234 slsr_cand_t c;
1235 slsr_cand_t base_cand = base_cand_from_table (base_in);
1236 slsr_cand_t addend_cand = base_cand_from_table (addend_in);
1237
1238 /* The most useful transformation is a multiply-immediate feeding
1239 an add or subtract. Look for that first. */
1240 while (addend_cand && !base && addend_cand->kind != CAND_PHI)
1241 {
1242 if (addend_cand->kind == CAND_MULT
1243 && addend_cand->index == 0
1244 && TREE_CODE (addend_cand->stride) == INTEGER_CST)
1245 {
1246 /* Z = (B + 0) * S, S constant
1247 X = Y +/- Z
1248 ===========================
1249 X = Y + ((+/-1 * S) * B) */
1250 base = base_in;
1251 index = wi::to_widest (addend_cand->stride);
1252 if (subtract_p)
1253 index = -index;
1254 stride = addend_cand->base_expr;
1255 ctype = TREE_TYPE (base_in);
1256 if (has_single_use (addend_in))
1257 savings = (addend_cand->dead_savings
1258 + stmt_cost (addend_cand->cand_stmt, speed));
1259 }
1260
1261 if (addend_cand->next_interp)
1262 addend_cand = lookup_cand (addend_cand->next_interp);
1263 else
1264 addend_cand = NULL;
1265 }
1266
1267 while (base_cand && !base && base_cand->kind != CAND_PHI)
1268 {
1269 if (base_cand->kind == CAND_ADD
1270 && (base_cand->index == 0
1271 || operand_equal_p (base_cand->stride,
1272 integer_zero_node, 0)))
1273 {
1274 /* Y = B + (i' * S), i' * S = 0
1275 X = Y +/- Z
1276 ============================
1277 X = B + (+/-1 * Z) */
1278 base = base_cand->base_expr;
1279 index = subtract_p ? -1 : 1;
1280 stride = addend_in;
1281 ctype = base_cand->cand_type;
1282 if (has_single_use (base_in))
1283 savings = (base_cand->dead_savings
1284 + stmt_cost (base_cand->cand_stmt, speed));
1285 }
1286 else if (subtract_p)
1287 {
1288 slsr_cand_t subtrahend_cand = base_cand_from_table (addend_in);
1289
1290 while (subtrahend_cand && !base && subtrahend_cand->kind != CAND_PHI)
1291 {
1292 if (subtrahend_cand->kind == CAND_MULT
1293 && subtrahend_cand->index == 0
1294 && TREE_CODE (subtrahend_cand->stride) == INTEGER_CST)
1295 {
1296 /* Z = (B + 0) * S, S constant
1297 X = Y - Z
1298 ===========================
1299 Value: X = Y + ((-1 * S) * B) */
1300 base = base_in;
1301 index = wi::to_widest (subtrahend_cand->stride);
1302 index = -index;
1303 stride = subtrahend_cand->base_expr;
1304 ctype = TREE_TYPE (base_in);
1305 if (has_single_use (addend_in))
1306 savings = (subtrahend_cand->dead_savings
1307 + stmt_cost (subtrahend_cand->cand_stmt, speed));
1308 }
1309
1310 if (subtrahend_cand->next_interp)
1311 subtrahend_cand = lookup_cand (subtrahend_cand->next_interp);
1312 else
1313 subtrahend_cand = NULL;
1314 }
1315 }
1316
1317 if (base_cand->next_interp)
1318 base_cand = lookup_cand (base_cand->next_interp);
1319 else
1320 base_cand = NULL;
1321 }
1322
1323 if (!base)
1324 {
1325 /* No interpretations had anything useful to propagate, so
1326 produce X = Y + (1 * Z). */
1327 base = base_in;
1328 index = subtract_p ? -1 : 1;
1329 stride = addend_in;
1330 ctype = TREE_TYPE (base_in);
1331 }
1332
1333 c = alloc_cand_and_find_basis (CAND_ADD, gs, base, index, stride,
1334 ctype, savings);
1335 return c;
1336 }
1337
1338 /* Create a candidate entry for a statement GS, where GS adds SSA
1339 name BASE_IN to constant INDEX_IN. Propagate any known information
1340 about BASE_IN into the new candidate. Return the new candidate. */
1341
1342 static slsr_cand_t
1343 create_add_imm_cand (gimple gs, tree base_in, const widest_int &index_in,
1344 bool speed)
1345 {
1346 enum cand_kind kind = CAND_ADD;
1347 tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
1348 widest_int index, multiple;
1349 unsigned savings = 0;
1350 slsr_cand_t c;
1351 slsr_cand_t base_cand = base_cand_from_table (base_in);
1352
1353 while (base_cand && !base && base_cand->kind != CAND_PHI)
1354 {
1355 signop sign = TYPE_SIGN (TREE_TYPE (base_cand->stride));
1356
1357 if (TREE_CODE (base_cand->stride) == INTEGER_CST
1358 && wi::multiple_of_p (index_in, wi::to_widest (base_cand->stride),
1359 sign, &multiple))
1360 {
1361 /* Y = (B + i') * S, S constant, c = kS for some integer k
1362 X = Y + c
1363 ============================
1364 X = (B + (i'+ k)) * S
1365 OR
1366 Y = B + (i' * S), S constant, c = kS for some integer k
1367 X = Y + c
1368 ============================
1369 X = (B + (i'+ k)) * S */
1370 kind = base_cand->kind;
1371 base = base_cand->base_expr;
1372 index = base_cand->index + multiple;
1373 stride = base_cand->stride;
1374 ctype = base_cand->cand_type;
1375 if (has_single_use (base_in))
1376 savings = (base_cand->dead_savings
1377 + stmt_cost (base_cand->cand_stmt, speed));
1378 }
1379
1380 if (base_cand->next_interp)
1381 base_cand = lookup_cand (base_cand->next_interp);
1382 else
1383 base_cand = NULL;
1384 }
1385
1386 if (!base)
1387 {
1388 /* No interpretations had anything useful to propagate, so
1389 produce X = Y + (c * 1). */
1390 kind = CAND_ADD;
1391 base = base_in;
1392 index = index_in;
1393 stride = integer_one_node;
1394 ctype = TREE_TYPE (base_in);
1395 }
1396
1397 c = alloc_cand_and_find_basis (kind, gs, base, index, stride,
1398 ctype, savings);
1399 return c;
1400 }
1401
1402 /* Given GS which is an add or subtract of scalar integers or pointers,
1403 make at least one appropriate entry in the candidate table. */
1404
1405 static void
1406 slsr_process_add (gimple gs, tree rhs1, tree rhs2, bool speed)
1407 {
1408 bool subtract_p = gimple_assign_rhs_code (gs) == MINUS_EXPR;
1409 slsr_cand_t c = NULL, c2;
1410
1411 if (TREE_CODE (rhs2) == SSA_NAME)
1412 {
1413 /* First record an interpretation assuming RHS1 is the base expression
1414 and RHS2 is the stride. But it doesn't make sense for the
1415 stride to be a pointer, so don't record a candidate in that case. */
1416 if (!POINTER_TYPE_P (TREE_TYPE (rhs2)))
1417 {
1418 c = create_add_ssa_cand (gs, rhs1, rhs2, subtract_p, speed);
1419
1420 /* Add the first interpretation to the statement-candidate
1421 mapping. */
1422 add_cand_for_stmt (gs, c);
1423 }
1424
1425 /* If the two RHS operands are identical, or this is a subtract,
1426 we're done. */
1427 if (operand_equal_p (rhs1, rhs2, 0) || subtract_p)
1428 return;
1429
1430 /* Otherwise, record another interpretation assuming RHS2 is the
1431 base expression and RHS1 is the stride, again provided that the
1432 stride is not a pointer. */
1433 if (!POINTER_TYPE_P (TREE_TYPE (rhs1)))
1434 {
1435 c2 = create_add_ssa_cand (gs, rhs2, rhs1, false, speed);
1436 if (c)
1437 c->next_interp = c2->cand_num;
1438 else
1439 add_cand_for_stmt (gs, c2);
1440 }
1441 }
1442 else
1443 {
1444 /* Record an interpretation for the add-immediate. */
1445 widest_int index = wi::to_widest (rhs2);
1446 if (subtract_p)
1447 index = -index;
1448
1449 c = create_add_imm_cand (gs, rhs1, index, speed);
1450
1451 /* Add the interpretation to the statement-candidate mapping. */
1452 add_cand_for_stmt (gs, c);
1453 }
1454 }
1455
1456 /* Given GS which is a negate of a scalar integer, make an appropriate
1457 entry in the candidate table. A negate is equivalent to a multiply
1458 by -1. */
1459
1460 static void
1461 slsr_process_neg (gimple gs, tree rhs1, bool speed)
1462 {
1463 /* Record a CAND_MULT interpretation for the multiply by -1. */
1464 slsr_cand_t c = create_mul_imm_cand (gs, rhs1, integer_minus_one_node, speed);
1465
1466 /* Add the interpretation to the statement-candidate mapping. */
1467 add_cand_for_stmt (gs, c);
1468 }
1469
1470 /* Helper function for legal_cast_p, operating on two trees.  Checks
1471 whether it's allowable to cast from RHS to LHS. See legal_cast_p
1472 for more details. */
1473
1474 static bool
1475 legal_cast_p_1 (tree lhs, tree rhs)
1476 {
1477 tree lhs_type, rhs_type;
1478 unsigned lhs_size, rhs_size;
1479 bool lhs_wraps, rhs_wraps;
1480
1481 lhs_type = TREE_TYPE (lhs);
1482 rhs_type = TREE_TYPE (rhs);
1483 lhs_size = TYPE_PRECISION (lhs_type);
1484 rhs_size = TYPE_PRECISION (rhs_type);
1485 lhs_wraps = TYPE_OVERFLOW_WRAPS (lhs_type);
1486 rhs_wraps = TYPE_OVERFLOW_WRAPS (rhs_type);
1487
1488 if (lhs_size < rhs_size
1489 || (rhs_wraps && !lhs_wraps)
1490 || (rhs_wraps && lhs_wraps && rhs_size != lhs_size))
1491 return false;
1492
1493 return true;
1494 }
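
/* Concrete cases, assuming 32-bit int, 64-bit long and the default
   -fno-wrapv: casting int to long is accepted (a non-wrapping source
   widened); casting unsigned int to long is rejected (wrapping source,
   non-wrapping destination), as is casting unsigned int to unsigned
   long (wrapping types of different sizes); any narrowing cast is
   rejected outright.  */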
1495
1496 /* Return TRUE if GS is a statement that defines an SSA name from
1497 a conversion and is legal for us to combine with an add and multiply
1498 in the candidate table. For example, suppose we have:
1499
1500 A = B + i;
1501 C = (type) A;
1502 D = C * S;
1503
1504 Without the type-cast, we would create a CAND_MULT for D with base B,
1505 index i, and stride S. We want to record this candidate only if it
1506 is equivalent to apply the type cast following the multiply:
1507
1508 A = B + i;
1509 E = A * S;
1510 D = (type) E;
1511
1512 We will record the type with the candidate for D. This allows us
1513 to use a similar previous candidate as a basis. If we have earlier seen
1514
1515 A' = B + i';
1516 C' = (type) A';
1517 D' = C' * S;
1518
1519 we can replace D with
1520
1521 D = D' + (i - i') * S;
1522
1523 But if moving the type-cast would change semantics, we mustn't do this.
1524
1525 This is legitimate for casts from a non-wrapping integral type to
1526 any integral type of the same or larger size. It is not legitimate
1527 to convert a wrapping type to a non-wrapping type, or to a wrapping
1528 type of a different size. I.e., with a wrapping type, we must
1529 assume that the addition B + i could wrap, in which case performing
1530 the multiply before or after one of the "illegal" type casts will
1531 have different semantics. */
1532
1533 static bool
1534 legal_cast_p (gimple gs, tree rhs)
1535 {
1536 if (!is_gimple_assign (gs)
1537 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (gs)))
1538 return false;
1539
1540 return legal_cast_p_1 (gimple_assign_lhs (gs), rhs);
1541 }
1542
1543 /* Given GS which is a cast to a scalar integer type, determine whether
1544 the cast is legal for strength reduction. If so, make at least one
1545 appropriate entry in the candidate table. */
1546
1547 static void
1548 slsr_process_cast (gimple gs, tree rhs1, bool speed)
1549 {
1550 tree lhs, ctype;
1551 slsr_cand_t base_cand, c, c2;
1552 unsigned savings = 0;
1553
1554 if (!legal_cast_p (gs, rhs1))
1555 return;
1556
1557 lhs = gimple_assign_lhs (gs);
1558 base_cand = base_cand_from_table (rhs1);
1559 ctype = TREE_TYPE (lhs);
1560
1561 if (base_cand && base_cand->kind != CAND_PHI)
1562 {
1563 while (base_cand)
1564 {
1565 /* Propagate all data from the base candidate except the type,
1566 which comes from the cast, and the base candidate's cast,
1567 which is no longer applicable. */
1568 if (has_single_use (rhs1))
1569 savings = (base_cand->dead_savings
1570 + stmt_cost (base_cand->cand_stmt, speed));
1571
1572 c = alloc_cand_and_find_basis (base_cand->kind, gs,
1573 base_cand->base_expr,
1574 base_cand->index, base_cand->stride,
1575 ctype, savings);
1576 if (base_cand->next_interp)
1577 base_cand = lookup_cand (base_cand->next_interp);
1578 else
1579 base_cand = NULL;
1580 }
1581 }
1582 else
1583 {
1584 /* If nothing is known about the RHS, create fresh CAND_ADD and
1585 CAND_MULT interpretations:
1586
1587 X = Y + (0 * 1)
1588 X = (Y + 0) * 1
1589
1590 The first of these is somewhat arbitrary, but the choice of
1591 1 for the stride simplifies the logic for propagating casts
1592 into their uses. */
1593 c = alloc_cand_and_find_basis (CAND_ADD, gs, rhs1,
1594 0, integer_one_node, ctype, 0);
1595 c2 = alloc_cand_and_find_basis (CAND_MULT, gs, rhs1,
1596 0, integer_one_node, ctype, 0);
1597 c->next_interp = c2->cand_num;
1598 }
1599
1600 /* Add the first (or only) interpretation to the statement-candidate
1601 mapping. */
1602 add_cand_for_stmt (gs, c);
1603 }
1604
1605 /* Given GS which is a copy of a scalar integer type, make at least one
1606 appropriate entry in the candidate table.
1607
1608 This interface is included for completeness, but is unnecessary
1609 if this pass immediately follows a pass that performs copy
1610 propagation, such as DOM. */
1611
1612 static void
1613 slsr_process_copy (gimple gs, tree rhs1, bool speed)
1614 {
1615 slsr_cand_t base_cand, c, c2;
1616 unsigned savings = 0;
1617
1618 base_cand = base_cand_from_table (rhs1);
1619
1620 if (base_cand && base_cand->kind != CAND_PHI)
1621 {
1622 while (base_cand)
1623 {
1624 /* Propagate all data from the base candidate. */
1625 if (has_single_use (rhs1))
1626 savings = (base_cand->dead_savings
1627 + stmt_cost (base_cand->cand_stmt, speed));
1628
1629 c = alloc_cand_and_find_basis (base_cand->kind, gs,
1630 base_cand->base_expr,
1631 base_cand->index, base_cand->stride,
1632 base_cand->cand_type, savings);
1633 if (base_cand->next_interp)
1634 base_cand = lookup_cand (base_cand->next_interp);
1635 else
1636 base_cand = NULL;
1637 }
1638 }
1639 else
1640 {
1641 /* If nothing is known about the RHS, create fresh CAND_ADD and
1642 CAND_MULT interpretations:
1643
1644 X = Y + (0 * 1)
1645 X = (Y + 0) * 1
1646
1647 The first of these is somewhat arbitrary, but the choice of
1648 1 for the stride simplifies the logic for propagating casts
1649 into their uses. */
1650 c = alloc_cand_and_find_basis (CAND_ADD, gs, rhs1,
1651 0, integer_one_node, TREE_TYPE (rhs1), 0);
1652 c2 = alloc_cand_and_find_basis (CAND_MULT, gs, rhs1,
1653 0, integer_one_node, TREE_TYPE (rhs1), 0);
1654 c->next_interp = c2->cand_num;
1655 }
1656
1657 /* Add the first (or only) interpretation to the statement-candidate
1658 mapping. */
1659 add_cand_for_stmt (gs, c);
1660 }
1661 \f
1662 class find_candidates_dom_walker : public dom_walker
1663 {
1664 public:
1665 find_candidates_dom_walker (cdi_direction direction)
1666 : dom_walker (direction) {}
1667 virtual void before_dom_children (basic_block);
1668 };
1669
1670 /* Find strength-reduction candidates in block BB. */
1671
1672 void
1673 find_candidates_dom_walker::before_dom_children (basic_block bb)
1674 {
1675 bool speed = optimize_bb_for_speed_p (bb);
1676 gimple_stmt_iterator gsi;
1677
1678 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1679 slsr_process_phi (gsi_stmt (gsi), speed);
1680
1681 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1682 {
1683 gimple gs = gsi_stmt (gsi);
1684
1685 if (gimple_vuse (gs) && gimple_assign_single_p (gs))
1686 slsr_process_ref (gs);
1687
1688 else if (is_gimple_assign (gs)
1689 && SCALAR_INT_MODE_P
1690 (TYPE_MODE (TREE_TYPE (gimple_assign_lhs (gs)))))
1691 {
1692 tree rhs1 = NULL_TREE, rhs2 = NULL_TREE;
1693
1694 switch (gimple_assign_rhs_code (gs))
1695 {
1696 case MULT_EXPR:
1697 case PLUS_EXPR:
1698 rhs1 = gimple_assign_rhs1 (gs);
1699 rhs2 = gimple_assign_rhs2 (gs);
1700 /* Should never happen, but currently some buggy situations
1701 in earlier phases put constants in rhs1. */
1702 if (TREE_CODE (rhs1) != SSA_NAME)
1703 continue;
1704 break;
1705
1706 /* Possible future opportunity: rhs1 of a ptr+ can be
1707 an ADDR_EXPR. */
1708 case POINTER_PLUS_EXPR:
1709 case MINUS_EXPR:
1710 rhs2 = gimple_assign_rhs2 (gs);
1711 /* Fall-through. */
1712
1713 case NOP_EXPR:
1714 case MODIFY_EXPR:
1715 case NEGATE_EXPR:
1716 rhs1 = gimple_assign_rhs1 (gs);
1717 if (TREE_CODE (rhs1) != SSA_NAME)
1718 continue;
1719 break;
1720
1721 default:
1722 ;
1723 }
1724
1725 switch (gimple_assign_rhs_code (gs))
1726 {
1727 case MULT_EXPR:
1728 slsr_process_mul (gs, rhs1, rhs2, speed);
1729 break;
1730
1731 case PLUS_EXPR:
1732 case POINTER_PLUS_EXPR:
1733 case MINUS_EXPR:
1734 slsr_process_add (gs, rhs1, rhs2, speed);
1735 break;
1736
1737 case NEGATE_EXPR:
1738 slsr_process_neg (gs, rhs1, speed);
1739 break;
1740
1741 case NOP_EXPR:
1742 slsr_process_cast (gs, rhs1, speed);
1743 break;
1744
1745 case MODIFY_EXPR:
1746 slsr_process_copy (gs, rhs1, speed);
1747 break;
1748
1749 default:
1750 ;
1751 }
1752 }
1753 }
1754 }
1755 \f
1756 /* Dump a candidate for debug. */
1757
1758 static void
1759 dump_candidate (slsr_cand_t c)
1760 {
1761 fprintf (dump_file, "%3d [%d] ", c->cand_num,
1762 gimple_bb (c->cand_stmt)->index);
1763 print_gimple_stmt (dump_file, c->cand_stmt, 0, 0);
1764 switch (c->kind)
1765 {
1766 case CAND_MULT:
1767 fputs (" MULT : (", dump_file);
1768 print_generic_expr (dump_file, c->base_expr, 0);
1769 fputs (" + ", dump_file);
1770 print_decs (c->index, dump_file);
1771 fputs (") * ", dump_file);
1772 print_generic_expr (dump_file, c->stride, 0);
1773 fputs (" : ", dump_file);
1774 break;
1775 case CAND_ADD:
1776 fputs (" ADD : ", dump_file);
1777 print_generic_expr (dump_file, c->base_expr, 0);
1778 fputs (" + (", dump_file);
1779 print_decs (c->index, dump_file);
1780 fputs (" * ", dump_file);
1781 print_generic_expr (dump_file, c->stride, 0);
1782 fputs (") : ", dump_file);
1783 break;
1784 case CAND_REF:
1785 fputs (" REF : ", dump_file);
1786 print_generic_expr (dump_file, c->base_expr, 0);
1787 fputs (" + (", dump_file);
1788 print_generic_expr (dump_file, c->stride, 0);
1789 fputs (") + ", dump_file);
1790 print_decs (c->index, dump_file);
1791 fputs (" : ", dump_file);
1792 break;
1793 case CAND_PHI:
1794 fputs (" PHI : ", dump_file);
1795 print_generic_expr (dump_file, c->base_expr, 0);
1796 fputs (" + (unknown * ", dump_file);
1797 print_generic_expr (dump_file, c->stride, 0);
1798 fputs (") : ", dump_file);
1799 break;
1800 default:
1801 gcc_unreachable ();
1802 }
1803 print_generic_expr (dump_file, c->cand_type, 0);
1804 fprintf (dump_file, "\n basis: %d dependent: %d sibling: %d\n",
1805 c->basis, c->dependent, c->sibling);
1806 fprintf (dump_file, " next-interp: %d dead-savings: %d\n",
1807 c->next_interp, c->dead_savings);
1808 if (c->def_phi)
1809 fprintf (dump_file, " phi: %d\n", c->def_phi);
1810 fputs ("\n", dump_file);
1811 }
1812
1813 /* Dump the candidate vector for debug. */
1814
1815 static void
1816 dump_cand_vec (void)
1817 {
1818 unsigned i;
1819 slsr_cand_t c;
1820
1821 fprintf (dump_file, "\nStrength reduction candidate vector:\n\n");
1822
1823 FOR_EACH_VEC_ELT (cand_vec, i, c)
1824 dump_candidate (c);
1825 }
1826
1827 /* Callback used to dump the candidate chains hash table. */
1828
1829 int
1830 ssa_base_cand_dump_callback (cand_chain **slot, void *ignored ATTRIBUTE_UNUSED)
1831 {
1832 const_cand_chain_t chain = *slot;
1833 cand_chain_t p;
1834
1835 print_generic_expr (dump_file, chain->base_expr, 0);
1836 fprintf (dump_file, " -> %d", chain->cand->cand_num);
1837
1838 for (p = chain->next; p; p = p->next)
1839 fprintf (dump_file, " -> %d", p->cand->cand_num);
1840
1841 fputs ("\n", dump_file);
1842 return 1;
1843 }
1844
1845 /* Dump the candidate chains. */
1846
1847 static void
1848 dump_cand_chains (void)
1849 {
1850 fprintf (dump_file, "\nStrength reduction candidate chains:\n\n");
1851 base_cand_map->traverse_noresize <void *, ssa_base_cand_dump_callback>
1852 (NULL);
1853 fputs ("\n", dump_file);
1854 }
1855
1856 /* Dump the increment vector for debug. */
1857
1858 static void
1859 dump_incr_vec (void)
1860 {
1861 if (dump_file && (dump_flags & TDF_DETAILS))
1862 {
1863 unsigned i;
1864
1865 fprintf (dump_file, "\nIncrement vector:\n\n");
1866
1867 for (i = 0; i < incr_vec_len; i++)
1868 {
1869 fprintf (dump_file, "%3d increment: ", i);
1870 print_decs (incr_vec[i].incr, dump_file);
1871 fprintf (dump_file, "\n count: %d", incr_vec[i].count);
1872 fprintf (dump_file, "\n cost: %d", incr_vec[i].cost);
1873 fputs ("\n initializer: ", dump_file);
1874 print_generic_expr (dump_file, incr_vec[i].initializer, 0);
1875 fputs ("\n\n", dump_file);
1876 }
1877 }
1878 }
1879 \f
1880 /* Replace *EXPR in candidate C with an equivalent strength-reduced
1881 data reference. */
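/* Illustrative sketch (not part of the original sources): for a
   CAND_REF with base_expr B, stride S, and index i, the reference is
   rebuilt as MEM[(B p+ S), i] -- a MEM_REF whose address is the
   gimplified value of B p+ S and whose constant offset is i.  */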
1882
1883 static void
1884 replace_ref (tree *expr, slsr_cand_t c)
1885 {
1886 tree add_expr, mem_ref, acc_type = TREE_TYPE (*expr);
1887 unsigned HOST_WIDE_INT misalign;
1888 unsigned align;
1889
1890 /* Ensure the memory reference does not claim more alignment than
1891 the underlying data actually guarantees.  See PR58041.  */
1892 get_object_alignment_1 (*expr, &align, &misalign);
1893 if (misalign != 0)
1894 align = (misalign & -misalign);
1895 if (align < TYPE_ALIGN (acc_type))
1896 acc_type = build_aligned_type (acc_type, align);
1897
1898 add_expr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (c->base_expr),
1899 c->base_expr, c->stride);
1900 mem_ref = fold_build2 (MEM_REF, acc_type, add_expr,
1901 wide_int_to_tree (c->cand_type, c->index));
1902
1903 /* Gimplify the base addressing expression for the new MEM_REF tree. */
1904 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
1905 TREE_OPERAND (mem_ref, 0)
1906 = force_gimple_operand_gsi (&gsi, TREE_OPERAND (mem_ref, 0),
1907 /*simple_p=*/true, NULL,
1908 /*before=*/true, GSI_SAME_STMT);
1909 copy_ref_info (mem_ref, *expr);
1910 *expr = mem_ref;
1911 update_stmt (c->cand_stmt);
1912 }
1913
1914 /* Replace CAND_REF candidate C, each sibling of candidate C, and each
1915 dependent of candidate C with an equivalent strength-reduced data
1916 reference. */
1917
1918 static void
1919 replace_refs (slsr_cand_t c)
1920 {
1921 if (dump_file && (dump_flags & TDF_DETAILS))
1922 {
1923 fputs ("Replacing reference: ", dump_file);
1924 print_gimple_stmt (dump_file, c->cand_stmt, 0, 0);
1925 }
1926
1927 if (gimple_vdef (c->cand_stmt))
1928 {
1929 tree *lhs = gimple_assign_lhs_ptr (c->cand_stmt);
1930 replace_ref (lhs, c);
1931 }
1932 else
1933 {
1934 tree *rhs = gimple_assign_rhs1_ptr (c->cand_stmt);
1935 replace_ref (rhs, c);
1936 }
1937
1938 if (dump_file && (dump_flags & TDF_DETAILS))
1939 {
1940 fputs ("With: ", dump_file);
1941 print_gimple_stmt (dump_file, c->cand_stmt, 0, 0);
1942 fputs ("\n", dump_file);
1943 }
1944
1945 if (c->sibling)
1946 replace_refs (lookup_cand (c->sibling));
1947
1948 if (c->dependent)
1949 replace_refs (lookup_cand (c->dependent));
1950 }
1951
1952 /* Return TRUE if candidate C is dependent upon a PHI. */
1953
1954 static bool
1955 phi_dependent_cand_p (slsr_cand_t c)
1956 {
1957 /* A candidate is not necessarily dependent upon a PHI just because
1958 it has a phi definition for its base name. It may have a basis
1959 that relies upon the same phi definition, in which case the PHI
1960 is irrelevant to this candidate. */
1961 return (c->def_phi
1962 && c->basis
1963 && lookup_cand (c->basis)->def_phi != c->def_phi);
1964 }
1965
1966 /* Calculate the increment required for candidate C relative to
1967 its basis. */
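/* Illustrative example (not from the original sources): if C is
   X = (B + 7) * S and its basis is Y = (B + 2) * S, the increment
   is 7 - 2 = 5.  */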
1968
1969 static widest_int
1970 cand_increment (slsr_cand_t c)
1971 {
1972 slsr_cand_t basis;
1973
1974 /* If the candidate doesn't have a basis, just return its own
1975 index. This is useful in record_increments to help us find
1976 an existing initializer. Also, if the candidate's basis is
1977 hidden by a phi, then its own index will be the increment
1978 from the newly introduced phi basis. */
1979 if (!c->basis || phi_dependent_cand_p (c))
1980 return c->index;
1981
1982 basis = lookup_cand (c->basis);
1983 gcc_assert (operand_equal_p (c->base_expr, basis->base_expr, 0));
1984 return c->index - basis->index;
1985 }
1986
1987 /* Calculate the increment required for candidate C relative to
1988 its basis. If we aren't going to generate pointer arithmetic
1989 for this candidate, return the absolute value of that increment
1990 instead. */
1991
1992 static inline widest_int
1993 cand_abs_increment (slsr_cand_t c)
1994 {
1995 widest_int increment = cand_increment (c);
1996
1997 if (!address_arithmetic_p && wi::neg_p (increment))
1998 increment = -increment;
1999
2000 return increment;
2001 }
2002
2003 /* Return TRUE iff candidate C has already been replaced under
2004 another interpretation. */
2005
2006 static inline bool
2007 cand_already_replaced (slsr_cand_t c)
2008 {
2009 return (gimple_bb (c->cand_stmt) == 0);
2010 }
2011
2012 /* Common logic used by replace_unconditional_candidate and
2013 replace_conditional_candidate. */
2014
2015 static void
2016 replace_mult_candidate (slsr_cand_t c, tree basis_name, widest_int bump)
2017 {
2018 tree target_type = TREE_TYPE (gimple_assign_lhs (c->cand_stmt));
2019 enum tree_code cand_code = gimple_assign_rhs_code (c->cand_stmt);
2020
2021 /* It is highly unlikely, but possible, that the resulting
2022 bump doesn't fit in a HWI. Abandon the replacement
2023 in this case. This does not affect siblings or dependents
2024 of C. Restriction to signed HWI is conservative for unsigned
2025 types but allows for safe negation without twisted logic. */
2026 if (wi::fits_shwi_p (bump)
2027 && bump.to_shwi () != HOST_WIDE_INT_MIN
2028 /* It is not useful to replace casts, copies, or adds of
2029 an SSA name and a constant. */
2030 && cand_code != MODIFY_EXPR
2031 && cand_code != NOP_EXPR
2032 && cand_code != PLUS_EXPR
2033 && cand_code != POINTER_PLUS_EXPR
2034 && cand_code != MINUS_EXPR)
2035 {
2036 enum tree_code code = PLUS_EXPR;
2037 tree bump_tree;
2038 gimple stmt_to_print = NULL;
2039
2040 /* If the basis name and the candidate's LHS have incompatible
2041 types, introduce a cast. */
2042 if (!useless_type_conversion_p (target_type, TREE_TYPE (basis_name)))
2043 basis_name = introduce_cast_before_cand (c, target_type, basis_name);
2044 if (wi::neg_p (bump))
2045 {
2046 code = MINUS_EXPR;
2047 bump = -bump;
2048 }
2049
2050 bump_tree = wide_int_to_tree (target_type, bump);
2051
2052 if (dump_file && (dump_flags & TDF_DETAILS))
2053 {
2054 fputs ("Replacing: ", dump_file);
2055 print_gimple_stmt (dump_file, c->cand_stmt, 0, 0);
2056 }
2057
2058 if (bump == 0)
2059 {
2060 tree lhs = gimple_assign_lhs (c->cand_stmt);
2061 gimple copy_stmt = gimple_build_assign (lhs, basis_name);
2062 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
2063 gimple_set_location (copy_stmt, gimple_location (c->cand_stmt));
2064 gsi_replace (&gsi, copy_stmt, false);
2065 c->cand_stmt = copy_stmt;
2066 if (dump_file && (dump_flags & TDF_DETAILS))
2067 stmt_to_print = copy_stmt;
2068 }
2069 else
2070 {
2071 tree rhs1, rhs2;
2072 if (cand_code != NEGATE_EXPR) {
2073 rhs1 = gimple_assign_rhs1 (c->cand_stmt);
2074 rhs2 = gimple_assign_rhs2 (c->cand_stmt);
2075 }
2076 if (cand_code != NEGATE_EXPR
2077 && ((operand_equal_p (rhs1, basis_name, 0)
2078 && operand_equal_p (rhs2, bump_tree, 0))
2079 || (operand_equal_p (rhs1, bump_tree, 0)
2080 && operand_equal_p (rhs2, basis_name, 0))))
2081 {
2082 if (dump_file && (dump_flags & TDF_DETAILS))
2083 {
2084 fputs ("(duplicate, not actually replacing)", dump_file);
2085 stmt_to_print = c->cand_stmt;
2086 }
2087 }
2088 else
2089 {
2090 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
2091 gimple_assign_set_rhs_with_ops (&gsi, code,
2092 basis_name, bump_tree);
2093 update_stmt (gsi_stmt (gsi));
2094 c->cand_stmt = gsi_stmt (gsi);
2095 if (dump_file && (dump_flags & TDF_DETAILS))
2096 stmt_to_print = gsi_stmt (gsi);
2097 }
2098 }
2099
2100 if (dump_file && (dump_flags & TDF_DETAILS))
2101 {
2102 fputs ("With: ", dump_file);
2103 print_gimple_stmt (dump_file, stmt_to_print, 0, 0);
2104 fputs ("\n", dump_file);
2105 }
2106 }
2107 }
2108
2109 /* Replace candidate C with an add or subtract. Note that we only
2110 operate on CAND_MULTs with known strides, so we will never generate
2111 a POINTER_PLUS_EXPR. Each candidate X = (B + i) * S is replaced by
2112 X = Y + ((i - i') * S), as described in the module commentary. The
2113 folded value ((i - i') * S) is referred to here as the "bump." */
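/* Worked example (illustrative only): with basis Y = (B + 2) * 4 and
   candidate X = (B + 7) * 4, the increment is 5, the bump is 5 * 4 = 20,
   and the candidate statement is rewritten as X = Y + 20.  */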
2114
2115 static void
2116 replace_unconditional_candidate (slsr_cand_t c)
2117 {
2118 slsr_cand_t basis;
2119
2120 if (cand_already_replaced (c))
2121 return;
2122
2123 basis = lookup_cand (c->basis);
2124 widest_int bump = cand_increment (c) * wi::to_widest (c->stride);
2125
2126 replace_mult_candidate (c, gimple_assign_lhs (basis->cand_stmt), bump);
2127 }
2128 \f
2129 /* Return the index in the increment vector of the given INCREMENT,
2130 or -1 if not found. The latter can occur if more than
2131 MAX_INCR_VEC_LEN increments have been found. */
2132
2133 static inline int
2134 incr_vec_index (const widest_int &increment)
2135 {
2136 unsigned i;
2137
2138 for (i = 0; i < incr_vec_len && increment != incr_vec[i].incr; i++)
2139 ;
2140
2141 if (i < incr_vec_len)
2142 return i;
2143 else
2144 return -1;
2145 }
2146
2147 /* Create a new statement along edge E to add BASIS_NAME to the product
2148 of INCREMENT and the stride of candidate C. Create and return a new
2149 SSA name to be used as the LHS of the new statement.
2150 KNOWN_STRIDE is true iff C's stride is a constant. */
2151
2152 static tree
2153 create_add_on_incoming_edge (slsr_cand_t c, tree basis_name,
2154 widest_int increment, edge e, location_t loc,
2155 bool known_stride)
2156 {
2157 basic_block insert_bb;
2158 gimple_stmt_iterator gsi;
2159 tree lhs, basis_type;
2160 gimple new_stmt;
2161
2162 /* If the add candidate along this incoming edge has the same
2163 index as C's hidden basis, the hidden basis represents this
2164 edge correctly. */
2165 if (increment == 0)
2166 return basis_name;
2167
2168 basis_type = TREE_TYPE (basis_name);
2169 lhs = make_temp_ssa_name (basis_type, NULL, "slsr");
2170
2171 if (known_stride)
2172 {
2173 tree bump_tree;
2174 enum tree_code code = PLUS_EXPR;
2175 widest_int bump = increment * wi::to_widest (c->stride);
2176 if (wi::neg_p (bump))
2177 {
2178 code = MINUS_EXPR;
2179 bump = -bump;
2180 }
2181
2182 bump_tree = wide_int_to_tree (basis_type, bump);
2183 new_stmt = gimple_build_assign_with_ops (code, lhs, basis_name,
2184 bump_tree);
2185 }
2186 else
2187 {
2188 int i;
2189 bool negate_incr = (!address_arithmetic_p && wi::neg_p (increment));
2190 i = incr_vec_index (negate_incr ? -increment : increment);
2191 gcc_assert (i >= 0);
2192
2193 if (incr_vec[i].initializer)
2194 {
2195 enum tree_code code = negate_incr ? MINUS_EXPR : PLUS_EXPR;
2196 new_stmt = gimple_build_assign_with_ops (code, lhs, basis_name,
2197 incr_vec[i].initializer);
2198 }
2199 else if (increment == 1)
2200 new_stmt = gimple_build_assign_with_ops (PLUS_EXPR, lhs, basis_name,
2201 c->stride);
2202 else if (increment == -1)
2203 new_stmt = gimple_build_assign_with_ops (MINUS_EXPR, lhs, basis_name,
2204 c->stride);
2205 else
2206 gcc_unreachable ();
2207 }
2208
2209 insert_bb = single_succ_p (e->src) ? e->src : split_edge (e);
2210 gsi = gsi_last_bb (insert_bb);
2211
2212 if (!gsi_end_p (gsi) && is_ctrl_stmt (gsi_stmt (gsi)))
2213 gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);
2214 else
2215 gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
2216
2217 gimple_set_location (new_stmt, loc);
2218
2219 if (dump_file && (dump_flags & TDF_DETAILS))
2220 {
2221 fprintf (dump_file, "Inserting in block %d: ", insert_bb->index);
2222 print_gimple_stmt (dump_file, new_stmt, 0, 0);
2223 }
2224
2225 return lhs;
2226 }
2227
2228 /* Given a candidate C with BASIS_NAME being the LHS of C's basis which
2229 is hidden by the phi node FROM_PHI, create a new phi node in the same
2230 block as FROM_PHI. The new phi is suitable for use as a basis by C,
2231 with its phi arguments representing conditional adjustments to the
2232 hidden basis along conditional incoming paths. Those adjustments are
2233 made by creating add statements (and sometimes recursively creating
2234 phis) along those incoming paths. LOC is the location to attach to
2235 the introduced statements. KNOWN_STRIDE is true iff C's stride is a
2236 constant. */
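/* Sketch of the idea (illustrative, not from the original sources):
   if the hidden basis is Y = (B + 1) * S and the phi merges add
   candidates with indices 3 and 5, the new phi merges the values
   Y + 2 * S and Y + 4 * S, each computed by an add inserted on the
   corresponding incoming edge.  */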
2237
2238 static tree
2239 create_phi_basis (slsr_cand_t c, gimple from_phi, tree basis_name,
2240 location_t loc, bool known_stride)
2241 {
2242 int i;
2243 tree name, phi_arg;
2244 gimple phi;
2245 vec<tree> phi_args;
2246 slsr_cand_t basis = lookup_cand (c->basis);
2247 int nargs = gimple_phi_num_args (from_phi);
2248 basic_block phi_bb = gimple_bb (from_phi);
2249 slsr_cand_t phi_cand = base_cand_from_table (gimple_phi_result (from_phi));
2250 phi_args.create (nargs);
2251
2252 /* Process each argument of the existing phi that represents
2253 conditionally-executed add candidates. */
2254 for (i = 0; i < nargs; i++)
2255 {
2256 edge e = (*phi_bb->preds)[i];
2257 tree arg = gimple_phi_arg_def (from_phi, i);
2258 tree feeding_def;
2259
2260 /* If the phi argument is the base name of the CAND_PHI, then
2261 this incoming arc should use the hidden basis. */
2262 if (operand_equal_p (arg, phi_cand->base_expr, 0))
2263 if (basis->index == 0)
2264 feeding_def = gimple_assign_lhs (basis->cand_stmt);
2265 else
2266 {
2267 widest_int incr = -basis->index;
2268 feeding_def = create_add_on_incoming_edge (c, basis_name, incr,
2269 e, loc, known_stride);
2270 }
2271 else
2272 {
2273 gimple arg_def = SSA_NAME_DEF_STMT (arg);
2274
2275 /* If there is another phi along this incoming edge, we must
2276 process it in the same fashion to ensure that all basis
2277 adjustments are made along its incoming edges. */
2278 if (gimple_code (arg_def) == GIMPLE_PHI)
2279 feeding_def = create_phi_basis (c, arg_def, basis_name,
2280 loc, known_stride);
2281 else
2282 {
2283 slsr_cand_t arg_cand = base_cand_from_table (arg);
2284 widest_int diff = arg_cand->index - basis->index;
2285 feeding_def = create_add_on_incoming_edge (c, basis_name, diff,
2286 e, loc, known_stride);
2287 }
2288 }
2289
2290 /* Because of recursion, we need to save the arguments in a vector
2291 so we can create the PHI statement all at once. Otherwise the
2292 storage for the half-created PHI can be reclaimed. */
2293 phi_args.safe_push (feeding_def);
2294 }
2295
2296 /* Create the new phi basis. */
2297 name = make_temp_ssa_name (TREE_TYPE (basis_name), NULL, "slsr");
2298 phi = create_phi_node (name, phi_bb);
2299 SSA_NAME_DEF_STMT (name) = phi;
2300
2301 FOR_EACH_VEC_ELT (phi_args, i, phi_arg)
2302 {
2303 edge e = (*phi_bb->preds)[i];
2304 add_phi_arg (phi, phi_arg, e, loc);
2305 }
2306
2307 update_stmt (phi);
2308
2309 if (dump_file && (dump_flags & TDF_DETAILS))
2310 {
2311 fputs ("Introducing new phi basis: ", dump_file);
2312 print_gimple_stmt (dump_file, phi, 0, 0);
2313 }
2314
2315 return name;
2316 }
2317
2318 /* Given a candidate C whose basis is hidden by at least one intervening
2319 phi, introduce a matching number of new phis to represent its basis
2320 adjusted by conditional increments along possible incoming paths. Then
2321 replace C as though it were an unconditional candidate, using the new
2322 basis. */
2323
2324 static void
2325 replace_conditional_candidate (slsr_cand_t c)
2326 {
2327 tree basis_name, name;
2328 slsr_cand_t basis;
2329 location_t loc;
2330
2331 /* Look up the LHS SSA name from C's basis. This will be the
2332 RHS1 of the adds we will introduce to create new phi arguments. */
2333 basis = lookup_cand (c->basis);
2334 basis_name = gimple_assign_lhs (basis->cand_stmt);
2335
2336 /* Create a new phi statement which will represent C's true basis
2337 after the transformation is complete. */
2338 loc = gimple_location (c->cand_stmt);
2339 name = create_phi_basis (c, lookup_cand (c->def_phi)->cand_stmt,
2340 basis_name, loc, KNOWN_STRIDE);
2341 /* Replace C with an add of the new basis phi and a constant. */
2342 widest_int bump = c->index * wi::to_widest (c->stride);
2343
2344 replace_mult_candidate (c, name, bump);
2345 }
2346
2347 /* Compute the expected costs of inserting basis adjustments for
2348 candidate C with phi-definition PHI. The cost of inserting
2349 one adjustment is given by ONE_ADD_COST. If PHI has arguments
2350 which are themselves phi results, recursively calculate costs
2351 for those phis as well. */
2352
2353 static int
2354 phi_add_costs (gimple phi, slsr_cand_t c, int one_add_cost)
2355 {
2356 unsigned i;
2357 int cost = 0;
2358 slsr_cand_t phi_cand = base_cand_from_table (gimple_phi_result (phi));
2359
2360 /* If we work our way back to a phi that isn't dominated by the hidden
2361 basis, this isn't a candidate for replacement. Indicate this by
2362 returning an unreasonably high cost. It's not easy to detect
2363 these situations when determining the basis, so we defer the
2364 decision until now. */
2365 basic_block phi_bb = gimple_bb (phi);
2366 slsr_cand_t basis = lookup_cand (c->basis);
2367 basic_block basis_bb = gimple_bb (basis->cand_stmt);
2368
2369 if (phi_bb == basis_bb || !dominated_by_p (CDI_DOMINATORS, phi_bb, basis_bb))
2370 return COST_INFINITE;
2371
2372 for (i = 0; i < gimple_phi_num_args (phi); i++)
2373 {
2374 tree arg = gimple_phi_arg_def (phi, i);
2375
2376 if (arg != phi_cand->base_expr)
2377 {
2378 gimple arg_def = SSA_NAME_DEF_STMT (arg);
2379
2380 if (gimple_code (arg_def) == GIMPLE_PHI)
2381 cost += phi_add_costs (arg_def, c, one_add_cost);
2382 else
2383 {
2384 slsr_cand_t arg_cand = base_cand_from_table (arg);
2385
2386 if (arg_cand->index != c->index)
2387 cost += one_add_cost;
2388 }
2389 }
2390 }
2391
2392 return cost;
2393 }
2394
2395 /* For candidate C, each sibling of candidate C, and each dependent of
2396 candidate C, determine whether the candidate is dependent upon a
2397 phi that hides its basis. If not, replace the candidate unconditionally.
2398 Otherwise, determine whether the cost of introducing compensation code
2399 for the candidate is offset by the gains from strength reduction. If
2400 so, replace the candidate and introduce the compensation code. */
2401
2402 static void
2403 replace_uncond_cands_and_profitable_phis (slsr_cand_t c)
2404 {
2405 if (phi_dependent_cand_p (c))
2406 {
2407 if (c->kind == CAND_MULT)
2408 {
2409 /* A candidate dependent upon a phi will replace a multiply by
2410 a constant with an add, and will insert at most one add for
2411 each phi argument. Add these costs with the potential dead-code
2412 savings to determine profitability. */
2413 bool speed = optimize_bb_for_speed_p (gimple_bb (c->cand_stmt));
2414 int mult_savings = stmt_cost (c->cand_stmt, speed);
2415 gimple phi = lookup_cand (c->def_phi)->cand_stmt;
2416 tree phi_result = gimple_phi_result (phi);
2417 int one_add_cost = add_cost (speed,
2418 TYPE_MODE (TREE_TYPE (phi_result)));
2419 int add_costs = one_add_cost + phi_add_costs (phi, c, one_add_cost);
2420 int cost = add_costs - mult_savings - c->dead_savings;
2421
2422 if (dump_file && (dump_flags & TDF_DETAILS))
2423 {
2424 fprintf (dump_file, " Conditional candidate %d:\n", c->cand_num);
2425 fprintf (dump_file, " add_costs = %d\n", add_costs);
2426 fprintf (dump_file, " mult_savings = %d\n", mult_savings);
2427 fprintf (dump_file, " dead_savings = %d\n", c->dead_savings);
2428 fprintf (dump_file, " cost = %d\n", cost);
2429 if (cost <= COST_NEUTRAL)
2430 fputs (" Replacing...\n", dump_file);
2431 else
2432 fputs (" Not replaced.\n", dump_file);
2433 }
2434
2435 if (cost <= COST_NEUTRAL)
2436 replace_conditional_candidate (c);
2437 }
2438 }
2439 else
2440 replace_unconditional_candidate (c);
2441
2442 if (c->sibling)
2443 replace_uncond_cands_and_profitable_phis (lookup_cand (c->sibling));
2444
2445 if (c->dependent)
2446 replace_uncond_cands_and_profitable_phis (lookup_cand (c->dependent));
2447 }
2448 \f
2449 /* Count the number of candidates in the tree rooted at C that have
2450 not already been replaced under other interpretations. */
2451
2452 static int
2453 count_candidates (slsr_cand_t c)
2454 {
2455 unsigned count = cand_already_replaced (c) ? 0 : 1;
2456
2457 if (c->sibling)
2458 count += count_candidates (lookup_cand (c->sibling));
2459
2460 if (c->dependent)
2461 count += count_candidates (lookup_cand (c->dependent));
2462
2463 return count;
2464 }
2465
2466 /* Increase the count of INCREMENT by one in the increment vector.
2467 INCREMENT is associated with candidate C. If INCREMENT is to be
2468 conditionally executed as part of a conditional candidate replacement,
2469 IS_PHI_ADJUST is true, otherwise false. If an initializer
2470 T_0 = stride * I is provided by a candidate that dominates all
2471 candidates with the same increment, also record T_0 for subsequent use. */
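/* For example (illustrative only): a CAND_ADD of the form X = B + T_0,
   where T_0 = S * 5 and the candidate's index is 5, can provide T_0 as
   the initializer for increment 5, provided T_0's definition dominates
   all other candidates using that increment.  */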
2472
2473 static void
2474 record_increment (slsr_cand_t c, widest_int increment, bool is_phi_adjust)
2475 {
2476 bool found = false;
2477 unsigned i;
2478
2479 /* Treat increments that differ only in sign as identical so as to
2480 share initializers, unless we are generating pointer arithmetic. */
2481 if (!address_arithmetic_p && wi::neg_p (increment))
2482 increment = -increment;
2483
2484 for (i = 0; i < incr_vec_len; i++)
2485 {
2486 if (incr_vec[i].incr == increment)
2487 {
2488 incr_vec[i].count++;
2489 found = true;
2490
2491 /* If we previously recorded an initializer that doesn't
2492 dominate this candidate, it's not going to be useful to
2493 us after all. */
2494 if (incr_vec[i].initializer
2495 && !dominated_by_p (CDI_DOMINATORS,
2496 gimple_bb (c->cand_stmt),
2497 incr_vec[i].init_bb))
2498 {
2499 incr_vec[i].initializer = NULL_TREE;
2500 incr_vec[i].init_bb = NULL;
2501 }
2502
2503 break;
2504 }
2505 }
2506
2507 if (!found && incr_vec_len < MAX_INCR_VEC_LEN - 1)
2508 {
2509 /* The first time we see an increment, create the entry for it.
2510 If this is the root candidate which doesn't have a basis, set
2511 the count to zero. We're only processing it so it can possibly
2512 provide an initializer for other candidates. */
2513 incr_vec[incr_vec_len].incr = increment;
2514 incr_vec[incr_vec_len].count = c->basis || is_phi_adjust ? 1 : 0;
2515 incr_vec[incr_vec_len].cost = COST_INFINITE;
2516
2517 /* Optimistically record the first occurrence of this increment
2518 as providing an initializer (if it does); we will revise this
2519 opinion later if it doesn't dominate all other occurrences.
2520 Exception: increments of -1, 0, 1 never need initializers;
2521 and phi adjustments don't ever provide initializers. */
2522 if (c->kind == CAND_ADD
2523 && !is_phi_adjust
2524 && c->index == increment
2525 && (wi::gts_p (increment, 1)
2526 || wi::lts_p (increment, -1))
2527 && (gimple_assign_rhs_code (c->cand_stmt) == PLUS_EXPR
2528 || gimple_assign_rhs_code (c->cand_stmt) == POINTER_PLUS_EXPR))
2529 {
2530 tree t0 = NULL_TREE;
2531 tree rhs1 = gimple_assign_rhs1 (c->cand_stmt);
2532 tree rhs2 = gimple_assign_rhs2 (c->cand_stmt);
2533 if (operand_equal_p (rhs1, c->base_expr, 0))
2534 t0 = rhs2;
2535 else if (operand_equal_p (rhs2, c->base_expr, 0))
2536 t0 = rhs1;
2537 if (t0
2538 && SSA_NAME_DEF_STMT (t0)
2539 && gimple_bb (SSA_NAME_DEF_STMT (t0)))
2540 {
2541 incr_vec[incr_vec_len].initializer = t0;
2542 incr_vec[incr_vec_len++].init_bb
2543 = gimple_bb (SSA_NAME_DEF_STMT (t0));
2544 }
2545 else
2546 {
2547 incr_vec[incr_vec_len].initializer = NULL_TREE;
2548 incr_vec[incr_vec_len++].init_bb = NULL;
2549 }
2550 }
2551 else
2552 {
2553 incr_vec[incr_vec_len].initializer = NULL_TREE;
2554 incr_vec[incr_vec_len++].init_bb = NULL;
2555 }
2556 }
2557 }
2558
2559 /* Given phi statement PHI that hides a candidate from its BASIS, find
2560 the increments along each incoming arc (recursively handling additional
2561 phis that may be present) and record them. These increments are the
2562 difference in index between the index-adjusting statements and the
2563 index of the basis. */
2564
2565 static void
2566 record_phi_increments (slsr_cand_t basis, gimple phi)
2567 {
2568 unsigned i;
2569 slsr_cand_t phi_cand = base_cand_from_table (gimple_phi_result (phi));
2570
2571 for (i = 0; i < gimple_phi_num_args (phi); i++)
2572 {
2573 tree arg = gimple_phi_arg_def (phi, i);
2574
2575 if (!operand_equal_p (arg, phi_cand->base_expr, 0))
2576 {
2577 gimple arg_def = SSA_NAME_DEF_STMT (arg);
2578
2579 if (gimple_code (arg_def) == GIMPLE_PHI)
2580 record_phi_increments (basis, arg_def);
2581 else
2582 {
2583 slsr_cand_t arg_cand = base_cand_from_table (arg);
2584 widest_int diff = arg_cand->index - basis->index;
2585 record_increment (arg_cand, diff, PHI_ADJUST);
2586 }
2587 }
2588 }
2589 }
2590
2591 /* Determine how many times each unique increment occurs in the set
2592 of candidates rooted at C's parent, recording the data in the
2593 increment vector. For each unique increment I, if an initializer
2594 T_0 = stride * I is provided by a candidate that dominates all
2595 candidates with the same increment, also record T_0 for subsequent
2596 use. */
2597
2598 static void
2599 record_increments (slsr_cand_t c)
2600 {
2601 if (!cand_already_replaced (c))
2602 {
2603 if (!phi_dependent_cand_p (c))
2604 record_increment (c, cand_increment (c), NOT_PHI_ADJUST);
2605 else
2606 {
2607 /* A candidate with a basis hidden by a phi will have one
2608 increment for its relationship to the index represented by
2609 the phi, and potentially additional increments along each
2610 incoming edge. For the root of the dependency tree (which
2611 has no basis), process just the initial index in case it has
2612 an initializer that can be used by subsequent candidates. */
2613 record_increment (c, c->index, NOT_PHI_ADJUST);
2614
2615 if (c->basis)
2616 record_phi_increments (lookup_cand (c->basis),
2617 lookup_cand (c->def_phi)->cand_stmt);
2618 }
2619 }
2620
2621 if (c->sibling)
2622 record_increments (lookup_cand (c->sibling));
2623
2624 if (c->dependent)
2625 record_increments (lookup_cand (c->dependent));
2626 }
2627
2628 /* Add up and return the costs of introducing add statements that
2629 require the increment INCR on behalf of candidate C and phi
2630 statement PHI. Accumulate into *SAVINGS the potential savings
2631 from removing existing statements that feed PHI and have no other
2632 uses. */
2633
2634 static int
2635 phi_incr_cost (slsr_cand_t c, const widest_int &incr, gimple phi, int *savings)
2636 {
2637 unsigned i;
2638 int cost = 0;
2639 slsr_cand_t basis = lookup_cand (c->basis);
2640 slsr_cand_t phi_cand = base_cand_from_table (gimple_phi_result (phi));
2641
2642 for (i = 0; i < gimple_phi_num_args (phi); i++)
2643 {
2644 tree arg = gimple_phi_arg_def (phi, i);
2645
2646 if (!operand_equal_p (arg, phi_cand->base_expr, 0))
2647 {
2648 gimple arg_def = SSA_NAME_DEF_STMT (arg);
2649
2650 if (gimple_code (arg_def) == GIMPLE_PHI)
2651 {
2652 int feeding_savings = 0;
2653 cost += phi_incr_cost (c, incr, arg_def, &feeding_savings);
2654 if (has_single_use (gimple_phi_result (arg_def)))
2655 *savings += feeding_savings;
2656 }
2657 else
2658 {
2659 slsr_cand_t arg_cand = base_cand_from_table (arg);
2660 widest_int diff = arg_cand->index - basis->index;
2661
2662 if (incr == diff)
2663 {
2664 tree basis_lhs = gimple_assign_lhs (basis->cand_stmt);
2665 tree lhs = gimple_assign_lhs (arg_cand->cand_stmt);
2666 cost += add_cost (true, TYPE_MODE (TREE_TYPE (basis_lhs)));
2667 if (has_single_use (lhs))
2668 *savings += stmt_cost (arg_cand->cand_stmt, true);
2669 }
2670 }
2671 }
2672 }
2673
2674 return cost;
2675 }
2676
2677 /* Return the first candidate in the tree rooted at C that has not
2678 already been replaced, favoring siblings over dependents. */
2679
2680 static slsr_cand_t
2681 unreplaced_cand_in_tree (slsr_cand_t c)
2682 {
2683 if (!cand_already_replaced (c))
2684 return c;
2685
2686 if (c->sibling)
2687 {
2688 slsr_cand_t sib = unreplaced_cand_in_tree (lookup_cand (c->sibling));
2689 if (sib)
2690 return sib;
2691 }
2692
2693 if (c->dependent)
2694 {
2695 slsr_cand_t dep = unreplaced_cand_in_tree (lookup_cand (c->dependent));
2696 if (dep)
2697 return dep;
2698 }
2699
2700 return NULL;
2701 }
2702
2703 /* Return TRUE if the candidates in the tree rooted at C should be
2704 optimized for speed, else FALSE. We estimate this based on the block
2705 containing the most dominant candidate in the tree that has not yet
2706 been replaced. */
2707
2708 static bool
2709 optimize_cands_for_speed_p (slsr_cand_t c)
2710 {
2711 slsr_cand_t c2 = unreplaced_cand_in_tree (c);
2712 gcc_assert (c2);
2713 return optimize_bb_for_speed_p (gimple_bb (c2->cand_stmt));
2714 }
2715
2716 /* Add COST_IN to the lowest cost of any dependent path starting at
2717 candidate C or any of its siblings, counting only candidates along
2718 such paths with increment INCR. Assume that replacing a candidate
2719 reduces cost by REPL_SAVINGS. Also account for savings from any
2720 statements that would go dead. If COUNT_PHIS is true, include
2721 costs of introducing feeding statements for conditional candidates. */
2722
2723 static int
2724 lowest_cost_path (int cost_in, int repl_savings, slsr_cand_t c,
2725 const widest_int &incr, bool count_phis)
2726 {
2727 int local_cost, sib_cost, savings = 0;
2728 widest_int cand_incr = cand_abs_increment (c);
2729
2730 if (cand_already_replaced (c))
2731 local_cost = cost_in;
2732 else if (incr == cand_incr)
2733 local_cost = cost_in - repl_savings - c->dead_savings;
2734 else
2735 local_cost = cost_in - c->dead_savings;
2736
2737 if (count_phis
2738 && phi_dependent_cand_p (c)
2739 && !cand_already_replaced (c))
2740 {
2741 gimple phi = lookup_cand (c->def_phi)->cand_stmt;
2742 local_cost += phi_incr_cost (c, incr, phi, &savings);
2743
2744 if (has_single_use (gimple_phi_result (phi)))
2745 local_cost -= savings;
2746 }
2747
2748 if (c->dependent)
2749 local_cost = lowest_cost_path (local_cost, repl_savings,
2750 lookup_cand (c->dependent), incr,
2751 count_phis);
2752
2753 if (c->sibling)
2754 {
2755 sib_cost = lowest_cost_path (cost_in, repl_savings,
2756 lookup_cand (c->sibling), incr,
2757 count_phis);
2758 local_cost = MIN (local_cost, sib_cost);
2759 }
2760
2761 return local_cost;
2762 }
2763
2764 /* Compute the total savings that would accrue from all replacements
2765 in the candidate tree rooted at C, counting only candidates with
2766 increment INCR. Assume that replacing a candidate reduces cost
2767 by REPL_SAVINGS. Also account for savings from statements that
2768 would go dead. */
2769
2770 static int
2771 total_savings (int repl_savings, slsr_cand_t c, const widest_int &incr,
2772 bool count_phis)
2773 {
2774 int savings = 0;
2775 widest_int cand_incr = cand_abs_increment (c);
2776
2777 if (incr == cand_incr && !cand_already_replaced (c))
2778 savings += repl_savings + c->dead_savings;
2779
2780 if (count_phis
2781 && phi_dependent_cand_p (c)
2782 && !cand_already_replaced (c))
2783 {
2784 int phi_savings = 0;
2785 gimple phi = lookup_cand (c->def_phi)->cand_stmt;
2786 savings -= phi_incr_cost (c, incr, phi, &phi_savings);
2787
2788 if (has_single_use (gimple_phi_result (phi)))
2789 savings += phi_savings;
2790 }
2791
2792 if (c->dependent)
2793 savings += total_savings (repl_savings, lookup_cand (c->dependent), incr,
2794 count_phis);
2795
2796 if (c->sibling)
2797 savings += total_savings (repl_savings, lookup_cand (c->sibling), incr,
2798 count_phis);
2799
2800 return savings;
2801 }
2802
2803 /* Use target-specific costs to determine and record which increments
2804 in the current candidate tree are profitable to replace, assuming
2805 MODE and SPEED. FIRST_DEP is the first dependent of the root of
2806 the candidate tree.
2807
2808 One slight limitation here is that we don't account for the possible
2809 introduction of casts in some cases. See replace_one_candidate for
2810 the cases where these are introduced. This should probably be cleaned
2811 up sometime. */
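/* Rough cost picture (an illustrative summary of the logic below, not a
   normative statement): for a CAND_MULT chain with, say, increment 5,
   optimized for speed, the fixed cost is roughly that of multiplying by 5
   (to build the initializer), offset along the cheapest dependent path by
   (mul_cost - add_cost) per replaced candidate plus any dead-code
   savings.  */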
2812
2813 static void
2814 analyze_increments (slsr_cand_t first_dep, enum machine_mode mode, bool speed)
2815 {
2816 unsigned i;
2817
2818 for (i = 0; i < incr_vec_len; i++)
2819 {
2820 HOST_WIDE_INT incr = incr_vec[i].incr.to_shwi ();
2821
2822 /* If somehow this increment is bigger than a HWI, we won't
2823 be optimizing candidates that use it. And if the increment
2824 has a count of zero, nothing will be done with it. */
2825 if (!wi::fits_shwi_p (incr_vec[i].incr) || !incr_vec[i].count)
2826 incr_vec[i].cost = COST_INFINITE;
2827
2828 /* Increments of 0, 1, and -1 are always profitable to replace,
2829 because they always replace a multiply or add with an add or
2830 copy, and may cause one or more existing instructions to go
2831 dead. Exception: -1 can't be assumed to be profitable for
2832 pointer addition. */
2833 else if (incr == 0
2834 || incr == 1
2835 || (incr == -1
2836 && (gimple_assign_rhs_code (first_dep->cand_stmt)
2837 != POINTER_PLUS_EXPR)))
2838 incr_vec[i].cost = COST_NEUTRAL;
2839
2840 /* FORNOW: If we need to add an initializer, give up if a cast from
2841 the candidate's type to its stride's type can lose precision.
2842 This could eventually be handled better by expressly retaining the
2843 result of a cast to a wider type in the stride. Example:
2844
2845 short int _1;
2846 _2 = (int) _1;
2847 _3 = _2 * 10;
2848 _4 = x + _3; ADD: x + (10 * _1) : int
2849 _5 = _2 * 15;
2850 _6 = x + _5; ADD: x + (15 * _1) : int
2851
2852 Right now replacing _6 would cause insertion of an initializer
2853 of the form "short int T = _1 * 5;" followed by a cast to
2854 int, which could overflow incorrectly. Had we recorded _2 or
2855 (int)_1 as the stride, this wouldn't happen. However, doing
2856 this breaks other opportunities, so this will require some
2857 care. */
2858 else if (!incr_vec[i].initializer
2859 && TREE_CODE (first_dep->stride) != INTEGER_CST
2860 && !legal_cast_p_1 (first_dep->stride,
2861 gimple_assign_lhs (first_dep->cand_stmt)))
2862
2863 incr_vec[i].cost = COST_INFINITE;
2864
2865 /* If we need to add an initializer, make sure we don't introduce
2866 a multiply by a pointer type, which can happen in certain cast
2867 scenarios. FIXME: When cleaning up these cast issues, we can
2868 afford to introduce the multiply provided we cast out to an
2869 unsigned int of appropriate size. */
2870 else if (!incr_vec[i].initializer
2871 && TREE_CODE (first_dep->stride) != INTEGER_CST
2872 && POINTER_TYPE_P (TREE_TYPE (first_dep->stride)))
2873
2874 incr_vec[i].cost = COST_INFINITE;
2875
2876 /* For any other increment, if this is a multiply candidate, we
2877 must introduce a temporary T and initialize it with
2878 T_0 = stride * increment. When optimizing for speed, walk the
2879 candidate tree to calculate the best cost reduction along any
2880 path; if it offsets the fixed cost of inserting the initializer,
2881 replacing the increment is profitable. When optimizing for
2882 size, instead calculate the total cost reduction from replacing
2883 all candidates with this increment. */
2884 else if (first_dep->kind == CAND_MULT)
2885 {
2886 int cost = mult_by_coeff_cost (incr, mode, speed);
2887 int repl_savings = mul_cost (speed, mode) - add_cost (speed, mode);
2888 if (speed)
2889 cost = lowest_cost_path (cost, repl_savings, first_dep,
2890 incr_vec[i].incr, COUNT_PHIS);
2891 else
2892 cost -= total_savings (repl_savings, first_dep, incr_vec[i].incr,
2893 COUNT_PHIS);
2894
2895 incr_vec[i].cost = cost;
2896 }
2897
2898 /* If this is an add candidate, the initializer may already
2899 exist, so only calculate the cost of the initializer if it
2900 doesn't. We are replacing one add with another here, so the
2901 known replacement savings is zero. We will account for removal
2902 of dead instructions in lowest_cost_path or total_savings. */
2903 else
2904 {
2905 int cost = 0;
2906 if (!incr_vec[i].initializer)
2907 cost = mult_by_coeff_cost (incr, mode, speed);
2908
2909 if (speed)
2910 cost = lowest_cost_path (cost, 0, first_dep, incr_vec[i].incr,
2911 DONT_COUNT_PHIS);
2912 else
2913 cost -= total_savings (0, first_dep, incr_vec[i].incr,
2914 DONT_COUNT_PHIS);
2915
2916 incr_vec[i].cost = cost;
2917 }
2918 }
2919 }
2920
2921 /* Return the nearest common dominator of BB1 and BB2. If the blocks
2922 are identical, return the earlier of C1 and C2 in *WHERE. Otherwise,
2923 if the NCD matches BB1, return C1 in *WHERE; if the NCD matches BB2,
2924 return C2 in *WHERE; and if the NCD matches neither, return NULL in
2925 *WHERE. Note: It is possible for one of C1 and C2 to be NULL. */
2926
2927 static basic_block
2928 ncd_for_two_cands (basic_block bb1, basic_block bb2,
2929 slsr_cand_t c1, slsr_cand_t c2, slsr_cand_t *where)
2930 {
2931 basic_block ncd;
2932
2933 if (!bb1)
2934 {
2935 *where = c2;
2936 return bb2;
2937 }
2938
2939 if (!bb2)
2940 {
2941 *where = c1;
2942 return bb1;
2943 }
2944
2945 ncd = nearest_common_dominator (CDI_DOMINATORS, bb1, bb2);
2946
2947 /* If both candidates are in the same block, the earlier
2948 candidate wins. */
2949 if (bb1 == ncd && bb2 == ncd)
2950 {
2951 if (!c1 || (c2 && c2->cand_num < c1->cand_num))
2952 *where = c2;
2953 else
2954 *where = c1;
2955 }
2956
2957 /* Otherwise, if one of them produced a candidate in the
2958 dominator, that one wins. */
2959 else if (bb1 == ncd)
2960 *where = c1;
2961
2962 else if (bb2 == ncd)
2963 *where = c2;
2964
2965 /* If neither matches the dominator, neither wins. */
2966 else
2967 *where = NULL;
2968
2969 return ncd;
2970 }
2971
2972 /* Consider all candidates that feed PHI. Find the nearest common
2973 dominator of those candidates requiring the given increment INCR.
2974 Further find and return the nearest common dominator of this result
2975 with block NCD. If the returned block contains one or more of the
2976 candidates, return the earliest candidate in the block in *WHERE. */
2977
2978 static basic_block
2979 ncd_with_phi (slsr_cand_t c, const widest_int &incr, gimple phi,
2980 basic_block ncd, slsr_cand_t *where)
2981 {
2982 unsigned i;
2983 slsr_cand_t basis = lookup_cand (c->basis);
2984 slsr_cand_t phi_cand = base_cand_from_table (gimple_phi_result (phi));
2985
2986 for (i = 0; i < gimple_phi_num_args (phi); i++)
2987 {
2988 tree arg = gimple_phi_arg_def (phi, i);
2989
2990 if (!operand_equal_p (arg, phi_cand->base_expr, 0))
2991 {
2992 gimple arg_def = SSA_NAME_DEF_STMT (arg);
2993
2994 if (gimple_code (arg_def) == GIMPLE_PHI)
2995 ncd = ncd_with_phi (c, incr, arg_def, ncd, where);
2996 else
2997 {
2998 slsr_cand_t arg_cand = base_cand_from_table (arg);
2999 widest_int diff = arg_cand->index - basis->index;
3000 basic_block pred = gimple_phi_arg_edge (phi, i)->src;
3001
3002 if ((incr == diff) || (!address_arithmetic_p && incr == -diff))
3003 ncd = ncd_for_two_cands (ncd, pred, *where, NULL, where);
3004 }
3005 }
3006 }
3007
3008 return ncd;
3009 }
3010
3011 /* Consider the candidate C together with any candidates that feed
3012 C's phi dependence (if any). Find and return the nearest common
3013 dominator of those candidates requiring the given increment INCR.
3014 If the returned block contains one or more of the candidates,
3015 return the earliest candidate in the block in *WHERE. */
3016
3017 static basic_block
3018 ncd_of_cand_and_phis (slsr_cand_t c, const widest_int &incr, slsr_cand_t *where)
3019 {
3020 basic_block ncd = NULL;
3021
3022 if (cand_abs_increment (c) == incr)
3023 {
3024 ncd = gimple_bb (c->cand_stmt);
3025 *where = c;
3026 }
3027
3028 if (phi_dependent_cand_p (c))
3029 ncd = ncd_with_phi (c, incr, lookup_cand (c->def_phi)->cand_stmt,
3030 ncd, where);
3031
3032 return ncd;
3033 }
3034
3035 /* Consider all candidates in the tree rooted at C for which INCR
3036 represents the required increment of C relative to its basis.
3037 Find and return the basic block that most nearly dominates all
3038 such candidates. If the returned block contains one or more of
3039 the candidates, return the earliest candidate in the block in
3040 *WHERE. */
3041
3042 static basic_block
3043 nearest_common_dominator_for_cands (slsr_cand_t c, const widest_int &incr,
3044 slsr_cand_t *where)
3045 {
3046 basic_block sib_ncd = NULL, dep_ncd = NULL, this_ncd = NULL, ncd;
3047 slsr_cand_t sib_where = NULL, dep_where = NULL, this_where = NULL, new_where;
3048
3049 /* First find the NCD of all siblings and dependents. */
3050 if (c->sibling)
3051 sib_ncd = nearest_common_dominator_for_cands (lookup_cand (c->sibling),
3052 incr, &sib_where);
3053 if (c->dependent)
3054 dep_ncd = nearest_common_dominator_for_cands (lookup_cand (c->dependent),
3055 incr, &dep_where);
3056 if (!sib_ncd && !dep_ncd)
3057 {
3058 new_where = NULL;
3059 ncd = NULL;
3060 }
3061 else if (sib_ncd && !dep_ncd)
3062 {
3063 new_where = sib_where;
3064 ncd = sib_ncd;
3065 }
3066 else if (dep_ncd && !sib_ncd)
3067 {
3068 new_where = dep_where;
3069 ncd = dep_ncd;
3070 }
3071 else
3072 ncd = ncd_for_two_cands (sib_ncd, dep_ncd, sib_where,
3073 dep_where, &new_where);
3074
3075 /* If the candidate's increment doesn't match the one we're interested
3076 in (nor do any increments for feeding defs of a phi-dependence),
3077 then the result depends only on siblings and dependents. */
3078 this_ncd = ncd_of_cand_and_phis (c, incr, &this_where);
3079
3080 if (!this_ncd || cand_already_replaced (c))
3081 {
3082 *where = new_where;
3083 return ncd;
3084 }
3085
3086 /* Otherwise, compare this candidate with the result from all siblings
3087 and dependents. */
3088 ncd = ncd_for_two_cands (ncd, this_ncd, new_where, this_where, where);
3089
3090 return ncd;
3091 }
3092
3093 /* Return TRUE if the increment indexed by INDEX is profitable to replace. */
3094
3095 static inline bool
3096 profitable_increment_p (unsigned index)
3097 {
3098 return (incr_vec[index].cost <= COST_NEUTRAL);
3099 }
3100
3101 /* For each profitable increment in the increment vector not equal to
3102 0 or 1 (or -1, for non-pointer arithmetic), find the nearest common
3103 dominator of all statements in the candidate chain rooted at C
3104 that require that increment, and insert an initializer
3105 T_0 = stride * increment at that location. Record T_0 with the
3106 increment record. */
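/* E.g. (illustrative; the temporary name is simply whatever
   make_temp_ssa_name chooses): for a profitable increment of 5 over
   stride _s, a statement like slsr_1 = _s * 5 is inserted at the nearest
   common dominator of the statements needing it, and slsr_1 is recorded
   as the initializer for increment 5.  */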
3107
3108 static void
3109 insert_initializers (slsr_cand_t c)
3110 {
3111 unsigned i;
3112
3113 for (i = 0; i < incr_vec_len; i++)
3114 {
3115 basic_block bb;
3116 slsr_cand_t where = NULL;
3117 gimple init_stmt;
3118 tree stride_type, new_name, incr_tree;
3119 widest_int incr = incr_vec[i].incr;
3120
3121 if (!profitable_increment_p (i)
3122 || incr == 1
3123 || (incr == -1
3124 && gimple_assign_rhs_code (c->cand_stmt) != POINTER_PLUS_EXPR)
3125 || incr == 0)
3126 continue;
3127
3128 /* We may have already identified an existing initializer that
3129 will suffice. */
3130 if (incr_vec[i].initializer)
3131 {
3132 if (dump_file && (dump_flags & TDF_DETAILS))
3133 {
3134 fputs ("Using existing initializer: ", dump_file);
3135 print_gimple_stmt (dump_file,
3136 SSA_NAME_DEF_STMT (incr_vec[i].initializer),
3137 0, 0);
3138 }
3139 continue;
3140 }
3141
3142 /* Find the block that most closely dominates all candidates
3143 with this increment. If there is at least one candidate in
3144 that block, the earliest one will be returned in WHERE. */
3145 bb = nearest_common_dominator_for_cands (c, incr, &where);
3146
3147 /* Create a new SSA name to hold the initializer's value. */
3148 stride_type = TREE_TYPE (c->stride);
3149 new_name = make_temp_ssa_name (stride_type, NULL, "slsr");
3150 incr_vec[i].initializer = new_name;
3151
3152 /* Create the initializer and insert it in the latest possible
3153 dominating position. */
3154 incr_tree = wide_int_to_tree (stride_type, incr);
3155 init_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_name,
3156 c->stride, incr_tree);
3157 if (where)
3158 {
3159 gimple_stmt_iterator gsi = gsi_for_stmt (where->cand_stmt);
3160 gsi_insert_before (&gsi, init_stmt, GSI_SAME_STMT);
3161 gimple_set_location (init_stmt, gimple_location (where->cand_stmt));
3162 }
3163 else
3164 {
3165 gimple_stmt_iterator gsi = gsi_last_bb (bb);
3166 gimple basis_stmt = lookup_cand (c->basis)->cand_stmt;
3167
3168 if (!gsi_end_p (gsi) && is_ctrl_stmt (gsi_stmt (gsi)))
3169 gsi_insert_before (&gsi, init_stmt, GSI_SAME_STMT);
3170 else
3171 gsi_insert_after (&gsi, init_stmt, GSI_SAME_STMT);
3172
3173 gimple_set_location (init_stmt, gimple_location (basis_stmt));
3174 }
3175
3176 if (dump_file && (dump_flags & TDF_DETAILS))
3177 {
3178 fputs ("Inserting initializer: ", dump_file);
3179 print_gimple_stmt (dump_file, init_stmt, 0, 0);
3180 }
3181 }
3182 }
3183
3184 /* Return TRUE iff all required increments for candidates feeding PHI
3185 are profitable to replace on behalf of candidate C. */
3186
3187 static bool
3188 all_phi_incrs_profitable (slsr_cand_t c, gimple phi)
3189 {
3190 unsigned i;
3191 slsr_cand_t basis = lookup_cand (c->basis);
3192 slsr_cand_t phi_cand = base_cand_from_table (gimple_phi_result (phi));
3193
3194 for (i = 0; i < gimple_phi_num_args (phi); i++)
3195 {
3196 tree arg = gimple_phi_arg_def (phi, i);
3197
3198 if (!operand_equal_p (arg, phi_cand->base_expr, 0))
3199 {
3200 gimple arg_def = SSA_NAME_DEF_STMT (arg);
3201
3202 if (gimple_code (arg_def) == GIMPLE_PHI)
3203 {
3204 if (!all_phi_incrs_profitable (c, arg_def))
3205 return false;
3206 }
3207 else
3208 {
3209 int j;
3210 slsr_cand_t arg_cand = base_cand_from_table (arg);
3211 widest_int increment = arg_cand->index - basis->index;
3212
3213 if (!address_arithmetic_p && wi::neg_p (increment))
3214 increment = -increment;
3215
3216 j = incr_vec_index (increment);
3217
3218 if (dump_file && (dump_flags & TDF_DETAILS))
3219 {
3220 fprintf (dump_file, " Conditional candidate %d, phi: ",
3221 c->cand_num);
3222 print_gimple_stmt (dump_file, phi, 0, 0);
3223 fputs (" increment: ", dump_file);
3224 print_decs (increment, dump_file);
3225 if (j < 0)
3226 fprintf (dump_file,
3227 "\n Not replaced; incr_vec overflow.\n");
3228 else {
3229 fprintf (dump_file, "\n cost: %d\n", incr_vec[j].cost);
3230 if (profitable_increment_p (j))
3231 fputs (" Replacing...\n", dump_file);
3232 else
3233 fputs (" Not replaced.\n", dump_file);
3234 }
3235 }
3236
3237 if (j < 0 || !profitable_increment_p (j))
3238 return false;
3239 }
3240 }
3241 }
3242
3243 return true;
3244 }
3245
3246 /* Create a NOP_EXPR that copies FROM_EXPR into a new SSA name of
3247 type TO_TYPE, and insert it in front of the statement represented
3248 by candidate C.  Create a new SSA name to hold the result, and
3249 return that name.  */
3250
3251 static tree
3252 introduce_cast_before_cand (slsr_cand_t c, tree to_type, tree from_expr)
3253 {
3254 tree cast_lhs;
3255 gimple cast_stmt;
3256 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
3257
3258 cast_lhs = make_temp_ssa_name (to_type, NULL, "slsr");
3259 cast_stmt = gimple_build_assign_with_ops (NOP_EXPR, cast_lhs,
3260 from_expr, NULL_TREE);
3261 gimple_set_location (cast_stmt, gimple_location (c->cand_stmt));
3262 gsi_insert_before (&gsi, cast_stmt, GSI_SAME_STMT);
3263
3264 if (dump_file && (dump_flags & TDF_DETAILS))
3265 {
3266 fputs (" Inserting: ", dump_file);
3267 print_gimple_stmt (dump_file, cast_stmt, 0, 0);
3268 }
3269
3270 return cast_lhs;
3271 }
3272
3273 /* Replace the RHS of the statement represented by candidate C with
3274 NEW_CODE, NEW_RHS1, and NEW_RHS2, provided that to do so doesn't
3275 leave C unchanged or just interchange its operands. The original
3276 operation and operands are in OLD_CODE, OLD_RHS1, and OLD_RHS2.
3277 If the replacement was made and we are doing a details dump,
3278 return the revised statement, else NULL. */
3279
3280 static gimple
3281 replace_rhs_if_not_dup (enum tree_code new_code, tree new_rhs1, tree new_rhs2,
3282 enum tree_code old_code, tree old_rhs1, tree old_rhs2,
3283 slsr_cand_t c)
3284 {
3285 if (new_code != old_code
3286 || ((!operand_equal_p (new_rhs1, old_rhs1, 0)
3287 || !operand_equal_p (new_rhs2, old_rhs2, 0))
3288 && (!operand_equal_p (new_rhs1, old_rhs2, 0)
3289 || !operand_equal_p (new_rhs2, old_rhs1, 0))))
3290 {
3291 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
3292 gimple_assign_set_rhs_with_ops (&gsi, new_code, new_rhs1, new_rhs2);
3293 update_stmt (gsi_stmt (gsi));
3294 c->cand_stmt = gsi_stmt (gsi);
3295
3296 if (dump_file && (dump_flags & TDF_DETAILS))
3297 return gsi_stmt (gsi);
3298 }
3299
3300 else if (dump_file && (dump_flags & TDF_DETAILS))
3301 fputs (" (duplicate, not actually replacing)\n", dump_file);
3302
3303 return NULL;
3304 }
3305
3306 /* Strength-reduce the statement represented by candidate C by replacing
3307 it with an equivalent addition or subtraction. I is the index into
3308 the increment vector identifying C's increment.  A new SSA name is
3309 created if a cast needs to be introduced.  BASIS_NAME is the rhs1
3310 to use in creating the add/subtract.  */
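/* Example of the initializer case (illustrative; slsr_1 is a made-up
   temporary name): if C needs increment 5 over stride _s and an
   initializer slsr_1 = _s * 5 is available, C is rewritten as
   X = BASIS_NAME + slsr_1 (or with MINUS_EXPR when the recorded
   increment and C's increment differ in sign).  */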
3311
3312 static void
3313 replace_one_candidate (slsr_cand_t c, unsigned i, tree basis_name)
3314 {
3315 gimple stmt_to_print = NULL;
3316 tree orig_rhs1, orig_rhs2;
3317 tree rhs2;
3318 enum tree_code orig_code, repl_code;
3319 widest_int cand_incr;
3320
3321 orig_code = gimple_assign_rhs_code (c->cand_stmt);
3322 orig_rhs1 = gimple_assign_rhs1 (c->cand_stmt);
3323 orig_rhs2 = gimple_assign_rhs2 (c->cand_stmt);
3324 cand_incr = cand_increment (c);
3325
3326 if (dump_file && (dump_flags & TDF_DETAILS))
3327 {
3328 fputs ("Replacing: ", dump_file);
3329 print_gimple_stmt (dump_file, c->cand_stmt, 0, 0);
3330 stmt_to_print = c->cand_stmt;
3331 }
3332
3333 if (address_arithmetic_p)
3334 repl_code = POINTER_PLUS_EXPR;
3335 else
3336 repl_code = PLUS_EXPR;
3337
3338 /* If the increment has an initializer T_0, replace the candidate
3339 statement with an add of the basis name and the initializer. */
3340 if (incr_vec[i].initializer)
3341 {
3342 tree init_type = TREE_TYPE (incr_vec[i].initializer);
3343 tree orig_type = TREE_TYPE (orig_rhs2);
3344
3345 if (types_compatible_p (orig_type, init_type))
3346 rhs2 = incr_vec[i].initializer;
3347 else
3348 rhs2 = introduce_cast_before_cand (c, orig_type,
3349 incr_vec[i].initializer);
3350
3351 if (incr_vec[i].incr != cand_incr)
3352 {
3353 gcc_assert (repl_code == PLUS_EXPR);
3354 repl_code = MINUS_EXPR;
3355 }
3356
3357 stmt_to_print = replace_rhs_if_not_dup (repl_code, basis_name, rhs2,
3358 orig_code, orig_rhs1, orig_rhs2,
3359 c);
3360 }
3361
3362 /* Otherwise, the increment is one of -1, 0, and 1. Replace
3363 with a subtract of the stride from the basis name, a copy
3364 from the basis name, or an add of the stride to the basis
3365 name, respectively. It may be necessary to introduce a
3366 cast (or reuse an existing cast). */
3367 else if (cand_incr == 1)
3368 {
3369 tree stride_type = TREE_TYPE (c->stride);
3370 tree orig_type = TREE_TYPE (orig_rhs2);
3371
3372 if (types_compatible_p (orig_type, stride_type))
3373 rhs2 = c->stride;
3374 else
3375 rhs2 = introduce_cast_before_cand (c, orig_type, c->stride);
3376
3377 stmt_to_print = replace_rhs_if_not_dup (repl_code, basis_name, rhs2,
3378 orig_code, orig_rhs1, orig_rhs2,
3379 c);
3380 }
3381
3382 else if (cand_incr == -1)
3383 {
3384 tree stride_type = TREE_TYPE (c->stride);
3385 tree orig_type = TREE_TYPE (orig_rhs2);
3386 gcc_assert (repl_code != POINTER_PLUS_EXPR);
3387
3388 if (types_compatible_p (orig_type, stride_type))
3389 rhs2 = c->stride;
3390 else
3391 rhs2 = introduce_cast_before_cand (c, orig_type, c->stride);
3392
3393 if (orig_code != MINUS_EXPR
3394 || !operand_equal_p (basis_name, orig_rhs1, 0)
3395 || !operand_equal_p (rhs2, orig_rhs2, 0))
3396 {
3397 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
3398 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, basis_name, rhs2);
3399 update_stmt (gsi_stmt (gsi));
3400 c->cand_stmt = gsi_stmt (gsi);
3401
3402 if (dump_file && (dump_flags & TDF_DETAILS))
3403 stmt_to_print = gsi_stmt (gsi);
3404 }
3405 else if (dump_file && (dump_flags & TDF_DETAILS))
3406 fputs (" (duplicate, not actually replacing)\n", dump_file);
3407 }
3408
3409 else if (cand_incr == 0)
3410 {
3411 tree lhs = gimple_assign_lhs (c->cand_stmt);
3412 tree lhs_type = TREE_TYPE (lhs);
3413 tree basis_type = TREE_TYPE (basis_name);
3414
3415 if (types_compatible_p (lhs_type, basis_type))
3416 {
3417 gimple copy_stmt = gimple_build_assign (lhs, basis_name);
3418 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
3419 gimple_set_location (copy_stmt, gimple_location (c->cand_stmt));
3420 gsi_replace (&gsi, copy_stmt, false);
3421 c->cand_stmt = copy_stmt;
3422
3423 if (dump_file && (dump_flags & TDF_DETAILS))
3424 stmt_to_print = copy_stmt;
3425 }
3426 else
3427 {
3428 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
3429 gimple cast_stmt = gimple_build_assign_with_ops (NOP_EXPR, lhs,
3430 basis_name,
3431 NULL_TREE);
3432 gimple_set_location (cast_stmt, gimple_location (c->cand_stmt));
3433 gsi_replace (&gsi, cast_stmt, false);
3434 c->cand_stmt = cast_stmt;
3435
3436 if (dump_file && (dump_flags & TDF_DETAILS))
3437 stmt_to_print = cast_stmt;
3438 }
3439 }
3440 else
3441 gcc_unreachable ();
3442
3443 if (dump_file && (dump_flags & TDF_DETAILS) && stmt_to_print)
3444 {
3445 fputs ("With: ", dump_file);
3446 print_gimple_stmt (dump_file, stmt_to_print, 0, 0);
3447 fputs ("\n", dump_file);
3448 }
3449 }
3450
3451 /* For each candidate in the tree rooted at C, replace it if its
3452 increment has been shown to be profitable.  */
3453
3454 static void
3455 replace_profitable_candidates (slsr_cand_t c)
3456 {
3457 if (!cand_already_replaced (c))
3458 {
3459 widest_int increment = cand_abs_increment (c);
3460 enum tree_code orig_code = gimple_assign_rhs_code (c->cand_stmt);
3461 int i;
3462
3463 i = incr_vec_index (increment);
3464
3465 /* Only process profitable increments. Nothing useful can be done
3466 to a cast or copy. */
3467 if (i >= 0
3468 && profitable_increment_p (i)
3469 && orig_code != MODIFY_EXPR
3470 && orig_code != NOP_EXPR)
3471 {
3472 if (phi_dependent_cand_p (c))
3473 {
3474 gimple phi = lookup_cand (c->def_phi)->cand_stmt;
3475
3476 if (all_phi_incrs_profitable (c, phi))
3477 {
3478 /* Look up the LHS SSA name from C's basis. This will be
3479 the RHS1 of the adds we will introduce to create new
3480 phi arguments. */
3481 slsr_cand_t basis = lookup_cand (c->basis);
3482 tree basis_name = gimple_assign_lhs (basis->cand_stmt);
3483
3484 /* Create a new phi statement that will represent C's true
3485 basis after the transformation is complete. */
3486 location_t loc = gimple_location (c->cand_stmt);
3487 tree name = create_phi_basis (c, phi, basis_name,
3488 loc, UNKNOWN_STRIDE);
3489
3490 /* Replace C with an add of the new basis phi and the
3491 increment. */
3492 replace_one_candidate (c, i, name);
3493 }
3494 }
3495 else
3496 {
3497 slsr_cand_t basis = lookup_cand (c->basis);
3498 tree basis_name = gimple_assign_lhs (basis->cand_stmt);
3499 replace_one_candidate (c, i, basis_name);
3500 }
3501 }
3502 }
3503
3504 if (c->sibling)
3505 replace_profitable_candidates (lookup_cand (c->sibling));
3506
3507 if (c->dependent)
3508 replace_profitable_candidates (lookup_cand (c->dependent));
3509 }
3510 \f
3511 /* Analyze costs of related candidates in the candidate vector,
3512 and make beneficial replacements. */
3513
3514 static void
3515 analyze_candidates_and_replace (void)
3516 {
3517 unsigned i;
3518 slsr_cand_t c;
3519
3520 /* Each candidate that has a null basis and a non-null
3521 dependent is the root of a tree of related statements.
3522 Analyze each tree to determine a subset of those
3523 statements that can be replaced with maximum benefit. */
3524 FOR_EACH_VEC_ELT (cand_vec, i, c)
3525 {
3526 slsr_cand_t first_dep;
3527
3528 if (c->basis != 0 || c->dependent == 0)
3529 continue;
3530
3531 if (dump_file && (dump_flags & TDF_DETAILS))
3532 fprintf (dump_file, "\nProcessing dependency tree rooted at %d.\n",
3533 c->cand_num);
3534
3535 first_dep = lookup_cand (c->dependent);
3536
3537 /* If this is a chain of CAND_REFs, unconditionally replace
3538 each of them with a strength-reduced data reference. */
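      /* Roughly speaking (illustrative only): for references such as
	 a[i+1], a[i+2], and a[i+3] that share a common base expression,
	 the common address is computed once and the later references are
	 rewritten to use it plus small constant offsets.  */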
3539 if (c->kind == CAND_REF)
3540 replace_refs (c);
3541
3542 /* If the common stride of all related candidates is a known
3543 constant, each candidate without a phi-dependence can be
3544 profitably replaced. Each replaces a multiply by a single
3545 add, with the possibility that a feeding add also goes dead.
3546 A candidate with a phi-dependence is replaced only if the
3547 compensation code it requires is offset by the strength
3548 reduction savings. */
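      /* Illustrative example: with candidates x1 = (b + 1) * 4 and
	 x2 = (b + 3) * 4, the second is replaced by x2 = x1 + 8,
	 since (3 - 1) * 4 folds to a constant.  */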
3549 else if (TREE_CODE (c->stride) == INTEGER_CST)
3550 replace_uncond_cands_and_profitable_phis (first_dep);
3551
3552 /* When the stride is an SSA name, it may still be profitable
3553 to replace some or all of the dependent candidates, depending
3554 on whether the introduced increments can be reused, or are
3555 less expensive to calculate than the replaced statements. */
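      /* Illustrative example: with candidates x1 = (b + 1) * s and
	 x2 = (b + 3) * s, replacing the second requires
	 t = 2 * s; x2 = x1 + t, which pays off only when the multiply
	 by the constant 2 is cheaper than the original multiply or when
	 t can be shared with other replacements.  */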
3556 else
3557 {
3558 enum machine_mode mode;
3559 bool speed;
3560
3561 /* Determine whether we'll be generating pointer arithmetic
3562 when replacing candidates. */
3563 address_arithmetic_p = (c->kind == CAND_ADD
3564 && POINTER_TYPE_P (c->cand_type));
3565
3566 /* If all candidates have already been replaced under other
3567 interpretations, nothing remains to be done. */
3568 if (!count_candidates (c))
3569 continue;
3570
3571 /* Construct an array of increments for this candidate chain. */
3572 incr_vec = XNEWVEC (incr_info, MAX_INCR_VEC_LEN);
3573 incr_vec_len = 0;
3574 record_increments (c);
3575
3576 /* Determine which increments are profitable to replace. */
3577 mode = TYPE_MODE (TREE_TYPE (gimple_assign_lhs (c->cand_stmt)));
3578 speed = optimize_cands_for_speed_p (c);
3579 analyze_increments (first_dep, mode, speed);
3580
3581 /* Insert initializers of the form T_0 = stride * increment
3582 for use in profitable replacements. */
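	  /* For example, if increment 4 is profitable and the stride is
	     the SSA name s_1, a statement t_0 = s_1 * 4 is inserted at a
	     point dominating the candidates that use it (illustrative;
	     the exact placement is chosen by insert_initializers).  */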
3583 insert_initializers (first_dep);
3584 dump_incr_vec ();
3585
3586 /* Perform the replacements. */
3587 replace_profitable_candidates (first_dep);
3588 free (incr_vec);
3589 }
3590 }
3591 }
3592
3593 namespace {
3594
3595 const pass_data pass_data_strength_reduction =
3596 {
3597 GIMPLE_PASS, /* type */
3598 "slsr", /* name */
3599 OPTGROUP_NONE, /* optinfo_flags */
3600 true, /* has_execute */
3601 TV_GIMPLE_SLSR, /* tv_id */
3602 ( PROP_cfg | PROP_ssa ), /* properties_required */
3603 0, /* properties_provided */
3604 0, /* properties_destroyed */
3605 0, /* todo_flags_start */
3606 0, /* todo_flags_finish */
3607 };
3608
3609 class pass_strength_reduction : public gimple_opt_pass
3610 {
3611 public:
3612 pass_strength_reduction (gcc::context *ctxt)
3613 : gimple_opt_pass (pass_data_strength_reduction, ctxt)
3614 {}
3615
3616 /* opt_pass methods: */
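  /* The pass is gated on flag_tree_slsr, i.e. the -ftree-slsr
     command-line option.  */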
3617 virtual bool gate (function *) { return flag_tree_slsr; }
3618 virtual unsigned int execute (function *);
3619
3620 }; // class pass_strength_reduction
3621
3622 unsigned
3623 pass_strength_reduction::execute (function *fun)
3624 {
3625 /* Create the obstack where candidates will reside. */
3626 gcc_obstack_init (&cand_obstack);
3627
3628 /* Allocate the candidate vector. */
3629 cand_vec.create (128);
3630
3631 /* Allocate the mapping from statements to candidate indices. */
3632 stmt_cand_map = pointer_map_create ();
3633
3634 /* Create the obstack where candidate chains will reside. */
3635 gcc_obstack_init (&chain_obstack);
3636
3637 /* Allocate the mapping from base expressions to candidate chains. */
3638 base_cand_map = new hash_table<cand_chain_hasher> (500);
3639
3640 /* Allocate the mapping from bases to alternative bases. */
3641 alt_base_map = pointer_map_create ();
3642
3643 /* Initialize the loop optimizer. We need to detect flow across
3644 back edges, and this gives us dominator information as well. */
3645 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
3646
3647 /* Walk the CFG in predominator order looking for strength reduction
3648 candidates. */
3649 find_candidates_dom_walker (CDI_DOMINATORS)
3650 .walk (fun->cfg->x_entry_block_ptr);
3651
3652 if (dump_file && (dump_flags & TDF_DETAILS))
3653 {
3654 dump_cand_vec ();
3655 dump_cand_chains ();
3656 }
3657
3658 pointer_map_destroy (alt_base_map);
3659 free_affine_expand_cache (&name_expansions);
3660
3661 /* Analyze costs and make appropriate replacements. */
3662 analyze_candidates_and_replace ();
3663
3664 loop_optimizer_finalize ();
3665 delete base_cand_map;
3666 base_cand_map = NULL;
3667 obstack_free (&chain_obstack, NULL);
3668 pointer_map_destroy (stmt_cand_map);
3669 cand_vec.release ();
3670 obstack_free (&cand_obstack, NULL);
3671
3672 return 0;
3673 }
3674
3675 } // anon namespace
3676
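/* Entry point used by the pass manager; the pass is expected to be listed
   in passes.def, which invokes this factory to create the pass object
   (assumed registration path, following the usual GCC convention).  */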
3677 gimple_opt_pass *
3678 make_pass_strength_reduction (gcc::context *ctxt)
3679 {
3680 return new pass_strength_reduction (ctxt);
3681 }