gcc/gimple-ssa-strength-reduction.c
1 /* Straight-line strength reduction.
2 Copyright (C) 2012-2013 Free Software Foundation, Inc.
3 Contributed by Bill Schmidt, IBM <wschmidt@linux.ibm.com>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 /* There are many algorithms for performing strength reduction on
22 loops. This is not one of them. IVOPTS handles strength reduction
23 of induction variables just fine. This pass is intended to pick
24 up the crumbs it leaves behind, by considering opportunities for
25 strength reduction along dominator paths.
26
27 Strength reduction addresses explicit multiplies, and certain
28 multiplies implicit in addressing expressions. It would also be
29 possible to apply strength reduction to divisions and modulos,
30 but such opportunities are relatively uncommon.
31
32 Strength reduction is also currently restricted to integer operations.
33 If desired, it could be extended to floating-point operations under
34 control of something like -funsafe-math-optimizations. */
35
36 #include "config.h"
37 #include "system.h"
38 #include "coretypes.h"
39 #include "tree.h"
40 #include "gimple.h"
41 #include "basic-block.h"
42 #include "tree-pass.h"
43 #include "cfgloop.h"
44 #include "gimple-pretty-print.h"
45 #include "tree-ssa.h"
46 #include "domwalk.h"
47 #include "pointer-set.h"
48 #include "expmed.h"
49 #include "params.h"
50 #include "hash-table.h"
51 \f
52 /* Information about a strength reduction candidate. Each statement
53 in the candidate table represents an expression of one of the
54 following forms (the special case of CAND_REF will be described
55 later):
56
57 (CAND_MULT) S1: X = (B + i) * S
58 (CAND_ADD) S1: X = B + (i * S)
59
60 Here X and B are SSA names, i is an integer constant, and S is
61 either an SSA name or a constant. We call B the "base," i the
62 "index", and S the "stride."
63
64 Any statement S0 that dominates S1 and is of the form:
65
66 (CAND_MULT) S0: Y = (B + i') * S
67 (CAND_ADD) S0: Y = B + (i' * S)
68
69 is called a "basis" for S1. In both cases, S1 may be replaced by
70
71 S1': X = Y + (i - i') * S,
72
73 where (i - i') * S is folded to the extent possible.
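
   For example (an illustration added here; the constants are invented,
   but the substitution follows the rule just described): given the
   straight-line sequence

     a = x * 5;          S0: a = (x + 0) * 5
     b = (x + 3) * 5;    S1: b = (x + 3) * 5

   S0 is a basis for S1 with B = x, S = 5, i' = 0, and i = 3, so S1
   may be replaced by

     b = a + 15;         S1': b = a + (3 - 0) * 5

   turning the second multiply into a cheaper add.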
74
75 All gimple statements are visited in dominator order, and each
76 statement that may contribute to one of the forms of S1 above is
77 given at least one entry in the candidate table. Such statements
78 include addition, pointer addition, subtraction, multiplication,
79 negation, copies, and nontrivial type casts. If a statement may
80 represent more than one expression of the forms of S1 above,
81 multiple "interpretations" are stored in the table and chained
82 together. Examples:
83
84 * An add of two SSA names may treat either operand as the base.
85 * A multiply of two SSA names, likewise.
86 * A copy or cast may be thought of as either a CAND_MULT with
87 i = 0 and S = 1, or as a CAND_ADD with i = 0 or S = 0.
88
89 Candidate records are allocated from an obstack. They are addressed
90 both from a hash table keyed on S1, and from a vector of candidate
91 pointers arranged in predominator order.
92
93 Opportunity note
94 ----------------
95 Currently we don't recognize:
96
97 S0: Y = (S * i') - B
98 S1: X = (S * i) - B
99
100 as a strength reduction opportunity, even though this S1 would
101 also be replaceable by the S1' above. This can be added if it
102 comes up in practice.
103
104 Strength reduction in addressing
105 --------------------------------
106 There is another kind of candidate known as CAND_REF. A CAND_REF
107 describes a statement containing a memory reference having
108 complex addressing that might benefit from strength reduction.
109 Specifically, we are interested in references for which
110 get_inner_reference returns a base address, offset, and bitpos as
111 follows:
112
113 base: MEM_REF (T1, C1)
114 offset: MULT_EXPR (PLUS_EXPR (T2, C2), C3)
115 bitpos: C4 * BITS_PER_UNIT
116
117 Here T1 and T2 are arbitrary trees, and C1, C2, C3, C4 are
118 arbitrary integer constants. Note that C2 may be zero, in which
119 case the offset will be MULT_EXPR (T2, C3).
120
121 When this pattern is recognized, the original memory reference
122 can be replaced with:
123
124 MEM_REF (POINTER_PLUS_EXPR (T1, MULT_EXPR (T2, C3)),
125 C1 + (C2 * C3) + C4)
126
127 which distributes the multiply to allow constant folding. When
128 two or more addressing expressions can be represented by MEM_REFs
129 of this form, differing only in the constants C1, C2, and C4,
130 making this substitution produces more efficient addressing during
131 the RTL phases. When there are not at least two expressions with
132 the same values of T1, T2, and C3, there is nothing to be gained
133 by the replacement.
134
135 Strength reduction of CAND_REFs uses the same infrastructure as
136 that used by CAND_MULTs and CAND_ADDs. We record T1 in the base (B)
137 field, MULT_EXPR (T2, C3) in the stride (S) field, and
138 C1 + (C2 * C3) + C4 in the index (i) field. A basis for a CAND_REF
139 is thus another CAND_REF with the same B and S values. When at
140 least two CAND_REFs are chained together using the basis relation,
141 each of them is replaced as above, resulting in improved code
142 generation for addressing.
143
144 Conditional candidates
145 ======================
146
147 Conditional candidates are best illustrated with an example.
148 Consider the code sequence:
149
150 (1) x_0 = ...;
151 (2) a_0 = x_0 * 5; MULT (B: x_0; i: 0; S: 5)
152 if (...)
153 (3) x_1 = x_0 + 1; ADD (B: x_0, i: 1; S: 1)
154 (4) x_2 = PHI <x_0, x_1>; PHI (B: x_0, i: 0, S: 1)
155 (5) x_3 = x_2 + 1; ADD (B: x_2, i: 1, S: 1)
156 (6) a_1 = x_3 * 5; MULT (B: x_2, i: 1; S: 5)
157
158 Here strength reduction is complicated by the uncertain value of x_2.
159 A legitimate transformation is:
160
161 (1) x_0 = ...;
162 (2) a_0 = x_0 * 5;
163 if (...)
164 {
165 (3) [x_1 = x_0 + 1;]
166 (3a) t_1 = a_0 + 5;
167 }
168 (4) [x_2 = PHI <x_0, x_1>;]
169 (4a) t_2 = PHI <a_0, t_1>;
170 (5) [x_3 = x_2 + 1;]
171 (6r) a_1 = t_2 + 5;
172
173 where the bracketed instructions may go dead.
174
175 To recognize this opportunity, we have to observe that statement (6)
176 has a "hidden basis" (2). The hidden basis is unlike a normal basis
177 in that the statement and the hidden basis have different base SSA
178 names (x_2 and x_0, respectively). The relationship is established
179 when a statement's base name (x_2) is defined by a phi statement (4),
180 each argument of which (x_0, x_1) has an identical "derived base name."
181 If the argument is defined by a candidate (as x_1 is by (3)) that is a
182 CAND_ADD having a stride of 1, the derived base name of the argument is
183 the base name of the candidate (x_0). Otherwise, the argument itself
184 is its derived base name (as is the case with argument x_0).
185
186 The hidden basis for statement (6) is the nearest dominating candidate
187 whose base name is the derived base name (x_0) of the feeding phi (4),
188 and whose stride is identical to that of the statement. We can then
189 create the new "phi basis" (4a) and feeding adds along incoming arcs (3a),
190 allowing the final replacement of (6) by the strength-reduced (6r).
191
192 To facilitate this, a new kind of candidate (CAND_PHI) is introduced.
193 A CAND_PHI is not a candidate for replacement, but is maintained in the
194 candidate table to ease discovery of hidden bases. Any phi statement
195 whose arguments share a common derived base name is entered into the
196 table with the derived base name, an (arbitrary) index of zero, and a
197 stride of 1. A statement with a hidden basis can then be detected by
198 simply looking up its feeding phi definition in the candidate table,
199 extracting the derived base name, and searching for a basis in the
200 usual manner after substituting the derived base name.
201
202 Note that the transformation is only valid when the original phi and
203 the statements that define the phi's arguments are all at the same
204 position in the loop hierarchy. */
205
206
207 /* Index into the candidate vector, offset by 1. VECs are zero-based,
208 while cand_idx's are one-based, with zero indicating null. */
209 typedef unsigned cand_idx;
210
211 /* The kind of candidate. */
212 enum cand_kind
213 {
214 CAND_MULT,
215 CAND_ADD,
216 CAND_REF,
217 CAND_PHI
218 };
219
220 struct slsr_cand_d
221 {
222 /* The candidate statement S1. */
223 gimple cand_stmt;
224
225 /* The base expression B: often an SSA name, but not always. */
226 tree base_expr;
227
228 /* The stride S. */
229 tree stride;
230
231 /* The index constant i. */
232 double_int index;
233
234 /* The type of the candidate. This is normally the type of base_expr,
235 but casts may have occurred when combining feeding instructions.
236 A candidate can only be a basis for candidates of the same final type.
237 (For CAND_REFs, this is the type to be used for operand 1 of the
238 replacement MEM_REF.) */
239 tree cand_type;
240
241 /* The kind of candidate (CAND_MULT, etc.). */
242 enum cand_kind kind;
243
244 /* Index of this candidate in the candidate vector. */
245 cand_idx cand_num;
246
247 /* Index of the next candidate record for the same statement.
248 A statement may be useful in more than one way (e.g., due to
249 commutativity). So we can have multiple "interpretations"
250 of a statement. */
251 cand_idx next_interp;
252
253 /* Index of the basis statement S0, if any, in the candidate vector. */
254 cand_idx basis;
255
256 /* First candidate for which this candidate is a basis, if one exists. */
257 cand_idx dependent;
258
259 /* Next candidate having the same basis as this one. */
260 cand_idx sibling;
261
262 /* If this is a conditional candidate, the CAND_PHI candidate
263 that defines the base SSA name B. */
264 cand_idx def_phi;
265
266 /* Savings that can be expected from eliminating dead code if this
267 candidate is replaced. */
268 int dead_savings;
269 };
270
271 typedef struct slsr_cand_d slsr_cand, *slsr_cand_t;
272 typedef const struct slsr_cand_d *const_slsr_cand_t;
273
274 /* Pointers to candidates are chained together as part of a mapping
275 from base expressions to the candidates that use them. */
276
277 struct cand_chain_d
278 {
279 /* Base expression for the chain of candidates: often, but not
280 always, an SSA name. */
281 tree base_expr;
282
283 /* Pointer to a candidate. */
284 slsr_cand_t cand;
285
286 /* Chain pointer. */
287 struct cand_chain_d *next;
288
289 };
290
291 typedef struct cand_chain_d cand_chain, *cand_chain_t;
292 typedef const struct cand_chain_d *const_cand_chain_t;
293
294 /* Information about a unique "increment" associated with candidates
295 having an SSA name for a stride. An increment is the difference
296 between the index of the candidate and the index of its basis,
297 i.e., (i - i') as discussed in the module commentary.
298
299 When we are not going to generate address arithmetic we treat
300 increments that differ only in sign as the same, allowing sharing
301 of the cost of initializers. The absolute value of the increment
302 is stored in the incr_info. */
303
304 struct incr_info_d
305 {
306 /* The increment that relates a candidate to its basis. */
307 double_int incr;
308
309 /* How many times the increment occurs in the candidate tree. */
310 unsigned count;
311
312 /* Cost of replacing candidates using this increment. Negative and
313 zero costs indicate replacement should be performed. */
314 int cost;
315
316 /* If this increment is profitable but is not -1, 0, or 1, it requires
317 an initializer T_0 = stride * incr to be found or introduced in the
318 nearest common dominator of all candidates. This field holds T_0
319 for subsequent use. */
320 tree initializer;
321
322 /* If the initializer was found to already exist, this is the block
323 where it was found. */
324 basic_block init_bb;
325 };
326
327 typedef struct incr_info_d incr_info, *incr_info_t;
328
329 /* Candidates are maintained in a vector. If candidate X dominates
330 candidate Y, then X appears before Y in the vector; but the
331 converse does not necessarily hold. */
332 static vec<slsr_cand_t> cand_vec;
333
334 enum cost_consts
335 {
336 COST_NEUTRAL = 0,
337 COST_INFINITE = 1000
338 };
339
340 enum stride_status
341 {
342 UNKNOWN_STRIDE = 0,
343 KNOWN_STRIDE = 1
344 };
345
346 enum phi_adjust_status
347 {
348 NOT_PHI_ADJUST = 0,
349 PHI_ADJUST = 1
350 };
351
352 enum count_phis_status
353 {
354 DONT_COUNT_PHIS = 0,
355 COUNT_PHIS = 1
356 };
357
358 /* Pointer map embodying a mapping from statements to candidates. */
359 static struct pointer_map_t *stmt_cand_map;
360
361 /* Obstack for candidates. */
362 static struct obstack cand_obstack;
363
364 /* Obstack for candidate chains. */
365 static struct obstack chain_obstack;
366
367 /* An array INCR_VEC of incr_infos is used during analysis of related
368 candidates having an SSA name for a stride. INCR_VEC_LEN describes
369 its current length. MAX_INCR_VEC_LEN is used to avoid costly
370 pathological cases. */
371 static incr_info_t incr_vec;
372 static unsigned incr_vec_len;
373 const int MAX_INCR_VEC_LEN = 16;
374
375 /* For a chain of candidates with unknown stride, indicates whether or not
376 we must generate pointer arithmetic when replacing statements. */
377 static bool address_arithmetic_p;
378
379 /* Forward function declarations. */
380 static slsr_cand_t base_cand_from_table (tree);
381 static tree introduce_cast_before_cand (slsr_cand_t, tree, tree);
382 \f
383 /* Produce a pointer to the IDX'th candidate in the candidate vector. */
384
385 static slsr_cand_t
386 lookup_cand (cand_idx idx)
387 {
388 return cand_vec[idx - 1];
389 }
390
391 /* Helper for hashing a candidate chain header. */
392
393 struct cand_chain_hasher : typed_noop_remove <cand_chain>
394 {
395 typedef cand_chain value_type;
396 typedef cand_chain compare_type;
397 static inline hashval_t hash (const value_type *);
398 static inline bool equal (const value_type *, const compare_type *);
399 };
400
401 inline hashval_t
402 cand_chain_hasher::hash (const value_type *p)
403 {
404 tree base_expr = p->base_expr;
405 return iterative_hash_expr (base_expr, 0);
406 }
407
408 inline bool
409 cand_chain_hasher::equal (const value_type *chain1, const compare_type *chain2)
410 {
411 return operand_equal_p (chain1->base_expr, chain2->base_expr, 0);
412 }
413
414 /* Hash table embodying a mapping from base exprs to chains of candidates. */
415 static hash_table <cand_chain_hasher> base_cand_map;
416 \f
417 /* Look in the candidate table for a CAND_PHI that defines BASE and
418 return its index if found; otherwise return zero. */
419
420 static cand_idx
421 find_phi_def (tree base)
422 {
423 slsr_cand_t c;
424
425 if (TREE_CODE (base) != SSA_NAME)
426 return 0;
427
428 c = base_cand_from_table (base);
429
430 if (!c || c->kind != CAND_PHI)
431 return 0;
432
433 return c->cand_num;
434 }
435
436 /* Helper routine for find_basis_for_candidate. May be called twice:
437 once for the candidate's base expr, and optionally again for the
438 candidate's phi definition. */
439
440 static slsr_cand_t
441 find_basis_for_base_expr (slsr_cand_t c, tree base_expr)
442 {
443 cand_chain mapping_key;
444 cand_chain_t chain;
445 slsr_cand_t basis = NULL;
446
447 // Limit potential of N^2 behavior for long candidate chains.
448 int iters = 0;
449 int max_iters = PARAM_VALUE (PARAM_MAX_SLSR_CANDIDATE_SCAN);
450
451 mapping_key.base_expr = base_expr;
452 chain = base_cand_map.find (&mapping_key);
453
454 for (; chain && iters < max_iters; chain = chain->next, ++iters)
455 {
456 slsr_cand_t one_basis = chain->cand;
457
458 if (one_basis->kind != c->kind
459 || one_basis->cand_stmt == c->cand_stmt
460 || !operand_equal_p (one_basis->stride, c->stride, 0)
461 || !types_compatible_p (one_basis->cand_type, c->cand_type)
462 || !dominated_by_p (CDI_DOMINATORS,
463 gimple_bb (c->cand_stmt),
464 gimple_bb (one_basis->cand_stmt)))
465 continue;
466
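/* Among the eligible candidates found so far, keep the one with the
   highest candidate number; candidates are numbered in predominator
   order, so this is the most immediately dominating basis. */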
467 if (!basis || basis->cand_num < one_basis->cand_num)
468 basis = one_basis;
469 }
470
471 return basis;
472 }
473
474 /* Use the base expr from candidate C to look for possible candidates
475 that can serve as a basis for C. Each potential basis must also
476 appear in a block that dominates the candidate statement and have
477 the same stride and type. If more than one possible basis exists,
478 the one with highest index in the vector is chosen; this will be
479 the most immediately dominating basis. */
480
481 static int
482 find_basis_for_candidate (slsr_cand_t c)
483 {
484 slsr_cand_t basis = find_basis_for_base_expr (c, c->base_expr);
485
486 /* If a candidate doesn't have a basis using its base expression,
487 it may have a basis hidden by one or more intervening phis. */
488 if (!basis && c->def_phi)
489 {
490 basic_block basis_bb, phi_bb;
491 slsr_cand_t phi_cand = lookup_cand (c->def_phi);
492 basis = find_basis_for_base_expr (c, phi_cand->base_expr);
493
494 if (basis)
495 {
496 /* A hidden basis must dominate the phi-definition of the
497 candidate's base name. */
498 phi_bb = gimple_bb (phi_cand->cand_stmt);
499 basis_bb = gimple_bb (basis->cand_stmt);
500
501 if (phi_bb == basis_bb
502 || !dominated_by_p (CDI_DOMINATORS, phi_bb, basis_bb))
503 {
504 basis = NULL;
505 c->basis = 0;
506 }
507
508 /* If we found a hidden basis, estimate additional dead-code
509 savings if the phi and its feeding statements can be removed. */
510 if (basis && has_single_use (gimple_phi_result (phi_cand->cand_stmt)))
511 c->dead_savings += phi_cand->dead_savings;
512 }
513 }
514
515 if (basis)
516 {
517 c->sibling = basis->dependent;
518 basis->dependent = c->cand_num;
519 return basis->cand_num;
520 }
521
522 return 0;
523 }
524
525 /* Record a mapping from the base expression of C to C itself, indicating that
526 C may potentially serve as a basis using that base expression. */
527
528 static void
529 record_potential_basis (slsr_cand_t c)
530 {
531 cand_chain_t node;
532 cand_chain **slot;
533
534 node = (cand_chain_t) obstack_alloc (&chain_obstack, sizeof (cand_chain));
535 node->base_expr = c->base_expr;
536 node->cand = c;
537 node->next = NULL;
538 slot = base_cand_map.find_slot (node, INSERT);
539
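/* If a chain of candidates already exists for this base expression,
   insert the new node immediately after the existing head node;
   otherwise the new node becomes the head of a new chain. */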
540 if (*slot)
541 {
542 cand_chain_t head = (cand_chain_t) (*slot);
543 node->next = head->next;
544 head->next = node;
545 }
546 else
547 *slot = node;
548 }
549
550 /* Allocate storage for a new candidate and initialize its fields.
551 Attempt to find a basis for the candidate. */
552
553 static slsr_cand_t
554 alloc_cand_and_find_basis (enum cand_kind kind, gimple gs, tree base,
555 double_int index, tree stride, tree ctype,
556 unsigned savings)
557 {
558 slsr_cand_t c = (slsr_cand_t) obstack_alloc (&cand_obstack,
559 sizeof (slsr_cand));
560 c->cand_stmt = gs;
561 c->base_expr = base;
562 c->stride = stride;
563 c->index = index;
564 c->cand_type = ctype;
565 c->kind = kind;
566 c->cand_num = cand_vec.length () + 1;
567 c->next_interp = 0;
568 c->dependent = 0;
569 c->sibling = 0;
570 c->def_phi = kind == CAND_MULT ? find_phi_def (base) : 0;
571 c->dead_savings = savings;
572
573 cand_vec.safe_push (c);
574
575 if (kind == CAND_PHI)
576 c->basis = 0;
577 else
578 c->basis = find_basis_for_candidate (c);
579
580 record_potential_basis (c);
581
582 return c;
583 }
584
585 /* Determine the target cost of statement GS when compiling according
586 to SPEED. */
587
588 static int
589 stmt_cost (gimple gs, bool speed)
590 {
591 tree lhs, rhs1, rhs2;
592 enum machine_mode lhs_mode;
593
594 gcc_assert (is_gimple_assign (gs));
595 lhs = gimple_assign_lhs (gs);
596 rhs1 = gimple_assign_rhs1 (gs);
597 lhs_mode = TYPE_MODE (TREE_TYPE (lhs));
598
599 switch (gimple_assign_rhs_code (gs))
600 {
601 case MULT_EXPR:
602 rhs2 = gimple_assign_rhs2 (gs);
603
604 if (host_integerp (rhs2, 0))
605 return mult_by_coeff_cost (TREE_INT_CST_LOW (rhs2), lhs_mode, speed);
606
607 gcc_assert (TREE_CODE (rhs1) != INTEGER_CST);
608 return mul_cost (speed, lhs_mode);
609
610 case PLUS_EXPR:
611 case POINTER_PLUS_EXPR:
612 case MINUS_EXPR:
613 return add_cost (speed, lhs_mode);
614
615 case NEGATE_EXPR:
616 return neg_cost (speed, lhs_mode);
617
618 case NOP_EXPR:
619 return convert_cost (lhs_mode, TYPE_MODE (TREE_TYPE (rhs1)), speed);
620
621 /* Note that we don't assign costs to copies that in most cases
622 will go away. */
623 default:
624 ;
625 }
626
627 gcc_unreachable ();
628 return 0;
629 }
630
631 /* Look up the defining statement for BASE_IN and return a pointer
632 to its candidate in the candidate table, if any; otherwise NULL.
633 Only CAND_ADD and CAND_MULT candidates are returned. */
634
635 static slsr_cand_t
636 base_cand_from_table (tree base_in)
637 {
638 slsr_cand_t *result;
639
640 gimple def = SSA_NAME_DEF_STMT (base_in);
641 if (!def)
642 return (slsr_cand_t) NULL;
643
644 result = (slsr_cand_t *) pointer_map_contains (stmt_cand_map, def);
645
646 if (result && (*result)->kind != CAND_REF)
647 return *result;
648
649 return (slsr_cand_t) NULL;
650 }
651
652 /* Add an entry to the statement-to-candidate mapping. */
653
654 static void
655 add_cand_for_stmt (gimple gs, slsr_cand_t c)
656 {
657 void **slot = pointer_map_insert (stmt_cand_map, gs);
658 gcc_assert (!*slot);
659 *slot = c;
660 }
661 \f
662 /* Given PHI which contains a phi statement, determine whether it
663 satisfies all the requirements of a phi candidate. If so, create
664 a candidate. Note that a CAND_PHI never has a basis itself, but
665 is used to help find a basis for subsequent candidates. */
666
667 static void
668 slsr_process_phi (gimple phi, bool speed)
669 {
670 unsigned i;
671 tree arg0_base = NULL_TREE, base_type;
672 slsr_cand_t c;
673 struct loop *cand_loop = gimple_bb (phi)->loop_father;
674 unsigned savings = 0;
675
676 /* A CAND_PHI requires each of its arguments to have the same
677 derived base name. (See the module header commentary for a
678 definition of derived base names.) Furthermore, all feeding
679 definitions must be in the same position in the loop hierarchy
680 as PHI. */
681
682 for (i = 0; i < gimple_phi_num_args (phi); i++)
683 {
684 slsr_cand_t arg_cand;
685 tree arg = gimple_phi_arg_def (phi, i);
686 tree derived_base_name = NULL_TREE;
687 gimple arg_stmt = NULL;
688 basic_block arg_bb = NULL;
689
690 if (TREE_CODE (arg) != SSA_NAME)
691 return;
692
693 arg_cand = base_cand_from_table (arg);
694
695 if (arg_cand)
696 {
697 while (arg_cand->kind != CAND_ADD && arg_cand->kind != CAND_PHI)
698 {
699 if (!arg_cand->next_interp)
700 return;
701
702 arg_cand = lookup_cand (arg_cand->next_interp);
703 }
704
705 if (!integer_onep (arg_cand->stride))
706 return;
707
708 derived_base_name = arg_cand->base_expr;
709 arg_stmt = arg_cand->cand_stmt;
710 arg_bb = gimple_bb (arg_stmt);
711
712 /* Gather potential dead code savings if the phi statement
713 can be removed later on. */
714 if (has_single_use (arg))
715 {
716 if (gimple_code (arg_stmt) == GIMPLE_PHI)
717 savings += arg_cand->dead_savings;
718 else
719 savings += stmt_cost (arg_stmt, speed);
720 }
721 }
722 else
723 {
724 derived_base_name = arg;
725
726 if (SSA_NAME_IS_DEFAULT_DEF (arg))
727 arg_bb = single_succ (ENTRY_BLOCK_PTR);
728 else
729 arg_bb = gimple_bb (SSA_NAME_DEF_STMT (arg));
730 }
731
732 if (!arg_bb || arg_bb->loop_father != cand_loop)
733 return;
734
735 if (i == 0)
736 arg0_base = derived_base_name;
737 else if (!operand_equal_p (derived_base_name, arg0_base, 0))
738 return;
739 }
740
741 /* Create the candidate. "alloc_cand_and_find_basis" is named
742 misleadingly for this case, as no basis will be sought for a
743 CAND_PHI. */
744 base_type = TREE_TYPE (arg0_base);
745
746 c = alloc_cand_and_find_basis (CAND_PHI, phi, arg0_base, double_int_zero,
747 integer_one_node, base_type, savings);
748
749 /* Add the candidate to the statement-candidate mapping. */
750 add_cand_for_stmt (phi, c);
751 }
752
753 /* Look for the following pattern:
754
755 *PBASE: MEM_REF (T1, C1)
756
757 *POFFSET: MULT_EXPR (T2, C3) [C2 is zero]
758 or
759 MULT_EXPR (PLUS_EXPR (T2, C2), C3)
760 or
761 MULT_EXPR (MINUS_EXPR (T2, -C2), C3)
762
763 *PINDEX: C4 * BITS_PER_UNIT
764
765 If not present, leave the input values unchanged and return FALSE.
766 Otherwise, modify the input values as follows and return TRUE:
767
768 *PBASE: T1
769 *POFFSET: MULT_EXPR (T2, C3)
770 *PINDEX: C1 + (C2 * C3) + C4 */
771
772 static bool
773 restructure_reference (tree *pbase, tree *poffset, double_int *pindex,
774 tree *ptype)
775 {
776 tree base = *pbase, offset = *poffset;
777 double_int index = *pindex;
778 double_int bpu = double_int::from_uhwi (BITS_PER_UNIT);
779 tree mult_op0, mult_op1, t1, t2, type;
780 double_int c1, c2, c3, c4;
781
782 if (!base
783 || !offset
784 || TREE_CODE (base) != MEM_REF
785 || TREE_CODE (offset) != MULT_EXPR
786 || TREE_CODE (TREE_OPERAND (offset, 1)) != INTEGER_CST
787 || !index.umod (bpu, FLOOR_MOD_EXPR).is_zero ())
788 return false;
789
790 t1 = TREE_OPERAND (base, 0);
791 c1 = mem_ref_offset (base);
792 type = TREE_TYPE (TREE_OPERAND (base, 1));
793
794 mult_op0 = TREE_OPERAND (offset, 0);
795 mult_op1 = TREE_OPERAND (offset, 1);
796
797 c3 = tree_to_double_int (mult_op1);
798
799 if (TREE_CODE (mult_op0) == PLUS_EXPR)
800
801 if (TREE_CODE (TREE_OPERAND (mult_op0, 1)) == INTEGER_CST)
802 {
803 t2 = TREE_OPERAND (mult_op0, 0);
804 c2 = tree_to_double_int (TREE_OPERAND (mult_op0, 1));
805 }
806 else
807 return false;
808
809 else if (TREE_CODE (mult_op0) == MINUS_EXPR)
810
811 if (TREE_CODE (TREE_OPERAND (mult_op0, 1)) == INTEGER_CST)
812 {
813 t2 = TREE_OPERAND (mult_op0, 0);
814 c2 = -tree_to_double_int (TREE_OPERAND (mult_op0, 1));
815 }
816 else
817 return false;
818
819 else
820 {
821 t2 = mult_op0;
822 c2 = double_int_zero;
823 }
824
825 c4 = index.udiv (bpu, FLOOR_DIV_EXPR);
826
827 *pbase = t1;
828 *poffset = fold_build2 (MULT_EXPR, sizetype, t2,
829 double_int_to_tree (sizetype, c3));
830 *pindex = c1 + c2 * c3 + c4;
831 *ptype = type;
832
833 return true;
834 }
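
/* A worked example of the restructuring above (exposition only; the
   constants are invented).  Suppose get_inner_reference returns

     base:    MEM_REF (p_1, 8)
     offset:  MULT_EXPR (PLUS_EXPR (j_2, 2), 4)
     bitpos:  32

   so that C1 = 8, T2 = j_2, C2 = 2, C3 = 4, and, with BITS_PER_UNIT
   being 8, C4 = 32 / 8 = 4.  On return we then have

     *PBASE:   p_1
     *POFFSET: MULT_EXPR (j_2, 4)
     *PINDEX:  8 + 2 * 4 + 4 = 20.  */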
835
836 /* Given GS which contains a data reference, create a CAND_REF entry in
837 the candidate table and attempt to find a basis. */
838
839 static void
840 slsr_process_ref (gimple gs)
841 {
842 tree ref_expr, base, offset, type;
843 HOST_WIDE_INT bitsize, bitpos;
844 enum machine_mode mode;
845 int unsignedp, volatilep;
846 double_int index;
847 slsr_cand_t c;
848
849 if (gimple_vdef (gs))
850 ref_expr = gimple_assign_lhs (gs);
851 else
852 ref_expr = gimple_assign_rhs1 (gs);
853
854 if (!handled_component_p (ref_expr)
855 || TREE_CODE (ref_expr) == BIT_FIELD_REF
856 || (TREE_CODE (ref_expr) == COMPONENT_REF
857 && DECL_BIT_FIELD (TREE_OPERAND (ref_expr, 1))))
858 return;
859
860 base = get_inner_reference (ref_expr, &bitsize, &bitpos, &offset, &mode,
861 &unsignedp, &volatilep, false);
862 index = double_int::from_uhwi (bitpos);
863
864 if (!restructure_reference (&base, &offset, &index, &type))
865 return;
866
867 c = alloc_cand_and_find_basis (CAND_REF, gs, base, index, offset,
868 type, 0);
869
870 /* Add the candidate to the statement-candidate mapping. */
871 add_cand_for_stmt (gs, c);
872 }
873
874 /* Create a candidate entry for a statement GS, where GS multiplies
875 two SSA names BASE_IN and STRIDE_IN. Propagate any known information
876 about the two SSA names into the new candidate. Return the new
877 candidate. */
878
879 static slsr_cand_t
880 create_mul_ssa_cand (gimple gs, tree base_in, tree stride_in, bool speed)
881 {
882 tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
883 double_int index;
884 unsigned savings = 0;
885 slsr_cand_t c;
886 slsr_cand_t base_cand = base_cand_from_table (base_in);
887
888 /* Look at all interpretations of the base candidate, if necessary,
889 to find information to propagate into this candidate. */
890 while (base_cand && !base && base_cand->kind != CAND_PHI)
891 {
892
893 if (base_cand->kind == CAND_MULT && integer_onep (base_cand->stride))
894 {
895 /* Y = (B + i') * 1
896 X = Y * Z
897 ================
898 X = (B + i') * Z */
899 base = base_cand->base_expr;
900 index = base_cand->index;
901 stride = stride_in;
902 ctype = base_cand->cand_type;
903 if (has_single_use (base_in))
904 savings = (base_cand->dead_savings
905 + stmt_cost (base_cand->cand_stmt, speed));
906 }
907 else if (base_cand->kind == CAND_ADD
908 && TREE_CODE (base_cand->stride) == INTEGER_CST)
909 {
910 /* Y = B + (i' * S), S constant
911 X = Y * Z
912 ============================
913 X = B + ((i' * S) * Z) */
914 base = base_cand->base_expr;
915 index = base_cand->index * tree_to_double_int (base_cand->stride);
916 stride = stride_in;
917 ctype = base_cand->cand_type;
918 if (has_single_use (base_in))
919 savings = (base_cand->dead_savings
920 + stmt_cost (base_cand->cand_stmt, speed));
921 }
922
923 if (base_cand->next_interp)
924 base_cand = lookup_cand (base_cand->next_interp);
925 else
926 base_cand = NULL;
927 }
928
929 if (!base)
930 {
931 /* No interpretations had anything useful to propagate, so
932 produce X = (Y + 0) * Z. */
933 base = base_in;
934 index = double_int_zero;
935 stride = stride_in;
936 ctype = TREE_TYPE (base_in);
937 }
938
939 c = alloc_cand_and_find_basis (CAND_MULT, gs, base, index, stride,
940 ctype, savings);
941 return c;
942 }
943
944 /* Create a candidate entry for a statement GS, where GS multiplies
945 SSA name BASE_IN by constant STRIDE_IN. Propagate any known
946 information about BASE_IN into the new candidate. Return the new
947 candidate. */
948
949 static slsr_cand_t
950 create_mul_imm_cand (gimple gs, tree base_in, tree stride_in, bool speed)
951 {
952 tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
953 double_int index, temp;
954 unsigned savings = 0;
955 slsr_cand_t c;
956 slsr_cand_t base_cand = base_cand_from_table (base_in);
957
958 /* Look at all interpretations of the base candidate, if necessary,
959 to find information to propagate into this candidate. */
960 while (base_cand && !base && base_cand->kind != CAND_PHI)
961 {
962 if (base_cand->kind == CAND_MULT
963 && TREE_CODE (base_cand->stride) == INTEGER_CST)
964 {
965 /* Y = (B + i') * S, S constant
966 X = Y * c
967 ============================
968 X = (B + i') * (S * c) */
969 base = base_cand->base_expr;
970 index = base_cand->index;
971 temp = tree_to_double_int (base_cand->stride)
972 * tree_to_double_int (stride_in);
973 stride = double_int_to_tree (TREE_TYPE (stride_in), temp);
974 ctype = base_cand->cand_type;
975 if (has_single_use (base_in))
976 savings = (base_cand->dead_savings
977 + stmt_cost (base_cand->cand_stmt, speed));
978 }
979 else if (base_cand->kind == CAND_ADD && integer_onep (base_cand->stride))
980 {
981 /* Y = B + (i' * 1)
982 X = Y * c
983 ===========================
984 X = (B + i') * c */
985 base = base_cand->base_expr;
986 index = base_cand->index;
987 stride = stride_in;
988 ctype = base_cand->cand_type;
989 if (has_single_use (base_in))
990 savings = (base_cand->dead_savings
991 + stmt_cost (base_cand->cand_stmt, speed));
992 }
993 else if (base_cand->kind == CAND_ADD
994 && base_cand->index.is_one ()
995 && TREE_CODE (base_cand->stride) == INTEGER_CST)
996 {
997 /* Y = B + (1 * S), S constant
998 X = Y * c
999 ===========================
1000 X = (B + S) * c */
1001 base = base_cand->base_expr;
1002 index = tree_to_double_int (base_cand->stride);
1003 stride = stride_in;
1004 ctype = base_cand->cand_type;
1005 if (has_single_use (base_in))
1006 savings = (base_cand->dead_savings
1007 + stmt_cost (base_cand->cand_stmt, speed));
1008 }
1009
1010 if (base_cand->next_interp)
1011 base_cand = lookup_cand (base_cand->next_interp);
1012 else
1013 base_cand = NULL;
1014 }
1015
1016 if (!base)
1017 {
1018 /* No interpretations had anything useful to propagate, so
1019 produce X = (Y + 0) * c. */
1020 base = base_in;
1021 index = double_int_zero;
1022 stride = stride_in;
1023 ctype = TREE_TYPE (base_in);
1024 }
1025
1026 c = alloc_cand_and_find_basis (CAND_MULT, gs, base, index, stride,
1027 ctype, savings);
1028 return c;
1029 }
1030
1031 /* Given GS which is a multiply of scalar integers, make an appropriate
1032 entry in the candidate table. If this is a multiply of two SSA names,
1033 create two CAND_MULT interpretations and attempt to find a basis for
1034 each of them. Otherwise, create a single CAND_MULT and attempt to
1035 find a basis. */
1036
1037 static void
1038 slsr_process_mul (gimple gs, tree rhs1, tree rhs2, bool speed)
1039 {
1040 slsr_cand_t c, c2;
1041
1042 /* If this is a multiply of an SSA name with itself, it is highly
1043 unlikely that we will get a strength reduction opportunity, so
1044 don't record it as a candidate. This simplifies the logic for
1045 finding a basis, so if this is removed that must be considered. */
1046 if (rhs1 == rhs2)
1047 return;
1048
1049 if (TREE_CODE (rhs2) == SSA_NAME)
1050 {
1051 /* Record an interpretation of this statement in the candidate table
1052 assuming RHS1 is the base expression and RHS2 is the stride. */
1053 c = create_mul_ssa_cand (gs, rhs1, rhs2, speed);
1054
1055 /* Add the first interpretation to the statement-candidate mapping. */
1056 add_cand_for_stmt (gs, c);
1057
1058 /* Record another interpretation of this statement assuming RHS1
1059 is the stride and RHS2 is the base expression. */
1060 c2 = create_mul_ssa_cand (gs, rhs2, rhs1, speed);
1061 c->next_interp = c2->cand_num;
1062 }
1063 else
1064 {
1065 /* Record an interpretation for the multiply-immediate. */
1066 c = create_mul_imm_cand (gs, rhs1, rhs2, speed);
1067
1068 /* Add the interpretation to the statement-candidate mapping. */
1069 add_cand_for_stmt (gs, c);
1070 }
1071 }
1072
1073 /* Create a candidate entry for a statement GS, where GS adds two
1074 SSA names BASE_IN and ADDEND_IN if SUBTRACT_P is false, and
1075 subtracts ADDEND_IN from BASE_IN otherwise. Propagate any known
1076 information about the two SSA names into the new candidate.
1077 Return the new candidate. */
1078
1079 static slsr_cand_t
1080 create_add_ssa_cand (gimple gs, tree base_in, tree addend_in,
1081 bool subtract_p, bool speed)
1082 {
1083 tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL;
1084 double_int index;
1085 unsigned savings = 0;
1086 slsr_cand_t c;
1087 slsr_cand_t base_cand = base_cand_from_table (base_in);
1088 slsr_cand_t addend_cand = base_cand_from_table (addend_in);
1089
1090 /* The most useful transformation is a multiply-immediate feeding
1091 an add or subtract. Look for that first. */
1092 while (addend_cand && !base && addend_cand->kind != CAND_PHI)
1093 {
1094 if (addend_cand->kind == CAND_MULT
1095 && addend_cand->index.is_zero ()
1096 && TREE_CODE (addend_cand->stride) == INTEGER_CST)
1097 {
1098 /* Z = (B + 0) * S, S constant
1099 X = Y +/- Z
1100 ===========================
1101 X = Y + ((+/-1 * S) * B) */
1102 base = base_in;
1103 index = tree_to_double_int (addend_cand->stride);
1104 if (subtract_p)
1105 index = -index;
1106 stride = addend_cand->base_expr;
1107 ctype = TREE_TYPE (base_in);
1108 if (has_single_use (addend_in))
1109 savings = (addend_cand->dead_savings
1110 + stmt_cost (addend_cand->cand_stmt, speed));
1111 }
1112
1113 if (addend_cand->next_interp)
1114 addend_cand = lookup_cand (addend_cand->next_interp);
1115 else
1116 addend_cand = NULL;
1117 }
1118
1119 while (base_cand && !base && base_cand->kind != CAND_PHI)
1120 {
1121 if (base_cand->kind == CAND_ADD
1122 && (base_cand->index.is_zero ()
1123 || operand_equal_p (base_cand->stride,
1124 integer_zero_node, 0)))
1125 {
1126 /* Y = B + (i' * S), i' * S = 0
1127 X = Y +/- Z
1128 ============================
1129 X = B + (+/-1 * Z) */
1130 base = base_cand->base_expr;
1131 index = subtract_p ? double_int_minus_one : double_int_one;
1132 stride = addend_in;
1133 ctype = base_cand->cand_type;
1134 if (has_single_use (base_in))
1135 savings = (base_cand->dead_savings
1136 + stmt_cost (base_cand->cand_stmt, speed));
1137 }
1138 else if (subtract_p)
1139 {
1140 slsr_cand_t subtrahend_cand = base_cand_from_table (addend_in);
1141
1142 while (subtrahend_cand && !base && subtrahend_cand->kind != CAND_PHI)
1143 {
1144 if (subtrahend_cand->kind == CAND_MULT
1145 && subtrahend_cand->index.is_zero ()
1146 && TREE_CODE (subtrahend_cand->stride) == INTEGER_CST)
1147 {
1148 /* Z = (B + 0) * S, S constant
1149 X = Y - Z
1150 ===========================
1151 Value: X = Y + ((-1 * S) * B) */
1152 base = base_in;
1153 index = tree_to_double_int (subtrahend_cand->stride);
1154 index = -index;
1155 stride = subtrahend_cand->base_expr;
1156 ctype = TREE_TYPE (base_in);
1157 if (has_single_use (addend_in))
1158 savings = (subtrahend_cand->dead_savings
1159 + stmt_cost (subtrahend_cand->cand_stmt, speed));
1160 }
1161
1162 if (subtrahend_cand->next_interp)
1163 subtrahend_cand = lookup_cand (subtrahend_cand->next_interp);
1164 else
1165 subtrahend_cand = NULL;
1166 }
1167 }
1168
1169 if (base_cand->next_interp)
1170 base_cand = lookup_cand (base_cand->next_interp);
1171 else
1172 base_cand = NULL;
1173 }
1174
1175 if (!base)
1176 {
1177 /* No interpretations had anything useful to propagate, so
1178 produce X = Y + (1 * Z). */
1179 base = base_in;
1180 index = subtract_p ? double_int_minus_one : double_int_one;
1181 stride = addend_in;
1182 ctype = TREE_TYPE (base_in);
1183 }
1184
1185 c = alloc_cand_and_find_basis (CAND_ADD, gs, base, index, stride,
1186 ctype, savings);
1187 return c;
1188 }
1189
1190 /* Create a candidate entry for a statement GS, where GS adds SSA
1191 name BASE_IN to constant INDEX_IN. Propagate any known information
1192 about BASE_IN into the new candidate. Return the new candidate. */
1193
1194 static slsr_cand_t
1195 create_add_imm_cand (gimple gs, tree base_in, double_int index_in, bool speed)
1196 {
1197 enum cand_kind kind = CAND_ADD;
1198 tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
1199 double_int index, multiple;
1200 unsigned savings = 0;
1201 slsr_cand_t c;
1202 slsr_cand_t base_cand = base_cand_from_table (base_in);
1203
1204 while (base_cand && !base && base_cand->kind != CAND_PHI)
1205 {
1206 bool unsigned_p = TYPE_UNSIGNED (TREE_TYPE (base_cand->stride));
1207
1208 if (TREE_CODE (base_cand->stride) == INTEGER_CST
1209 && index_in.multiple_of (tree_to_double_int (base_cand->stride),
1210 unsigned_p, &multiple))
1211 {
1212 /* Y = (B + i') * S, S constant, c = kS for some integer k
1213 X = Y + c
1214 ============================
1215 X = (B + (i'+ k)) * S
1216 OR
1217 Y = B + (i' * S), S constant, c = kS for some integer k
1218 X = Y + c
1219 ============================
1220 X = (B + (i'+ k)) * S */
1221 kind = base_cand->kind;
1222 base = base_cand->base_expr;
1223 index = base_cand->index + multiple;
1224 stride = base_cand->stride;
1225 ctype = base_cand->cand_type;
1226 if (has_single_use (base_in))
1227 savings = (base_cand->dead_savings
1228 + stmt_cost (base_cand->cand_stmt, speed));
1229 }
1230
1231 if (base_cand->next_interp)
1232 base_cand = lookup_cand (base_cand->next_interp);
1233 else
1234 base_cand = NULL;
1235 }
1236
1237 if (!base)
1238 {
1239 /* No interpretations had anything useful to propagate, so
1240 produce X = Y + (c * 1). */
1241 kind = CAND_ADD;
1242 base = base_in;
1243 index = index_in;
1244 stride = integer_one_node;
1245 ctype = TREE_TYPE (base_in);
1246 }
1247
1248 c = alloc_cand_and_find_basis (kind, gs, base, index, stride,
1249 ctype, savings);
1250 return c;
1251 }
1252
1253 /* Given GS which is an add or subtract of scalar integers or pointers,
1254 make at least one appropriate entry in the candidate table. */
1255
1256 static void
1257 slsr_process_add (gimple gs, tree rhs1, tree rhs2, bool speed)
1258 {
1259 bool subtract_p = gimple_assign_rhs_code (gs) == MINUS_EXPR;
1260 slsr_cand_t c = NULL, c2;
1261
1262 if (TREE_CODE (rhs2) == SSA_NAME)
1263 {
1264 /* First record an interpretation assuming RHS1 is the base expression
1265 and RHS2 is the stride. But it doesn't make sense for the
1266 stride to be a pointer, so don't record a candidate in that case. */
1267 if (!POINTER_TYPE_P (TREE_TYPE (rhs2)))
1268 {
1269 c = create_add_ssa_cand (gs, rhs1, rhs2, subtract_p, speed);
1270
1271 /* Add the first interpretation to the statement-candidate
1272 mapping. */
1273 add_cand_for_stmt (gs, c);
1274 }
1275
1276 /* If the two RHS operands are identical, or this is a subtract,
1277 we're done. */
1278 if (operand_equal_p (rhs1, rhs2, 0) || subtract_p)
1279 return;
1280
1281 /* Otherwise, record another interpretation assuming RHS2 is the
1282 base expression and RHS1 is the stride, again provided that the
1283 stride is not a pointer. */
1284 if (!POINTER_TYPE_P (TREE_TYPE (rhs1)))
1285 {
1286 c2 = create_add_ssa_cand (gs, rhs2, rhs1, false, speed);
1287 if (c)
1288 c->next_interp = c2->cand_num;
1289 else
1290 add_cand_for_stmt (gs, c2);
1291 }
1292 }
1293 else
1294 {
1295 double_int index;
1296
1297 /* Record an interpretation for the add-immediate. */
1298 index = tree_to_double_int (rhs2);
1299 if (subtract_p)
1300 index = -index;
1301
1302 c = create_add_imm_cand (gs, rhs1, index, speed);
1303
1304 /* Add the interpretation to the statement-candidate mapping. */
1305 add_cand_for_stmt (gs, c);
1306 }
1307 }
1308
1309 /* Given GS which is a negate of a scalar integer, make an appropriate
1310 entry in the candidate table. A negate is equivalent to a multiply
1311 by -1. */
1312
1313 static void
1314 slsr_process_neg (gimple gs, tree rhs1, bool speed)
1315 {
1316 /* Record a CAND_MULT interpretation for the multiply by -1. */
1317 slsr_cand_t c = create_mul_imm_cand (gs, rhs1, integer_minus_one_node, speed);
1318
1319 /* Add the interpretation to the statement-candidate mapping. */
1320 add_cand_for_stmt (gs, c);
1321 }
1322
1323 /* Help function for legal_cast_p, operating on two trees. Checks
1324 whether it's allowable to cast from RHS to LHS. See legal_cast_p
1325 for more details. */
1326
1327 static bool
1328 legal_cast_p_1 (tree lhs, tree rhs)
1329 {
1330 tree lhs_type, rhs_type;
1331 unsigned lhs_size, rhs_size;
1332 bool lhs_wraps, rhs_wraps;
1333
1334 lhs_type = TREE_TYPE (lhs);
1335 rhs_type = TREE_TYPE (rhs);
1336 lhs_size = TYPE_PRECISION (lhs_type);
1337 rhs_size = TYPE_PRECISION (rhs_type);
1338 lhs_wraps = TYPE_OVERFLOW_WRAPS (lhs_type);
1339 rhs_wraps = TYPE_OVERFLOW_WRAPS (rhs_type);
1340
1341 if (lhs_size < rhs_size
1342 || (rhs_wraps && !lhs_wraps)
1343 || (rhs_wraps && lhs_wraps && rhs_size != lhs_size))
1344 return false;
1345
1346 return true;
1347 }
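
/* For illustration (not from the original commentary): under the
   checks above, and assuming the default of undefined signed overflow
   (no -fwrapv), a cast from a 32-bit signed int to a 64-bit integer
   type is legal, since the RHS type does not wrap and is no wider
   than the LHS type.  A cast from a 32-bit unsigned (wrapping) type
   to any 64-bit type is rejected, because the addition feeding the
   cast might have wrapped. */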
1348
1349 /* Return TRUE if GS is a statement that defines an SSA name from
1350 a conversion and is legal for us to combine with an add and multiply
1351 in the candidate table. For example, suppose we have:
1352
1353 A = B + i;
1354 C = (type) A;
1355 D = C * S;
1356
1357 Without the type-cast, we would create a CAND_MULT for D with base B,
1358 index i, and stride S. We want to record this candidate only if it
1359 is equivalent to applying the type cast following the multiply:
1360
1361 A = B + i;
1362 E = A * S;
1363 D = (type) E;
1364
1365 We will record the type with the candidate for D. This allows us
1366 to use a similar previous candidate as a basis. If we have earlier seen
1367
1368 A' = B + i';
1369 C' = (type) A';
1370 D' = C' * S;
1371
1372 we can replace D with
1373
1374 D = D' + (i - i') * S;
1375
1376 But if moving the type-cast would change semantics, we mustn't do this.
1377
1378 This is legitimate for casts from a non-wrapping integral type to
1379 any integral type of the same or larger size. It is not legitimate
1380 to convert a wrapping type to a non-wrapping type, or to a wrapping
1381 type of a different size. I.e., with a wrapping type, we must
1382 assume that the addition B + i could wrap, in which case performing
1383 the multiply before or after one of the "illegal" type casts will
1384 have different semantics. */
1385
1386 static bool
1387 legal_cast_p (gimple gs, tree rhs)
1388 {
1389 if (!is_gimple_assign (gs)
1390 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (gs)))
1391 return false;
1392
1393 return legal_cast_p_1 (gimple_assign_lhs (gs), rhs);
1394 }
1395
1396 /* Given GS which is a cast to a scalar integer type, determine whether
1397 the cast is legal for strength reduction. If so, make at least one
1398 appropriate entry in the candidate table. */
1399
1400 static void
1401 slsr_process_cast (gimple gs, tree rhs1, bool speed)
1402 {
1403 tree lhs, ctype;
1404 slsr_cand_t base_cand, c, c2;
1405 unsigned savings = 0;
1406
1407 if (!legal_cast_p (gs, rhs1))
1408 return;
1409
1410 lhs = gimple_assign_lhs (gs);
1411 base_cand = base_cand_from_table (rhs1);
1412 ctype = TREE_TYPE (lhs);
1413
1414 if (base_cand && base_cand->kind != CAND_PHI)
1415 {
1416 while (base_cand)
1417 {
1418 /* Propagate all data from the base candidate except the type,
1419 which comes from the cast, and the base candidate's cast,
1420 which is no longer applicable. */
1421 if (has_single_use (rhs1))
1422 savings = (base_cand->dead_savings
1423 + stmt_cost (base_cand->cand_stmt, speed));
1424
1425 c = alloc_cand_and_find_basis (base_cand->kind, gs,
1426 base_cand->base_expr,
1427 base_cand->index, base_cand->stride,
1428 ctype, savings);
1429 if (base_cand->next_interp)
1430 base_cand = lookup_cand (base_cand->next_interp);
1431 else
1432 base_cand = NULL;
1433 }
1434 }
1435 else
1436 {
1437 /* If nothing is known about the RHS, create fresh CAND_ADD and
1438 CAND_MULT interpretations:
1439
1440 X = Y + (0 * 1)
1441 X = (Y + 0) * 1
1442
1443 The first of these is somewhat arbitrary, but the choice of
1444 1 for the stride simplifies the logic for propagating casts
1445 into their uses. */
1446 c = alloc_cand_and_find_basis (CAND_ADD, gs, rhs1, double_int_zero,
1447 integer_one_node, ctype, 0);
1448 c2 = alloc_cand_and_find_basis (CAND_MULT, gs, rhs1, double_int_zero,
1449 integer_one_node, ctype, 0);
1450 c->next_interp = c2->cand_num;
1451 }
1452
1453 /* Add the first (or only) interpretation to the statement-candidate
1454 mapping. */
1455 add_cand_for_stmt (gs, c);
1456 }
1457
1458 /* Given GS which is a copy of a scalar integer type, make at least one
1459 appropriate entry in the candidate table.
1460
1461 This interface is included for completeness, but is unnecessary
1462 if this pass immediately follows a pass that performs copy
1463 propagation, such as DOM. */
1464
1465 static void
1466 slsr_process_copy (gimple gs, tree rhs1, bool speed)
1467 {
1468 slsr_cand_t base_cand, c, c2;
1469 unsigned savings = 0;
1470
1471 base_cand = base_cand_from_table (rhs1);
1472
1473 if (base_cand && base_cand->kind != CAND_PHI)
1474 {
1475 while (base_cand)
1476 {
1477 /* Propagate all data from the base candidate. */
1478 if (has_single_use (rhs1))
1479 savings = (base_cand->dead_savings
1480 + stmt_cost (base_cand->cand_stmt, speed));
1481
1482 c = alloc_cand_and_find_basis (base_cand->kind, gs,
1483 base_cand->base_expr,
1484 base_cand->index, base_cand->stride,
1485 base_cand->cand_type, savings);
1486 if (base_cand->next_interp)
1487 base_cand = lookup_cand (base_cand->next_interp);
1488 else
1489 base_cand = NULL;
1490 }
1491 }
1492 else
1493 {
1494 /* If nothing is known about the RHS, create fresh CAND_ADD and
1495 CAND_MULT interpretations:
1496
1497 X = Y + (0 * 1)
1498 X = (Y + 0) * 1
1499
1500 The first of these is somewhat arbitrary, but the choice of
1501 1 for the stride simplifies the logic for propagating casts
1502 into their uses. */
1503 c = alloc_cand_and_find_basis (CAND_ADD, gs, rhs1, double_int_zero,
1504 integer_one_node, TREE_TYPE (rhs1), 0);
1505 c2 = alloc_cand_and_find_basis (CAND_MULT, gs, rhs1, double_int_zero,
1506 integer_one_node, TREE_TYPE (rhs1), 0);
1507 c->next_interp = c2->cand_num;
1508 }
1509
1510 /* Add the first (or only) interpretation to the statement-candidate
1511 mapping. */
1512 add_cand_for_stmt (gs, c);
1513 }
1514 \f
1515 /* Find strength-reduction candidates in block BB. */
1516
1517 static void
1518 find_candidates_in_block (struct dom_walk_data *walk_data ATTRIBUTE_UNUSED,
1519 basic_block bb)
1520 {
1521 bool speed = optimize_bb_for_speed_p (bb);
1522 gimple_stmt_iterator gsi;
1523
1524 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1525 slsr_process_phi (gsi_stmt (gsi), speed);
1526
1527 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1528 {
1529 gimple gs = gsi_stmt (gsi);
1530
1531 if (gimple_vuse (gs) && gimple_assign_single_p (gs))
1532 slsr_process_ref (gs);
1533
1534 else if (is_gimple_assign (gs)
1535 && SCALAR_INT_MODE_P
1536 (TYPE_MODE (TREE_TYPE (gimple_assign_lhs (gs)))))
1537 {
1538 tree rhs1 = NULL_TREE, rhs2 = NULL_TREE;
1539
1540 switch (gimple_assign_rhs_code (gs))
1541 {
1542 case MULT_EXPR:
1543 case PLUS_EXPR:
1544 rhs1 = gimple_assign_rhs1 (gs);
1545 rhs2 = gimple_assign_rhs2 (gs);
1546 /* Should never happen, but currently some buggy situations
1547 in earlier phases put constants in rhs1. */
1548 if (TREE_CODE (rhs1) != SSA_NAME)
1549 continue;
1550 break;
1551
1552 /* Possible future opportunity: rhs1 of a ptr+ can be
1553 an ADDR_EXPR. */
1554 case POINTER_PLUS_EXPR:
1555 case MINUS_EXPR:
1556 rhs2 = gimple_assign_rhs2 (gs);
1557 /* Fall-through. */
1558
1559 case NOP_EXPR:
1560 case MODIFY_EXPR:
1561 case NEGATE_EXPR:
1562 rhs1 = gimple_assign_rhs1 (gs);
1563 if (TREE_CODE (rhs1) != SSA_NAME)
1564 continue;
1565 break;
1566
1567 default:
1568 ;
1569 }
1570
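/* With the operands validated above, dispatch to the handler for
   this statement kind. */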
1571 switch (gimple_assign_rhs_code (gs))
1572 {
1573 case MULT_EXPR:
1574 slsr_process_mul (gs, rhs1, rhs2, speed);
1575 break;
1576
1577 case PLUS_EXPR:
1578 case POINTER_PLUS_EXPR:
1579 case MINUS_EXPR:
1580 slsr_process_add (gs, rhs1, rhs2, speed);
1581 break;
1582
1583 case NEGATE_EXPR:
1584 slsr_process_neg (gs, rhs1, speed);
1585 break;
1586
1587 case NOP_EXPR:
1588 slsr_process_cast (gs, rhs1, speed);
1589 break;
1590
1591 case MODIFY_EXPR:
1592 slsr_process_copy (gs, rhs1, speed);
1593 break;
1594
1595 default:
1596 ;
1597 }
1598 }
1599 }
1600 }
1601 \f
1602 /* Dump a candidate for debug. */
1603
1604 static void
1605 dump_candidate (slsr_cand_t c)
1606 {
1607 fprintf (dump_file, "%3d [%d] ", c->cand_num,
1608 gimple_bb (c->cand_stmt)->index);
1609 print_gimple_stmt (dump_file, c->cand_stmt, 0, 0);
1610 switch (c->kind)
1611 {
1612 case CAND_MULT:
1613 fputs (" MULT : (", dump_file);
1614 print_generic_expr (dump_file, c->base_expr, 0);
1615 fputs (" + ", dump_file);
1616 dump_double_int (dump_file, c->index, false);
1617 fputs (") * ", dump_file);
1618 print_generic_expr (dump_file, c->stride, 0);
1619 fputs (" : ", dump_file);
1620 break;
1621 case CAND_ADD:
1622 fputs (" ADD : ", dump_file);
1623 print_generic_expr (dump_file, c->base_expr, 0);
1624 fputs (" + (", dump_file);
1625 dump_double_int (dump_file, c->index, false);
1626 fputs (" * ", dump_file);
1627 print_generic_expr (dump_file, c->stride, 0);
1628 fputs (") : ", dump_file);
1629 break;
1630 case CAND_REF:
1631 fputs (" REF : ", dump_file);
1632 print_generic_expr (dump_file, c->base_expr, 0);
1633 fputs (" + (", dump_file);
1634 print_generic_expr (dump_file, c->stride, 0);
1635 fputs (") + ", dump_file);
1636 dump_double_int (dump_file, c->index, false);
1637 fputs (" : ", dump_file);
1638 break;
1639 case CAND_PHI:
1640 fputs (" PHI : ", dump_file);
1641 print_generic_expr (dump_file, c->base_expr, 0);
1642 fputs (" + (unknown * ", dump_file);
1643 print_generic_expr (dump_file, c->stride, 0);
1644 fputs (") : ", dump_file);
1645 break;
1646 default:
1647 gcc_unreachable ();
1648 }
1649 print_generic_expr (dump_file, c->cand_type, 0);
1650 fprintf (dump_file, "\n basis: %d dependent: %d sibling: %d\n",
1651 c->basis, c->dependent, c->sibling);
1652 fprintf (dump_file, " next-interp: %d dead-savings: %d\n",
1653 c->next_interp, c->dead_savings);
1654 if (c->def_phi)
1655 fprintf (dump_file, " phi: %d\n", c->def_phi);
1656 fputs ("\n", dump_file);
1657 }
1658
1659 /* Dump the candidate vector for debug. */
1660
1661 static void
1662 dump_cand_vec (void)
1663 {
1664 unsigned i;
1665 slsr_cand_t c;
1666
1667 fprintf (dump_file, "\nStrength reduction candidate vector:\n\n");
1668
1669 FOR_EACH_VEC_ELT (cand_vec, i, c)
1670 dump_candidate (c);
1671 }
1672
1673 /* Callback used to dump the candidate chains hash table. */
1674
1675 int
1676 ssa_base_cand_dump_callback (cand_chain **slot, void *ignored ATTRIBUTE_UNUSED)
1677 {
1678 const_cand_chain_t chain = *slot;
1679 cand_chain_t p;
1680
1681 print_generic_expr (dump_file, chain->base_expr, 0);
1682 fprintf (dump_file, " -> %d", chain->cand->cand_num);
1683
1684 for (p = chain->next; p; p = p->next)
1685 fprintf (dump_file, " -> %d", p->cand->cand_num);
1686
1687 fputs ("\n", dump_file);
1688 return 1;
1689 }
1690
1691 /* Dump the candidate chains. */
1692
1693 static void
1694 dump_cand_chains (void)
1695 {
1696 fprintf (dump_file, "\nStrength reduction candidate chains:\n\n");
1697 base_cand_map.traverse_noresize <void *, ssa_base_cand_dump_callback> (NULL);
1698 fputs ("\n", dump_file);
1699 }
1700
1701 /* Dump the increment vector for debug. */
1702
1703 static void
1704 dump_incr_vec (void)
1705 {
1706 if (dump_file && (dump_flags & TDF_DETAILS))
1707 {
1708 unsigned i;
1709
1710 fprintf (dump_file, "\nIncrement vector:\n\n");
1711
1712 for (i = 0; i < incr_vec_len; i++)
1713 {
1714 fprintf (dump_file, "%3d increment: ", i);
1715 dump_double_int (dump_file, incr_vec[i].incr, false);
1716 fprintf (dump_file, "\n count: %d", incr_vec[i].count);
1717 fprintf (dump_file, "\n cost: %d", incr_vec[i].cost);
1718 fputs ("\n initializer: ", dump_file);
1719 print_generic_expr (dump_file, incr_vec[i].initializer, 0);
1720 fputs ("\n\n", dump_file);
1721 }
1722 }
1723 }
1724 \f
1725 /* Replace *EXPR in candidate C with an equivalent strength-reduced
1726 data reference. */
1727
1728 static void
1729 replace_ref (tree *expr, slsr_cand_t c)
1730 {
1731 tree add_expr, mem_ref, acc_type = TREE_TYPE (*expr);
1732 unsigned HOST_WIDE_INT misalign;
1733 unsigned align;
1734
1735 /* Ensure the memory reference carries the minimum alignment
1736 requirement for the data type. See PR58041. */
1737 get_object_alignment_1 (*expr, &align, &misalign);
1738 if (misalign != 0)
1739 align = (misalign & -misalign);
1740 if (align < TYPE_ALIGN (acc_type))
1741 acc_type = build_aligned_type (acc_type, align);
1742
1743 add_expr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (c->base_expr),
1744 c->base_expr, c->stride);
1745 mem_ref = fold_build2 (MEM_REF, acc_type, add_expr,
1746 double_int_to_tree (c->cand_type, c->index));
1747
1748 /* Gimplify the base addressing expression for the new MEM_REF tree. */
1749 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
1750 TREE_OPERAND (mem_ref, 0)
1751 = force_gimple_operand_gsi (&gsi, TREE_OPERAND (mem_ref, 0),
1752 /*simple_p=*/true, NULL,
1753 /*before=*/true, GSI_SAME_STMT);
1754 copy_ref_info (mem_ref, *expr);
1755 *expr = mem_ref;
1756 update_stmt (c->cand_stmt);
1757 }
1758
1759 /* Replace CAND_REF candidate C, each sibling of candidate C, and each
1760 dependent of candidate C with an equivalent strength-reduced data
1761 reference. */
1762
1763 static void
1764 replace_refs (slsr_cand_t c)
1765 {
1766 if (gimple_vdef (c->cand_stmt))
1767 {
1768 tree *lhs = gimple_assign_lhs_ptr (c->cand_stmt);
1769 replace_ref (lhs, c);
1770 }
1771 else
1772 {
1773 tree *rhs = gimple_assign_rhs1_ptr (c->cand_stmt);
1774 replace_ref (rhs, c);
1775 }
1776
1777 if (c->sibling)
1778 replace_refs (lookup_cand (c->sibling));
1779
1780 if (c->dependent)
1781 replace_refs (lookup_cand (c->dependent));
1782 }
1783
1784 /* Return TRUE if candidate C is dependent upon a PHI. */
1785
1786 static bool
1787 phi_dependent_cand_p (slsr_cand_t c)
1788 {
1789 /* A candidate is not necessarily dependent upon a PHI just because
1790 it has a phi definition for its base name. It may have a basis
1791 that relies upon the same phi definition, in which case the PHI
1792 is irrelevant to this candidate. */
1793 return (c->def_phi
1794 && c->basis
1795 && lookup_cand (c->basis)->def_phi != c->def_phi);
1796 }
1797
1798 /* Calculate the increment required for candidate C relative to
1799 its basis. */
1800
1801 static double_int
1802 cand_increment (slsr_cand_t c)
1803 {
1804 slsr_cand_t basis;
1805
1806 /* If the candidate doesn't have a basis, just return its own
1807 index. This is useful in record_increments to help us find
1808 an existing initializer. Also, if the candidate's basis is
1809 hidden by a phi, then its own index will be the increment
1810 from the newly introduced phi basis. */
1811 if (!c->basis || phi_dependent_cand_p (c))
1812 return c->index;
1813
1814 basis = lookup_cand (c->basis);
1815 gcc_assert (operand_equal_p (c->base_expr, basis->base_expr, 0));
1816 return c->index - basis->index;
1817 }
1818
1819 /* Calculate the increment required for candidate C relative to
1820 its basis. If we aren't going to generate pointer arithmetic
1821 for this candidate, return the absolute value of that increment
1822 instead. */
1823
1824 static inline double_int
1825 cand_abs_increment (slsr_cand_t c)
1826 {
1827 double_int increment = cand_increment (c);
1828
1829 if (!address_arithmetic_p && increment.is_negative ())
1830 increment = -increment;
1831
1832 return increment;
1833 }
1834
1835 /* Return TRUE iff candidate C has already been replaced under
1836 another interpretation. */
1837
1838 static inline bool
1839 cand_already_replaced (slsr_cand_t c)
1840 {
1841 return (gimple_bb (c->cand_stmt) == 0);
1842 }
1843
1844 /* Common logic used by replace_unconditional_candidate and
1845 replace_conditional_candidate. */
1846
1847 static void
1848 replace_mult_candidate (slsr_cand_t c, tree basis_name, double_int bump)
1849 {
1850 tree target_type = TREE_TYPE (gimple_assign_lhs (c->cand_stmt));
1851 enum tree_code cand_code = gimple_assign_rhs_code (c->cand_stmt);
1852
1853 /* It is highly unlikely, but possible, that the resulting
1854 bump doesn't fit in a HWI. Abandon the replacement
1855 in this case. This does not affect siblings or dependents
1856 of C. Restriction to signed HWI is conservative for unsigned
1857 types but allows for safe negation without twisted logic. */
1858 if (bump.fits_shwi ()
1859 && bump.to_shwi () != HOST_WIDE_INT_MIN
1860 /* It is not useful to replace casts, copies, or adds of
1861 an SSA name and a constant. */
1862 && cand_code != MODIFY_EXPR
1863 && cand_code != NOP_EXPR
1864 && cand_code != PLUS_EXPR
1865 && cand_code != POINTER_PLUS_EXPR
1866 && cand_code != MINUS_EXPR)
1867 {
1868 enum tree_code code = PLUS_EXPR;
1869 tree bump_tree;
1870 gimple stmt_to_print = NULL;
1871
1872 /* If the basis name and the candidate's LHS have incompatible
1873 types, introduce a cast. */
1874 if (!useless_type_conversion_p (target_type, TREE_TYPE (basis_name)))
1875 basis_name = introduce_cast_before_cand (c, target_type, basis_name);
1876 if (bump.is_negative ())
1877 {
1878 code = MINUS_EXPR;
1879 bump = -bump;
1880 }
1881
1882 bump_tree = double_int_to_tree (target_type, bump);
1883
1884 if (dump_file && (dump_flags & TDF_DETAILS))
1885 {
1886 fputs ("Replacing: ", dump_file);
1887 print_gimple_stmt (dump_file, c->cand_stmt, 0, 0);
1888 }
1889
1890 if (bump.is_zero ())
1891 {
1892 tree lhs = gimple_assign_lhs (c->cand_stmt);
1893 gimple copy_stmt = gimple_build_assign (lhs, basis_name);
1894 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
1895 gimple_set_location (copy_stmt, gimple_location (c->cand_stmt));
1896 gsi_replace (&gsi, copy_stmt, false);
1897 c->cand_stmt = copy_stmt;
1898 if (dump_file && (dump_flags & TDF_DETAILS))
1899 stmt_to_print = copy_stmt;
1900 }
1901 else
1902 {
1903 	  tree rhs1 = NULL_TREE, rhs2 = NULL_TREE;
1904 if (cand_code != NEGATE_EXPR) {
1905 rhs1 = gimple_assign_rhs1 (c->cand_stmt);
1906 rhs2 = gimple_assign_rhs2 (c->cand_stmt);
1907 }
1908 if (cand_code != NEGATE_EXPR
1909 && ((operand_equal_p (rhs1, basis_name, 0)
1910 && operand_equal_p (rhs2, bump_tree, 0))
1911 || (operand_equal_p (rhs1, bump_tree, 0)
1912 && operand_equal_p (rhs2, basis_name, 0))))
1913 {
1914 if (dump_file && (dump_flags & TDF_DETAILS))
1915 {
1916 fputs ("(duplicate, not actually replacing)", dump_file);
1917 stmt_to_print = c->cand_stmt;
1918 }
1919 }
1920 else
1921 {
1922 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
1923 gimple_assign_set_rhs_with_ops (&gsi, code,
1924 basis_name, bump_tree);
1925 update_stmt (gsi_stmt (gsi));
1926 c->cand_stmt = gsi_stmt (gsi);
1927 if (dump_file && (dump_flags & TDF_DETAILS))
1928 stmt_to_print = gsi_stmt (gsi);
1929 }
1930 }
1931
1932 if (dump_file && (dump_flags & TDF_DETAILS))
1933 {
1934 fputs ("With: ", dump_file);
1935 print_gimple_stmt (dump_file, stmt_to_print, 0, 0);
1936 fputs ("\n", dump_file);
1937 }
1938 }
1939 }
1940
1941 /* Replace candidate C with an add or subtract. Note that we only
1942 operate on CAND_MULTs with known strides, so we will never generate
1943 a POINTER_PLUS_EXPR. Each candidate X = (B + i) * S is replaced by
1944 X = Y + ((i - i') * S), as described in the module commentary. The
1945 folded value ((i - i') * S) is referred to here as the "bump." */
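/* Hypothetical example, following the module commentary: given

       S0 (basis):      y_2 = (b_1 + 2) * 4
       S1 (candidate):  x_4 = (b_1 + 5) * 4

   the increment is (5 - 2) = 3 and the bump is 3 * 4 = 12, so S1 is
   rewritten by replace_mult_candidate as

       S1':             x_4 = y_2 + 12  */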
1946
1947 static void
1948 replace_unconditional_candidate (slsr_cand_t c)
1949 {
1950 slsr_cand_t basis;
1951 double_int stride, bump;
1952
1953 if (cand_already_replaced (c))
1954 return;
1955
1956 basis = lookup_cand (c->basis);
1957 stride = tree_to_double_int (c->stride);
1958 bump = cand_increment (c) * stride;
1959
1960 replace_mult_candidate (c, gimple_assign_lhs (basis->cand_stmt), bump);
1961 }
1962 \f
1963 /* Return the index in the increment vector of the given INCREMENT,
1964 or -1 if not found. The latter can occur if more than
1965 MAX_INCR_VEC_LEN increments have been found. */
1966
1967 static inline int
1968 incr_vec_index (double_int increment)
1969 {
1970 unsigned i;
1971
1972 for (i = 0; i < incr_vec_len && increment != incr_vec[i].incr; i++)
1973 ;
1974
1975 if (i < incr_vec_len)
1976 return i;
1977 else
1978 return -1;
1979 }
1980
1981 /* Create a new statement along edge E to add BASIS_NAME to the product
1982    of INCREMENT and the stride of candidate C. Create and return a new
1983    SSA name to be used as the LHS of the new statement.
1984    KNOWN_STRIDE is true iff C's stride is a constant. */
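/* For instance (illustrative only): with a known stride of 4 and an
   increment of -2, the statement inserted along edge E is

       slsr_N = basis_name - 8

   where slsr_N is a new temporary SSA name; E is split first when its
   source block has more than one successor.  */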
1985
1986 static tree
1987 create_add_on_incoming_edge (slsr_cand_t c, tree basis_name,
1988 double_int increment, edge e, location_t loc,
1989 bool known_stride)
1990 {
1991 basic_block insert_bb;
1992 gimple_stmt_iterator gsi;
1993 tree lhs, basis_type;
1994 gimple new_stmt;
1995
1996 /* If the add candidate along this incoming edge has the same
1997 index as C's hidden basis, the hidden basis represents this
1998 edge correctly. */
1999 if (increment.is_zero ())
2000 return basis_name;
2001
2002 basis_type = TREE_TYPE (basis_name);
2003 lhs = make_temp_ssa_name (basis_type, NULL, "slsr");
2004
2005 if (known_stride)
2006 {
2007 tree bump_tree;
2008 enum tree_code code = PLUS_EXPR;
2009 double_int bump = increment * tree_to_double_int (c->stride);
2010 if (bump.is_negative ())
2011 {
2012 code = MINUS_EXPR;
2013 bump = -bump;
2014 }
2015
2016 bump_tree = double_int_to_tree (basis_type, bump);
2017 new_stmt = gimple_build_assign_with_ops (code, lhs, basis_name,
2018 bump_tree);
2019 }
2020 else
2021 {
2022 int i;
2023 bool negate_incr = (!address_arithmetic_p && increment.is_negative ());
2024 i = incr_vec_index (negate_incr ? -increment : increment);
2025 gcc_assert (i >= 0);
2026
2027 if (incr_vec[i].initializer)
2028 {
2029 enum tree_code code = negate_incr ? MINUS_EXPR : PLUS_EXPR;
2030 new_stmt = gimple_build_assign_with_ops (code, lhs, basis_name,
2031 incr_vec[i].initializer);
2032 }
2033 else if (increment.is_one ())
2034 new_stmt = gimple_build_assign_with_ops (PLUS_EXPR, lhs, basis_name,
2035 c->stride);
2036 else if (increment.is_minus_one ())
2037 new_stmt = gimple_build_assign_with_ops (MINUS_EXPR, lhs, basis_name,
2038 c->stride);
2039 else
2040 gcc_unreachable ();
2041 }
2042
2043 insert_bb = single_succ_p (e->src) ? e->src : split_edge (e);
2044 gsi = gsi_last_bb (insert_bb);
2045
2046 if (!gsi_end_p (gsi) && is_ctrl_stmt (gsi_stmt (gsi)))
2047 gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);
2048 else
2049 gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
2050
2051 gimple_set_location (new_stmt, loc);
2052
2053 if (dump_file && (dump_flags & TDF_DETAILS))
2054 {
2055 fprintf (dump_file, "Inserting in block %d: ", insert_bb->index);
2056 print_gimple_stmt (dump_file, new_stmt, 0, 0);
2057 }
2058
2059 return lhs;
2060 }
2061
2062 /* Given a candidate C with BASIS_NAME being the LHS of C's basis which
2063 is hidden by the phi node FROM_PHI, create a new phi node in the same
2064 block as FROM_PHI. The new phi is suitable for use as a basis by C,
2065 with its phi arguments representing conditional adjustments to the
2066 hidden basis along conditional incoming paths. Those adjustments are
2067 made by creating add statements (and sometimes recursively creating
2068 phis) along those incoming paths. LOC is the location to attach to
2069 the introduced statements. KNOWN_STRIDE is true iff C's stride is a
2070 constant. */
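/* Sketch of the idea (hypothetical CFG, assuming the hidden basis has
   index zero): if the hidden basis reaches FROM_PHI along one arc and
   a conditionally executed add candidate with a different index
   reaches it along the other, an adjusting add is emitted on the
   latter arc by create_add_on_incoming_edge and we build

       slsr_N = PHI <basis_name (arc 1), slsr_M (arc 2)>

   which then serves as the basis when C is replaced.  */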
2071
2072 static tree
2073 create_phi_basis (slsr_cand_t c, gimple from_phi, tree basis_name,
2074 location_t loc, bool known_stride)
2075 {
2076 int i;
2077 tree name, phi_arg;
2078 gimple phi;
2079 vec<tree> phi_args;
2080 slsr_cand_t basis = lookup_cand (c->basis);
2081 int nargs = gimple_phi_num_args (from_phi);
2082 basic_block phi_bb = gimple_bb (from_phi);
2083 slsr_cand_t phi_cand = base_cand_from_table (gimple_phi_result (from_phi));
2084 phi_args.create (nargs);
2085
2086 /* Process each argument of the existing phi that represents
2087 conditionally-executed add candidates. */
2088 for (i = 0; i < nargs; i++)
2089 {
2090 edge e = (*phi_bb->preds)[i];
2091 tree arg = gimple_phi_arg_def (from_phi, i);
2092 tree feeding_def;
2093
2094 /* If the phi argument is the base name of the CAND_PHI, then
2095 this incoming arc should use the hidden basis. */
2096 if (operand_equal_p (arg, phi_cand->base_expr, 0))
2097 if (basis->index.is_zero ())
2098 feeding_def = gimple_assign_lhs (basis->cand_stmt);
2099 else
2100 {
2101 double_int incr = -basis->index;
2102 feeding_def = create_add_on_incoming_edge (c, basis_name, incr,
2103 e, loc, known_stride);
2104 }
2105 else
2106 {
2107 gimple arg_def = SSA_NAME_DEF_STMT (arg);
2108
2109 /* If there is another phi along this incoming edge, we must
2110 process it in the same fashion to ensure that all basis
2111 adjustments are made along its incoming edges. */
2112 if (gimple_code (arg_def) == GIMPLE_PHI)
2113 feeding_def = create_phi_basis (c, arg_def, basis_name,
2114 loc, known_stride);
2115 else
2116 {
2117 slsr_cand_t arg_cand = base_cand_from_table (arg);
2118 double_int diff = arg_cand->index - basis->index;
2119 feeding_def = create_add_on_incoming_edge (c, basis_name, diff,
2120 e, loc, known_stride);
2121 }
2122 }
2123
2124 /* Because of recursion, we need to save the arguments in a vector
2125 so we can create the PHI statement all at once. Otherwise the
2126 storage for the half-created PHI can be reclaimed. */
2127 phi_args.safe_push (feeding_def);
2128 }
2129
2130 /* Create the new phi basis. */
2131 name = make_temp_ssa_name (TREE_TYPE (basis_name), NULL, "slsr");
2132 phi = create_phi_node (name, phi_bb);
2133 SSA_NAME_DEF_STMT (name) = phi;
2134
2135 FOR_EACH_VEC_ELT (phi_args, i, phi_arg)
2136 {
2137 edge e = (*phi_bb->preds)[i];
2138 add_phi_arg (phi, phi_arg, e, loc);
2139 }
2140
2141 update_stmt (phi);
2142
2143 if (dump_file && (dump_flags & TDF_DETAILS))
2144 {
2145 fputs ("Introducing new phi basis: ", dump_file);
2146 print_gimple_stmt (dump_file, phi, 0, 0);
2147 }
2148
2149 return name;
2150 }
2151
2152 /* Given a candidate C whose basis is hidden by at least one intervening
2153 phi, introduce a matching number of new phis to represent its basis
2154 adjusted by conditional increments along possible incoming paths. Then
2155 replace C as though it were an unconditional candidate, using the new
2156 basis. */
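/* In outline (illustrative): the new phi from create_phi_basis stands
   in for C's basis, and C itself is then rewritten by
   replace_mult_candidate as

       x_N = phi_basis + (c->index * stride)

   exactly as for an unconditional candidate.  */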
2157
2158 static void
2159 replace_conditional_candidate (slsr_cand_t c)
2160 {
2161 tree basis_name, name;
2162 slsr_cand_t basis;
2163 location_t loc;
2164 double_int stride, bump;
2165
2166 /* Look up the LHS SSA name from C's basis. This will be the
2167 RHS1 of the adds we will introduce to create new phi arguments. */
2168 basis = lookup_cand (c->basis);
2169 basis_name = gimple_assign_lhs (basis->cand_stmt);
2170
2171 /* Create a new phi statement which will represent C's true basis
2172 after the transformation is complete. */
2173 loc = gimple_location (c->cand_stmt);
2174 name = create_phi_basis (c, lookup_cand (c->def_phi)->cand_stmt,
2175 basis_name, loc, KNOWN_STRIDE);
2176 /* Replace C with an add of the new basis phi and a constant. */
2177 stride = tree_to_double_int (c->stride);
2178 bump = c->index * stride;
2179
2180 replace_mult_candidate (c, name, bump);
2181 }
2182
2183 /* Compute the expected costs of inserting basis adjustments for
2184 candidate C with phi-definition PHI. The cost of inserting
2185 one adjustment is given by ONE_ADD_COST. If PHI has arguments
2186 which are themselves phi results, recursively calculate costs
2187 for those phis as well. */
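/* Rough sketch of the cost model (not exact target numbers): a phi
   argument contributes ONE_ADD_COST when it is not the phi's base
   name and its feeding candidate's index differs from C's; arguments
   fed by further phis are costed recursively.  Thus a two-argument
   phi needing adjustment on both arcs costs 2 * ONE_ADD_COST, while
   COST_INFINITE is returned if the phi is not dominated by the
   hidden basis.  */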
2188
2189 static int
2190 phi_add_costs (gimple phi, slsr_cand_t c, int one_add_cost)
2191 {
2192 unsigned i;
2193 int cost = 0;
2194 slsr_cand_t phi_cand = base_cand_from_table (gimple_phi_result (phi));
2195
2196 /* If we work our way back to a phi that isn't dominated by the hidden
2197 basis, this isn't a candidate for replacement. Indicate this by
2198 returning an unreasonably high cost. It's not easy to detect
2199 these situations when determining the basis, so we defer the
2200 decision until now. */
2201 basic_block phi_bb = gimple_bb (phi);
2202 slsr_cand_t basis = lookup_cand (c->basis);
2203 basic_block basis_bb = gimple_bb (basis->cand_stmt);
2204
2205 if (phi_bb == basis_bb || !dominated_by_p (CDI_DOMINATORS, phi_bb, basis_bb))
2206 return COST_INFINITE;
2207
2208 for (i = 0; i < gimple_phi_num_args (phi); i++)
2209 {
2210 tree arg = gimple_phi_arg_def (phi, i);
2211
2212 if (arg != phi_cand->base_expr)
2213 {
2214 gimple arg_def = SSA_NAME_DEF_STMT (arg);
2215
2216 if (gimple_code (arg_def) == GIMPLE_PHI)
2217 cost += phi_add_costs (arg_def, c, one_add_cost);
2218 else
2219 {
2220 slsr_cand_t arg_cand = base_cand_from_table (arg);
2221
2222 if (arg_cand->index != c->index)
2223 cost += one_add_cost;
2224 }
2225 }
2226 }
2227
2228 return cost;
2229 }
2230
2231 /* For candidate C, each sibling of candidate C, and each dependent of
2232 candidate C, determine whether the candidate is dependent upon a
2233 phi that hides its basis. If not, replace the candidate unconditionally.
2234 Otherwise, determine whether the cost of introducing compensation code
2235 for the candidate is offset by the gains from strength reduction. If
2236 so, replace the candidate and introduce the compensation code. */
2237
2238 static void
2239 replace_uncond_cands_and_profitable_phis (slsr_cand_t c)
2240 {
2241 if (phi_dependent_cand_p (c))
2242 {
2243 if (c->kind == CAND_MULT)
2244 {
2245 /* A candidate dependent upon a phi will replace a multiply by
2246 a constant with an add, and will insert at most one add for
2247 each phi argument. Add these costs with the potential dead-code
2248 savings to determine profitability. */
2249 bool speed = optimize_bb_for_speed_p (gimple_bb (c->cand_stmt));
2250 int mult_savings = stmt_cost (c->cand_stmt, speed);
2251 gimple phi = lookup_cand (c->def_phi)->cand_stmt;
2252 tree phi_result = gimple_phi_result (phi);
2253 int one_add_cost = add_cost (speed,
2254 TYPE_MODE (TREE_TYPE (phi_result)));
2255 int add_costs = one_add_cost + phi_add_costs (phi, c, one_add_cost);
2256 int cost = add_costs - mult_savings - c->dead_savings;
2257
2258 if (dump_file && (dump_flags & TDF_DETAILS))
2259 {
2260 fprintf (dump_file, " Conditional candidate %d:\n", c->cand_num);
2261 fprintf (dump_file, " add_costs = %d\n", add_costs);
2262 fprintf (dump_file, " mult_savings = %d\n", mult_savings);
2263 fprintf (dump_file, " dead_savings = %d\n", c->dead_savings);
2264 fprintf (dump_file, " cost = %d\n", cost);
2265 if (cost <= COST_NEUTRAL)
2266 fputs (" Replacing...\n", dump_file);
2267 else
2268 fputs (" Not replaced.\n", dump_file);
2269 }
2270
2271 if (cost <= COST_NEUTRAL)
2272 replace_conditional_candidate (c);
2273 }
2274 }
2275 else
2276 replace_unconditional_candidate (c);
2277
2278 if (c->sibling)
2279 replace_uncond_cands_and_profitable_phis (lookup_cand (c->sibling));
2280
2281 if (c->dependent)
2282 replace_uncond_cands_and_profitable_phis (lookup_cand (c->dependent));
2283 }
2284 \f
2285 /* Count the number of candidates in the tree rooted at C that have
2286 not already been replaced under other interpretations. */
2287
2288 static int
2289 count_candidates (slsr_cand_t c)
2290 {
2291 unsigned count = cand_already_replaced (c) ? 0 : 1;
2292
2293 if (c->sibling)
2294 count += count_candidates (lookup_cand (c->sibling));
2295
2296 if (c->dependent)
2297 count += count_candidates (lookup_cand (c->dependent));
2298
2299 return count;
2300 }
2301
2302 /* Increase the count of INCREMENT by one in the increment vector.
2303 INCREMENT is associated with candidate C. If INCREMENT is to be
2304 conditionally executed as part of a conditional candidate replacement,
2305 IS_PHI_ADJUST is true, otherwise false. If an initializer
2306 T_0 = stride * I is provided by a candidate that dominates all
2307 candidates with the same increment, also record T_0 for subsequent use. */
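/* Example (hypothetical): for a chain with stride s_1 and a dominating
   statement t_2 = s_1 * 4, an ADD candidate of the form
   x_3 = base + t_2 with index 4 records increment 4 with t_2 as its
   initializer, so later replacements using increment 4 can reuse t_2
   instead of emitting a new multiply.  */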
2308
2309 static void
2310 record_increment (slsr_cand_t c, double_int increment, bool is_phi_adjust)
2311 {
2312 bool found = false;
2313 unsigned i;
2314
2315 /* Treat increments that differ only in sign as identical so as to
2316 share initializers, unless we are generating pointer arithmetic. */
2317 if (!address_arithmetic_p && increment.is_negative ())
2318 increment = -increment;
2319
2320 for (i = 0; i < incr_vec_len; i++)
2321 {
2322 if (incr_vec[i].incr == increment)
2323 {
2324 incr_vec[i].count++;
2325 found = true;
2326
2327 /* If we previously recorded an initializer that doesn't
2328 dominate this candidate, it's not going to be useful to
2329 us after all. */
2330 if (incr_vec[i].initializer
2331 && !dominated_by_p (CDI_DOMINATORS,
2332 gimple_bb (c->cand_stmt),
2333 incr_vec[i].init_bb))
2334 {
2335 incr_vec[i].initializer = NULL_TREE;
2336 incr_vec[i].init_bb = NULL;
2337 }
2338
2339 break;
2340 }
2341 }
2342
2343 if (!found && incr_vec_len < MAX_INCR_VEC_LEN - 1)
2344 {
2345 /* The first time we see an increment, create the entry for it.
2346 If this is the root candidate which doesn't have a basis, set
2347 the count to zero. We're only processing it so it can possibly
2348 provide an initializer for other candidates. */
2349 incr_vec[incr_vec_len].incr = increment;
2350 	      incr_vec[incr_vec_len].count = (c->basis || is_phi_adjust) ? 1 : 0;
2351 incr_vec[incr_vec_len].cost = COST_INFINITE;
2352
2353 /* Optimistically record the first occurrence of this increment
2354 as providing an initializer (if it does); we will revise this
2355 opinion later if it doesn't dominate all other occurrences.
2356 Exception: increments of -1, 0, 1 never need initializers;
2357 and phi adjustments don't ever provide initializers. */
2358 if (c->kind == CAND_ADD
2359 && !is_phi_adjust
2360 && c->index == increment
2361 && (increment.sgt (double_int_one)
2362 || increment.slt (double_int_minus_one))
2363 && (gimple_assign_rhs_code (c->cand_stmt) == PLUS_EXPR
2364 || gimple_assign_rhs_code (c->cand_stmt) == POINTER_PLUS_EXPR))
2365 {
2366 tree t0 = NULL_TREE;
2367 tree rhs1 = gimple_assign_rhs1 (c->cand_stmt);
2368 tree rhs2 = gimple_assign_rhs2 (c->cand_stmt);
2369 if (operand_equal_p (rhs1, c->base_expr, 0))
2370 t0 = rhs2;
2371 else if (operand_equal_p (rhs2, c->base_expr, 0))
2372 t0 = rhs1;
2373 if (t0
2374 && SSA_NAME_DEF_STMT (t0)
2375 && gimple_bb (SSA_NAME_DEF_STMT (t0)))
2376 {
2377 incr_vec[incr_vec_len].initializer = t0;
2378 incr_vec[incr_vec_len++].init_bb
2379 = gimple_bb (SSA_NAME_DEF_STMT (t0));
2380 }
2381 else
2382 {
2383 incr_vec[incr_vec_len].initializer = NULL_TREE;
2384 incr_vec[incr_vec_len++].init_bb = NULL;
2385 }
2386 }
2387 else
2388 {
2389 incr_vec[incr_vec_len].initializer = NULL_TREE;
2390 incr_vec[incr_vec_len++].init_bb = NULL;
2391 }
2392 }
2393 }
2394
2395 /* Given phi statement PHI that hides a candidate from its BASIS, find
2396 the increments along each incoming arc (recursively handling additional
2397 phis that may be present) and record them. These increments are the
2398 difference in index between the index-adjusting statements and the
2399 index of the basis. */
2400
2401 static void
2402 record_phi_increments (slsr_cand_t basis, gimple phi)
2403 {
2404 unsigned i;
2405 slsr_cand_t phi_cand = base_cand_from_table (gimple_phi_result (phi));
2406
2407 for (i = 0; i < gimple_phi_num_args (phi); i++)
2408 {
2409 tree arg = gimple_phi_arg_def (phi, i);
2410
2411 if (!operand_equal_p (arg, phi_cand->base_expr, 0))
2412 {
2413 gimple arg_def = SSA_NAME_DEF_STMT (arg);
2414
2415 if (gimple_code (arg_def) == GIMPLE_PHI)
2416 record_phi_increments (basis, arg_def);
2417 else
2418 {
2419 slsr_cand_t arg_cand = base_cand_from_table (arg);
2420 double_int diff = arg_cand->index - basis->index;
2421 record_increment (arg_cand, diff, PHI_ADJUST);
2422 }
2423 }
2424 }
2425 }
2426
2427 /* Determine how many times each unique increment occurs in the set
2428 of candidates rooted at C's parent, recording the data in the
2429 increment vector. For each unique increment I, if an initializer
2430 T_0 = stride * I is provided by a candidate that dominates all
2431 candidates with the same increment, also record T_0 for subsequent
2432 use. */
2433
2434 static void
2435 record_increments (slsr_cand_t c)
2436 {
2437 if (!cand_already_replaced (c))
2438 {
2439 if (!phi_dependent_cand_p (c))
2440 record_increment (c, cand_increment (c), NOT_PHI_ADJUST);
2441 else
2442 {
2443 /* A candidate with a basis hidden by a phi will have one
2444 increment for its relationship to the index represented by
2445 the phi, and potentially additional increments along each
2446 incoming edge. For the root of the dependency tree (which
2447 has no basis), process just the initial index in case it has
2448 an initializer that can be used by subsequent candidates. */
2449 record_increment (c, c->index, NOT_PHI_ADJUST);
2450
2451 if (c->basis)
2452 record_phi_increments (lookup_cand (c->basis),
2453 lookup_cand (c->def_phi)->cand_stmt);
2454 }
2455 }
2456
2457 if (c->sibling)
2458 record_increments (lookup_cand (c->sibling));
2459
2460 if (c->dependent)
2461 record_increments (lookup_cand (c->dependent));
2462 }
2463
2464 /* Add up and return the costs of introducing add statements that
2465 require the increment INCR on behalf of candidate C and phi
2466 statement PHI. Accumulate into *SAVINGS the potential savings
2467 from removing existing statements that feed PHI and have no other
2468 uses. */
2469
2470 static int
2471 phi_incr_cost (slsr_cand_t c, double_int incr, gimple phi, int *savings)
2472 {
2473 unsigned i;
2474 int cost = 0;
2475 slsr_cand_t basis = lookup_cand (c->basis);
2476 slsr_cand_t phi_cand = base_cand_from_table (gimple_phi_result (phi));
2477
2478 for (i = 0; i < gimple_phi_num_args (phi); i++)
2479 {
2480 tree arg = gimple_phi_arg_def (phi, i);
2481
2482 if (!operand_equal_p (arg, phi_cand->base_expr, 0))
2483 {
2484 gimple arg_def = SSA_NAME_DEF_STMT (arg);
2485
2486 if (gimple_code (arg_def) == GIMPLE_PHI)
2487 {
2488 int feeding_savings = 0;
2489 cost += phi_incr_cost (c, incr, arg_def, &feeding_savings);
2490 if (has_single_use (gimple_phi_result (arg_def)))
2491 *savings += feeding_savings;
2492 }
2493 else
2494 {
2495 slsr_cand_t arg_cand = base_cand_from_table (arg);
2496 double_int diff = arg_cand->index - basis->index;
2497
2498 if (incr == diff)
2499 {
2500 tree basis_lhs = gimple_assign_lhs (basis->cand_stmt);
2501 tree lhs = gimple_assign_lhs (arg_cand->cand_stmt);
2502 cost += add_cost (true, TYPE_MODE (TREE_TYPE (basis_lhs)));
2503 if (has_single_use (lhs))
2504 *savings += stmt_cost (arg_cand->cand_stmt, true);
2505 }
2506 }
2507 }
2508 }
2509
2510 return cost;
2511 }
2512
2513 /* Return the first candidate in the tree rooted at C that has not
2514 already been replaced, favoring siblings over dependents. */
2515
2516 static slsr_cand_t
2517 unreplaced_cand_in_tree (slsr_cand_t c)
2518 {
2519 if (!cand_already_replaced (c))
2520 return c;
2521
2522 if (c->sibling)
2523 {
2524 slsr_cand_t sib = unreplaced_cand_in_tree (lookup_cand (c->sibling));
2525 if (sib)
2526 return sib;
2527 }
2528
2529 if (c->dependent)
2530 {
2531 slsr_cand_t dep = unreplaced_cand_in_tree (lookup_cand (c->dependent));
2532 if (dep)
2533 return dep;
2534 }
2535
2536 return NULL;
2537 }
2538
2539 /* Return TRUE if the candidates in the tree rooted at C should be
2540 optimized for speed, else FALSE. We estimate this based on the block
2541 containing the most dominant candidate in the tree that has not yet
2542 been replaced. */
2543
2544 static bool
2545 optimize_cands_for_speed_p (slsr_cand_t c)
2546 {
2547 slsr_cand_t c2 = unreplaced_cand_in_tree (c);
2548 gcc_assert (c2);
2549 return optimize_bb_for_speed_p (gimple_bb (c2->cand_stmt));
2550 }
2551
2552 /* Add COST_IN to the lowest cost of any dependent path starting at
2553 candidate C or any of its siblings, counting only candidates along
2554 such paths with increment INCR. Assume that replacing a candidate
2555 reduces cost by REPL_SAVINGS. Also account for savings from any
2556 statements that would go dead. If COUNT_PHIS is true, include
2557 costs of introducing feeding statements for conditional candidates. */
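/* Worked sketch (made-up numbers): with COST_IN = 8, REPL_SAVINGS = 4,
   and a chain of two candidates (C and its dependent) both using
   increment INCR and each with dead_savings = 1, the path cost is
   8 - (4 + 1) = 3 at C and 3 - (4 + 1) = -2 at the dependent; a
   negative result means the replacements more than pay for the cost
   that seeded COST_IN.  */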
2558
2559 static int
2560 lowest_cost_path (int cost_in, int repl_savings, slsr_cand_t c,
2561 double_int incr, bool count_phis)
2562 {
2563 int local_cost, sib_cost, savings = 0;
2564 double_int cand_incr = cand_abs_increment (c);
2565
2566 if (cand_already_replaced (c))
2567 local_cost = cost_in;
2568 else if (incr == cand_incr)
2569 local_cost = cost_in - repl_savings - c->dead_savings;
2570 else
2571 local_cost = cost_in - c->dead_savings;
2572
2573 if (count_phis
2574 && phi_dependent_cand_p (c)
2575 && !cand_already_replaced (c))
2576 {
2577 gimple phi = lookup_cand (c->def_phi)->cand_stmt;
2578 local_cost += phi_incr_cost (c, incr, phi, &savings);
2579
2580 if (has_single_use (gimple_phi_result (phi)))
2581 local_cost -= savings;
2582 }
2583
2584 if (c->dependent)
2585 local_cost = lowest_cost_path (local_cost, repl_savings,
2586 lookup_cand (c->dependent), incr,
2587 count_phis);
2588
2589 if (c->sibling)
2590 {
2591 sib_cost = lowest_cost_path (cost_in, repl_savings,
2592 lookup_cand (c->sibling), incr,
2593 count_phis);
2594 local_cost = MIN (local_cost, sib_cost);
2595 }
2596
2597 return local_cost;
2598 }
2599
2600 /* Compute the total savings that would accrue from all replacements
2601 in the candidate tree rooted at C, counting only candidates with
2602 increment INCR. Assume that replacing a candidate reduces cost
2603 by REPL_SAVINGS. Also account for savings from statements that
2604 would go dead. */
2605
2606 static int
2607 total_savings (int repl_savings, slsr_cand_t c, double_int incr,
2608 bool count_phis)
2609 {
2610 int savings = 0;
2611 double_int cand_incr = cand_abs_increment (c);
2612
2613 if (incr == cand_incr && !cand_already_replaced (c))
2614 savings += repl_savings + c->dead_savings;
2615
2616 if (count_phis
2617 && phi_dependent_cand_p (c)
2618 && !cand_already_replaced (c))
2619 {
2620 int phi_savings = 0;
2621 gimple phi = lookup_cand (c->def_phi)->cand_stmt;
2622 savings -= phi_incr_cost (c, incr, phi, &phi_savings);
2623
2624 if (has_single_use (gimple_phi_result (phi)))
2625 savings += phi_savings;
2626 }
2627
2628 if (c->dependent)
2629 savings += total_savings (repl_savings, lookup_cand (c->dependent), incr,
2630 count_phis);
2631
2632 if (c->sibling)
2633 savings += total_savings (repl_savings, lookup_cand (c->sibling), incr,
2634 count_phis);
2635
2636 return savings;
2637 }
2638
2639 /* Use target-specific costs to determine and record which increments
2640 in the current candidate tree are profitable to replace, assuming
2641 MODE and SPEED. FIRST_DEP is the first dependent of the root of
2642 the candidate tree.
2643
2644 One slight limitation here is that we don't account for the possible
2645 introduction of casts in some cases. See replace_one_candidate for
2646 the cases where these are introduced. This should probably be cleaned
2647 up sometime. */
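/* A small worked example of the CAND_MULT case below (numbers are
   made up): if inserting T_0 = stride * incr costs 4, a multiply
   costs 8, and an add costs 2, then repl_savings = 6, and a speed
   path with two replaceable candidates and no dead-code savings
   yields a lowest cost of 4 - 6 - 6 = -8, so the increment is
   recorded as profitable.  */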
2648
2649 static void
2650 analyze_increments (slsr_cand_t first_dep, enum machine_mode mode, bool speed)
2651 {
2652 unsigned i;
2653
2654 for (i = 0; i < incr_vec_len; i++)
2655 {
2656 HOST_WIDE_INT incr = incr_vec[i].incr.to_shwi ();
2657
2658 /* If somehow this increment is bigger than a HWI, we won't
2659 be optimizing candidates that use it. And if the increment
2660 has a count of zero, nothing will be done with it. */
2661 if (!incr_vec[i].incr.fits_shwi () || !incr_vec[i].count)
2662 incr_vec[i].cost = COST_INFINITE;
2663
2664 /* Increments of 0, 1, and -1 are always profitable to replace,
2665 because they always replace a multiply or add with an add or
2666 copy, and may cause one or more existing instructions to go
2667 dead. Exception: -1 can't be assumed to be profitable for
2668 pointer addition. */
2669 else if (incr == 0
2670 || incr == 1
2671 || (incr == -1
2672 && (gimple_assign_rhs_code (first_dep->cand_stmt)
2673 != POINTER_PLUS_EXPR)))
2674 incr_vec[i].cost = COST_NEUTRAL;
2675
2676 /* FORNOW: If we need to add an initializer, give up if a cast from
2677 the candidate's type to its stride's type can lose precision.
2678 This could eventually be handled better by expressly retaining the
2679 result of a cast to a wider type in the stride. Example:
2680
2681 short int _1;
2682 _2 = (int) _1;
2683 _3 = _2 * 10;
2684 _4 = x + _3; ADD: x + (10 * _1) : int
2685 _5 = _2 * 15;
2686 _6 = x + _5; ADD: x + (15 * _1) : int
2687
2688 Right now replacing _6 would cause insertion of an initializer
2689 of the form "short int T = _1 * 5;" followed by a cast to
2690 int, which could overflow incorrectly. Had we recorded _2 or
2691 (int)_1 as the stride, this wouldn't happen. However, doing
2692 this breaks other opportunities, so this will require some
2693 care. */
2694 else if (!incr_vec[i].initializer
2695 && TREE_CODE (first_dep->stride) != INTEGER_CST
2696 && !legal_cast_p_1 (first_dep->stride,
2697 gimple_assign_lhs (first_dep->cand_stmt)))
2698
2699 incr_vec[i].cost = COST_INFINITE;
2700
2701 /* If we need to add an initializer, make sure we don't introduce
2702 a multiply by a pointer type, which can happen in certain cast
2703 scenarios. FIXME: When cleaning up these cast issues, we can
2704 afford to introduce the multiply provided we cast out to an
2705 unsigned int of appropriate size. */
2706 else if (!incr_vec[i].initializer
2707 && TREE_CODE (first_dep->stride) != INTEGER_CST
2708 && POINTER_TYPE_P (TREE_TYPE (first_dep->stride)))
2709
2710 incr_vec[i].cost = COST_INFINITE;
2711
2712 /* For any other increment, if this is a multiply candidate, we
2713 must introduce a temporary T and initialize it with
2714 T_0 = stride * increment. When optimizing for speed, walk the
2715 candidate tree to calculate the best cost reduction along any
2716 path; if it offsets the fixed cost of inserting the initializer,
2717 replacing the increment is profitable. When optimizing for
2718 size, instead calculate the total cost reduction from replacing
2719 all candidates with this increment. */
2720 else if (first_dep->kind == CAND_MULT)
2721 {
2722 int cost = mult_by_coeff_cost (incr, mode, speed);
2723 int repl_savings = mul_cost (speed, mode) - add_cost (speed, mode);
2724 if (speed)
2725 cost = lowest_cost_path (cost, repl_savings, first_dep,
2726 incr_vec[i].incr, COUNT_PHIS);
2727 else
2728 cost -= total_savings (repl_savings, first_dep, incr_vec[i].incr,
2729 COUNT_PHIS);
2730
2731 incr_vec[i].cost = cost;
2732 }
2733
2734 /* If this is an add candidate, the initializer may already
2735 exist, so only calculate the cost of the initializer if it
2736 doesn't. We are replacing one add with another here, so the
2737 known replacement savings is zero. We will account for removal
2738 of dead instructions in lowest_cost_path or total_savings. */
2739 else
2740 {
2741 int cost = 0;
2742 if (!incr_vec[i].initializer)
2743 cost = mult_by_coeff_cost (incr, mode, speed);
2744
2745 if (speed)
2746 cost = lowest_cost_path (cost, 0, first_dep, incr_vec[i].incr,
2747 DONT_COUNT_PHIS);
2748 else
2749 cost -= total_savings (0, first_dep, incr_vec[i].incr,
2750 DONT_COUNT_PHIS);
2751
2752 incr_vec[i].cost = cost;
2753 }
2754 }
2755 }
2756
2757 /* Return the nearest common dominator of BB1 and BB2. If the blocks
2758 are identical, return the earlier of C1 and C2 in *WHERE. Otherwise,
2759 if the NCD matches BB1, return C1 in *WHERE; if the NCD matches BB2,
2760 return C2 in *WHERE; and if the NCD matches neither, return NULL in
2761 *WHERE. Note: It is possible for one of C1 and C2 to be NULL. */
2762
2763 static basic_block
2764 ncd_for_two_cands (basic_block bb1, basic_block bb2,
2765 slsr_cand_t c1, slsr_cand_t c2, slsr_cand_t *where)
2766 {
2767 basic_block ncd;
2768
2769 if (!bb1)
2770 {
2771 *where = c2;
2772 return bb2;
2773 }
2774
2775 if (!bb2)
2776 {
2777 *where = c1;
2778 return bb1;
2779 }
2780
2781 ncd = nearest_common_dominator (CDI_DOMINATORS, bb1, bb2);
2782
2783 /* If both candidates are in the same block, the earlier
2784 candidate wins. */
2785 if (bb1 == ncd && bb2 == ncd)
2786 {
2787 if (!c1 || (c2 && c2->cand_num < c1->cand_num))
2788 *where = c2;
2789 else
2790 *where = c1;
2791 }
2792
2793 /* Otherwise, if one of them produced a candidate in the
2794 dominator, that one wins. */
2795 else if (bb1 == ncd)
2796 *where = c1;
2797
2798 else if (bb2 == ncd)
2799 *where = c2;
2800
2801 /* If neither matches the dominator, neither wins. */
2802 else
2803 *where = NULL;
2804
2805 return ncd;
2806 }
2807
2808 /* Consider all candidates that feed PHI. Find the nearest common
2809 dominator of those candidates requiring the given increment INCR.
2810 Further find and return the nearest common dominator of this result
2811 with block NCD. If the returned block contains one or more of the
2812 candidates, return the earliest candidate in the block in *WHERE. */
2813
2814 static basic_block
2815 ncd_with_phi (slsr_cand_t c, double_int incr, gimple phi,
2816 basic_block ncd, slsr_cand_t *where)
2817 {
2818 unsigned i;
2819 slsr_cand_t basis = lookup_cand (c->basis);
2820 slsr_cand_t phi_cand = base_cand_from_table (gimple_phi_result (phi));
2821
2822 for (i = 0; i < gimple_phi_num_args (phi); i++)
2823 {
2824 tree arg = gimple_phi_arg_def (phi, i);
2825
2826 if (!operand_equal_p (arg, phi_cand->base_expr, 0))
2827 {
2828 gimple arg_def = SSA_NAME_DEF_STMT (arg);
2829
2830 if (gimple_code (arg_def) == GIMPLE_PHI)
2831 ncd = ncd_with_phi (c, incr, arg_def, ncd, where);
2832 else
2833 {
2834 slsr_cand_t arg_cand = base_cand_from_table (arg);
2835 double_int diff = arg_cand->index - basis->index;
2836
2837 if ((incr == diff) || (!address_arithmetic_p && incr == -diff))
2838 ncd = ncd_for_two_cands (ncd, gimple_bb (arg_cand->cand_stmt),
2839 *where, arg_cand, where);
2840 }
2841 }
2842 }
2843
2844 return ncd;
2845 }
2846
2847 /* Consider the candidate C together with any candidates that feed
2848 C's phi dependence (if any). Find and return the nearest common
2849 dominator of those candidates requiring the given increment INCR.
2850 If the returned block contains one or more of the candidates,
2851 return the earliest candidate in the block in *WHERE. */
2852
2853 static basic_block
2854 ncd_of_cand_and_phis (slsr_cand_t c, double_int incr, slsr_cand_t *where)
2855 {
2856 basic_block ncd = NULL;
2857
2858 if (cand_abs_increment (c) == incr)
2859 {
2860 ncd = gimple_bb (c->cand_stmt);
2861 *where = c;
2862 }
2863
2864 if (phi_dependent_cand_p (c))
2865 ncd = ncd_with_phi (c, incr, lookup_cand (c->def_phi)->cand_stmt,
2866 ncd, where);
2867
2868 return ncd;
2869 }
2870
2871 /* Consider all candidates in the tree rooted at C for which INCR
2872 represents the required increment of C relative to its basis.
2873 Find and return the basic block that most nearly dominates all
2874 such candidates. If the returned block contains one or more of
2875 the candidates, return the earliest candidate in the block in
2876 *WHERE. */
2877
2878 static basic_block
2879 nearest_common_dominator_for_cands (slsr_cand_t c, double_int incr,
2880 slsr_cand_t *where)
2881 {
2882 basic_block sib_ncd = NULL, dep_ncd = NULL, this_ncd = NULL, ncd;
2883 slsr_cand_t sib_where = NULL, dep_where = NULL, this_where = NULL, new_where;
2884
2885 /* First find the NCD of all siblings and dependents. */
2886 if (c->sibling)
2887 sib_ncd = nearest_common_dominator_for_cands (lookup_cand (c->sibling),
2888 incr, &sib_where);
2889 if (c->dependent)
2890 dep_ncd = nearest_common_dominator_for_cands (lookup_cand (c->dependent),
2891 incr, &dep_where);
2892 if (!sib_ncd && !dep_ncd)
2893 {
2894 new_where = NULL;
2895 ncd = NULL;
2896 }
2897 else if (sib_ncd && !dep_ncd)
2898 {
2899 new_where = sib_where;
2900 ncd = sib_ncd;
2901 }
2902 else if (dep_ncd && !sib_ncd)
2903 {
2904 new_where = dep_where;
2905 ncd = dep_ncd;
2906 }
2907 else
2908 ncd = ncd_for_two_cands (sib_ncd, dep_ncd, sib_where,
2909 dep_where, &new_where);
2910
2911 /* If the candidate's increment doesn't match the one we're interested
2912    in (nor do any increments for feeding defs of a phi-dependence),
2913    then the result depends only on siblings and dependents. */
2914 this_ncd = ncd_of_cand_and_phis (c, incr, &this_where);
2915
2916 if (!this_ncd || cand_already_replaced (c))
2917 {
2918 *where = new_where;
2919 return ncd;
2920 }
2921
2922 /* Otherwise, compare this candidate with the result from all siblings
2923 and dependents. */
2924 ncd = ncd_for_two_cands (ncd, this_ncd, new_where, this_where, where);
2925
2926 return ncd;
2927 }
2928
2929 /* Return TRUE if the increment indexed by INDEX is profitable to replace. */
2930
2931 static inline bool
2932 profitable_increment_p (unsigned index)
2933 {
2934 return (incr_vec[index].cost <= COST_NEUTRAL);
2935 }
2936
2937 /* For each profitable increment in the increment vector not equal to
2938 0 or 1 (or -1, for non-pointer arithmetic), find the nearest common
2939 dominator of all statements in the candidate chain rooted at C
2940 that require that increment, and insert an initializer
2941 T_0 = stride * increment at that location. Record T_0 with the
2942 increment record. */
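/* For example (hypothetical): if increment 5 is profitable and has no
   existing initializer, a statement

       slsr_N = stride * 5

   is inserted in the nearest common dominator of all candidates that
   need it, immediately before the earliest such candidate when one
   lives in that block.  */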
2943
2944 static void
2945 insert_initializers (slsr_cand_t c)
2946 {
2947 unsigned i;
2948
2949 for (i = 0; i < incr_vec_len; i++)
2950 {
2951 basic_block bb;
2952 slsr_cand_t where = NULL;
2953 gimple init_stmt;
2954 tree stride_type, new_name, incr_tree;
2955 double_int incr = incr_vec[i].incr;
2956
2957 if (!profitable_increment_p (i)
2958 || incr.is_one ()
2959 || (incr.is_minus_one ()
2960 && gimple_assign_rhs_code (c->cand_stmt) != POINTER_PLUS_EXPR)
2961 || incr.is_zero ())
2962 continue;
2963
2964 /* We may have already identified an existing initializer that
2965 will suffice. */
2966 if (incr_vec[i].initializer)
2967 {
2968 if (dump_file && (dump_flags & TDF_DETAILS))
2969 {
2970 fputs ("Using existing initializer: ", dump_file);
2971 print_gimple_stmt (dump_file,
2972 SSA_NAME_DEF_STMT (incr_vec[i].initializer),
2973 0, 0);
2974 }
2975 continue;
2976 }
2977
2978 /* Find the block that most closely dominates all candidates
2979 with this increment. If there is at least one candidate in
2980 that block, the earliest one will be returned in WHERE. */
2981 bb = nearest_common_dominator_for_cands (c, incr, &where);
2982
2983 /* Create a new SSA name to hold the initializer's value. */
2984 stride_type = TREE_TYPE (c->stride);
2985 new_name = make_temp_ssa_name (stride_type, NULL, "slsr");
2986 incr_vec[i].initializer = new_name;
2987
2988 /* Create the initializer and insert it in the latest possible
2989 dominating position. */
2990 incr_tree = double_int_to_tree (stride_type, incr);
2991 init_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_name,
2992 c->stride, incr_tree);
2993 if (where)
2994 {
2995 gimple_stmt_iterator gsi = gsi_for_stmt (where->cand_stmt);
2996 gsi_insert_before (&gsi, init_stmt, GSI_SAME_STMT);
2997 gimple_set_location (init_stmt, gimple_location (where->cand_stmt));
2998 }
2999 else
3000 {
3001 gimple_stmt_iterator gsi = gsi_last_bb (bb);
3002 gimple basis_stmt = lookup_cand (c->basis)->cand_stmt;
3003
3004 if (!gsi_end_p (gsi) && is_ctrl_stmt (gsi_stmt (gsi)))
3005 gsi_insert_before (&gsi, init_stmt, GSI_SAME_STMT);
3006 else
3007 gsi_insert_after (&gsi, init_stmt, GSI_SAME_STMT);
3008
3009 gimple_set_location (init_stmt, gimple_location (basis_stmt));
3010 }
3011
3012 if (dump_file && (dump_flags & TDF_DETAILS))
3013 {
3014 fputs ("Inserting initializer: ", dump_file);
3015 print_gimple_stmt (dump_file, init_stmt, 0, 0);
3016 }
3017 }
3018 }
3019
3020 /* Return TRUE iff all required increments for candidates feeding PHI
3021 are profitable to replace on behalf of candidate C. */
3022
3023 static bool
3024 all_phi_incrs_profitable (slsr_cand_t c, gimple phi)
3025 {
3026 unsigned i;
3027 slsr_cand_t basis = lookup_cand (c->basis);
3028 slsr_cand_t phi_cand = base_cand_from_table (gimple_phi_result (phi));
3029
3030 for (i = 0; i < gimple_phi_num_args (phi); i++)
3031 {
3032 tree arg = gimple_phi_arg_def (phi, i);
3033
3034 if (!operand_equal_p (arg, phi_cand->base_expr, 0))
3035 {
3036 gimple arg_def = SSA_NAME_DEF_STMT (arg);
3037
3038 if (gimple_code (arg_def) == GIMPLE_PHI)
3039 {
3040 if (!all_phi_incrs_profitable (c, arg_def))
3041 return false;
3042 }
3043 else
3044 {
3045 int j;
3046 slsr_cand_t arg_cand = base_cand_from_table (arg);
3047 double_int increment = arg_cand->index - basis->index;
3048
3049 if (!address_arithmetic_p && increment.is_negative ())
3050 increment = -increment;
3051
3052 j = incr_vec_index (increment);
3053
3054 if (dump_file && (dump_flags & TDF_DETAILS))
3055 {
3056 fprintf (dump_file, " Conditional candidate %d, phi: ",
3057 c->cand_num);
3058 print_gimple_stmt (dump_file, phi, 0, 0);
3059 fputs (" increment: ", dump_file);
3060 dump_double_int (dump_file, increment, false);
3061 if (j < 0)
3062 fprintf (dump_file,
3063 "\n Not replaced; incr_vec overflow.\n");
3064 else {
3065 fprintf (dump_file, "\n cost: %d\n", incr_vec[j].cost);
3066 if (profitable_increment_p (j))
3067 fputs (" Replacing...\n", dump_file);
3068 else
3069 fputs (" Not replaced.\n", dump_file);
3070 }
3071 }
3072
3073 if (j < 0 || !profitable_increment_p (j))
3074 return false;
3075 }
3076 }
3077 }
3078
3079 return true;
3080 }
3081
3082 /* Create a NOP_EXPR that copies FROM_EXPR into a new SSA name of
3083    type TO_TYPE, and insert it in front of the statement represented
3084    by candidate C. A new temporary SSA name is created to hold the
3085    result of the cast. Return the new SSA name. */
3086
3087 static tree
3088 introduce_cast_before_cand (slsr_cand_t c, tree to_type, tree from_expr)
3089 {
3090 tree cast_lhs;
3091 gimple cast_stmt;
3092 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
3093
3094 cast_lhs = make_temp_ssa_name (to_type, NULL, "slsr");
3095 cast_stmt = gimple_build_assign_with_ops (NOP_EXPR, cast_lhs,
3096 from_expr, NULL_TREE);
3097 gimple_set_location (cast_stmt, gimple_location (c->cand_stmt));
3098 gsi_insert_before (&gsi, cast_stmt, GSI_SAME_STMT);
3099
3100 if (dump_file && (dump_flags & TDF_DETAILS))
3101 {
3102 fputs (" Inserting: ", dump_file);
3103 print_gimple_stmt (dump_file, cast_stmt, 0, 0);
3104 }
3105
3106 return cast_lhs;
3107 }
3108
3109 /* Replace the RHS of the statement represented by candidate C with
3110 NEW_CODE, NEW_RHS1, and NEW_RHS2, provided that to do so doesn't
3111 leave C unchanged or just interchange its operands. The original
3112 operation and operands are in OLD_CODE, OLD_RHS1, and OLD_RHS2.
3113 If the replacement was made and we are doing a details dump,
3114 return the revised statement, else NULL. */
3115
3116 static gimple
3117 replace_rhs_if_not_dup (enum tree_code new_code, tree new_rhs1, tree new_rhs2,
3118 enum tree_code old_code, tree old_rhs1, tree old_rhs2,
3119 slsr_cand_t c)
3120 {
3121 if (new_code != old_code
3122 || ((!operand_equal_p (new_rhs1, old_rhs1, 0)
3123 || !operand_equal_p (new_rhs2, old_rhs2, 0))
3124 && (!operand_equal_p (new_rhs1, old_rhs2, 0)
3125 || !operand_equal_p (new_rhs2, old_rhs1, 0))))
3126 {
3127 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
3128 gimple_assign_set_rhs_with_ops (&gsi, new_code, new_rhs1, new_rhs2);
3129 update_stmt (gsi_stmt (gsi));
3130 c->cand_stmt = gsi_stmt (gsi);
3131
3132 if (dump_file && (dump_flags & TDF_DETAILS))
3133 return gsi_stmt (gsi);
3134 }
3135
3136 else if (dump_file && (dump_flags & TDF_DETAILS))
3137 fputs (" (duplicate, not actually replacing)\n", dump_file);
3138
3139 return NULL;
3140 }
3141
3142 /* Strength-reduce the statement represented by candidate C by replacing
3143    it with an equivalent addition or subtraction. I is the index into
3144    the increment vector identifying C's increment. A new SSA name is
3145    created if a cast needs to be introduced. BASIS_NAME
3146    is the rhs1 to use in creating the add/subtract. */
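/* Behavior sketch for the usual cases: with an initializer T_0 the
   statement becomes basis_name + T_0 (or - T_0 when the recorded
   increment is the negation of C's); with increment 1 or -1 it
   becomes basis_name plus or minus the stride; and with increment 0
   it becomes a copy (or cast) of basis_name.  */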
3147
3148 static void
3149 replace_one_candidate (slsr_cand_t c, unsigned i, tree basis_name)
3150 {
3151 gimple stmt_to_print = NULL;
3152 tree orig_rhs1, orig_rhs2;
3153 tree rhs2;
3154 enum tree_code orig_code, repl_code;
3155 double_int cand_incr;
3156
3157 orig_code = gimple_assign_rhs_code (c->cand_stmt);
3158 orig_rhs1 = gimple_assign_rhs1 (c->cand_stmt);
3159 orig_rhs2 = gimple_assign_rhs2 (c->cand_stmt);
3160 cand_incr = cand_increment (c);
3161
3162 if (dump_file && (dump_flags & TDF_DETAILS))
3163 {
3164 fputs ("Replacing: ", dump_file);
3165 print_gimple_stmt (dump_file, c->cand_stmt, 0, 0);
3166 stmt_to_print = c->cand_stmt;
3167 }
3168
3169 if (address_arithmetic_p)
3170 repl_code = POINTER_PLUS_EXPR;
3171 else
3172 repl_code = PLUS_EXPR;
3173
3174 /* If the increment has an initializer T_0, replace the candidate
3175 statement with an add of the basis name and the initializer. */
3176 if (incr_vec[i].initializer)
3177 {
3178 tree init_type = TREE_TYPE (incr_vec[i].initializer);
3179 tree orig_type = TREE_TYPE (orig_rhs2);
3180
3181 if (types_compatible_p (orig_type, init_type))
3182 rhs2 = incr_vec[i].initializer;
3183 else
3184 rhs2 = introduce_cast_before_cand (c, orig_type,
3185 incr_vec[i].initializer);
3186
3187 if (incr_vec[i].incr != cand_incr)
3188 {
3189 gcc_assert (repl_code == PLUS_EXPR);
3190 repl_code = MINUS_EXPR;
3191 }
3192
3193 stmt_to_print = replace_rhs_if_not_dup (repl_code, basis_name, rhs2,
3194 orig_code, orig_rhs1, orig_rhs2,
3195 c);
3196 }
3197
3198 /* Otherwise, the increment is one of -1, 0, and 1. Replace
3199 with a subtract of the stride from the basis name, a copy
3200 from the basis name, or an add of the stride to the basis
3201 name, respectively. It may be necessary to introduce a
3202 cast (or reuse an existing cast). */
3203 else if (cand_incr.is_one ())
3204 {
3205 tree stride_type = TREE_TYPE (c->stride);
3206 tree orig_type = TREE_TYPE (orig_rhs2);
3207
3208 if (types_compatible_p (orig_type, stride_type))
3209 rhs2 = c->stride;
3210 else
3211 rhs2 = introduce_cast_before_cand (c, orig_type, c->stride);
3212
3213 stmt_to_print = replace_rhs_if_not_dup (repl_code, basis_name, rhs2,
3214 orig_code, orig_rhs1, orig_rhs2,
3215 c);
3216 }
3217
3218 else if (cand_incr.is_minus_one ())
3219 {
3220 tree stride_type = TREE_TYPE (c->stride);
3221 tree orig_type = TREE_TYPE (orig_rhs2);
3222 gcc_assert (repl_code != POINTER_PLUS_EXPR);
3223
3224 if (types_compatible_p (orig_type, stride_type))
3225 rhs2 = c->stride;
3226 else
3227 rhs2 = introduce_cast_before_cand (c, orig_type, c->stride);
3228
3229 if (orig_code != MINUS_EXPR
3230 || !operand_equal_p (basis_name, orig_rhs1, 0)
3231 || !operand_equal_p (rhs2, orig_rhs2, 0))
3232 {
3233 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
3234 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, basis_name, rhs2);
3235 update_stmt (gsi_stmt (gsi));
3236 c->cand_stmt = gsi_stmt (gsi);
3237
3238 if (dump_file && (dump_flags & TDF_DETAILS))
3239 stmt_to_print = gsi_stmt (gsi);
3240 }
3241 else if (dump_file && (dump_flags & TDF_DETAILS))
3242 fputs (" (duplicate, not actually replacing)\n", dump_file);
3243 }
3244
3245 else if (cand_incr.is_zero ())
3246 {
3247 tree lhs = gimple_assign_lhs (c->cand_stmt);
3248 tree lhs_type = TREE_TYPE (lhs);
3249 tree basis_type = TREE_TYPE (basis_name);
3250
3251 if (types_compatible_p (lhs_type, basis_type))
3252 {
3253 gimple copy_stmt = gimple_build_assign (lhs, basis_name);
3254 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
3255 gimple_set_location (copy_stmt, gimple_location (c->cand_stmt));
3256 gsi_replace (&gsi, copy_stmt, false);
3257 c->cand_stmt = copy_stmt;
3258
3259 if (dump_file && (dump_flags & TDF_DETAILS))
3260 stmt_to_print = copy_stmt;
3261 }
3262 else
3263 {
3264 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
3265 gimple cast_stmt = gimple_build_assign_with_ops (NOP_EXPR, lhs,
3266 basis_name,
3267 NULL_TREE);
3268 gimple_set_location (cast_stmt, gimple_location (c->cand_stmt));
3269 gsi_replace (&gsi, cast_stmt, false);
3270 c->cand_stmt = cast_stmt;
3271
3272 if (dump_file && (dump_flags & TDF_DETAILS))
3273 stmt_to_print = cast_stmt;
3274 }
3275 }
3276 else
3277 gcc_unreachable ();
3278
3279 if (dump_file && (dump_flags & TDF_DETAILS) && stmt_to_print)
3280 {
3281 fputs ("With: ", dump_file);
3282 print_gimple_stmt (dump_file, stmt_to_print, 0, 0);
3283 fputs ("\n", dump_file);
3284 }
3285 }
3286
3287 /* For each candidate in the tree rooted at C, replace it with
3288 an increment if such has been shown to be profitable. */
3289
3290 static void
3291 replace_profitable_candidates (slsr_cand_t c)
3292 {
3293 if (!cand_already_replaced (c))
3294 {
3295 double_int increment = cand_abs_increment (c);
3296 enum tree_code orig_code = gimple_assign_rhs_code (c->cand_stmt);
3297 int i;
3298
3299 i = incr_vec_index (increment);
3300
3301 /* Only process profitable increments. Nothing useful can be done
3302 to a cast or copy. */
3303 if (i >= 0
3304 && profitable_increment_p (i)
3305 && orig_code != MODIFY_EXPR
3306 && orig_code != NOP_EXPR)
3307 {
3308 if (phi_dependent_cand_p (c))
3309 {
3310 gimple phi = lookup_cand (c->def_phi)->cand_stmt;
3311
3312 if (all_phi_incrs_profitable (c, phi))
3313 {
3314 /* Look up the LHS SSA name from C's basis. This will be
3315 the RHS1 of the adds we will introduce to create new
3316 phi arguments. */
3317 slsr_cand_t basis = lookup_cand (c->basis);
3318 tree basis_name = gimple_assign_lhs (basis->cand_stmt);
3319
3320 /* Create a new phi statement that will represent C's true
3321 basis after the transformation is complete. */
3322 location_t loc = gimple_location (c->cand_stmt);
3323 tree name = create_phi_basis (c, phi, basis_name,
3324 loc, UNKNOWN_STRIDE);
3325
3326 /* Replace C with an add of the new basis phi and the
3327 increment. */
3328 replace_one_candidate (c, i, name);
3329 }
3330 }
3331 else
3332 {
3333 slsr_cand_t basis = lookup_cand (c->basis);
3334 tree basis_name = gimple_assign_lhs (basis->cand_stmt);
3335 replace_one_candidate (c, i, basis_name);
3336 }
3337 }
3338 }
3339
3340 if (c->sibling)
3341 replace_profitable_candidates (lookup_cand (c->sibling));
3342
3343 if (c->dependent)
3344 replace_profitable_candidates (lookup_cand (c->dependent));
3345 }
3346 \f
3347 /* Analyze costs of related candidates in the candidate vector,
3348 and make beneficial replacements. */
3349
3350 static void
3351 analyze_candidates_and_replace (void)
3352 {
3353 unsigned i;
3354 slsr_cand_t c;
3355
3356 /* Each candidate that has a null basis and a non-null
3357 dependent is the root of a tree of related statements.
3358 Analyze each tree to determine a subset of those
3359 statements that can be replaced with maximum benefit. */
3360 FOR_EACH_VEC_ELT (cand_vec, i, c)
3361 {
3362 slsr_cand_t first_dep;
3363
3364 if (c->basis != 0 || c->dependent == 0)
3365 continue;
3366
3367 if (dump_file && (dump_flags & TDF_DETAILS))
3368 fprintf (dump_file, "\nProcessing dependency tree rooted at %d.\n",
3369 c->cand_num);
3370
3371 first_dep = lookup_cand (c->dependent);
3372
3373 /* If this is a chain of CAND_REFs, unconditionally replace
3374 each of them with a strength-reduced data reference. */
3375 if (c->kind == CAND_REF)
3376 replace_refs (c);
3377
3378 /* If the common stride of all related candidates is a known
3379 constant, each candidate without a phi-dependence can be
3380 profitably replaced. Each replaces a multiply by a single
3381 add, with the possibility that a feeding add also goes dead.
3382 A candidate with a phi-dependence is replaced only if the
3383 compensation code it requires is offset by the strength
3384 reduction savings. */
3385 else if (TREE_CODE (c->stride) == INTEGER_CST)
3386 replace_uncond_cands_and_profitable_phis (first_dep);
3387
3388 /* When the stride is an SSA name, it may still be profitable
3389 to replace some or all of the dependent candidates, depending
3390 on whether the introduced increments can be reused, or are
3391 less expensive to calculate than the replaced statements. */
3392 else
3393 {
3394 enum machine_mode mode;
3395 bool speed;
3396
3397 /* Determine whether we'll be generating pointer arithmetic
3398 when replacing candidates. */
3399 address_arithmetic_p = (c->kind == CAND_ADD
3400 && POINTER_TYPE_P (c->cand_type));
3401
3402 /* If all candidates have already been replaced under other
3403 interpretations, nothing remains to be done. */
3404 if (!count_candidates (c))
3405 continue;
3406
3407 /* Construct an array of increments for this candidate chain. */
3408 incr_vec = XNEWVEC (incr_info, MAX_INCR_VEC_LEN);
3409 incr_vec_len = 0;
3410 record_increments (c);
3411
3412 /* Determine which increments are profitable to replace. */
3413 mode = TYPE_MODE (TREE_TYPE (gimple_assign_lhs (c->cand_stmt)));
3414 speed = optimize_cands_for_speed_p (c);
3415 analyze_increments (first_dep, mode, speed);
3416
3417 /* Insert initializers of the form T_0 = stride * increment
3418 for use in profitable replacements. */
3419 insert_initializers (first_dep);
3420 dump_incr_vec ();
3421
3422 /* Perform the replacements. */
3423 replace_profitable_candidates (first_dep);
3424 free (incr_vec);
3425 }
3426 }
3427 }
3428
3429 static unsigned
3430 execute_strength_reduction (void)
3431 {
3432 struct dom_walk_data walk_data;
3433
3434 /* Create the obstack where candidates will reside. */
3435 gcc_obstack_init (&cand_obstack);
3436
3437 /* Allocate the candidate vector. */
3438 cand_vec.create (128);
3439
3440 /* Allocate the mapping from statements to candidate indices. */
3441 stmt_cand_map = pointer_map_create ();
3442
3443 /* Create the obstack where candidate chains will reside. */
3444 gcc_obstack_init (&chain_obstack);
3445
3446 /* Allocate the mapping from base expressions to candidate chains. */
3447 base_cand_map.create (500);
3448
3449 /* Initialize the loop optimizer. We need to detect flow across
3450 back edges, and this gives us dominator information as well. */
3451 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
3452
3453 /* Set up callbacks for the generic dominator tree walker. */
3454 walk_data.dom_direction = CDI_DOMINATORS;
3455 walk_data.initialize_block_local_data = NULL;
3456 walk_data.before_dom_children = find_candidates_in_block;
3457 walk_data.after_dom_children = NULL;
3458 walk_data.global_data = NULL;
3459 walk_data.block_local_data_size = 0;
3460 init_walk_dominator_tree (&walk_data);
3461
3462 /* Walk the CFG in predominator order looking for strength reduction
3463 candidates. */
3464 walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);
3465
3466 if (dump_file && (dump_flags & TDF_DETAILS))
3467 {
3468 dump_cand_vec ();
3469 dump_cand_chains ();
3470 }
3471
3472 /* Analyze costs and make appropriate replacements. */
3473 analyze_candidates_and_replace ();
3474
3475 /* Free resources. */
3476 fini_walk_dominator_tree (&walk_data);
3477 loop_optimizer_finalize ();
3478 base_cand_map.dispose ();
3479 obstack_free (&chain_obstack, NULL);
3480 pointer_map_destroy (stmt_cand_map);
3481 cand_vec.release ();
3482 obstack_free (&cand_obstack, NULL);
3483
3484 return 0;
3485 }
3486
3487 static bool
3488 gate_strength_reduction (void)
3489 {
3490 return flag_tree_slsr;
3491 }
3492
3493 namespace {
3494
3495 const pass_data pass_data_strength_reduction =
3496 {
3497 GIMPLE_PASS, /* type */
3498 "slsr", /* name */
3499 OPTGROUP_NONE, /* optinfo_flags */
3500 true, /* has_gate */
3501 true, /* has_execute */
3502 TV_GIMPLE_SLSR, /* tv_id */
3503 ( PROP_cfg | PROP_ssa ), /* properties_required */
3504 0, /* properties_provided */
3505 0, /* properties_destroyed */
3506 0, /* todo_flags_start */
3507 TODO_verify_ssa, /* todo_flags_finish */
3508 };
3509
3510 class pass_strength_reduction : public gimple_opt_pass
3511 {
3512 public:
3513 pass_strength_reduction(gcc::context *ctxt)
3514 : gimple_opt_pass(pass_data_strength_reduction, ctxt)
3515 {}
3516
3517 /* opt_pass methods: */
3518 bool gate () { return gate_strength_reduction (); }
3519 unsigned int execute () { return execute_strength_reduction (); }
3520
3521 }; // class pass_strength_reduction
3522
3523 } // anon namespace
3524
3525 gimple_opt_pass *
3526 make_pass_strength_reduction (gcc::context *ctxt)
3527 {
3528 return new pass_strength_reduction (ctxt);
3529 }