/* Predictive commoning.
   Copyright (C) 2005-2015 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the
   Free Software Foundation; either version 3, or (at your option) any
   later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
/* This file implements the predictive commoning optimization.  Predictive
   commoning can be viewed as CSE around a loop, and with some improvements,
   as generalized strength reduction -- i.e., reusing values computed in
   earlier iterations of a loop in the later ones.  So far, the pass only
   handles the most useful case, that is, reusing values of memory references.
   If you think this is all just a special case of PRE, you are sort of right;
   however, concentrating on loops is simpler, and makes it possible to
   incorporate data dependence analysis to detect the opportunities, perform
   loop unrolling to avoid copies together with renaming immediately,
   and if needed, we could also take register pressure into account.
   Let us demonstrate what is done on an example:

   for (i = 0; i < 100; i++)
     {
       a[i+2] = a[i] + a[i+1];
       b[10] = b[10] + i;
       c[i] = c[99 - i];
       d[i] = d[i + 1];
     }

   1) We find data references in the loop, and split them to mutually
      independent groups (i.e., we find components of a data dependence
      graph).  We ignore read-read dependences whose distance is not constant.
      (TODO -- we could also ignore antidependences).  In this example, we
      find the following groups:

      a[i]{read}, a[i+1]{read}, a[i+2]{write}
      b[10]{read}, b[10]{write}
      c[99 - i]{read}, c[i]{write}
      d[i + 1]{read}, d[i]{write}
   2) Inside each of the groups, we verify several conditions:
      a) all the references must differ in indices only, and the indices
	 must all have the same step
      b) the references must dominate the loop latch (and thus, they must be
	 ordered by the dominance relation).
      c) the distance of the indices must be a small multiple of the step
      We are then able to compute the difference of the references (# of
      iterations before they point to the same place as the first of them).
      Also, in case there are writes in the loop, we split the groups into
      chains whose head is the write whose values are used by the reads in
      the same chain.  The chains are then processed independently,
      making the further transformations simpler.  Also, the shorter chains
      need the same number of registers, but may require a lower unrolling
      factor in order to get rid of the copies on the loop latch.
   In our example, we get the following chains (the chain for c is invalid).

   a[i]{read,+0}, a[i+1]{read,-1}, a[i+2]{write,-2}
   b[10]{read,+0}, b[10]{write,+0}
   d[i + 1]{read,+0}, d[i]{write,+1}

   3) For each read, we determine the read or write whose value it reuses,
      together with the distance of this reuse.  I.e., we take the last
      reference before it with distance 0, or the last of the references
      with the smallest positive distance to the read.  Then, we remove
      the references that are not used in any of these chains, discard the
      empty groups, and propagate all the links so that they point to the
      single root reference of the chain (adjusting their distance
      appropriately).  Some extra care needs to be taken for references with
      step 0.  In our example (the numbers indicate the distance of the
      reuse),

      a[i] --> (*) 2, a[i+1] --> (*) 1, a[i+2] (*)
      b[10] --> (*) 1, b[10] (*)

   4) The chains are combined together if possible.  If the corresponding
      elements of two chains are always combined together with the same
      operator, we remember just the result of this combination, instead
      of remembering the values separately.  We may need to perform
      reassociation to enable combining, for example

      e[i] + f[i+1] + e[i+1] + f[i]

      can be reassociated as

      (e[i] + f[i]) + (e[i+1] + f[i+1])

      and we can combine the chains for e and f into one chain.
   5) For each root reference (end of the chain) R, let N be the maximum
      distance of a reference reusing its value.  Variables R0 up to RN are
      created, together with phi nodes that transfer values from R1 .. RN to
      R0 .. R(N-1).
      Initial values are loaded into R0..R(N-1) (in case not all references
      must necessarily be accessed and they may trap, we may fail here;
      TODO sometimes, the loads could be guarded by a check for the number
      of iterations).  Values loaded/stored in roots are also copied to
      RN.  Other reads are replaced with the appropriate variable Ri.
      Everything is put into SSA form.

      As a small improvement, if R0 is dead after the root (i.e., all uses of
      the value with the maximum distance dominate the root), we can avoid
      creating RN and use R0 instead of it.

      In our example, we get (only the parts concerning a and b are shown):
      for (i = 0; i < 100; i++)
	{
	  f = phi (a[0], s);
	  s = phi (a[1], f);
	  x = phi (b[10], x);

	  f = f + s;
	  a[i+2] = f;
	  x = x + i;
	  b[10] = x;
	}
   6) The factor F for unrolling is determined as the least common multiple of
      (N + 1) for each root reference (N for references for which we avoided
      creating RN).  If F and the loop are small enough, the loop is unrolled F
      times.  The stores to RN (R0) in the copies of the loop body are
      periodically replaced with R0, R1, ... (R1, R2, ...), so that they can
      be coalesced and the copies can be eliminated.

      TODO -- copy propagation and other optimizations may change the live
      ranges of the temporary registers and prevent them from being coalesced;
      this may increase the register pressure.

      In our case, F = 2 and the (main loop of the) result is

      for (i = 0; i < ...; i += 2)
	{
	  f = phi (a[0], f);
	  s = phi (a[1], s);
	  x = phi (b[10], x);

	  f = f + s;
	  a[i+2] = f;
	  x = x + i;
	  b[10] = x;

	  s = s + f;
	  a[i+3] = s;
	  x = x + i;
	  b[10] = x;
	}
   TODO -- stores killing other stores can be taken into account, e.g.,
   for (i = 0; i < n; i++)
     {
       a[i] = 1;
       a[i+2] = 2;
     }

   can be replaced with

   t0 = a[0];
   t1 = a[1];
   for (i = 0; i < n; i++)
     {
       a[i] = 1;
       t2 = 2;
       t0 = t1;
       t1 = t2;
     }
   a[n] = t0;
   a[n+1] = t1;

   The interesting part is that this would generalize store motion; still,
   since store motion is performed elsewhere, it does not seem that important.

   Predictive commoning can be generalized for arbitrary computations (not
   just memory loads), and also nontrivial transfer functions (e.g., replacing
   i * i with ii_last + 2 * i + 1), to generalize strength reduction.  */
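
/* For illustration only (a sketch of the generalization mentioned above,
   not something this pass implements): reusing the previous value of i * i
   with the transfer function ii = ii + 2 * i + 1 would turn

   for (i = 0; i < n; i++)
     g[i] = i * i;

   into

   ii = 0;
   for (i = 0; i < n; i++)
     {
       g[i] = ii;
       ii = ii + 2 * i + 1;
     }

   replacing the multiplication in the loop by additions.  */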

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "alias.h"
#include "symtab.h"
#include "tree.h"
#include "fold-const.h"
#include "tm_p.h"
#include "cfgloop.h"
#include "predict.h"
#include "hard-reg-set.h"
#include "function.h"
#include "dominance.h"
#include "cfg.h"
#include "basic-block.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "tree-into-ssa.h"
#include "rtl.h"
#include "flags.h"
#include "insn-config.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "calls.h"
#include "emit-rtl.h"
#include "varasm.h"
#include "stmt.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "tree-chrec.h"
#include "params.h"
#include "gimple-pretty-print.h"
#include "tree-pass.h"
#include "tree-affine.h"
#include "tree-inline.h"
#include "wide-int-print.h"

/* The maximum number of iterations between the considered memory
   references.  */

#define MAX_DISTANCE (target_avail_regs < 16 ? 4 : 8)

/* Data references (or phi nodes that carry data reference values across
   loop iterations).  */

typedef struct dref_d
{
  /* The reference itself.  */
  struct data_reference *ref;

  /* The statement in which the reference appears.  */
  gimple stmt;

  /* In case STMT is a phi node, this field is set to the SSA name
     defined by it in replace_phis_by_defined_names (in order to avoid
     pointing to a phi node that got reallocated in the meantime).  */
  tree name_defined_by_phi;

  /* Distance of the reference from the root of the chain (in number of
     iterations of the loop).  */
  unsigned distance;

  /* Number of iterations offset from the first reference in the component.  */
  widest_int offset;

  /* Number of the reference in a component, in dominance ordering.  */
  unsigned pos;

  /* True if the memory reference is always accessed when the loop is
     entered.  */
  unsigned always_accessed : 1;
} *dref;


/* Type of the chain of the references.  */

enum chain_type
{
  /* The addresses of the references in the chain are constant.  */
  CT_INVARIANT,

  /* There are only loads in the chain.  */
  CT_LOAD,

  /* The root of the chain is a store, the rest are loads.  */
  CT_STORE_LOAD,

  /* A combination of two chains.  */
  CT_COMBINATION
};

/* Chains of data references.  */

typedef struct chain
{
  /* Type of the chain.  */
  enum chain_type type;

  /* For combination chains, the operator and the two chains that are
     combined, and the type of the result.  */
  enum tree_code op;
  tree rslt_type;
  struct chain *ch1, *ch2;

  /* The references in the chain.  */
  vec<dref> refs;

  /* The maximum distance of the reference in the chain from the root.  */
  unsigned length;

  /* The variables used to copy the value throughout iterations.  */
  vec<tree> vars;

  /* Initializers for the variables.  */
  vec<tree> inits;

  /* True if there is a use of a variable with the maximal distance
     that comes after the root in the loop.  */
  unsigned has_max_use_after : 1;

  /* True if all the memory references in the chain are always accessed.  */
  unsigned all_always_accessed : 1;

  /* True if this chain was combined together with some other chain.  */
  unsigned combined : 1;
} *chain_p;


/* Describes the knowledge about the step of the memory references in
   the component.  */

enum ref_step_type
{
  /* The step is zero.  */
  RS_INVARIANT,

  /* The step is nonzero.  */
  RS_NONZERO,

  /* The step may or may not be nonzero.  */
  RS_ANY
};

/* Components of the data dependence graph.  */

struct component
{
  /* The references in the component.  */
  vec<dref> refs;

  /* What we know about the step of the references in the component.  */
  enum ref_step_type comp_step;

  /* Next component in the list.  */
  struct component *next;
};

/* Bitmap of ssa names defined by looparound phi nodes covered by chains.  */

static bitmap looparound_phis;

/* Cache used by tree_to_aff_combination_expand.  */

static hash_map<tree, name_expansion *> *name_expansions;

/* Dumps data reference REF to FILE.  */

extern void dump_dref (FILE *, dref);
void
dump_dref (FILE *file, dref ref)
{
  if (ref->ref)
    {
      fprintf (file, "    ");
      print_generic_expr (file, DR_REF (ref->ref), TDF_SLIM);
      fprintf (file, " (id %u%s)\n", ref->pos,
	       DR_IS_READ (ref->ref) ? "" : ", write");

      fprintf (file, "      offset ");
      print_decs (ref->offset, file);
      fprintf (file, "\n");

      fprintf (file, "      distance %u\n", ref->distance);
    }
  else
    {
      if (gimple_code (ref->stmt) == GIMPLE_PHI)
	fprintf (file, "    looparound ref\n");
      else
	fprintf (file, "    combination ref\n");
      fprintf (file, "      in statement ");
      print_gimple_stmt (file, ref->stmt, 0, TDF_SLIM);
      fprintf (file, "\n");
      fprintf (file, "      distance %u\n", ref->distance);
    }
}

/* Dumps CHAIN to FILE.  */

extern void dump_chain (FILE *, chain_p);
void
dump_chain (FILE *file, chain_p chain)
{
  dref a;
  const char *chain_type;
  unsigned i;
  tree var;

  switch (chain->type)
    {
    case CT_INVARIANT:
      chain_type = "Load motion";
      break;

    case CT_LOAD:
      chain_type = "Loads-only";
      break;

    case CT_STORE_LOAD:
      chain_type = "Store-loads";
      break;

    case CT_COMBINATION:
      chain_type = "Combination";
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "%s chain %p%s\n", chain_type, (void *) chain,
	   chain->combined ? " (combined)" : "");
  if (chain->type != CT_INVARIANT)
    fprintf (file, "  max distance %u%s\n", chain->length,
	     chain->has_max_use_after ? "" : ", may reuse first");

  if (chain->type == CT_COMBINATION)
    {
      fprintf (file, "  equal to %p %s %p in type ",
	       (void *) chain->ch1, op_symbol_code (chain->op),
	       (void *) chain->ch2);
      print_generic_expr (file, chain->rslt_type, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (chain->vars.exists ())
    {
      fprintf (file, "  vars");
      FOR_EACH_VEC_ELT (chain->vars, i, var)
	{
	  fprintf (file, " ");
	  print_generic_expr (file, var, TDF_SLIM);
	}
      fprintf (file, "\n");
    }

  if (chain->inits.exists ())
    {
      fprintf (file, "  inits");
      FOR_EACH_VEC_ELT (chain->inits, i, var)
	{
	  fprintf (file, " ");
	  print_generic_expr (file, var, TDF_SLIM);
	}
      fprintf (file, "\n");
    }

  fprintf (file, "  references:\n");
  FOR_EACH_VEC_ELT (chain->refs, i, a)
    dump_dref (file, a);

  fprintf (file, "\n");
}

/* Dumps CHAINS to FILE.  */

extern void dump_chains (FILE *, vec<chain_p>);
void
dump_chains (FILE *file, vec<chain_p> chains)
{
  chain_p chain;
  unsigned i;

  FOR_EACH_VEC_ELT (chains, i, chain)
    dump_chain (file, chain);
}

/* Dumps COMP to FILE.  */

extern void dump_component (FILE *, struct component *);
void
dump_component (FILE *file, struct component *comp)
{
  dref a;
  unsigned i;

  fprintf (file, "Component%s:\n",
	   comp->comp_step == RS_INVARIANT ? " (invariant)" : "");
  FOR_EACH_VEC_ELT (comp->refs, i, a)
    dump_dref (file, a);
  fprintf (file, "\n");
}

/* Dumps COMPS to FILE.  */

extern void dump_components (FILE *, struct component *);
void
dump_components (FILE *file, struct component *comps)
{
  struct component *comp;

  for (comp = comps; comp; comp = comp->next)
    dump_component (file, comp);
}

/* Frees a chain CHAIN.  */

static void
release_chain (chain_p chain)
{
  dref ref;
  unsigned i;

  if (chain == NULL)
    return;

  FOR_EACH_VEC_ELT (chain->refs, i, ref)
    free (ref);

  chain->refs.release ();
  chain->vars.release ();
  chain->inits.release ();

  free (chain);
}

/* Frees CHAINS.  */

static void
release_chains (vec<chain_p> chains)
{
  unsigned i;
  chain_p chain;

  FOR_EACH_VEC_ELT (chains, i, chain)
    release_chain (chain);
  chains.release ();
}

/* Frees a component COMP.  */

static void
release_component (struct component *comp)
{
  comp->refs.release ();
  free (comp);
}

/* Frees list of components COMPS.  */

static void
release_components (struct component *comps)
{
  struct component *act, *next;

  for (act = comps; act; act = next)
    {
      next = act->next;
      release_component (act);
    }
}

/* Finds the root of the tree given by FATHERS that contains A, and performs
   path shortening.  */

static unsigned
component_of (unsigned fathers[], unsigned a)
{
  unsigned root, n;

  for (root = a; root != fathers[root]; root = fathers[root])
    continue;

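  /* Path shortening: make every node on the path from A point directly
     at ROOT, so that later queries are cheaper.  */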
  for (; a != root; a = n)
    {
      n = fathers[a];
      fathers[a] = root;
    }

  return root;
}

/* Join operation for the union-find structure (DFU).  FATHERS gives the tree,
   SIZES are sizes of the components, A and B are components to merge.  */

static void
merge_comps (unsigned fathers[], unsigned sizes[], unsigned a, unsigned b)
{
  unsigned ca = component_of (fathers, a);
  unsigned cb = component_of (fathers, b);

  if (ca == cb)
    return;

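  /* Union by size: hang the smaller component below the root of the
     larger one, keeping the trees shallow.  */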
  if (sizes[ca] < sizes[cb])
    {
      sizes[cb] += sizes[ca];
      fathers[ca] = cb;
    }
  else
    {
      sizes[ca] += sizes[cb];
      fathers[cb] = ca;
    }
}

/* Returns true if A is a reference that is suitable for predictive commoning
   in the innermost loop that contains it.  REF_STEP is set according to the
   step of the reference A.  */

static bool
suitable_reference_p (struct data_reference *a, enum ref_step_type *ref_step)
{
  tree ref = DR_REF (a), step = DR_STEP (a);

  if (!step
      || TREE_THIS_VOLATILE (ref)
      || !is_gimple_reg_type (TREE_TYPE (ref))
      || tree_could_throw_p (ref))
    return false;

  if (integer_zerop (step))
    *ref_step = RS_INVARIANT;
  else if (integer_nonzerop (step))
    *ref_step = RS_NONZERO;
  else
    *ref_step = RS_ANY;

  return true;
}

/* Stores DR_OFFSET (DR) + DR_INIT (DR) to OFFSET.  */

static void
aff_combination_dr_offset (struct data_reference *dr, aff_tree *offset)
{
  tree type = TREE_TYPE (DR_OFFSET (dr));
  aff_tree delta;

  tree_to_aff_combination_expand (DR_OFFSET (dr), type, offset,
				  &name_expansions);
  aff_combination_const (&delta, type, wi::to_widest (DR_INIT (dr)));
  aff_combination_add (offset, &delta);
}

/* Determines the number of iterations of the innermost enclosing loop before B
   refers to exactly the same location as A and stores it to OFF.  If A and
   B do not have the same step, if they never meet, or if anything else fails,
   false is returned; otherwise true.  (E.g., for A = a[i] and B = a[i + 2]
   with 4-byte elements, *OFF is set to -2, matching the offsets shown in the
   example at the top of this file.)  Both A and B are assumed to satisfy
   suitable_reference_p.  */

static bool
determine_offset (struct data_reference *a, struct data_reference *b,
		  widest_int *off)
{
  aff_tree diff, baseb, step;
  tree typea, typeb;

  /* Check that both the references access the location in the same type.  */
  typea = TREE_TYPE (DR_REF (a));
  typeb = TREE_TYPE (DR_REF (b));
  if (!useless_type_conversion_p (typeb, typea))
    return false;

  /* Check whether the base address and the step of both references are the
     same.  */
  if (!operand_equal_p (DR_STEP (a), DR_STEP (b), 0)
      || !operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b), 0))
    return false;

  if (integer_zerop (DR_STEP (a)))
    {
      /* If the references have loop invariant address, check that they access
	 exactly the same location.  */
      *off = 0;
      return (operand_equal_p (DR_OFFSET (a), DR_OFFSET (b), 0)
	      && operand_equal_p (DR_INIT (a), DR_INIT (b), 0));
    }

  /* Compare the offsets of the addresses, and check whether the difference
     is a multiple of step.  */
  aff_combination_dr_offset (a, &diff);
  aff_combination_dr_offset (b, &baseb);
  aff_combination_scale (&baseb, -1);
  aff_combination_add (&diff, &baseb);

  tree_to_aff_combination_expand (DR_STEP (a), TREE_TYPE (DR_STEP (a)),
				  &step, &name_expansions);
  return aff_combination_constant_multiple_p (&diff, &step, off);
}

/* Returns the last basic block in LOOP for which we are sure that
   it is executed whenever the loop is entered.  */

static basic_block
last_always_executed_block (struct loop *loop)
{
  unsigned i;
  vec<edge> exits = get_loop_exit_edges (loop);
  edge ex;
  basic_block last = loop->latch;

  FOR_EACH_VEC_ELT (exits, i, ex)
    last = nearest_common_dominator (CDI_DOMINATORS, last, ex->src);
  exits.release ();

  return last;
}

/* Splits the dependence graph on DATAREFS described by DEPENDS into
   components.  */

static struct component *
split_data_refs_to_components (struct loop *loop,
			       vec<data_reference_p> datarefs,
			       vec<ddr_p> depends)
{
  unsigned i, n = datarefs.length ();
  unsigned ca, ia, ib, bad;
  unsigned *comp_father = XNEWVEC (unsigned, n + 1);
  unsigned *comp_size = XNEWVEC (unsigned, n + 1);
  struct component **comps;
  struct data_reference *dr, *dra, *drb;
  struct data_dependence_relation *ddr;
  struct component *comp_list = NULL, *comp;
  dref dataref;
  basic_block last_always_executed = last_always_executed_block (loop);

  FOR_EACH_VEC_ELT (datarefs, i, dr)
    {
      if (!DR_REF (dr))
	{
	  /* A fake reference for call or asm_expr that may clobber memory;
	     just fail.  */
	  goto end;
	}
      /* The predcom pass is not prepared to handle calls with data
	 references.  */
      if (is_gimple_call (DR_STMT (dr)))
	goto end;
      dr->aux = (void *) (size_t) i;
      comp_father[i] = i;
      comp_size[i] = 1;
    }

  /* A component reserved for the "bad" data references.  */
  comp_father[n] = n;
  comp_size[n] = 1;

  FOR_EACH_VEC_ELT (datarefs, i, dr)
    {
      enum ref_step_type dummy;

      if (!suitable_reference_p (dr, &dummy))
	{
	  ia = (unsigned) (size_t) dr->aux;
	  merge_comps (comp_father, comp_size, n, ia);
	}
    }

  FOR_EACH_VEC_ELT (depends, i, ddr)
    {
      widest_int dummy_off;

      if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
	continue;

      dra = DDR_A (ddr);
      drb = DDR_B (ddr);
      ia = component_of (comp_father, (unsigned) (size_t) dra->aux);
      ib = component_of (comp_father, (unsigned) (size_t) drb->aux);
      if (ia == ib)
	continue;

      bad = component_of (comp_father, n);

      /* If both A and B are reads, we may ignore unsuitable dependences.  */
      if (DR_IS_READ (dra) && DR_IS_READ (drb))
	{
	  if (ia == bad || ib == bad
	      || !determine_offset (dra, drb, &dummy_off))
	    continue;
	}
      /* If A is a read and B a write or vice versa and there is an unsuitable
	 dependence, instead of merging both components into a component
	 that will certainly not pass suitable_component_p, just put the
	 read into the bad component; perhaps at least the write together with
	 all the other data refs in its component will be optimizable.  */
      else if (DR_IS_READ (dra) && ib != bad)
	{
	  if (ia == bad)
	    continue;
	  else if (!determine_offset (dra, drb, &dummy_off))
	    {
	      merge_comps (comp_father, comp_size, bad, ia);
	      continue;
	    }
	}
      else if (DR_IS_READ (drb) && ia != bad)
	{
	  if (ib == bad)
	    continue;
	  else if (!determine_offset (dra, drb, &dummy_off))
	    {
	      merge_comps (comp_father, comp_size, bad, ib);
	      continue;
	    }
	}

      merge_comps (comp_father, comp_size, ia, ib);
    }

  comps = XCNEWVEC (struct component *, n);
  bad = component_of (comp_father, n);
  FOR_EACH_VEC_ELT (datarefs, i, dr)
    {
      ia = (unsigned) (size_t) dr->aux;
      ca = component_of (comp_father, ia);
      if (ca == bad)
	continue;

      comp = comps[ca];
      if (!comp)
	{
	  comp = XCNEW (struct component);
	  comp->refs.create (comp_size[ca]);
	  comps[ca] = comp;
	}

      dataref = XCNEW (struct dref_d);
      dataref->ref = dr;
      dataref->stmt = DR_STMT (dr);
      dataref->offset = 0;
      dataref->distance = 0;

      dataref->always_accessed
	= dominated_by_p (CDI_DOMINATORS, last_always_executed,
			  gimple_bb (dataref->stmt));
      dataref->pos = comp->refs.length ();
      comp->refs.quick_push (dataref);
    }

  for (i = 0; i < n; i++)
    {
      comp = comps[i];
      if (comp)
	{
	  comp->next = comp_list;
	  comp_list = comp;
	}
    }
  free (comps);

end:
  free (comp_father);
  free (comp_size);
  return comp_list;
}

/* Returns true if the component COMP satisfies the conditions
   described in 2) at the beginning of this file.  LOOP is the current
   loop.  */

static bool
suitable_component_p (struct loop *loop, struct component *comp)
{
  unsigned i;
  dref a, first;
  basic_block ba, bp = loop->header;
  bool ok, has_write = false;

  FOR_EACH_VEC_ELT (comp->refs, i, a)
    {
      ba = gimple_bb (a->stmt);

      if (!just_once_each_iteration_p (loop, ba))
	return false;

      gcc_assert (dominated_by_p (CDI_DOMINATORS, ba, bp));
      bp = ba;

      if (DR_IS_WRITE (a->ref))
	has_write = true;
    }

  first = comp->refs[0];
  ok = suitable_reference_p (first->ref, &comp->comp_step);
  gcc_assert (ok);
  first->offset = 0;

  for (i = 1; comp->refs.iterate (i, &a); i++)
    {
      if (!determine_offset (first->ref, a->ref, &a->offset))
	return false;

#ifdef ENABLE_CHECKING
      {
	enum ref_step_type a_step;
	ok = suitable_reference_p (a->ref, &a_step);
	gcc_assert (ok && a_step == comp->comp_step);
      }
#endif
    }

  /* If there is a write inside the component, we must know whether the
     step is nonzero or not -- we would not otherwise be able to recognize
     whether the value accessed by reads comes from the OFFSET-th iteration
     or the previous one.  */
  if (has_write && comp->comp_step == RS_ANY)
    return false;

  return true;
}

/* Check the conditions on references inside each of components COMPS,
   and remove the unsuitable components from the list.  The new list
   of components is returned.  The conditions are described in 2) at
   the beginning of this file.  LOOP is the current loop.  */

static struct component *
filter_suitable_components (struct loop *loop, struct component *comps)
{
  struct component **comp, *act;

  for (comp = &comps; *comp; )
    {
      act = *comp;
      if (suitable_component_p (loop, act))
	comp = &act->next;
      else
	{
	  dref ref;
	  unsigned i;

	  *comp = act->next;
	  FOR_EACH_VEC_ELT (act->refs, i, ref)
	    free (ref);
	  release_component (act);
	}
    }

  return comps;
}

/* Compares two drefs A and B by their offset and position.  Callback for
   qsort.  */

static int
order_drefs (const void *a, const void *b)
{
  const dref *const da = (const dref *) a;
  const dref *const db = (const dref *) b;
  int offcmp = wi::cmps ((*da)->offset, (*db)->offset);

  if (offcmp != 0)
    return offcmp;

  return (*da)->pos - (*db)->pos;
}

/* Returns the root of the CHAIN.  */

static inline dref
get_chain_root (chain_p chain)
{
  return chain->refs[0];
}

/* Adds REF to the chain CHAIN.  */

static void
add_ref_to_chain (chain_p chain, dref ref)
{
  dref root = get_chain_root (chain);

  gcc_assert (wi::les_p (root->offset, ref->offset));
  widest_int dist = ref->offset - root->offset;
  if (wi::leu_p (MAX_DISTANCE, dist))
    {
      free (ref);
      return;
    }
  gcc_assert (wi::fits_uhwi_p (dist));

  chain->refs.safe_push (ref);

  ref->distance = dist.to_uhwi ();

  if (ref->distance >= chain->length)
    {
      chain->length = ref->distance;
      chain->has_max_use_after = false;
    }

  if (ref->distance == chain->length
      && ref->pos > root->pos)
    chain->has_max_use_after = true;

  chain->all_always_accessed &= ref->always_accessed;
}

/* Returns the chain for invariant component COMP.  */

static chain_p
make_invariant_chain (struct component *comp)
{
  chain_p chain = XCNEW (struct chain);
  unsigned i;
  dref ref;

  chain->type = CT_INVARIANT;

  chain->all_always_accessed = true;

  FOR_EACH_VEC_ELT (comp->refs, i, ref)
    {
      chain->refs.safe_push (ref);
      chain->all_always_accessed &= ref->always_accessed;
    }

  return chain;
}

/* Make a new chain rooted at REF.  */

static chain_p
make_rooted_chain (dref ref)
{
  chain_p chain = XCNEW (struct chain);

  chain->type = DR_IS_READ (ref->ref) ? CT_LOAD : CT_STORE_LOAD;

  chain->refs.safe_push (ref);
  chain->all_always_accessed = ref->always_accessed;

  ref->distance = 0;

  return chain;
}

/* Returns true if CHAIN is not trivial.  */

static bool
nontrivial_chain_p (chain_p chain)
{
  return chain != NULL && chain->refs.length () > 1;
}

/* Returns the ssa name that contains the value of REF, or NULL_TREE if there
   is no such name.  */

static tree
name_for_ref (dref ref)
{
  tree name;

  if (is_gimple_assign (ref->stmt))
    {
      if (!ref->ref || DR_IS_READ (ref->ref))
	name = gimple_assign_lhs (ref->stmt);
      else
	name = gimple_assign_rhs1 (ref->stmt);
    }
  else
    name = PHI_RESULT (ref->stmt);

  return (TREE_CODE (name) == SSA_NAME ? name : NULL_TREE);
}

/* Returns true if REF is a valid initializer for ROOT with given DISTANCE (in
   iterations of the innermost enclosing loop).  */

static bool
valid_initializer_p (struct data_reference *ref,
		     unsigned distance, struct data_reference *root)
{
  aff_tree diff, base, step;
  widest_int off;

  /* Both REF and ROOT must be accessing the same object.  */
  if (!operand_equal_p (DR_BASE_ADDRESS (ref), DR_BASE_ADDRESS (root), 0))
    return false;

  /* The initializer is defined outside of the loop, hence its address must be
     invariant inside the loop.  */
  gcc_assert (integer_zerop (DR_STEP (ref)));

  /* If the address of the reference is invariant, the initializer must access
     exactly the same location.  */
  if (integer_zerop (DR_STEP (root)))
    return (operand_equal_p (DR_OFFSET (ref), DR_OFFSET (root), 0)
	    && operand_equal_p (DR_INIT (ref), DR_INIT (root), 0));

  /* Verify that this index of REF is equal to the root's index at
     -DISTANCE-th iteration.  */
  aff_combination_dr_offset (root, &diff);
  aff_combination_dr_offset (ref, &base);
  aff_combination_scale (&base, -1);
  aff_combination_add (&diff, &base);

  tree_to_aff_combination_expand (DR_STEP (root), TREE_TYPE (DR_STEP (root)),
				  &step, &name_expansions);
  if (!aff_combination_constant_multiple_p (&diff, &step, &off))
    return false;

  if (off != distance)
    return false;

  return true;
}

/* Finds the looparound phi node of LOOP that copies the value of REF, and if
   its initial value is correct (equal to the initial value of REF shifted by
   one iteration), returns the phi node.  Otherwise, NULL is returned.  ROOT
   is the root of the current chain.  */

static gphi *
find_looparound_phi (struct loop *loop, dref ref, dref root)
{
  tree name, init, init_ref;
  gphi *phi = NULL;
  gimple init_stmt;
  edge latch = loop_latch_edge (loop);
  struct data_reference init_dr;
  gphi_iterator psi;

  if (is_gimple_assign (ref->stmt))
    {
      if (DR_IS_READ (ref->ref))
	name = gimple_assign_lhs (ref->stmt);
      else
	name = gimple_assign_rhs1 (ref->stmt);
    }
  else
    name = PHI_RESULT (ref->stmt);
  if (!name)
    return NULL;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();
      if (PHI_ARG_DEF_FROM_EDGE (phi, latch) == name)
	break;
    }

  if (gsi_end_p (psi))
    return NULL;

  init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
  if (TREE_CODE (init) != SSA_NAME)
    return NULL;
  init_stmt = SSA_NAME_DEF_STMT (init);
  if (gimple_code (init_stmt) != GIMPLE_ASSIGN)
    return NULL;
  gcc_assert (gimple_assign_lhs (init_stmt) == init);

  init_ref = gimple_assign_rhs1 (init_stmt);
  if (!REFERENCE_CLASS_P (init_ref)
      && !DECL_P (init_ref))
    return NULL;

  /* Analyze the behavior of INIT_REF with respect to LOOP (innermost
     loop enclosing PHI).  */
  memset (&init_dr, 0, sizeof (struct data_reference));
  DR_REF (&init_dr) = init_ref;
  DR_STMT (&init_dr) = phi;
  if (!dr_analyze_innermost (&init_dr, loop))
    return NULL;

  if (!valid_initializer_p (&init_dr, ref->distance + 1, root->ref))
    return NULL;

  return phi;
}

/* Adds a reference for the looparound copy of REF in PHI to CHAIN.  */

static void
insert_looparound_copy (chain_p chain, dref ref, gphi *phi)
{
  dref nw = XCNEW (struct dref_d), aref;
  unsigned i;

  nw->stmt = phi;
  nw->distance = ref->distance + 1;
  nw->always_accessed = 1;

  FOR_EACH_VEC_ELT (chain->refs, i, aref)
    if (aref->distance >= nw->distance)
      break;
  chain->refs.safe_insert (i, nw);

  if (nw->distance > chain->length)
    {
      chain->length = nw->distance;
      chain->has_max_use_after = false;
    }
}

/* For references in CHAIN that are copied around the LOOP (created previously
   by PRE, or by the user), add the results of such copies to the chain.  This
   enables us to remove the copies by unrolling, and may need fewer registers
   (also, it may allow us to combine chains together).  */

static void
add_looparound_copies (struct loop *loop, chain_p chain)
{
  unsigned i;
  dref ref, root = get_chain_root (chain);
  gphi *phi;

  FOR_EACH_VEC_ELT (chain->refs, i, ref)
    {
      phi = find_looparound_phi (loop, ref, root);
      if (!phi)
	continue;

      bitmap_set_bit (looparound_phis, SSA_NAME_VERSION (PHI_RESULT (phi)));
      insert_looparound_copy (chain, ref, phi);
    }
}

/* Find roots of the values and determine distances in the component COMP.
   The references are redistributed into CHAINS.  LOOP is the current
   loop.  */

static void
determine_roots_comp (struct loop *loop,
		      struct component *comp,
		      vec<chain_p> *chains)
{
  unsigned i;
  dref a;
  chain_p chain = NULL;
  widest_int last_ofs = 0;

  /* Invariants are handled specially.  */
  if (comp->comp_step == RS_INVARIANT)
    {
      chain = make_invariant_chain (comp);
      chains->safe_push (chain);
      return;
    }

  comp->refs.qsort (order_drefs);

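  /* Walk the references in the order of increasing offsets.  A write, or a
     reference whose offset gap from the current root reaches MAX_DISTANCE,
     starts a new chain.  */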
  FOR_EACH_VEC_ELT (comp->refs, i, a)
    {
      if (!chain || DR_IS_WRITE (a->ref)
	  || wi::leu_p (MAX_DISTANCE, a->offset - last_ofs))
	{
	  if (nontrivial_chain_p (chain))
	    {
	      add_looparound_copies (loop, chain);
	      chains->safe_push (chain);
	    }
	  else
	    release_chain (chain);
	  chain = make_rooted_chain (a);
	  last_ofs = a->offset;
	  continue;
	}

      add_ref_to_chain (chain, a);
    }

  if (nontrivial_chain_p (chain))
    {
      add_looparound_copies (loop, chain);
      chains->safe_push (chain);
    }
  else
    release_chain (chain);
}

/* Find roots of the values and determine distances in components COMPS, and
   separate the references into CHAINS.  LOOP is the current loop.  */

static void
determine_roots (struct loop *loop,
		 struct component *comps, vec<chain_p> *chains)
{
  struct component *comp;

  for (comp = comps; comp; comp = comp->next)
    determine_roots_comp (loop, comp, chains);
}

/* Replace the reference in statement STMT with temporary variable
   NEW_TREE.  If SET is true, NEW_TREE is instead initialized to the value of
   the reference in the statement.  IN_LHS is true if the reference
   is in the lhs of STMT, false if it is in rhs.  */

static void
replace_ref_with (gimple stmt, tree new_tree, bool set, bool in_lhs)
{
  tree val;
  gassign *new_stmt;
  gimple_stmt_iterator bsi, psi;

  if (gimple_code (stmt) == GIMPLE_PHI)
    {
      gcc_assert (!in_lhs && !set);

      val = PHI_RESULT (stmt);
      bsi = gsi_after_labels (gimple_bb (stmt));
      psi = gsi_for_stmt (stmt);
      remove_phi_node (&psi, false);

      /* Turn the phi node into GIMPLE_ASSIGN.  */
      new_stmt = gimple_build_assign (val, new_tree);
      gsi_insert_before (&bsi, new_stmt, GSI_NEW_STMT);
      return;
    }

  /* Since the reference is of gimple_reg type, it should only
     appear as lhs or rhs of a modify statement.  */
  gcc_assert (is_gimple_assign (stmt));

  bsi = gsi_for_stmt (stmt);

  /* If we do not need to initialize NEW_TREE, just replace the use of OLD.  */
  if (!set)
    {
      gcc_assert (!in_lhs);
      gimple_assign_set_rhs_from_tree (&bsi, new_tree);
      stmt = gsi_stmt (bsi);
      update_stmt (stmt);
      return;
    }

  if (in_lhs)
    {
      /* We have statement

	 OLD = VAL

	 If OLD is a memory reference, then VAL is gimple_val, and we transform
	 this to

	 OLD = VAL
	 NEW = VAL

	 Otherwise, we are replacing a combination chain,
	 VAL is the expression that performs the combination, and OLD is an
	 SSA name.  In this case, we transform the assignment to

	 OLD = VAL
	 NEW = OLD  */

      val = gimple_assign_lhs (stmt);
      if (TREE_CODE (val) != SSA_NAME)
	{
	  val = gimple_assign_rhs1 (stmt);
	  gcc_assert (gimple_assign_single_p (stmt));
	  if (TREE_CLOBBER_P (val))
	    val = get_or_create_ssa_default_def (cfun, SSA_NAME_VAR (new_tree));
	  else
	    gcc_assert (gimple_assign_copy_p (stmt));
	}
    }
  else
    {
      /* VAL = OLD

	 is transformed to

	 VAL = OLD
	 NEW = VAL  */

      val = gimple_assign_lhs (stmt);
    }

  new_stmt = gimple_build_assign (new_tree, unshare_expr (val));
  gsi_insert_after (&bsi, new_stmt, GSI_NEW_STMT);
}

/* Returns a memory reference to DR in the ITER-th iteration of
   the loop it was analyzed in.  Append init stmts to STMTS.  */

static tree
ref_at_iteration (data_reference_p dr, int iter, gimple_seq *stmts)
{
  tree off = DR_OFFSET (dr);
  tree coff = DR_INIT (dr);
  if (iter == 0)
    ;
  else if (TREE_CODE (DR_STEP (dr)) == INTEGER_CST)
    coff = size_binop (PLUS_EXPR, coff,
		       size_binop (MULT_EXPR, DR_STEP (dr), ssize_int (iter)));
  else
    off = size_binop (PLUS_EXPR, off,
		      size_binop (MULT_EXPR, DR_STEP (dr), ssize_int (iter)));
  tree addr = fold_build_pointer_plus (DR_BASE_ADDRESS (dr), off);
  addr = force_gimple_operand_1 (unshare_expr (addr), stmts,
				 is_gimple_mem_ref_addr, NULL_TREE);
  tree alias_ptr = fold_convert (reference_alias_ptr_type (DR_REF (dr)), coff);
  /* While data-ref analysis punts on bit offsets it still handles
     bitfield accesses at byte boundaries.  Cope with that.  Note that
     we cannot simply re-apply the outer COMPONENT_REF because the
     byte-granular portion of it is already applied via DR_INIT and
     DR_OFFSET, so simply build a BIT_FIELD_REF knowing that the bits
     start at offset zero.  */
  if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF
      && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1)))
    {
      tree field = TREE_OPERAND (DR_REF (dr), 1);
      return build3 (BIT_FIELD_REF, TREE_TYPE (DR_REF (dr)),
		     build2 (MEM_REF, DECL_BIT_FIELD_TYPE (field),
			     addr, alias_ptr),
		     DECL_SIZE (field), bitsize_zero_node);
    }
  else
    return fold_build2 (MEM_REF, TREE_TYPE (DR_REF (dr)), addr, alias_ptr);
}

/* Get the initialization expression for the INDEX-th temporary variable
   of CHAIN.  */

static tree
get_init_expr (chain_p chain, unsigned index)
{
  if (chain->type == CT_COMBINATION)
    {
      tree e1 = get_init_expr (chain->ch1, index);
      tree e2 = get_init_expr (chain->ch2, index);

      return fold_build2 (chain->op, chain->rslt_type, e1, e2);
    }
  else
    return chain->inits[index];
}

/* Returns a new temporary variable used for the I-th variable carrying
   value of REF.  The variable's uid is marked in TMP_VARS.  */

static tree
predcom_tmp_var (tree ref, unsigned i, bitmap tmp_vars)
{
  tree type = TREE_TYPE (ref);
  /* We never access the components of the temporary variable in predictive
     commoning.  */
  tree var = create_tmp_reg (type, get_lsm_tmp_name (ref, i));
  bitmap_set_bit (tmp_vars, DECL_UID (var));
  return var;
}

/* Creates the variables for CHAIN, as well as phi nodes for them and
   initialization on entry to LOOP.  Uids of the newly created
   temporary variables are marked in TMP_VARS.  */

static void
initialize_root_vars (struct loop *loop, chain_p chain, bitmap tmp_vars)
{
  unsigned i;
  unsigned n = chain->length;
  dref root = get_chain_root (chain);
  bool reuse_first = !chain->has_max_use_after;
  tree ref, init, var, next;
  gphi *phi;
  gimple_seq stmts;
  edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);

  /* If N == 0, then all the references are within the single iteration.  And
     since this is a nonempty chain, reuse_first cannot be true.  */
  gcc_assert (n > 0 || !reuse_first);

  chain->vars.create (n + 1);

  if (chain->type == CT_COMBINATION)
    ref = gimple_assign_lhs (root->stmt);
  else
    ref = DR_REF (root->ref);

  for (i = 0; i < n + (reuse_first ? 0 : 1); i++)
    {
      var = predcom_tmp_var (ref, i, tmp_vars);
      chain->vars.quick_push (var);
    }
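  /* If the first variable can be reused, the list of variables is circular:
     the slot for the maximal distance refers back to VARS[0].  */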
  if (reuse_first)
    chain->vars.quick_push (chain->vars[0]);

  FOR_EACH_VEC_ELT (chain->vars, i, var)
    chain->vars[i] = make_ssa_name (var);

  for (i = 0; i < n; i++)
    {
      var = chain->vars[i];
      next = chain->vars[i + 1];
      init = get_init_expr (chain, i);

      init = force_gimple_operand (init, &stmts, true, NULL_TREE);
      if (stmts)
	gsi_insert_seq_on_edge_immediate (entry, stmts);

      phi = create_phi_node (var, loop->header);
      add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
      add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
    }
}

/* Create the variables and initialization statement for root of chain
   CHAIN.  Uids of the newly created temporary variables are marked
   in TMP_VARS.  */

static void
initialize_root (struct loop *loop, chain_p chain, bitmap tmp_vars)
{
  dref root = get_chain_root (chain);
  bool in_lhs = (chain->type == CT_STORE_LOAD
		 || chain->type == CT_COMBINATION);

  initialize_root_vars (loop, chain, tmp_vars);
  replace_ref_with (root->stmt,
		    chain->vars[chain->length],
		    true, in_lhs);
}

/* Initializes a variable for load motion for ROOT and prepares phi nodes and
   initialization on entry to LOOP if necessary.  The ssa name for the variable
   is stored in VARS.  If WRITTEN is true, a phi node to copy its value
   around the loop is also created.  Uid of the newly created temporary
   variable is marked in TMP_VARS.  INITS is the list containing the (single)
   initializer.  */

static void
initialize_root_vars_lm (struct loop *loop, dref root, bool written,
			 vec<tree> *vars, vec<tree> inits,
			 bitmap tmp_vars)
{
  unsigned i;
  tree ref = DR_REF (root->ref), init, var, next;
  gimple_seq stmts;
  gphi *phi;
  edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);

  /* Find the initializer for the variable, and check that it cannot
     trap.  */
  init = inits[0];

  vars->create (written ? 2 : 1);
  var = predcom_tmp_var (ref, 0, tmp_vars);
  vars->quick_push (var);
  if (written)
    vars->quick_push ((*vars)[0]);

  FOR_EACH_VEC_ELT (*vars, i, var)
    (*vars)[i] = make_ssa_name (var);

  var = (*vars)[0];

  init = force_gimple_operand (init, &stmts, written, NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (entry, stmts);

  if (written)
    {
      next = (*vars)[1];
      phi = create_phi_node (var, loop->header);
      add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
      add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
    }
  else
    {
      gassign *init_stmt = gimple_build_assign (var, init);
      gsi_insert_on_edge_immediate (entry, init_stmt);
    }
}


/* Execute load motion for references in chain CHAIN.  Uids of the newly
   created temporary variables are marked in TMP_VARS.  */

static void
execute_load_motion (struct loop *loop, chain_p chain, bitmap tmp_vars)
{
  auto_vec<tree> vars;
  dref a;
  unsigned n_writes = 0, ridx, i;
  tree var;

  gcc_assert (chain->type == CT_INVARIANT);
  gcc_assert (!chain->combined);
  FOR_EACH_VEC_ELT (chain->refs, i, a)
    if (DR_IS_WRITE (a->ref))
      n_writes++;

  /* If there are no reads in the loop, there is nothing to do.  */
  if (n_writes == chain->refs.length ())
    return;

  initialize_root_vars_lm (loop, get_chain_root (chain), n_writes > 0,
			   &vars, chain->inits, tmp_vars);

  ridx = 0;
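  /* Intermediate writes get a fresh SSA name for VARS[0]; the last write
     defines VARS[1], the name carried over the loop latch.  Each reference
     is replaced with the name that is current at its position.  */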
  FOR_EACH_VEC_ELT (chain->refs, i, a)
    {
      bool is_read = DR_IS_READ (a->ref);

      if (DR_IS_WRITE (a->ref))
	{
	  n_writes--;
	  if (n_writes)
	    {
	      var = vars[0];
	      var = make_ssa_name (SSA_NAME_VAR (var));
	      vars[0] = var;
	    }
	  else
	    ridx = 1;
	}

      replace_ref_with (a->stmt, vars[ridx],
			!is_read, !is_read);
    }
}

/* Returns the single statement in which NAME is used, except for
   looparound phi nodes contained in one of the chains.  If there is no
   such statement, or if there are multiple such statements, NULL is
   returned.  */

static gimple
single_nonlooparound_use (tree name)
{
  use_operand_p use;
  imm_use_iterator it;
  gimple stmt, ret = NULL;

  FOR_EACH_IMM_USE_FAST (use, it, name)
    {
      stmt = USE_STMT (use);

      if (gimple_code (stmt) == GIMPLE_PHI)
	{
	  /* Ignore uses in looparound phi nodes.  Uses in other phi nodes
	     could not be processed anyway, so just fail for them.  */
	  if (bitmap_bit_p (looparound_phis,
			    SSA_NAME_VERSION (PHI_RESULT (stmt))))
	    continue;

	  return NULL;
	}
      else if (is_gimple_debug (stmt))
	continue;
      else if (ret != NULL)
	return NULL;
      else
	ret = stmt;
    }

  return ret;
}

/* Remove statement STMT, as well as the chain of assignments in which it is
   used.  */

static void
remove_stmt (gimple stmt)
{
  tree name;
  gimple next;
  gimple_stmt_iterator psi;

  if (gimple_code (stmt) == GIMPLE_PHI)
    {
      name = PHI_RESULT (stmt);
      next = single_nonlooparound_use (name);
      reset_debug_uses (stmt);
      psi = gsi_for_stmt (stmt);
      remove_phi_node (&psi, true);

      if (!next
	  || !gimple_assign_ssa_name_copy_p (next)
	  || gimple_assign_rhs1 (next) != name)
	return;

      stmt = next;
    }

  while (1)
    {
      gimple_stmt_iterator bsi;

      bsi = gsi_for_stmt (stmt);

      name = gimple_assign_lhs (stmt);
      gcc_assert (TREE_CODE (name) == SSA_NAME);

      next = single_nonlooparound_use (name);
      reset_debug_uses (stmt);

      unlink_stmt_vdef (stmt);
      gsi_remove (&bsi, true);
      release_defs (stmt);

      if (!next
	  || !gimple_assign_ssa_name_copy_p (next)
	  || gimple_assign_rhs1 (next) != name)
	return;

      stmt = next;
    }
}

/* Perform the predictive commoning optimization for a chain CHAIN.
   Uids of the newly created temporary variables are marked in TMP_VARS.  */

static void
execute_pred_commoning_chain (struct loop *loop, chain_p chain,
			      bitmap tmp_vars)
{
  unsigned i;
  dref a;
  tree var;

  if (chain->combined)
    {
      /* For combined chains, just remove the statements that are used to
	 compute the values of the expression (except for the root one).
	 We delay this until after all chains are processed.  */
    }
  else
    {
      /* For non-combined chains, set up the variables that hold its value,
	 and replace the uses of the original references by these
	 variables.  */
      initialize_root (loop, chain, tmp_vars);
      for (i = 1; chain->refs.iterate (i, &a); i++)
	{
	  var = chain->vars[chain->length - a->distance];
	  replace_ref_with (a->stmt, var, false, false);
	}
    }
}

/* Determines the unroll factor necessary to remove as many temporary variable
   copies as possible.  CHAINS is the list of chains that will be
   optimized.  */

static unsigned
determine_unroll_factor (vec<chain_p> chains)
{
  chain_p chain;
  unsigned factor = 1, af, nfactor, i;
  unsigned max = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);

  FOR_EACH_VEC_ELT (chains, i, chain)
    {
      if (chain->type == CT_INVARIANT)
	continue;

      if (chain->combined)
	{
	  /* For combined chains, we can't handle unrolling if we replace
	     looparound PHIs.  */
	  dref a;
	  unsigned j;
	  for (j = 1; chain->refs.iterate (j, &a); j++)
	    if (gimple_code (a->stmt) == GIMPLE_PHI)
	      return 1;
	  continue;
	}

      /* The best unroll factor for this chain is equal to the number of
	 temporary variables that we create for it.  */
      af = chain->length;
      if (chain->has_max_use_after)
	af++;

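      /* NFACTOR is the least common multiple of the factor accumulated so
	 far and AF; it is only accepted if it does not exceed MAX.  */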
      nfactor = factor * af / gcd (factor, af);
      if (nfactor <= max)
	factor = nfactor;
    }

  return factor;
}

/* Perform the predictive commoning optimization for CHAINS.
   Uids of the newly created temporary variables are marked in TMP_VARS.  */

static void
execute_pred_commoning (struct loop *loop, vec<chain_p> chains,
			bitmap tmp_vars)
{
  chain_p chain;
  unsigned i;

  FOR_EACH_VEC_ELT (chains, i, chain)
    {
      if (chain->type == CT_INVARIANT)
	execute_load_motion (loop, chain, tmp_vars);
      else
	execute_pred_commoning_chain (loop, chain, tmp_vars);
    }

  FOR_EACH_VEC_ELT (chains, i, chain)
    {
      if (chain->type == CT_INVARIANT)
	;
      else if (chain->combined)
	{
	  /* For combined chains, just remove the statements that are used to
	     compute the values of the expression (except for the root one).  */
	  dref a;
	  unsigned j;
	  for (j = 1; chain->refs.iterate (j, &a); j++)
	    remove_stmt (a->stmt);
	}
    }

  update_ssa (TODO_update_ssa_only_virtuals);
}

/* For each reference in CHAINS, if its defining statement is
   a phi node, record the ssa name that is defined by it.  */

static void
replace_phis_by_defined_names (vec<chain_p> chains)
{
  chain_p chain;
  dref a;
  unsigned i, j;

  FOR_EACH_VEC_ELT (chains, i, chain)
    FOR_EACH_VEC_ELT (chain->refs, j, a)
      {
	if (gimple_code (a->stmt) == GIMPLE_PHI)
	  {
	    a->name_defined_by_phi = PHI_RESULT (a->stmt);
	    a->stmt = NULL;
	  }
      }
}

/* For each reference in CHAINS, if name_defined_by_phi is not
   NULL, use it to set the stmt field.  */

static void
replace_names_by_phis (vec<chain_p> chains)
{
  chain_p chain;
  dref a;
  unsigned i, j;

  FOR_EACH_VEC_ELT (chains, i, chain)
    FOR_EACH_VEC_ELT (chain->refs, j, a)
      if (a->stmt == NULL)
	{
	  a->stmt = SSA_NAME_DEF_STMT (a->name_defined_by_phi);
	  gcc_assert (gimple_code (a->stmt) == GIMPLE_PHI);
	  a->name_defined_by_phi = NULL_TREE;
	}
}

/* Wrapper over execute_pred_commoning, to pass it as a callback
   to tree_transform_and_unroll_loop.  */

struct epcc_data
{
  vec<chain_p> chains;
  bitmap tmp_vars;
};

static void
execute_pred_commoning_cbck (struct loop *loop, void *data)
{
  struct epcc_data *const dta = (struct epcc_data *) data;

  /* Restore phi nodes that were replaced by ssa names before
     tree_transform_and_unroll_loop (see detailed description in
     tree_predictive_commoning_loop).  */
  replace_names_by_phis (dta->chains);
  execute_pred_commoning (loop, dta->chains, dta->tmp_vars);
}

/* Base NAME and all the names in the chain of phi nodes that use it
   on variable VAR.  The phi nodes are recognized by being in the copies of
   the header of the LOOP.  */

static void
base_names_in_chain_on (struct loop *loop, tree name, tree var)
{
  gimple stmt, phi;
  imm_use_iterator iter;

  replace_ssa_name_symbol (name, var);

  while (1)
    {
      phi = NULL;
      FOR_EACH_IMM_USE_STMT (stmt, iter, name)
	{
	  if (gimple_code (stmt) == GIMPLE_PHI
	      && flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
	    {
	      phi = stmt;
	      BREAK_FROM_IMM_USE_STMT (iter);
	    }
	}
      if (!phi)
	return;

      name = PHI_RESULT (phi);
      replace_ssa_name_symbol (name, var);
    }
}

/* Given an unrolled LOOP after predictive commoning, remove the
   register copies arising from phi nodes by changing the base
   variables of SSA names.  TMP_VARS is the set of the temporary variables
   for which we want to perform this.  */

static void
eliminate_temp_copies (struct loop *loop, bitmap tmp_vars)
{
  edge e;
  gphi *phi;
  gimple stmt;
  tree name, use, var;
  gphi_iterator psi;

  e = loop_latch_edge (loop);
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();
      name = PHI_RESULT (phi);
      var = SSA_NAME_VAR (name);
      if (!var || !bitmap_bit_p (tmp_vars, DECL_UID (var)))
	continue;
      use = PHI_ARG_DEF_FROM_EDGE (phi, e);
      gcc_assert (TREE_CODE (use) == SSA_NAME);

      /* Base all the ssa names in the ud and du chain of NAME on VAR.  */
      stmt = SSA_NAME_DEF_STMT (use);
      while (gimple_code (stmt) == GIMPLE_PHI
	     /* In case we could not unroll the loop enough to eliminate
		all copies, we may reach the loop header before the defining
		statement (in that case, some register copies will be present
		in the loop latch in the final code, corresponding to the newly
		created looparound phi nodes).  */
	     && gimple_bb (stmt) != loop->header)
	{
	  gcc_assert (single_pred_p (gimple_bb (stmt)));
	  use = PHI_ARG_DEF (stmt, 0);
	  stmt = SSA_NAME_DEF_STMT (use);
	}

      base_names_in_chain_on (loop, use, var);
    }
}

/* Returns true if CHAIN is suitable to be combined.  */

static bool
chain_can_be_combined_p (chain_p chain)
{
  return (!chain->combined
	  && (chain->type == CT_LOAD || chain->type == CT_COMBINATION));
}

1978 /* Returns the modify statement that uses NAME. Skips over assignment
1979 statements, NAME is replaced with the actual name used in the returned
1980 statement. */
1981
1982 static gimple
1983 find_use_stmt (tree *name)
1984 {
1985 gimple stmt;
1986 tree rhs, lhs;
1987
1988 /* Skip over assignments. */
1989 while (1)
1990 {
1991 stmt = single_nonlooparound_use (*name);
1992 if (!stmt)
1993 return NULL;
1994
1995 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1996 return NULL;
1997
1998 lhs = gimple_assign_lhs (stmt);
1999 if (TREE_CODE (lhs) != SSA_NAME)
2000 return NULL;
2001
2002 if (gimple_assign_copy_p (stmt))
2003 {
2004 rhs = gimple_assign_rhs1 (stmt);
2005 if (rhs != *name)
2006 return NULL;
2007
2008 *name = lhs;
2009 }
2010 else if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
2011 == GIMPLE_BINARY_RHS)
2012 return stmt;
2013 else
2014 return NULL;
2015 }
2016 }
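
/* A made-up sketch of what find_use_stmt does (hypothetical SSA names,
   for illustration only).  Given

     b_2 = a_1;        <- plain copy, skipped
     c_3 = b_2;        <- plain copy, skipped
     d_4 = c_3 + x_9;  <- binary use, returned

   a call with *NAME == a_1 returns the last statement and updates *NAME
   to c_3, the operand actually appearing in it.  */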

/* Returns true if we may perform reassociation for operation CODE in TYPE.  */

static bool
may_reassociate_p (tree type, enum tree_code code)
{
  if (FLOAT_TYPE_P (type)
      && !flag_unsafe_math_optimizations)
    return false;

  return (commutative_tree_code (code)
          && associative_tree_code (code));
}
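
/* For instance, (a + b) + c may be rewritten as a + (b + c) for integer
   addition, but for floating point this can change the result; e.g. with
   a = 1e308, b = 1e308, c = -1e308, the first form overflows to +inf
   while the second yields 1e308.  Hence FLOAT_TYPE_P types are
   reassociated only under -funsafe-math-optimizations.  */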

/* If the operation used in STMT is associative and commutative, walk up the
   tree of the same operations and return its root.  The distance to the root
   is stored in DISTANCE if it is non-NULL.  */

static gimple
find_associative_operation_root (gimple stmt, unsigned *distance)
{
  tree lhs;
  gimple next;
  enum tree_code code = gimple_assign_rhs_code (stmt);
  tree type = TREE_TYPE (gimple_assign_lhs (stmt));
  unsigned dist = 0;

  if (!may_reassociate_p (type, code))
    return NULL;

  while (1)
    {
      lhs = gimple_assign_lhs (stmt);
      gcc_assert (TREE_CODE (lhs) == SSA_NAME);

      next = find_use_stmt (&lhs);
      if (!next
          || gimple_assign_rhs_code (next) != code)
        break;

      stmt = next;
      dist++;
    }

  if (distance)
    *distance = dist;
  return stmt;
}
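
/* As an illustration (hypothetical names, and a type for which
   reassociation is allowed), in the chain of additions

     s_1 = x_5 + y_6;
     s_2 = s_1 + z_7;
     s_3 = s_2 + w_8;

   starting from the statement defining s_1, the root is the statement
   defining s_3 and the computed distance is 2.  */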

/* Returns the common statement in which NAME1 and NAME2 are used.  If there
   is no such statement, returns NULL.  In case the operation used on
   NAME1 and NAME2 is associative and commutative, returns the root of the
   tree formed by this operation instead of the statement that uses NAME1 or
   NAME2.  */

static gimple
find_common_use_stmt (tree *name1, tree *name2)
{
  gimple stmt1, stmt2;

  stmt1 = find_use_stmt (name1);
  if (!stmt1)
    return NULL;

  stmt2 = find_use_stmt (name2);
  if (!stmt2)
    return NULL;

  if (stmt1 == stmt2)
    return stmt1;

  stmt1 = find_associative_operation_root (stmt1, NULL);
  if (!stmt1)
    return NULL;
  stmt2 = find_associative_operation_root (stmt2, NULL);
  if (!stmt2)
    return NULL;

  return (stmt1 == stmt2 ? stmt1 : NULL);
}

/* Checks whether R1 and R2 are combined together using CODE, with the result
   in RSLT_TYPE, in order R1 CODE R2 if SWAP is false and in order R2 CODE R1
   if it is true.  If CODE is ERROR_MARK, set CODE, SWAP and RSLT_TYPE to the
   values observed instead.  */

static bool
combinable_refs_p (dref r1, dref r2,
                   enum tree_code *code, bool *swap, tree *rslt_type)
{
  enum tree_code acode;
  bool aswap;
  tree atype;
  tree name1, name2;
  gimple stmt;

  name1 = name_for_ref (r1);
  name2 = name_for_ref (r2);
  gcc_assert (name1 != NULL_TREE && name2 != NULL_TREE);

  stmt = find_common_use_stmt (&name1, &name2);

  if (!stmt
      /* A simple post-dominance check - make sure the combination
         is executed under the same condition as the references.  */
      || (gimple_bb (stmt) != gimple_bb (r1->stmt)
          && gimple_bb (stmt) != gimple_bb (r2->stmt)))
    return false;

  acode = gimple_assign_rhs_code (stmt);
  aswap = (!commutative_tree_code (acode)
           && gimple_assign_rhs1 (stmt) != name1);
  atype = TREE_TYPE (gimple_assign_lhs (stmt));

  if (*code == ERROR_MARK)
    {
      *code = acode;
      *swap = aswap;
      *rslt_type = atype;
      return true;
    }

  return (*code == acode
          && *swap == aswap
          && *rslt_type == atype);
}
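
/* For example (sketch only), if each pair of references is used as
   b[.] - a[.], with R1 from the chain of a and R2 from the chain of b,
   MINUS_EXPR is not commutative and NAME1 is not the first operand, so
   SWAP is set to true; combine_chains below then exchanges CH1 and CH2
   so that the combined chain computes its values in the order
   R2 CODE R1.  */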

/* Remove OP from the operation on rhs of STMT, and replace STMT with
   an assignment of the remaining operand.  */

static void
remove_name_from_operation (gimple stmt, tree op)
{
  tree other_op;
  gimple_stmt_iterator si;

  gcc_assert (is_gimple_assign (stmt));

  if (gimple_assign_rhs1 (stmt) == op)
    other_op = gimple_assign_rhs2 (stmt);
  else
    other_op = gimple_assign_rhs1 (stmt);

  si = gsi_for_stmt (stmt);
  gimple_assign_set_rhs_from_tree (&si, other_op);

  /* We should not have reallocated STMT.  */
  gcc_assert (gsi_stmt (si) == stmt);

  update_stmt (stmt);
}

/* Reassociates the expression in which NAME1 and NAME2 are used so that
   they are combined in a single statement, and returns this statement.  */

static gimple
reassociate_to_the_same_stmt (tree name1, tree name2)
{
  gimple stmt1, stmt2, root1, root2, s1, s2;
  gassign *new_stmt, *tmp_stmt;
  tree new_name, tmp_name, var, r1, r2;
  unsigned dist1, dist2;
  enum tree_code code;
  tree type = TREE_TYPE (name1);
  gimple_stmt_iterator bsi;

  stmt1 = find_use_stmt (&name1);
  stmt2 = find_use_stmt (&name2);
  root1 = find_associative_operation_root (stmt1, &dist1);
  root2 = find_associative_operation_root (stmt2, &dist2);
  code = gimple_assign_rhs_code (stmt1);

  gcc_assert (root1 && root2 && root1 == root2
              && code == gimple_assign_rhs_code (stmt2));

  /* Find the root of the nearest expression in which both NAME1 and NAME2
     are used.  */
  r1 = name1;
  s1 = stmt1;
  r2 = name2;
  s2 = stmt2;

  while (dist1 > dist2)
    {
      s1 = find_use_stmt (&r1);
      r1 = gimple_assign_lhs (s1);
      dist1--;
    }
  while (dist2 > dist1)
    {
      s2 = find_use_stmt (&r2);
      r2 = gimple_assign_lhs (s2);
      dist2--;
    }

  while (s1 != s2)
    {
      s1 = find_use_stmt (&r1);
      r1 = gimple_assign_lhs (s1);
      s2 = find_use_stmt (&r2);
      r2 = gimple_assign_lhs (s2);
    }

  /* Remove NAME1 and NAME2 from the statements in which they are currently
     used.  */
  remove_name_from_operation (stmt1, name1);
  remove_name_from_operation (stmt2, name2);

  /* Insert the new statement combining NAME1 and NAME2 before S1, and
     combine it with the rhs of S1.  */
  var = create_tmp_reg (type, "predreastmp");
  new_name = make_ssa_name (var);
  new_stmt = gimple_build_assign (new_name, code, name1, name2);

  var = create_tmp_reg (type, "predreastmp");
  tmp_name = make_ssa_name (var);

  /* Rhs of S1 may now be either a binary expression with operation
     CODE, or a gimple value (in case stmt1 == s1 or stmt2 == s1,
     so that name1 or name2 was removed from it).  */
  tmp_stmt = gimple_build_assign (tmp_name, gimple_assign_rhs_code (s1),
                                  gimple_assign_rhs1 (s1),
                                  gimple_assign_rhs2 (s1));

  bsi = gsi_for_stmt (s1);
  gimple_assign_set_rhs_with_ops (&bsi, code, new_name, tmp_name);
  s1 = gsi_stmt (bsi);
  update_stmt (s1);

  gsi_insert_before (&bsi, new_stmt, GSI_SAME_STMT);
  gsi_insert_before (&bsi, tmp_stmt, GSI_SAME_STMT);

  return new_stmt;
}
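
/* A made-up illustration (hypothetical SSA names): if NAME1 and NAME2 are
   only combined indirectly, as in

     t_1 = NAME1 + x_5;
     t_2 = t_1 + NAME2;

   the reassociation above produces

     t_1 = x_5;
     new_7 = NAME1 + NAME2;
     tmp_8 = t_1;
     t_2 = new_7 + tmp_8;

   so that the returned statement (new_7 = NAME1 + NAME2) combines both
   names directly; the leftover copies are cleaned up by later passes.  */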

/* Returns the statement that combines references R1 and R2.  In case R1
   and R2 are not used in the same statement, but they are used with an
   associative and commutative operation in the same expression, reassociate
   the expression so that they are used in the same statement.  */

static gimple
stmt_combining_refs (dref r1, dref r2)
{
  gimple stmt1, stmt2;
  tree name1 = name_for_ref (r1);
  tree name2 = name_for_ref (r2);

  stmt1 = find_use_stmt (&name1);
  stmt2 = find_use_stmt (&name2);
  if (stmt1 == stmt2)
    return stmt1;

  return reassociate_to_the_same_stmt (name1, name2);
}

/* Tries to combine chains CH1 and CH2 together.  If this succeeds, the
   description of the new chain is returned, otherwise we return NULL.  */

static chain_p
combine_chains (chain_p ch1, chain_p ch2)
{
  dref r1, r2, nw;
  enum tree_code op = ERROR_MARK;
  bool swap = false;
  chain_p new_chain;
  unsigned i;
  gimple root_stmt;
  tree rslt_type = NULL_TREE;

  if (ch1 == ch2)
    return NULL;
  if (ch1->length != ch2->length)
    return NULL;

  if (ch1->refs.length () != ch2->refs.length ())
    return NULL;

  for (i = 0; (ch1->refs.iterate (i, &r1)
               && ch2->refs.iterate (i, &r2)); i++)
    {
      if (r1->distance != r2->distance)
        return NULL;

      if (!combinable_refs_p (r1, r2, &op, &swap, &rslt_type))
        return NULL;
    }

  if (swap)
    std::swap (ch1, ch2);

  new_chain = XCNEW (struct chain);
  new_chain->type = CT_COMBINATION;
  new_chain->op = op;
  new_chain->ch1 = ch1;
  new_chain->ch2 = ch2;
  new_chain->rslt_type = rslt_type;
  new_chain->length = ch1->length;

  for (i = 0; (ch1->refs.iterate (i, &r1)
               && ch2->refs.iterate (i, &r2)); i++)
    {
      nw = XCNEW (struct dref_d);
      nw->stmt = stmt_combining_refs (r1, r2);
      nw->distance = r1->distance;

      new_chain->refs.safe_push (nw);
    }

  new_chain->has_max_use_after = false;
  root_stmt = get_chain_root (new_chain)->stmt;
  for (i = 1; new_chain->refs.iterate (i, &nw); i++)
    {
      if (nw->distance == new_chain->length
          && !stmt_dominates_stmt_p (nw->stmt, root_stmt))
        {
          new_chain->has_max_use_after = true;
          break;
        }
    }

  ch1->combined = true;
  ch2->combined = true;
  return new_chain;
}
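
/* An illustrative sketch, in the spirit of the example at the top of the
   file (not an actual dump): in a loop containing

     e[i] = a[i] + b[i];
     e[i + 1] = a[i + 1] + b[i + 1];

   the load chains {a[i], a[i+1]} and {b[i], b[i+1]} have the same length
   and distances, and their elements are combined pairwise by PLUS_EXPR,
   so they can be merged into one CT_COMBINATION chain over a[.] + b[.];
   a single temporary then carries the combined value between iterations
   instead of one temporary per original chain.  */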

/* Try to combine the CHAINS.  */

static void
try_combine_chains (vec<chain_p> *chains)
{
  unsigned i, j;
  chain_p ch1, ch2, cch;
  auto_vec<chain_p> worklist;

  FOR_EACH_VEC_ELT (*chains, i, ch1)
    if (chain_can_be_combined_p (ch1))
      worklist.safe_push (ch1);

  while (!worklist.is_empty ())
    {
      ch1 = worklist.pop ();
      if (!chain_can_be_combined_p (ch1))
        continue;

      FOR_EACH_VEC_ELT (*chains, j, ch2)
        {
          if (!chain_can_be_combined_p (ch2))
            continue;

          cch = combine_chains (ch1, ch2);
          if (cch)
            {
              worklist.safe_push (cch);
              chains->safe_push (cch);
              break;
            }
        }
    }
}

/* Prepare initializers for CHAIN in LOOP.  Returns false if this is
   impossible because one of these initializers may trap, true otherwise.  */

static bool
prepare_initializers_chain (struct loop *loop, chain_p chain)
{
  unsigned i, n = (chain->type == CT_INVARIANT) ? 1 : chain->length;
  struct data_reference *dr = get_chain_root (chain)->ref;
  tree init;
  dref laref;
  edge entry = loop_preheader_edge (loop);

  /* Find the initializers for the variables, and check that they cannot
     trap.  */
  chain->inits.create (n);
  for (i = 0; i < n; i++)
    chain->inits.quick_push (NULL_TREE);

  /* If we have replaced some looparound phi nodes, use their initializers
     instead of creating our own.  */
  FOR_EACH_VEC_ELT (chain->refs, i, laref)
    {
      if (gimple_code (laref->stmt) != GIMPLE_PHI)
        continue;

      gcc_assert (laref->distance > 0);
      chain->inits[n - laref->distance]
        = PHI_ARG_DEF_FROM_EDGE (laref->stmt, entry);
    }

  for (i = 0; i < n; i++)
    {
      gimple_seq stmts = NULL;

      if (chain->inits[i] != NULL_TREE)
        continue;

      init = ref_at_iteration (dr, (int) i - n, &stmts);
      if (!chain->all_always_accessed && tree_could_trap_p (init))
        {
          gimple_seq_discard (stmts);
          return false;
        }

      if (stmts)
        gsi_insert_seq_on_edge_immediate (entry, stmts);

      chain->inits[i] = init;
    }

  return true;
}
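
/* For the chain over a[i], a[i+1] and a[i+2]{write} from the example at
   the top of the file (length 2), this emits loads of the root reference
   at iterations -2 and -1 on the preheader edge, i.e. the initializers
   a[0] and a[1].  The tree_could_trap_p check matters when the references
   are not accessed in every iteration; e.g. for a load conditional on the
   iteration number, we give the chain up rather than speculatively hoist
   a possibly trapping access into the preheader.  */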

/* Prepare initializers for CHAINS in LOOP, and free chains that cannot
   be used because the initializers might trap.  */

static void
prepare_initializers (struct loop *loop, vec<chain_p> chains)
{
  chain_p chain;
  unsigned i;

  for (i = 0; i < chains.length (); )
    {
      chain = chains[i];
      if (prepare_initializers_chain (loop, chain))
        i++;
      else
        {
          release_chain (chain);
          chains.unordered_remove (i);
        }
    }
}

/* Performs predictive commoning for LOOP.  Returns true if LOOP was
   unrolled.  */

static bool
tree_predictive_commoning_loop (struct loop *loop)
{
  vec<data_reference_p> datarefs;
  vec<ddr_p> dependences;
  struct component *components;
  vec<chain_p> chains = vNULL;
  unsigned unroll_factor;
  struct tree_niter_desc desc;
  bool unroll = false;
  edge exit;
  bitmap tmp_vars;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Processing loop %d\n", loop->num);

  /* Find the data references and split them into components according to their
     dependence relations.  */
  auto_vec<loop_p, 3> loop_nest;
  dependences.create (10);
  datarefs.create (10);
  if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
                                           &dependences))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Cannot analyze data dependencies\n");
      free_data_refs (datarefs);
      free_dependence_relations (dependences);
      return false;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_data_dependence_relations (dump_file, dependences);

  components = split_data_refs_to_components (loop, datarefs, dependences);
  loop_nest.release ();
  free_dependence_relations (dependences);
  if (!components)
    {
      free_data_refs (datarefs);
      free_affine_expand_cache (&name_expansions);
      return false;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial state:\n\n");
      dump_components (dump_file, components);
    }

  /* Find the suitable components and split them into chains.  */
  components = filter_suitable_components (loop, components);

  tmp_vars = BITMAP_ALLOC (NULL);
  looparound_phis = BITMAP_ALLOC (NULL);
  determine_roots (loop, components, &chains);
  release_components (components);

  if (!chains.exists ())
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "Predictive commoning failed: no suitable chains\n");
      goto end;
    }
  prepare_initializers (loop, chains);

  /* Try to combine chains that are always used together.  */
  try_combine_chains (&chains);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Before commoning:\n\n");
      dump_chains (dump_file, chains);
    }

  /* Determine the unroll factor, and if the loop should be unrolled, ensure
     that its number of iterations is divisible by the factor.  */
  unroll_factor = determine_unroll_factor (chains);
  scev_reset ();
  unroll = (unroll_factor > 1
            && can_unroll_loop_p (loop, unroll_factor, &desc));
  exit = single_dom_exit (loop);

  /* Execute the predictive commoning transformations, and possibly unroll the
     loop.  */
  if (unroll)
    {
      struct epcc_data dta;

      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Unrolling %u times.\n", unroll_factor);

      dta.chains = chains;
      dta.tmp_vars = tmp_vars;

      update_ssa (TODO_update_ssa_only_virtuals);

      /* Cfg manipulations performed in tree_transform_and_unroll_loop before
         execute_pred_commoning_cbck is called may cause phi nodes to be
         reallocated, which is a problem since CHAINS may point to these
         statements.  To fix this, we store the ssa names defined by the
         phi nodes here instead of the phi nodes themselves, and restore
         the phi nodes in execute_pred_commoning_cbck.  A bit hacky.  */
      replace_phis_by_defined_names (chains);

      tree_transform_and_unroll_loop (loop, unroll_factor, exit, &desc,
                                      execute_pred_commoning_cbck, &dta);
      eliminate_temp_copies (loop, tmp_vars);
    }
  else
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "Executing predictive commoning without unrolling.\n");
      execute_pred_commoning (loop, chains, tmp_vars);
    }

end: ;
  release_chains (chains);
  free_data_refs (datarefs);
  BITMAP_FREE (tmp_vars);
  BITMAP_FREE (looparound_phis);

  free_affine_expand_cache (&name_expansions);

  return unroll;
}

/* Runs predictive commoning.  */

unsigned
tree_predictive_commoning (void)
{
  bool unrolled = false;
  struct loop *loop;
  unsigned ret = 0;

  initialize_original_copy_tables ();
  FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
    if (optimize_loop_for_speed_p (loop))
      {
        unrolled |= tree_predictive_commoning_loop (loop);
      }

  if (unrolled)
    {
      scev_reset ();
      ret = TODO_cleanup_cfg;
    }
  free_original_copy_tables ();

  return ret;
}

/* Predictive commoning pass.  */

static unsigned
run_tree_predictive_commoning (struct function *fun)
{
  if (number_of_loops (fun) <= 1)
    return 0;

  return tree_predictive_commoning ();
}

namespace {

const pass_data pass_data_predcom =
{
  GIMPLE_PASS, /* type */
  "pcom", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_PREDCOM, /* tv_id */
  PROP_cfg, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa_only_virtuals, /* todo_flags_finish */
};

class pass_predcom : public gimple_opt_pass
{
public:
  pass_predcom (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_predcom, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *) { return flag_predictive_commoning != 0; }
  virtual unsigned int execute (function *fun)
    {
      return run_tree_predictive_commoning (fun);
    }

}; // class pass_predcom

} // anon namespace

gimple_opt_pass *
make_pass_predcom (gcc::context *ctxt)
{
  return new pass_predcom (ctxt);
}