coretypes.h: Include machmode.h...
[gcc.git] / gcc / tree-loop-distribution.c
1 /* Loop distribution.
2 Copyright (C) 2006-2015 Free Software Foundation, Inc.
3 Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
4 and Sebastian Pop <sebastian.pop@amd.com>.
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 3, or (at your option) any
11 later version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 /* This pass performs loop distribution: for example, the loop
23
24 |DO I = 2, N
25 | A(I) = B(I) + C
26 | D(I) = A(I-1)*E
27 |ENDDO
28
29 is transformed to
30
31 |DOALL I = 2, N
32 | A(I) = B(I) + C
33 |ENDDO
34 |
35 |DOALL I = 2, N
36 | D(I) = A(I-1)*E
37 |ENDDO
38
39 This pass uses an RDG, Reduced Dependence Graph built on top of the
40 data dependence relations. The RDG is then topologically sorted to
41 obtain a map of information producers/consumers based on which it
42 generates the new loops. */
43
44 #include "config.h"
45 #include "system.h"
46 #include "coretypes.h"
47 #include "hash-set.h"
48 #include "vec.h"
49 #include "input.h"
50 #include "alias.h"
51 #include "symtab.h"
52 #include "options.h"
53 #include "inchash.h"
54 #include "tree.h"
55 #include "fold-const.h"
56 #include "predict.h"
57 #include "tm.h"
58 #include "hard-reg-set.h"
59 #include "input.h"
60 #include "function.h"
61 #include "dominance.h"
62 #include "cfg.h"
63 #include "cfganal.h"
64 #include "basic-block.h"
65 #include "tree-ssa-alias.h"
66 #include "internal-fn.h"
67 #include "gimple-expr.h"
68 #include "is-a.h"
69 #include "gimple.h"
70 #include "gimple-iterator.h"
71 #include "gimplify-me.h"
72 #include "stor-layout.h"
73 #include "gimple-ssa.h"
74 #include "tree-cfg.h"
75 #include "tree-phinodes.h"
76 #include "ssa-iterators.h"
77 #include "stringpool.h"
78 #include "tree-ssanames.h"
79 #include "tree-ssa-loop-manip.h"
80 #include "tree-ssa-loop.h"
81 #include "tree-into-ssa.h"
82 #include "tree-ssa.h"
83 #include "cfgloop.h"
84 #include "tree-chrec.h"
85 #include "tree-data-ref.h"
86 #include "tree-scalar-evolution.h"
87 #include "tree-pass.h"
88 #include "gimple-pretty-print.h"
89 #include "tree-vectorizer.h"
90
91
/* A Reduced Dependence Graph (RDG) vertex representing a statement.
   An instance is stored in the opaque DATA field of the corresponding
   struct vertex of the graph (see the RDGV_* accessors below).  */
typedef struct rdg_vertex
{
  /* The statement represented by this vertex.  */
  gimple stmt;

  /* Vector of data-references in this statement.  */
  vec<data_reference_p> datarefs;

  /* True when the statement contains a write to memory.  */
  bool has_mem_write;

  /* True when the statement contains a read from memory.  */
  bool has_mem_reads;
} *rdg_vertex_p;
107
/* Accessors for the rdg_vertex stored in the DATA field of a graph
   vertex V (RDGV_*), and convenience forms taking the RDG and a
   vertex index I (RDG_*).  */
#define RDGV_STMT(V)     ((struct rdg_vertex *) ((V)->data))->stmt
#define RDGV_DATAREFS(V) ((struct rdg_vertex *) ((V)->data))->datarefs
#define RDGV_HAS_MEM_WRITE(V) ((struct rdg_vertex *) ((V)->data))->has_mem_write
#define RDGV_HAS_MEM_READS(V) ((struct rdg_vertex *) ((V)->data))->has_mem_reads
#define RDG_STMT(RDG, I) RDGV_STMT (&(RDG->vertices[I]))
#define RDG_DATAREFS(RDG, I) RDGV_DATAREFS (&(RDG->vertices[I]))
#define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
#define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I]))
116
/* Data dependence type.  The enumerators are mnemonic character
   values ('f', 'c').  */

enum rdg_dep_type
{
  /* Read After Write (RAW).  */
  flow_dd = 'f',

  /* Control dependence (execute conditional on).  */
  control_dd = 'c'
};
127
/* Dependence information attached to an edge of the RDG, stored in
   the edge's opaque DATA field (see RDGE_TYPE).  */

typedef struct rdg_edge
{
  /* Type of the dependence.  */
  enum rdg_dep_type type;
} *rdg_edge_p;

/* Accessor for the rdg_edge attached to graph edge E.  */
#define RDGE_TYPE(E)        ((struct rdg_edge *) ((E)->data))->type
137
138 /* Dump vertex I in RDG to FILE. */
139
140 static void
141 dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
142 {
143 struct vertex *v = &(rdg->vertices[i]);
144 struct graph_edge *e;
145
146 fprintf (file, "(vertex %d: (%s%s) (in:", i,
147 RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
148 RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
149
150 if (v->pred)
151 for (e = v->pred; e; e = e->pred_next)
152 fprintf (file, " %d", e->src);
153
154 fprintf (file, ") (out:");
155
156 if (v->succ)
157 for (e = v->succ; e; e = e->succ_next)
158 fprintf (file, " %d", e->dest);
159
160 fprintf (file, ")\n");
161 print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
162 fprintf (file, ")\n");
163 }
164
/* Call dump_rdg_vertex on stderr.  Entry point intended to be called
   from the debugger.  */

DEBUG_FUNCTION void
debug_rdg_vertex (struct graph *rdg, int i)
{
  dump_rdg_vertex (stderr, rdg, i);
}
172
173 /* Dump the reduced dependence graph RDG to FILE. */
174
175 static void
176 dump_rdg (FILE *file, struct graph *rdg)
177 {
178 fprintf (file, "(rdg\n");
179 for (int i = 0; i < rdg->n_vertices; i++)
180 dump_rdg_vertex (file, rdg, i);
181 fprintf (file, ")\n");
182 }
183
/* Call dump_rdg on stderr.  Entry point intended to be called from
   the debugger.  */

DEBUG_FUNCTION void
debug_rdg (struct graph *rdg)
{
  dump_rdg (stderr, rdg);
}
191
/* Write RDG to FILE in graphviz dot syntax: one node per vertex
   (labeled with the statement, colored by memory access kind) and one
   arc per dependence edge.  */

static void
dot_rdg_1 (FILE *file, struct graph *rdg)
{
  int i;
  pretty_printer buffer;
  pp_needs_newline (&buffer) = false;
  /* Route pretty-printer output directly to FILE so that pp_flush
     interleaves correctly with the fprintf calls below.  */
  buffer.buffer->stream = file;

  fprintf (file, "digraph RDG {\n");

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      fprintf (file, "%d [label=\"[%d] ", i, i);
      pp_gimple_stmt_1 (&buffer, RDGV_STMT (v), 0, TDF_SLIM);
      pp_flush (&buffer);
      fprintf (file, "\"]\n");

      /* Highlight reads from memory.  */
      if (RDG_MEM_READS_STMT (rdg, i))
	fprintf (file, "%d [style=filled, fillcolor=green]\n", i);

      /* Highlight stores to memory.  */
      if (RDG_MEM_WRITE_STMT (rdg, i))
	fprintf (file, "%d [style=filled, fillcolor=red]\n", i);

      if (v->succ)
	for (e = v->succ; e; e = e->succ_next)
	  switch (RDGE_TYPE (e))
	    {
	    case flow_dd:
	      /* Flow dependences are the common case: print them as
		 plain, unlabeled arcs.  */
	      fprintf (file, "%d -> %d \n", i, e->dest);
	      break;

	    case control_dd:
	      fprintf (file, "%d -> %d [label=control] \n", i, e->dest);
	      break;

	    default:
	      gcc_unreachable ();
	    }
    }

  fprintf (file, "}\n\n");
}
240
/* Display the Reduced Dependence Graph using dotty.  */

DEBUG_FUNCTION void
dot_rdg (struct graph *rdg)
{
  /* When debugging, you may want to enable the following code.  */
#ifdef HAVE_POPEN
  FILE *file = popen ("dot -Tx11", "w");
  if (!file)
    return;
  dot_rdg_1 (file, rdg);
  fflush (file);
  /* NOTE(review): the underlying fd is closed before pclose, which
     makes pclose operate on an already-closed descriptor -- presumably
     intended to deliver EOF to dot so it renders, but worth
     confirming.  */
  close (fileno (file));
  pclose (file);
#else
  dot_rdg_1 (stderr, rdg);
#endif
}
259
260 /* Returns the index of STMT in RDG. */
261
262 static int
263 rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple stmt)
264 {
265 int index = gimple_uid (stmt);
266 gcc_checking_assert (index == -1 || RDG_STMT (rdg, index) == stmt);
267 return index;
268 }
269
270 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
271 the index of DEF in RDG. */
272
273 static void
274 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
275 {
276 use_operand_p imm_use_p;
277 imm_use_iterator iterator;
278
279 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
280 {
281 struct graph_edge *e;
282 int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
283
284 if (use < 0)
285 continue;
286
287 e = add_edge (rdg, idef, use);
288 e->data = XNEW (struct rdg_edge);
289 RDGE_TYPE (e) = flow_dd;
290 }
291 }
292
/* Creates an edge for each control dependence of BB to the vertex V
   in RDG.  CD supplies the precomputed control dependence info.  */

static void
create_edge_for_control_dependence (struct graph *rdg, basic_block bb,
				    int v, control_dependences *cd)
{
  bitmap_iterator bi;
  unsigned edge_n;
  EXECUTE_IF_SET_IN_BITMAP (cd->get_edges_dependent_on (bb->index),
			    0, edge_n, bi)
    {
      /* The controlling statement is the last statement of the source
	 block of the edge BB is control dependent on.  */
      basic_block cond_bb = cd->get_edge (edge_n)->src;
      gimple stmt = last_stmt (cond_bb);
      if (stmt && is_ctrl_stmt (stmt))
	{
	  struct graph_edge *e;
	  int c = rdg_vertex_for_stmt (rdg, stmt);
	  /* Control statements without an RDG vertex (uid -1) are
	     skipped.  */
	  if (c < 0)
	    continue;

	  e = add_edge (rdg, c, v);
	  e->data = XNEW (struct rdg_edge);
	  RDGE_TYPE (e) = control_dd;
	}
    }
}
319
320 /* Creates the edges of the reduced dependence graph RDG. */
321
322 static void
323 create_rdg_flow_edges (struct graph *rdg)
324 {
325 int i;
326 def_operand_p def_p;
327 ssa_op_iter iter;
328
329 for (i = 0; i < rdg->n_vertices; i++)
330 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
331 iter, SSA_OP_DEF)
332 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
333 }
334
/* Creates the control dependence edges of the reduced dependence
   graph RDG, using the control dependence info CD.  */

static void
create_rdg_cd_edges (struct graph *rdg, control_dependences *cd)
{
  int i;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      gimple stmt = RDG_STMT (rdg, i);
      if (gimple_code (stmt) == GIMPLE_PHI)
	{
	  /* A PHI is control dependent on the conditions controlling
	     each of its incoming edges' source blocks.  */
	  edge_iterator ei;
	  edge e;
	  FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds)
	    create_edge_for_control_dependence (rdg, e->src, i, cd);
	}
      else
	create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd);
    }
}
356
/* Build the vertices of the reduced dependence graph RDG from STMTS,
   recording the statement -> vertex index mapping in the statement
   uids and collecting the data references of LOOP into *DATAREFS.
   Return false if the data references of some statement could not be
   analyzed; in that case the caller must free RDG and *DATAREFS.  */

static bool
create_rdg_vertices (struct graph *rdg, vec<gimple> stmts, loop_p loop,
		     vec<data_reference_p> *datarefs)
{
  int i;
  gimple stmt;

  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      struct vertex *v = &(rdg->vertices[i]);

      /* Record statement to vertex mapping.  */
      gimple_set_uid (stmt, i);

      v->data = XNEW (struct rdg_vertex);
      RDGV_STMT (v) = stmt;
      RDGV_DATAREFS (v).create (0);
      RDGV_HAS_MEM_WRITE (v) = false;
      RDGV_HAS_MEM_READS (v) = false;
      /* PHIs carry no memory references.  */
      if (gimple_code (stmt) == GIMPLE_PHI)
	continue;

      /* Remember where this statement's data references start in
	 *DATAREFS so we can attach exactly those to the vertex.  */
      unsigned drp = datarefs->length ();
      if (!find_data_references_in_stmt (loop, stmt, datarefs))
	return false;
      for (unsigned j = drp; j < datarefs->length (); ++j)
	{
	  data_reference_p dr = (*datarefs)[j];
	  if (DR_IS_READ (dr))
	    RDGV_HAS_MEM_READS (v) = true;
	  else
	    RDGV_HAS_MEM_WRITE (v) = true;
	  RDGV_DATAREFS (v).safe_push (dr);
	}
    }
  return true;
}
397
398 /* Initialize STMTS with all the statements of LOOP. The order in
399 which we discover statements is important as
400 generate_loops_for_partition is using the same traversal for
401 identifying statements in loop copies. */
402
403 static void
404 stmts_from_loop (struct loop *loop, vec<gimple> *stmts)
405 {
406 unsigned int i;
407 basic_block *bbs = get_loop_body_in_dom_order (loop);
408
409 for (i = 0; i < loop->num_nodes; i++)
410 {
411 basic_block bb = bbs[i];
412
413 for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
414 gsi_next (&bsi))
415 if (!virtual_operand_p (gimple_phi_result (bsi.phi ())))
416 stmts->safe_push (bsi.phi ());
417
418 for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
419 gsi_next (&bsi))
420 {
421 gimple stmt = gsi_stmt (bsi);
422 if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
423 stmts->safe_push (stmt);
424 }
425 }
426
427 free (bbs);
428 }
429
/* Free the reduced dependence graph RDG: the per-edge rdg_edge data,
   the per-vertex rdg_vertex data (clearing the stmt uid mapping and
   freeing the attached data references), and the graph itself.  */

static void
free_rdg (struct graph *rdg)
{
  int i;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      /* Edge data is owned by the RDG; successor lists visit every
	 edge exactly once.  */
      for (e = v->succ; e; e = e->succ_next)
	free (e->data);

      if (v->data)
	{
	  /* Undo the stmt -> vertex mapping recorded in the uid.  */
	  gimple_set_uid (RDGV_STMT (v), -1);
	  free_data_refs (RDGV_DATAREFS (v));
	  free (v->data);
	}
    }

  free_graph (rdg);
}
455
/* Build the Reduced Dependence Graph (RDG) with one vertex per
   statement of the loop nest LOOP_NEST, and one edge per data dependence or
   scalar dependence.  When CD is non-NULL, also add control dependence
   edges.  Returns NULL when the data references of some statement
   could not be analyzed.  */

static struct graph *
build_rdg (vec<loop_p> loop_nest, control_dependences *cd)
{
  struct graph *rdg;
  vec<data_reference_p> datarefs;

  /* Create the RDG vertices from the stmts of the loop nest.  */
  auto_vec<gimple, 10> stmts;
  stmts_from_loop (loop_nest[0], &stmts);
  rdg = new_graph (stmts.length ());
  datarefs.create (10);
  if (!create_rdg_vertices (rdg, stmts, loop_nest[0], &datarefs))
    {
      /* free_rdg releases the data references already attached to
	 vertices; here we only drop the collecting vector.  */
      datarefs.release ();
      free_rdg (rdg);
      return NULL;
    }
  stmts.release ();

  create_rdg_flow_edges (rdg);
  if (cd)
    create_rdg_cd_edges (rdg, cd);

  datarefs.release ();

  return rdg;
}
487
488
489
/* The kind of code a partition is generated as: a regular loop
   (PKIND_NORMAL) or a call to a memory builtin.  */
enum partition_kind {
    PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY
};
493
/* A partition of the statements of a loop: the unit of distribution.  */
typedef struct partition_s
{
  /* Bitmap of statement (RDG vertex) indices in this partition.  */
  bitmap stmts;
  /* Bitmap of the numbers of the loops containing those statements.  */
  bitmap loops;
  /* True when a statement in the partition defines a scalar that is
     used after the loop (see classify_partition).  */
  bool reduction_p;
  enum partition_kind kind;
  /* data-references a kind != PKIND_NORMAL partition is about.  */
  data_reference_p main_dr;
  data_reference_p secondary_dr;
  /* Latch execution count used as the builtin size, and whether one
     extra iteration must be added to it (see classify_partition).  */
  tree niter;
  bool plus_one;
} *partition_t;
506
507
508 /* Allocate and initialize a partition from BITMAP. */
509
510 static partition_t
511 partition_alloc (bitmap stmts, bitmap loops)
512 {
513 partition_t partition = XCNEW (struct partition_s);
514 partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
515 partition->loops = loops ? loops : BITMAP_ALLOC (NULL);
516 partition->reduction_p = false;
517 partition->kind = PKIND_NORMAL;
518 return partition;
519 }
520
521 /* Free PARTITION. */
522
523 static void
524 partition_free (partition_t partition)
525 {
526 BITMAP_FREE (partition->stmts);
527 BITMAP_FREE (partition->loops);
528 free (partition);
529 }
530
531 /* Returns true if the partition can be generated as a builtin. */
532
533 static bool
534 partition_builtin_p (partition_t partition)
535 {
536 return partition->kind != PKIND_NORMAL;
537 }
538
539 /* Returns true if the partition contains a reduction. */
540
541 static bool
542 partition_reduction_p (partition_t partition)
543 {
544 return partition->reduction_p;
545 }
546
547 /* Merge PARTITION into the partition DEST. */
548
549 static void
550 partition_merge_into (partition_t dest, partition_t partition)
551 {
552 dest->kind = PKIND_NORMAL;
553 bitmap_ior_into (dest->stmts, partition->stmts);
554 if (partition_reduction_p (partition))
555 dest->reduction_p = true;
556 }
557
558
559 /* Returns true when DEF is an SSA_NAME defined in LOOP and used after
560 the LOOP. */
561
562 static bool
563 ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
564 {
565 imm_use_iterator imm_iter;
566 use_operand_p use_p;
567
568 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
569 {
570 gimple use_stmt = USE_STMT (use_p);
571 if (!is_gimple_debug (use_stmt)
572 && loop != loop_containing_stmt (use_stmt))
573 return true;
574 }
575
576 return false;
577 }
578
579 /* Returns true when STMT defines a scalar variable used after the
580 loop LOOP. */
581
582 static bool
583 stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple stmt)
584 {
585 def_operand_p def_p;
586 ssa_op_iter op_iter;
587
588 if (gimple_code (stmt) == GIMPLE_PHI)
589 return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop);
590
591 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
592 if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
593 return true;
594
595 return false;
596 }
597
/* Return a copy of LOOP placed before LOOP, on its preheader edge.  */

static struct loop *
copy_loop_before (struct loop *loop)
{
  struct loop *res;
  edge preheader = loop_preheader_edge (loop);

  /* The original/copy tables are only needed during duplication.  */
  initialize_original_copy_tables ();
  res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, NULL, preheader);
  gcc_assert (res != NULL);
  free_original_copy_tables ();
  delete_update_ssa ();

  return res;
}
614
615 /* Creates an empty basic block after LOOP. */
616
617 static void
618 create_bb_after_loop (struct loop *loop)
619 {
620 edge exit = single_exit (loop);
621
622 if (!exit)
623 return;
624
625 split_edge (exit);
626 }
627
/* Generate code for PARTITION from the code in LOOP.  The loop is
   copied when COPY_P is true.  All the statements not flagged in the
   PARTITION bitmap are removed from the loop or from its copy.  The
   statements are indexed in sequence inside a basic block, and the
   basic blocks of a loop are taken in dom order.  */

static void
generate_loops_for_partition (struct loop *loop, partition_t partition,
			      bool copy_p)
{
  unsigned i;
  basic_block *bbs;

  if (copy_p)
    {
      /* Work on a fresh copy placed before LOOP, with its own
	 preheader and a block after it.  */
      loop = copy_loop_before (loop);
      gcc_assert (loop != NULL);
      create_preheader (loop, CP_SIMPLE_PREHEADERS);
      create_bb_after_loop (loop);
    }

  /* Remove stmts not in the PARTITION bitmap.  */
  bbs = get_loop_body_in_dom_order (loop);

  /* First pass: reset debug uses of every statement about to be
     removed, before any removal invalidates them.  */
  if (MAY_HAVE_DEBUG_STMTS)
    for (i = 0; i < loop->num_nodes; i++)
      {
	basic_block bb = bbs[i];

	for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
	     gsi_next (&bsi))
	  {
	    gphi *phi = bsi.phi ();
	    if (!virtual_operand_p (gimple_phi_result (phi))
		&& !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
	      reset_debug_uses (phi);
	  }

	for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	  {
	    gimple stmt = gsi_stmt (bsi);
	    if (gimple_code (stmt) != GIMPLE_LABEL
		&& !is_gimple_debug (stmt)
		&& !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
	      reset_debug_uses (stmt);
	  }
      }

  /* Second pass: remove the statements that are not in the partition.
     Note the manual iterator advance: removal already moves the
     iterator to the next statement.  */
  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
	{
	  gphi *phi = bsi.phi ();
	  if (!virtual_operand_p (gimple_phi_result (phi))
	      && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
	    remove_phi_node (&bsi, true);
	  else
	    gsi_next (&bsi);
	}

      for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
	{
	  gimple stmt = gsi_stmt (bsi);
	  if (gimple_code (stmt) != GIMPLE_LABEL
	      && !is_gimple_debug (stmt)
	      && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
	    {
	      /* Choose an arbitrary path through the empty CFG part
		 that this unnecessary control stmt controls.  Control
		 statements are redirected rather than removed to keep
		 the CFG intact.  */
	      if (gcond *cond_stmt = dyn_cast <gcond *> (stmt))
		{
		  gimple_cond_make_false (cond_stmt);
		  update_stmt (stmt);
		}
	      else if (gimple_code (stmt) == GIMPLE_SWITCH)
		{
		  gswitch *switch_stmt = as_a <gswitch *> (stmt);
		  gimple_switch_set_index
		      (switch_stmt, CASE_LOW (gimple_switch_label (switch_stmt, 1)));
		  update_stmt (stmt);
		}
	      else
		{
		  unlink_stmt_vdef (stmt);
		  gsi_remove (&bsi, true);
		  release_defs (stmt);
		  continue;
		}
	    }
	  gsi_next (&bsi);
	}
    }

  free (bbs);
}
725
726 /* Build the size argument for a memory operation call. */
727
728 static tree
729 build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter,
730 bool plus_one)
731 {
732 tree size = fold_convert_loc (loc, sizetype, nb_iter);
733 if (plus_one)
734 size = size_binop (PLUS_EXPR, size, size_one_node);
735 size = fold_build2_loc (loc, MULT_EXPR, sizetype, size,
736 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
737 size = fold_convert_loc (loc, size_type_node, size);
738 return size;
739 }
740
/* Build an address argument for a memory operation call: the base
   address of DR plus its offset and init.  NB_BYTES is the total size
   of the access, used to find the lowest touched address when the
   stride is negative.  */

static tree
build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes)
{
  tree addr_base;

  addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
  addr_base = fold_convert_loc (loc, sizetype, addr_base);

  /* Test for a negative stride, iterating over every element.  The
     call must start at the lowest address touched: go back NB_BYTES
     and forward one element.  */
  if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
    {
      addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
				  fold_convert_loc (loc, sizetype, nb_bytes));
      addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
				  TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
    }

  return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base);
}
762
763 /* If VAL memory representation contains the same value in all bytes,
764 return that value, otherwise return -1.
765 E.g. for 0x24242424 return 0x24, for IEEE double
766 747708026454360457216.0 return 0x44, etc. */
767
768 static int
769 const_with_all_bytes_same (tree val)
770 {
771 unsigned char buf[64];
772 int i, len;
773
774 if (integer_zerop (val)
775 || real_zerop (val)
776 || (TREE_CODE (val) == CONSTRUCTOR
777 && !TREE_CLOBBER_P (val)
778 && CONSTRUCTOR_NELTS (val) == 0))
779 return 0;
780
781 if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
782 return -1;
783
784 len = native_encode_expr (val, buf, sizeof (buf));
785 if (len == 0)
786 return -1;
787 for (i = 1; i < len; i++)
788 if (buf[i] != buf[0])
789 return -1;
790 return buf[0];
791 }
792
/* Generate a call to memset for PARTITION in LOOP, using the
   main_dr/niter/plus_one fields filled in by classify_partition.  */

static void
generate_memset_builtin (struct loop *loop, partition_t partition)
{
  gimple_stmt_iterator gsi;
  gimple stmt, fn_call;
  tree mem, fn, nb_bytes;
  location_t loc;
  tree val;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  /* Compute the size and destination arguments.  */
  nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
				 partition->plus_one);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
				       false, GSI_CONTINUE_LINKING);
  mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
				  false, GSI_CONTINUE_LINKING);

  /* This exactly matches the pattern recognition in classify_partition.  */
  val = gimple_assign_rhs1 (stmt);
  /* Handle constants like 0x15151515 and similarly
     floating point constants etc. where all bytes are the same.  */
  int bytev = const_with_all_bytes_same (val);
  if (bytev != -1)
    val = build_int_cst (integer_type_node, bytev);
  else if (TREE_CODE (val) == INTEGER_CST)
    val = fold_convert (integer_type_node, val);
  else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
    {
      /* memset takes an int; insert a conversion of VAL.  */
      tree tem = make_ssa_name (integer_type_node);
      gimple cstmt = gimple_build_assign (tem, NOP_EXPR, val);
      gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
      val = tem;
    }

  fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
  fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "generated memset");
      if (bytev == 0)
	fprintf (dump_file, " zero\n");
      else
	fprintf (dump_file, "\n");
    }
}
848
/* Generate a call to memcpy (or memmove when the source and
   destination may alias) for PARTITION in LOOP.  */

static void
generate_memcpy_builtin (struct loop *loop, partition_t partition)
{
  gimple_stmt_iterator gsi;
  gimple stmt, fn_call;
  tree dest, src, fn, nb_bytes;
  location_t loc;
  enum built_in_function kind;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
				 partition->plus_one);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
				       false, GSI_CONTINUE_LINKING);
  dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes);
  /* memcpy requires non-overlapping buffers; fall back to memmove
     when we cannot prove that.  */
  if (ptr_derefs_may_alias_p (dest, src))
    kind = BUILT_IN_MEMMOVE;
  else
    kind = BUILT_IN_MEMCPY;

  dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
				   false, GSI_CONTINUE_LINKING);
  src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
				  false, GSI_CONTINUE_LINKING);
  fn = build_fold_addr_expr (builtin_decl_implicit (kind));
  fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (kind == BUILT_IN_MEMCPY)
	fprintf (dump_file, "generated memcpy\n");
      else
	fprintf (dump_file, "generated memmove\n");
    }
}
893
/* Remove and destroy the loop LOOP: redirect its single exit to its
   preheader's source, delete all its basic blocks, and fix dominators.
   Requires that LOOP has a single exit.  */

static void
destroy_loop (struct loop *loop)
{
  unsigned nbbs = loop->num_nodes;
  edge exit = single_exit (loop);
  basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
  basic_block *bbs;
  unsigned i;

  bbs = get_loop_body_in_dom_order (loop);

  /* Bypass the loop entirely: the preheader's source now falls
     through to the old exit destination.  */
  redirect_edge_pred (exit, src);
  exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
  exit->flags |= EDGE_FALLTHRU;
  cancel_loop_tree (loop);
  rescan_loop_exit (exit, false, true);

  for (i = 0; i < nbbs; i++)
    {
      /* We have made sure to not leave any dangling uses of SSA
	 names defined in the loop.  With the exception of virtuals.
	 Make sure we replace all uses of virtual defs that will remain
	 outside of the loop with the bare symbol as delete_basic_block
	 will release them.  */
      for (gphi_iterator gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gphi *phi = gsi.phi ();
	  if (virtual_operand_p (gimple_phi_result (phi)))
	    mark_virtual_phi_result_for_renaming (phi);
	}
      for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple stmt = gsi_stmt (gsi);
	  tree vdef = gimple_vdef (stmt);
	  if (vdef && TREE_CODE (vdef) == SSA_NAME)
	    mark_virtual_operand_for_renaming (vdef);
	}
      delete_basic_block (bbs[i]);
    }
  free (bbs);

  /* The old exit destination may now be dominated by a different
     block; recompute its immediate dominator.  */
  set_immediate_dominator (CDI_DOMINATORS, dest,
			   recompute_dominator (CDI_DOMINATORS, dest));
}
942
/* Generates code for PARTITION: either a (copy of the) loop reduced
   to the partition's statements, or a memory builtin call.  COPY_P is
   true when LOOP must be preserved for later partitions.  */

static void
generate_code_for_partition (struct loop *loop,
			     partition_t partition, bool copy_p)
{
  switch (partition->kind)
    {
    case PKIND_NORMAL:
      /* Reductions all have to be in the last partition.  */
      gcc_assert (!partition_reduction_p (partition)
		  || !copy_p);
      generate_loops_for_partition (loop, partition, copy_p);
      return;

    case PKIND_MEMSET:
      generate_memset_builtin (loop, partition);
      break;

    case PKIND_MEMCPY:
      generate_memcpy_builtin (loop, partition);
      break;

    default:
      gcc_unreachable ();
    }

  /* Common tail for partitions we turn into a call.  If this was the last
     partition for which we generate code, we have to destroy the loop.  */
  if (!copy_p)
    destroy_loop (loop);
}
975
976
/* Returns a partition with all the statements needed for computing
   the vertex V of the RDG, also including the loop exit conditions.
   The set is the closure of V under the RDG's dependence edges,
   computed by a DFS from V.  */

static partition_t
build_rdg_partition_for_vertex (struct graph *rdg, int v)
{
  partition_t partition = partition_alloc (NULL, NULL);
  auto_vec<int, 3> nodes;
  unsigned i;
  int x;

  graphds_dfs (rdg, &v, 1, &nodes, false, NULL);

  FOR_EACH_VEC_ELT (nodes, i, x)
    {
      bitmap_set_bit (partition->stmts, x);
      /* Also record which loops the partition's statements live in.  */
      bitmap_set_bit (partition->loops,
		      loop_containing_stmt (RDG_STMT (rdg, x))->num);
    }

  return partition;
}
999
/* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
   Detects the memset and memcpy/memmove patterns; also marks the
   partition as a reduction when a statement has scalar uses after the
   loop.  */

static void
classify_partition (loop_p loop, struct graph *rdg, partition_t partition)
{
  bitmap_iterator bi;
  unsigned i;
  tree nb_iter;
  data_reference_p single_load, single_store;
  bool volatiles_p = false;
  bool plus_one = false;

  partition->kind = PKIND_NORMAL;
  partition->main_dr = NULL;
  partition->secondary_dr = NULL;
  partition->niter = NULL_TREE;
  partition->plus_one = false;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple stmt = RDG_STMT (rdg, i);

      if (gimple_has_volatile_ops (stmt))
	volatiles_p = true;

      /* If the stmt has uses outside of the loop mark it as reduction.  */
      if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
	{
	  partition->reduction_p = true;
	  return;
	}
    }

  /* Perform general partition disqualification for builtins.  */
  if (volatiles_p
      || !flag_tree_loop_distribute_patterns)
    return;

  /* Detect memset and memcpy.  */
  single_load = NULL;
  single_store = NULL;
  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple stmt = RDG_STMT (rdg, i);
      data_reference_p dr;
      unsigned j;

      if (gimple_code (stmt) == GIMPLE_PHI)
	continue;

      /* Any scalar stmts are ok.  */
      if (!gimple_vuse (stmt))
	continue;

      /* Otherwise just regular loads/stores.  */
      if (!gimple_assign_single_p (stmt))
	return;

      /* But exactly one store and/or load.  */
      for (j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j)
	{
	  if (DR_IS_READ (dr))
	    {
	      if (single_load != NULL)
		return;
	      single_load = dr;
	    }
	  else
	    {
	      if (single_store != NULL)
		return;
	      single_store = dr;
	    }
	}
    }

  if (!single_store)
    return;

  /* The builtin size is derived from the latch execution count; when
     the store dominates the exit test it executes one extra time.  */
  nb_iter = number_of_latch_executions (loop);
  if (!nb_iter || nb_iter == chrec_dont_know)
    return;
  if (dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src,
		      gimple_bb (DR_STMT (single_store))))
    plus_one = true;

  if (single_store && !single_load)
    {
      /* Memset pattern: a single store of a byte-replicable value.  */
      gimple stmt = DR_STMT (single_store);
      tree rhs = gimple_assign_rhs1 (stmt);
      if (const_with_all_bytes_same (rhs) == -1
	  && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs))
	      || (TYPE_MODE (TREE_TYPE (rhs))
		  != TYPE_MODE (unsigned_char_type_node))))
	return;
      /* A non-constant value must be invariant in the loop.  */
      if (TREE_CODE (rhs) == SSA_NAME
	  && !SSA_NAME_IS_DEFAULT_DEF (rhs)
	  && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
	return;
      if (!adjacent_dr_p (single_store)
	  || !dominated_by_p (CDI_DOMINATORS,
			      loop->latch, gimple_bb (stmt)))
	return;
      partition->kind = PKIND_MEMSET;
      partition->main_dr = single_store;
      partition->niter = nb_iter;
      partition->plus_one = plus_one;
    }
  else if (single_store && single_load)
    {
      gimple store = DR_STMT (single_store);
      gimple load = DR_STMT (single_load);
      /* Direct aggregate copy or via an SSA name temporary.  */
      if (load != store
	  && gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
	return;
      if (!adjacent_dr_p (single_store)
	  || !adjacent_dr_p (single_load)
	  || !operand_equal_p (DR_STEP (single_store),
			       DR_STEP (single_load), 0)
	  || !dominated_by_p (CDI_DOMINATORS,
			      loop->latch, gimple_bb (store)))
	return;
      /* Now check that if there is a dependence this dependence is
         of a suitable form for memmove.  */
      vec<loop_p> loops = vNULL;
      ddr_p ddr;
      loops.safe_push (loop);
      ddr = initialize_data_dependence_relation (single_load, single_store,
						 loops);
      compute_affine_dependence (ddr, loop);
      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	{
	  free_dependence_relation (ddr);
	  loops.release ();
	  return;
	}
      if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
	{
	  if (DDR_NUM_DIST_VECTS (ddr) == 0)
	    {
	      free_dependence_relation (ddr);
	      loops.release ();
	      return;
	    }
	  /* A positive distance (read before write) cannot be handled
	     by memmove; reject it.  */
	  lambda_vector dist_v;
	  FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
	    {
	      int dist = dist_v[index_in_loop_nest (loop->num,
						    DDR_LOOP_NEST (ddr))];
	      if (dist > 0 && !DDR_REVERSED_P (ddr))
		{
		  free_dependence_relation (ddr);
		  loops.release ();
		  return;
		}
	    }
	}
      free_dependence_relation (ddr);
      loops.release ();
      partition->kind = PKIND_MEMCPY;
      partition->main_dr = single_store;
      partition->secondary_dr = single_load;
      partition->niter = nb_iter;
      partition->plus_one = plus_one;
    }
}
1168
1169 /* For a data reference REF, return the declaration of its base
1170 address or NULL_TREE if the base is not determined. */
1171
1172 static tree
1173 ref_base_address (data_reference_p dr)
1174 {
1175 tree base_address = DR_BASE_ADDRESS (dr);
1176 if (base_address
1177 && TREE_CODE (base_address) == ADDR_EXPR)
1178 return TREE_OPERAND (base_address, 0);
1179
1180 return base_address;
1181 }
1182
1183 /* Returns true when PARTITION1 and PARTITION2 have similar memory
1184 accesses in RDG. */
1185
1186 static bool
1187 similar_memory_accesses (struct graph *rdg, partition_t partition1,
1188 partition_t partition2)
1189 {
1190 unsigned i, j, k, l;
1191 bitmap_iterator bi, bj;
1192 data_reference_p ref1, ref2;
1193
1194 /* First check whether in the intersection of the two partitions are
1195 any loads or stores. Common loads are the situation that happens
1196 most often. */
1197 EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi)
1198 if (RDG_MEM_WRITE_STMT (rdg, i)
1199 || RDG_MEM_READS_STMT (rdg, i))
1200 return true;
1201
1202 /* Then check all data-references against each other. */
1203 EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi)
1204 if (RDG_MEM_WRITE_STMT (rdg, i)
1205 || RDG_MEM_READS_STMT (rdg, i))
1206 EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj)
1207 if (RDG_MEM_WRITE_STMT (rdg, j)
1208 || RDG_MEM_READS_STMT (rdg, j))
1209 {
1210 FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, i), k, ref1)
1211 {
1212 tree base1 = ref_base_address (ref1);
1213 if (base1)
1214 FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, j), l, ref2)
1215 if (base1 == ref_base_address (ref2))
1216 return true;
1217 }
1218 }
1219
1220 return false;
1221 }
1222
1223 /* Aggregate several components into a useful partition that is
1224 registered in the PARTITIONS vector. Partitions will be
1225 distributed in different loops. */
1226
1227 static void
1228 rdg_build_partitions (struct graph *rdg,
1229 vec<gimple> starting_stmts,
1230 vec<partition_t> *partitions)
1231 {
1232 bitmap processed = BITMAP_ALLOC (NULL);
1233 int i;
1234 gimple stmt;
1235
1236 FOR_EACH_VEC_ELT (starting_stmts, i, stmt)
1237 {
1238 int v = rdg_vertex_for_stmt (rdg, stmt);
1239
1240 if (dump_file && (dump_flags & TDF_DETAILS))
1241 fprintf (dump_file,
1242 "ldist asked to generate code for vertex %d\n", v);
1243
1244 /* If the vertex is already contained in another partition so
1245 is the partition rooted at it. */
1246 if (bitmap_bit_p (processed, v))
1247 continue;
1248
1249 partition_t partition = build_rdg_partition_for_vertex (rdg, v);
1250 bitmap_ior_into (processed, partition->stmts);
1251
1252 if (dump_file && (dump_flags & TDF_DETAILS))
1253 {
1254 fprintf (dump_file, "ldist useful partition:\n");
1255 dump_bitmap (dump_file, partition->stmts);
1256 }
1257
1258 partitions->safe_push (partition);
1259 }
1260
1261 /* All vertices should have been assigned to at least one partition now,
1262 other than vertices belonging to dead code. */
1263
1264 BITMAP_FREE (processed);
1265 }
1266
1267 /* Dump to FILE the PARTITIONS. */
1268
1269 static void
1270 dump_rdg_partitions (FILE *file, vec<partition_t> partitions)
1271 {
1272 int i;
1273 partition_t partition;
1274
1275 FOR_EACH_VEC_ELT (partitions, i, partition)
1276 debug_bitmap_file (file, partition->stmts);
1277 }
1278
1279 /* Debug PARTITIONS. */
1280 extern void debug_rdg_partitions (vec<partition_t> );
1281
DEBUG_FUNCTION void
debug_rdg_partitions (vec<partition_t> partitions)
{
  /* Debugger entry point: dump each partition's stmt bitmap to stderr.  */
  dump_rdg_partitions (stderr, partitions);
}
1287
1288 /* Returns the number of read and write operations in the RDG. */
1289
1290 static int
1291 number_of_rw_in_rdg (struct graph *rdg)
1292 {
1293 int i, res = 0;
1294
1295 for (i = 0; i < rdg->n_vertices; i++)
1296 {
1297 if (RDG_MEM_WRITE_STMT (rdg, i))
1298 ++res;
1299
1300 if (RDG_MEM_READS_STMT (rdg, i))
1301 ++res;
1302 }
1303
1304 return res;
1305 }
1306
1307 /* Returns the number of read and write operations in a PARTITION of
1308 the RDG. */
1309
1310 static int
1311 number_of_rw_in_partition (struct graph *rdg, partition_t partition)
1312 {
1313 int res = 0;
1314 unsigned i;
1315 bitmap_iterator ii;
1316
1317 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii)
1318 {
1319 if (RDG_MEM_WRITE_STMT (rdg, i))
1320 ++res;
1321
1322 if (RDG_MEM_READS_STMT (rdg, i))
1323 ++res;
1324 }
1325
1326 return res;
1327 }
1328
1329 /* Returns true when one of the PARTITIONS contains all the read or
1330 write operations of RDG. */
1331
1332 static bool
1333 partition_contains_all_rw (struct graph *rdg,
1334 vec<partition_t> partitions)
1335 {
1336 int i;
1337 partition_t partition;
1338 int nrw = number_of_rw_in_rdg (rdg);
1339
1340 FOR_EACH_VEC_ELT (partitions, i, partition)
1341 if (nrw == number_of_rw_in_partition (rdg, partition))
1342 return true;
1343
1344 return false;
1345 }
1346
1347 /* Compute partition dependence created by the data references in DRS1
1348 and DRS2 and modify and return DIR according to that. */
1349
static int
pg_add_dependence_edges (struct graph *rdg, vec<loop_p> loops, int dir,
			 vec<data_reference_p> drs1,
			 vec<data_reference_p> drs2)
{
  data_reference_p dr1, dr2;

  /* dependence direction - 0 is no dependence, -1 is back,
     1 is forth, 2 is both (we can stop then, merging will occur).  */
  for (int ii = 0; drs1.iterate (ii, &dr1); ++ii)
    for (int jj = 0; drs2.iterate (jj, &dr2); ++jj)
      {
	/* Remember the original dr1 so the swap below can be undone
	   before the next outer-loop iteration.  */
	data_reference_p saved_dr1 = dr1;
	int this_dir = 1;
	ddr_p ddr;
	/* Re-shuffle data-refs to be in dominator order.  */
	if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1))
	    > rdg_vertex_for_stmt (rdg, DR_STMT (dr2)))
	  {
	    data_reference_p tem = dr1;
	    dr1 = dr2;
	    dr2 = tem;
	    this_dir = -this_dir;
	  }
	ddr = initialize_data_dependence_relation (dr1, dr2, loops);
	compute_affine_dependence (ddr, loops[0]);
	if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	  /* Unknown dependence forces both directions (merging).  */
	  this_dir = 2;
	else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
	  {
	    if (DDR_REVERSED_P (ddr))
	      {
		data_reference_p tem = dr1;
		dr1 = dr2;
		dr2 = tem;
		this_dir = -this_dir;
	      }
	    /* Known dependences can still be unordered throughout the
	       iteration space, see gcc.dg/tree-ssa/ldist-16.c.  */
	    if (DDR_NUM_DIST_VECTS (ddr) != 1)
	      this_dir = 2;
	    /* If the overlap is exact preserve stmt order.  */
	    else if (lambda_vector_zerop (DDR_DIST_VECT (ddr, 0), 1))
	      ;
	    else
	      {
		/* Else as the distance vector is lexicographic positive
		   swap the dependence direction.  */
		this_dir = -this_dir;
	      }
	  }
	else
	  /* DDR_ARE_DEPENDENT is chrec_known: no dependence.  */
	  this_dir = 0;
	free_dependence_relation (ddr);
	if (dir == 0)
	  dir = this_dir;
	else if (dir != this_dir)
	  /* Conflicting directions - report "both", caller will merge.  */
	  return 2;
	/* Shuffle "back" dr1.  */
	dr1 = saved_dr1;
      }
  return dir;
}
1413
1414 /* Compare postorder number of the partition graph vertices V1 and V2. */
1415
1416 static int
1417 pgcmp (const void *v1_, const void *v2_)
1418 {
1419 const vertex *v1 = (const vertex *)v1_;
1420 const vertex *v2 = (const vertex *)v2_;
1421 return v2->post - v1->post;
1422 }
1423
1424 /* Distributes the code from LOOP in such a way that producer
1425 statements are placed before consumer statements. Tries to separate
1426 only the statements from STMTS into separate loops.
1427 Returns the number of distributed loops. */
1428
static int
distribute_loop (struct loop *loop, vec<gimple> stmts,
		 control_dependences *cd, int *nb_calls)
{
  struct graph *rdg;
  partition_t partition;
  bool any_builtin;
  int i, nbp;
  graph *pg = NULL;
  int num_sccs = 1;

  *nb_calls = 0;
  auto_vec<loop_p, 3> loop_nest;
  if (!find_loop_nest (loop, &loop_nest))
    return 0;

  /* Build the reduced dependence graph for the loop nest.  */
  rdg = build_rdg (loop_nest, cd);
  if (!rdg)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Loop %d not distributed: failed to build the RDG.\n",
		 loop->num);

      return 0;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg (dump_file, rdg);

  /* Seed partitions from STMTS and classify each one, recording
     whether any was recognized as a builtin pattern.  */
  auto_vec<partition_t, 3> partitions;
  rdg_build_partitions (rdg, stmts, &partitions);

  any_builtin = false;
  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      classify_partition (loop, rdg, partition);
      any_builtin |= partition_builtin_p (partition);
    }

  /* If we are only distributing patterns but did not detect any,
     simply bail out.  */
  if (!flag_tree_loop_distribution
      && !any_builtin)
    {
      nbp = 0;
      goto ldist_done;
    }

  /* If we are only distributing patterns fuse all partitions that
     were not classified as builtins.  This also avoids chopping
     a loop into pieces, separated by builtin calls.  That is, we
     only want no or a single loop body remaining.  */
  partition_t into;
  if (!flag_tree_loop_distribution)
    {
      for (i = 0; partitions.iterate (i, &into); ++i)
	if (!partition_builtin_p (into))
	  break;
      for (++i; partitions.iterate (i, &partition); ++i)
	if (!partition_builtin_p (partition))
	  {
	    if (dump_file && (dump_flags & TDF_DETAILS))
	      {
		fprintf (dump_file, "fusing non-builtin partitions\n");
		dump_bitmap (dump_file, into->stmts);
		dump_bitmap (dump_file, partition->stmts);
	      }
	    partition_merge_into (into, partition);
	    partitions.unordered_remove (i);
	    partition_free (partition);
	    i--;
	  }
    }

  /* Due to limitations in the transform phase we have to fuse all
     reduction partitions into the last partition so the existing
     loop will contain all loop-closed PHI nodes.  */
  for (i = 0; partitions.iterate (i, &into); ++i)
    if (partition_reduction_p (into))
      break;
  for (i = i + 1; partitions.iterate (i, &partition); ++i)
    if (partition_reduction_p (partition))
      {
	if (dump_file && (dump_flags & TDF_DETAILS))
	  {
	    fprintf (dump_file, "fusing partitions\n");
	    dump_bitmap (dump_file, into->stmts);
	    dump_bitmap (dump_file, partition->stmts);
	    fprintf (dump_file, "because they have reductions\n");
	  }
	partition_merge_into (into, partition);
	partitions.unordered_remove (i);
	partition_free (partition);
	i--;
      }

  /* Apply our simple cost model - fuse partitions with similar
     memory accesses.  */
  for (i = 0; partitions.iterate (i, &into); ++i)
    {
      if (partition_builtin_p (into))
	continue;
      for (int j = i + 1;
	   partitions.iterate (j, &partition); ++j)
	{
	  if (!partition_builtin_p (partition)
	      && similar_memory_accesses (rdg, into, partition))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, "fusing partitions\n");
		  dump_bitmap (dump_file, into->stmts);
		  dump_bitmap (dump_file, partition->stmts);
		  fprintf (dump_file, "because they have similar "
			   "memory accesses\n");
		}
	      partition_merge_into (into, partition);
	      partitions.unordered_remove (j);
	      partition_free (partition);
	      j--;
	    }
	}
    }

  /* Build the partition dependency graph.  */
  if (partitions.length () > 1)
    {
      pg = new_graph (partitions.length ());
      struct pgdata {
	  partition_t partition;
	  vec<data_reference_p> writes;
	  vec<data_reference_p> reads;
      };
#define PGDATA(i) ((pgdata *)(pg->vertices[i].data))
      /* Collect the reads and writes of each partition.  */
      for (i = 0; partitions.iterate (i, &partition); ++i)
	{
	  vertex *v = &pg->vertices[i];
	  pgdata *data = new pgdata;
	  data_reference_p dr;
	  /* FIXME - leaks.  */
	  v->data = data;
	  bitmap_iterator bi;
	  unsigned j;
	  data->partition = partition;
	  data->reads = vNULL;
	  data->writes = vNULL;
	  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, j, bi)
	    for (int k = 0; RDG_DATAREFS (rdg, j).iterate (k, &dr); ++k)
	      if (DR_IS_READ (dr))
		data->reads.safe_push (dr);
	      else
		data->writes.safe_push (dr);
	}
      /* Add ordering edges between each pair of partitions with
	 write-read, read-write and write-write dependences.  */
      partition_t partition1, partition2;
      for (i = 0; partitions.iterate (i, &partition1); ++i)
	for (int j = i + 1; partitions.iterate (j, &partition2); ++j)
	  {
	    /* dependence direction - 0 is no dependence, -1 is back,
	       1 is forth, 2 is both (we can stop then, merging will occur).  */
	    int dir = 0;
	    dir = pg_add_dependence_edges (rdg, loop_nest, dir,
					   PGDATA(i)->writes,
					   PGDATA(j)->reads);
	    if (dir != 2)
	      dir = pg_add_dependence_edges (rdg, loop_nest, dir,
					     PGDATA(i)->reads,
					     PGDATA(j)->writes);
	    if (dir != 2)
	      dir = pg_add_dependence_edges (rdg, loop_nest, dir,
					     PGDATA(i)->writes,
					     PGDATA(j)->writes);
	    if (dir == 1 || dir == 2)
	      add_edge (pg, i, j);
	    if (dir == -1 || dir == 2)
	      add_edge (pg, j, i);
	  }

      /* Add edges to the reduction partition (if any) to force it last.  */
      unsigned j;
      for (j = 0; partitions.iterate (j, &partition); ++j)
	if (partition_reduction_p (partition))
	  break;
      if (j < partitions.length ())
	{
	  for (unsigned i = 0; partitions.iterate (i, &partition); ++i)
	    if (i != j)
	      add_edge (pg, i, j);
	}

      /* Compute partitions we cannot separate and fuse them.  */
      num_sccs = graphds_scc (pg, NULL);
      for (i = 0; i < num_sccs; ++i)
	{
	  partition_t first;
	  int j;
	  for (j = 0; partitions.iterate (j, &first); ++j)
	    if (pg->vertices[j].component == i)
	      break;
	  for (j = j + 1; partitions.iterate (j, &partition); ++j)
	    if (pg->vertices[j].component == i)
	      {
		if (dump_file && (dump_flags & TDF_DETAILS))
		  {
		    fprintf (dump_file, "fusing partitions\n");
		    dump_bitmap (dump_file, first->stmts);
		    dump_bitmap (dump_file, partition->stmts);
		    fprintf (dump_file, "because they are in the same "
			     "dependence SCC\n");
		  }
		partition_merge_into (first, partition);
		/* Mark the slot empty; the surviving partition lives in
		   FIRST's vertex, picked up in the postorder walk below.  */
		partitions[j] = NULL;
		partition_free (partition);
		PGDATA (j)->partition = NULL;
	      }
	}

      /* Now order the remaining nodes in postorder.  */
      qsort (pg->vertices, pg->n_vertices, sizeof (vertex), pgcmp);
      partitions.truncate (0);
      for (i = 0; i < pg->n_vertices; ++i)
	{
	  pgdata *data = PGDATA (i);
	  if (data->partition)
	    partitions.safe_push (data->partition);
	  data->reads.release ();
	  data->writes.release ();
	  delete data;
	}
      gcc_assert (partitions.length () == (unsigned)num_sccs);
      free_graph (pg);
    }

  /* Bail out when distribution cannot pay off: nothing left, a single
     non-builtin partition, or one partition covering all reads/writes.  */
  nbp = partitions.length ();
  if (nbp == 0
      || (nbp == 1 && !partition_builtin_p (partitions[0]))
      || (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
    {
      nbp = 0;
      goto ldist_done;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg_partitions (dump_file, partitions);

  /* Generate a loop (or builtin call) for each remaining partition.  */
  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      if (partition_builtin_p (partition))
	(*nb_calls)++;
      generate_code_for_partition (loop, partition, i < nbp - 1);
    }

 ldist_done:

  FOR_EACH_VEC_ELT (partitions, i, partition)
    partition_free (partition);

  free_rdg (rdg);
  return nbp - *nb_calls;
}
1689
1690 /* Distribute all loops in the current function. */
1691
1692 namespace {
1693
/* Pass descriptor for loop distribution, registered under "ldist".  */
const pass_data pass_data_loop_distribution =
{
  GIMPLE_PASS, /* type */
  "ldist", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
1706
class pass_loop_distribution : public gimple_opt_pass
{
public:
  pass_loop_distribution (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_loop_distribution, ctxt)
  {}

  /* opt_pass methods: */
  /* Run when either full distribution (-ftree-loop-distribution) or
     pattern distribution (-ftree-loop-distribute-patterns) is on.  */
  virtual bool gate (function *)
    {
      return flag_tree_loop_distribution
	|| flag_tree_loop_distribute_patterns;
    }

  virtual unsigned int execute (function *);

}; // class pass_loop_distribution
1724
/* Main entry of the pass: walk all innermost loops of FUN and try to
   distribute each.  Returns 0 (no TODO flags).  */
unsigned int
pass_loop_distribution::execute (function *fun)
{
  struct loop *loop;
  bool changed = false;
  basic_block bb;
  control_dependences *cd = NULL;

  /* Invalidate all statement UIDs up front.  */
  FOR_ALL_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	gimple_set_uid (gsi_stmt (gsi), -1);
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	gimple_set_uid (gsi_stmt (gsi), -1);
    }

  /* We can at the moment only distribute non-nested loops, thus restrict
     walking to innermost loops.  */
  FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
    {
      auto_vec<gimple> work_list;
      basic_block *bbs;
      int num = loop->num;
      unsigned int i;

      /* If the loop doesn't have a single exit we will fail anyway,
	 so do that early.  */
      if (!single_exit (loop))
	continue;

      /* Only optimize hot loops.  */
      if (!optimize_loop_for_speed_p (loop))
	continue;

      /* Initialize the worklist with stmts we seed the partitions with.  */
      bbs = get_loop_body_in_dom_order (loop);
      for (i = 0; i < loop->num_nodes; ++i)
	{
	  for (gphi_iterator gsi = gsi_start_phis (bbs[i]);
	       !gsi_end_p (gsi);
	       gsi_next (&gsi))
	    {
	      gphi *phi = gsi.phi ();
	      if (virtual_operand_p (gimple_phi_result (phi)))
		continue;
	      /* Distribute stmts which have defs that are used outside of
		 the loop.  */
	      if (!stmt_has_scalar_dependences_outside_loop (loop, phi))
		continue;
	      work_list.safe_push (phi);
	    }
	  for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]);
	       !gsi_end_p (gsi);
	       gsi_next (&gsi))
	    {
	      gimple stmt = gsi_stmt (gsi);

	      /* If there is a stmt with side-effects bail out - we
		 cannot and should not distribute this loop.  */
	      if (gimple_has_side_effects (stmt))
		{
		  work_list.truncate (0);
		  goto out;
		}

	      /* Distribute stmts which have defs that are used outside of
		 the loop.  */
	      if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
		;
	      /* Otherwise only distribute stores for now.  */
	      else if (!gimple_vdef (stmt))
		continue;

	      work_list.safe_push (stmt);
	    }
	}
out:
      free (bbs);

      int nb_generated_loops = 0;
      int nb_generated_calls = 0;
      location_t loc = find_loop_location (loop);
      if (work_list.length () > 0)
	{
	  if (!cd)
	    {
	      /* Control dependences are computed lazily, once for the
		 whole function, the first time any loop needs them.  */
	      calculate_dominance_info (CDI_DOMINATORS);
	      calculate_dominance_info (CDI_POST_DOMINATORS);
	      cd = new control_dependences (create_edge_list ());
	      free_dominance_info (CDI_POST_DOMINATORS);
	    }
	  nb_generated_loops = distribute_loop (loop, work_list, cd,
						&nb_generated_calls);
	}

      if (nb_generated_loops + nb_generated_calls > 0)
	{
	  changed = true;
	  dump_printf_loc (MSG_OPTIMIZED_LOCATIONS,
			   loc, "Loop %d distributed: split to %d loops "
			   "and %d library calls.\n",
			   num, nb_generated_loops, nb_generated_calls);
	}
      else if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Loop %d is the same.\n", num);
    }

  if (cd)
    delete cd;

  if (changed)
    {
      /* Cached scalar evolutions now may refer to wrong or non-existing
	 loops.  */
      scev_reset_htab ();
      mark_virtual_operands_for_renaming (fun);
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
    }

#ifdef ENABLE_CHECKING
  verify_loop_structure ();
#endif

  return 0;
}
1851
1852 } // anon namespace
1853
gimple_opt_pass *
make_pass_loop_distribution (gcc::context *ctxt)
{
  /* Factory used by the pass manager to instantiate the pass.  */
  return new pass_loop_distribution (ctxt);
}