/* gcc/tree-loop-distribution.c  */
1 /* Loop distribution.
2 Copyright (C) 2006-2015 Free Software Foundation, Inc.
3 Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
4 and Sebastian Pop <sebastian.pop@amd.com>.
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 3, or (at your option) any
11 later version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 /* This pass performs loop distribution: for example, the loop
23
24 |DO I = 2, N
25 | A(I) = B(I) + C
26 | D(I) = A(I-1)*E
27 |ENDDO
28
29 is transformed to
30
31 |DOALL I = 2, N
32 | A(I) = B(I) + C
33 |ENDDO
34 |
35 |DOALL I = 2, N
36 | D(I) = A(I-1)*E
37 |ENDDO
38
39 This pass uses an RDG, Reduced Dependence Graph built on top of the
40 data dependence relations. The RDG is then topologically sorted to
41 obtain a map of information producers/consumers based on which it
42 generates the new loops. */
43
44 #include "config.h"
45 #include "system.h"
46 #include "coretypes.h"
47 #include "hash-set.h"
48 #include "machmode.h"
49 #include "vec.h"
50 #include "double-int.h"
51 #include "input.h"
52 #include "alias.h"
53 #include "symtab.h"
54 #include "options.h"
55 #include "wide-int.h"
56 #include "inchash.h"
57 #include "tree.h"
58 #include "fold-const.h"
59 #include "predict.h"
60 #include "tm.h"
61 #include "hard-reg-set.h"
62 #include "input.h"
63 #include "function.h"
64 #include "dominance.h"
65 #include "cfg.h"
66 #include "cfganal.h"
67 #include "basic-block.h"
68 #include "tree-ssa-alias.h"
69 #include "internal-fn.h"
70 #include "gimple-expr.h"
71 #include "is-a.h"
72 #include "gimple.h"
73 #include "gimple-iterator.h"
74 #include "gimplify-me.h"
75 #include "stor-layout.h"
76 #include "gimple-ssa.h"
77 #include "tree-cfg.h"
78 #include "tree-phinodes.h"
79 #include "ssa-iterators.h"
80 #include "stringpool.h"
81 #include "tree-ssanames.h"
82 #include "tree-ssa-loop-manip.h"
83 #include "tree-ssa-loop.h"
84 #include "tree-into-ssa.h"
85 #include "tree-ssa.h"
86 #include "cfgloop.h"
87 #include "tree-chrec.h"
88 #include "tree-data-ref.h"
89 #include "tree-scalar-evolution.h"
90 #include "tree-pass.h"
91 #include "gimple-pretty-print.h"
92 #include "tree-vectorizer.h"
93
94
/* A Reduced Dependence Graph (RDG) vertex representing a statement.
   One such payload is hung off each vertex of the RDG via the
   generic graph vertex's DATA pointer.  */
typedef struct rdg_vertex
{
  /* The statement represented by this vertex.  */
  gimple stmt;

  /* Vector of data-references in this statement.  */
  vec<data_reference_p> datarefs;

  /* True when the statement contains a write to memory.  */
  bool has_mem_write;

  /* True when the statement contains a read from memory.  */
  bool has_mem_reads;
} *rdg_vertex_p;

/* Accessors for the rdg_vertex payload of a graph vertex V (stored in
   V->data), and for the vertex at index I of graph RDG.  */
#define RDGV_STMT(V)                 ((struct rdg_vertex *) ((V)->data))->stmt
#define RDGV_DATAREFS(V)             ((struct rdg_vertex *) ((V)->data))->datarefs
#define RDGV_HAS_MEM_WRITE(V)        ((struct rdg_vertex *) ((V)->data))->has_mem_write
#define RDGV_HAS_MEM_READS(V)        ((struct rdg_vertex *) ((V)->data))->has_mem_reads
#define RDG_STMT(RDG, I)             RDGV_STMT (&(RDG->vertices[I]))
#define RDG_DATAREFS(RDG, I)         RDGV_DATAREFS (&(RDG->vertices[I]))
#define RDG_MEM_WRITE_STMT(RDG, I)   RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
#define RDG_MEM_READS_STMT(RDG, I)   RDGV_HAS_MEM_READS (&(RDG->vertices[I]))
119
/* Data dependence type.  The character values are used when dumping
   an edge, so keep them printable.  */

enum rdg_dep_type
{
  /* Read After Write (RAW).  */
  flow_dd = 'f',

  /* Control dependence (execute conditional on).  */
  control_dd = 'c'
};

/* Dependence information attached to an edge of the RDG (via the
   generic graph edge's DATA pointer).  */

typedef struct rdg_edge
{
  /* Type of the dependence.  */
  enum rdg_dep_type type;
} *rdg_edge_p;

/* Accessor for the rdg_edge payload of graph edge E.  */
#define RDGE_TYPE(E)                 ((struct rdg_edge *) ((E)->data))->type
140
/* Dump vertex I in RDG to FILE.  Prints the vertex's read/write flags,
   its predecessor and successor vertex indices, and the statement.  */

static void
dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
{
  struct vertex *v = &(rdg->vertices[i]);
  struct graph_edge *e;

  fprintf (file, "(vertex %d: (%s%s) (in:", i,
	   RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
	   RDG_MEM_READS_STMT (rdg, i) ? "r" : "");

  if (v->pred)
    for (e = v->pred; e; e = e->pred_next)
      fprintf (file, " %d", e->src);

  fprintf (file, ") (out:");

  if (v->succ)
    for (e = v->succ; e; e = e->succ_next)
      fprintf (file, " %d", e->dest);

  fprintf (file, ")\n");
  print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
  fprintf (file, ")\n");
}
167
/* Call dump_rdg_vertex on stderr.  Convenience entry point for use
   from the debugger.  */

DEBUG_FUNCTION void
debug_rdg_vertex (struct graph *rdg, int i)
{
  dump_rdg_vertex (stderr, rdg, i);
}
175
176 /* Dump the reduced dependence graph RDG to FILE. */
177
178 static void
179 dump_rdg (FILE *file, struct graph *rdg)
180 {
181 fprintf (file, "(rdg\n");
182 for (int i = 0; i < rdg->n_vertices; i++)
183 dump_rdg_vertex (file, rdg, i);
184 fprintf (file, ")\n");
185 }
186
/* Call dump_rdg on stderr.  Convenience entry point for use from the
   debugger.  */

DEBUG_FUNCTION void
debug_rdg (struct graph *rdg)
{
  dump_rdg (stderr, rdg);
}
194
/* Write RDG to FILE in graphviz "dot" syntax: one node per vertex
   labeled with the statement, memory reads filled green, memory
   writes filled red, and one arrow per dependence edge.  */
static void
dot_rdg_1 (FILE *file, struct graph *rdg)
{
  int i;
  pretty_printer buffer;
  pp_needs_newline (&buffer) = false;
  /* Make the pretty-printer emit directly into FILE on pp_flush.  */
  buffer.buffer->stream = file;

  fprintf (file, "digraph RDG {\n");

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      fprintf (file, "%d [label=\"[%d] ", i, i);
      pp_gimple_stmt_1 (&buffer, RDGV_STMT (v), 0, TDF_SLIM);
      pp_flush (&buffer);
      fprintf (file, "\"]\n");

      /* Highlight reads from memory.  */
      if (RDG_MEM_READS_STMT (rdg, i))
       fprintf (file, "%d [style=filled, fillcolor=green]\n", i);

      /* Highlight stores to memory.  */
      if (RDG_MEM_WRITE_STMT (rdg, i))
       fprintf (file, "%d [style=filled, fillcolor=red]\n", i);

      if (v->succ)
       for (e = v->succ; e; e = e->succ_next)
         switch (RDGE_TYPE (e))
           {
           case flow_dd:
             /* These are the most common dependences: print them as
                plain, unlabeled arrows.  */
             fprintf (file, "%d -> %d \n", i, e->dest);
             break;

           case control_dd:
             fprintf (file, "%d -> %d [label=control] \n", i, e->dest);
             break;

           default:
             gcc_unreachable ();
           }
    }

  fprintf (file, "}\n\n");
}
243
244 /* Display the Reduced Dependence Graph using dotty. */
245
246 DEBUG_FUNCTION void
247 dot_rdg (struct graph *rdg)
248 {
249 /* When debugging, you may want to enable the following code. */
250 #ifdef HAVE_POPEN
251 FILE *file = popen ("dot -Tx11", "w");
252 if (!file)
253 return;
254 dot_rdg_1 (file, rdg);
255 fflush (file);
256 close (fileno (file));
257 pclose (file);
258 #else
259 dot_rdg_1 (stderr, rdg);
260 #endif
261 }
262
/* Returns the index of STMT in RDG, or -1 if STMT is not part of the
   RDG.  The mapping is stored in the statement's uid, set up by
   create_rdg_vertices; RDG itself is only consulted by the checking
   assert (hence ATTRIBUTE_UNUSED in release builds).  */

static int
rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple stmt)
{
  int index = gimple_uid (stmt);
  gcc_checking_assert (index == -1 || RDG_STMT (rdg, index) == stmt);
  return index;
}
272
/* Creates dependence edges in RDG for all the uses of DEF.  IDEF is
   the index of DEF in RDG.  Uses outside the RDG (uid -1) are
   skipped; each remaining use gets a flow (RAW) edge.  */

static void
create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
{
  use_operand_p imm_use_p;
  imm_use_iterator iterator;

  FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
    {
      struct graph_edge *e;
      int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));

      if (use < 0)
	continue;

      e = add_edge (rdg, idef, use);
      e->data = XNEW (struct rdg_edge);
      RDGE_TYPE (e) = flow_dd;
    }
}
295
/* Creates an edge for the control dependences of BB to the vertex V.
   For every CFG edge BB is control-dependent on, add a control_dd
   edge from the controlling statement's vertex (if it is in the RDG)
   to V.  */

static void
create_edge_for_control_dependence (struct graph *rdg, basic_block bb,
				    int v, control_dependences *cd)
{
  bitmap_iterator bi;
  unsigned edge_n;
  EXECUTE_IF_SET_IN_BITMAP (cd->get_edges_dependent_on (bb->index),
			    0, edge_n, bi)
    {
      basic_block cond_bb = cd->get_edge (edge_n)->src;
      gimple stmt = last_stmt (cond_bb);
      if (stmt && is_ctrl_stmt (stmt))
	{
	  struct graph_edge *e;
	  int c = rdg_vertex_for_stmt (rdg, stmt);
	  if (c < 0)
	    continue;

	  e = add_edge (rdg, c, v);
	  e->data = XNEW (struct rdg_edge);
	  RDGE_TYPE (e) = control_dd;
	}
    }
}
322
/* Creates the flow (RAW) dependence edges of the reduced dependence
   graph RDG: for every SSA definition in every vertex, an edge to
   each of its uses within the RDG.  */

static void
create_rdg_flow_edges (struct graph *rdg)
{
  int i;
  def_operand_p def_p;
  ssa_op_iter iter;

  for (i = 0; i < rdg->n_vertices; i++)
    FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
			      iter, SSA_OP_DEF)
      create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
}
337
/* Creates the control dependence edges of the reduced dependence
   graph RDG, using the control dependence info CD.  A PHI depends on
   the control dependences of each incoming edge's source block;
   other statements depend on those of their own block.  */

static void
create_rdg_cd_edges (struct graph *rdg, control_dependences *cd)
{
  int i;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      gimple stmt = RDG_STMT (rdg, i);
      if (gimple_code (stmt) == GIMPLE_PHI)
	{
	  edge_iterator ei;
	  edge e;
	  FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds)
	      create_edge_for_control_dependence (rdg, e->src, i, cd);
	}
      else
	create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd);
    }
}
359
/* Build the vertices of the reduced dependence graph RDG, one per
   statement in STMTS, recording each statement's data references of
   LOOP into *DATAREFS.  As a side effect the uid of each statement is
   set to its vertex index (undone by free_rdg).  Return false if
   data-reference analysis of some statement failed.  */

static bool
create_rdg_vertices (struct graph *rdg, vec<gimple> stmts, loop_p loop,
		     vec<data_reference_p> *datarefs)
{
  int i;
  gimple stmt;

  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      struct vertex *v = &(rdg->vertices[i]);

      /* Record statement to vertex mapping.  */
      gimple_set_uid (stmt, i);

      v->data = XNEW (struct rdg_vertex);
      RDGV_STMT (v) = stmt;
      RDGV_DATAREFS (v).create (0);
      RDGV_HAS_MEM_WRITE (v) = false;
      RDGV_HAS_MEM_READS (v) = false;
      /* PHIs never touch memory, so there is nothing more to do.  */
      if (gimple_code (stmt) == GIMPLE_PHI)
	continue;

      /* Collect the data references this statement adds to *DATAREFS
	 and mirror them (and the read/write flags) in the vertex.  */
      unsigned drp = datarefs->length ();
      if (!find_data_references_in_stmt (loop, stmt, datarefs))
	return false;
      for (unsigned j = drp; j < datarefs->length (); ++j)
	{
	  data_reference_p dr = (*datarefs)[j];
	  if (DR_IS_READ (dr))
	    RDGV_HAS_MEM_READS (v) = true;
	  else
	    RDGV_HAS_MEM_WRITE (v) = true;
	  RDGV_DATAREFS (v).safe_push (dr);
	}
    }
  return true;
}
400
/* Initialize STMTS with all the statements of LOOP.  The order in
   which we discover statements is important as
   generate_loops_for_partition is using the same traversal for
   identifying statements in loop copies.  Labels, debug statements
   and virtual PHIs are skipped.  */

static void
stmts_from_loop (struct loop *loop, vec<gimple> *stmts)
{
  unsigned int i;
  basic_block *bbs = get_loop_body_in_dom_order (loop);

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
	   gsi_next (&bsi))
	if (!virtual_operand_p (gimple_phi_result (bsi.phi ())))
	  stmts->safe_push (bsi.phi ());

      for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
	   gsi_next (&bsi))
	{
	  gimple stmt = gsi_stmt (bsi);
	  if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
	    stmts->safe_push (stmt);
	}
    }

  free (bbs);
}
432
/* Free the reduced dependence graph RDG: edge payloads, vertex
   payloads (including their data references), and the graph itself.
   Also resets each statement's uid to -1, undoing the mapping set up
   by create_rdg_vertices.  */

static void
free_rdg (struct graph *rdg)
{
  int i;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      for (e = v->succ; e; e = e->succ_next)
	free (e->data);

      if (v->data)
	{
	  gimple_set_uid (RDGV_STMT (v), -1);
	  free_data_refs (RDGV_DATAREFS (v));
	  free (v->data);
	}
    }

  free_graph (rdg);
}
458
/* Build the Reduced Dependence Graph (RDG) with one vertex per
   statement of the loop nest LOOP_NEST, and one edge per data dependence or
   scalar dependence.  Control dependence edges are added only when CD
   is non-NULL.  Returns NULL if data-reference analysis failed.  */

static struct graph *
build_rdg (vec<loop_p> loop_nest, control_dependences *cd)
{
  struct graph *rdg;
  vec<data_reference_p> datarefs;

  /* Create the RDG vertices from the stmts of the loop nest.  */
  auto_vec<gimple, 10> stmts;
  stmts_from_loop (loop_nest[0], &stmts);
  rdg = new_graph (stmts.length ());
  datarefs.create (10);
  if (!create_rdg_vertices (rdg, stmts, loop_nest[0], &datarefs))
    {
      datarefs.release ();
      free_rdg (rdg);
      return NULL;
    }
  stmts.release ();

  create_rdg_flow_edges (rdg);
  if (cd)
    create_rdg_cd_edges (rdg, cd);

  /* The vertices keep their own copies of the data-reference
     pointers; the temporary collection vector can go.  */
  datarefs.release ();

  return rdg;
}
490
491
492
/* Kind of code generated for a partition: a plain loop, or a call to
   a recognized builtin replacing the loop.  */
enum partition_kind {
    PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY
};

/* A partition of the statements of a loop, as a candidate for
   becoming a distributed loop (or a builtin call) of its own.  */
typedef struct partition_s
{
  /* Bitmap of RDG vertex indices (statement uids) in the partition.  */
  bitmap stmts;
  /* Bitmap of loop numbers the partition's statements belong to.  */
  bitmap loops;
  /* True when some statement's result is used after the loop, which
     forces this partition into the last generated copy.  */
  bool reduction_p;
  enum partition_kind kind;
  /* data-references a kind != PKIND_NORMAL partition is about.  */
  data_reference_p main_dr;
  data_reference_p secondary_dr;
  /* Latch execution count and whether the store covers one extra
     iteration, used to size the builtin's memory operation.  */
  tree niter;
  bool plus_one;
} *partition_t;
509
510
511 /* Allocate and initialize a partition from BITMAP. */
512
513 static partition_t
514 partition_alloc (bitmap stmts, bitmap loops)
515 {
516 partition_t partition = XCNEW (struct partition_s);
517 partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
518 partition->loops = loops ? loops : BITMAP_ALLOC (NULL);
519 partition->reduction_p = false;
520 partition->kind = PKIND_NORMAL;
521 return partition;
522 }
523
/* Free PARTITION, including its statement and loop bitmaps.  */

static void
partition_free (partition_t partition)
{
  BITMAP_FREE (partition->stmts);
  BITMAP_FREE (partition->loops);
  free (partition);
}
533
534 /* Returns true if the partition can be generated as a builtin. */
535
536 static bool
537 partition_builtin_p (partition_t partition)
538 {
539 return partition->kind != PKIND_NORMAL;
540 }
541
542 /* Returns true if the partition contains a reduction. */
543
544 static bool
545 partition_reduction_p (partition_t partition)
546 {
547 return partition->reduction_p;
548 }
549
/* Merge PARTITION into the partition DEST.  The merged result is no
   longer a recognized builtin pattern, so DEST's kind is reset to
   PKIND_NORMAL; the reduction flag is sticky.  */

static void
partition_merge_into (partition_t dest, partition_t partition)
{
  dest->kind = PKIND_NORMAL;
  bitmap_ior_into (dest->stmts, partition->stmts);
  if (partition_reduction_p (partition))
    dest->reduction_p = true;
}
560
561
/* Returns true when DEF is an SSA_NAME defined in LOOP and used after
   the LOOP.  Debug uses do not count.  */

static bool
ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
{
  imm_use_iterator imm_iter;
  use_operand_p use_p;

  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
    {
      gimple use_stmt = USE_STMT (use_p);
      if (!is_gimple_debug (use_stmt)
	  && loop != loop_containing_stmt (use_stmt))
	return true;
    }

  return false;
}
581
/* Returns true when STMT defines a scalar variable used after the
   loop LOOP.  For a PHI only its result is checked; otherwise every
   SSA definition of STMT is.  */

static bool
stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple stmt)
{
  def_operand_p def_p;
  ssa_op_iter op_iter;

  if (gimple_code (stmt) == GIMPLE_PHI)
    return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop);

  FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
    if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
      return true;

  return false;
}
600
/* Return a copy of LOOP placed before LOOP, on its preheader edge.
   The copy shares LOOP's iteration structure; SSA form is updated
   lazily (delete_update_ssa) by the caller's pass machinery.  */

static struct loop *
copy_loop_before (struct loop *loop)
{
  struct loop *res;
  edge preheader = loop_preheader_edge (loop);

  initialize_original_copy_tables ();
  res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, NULL, preheader);
  gcc_assert (res != NULL);
  free_original_copy_tables ();
  delete_update_ssa ();

  return res;
}
617
618 /* Creates an empty basic block after LOOP. */
619
620 static void
621 create_bb_after_loop (struct loop *loop)
622 {
623 edge exit = single_exit (loop);
624
625 if (!exit)
626 return;
627
628 split_edge (exit);
629 }
630
/* Generate code for PARTITION from the code in LOOP.  The loop is
   copied when COPY_P is true.  All the statements not flagged in the
   PARTITION bitmap are removed from the loop or from its copy.  The
   statements are indexed in sequence inside a basic block, and the
   basic blocks of a loop are taken in dom order.  */

static void
generate_loops_for_partition (struct loop *loop, partition_t partition,
			      bool copy_p)
{
  unsigned i;
  basic_block *bbs;

  if (copy_p)
    {
      /* Work on a fresh copy placed before LOOP, with a simple
	 preheader and an empty block after it.  */
      loop = copy_loop_before (loop);
      gcc_assert (loop != NULL);
      create_preheader (loop, CP_SIMPLE_PREHEADERS);
      create_bb_after_loop (loop);
    }

  /* Remove stmts not in the PARTITION bitmap.  */
  bbs = get_loop_body_in_dom_order (loop);

  /* First pass: reset debug uses of everything that is about to be
     removed, so debug info does not refer to released SSA names.  */
  if (MAY_HAVE_DEBUG_STMTS)
    for (i = 0; i < loop->num_nodes; i++)
      {
	basic_block bb = bbs[i];

	for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
	     gsi_next (&bsi))
	  {
	    gphi *phi = bsi.phi ();
	    if (!virtual_operand_p (gimple_phi_result (phi))
		&& !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
	      reset_debug_uses (phi);
	  }

	for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	  {
	    gimple stmt = gsi_stmt (bsi);
	    if (gimple_code (stmt) != GIMPLE_LABEL
		&& !is_gimple_debug (stmt)
		&& !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
	      reset_debug_uses (stmt);
	  }
      }

  /* Second pass: actually remove the statements.  Labels, debug
     statements and virtual PHIs are always kept.  */
  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
	{
	  gphi *phi = bsi.phi ();
	  if (!virtual_operand_p (gimple_phi_result (phi))
	      && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
	    remove_phi_node (&bsi, true);
	  else
	    gsi_next (&bsi);
	}

      for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
	{
	  gimple stmt = gsi_stmt (bsi);
	  if (gimple_code (stmt) != GIMPLE_LABEL
	      && !is_gimple_debug (stmt)
	      && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
	    {
	      /* Choose an arbitrary path through the empty CFG part
		 that this unnecessary control stmt controls.  */
	      if (gcond *cond_stmt = dyn_cast <gcond *> (stmt))
		{
		  gimple_cond_make_false (cond_stmt);
		  update_stmt (stmt);
		}
	      else if (gimple_code (stmt) == GIMPLE_SWITCH)
		{
		  gswitch *switch_stmt = as_a <gswitch *> (stmt);
		  gimple_switch_set_index
		      (switch_stmt, CASE_LOW (gimple_switch_label (switch_stmt, 1)));
		  update_stmt (stmt);
		}
	      else
		{
		  unlink_stmt_vdef (stmt);
		  gsi_remove (&bsi, true);
		  release_defs (stmt);
		  continue;
		}
	    }
	  gsi_next (&bsi);
	}
    }

  free (bbs);
}
728
729 /* Build the size argument for a memory operation call. */
730
731 static tree
732 build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter,
733 bool plus_one)
734 {
735 tree size = fold_convert_loc (loc, sizetype, nb_iter);
736 if (plus_one)
737 size = size_binop (PLUS_EXPR, size, size_one_node);
738 size = fold_build2_loc (loc, MULT_EXPR, sizetype, size,
739 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
740 size = fold_convert_loc (loc, size_type_node, size);
741 return size;
742 }
743
/* Build an address argument for a memory operation call: the lowest
   address DR touches when the operation covers NB_BYTES bytes.  */

static tree
build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes)
{
  tree addr_base;

  addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
  addr_base = fold_convert_loc (loc, sizetype, addr_base);

  /* Test for a negative stride, iterating over every element.  The
     reference then starts at its highest address, so step back by
     NB_BYTES and forward by one element to get the lowest address.  */
  if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
    {
      addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
				  fold_convert_loc (loc, sizetype, nb_bytes));
      addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
				  TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
    }

  return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base);
}
765
/* If VAL memory representation contains the same value in all bytes,
   return that value, otherwise return -1.
   E.g. for 0x24242424 return 0x24, for IEEE double
   747708026454360457216.0 return 0x44, etc.  */

static int
const_with_all_bytes_same (tree val)
{
  /* 64 bytes covers any mode native_encode_expr handles here;
     constants longer than that yield len == 0 and thus -1.  */
  unsigned char buf[64];
  int i, len;

  /* Zeros of any type (including empty non-clobber constructors)
     trivially qualify, regardless of byte size assumptions.  */
  if (integer_zerop (val)
      || real_zerop (val)
      || (TREE_CODE (val) == CONSTRUCTOR
	  && !TREE_CLOBBER_P (val)
	  && CONSTRUCTOR_NELTS (val) == 0))
    return 0;

  /* The byte-wise comparison below only makes sense with 8-bit
     bytes.  */
  if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
    return -1;

  len = native_encode_expr (val, buf, sizeof (buf));
  if (len == 0)
    return -1;
  for (i = 1; i < len; i++)
    if (buf[i] != buf[0])
      return -1;
  return buf[0];
}
795
/* Generate a call to memset for PARTITION in LOOP, emitted in LOOP's
   preheader block.  The store's data reference, iteration count and
   plus-one flag come from classify_partition.  */

static void
generate_memset_builtin (struct loop *loop, partition_t partition)
{
  gimple_stmt_iterator gsi;
  gimple stmt, fn_call;
  tree mem, fn, nb_bytes;
  location_t loc;
  tree val;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
				 partition->plus_one);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
				       false, GSI_CONTINUE_LINKING);
  mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
				  false, GSI_CONTINUE_LINKING);

  /* This exactly matches the pattern recognition in classify_partition.  */
  val = gimple_assign_rhs1 (stmt);
  /* Handle constants like 0x15151515 and similarly
     floating point constants etc. where all bytes are the same.  */
  int bytev = const_with_all_bytes_same (val);
  if (bytev != -1)
    val = build_int_cst (integer_type_node, bytev);
  else if (TREE_CODE (val) == INTEGER_CST)
    val = fold_convert (integer_type_node, val);
  else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
    {
      /* memset takes an int argument; widen/convert VAL to one.  */
      tree tem = make_ssa_name (integer_type_node);
      gimple cstmt = gimple_build_assign (tem, NOP_EXPR, val);
      gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
      val = tem;
    }

  fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
  fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "generated memset");
      if (bytev == 0)
	fprintf (dump_file, " zero\n");
      else
	fprintf (dump_file, "\n");
    }
}
851
/* Generate a call to memcpy for PARTITION in LOOP, emitted in LOOP's
   preheader block.  Falls back to memmove when the destination and
   source may alias.  */

static void
generate_memcpy_builtin (struct loop *loop, partition_t partition)
{
  gimple_stmt_iterator gsi;
  gimple stmt, fn_call;
  tree dest, src, fn, nb_bytes;
  location_t loc;
  enum built_in_function kind;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
				 partition->plus_one);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
				       false, GSI_CONTINUE_LINKING);
  dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes);
  if (ptr_derefs_may_alias_p (dest, src))
    kind = BUILT_IN_MEMMOVE;
  else
    kind = BUILT_IN_MEMCPY;

  dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
				   false, GSI_CONTINUE_LINKING);
  src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
				  false, GSI_CONTINUE_LINKING);
  fn = build_fold_addr_expr (builtin_decl_implicit (kind));
  fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (kind == BUILT_IN_MEMCPY)
	fprintf (dump_file, "generated memcpy\n");
      else
	fprintf (dump_file, "generated memmove\n");
    }
}
896
/* Remove and destroy the loop LOOP, redirecting its single exit edge
   to come straight from the preheader's source block and deleting all
   of the loop's basic blocks.  Virtual SSA uses that would dangle are
   marked for renaming first.  */

static void
destroy_loop (struct loop *loop)
{
  unsigned nbbs = loop->num_nodes;
  edge exit = single_exit (loop);
  basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
  basic_block *bbs;
  unsigned i;

  bbs = get_loop_body_in_dom_order (loop);

  /* Bypass the loop entirely: SRC now falls through to DEST.  */
  redirect_edge_pred (exit, src);
  exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
  exit->flags |= EDGE_FALLTHRU;
  cancel_loop_tree (loop);
  rescan_loop_exit (exit, false, true);

  for (i = 0; i < nbbs; i++)
    {
      /* We have made sure to not leave any dangling uses of SSA
	 names defined in the loop.  With the exception of virtuals.
	 Make sure we replace all uses of virtual defs that will remain
	 outside of the loop with the bare symbol as delete_basic_block
	 will release them.  */
      for (gphi_iterator gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gphi *phi = gsi.phi ();
	  if (virtual_operand_p (gimple_phi_result (phi)))
	    mark_virtual_phi_result_for_renaming (phi);
	}
      for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple stmt = gsi_stmt (gsi);
	  tree vdef = gimple_vdef (stmt);
	  if (vdef && TREE_CODE (vdef) == SSA_NAME)
	    mark_virtual_operand_for_renaming (vdef);
	}
      delete_basic_block (bbs[i]);
    }
  free (bbs);

  /* Deleting the loop's blocks may have changed DEST's dominator.  */
  set_immediate_dominator (CDI_DOMINATORS, dest,
			   recompute_dominator (CDI_DOMINATORS, dest));
}
945
/* Generates code for PARTITION: either a (possibly copied) loop, or a
   builtin call replacing the loop.  COPY_P is true when more
   partitions follow, i.e. LOOP itself must survive this call.  */

static void
generate_code_for_partition (struct loop *loop,
			     partition_t partition, bool copy_p)
{
  switch (partition->kind)
    {
    case PKIND_NORMAL:
      /* Reductions all have to be in the last partition.  */
      gcc_assert (!partition_reduction_p (partition)
		  || !copy_p);
      generate_loops_for_partition (loop, partition, copy_p);
      return;

    case PKIND_MEMSET:
      generate_memset_builtin (loop, partition);
      break;

    case PKIND_MEMCPY:
      generate_memcpy_builtin (loop, partition);
      break;

    default:
      gcc_unreachable ();
    }

  /* Common tail for partitions we turn into a call.  If this was the last
     partition for which we generate code, we have to destroy the loop.  */
  if (!copy_p)
    destroy_loop (loop);
}
978
979
/* Returns a partition with all the statements needed for computing
   the vertex V of the RDG, also including the loop exit conditions.
   These are found by a DFS over the reversed RDG edges starting at V
   (i.e. everything V transitively depends on).  */

static partition_t
build_rdg_partition_for_vertex (struct graph *rdg, int v)
{
  partition_t partition = partition_alloc (NULL, NULL);
  auto_vec<int, 3> nodes;
  unsigned i;
  int x;

  graphds_dfs (rdg, &v, 1, &nodes, false, NULL);

  FOR_EACH_VEC_ELT (nodes, i, x)
    {
      bitmap_set_bit (partition->stmts, x);
      bitmap_set_bit (partition->loops,
		      loop_containing_stmt (RDG_STMT (rdg, x))->num);
    }

  return partition;
}
1002
/* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP:
   sets PARTITION->kind to PKIND_MEMSET or PKIND_MEMCPY when the
   partition matches one of those patterns, and fills in the main
   (and, for memcpy, secondary) data reference, the iteration count
   and the plus-one flag.  Also marks partitions whose results are
   live after the loop as reductions.  */

static void
classify_partition (loop_p loop, struct graph *rdg, partition_t partition)
{
  bitmap_iterator bi;
  unsigned i;
  tree nb_iter;
  data_reference_p single_load, single_store;
  bool volatiles_p = false;
  bool plus_one = false;

  partition->kind = PKIND_NORMAL;
  partition->main_dr = NULL;
  partition->secondary_dr = NULL;
  partition->niter = NULL_TREE;
  partition->plus_one = false;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple stmt = RDG_STMT (rdg, i);

      if (gimple_has_volatile_ops (stmt))
	volatiles_p = true;

      /* If the stmt has uses outside of the loop mark it as reduction.  */
      if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
	{
	  partition->reduction_p = true;
	  return;
	}
    }

  /* Perform general partition disqualification for builtins.  */
  if (volatiles_p
      || !flag_tree_loop_distribute_patterns)
    return;

  /* Detect memset and memcpy.  */
  single_load = NULL;
  single_store = NULL;
  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple stmt = RDG_STMT (rdg, i);
      data_reference_p dr;
      unsigned j;

      if (gimple_code (stmt) == GIMPLE_PHI)
	continue;

      /* Any scalar stmts are ok.  */
      if (!gimple_vuse (stmt))
	continue;

      /* Otherwise just regular loads/stores.  */
      if (!gimple_assign_single_p (stmt))
	return;

      /* But exactly one store and/or load.  */
      for (j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j)
	{
	  if (DR_IS_READ (dr))
	    {
	      if (single_load != NULL)
		return;
	      single_load = dr;
	    }
	  else
	    {
	      if (single_store != NULL)
		return;
	      single_store = dr;
	    }
	}
    }

  if (!single_store)
    return;

  /* The builtin's byte count is derived from the latch execution
     count, which must be known.  */
  nb_iter = number_of_latch_executions (loop);
  if (!nb_iter || nb_iter == chrec_dont_know)
    return;
  /* If the store executes on the path to the exit as well, it runs
     one more time than the latch does.  */
  if (dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src,
		      gimple_bb (DR_STMT (single_store))))
    plus_one = true;

  if (single_store && !single_load)
    {
      /* Candidate memset: a store of a value whose memory image is a
	 single repeated byte, executed every iteration, with unit
	 stride, and not fed from inside the loop.  */
      gimple stmt = DR_STMT (single_store);
      tree rhs = gimple_assign_rhs1 (stmt);
      if (const_with_all_bytes_same (rhs) == -1
	  && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs))
	      || (TYPE_MODE (TREE_TYPE (rhs))
		  != TYPE_MODE (unsigned_char_type_node))))
	return;
      if (TREE_CODE (rhs) == SSA_NAME
	  && !SSA_NAME_IS_DEFAULT_DEF (rhs)
	  && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
	return;
      if (!adjacent_dr_p (single_store)
	  || !dominated_by_p (CDI_DOMINATORS,
			      loop->latch, gimple_bb (stmt)))
	return;
      partition->kind = PKIND_MEMSET;
      partition->main_dr = single_store;
      partition->niter = nb_iter;
      partition->plus_one = plus_one;
    }
  else if (single_store && single_load)
    {
      /* Candidate memcpy/memmove: a load feeding a store, both with
	 unit and equal strides, executed every iteration.  */
      gimple store = DR_STMT (single_store);
      gimple load = DR_STMT (single_load);
      /* Direct aggregate copy or via an SSA name temporary.  */
      if (load != store
	  && gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
	return;
      if (!adjacent_dr_p (single_store)
	  || !adjacent_dr_p (single_load)
	  || !operand_equal_p (DR_STEP (single_store),
			       DR_STEP (single_load), 0)
	  || !dominated_by_p (CDI_DOMINATORS,
			      loop->latch, gimple_bb (store)))
	return;
      /* Now check that if there is a dependence this dependence is
         of a suitable form for memmove.  */
      vec<loop_p> loops = vNULL;
      ddr_p ddr;
      loops.safe_push (loop);
      ddr = initialize_data_dependence_relation (single_load, single_store,
						 loops);
      compute_affine_dependence (ddr, loop);
      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	{
	  free_dependence_relation (ddr);
	  loops.release ();
	  return;
	}
      if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
	{
	  if (DDR_NUM_DIST_VECTS (ddr) == 0)
	    {
	      free_dependence_relation (ddr);
	      loops.release ();
	      return;
	    }
	  lambda_vector dist_v;
	  FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
	    {
	      int dist = dist_v[index_in_loop_nest (loop->num,
						    DDR_LOOP_NEST (ddr))];
	      /* A positive, non-reversed distance means the store
		 would overwrite bytes the load still needs: not
		 expressible even as memmove.  */
	      if (dist > 0 && !DDR_REVERSED_P (ddr))
		{
		  free_dependence_relation (ddr);
		  loops.release ();
		  return;
		}
	    }
	}
      free_dependence_relation (ddr);
      loops.release ();
      partition->kind = PKIND_MEMCPY;
      partition->main_dr = single_store;
      partition->secondary_dr = single_load;
      partition->niter = nb_iter;
      partition->plus_one = plus_one;
    }
}
1171
1172 /* For a data reference REF, return the declaration of its base
1173 address or NULL_TREE if the base is not determined. */
1174
1175 static tree
1176 ref_base_address (data_reference_p dr)
1177 {
1178 tree base_address = DR_BASE_ADDRESS (dr);
1179 if (base_address
1180 && TREE_CODE (base_address) == ADDR_EXPR)
1181 return TREE_OPERAND (base_address, 0);
1182
1183 return base_address;
1184 }
1185
1186 /* Returns true when PARTITION1 and PARTITION2 have similar memory
1187 accesses in RDG. */
1188
1189 static bool
1190 similar_memory_accesses (struct graph *rdg, partition_t partition1,
1191 partition_t partition2)
1192 {
1193 unsigned i, j, k, l;
1194 bitmap_iterator bi, bj;
1195 data_reference_p ref1, ref2;
1196
1197 /* First check whether in the intersection of the two partitions are
1198 any loads or stores. Common loads are the situation that happens
1199 most often. */
1200 EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi)
1201 if (RDG_MEM_WRITE_STMT (rdg, i)
1202 || RDG_MEM_READS_STMT (rdg, i))
1203 return true;
1204
1205 /* Then check all data-references against each other. */
1206 EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi)
1207 if (RDG_MEM_WRITE_STMT (rdg, i)
1208 || RDG_MEM_READS_STMT (rdg, i))
1209 EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj)
1210 if (RDG_MEM_WRITE_STMT (rdg, j)
1211 || RDG_MEM_READS_STMT (rdg, j))
1212 {
1213 FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, i), k, ref1)
1214 {
1215 tree base1 = ref_base_address (ref1);
1216 if (base1)
1217 FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, j), l, ref2)
1218 if (base1 == ref_base_address (ref2))
1219 return true;
1220 }
1221 }
1222
1223 return false;
1224 }
1225
1226 /* Aggregate several components into a useful partition that is
1227 registered in the PARTITIONS vector. Partitions will be
1228 distributed in different loops. */
1229
1230 static void
1231 rdg_build_partitions (struct graph *rdg,
1232 vec<gimple> starting_stmts,
1233 vec<partition_t> *partitions)
1234 {
1235 bitmap processed = BITMAP_ALLOC (NULL);
1236 int i;
1237 gimple stmt;
1238
1239 FOR_EACH_VEC_ELT (starting_stmts, i, stmt)
1240 {
1241 int v = rdg_vertex_for_stmt (rdg, stmt);
1242
1243 if (dump_file && (dump_flags & TDF_DETAILS))
1244 fprintf (dump_file,
1245 "ldist asked to generate code for vertex %d\n", v);
1246
1247 /* If the vertex is already contained in another partition so
1248 is the partition rooted at it. */
1249 if (bitmap_bit_p (processed, v))
1250 continue;
1251
1252 partition_t partition = build_rdg_partition_for_vertex (rdg, v);
1253 bitmap_ior_into (processed, partition->stmts);
1254
1255 if (dump_file && (dump_flags & TDF_DETAILS))
1256 {
1257 fprintf (dump_file, "ldist useful partition:\n");
1258 dump_bitmap (dump_file, partition->stmts);
1259 }
1260
1261 partitions->safe_push (partition);
1262 }
1263
1264 /* All vertices should have been assigned to at least one partition now,
1265 other than vertices belonging to dead code. */
1266
1267 BITMAP_FREE (processed);
1268 }
1269
1270 /* Dump to FILE the PARTITIONS. */
1271
1272 static void
1273 dump_rdg_partitions (FILE *file, vec<partition_t> partitions)
1274 {
1275 int i;
1276 partition_t partition;
1277
1278 FOR_EACH_VEC_ELT (partitions, i, partition)
1279 debug_bitmap_file (file, partition->stmts);
1280 }
1281
/* Debug PARTITIONS: dump each partition's statement bitmap to stderr.
   Declared extern so it is callable from a debugger.  */
extern void debug_rdg_partitions (vec<partition_t> );

DEBUG_FUNCTION void
debug_rdg_partitions (vec<partition_t> partitions)
{
  dump_rdg_partitions (stderr, partitions);
}
1290
1291 /* Returns the number of read and write operations in the RDG. */
1292
1293 static int
1294 number_of_rw_in_rdg (struct graph *rdg)
1295 {
1296 int i, res = 0;
1297
1298 for (i = 0; i < rdg->n_vertices; i++)
1299 {
1300 if (RDG_MEM_WRITE_STMT (rdg, i))
1301 ++res;
1302
1303 if (RDG_MEM_READS_STMT (rdg, i))
1304 ++res;
1305 }
1306
1307 return res;
1308 }
1309
1310 /* Returns the number of read and write operations in a PARTITION of
1311 the RDG. */
1312
1313 static int
1314 number_of_rw_in_partition (struct graph *rdg, partition_t partition)
1315 {
1316 int res = 0;
1317 unsigned i;
1318 bitmap_iterator ii;
1319
1320 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii)
1321 {
1322 if (RDG_MEM_WRITE_STMT (rdg, i))
1323 ++res;
1324
1325 if (RDG_MEM_READS_STMT (rdg, i))
1326 ++res;
1327 }
1328
1329 return res;
1330 }
1331
1332 /* Returns true when one of the PARTITIONS contains all the read or
1333 write operations of RDG. */
1334
1335 static bool
1336 partition_contains_all_rw (struct graph *rdg,
1337 vec<partition_t> partitions)
1338 {
1339 int i;
1340 partition_t partition;
1341 int nrw = number_of_rw_in_rdg (rdg);
1342
1343 FOR_EACH_VEC_ELT (partitions, i, partition)
1344 if (nrw == number_of_rw_in_partition (rdg, partition))
1345 return true;
1346
1347 return false;
1348 }
1349
/* Compute partition dependence created by the data references in DRS1
   and DRS2 and modify and return DIR according to that.  DIR encodes
   the accumulated dependence direction: 0 no dependence, -1 backward,
   1 forward, 2 both directions (conflicting, caller will merge).  */

static int
pg_add_dependence_edges (struct graph *rdg, vec<loop_p> loops, int dir,
			 vec<data_reference_p> drs1,
			 vec<data_reference_p> drs2)
{
  data_reference_p dr1, dr2;

  /* dependence direction - 0 is no dependence, -1 is back,
     1 is forth, 2 is both (we can stop then, merging will occur).  */
  for (int ii = 0; drs1.iterate (ii, &dr1); ++ii)
    for (int jj = 0; drs2.iterate (jj, &dr2); ++jj)
      {
	int this_dir = 1;
	ddr_p ddr;
	/* Re-shuffle data-refs to be in dominator order.  The sign of
	   this_dir tracks the swap so the final direction is still
	   relative to the original DRS1 -> DRS2 ordering.  */
	if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1))
	    > rdg_vertex_for_stmt (rdg, DR_STMT (dr2)))
	  {
	    data_reference_p tem = dr1;
	    dr1 = dr2;
	    dr2 = tem;
	    this_dir = -this_dir;
	  }
	ddr = initialize_data_dependence_relation (dr1, dr2, loops);
	compute_affine_dependence (ddr, loops[0]);
	/* An unanalyzable dependence conservatively forces both
	   directions.  */
	if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	  this_dir = 2;
	else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
	  {
	    if (DDR_REVERSED_P (ddr))
	      {
		data_reference_p tem = dr1;
		dr1 = dr2;
		dr2 = tem;
		this_dir = -this_dir;
	      }
	    /* Known dependences can still be unordered throughout the
	       iteration space, see gcc.dg/tree-ssa/ldist-16.c.  */
	    if (DDR_NUM_DIST_VECTS (ddr) != 1)
	      this_dir = 2;
	    /* If the overlap is exact preserve stmt order.  */
	    else if (lambda_vector_zerop (DDR_DIST_VECT (ddr, 0), 1))
	      ;
	    else
	      {
		/* Else as the distance vector is lexicographic positive
		   swap the dependence direction.  */
		this_dir = -this_dir;
	      }
	  }
	else
	  /* chrec_known: proven independent, no edge needed.  */
	  this_dir = 0;
	free_dependence_relation (ddr);
	if (dir == 0)
	  dir = this_dir;
	else if (dir != this_dir)
	  /* Conflicting directions across pairs: report both.  */
	  return 2;
      }
  return dir;
}
1413
1414 /* Compare postorder number of the partition graph vertices V1 and V2. */
1415
1416 static int
1417 pgcmp (const void *v1_, const void *v2_)
1418 {
1419 const vertex *v1 = (const vertex *)v1_;
1420 const vertex *v2 = (const vertex *)v2_;
1421 return v2->post - v1->post;
1422 }
1423
/* Distributes the code from LOOP in such a way that producer
   statements are placed before consumer statements.  Tries to separate
   only the statements from STMTS into separate loops.
   Returns the number of distributed loops, i.e. the number of resulting
   partitions minus the number of builtin-call partitions; the latter
   count is stored in *NB_CALLS.  */

static int
distribute_loop (struct loop *loop, vec<gimple> stmts,
		 control_dependences *cd, int *nb_calls)
{
  struct graph *rdg;
  partition_t partition;
  bool any_builtin;
  int i, nbp;
  graph *pg = NULL;
  int num_sccs = 1;

  *nb_calls = 0;
  auto_vec<loop_p, 3> loop_nest;
  if (!find_loop_nest (loop, &loop_nest))
    return 0;

  rdg = build_rdg (loop_nest, cd);
  if (!rdg)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Loop %d not distributed: failed to build the RDG.\n",
		 loop->num);

      return 0;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg (dump_file, rdg);

  auto_vec<partition_t, 3> partitions;
  rdg_build_partitions (rdg, stmts, &partitions);

  /* Classify every partition, recording whether any of them was
     recognized as a builtin (memset/memcpy-like) pattern.  */
  any_builtin = false;
  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      classify_partition (loop, rdg, partition);
      any_builtin |= partition_builtin_p (partition);
    }

  /* If we are only distributing patterns but did not detect any,
     simply bail out.  */
  if (!flag_tree_loop_distribution
      && !any_builtin)
    {
      nbp = 0;
      goto ldist_done;
    }

  /* If we are only distributing patterns fuse all partitions that
     were not classified as builtins.  This also avoids chopping
     a loop into pieces, separated by builtin calls.  That is, we
     only want no or a single loop body remaining.  */
  partition_t into;
  if (!flag_tree_loop_distribution)
    {
      /* INTO becomes the first non-builtin partition; all later
	 non-builtin partitions are merged into it.  */
      for (i = 0; partitions.iterate (i, &into); ++i)
	if (!partition_builtin_p (into))
	  break;
      for (++i; partitions.iterate (i, &partition); ++i)
	if (!partition_builtin_p (partition))
	  {
	    if (dump_file && (dump_flags & TDF_DETAILS))
	      {
		fprintf (dump_file, "fusing non-builtin partitions\n");
		dump_bitmap (dump_file, into->stmts);
		dump_bitmap (dump_file, partition->stmts);
	      }
	    partition_merge_into (into, partition);
	    partitions.unordered_remove (i);
	    partition_free (partition);
	    i--;
	  }
    }

  /* Due to limitations in the transform phase we have to fuse all
     reduction partitions into the last partition so the existing
     loop will contain all loop-closed PHI nodes.  */
  for (i = 0; partitions.iterate (i, &into); ++i)
    if (partition_reduction_p (into))
      break;
  for (i = i + 1; partitions.iterate (i, &partition); ++i)
    if (partition_reduction_p (partition))
      {
	if (dump_file && (dump_flags & TDF_DETAILS))
	  {
	    fprintf (dump_file, "fusing partitions\n");
	    dump_bitmap (dump_file, into->stmts);
	    dump_bitmap (dump_file, partition->stmts);
	    fprintf (dump_file, "because they have reductions\n");
	  }
	partition_merge_into (into, partition);
	partitions.unordered_remove (i);
	partition_free (partition);
	i--;
      }

  /* Apply our simple cost model - fuse partitions with similar
     memory accesses.  */
  for (i = 0; partitions.iterate (i, &into); ++i)
    {
      if (partition_builtin_p (into))
	continue;
      for (int j = i + 1;
	   partitions.iterate (j, &partition); ++j)
	{
	  if (!partition_builtin_p (partition)
	      && similar_memory_accesses (rdg, into, partition))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, "fusing partitions\n");
		  dump_bitmap (dump_file, into->stmts);
		  dump_bitmap (dump_file, partition->stmts);
		  fprintf (dump_file, "because they have similar "
			   "memory accesses\n");
		}
	      partition_merge_into (into, partition);
	      partitions.unordered_remove (j);
	      partition_free (partition);
	      j--;
	    }
	}
    }

  /* Build the partition dependency graph.  */
  if (partitions.length () > 1)
    {
      pg = new_graph (partitions.length ());
      /* Per-vertex payload: the partition plus its data references
	 split into reads and writes.  */
      struct pgdata {
	partition_t partition;
	vec<data_reference_p> writes;
	vec<data_reference_p> reads;
      };
#define PGDATA(i) ((pgdata *)(pg->vertices[i].data))
      for (i = 0; partitions.iterate (i, &partition); ++i)
	{
	  vertex *v = &pg->vertices[i];
	  pgdata *data = new pgdata;
	  data_reference_p dr;
	  /* FIXME - leaks.  */
	  v->data = data;
	  bitmap_iterator bi;
	  unsigned j;
	  data->partition = partition;
	  data->reads = vNULL;
	  data->writes = vNULL;
	  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, j, bi)
	    for (int k = 0; RDG_DATAREFS (rdg, j).iterate (k, &dr); ++k)
	      if (DR_IS_READ (dr))
		data->reads.safe_push (dr);
	      else
		data->writes.safe_push (dr);
	}
      /* Add a dependence edge for every ordered pair of partitions,
	 considering write-read, read-write and write-write pairs.  */
      partition_t partition1, partition2;
      for (i = 0; partitions.iterate (i, &partition1); ++i)
	for (int j = i + 1; partitions.iterate (j, &partition2); ++j)
	  {
	    /* dependence direction - 0 is no dependence, -1 is back,
	       1 is forth, 2 is both (we can stop then, merging will occur).  */
	    int dir = 0;
	    dir = pg_add_dependence_edges (rdg, loop_nest, dir,
					   PGDATA(i)->writes,
					   PGDATA(j)->reads);
	    if (dir != 2)
	      dir = pg_add_dependence_edges (rdg, loop_nest, dir,
					     PGDATA(i)->reads,
					     PGDATA(j)->writes);
	    if (dir != 2)
	      dir = pg_add_dependence_edges (rdg, loop_nest, dir,
					     PGDATA(i)->writes,
					     PGDATA(j)->writes);
	    if (dir == 1 || dir == 2)
	      add_edge (pg, i, j);
	    if (dir == -1 || dir == 2)
	      add_edge (pg, j, i);
	  }

      /* Add edges to the reduction partition (if any) to force it last.  */
      unsigned j;
      for (j = 0; partitions.iterate (j, &partition); ++j)
	if (partition_reduction_p (partition))
	  break;
      if (j < partitions.length ())
	{
	  for (unsigned i = 0; partitions.iterate (i, &partition); ++i)
	    if (i != j)
	      add_edge (pg, i, j);
	}

      /* Compute partitions we cannot separate and fuse them.  */
      num_sccs = graphds_scc (pg, NULL);
      for (i = 0; i < num_sccs; ++i)
	{
	  partition_t first;
	  int j;
	  /* FIRST is the lowest-numbered partition of SCC i; all
	     others of the same SCC are merged into it.  */
	  for (j = 0; partitions.iterate (j, &first); ++j)
	    if (pg->vertices[j].component == i)
	      break;
	  for (j = j + 1; partitions.iterate (j, &partition); ++j)
	    if (pg->vertices[j].component == i)
	      {
		if (dump_file && (dump_flags & TDF_DETAILS))
		  {
		    fprintf (dump_file, "fusing partitions\n");
		    dump_bitmap (dump_file, first->stmts);
		    dump_bitmap (dump_file, partition->stmts);
		    fprintf (dump_file, "because they are in the same "
			     "dependence SCC\n");
		  }
		partition_merge_into (first, partition);
		partitions[j] = NULL;
		partition_free (partition);
		PGDATA (j)->partition = NULL;
	      }
	}

      /* Now order the remaining nodes in postorder.  */
      qsort (pg->vertices, pg->n_vertices, sizeof (vertex), pgcmp);
      partitions.truncate (0);
      for (i = 0; i < pg->n_vertices; ++i)
	{
	  pgdata *data = PGDATA (i);
	  if (data->partition)
	    partitions.safe_push (data->partition);
	  data->reads.release ();
	  data->writes.release ();
	  delete data;
	}
      gcc_assert (partitions.length () == (unsigned)num_sccs);
      free_graph (pg);
    }

  /* Give up when distribution cannot help: no partitions, a single
     non-builtin partition, or one partition holding every memory
     operation of the loop.  */
  nbp = partitions.length ();
  if (nbp == 0
      || (nbp == 1 && !partition_builtin_p (partitions[0]))
      || (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
    {
      nbp = 0;
      goto ldist_done;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg_partitions (dump_file, partitions);

  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      if (partition_builtin_p (partition))
	(*nb_calls)++;
      generate_code_for_partition (loop, partition, i < nbp - 1);
    }

 ldist_done:

  FOR_EACH_VEC_ELT (partitions, i, partition)
    partition_free (partition);

  free_rdg (rdg);
  return nbp - *nb_calls;
}
1689
1690 /* Distribute all loops in the current function. */
1691
1692 namespace {
1693
/* Pass metadata for the "ldist" loop distribution pass.  It requires
   CFG and SSA form; it provides and destroys no properties and queues
   no TODO flags.  */
const pass_data pass_data_loop_distribution =
{
  GIMPLE_PASS, /* type */
  "ldist", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
1706
/* The loop distribution pass object; see execute () below for the
   actual work.  */
class pass_loop_distribution : public gimple_opt_pass
{
public:
  pass_loop_distribution (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_loop_distribution, ctxt)
  {}

  /* opt_pass methods: */
  /* Run when either full distribution (-ftree-loop-distribution) or
     pattern-only distribution (-ftree-loop-distribute-patterns) is
     enabled.  */
  virtual bool gate (function *)
    {
      return flag_tree_loop_distribution
	|| flag_tree_loop_distribute_patterns;
    }

  virtual unsigned int execute (function *);

}; // class pass_loop_distribution
1724
/* Main entry of the pass: walk all innermost loops of FUN, seed a
   statement worklist per loop and hand it to distribute_loop.
   Always returns 0; virtual operands and loop-closed SSA are repaired
   when any loop changed.  */

unsigned int
pass_loop_distribution::execute (function *fun)
{
  struct loop *loop;
  bool changed = false;
  basic_block bb;
  control_dependences *cd = NULL;

  /* Invalidate (set to -1) the uid of every PHI node and statement in
     the function.  */
  FOR_ALL_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	gimple_set_uid (gsi_stmt (gsi), -1);
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	gimple_set_uid (gsi_stmt (gsi), -1);
    }

  /* We can at the moment only distribute non-nested loops, thus restrict
     walking to innermost loops.  */
  FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
    {
      auto_vec<gimple> work_list;
      basic_block *bbs;
      int num = loop->num;
      unsigned int i;

      /* If the loop doesn't have a single exit we will fail anyway,
	 so do that early.  */
      if (!single_exit (loop))
	continue;

      /* Only optimize hot loops.  */
      if (!optimize_loop_for_speed_p (loop))
	continue;

      /* Initialize the worklist with stmts we seed the partitions with.  */
      bbs = get_loop_body_in_dom_order (loop);
      for (i = 0; i < loop->num_nodes; ++i)
	{
	  for (gphi_iterator gsi = gsi_start_phis (bbs[i]);
	       !gsi_end_p (gsi);
	       gsi_next (&gsi))
	    {
	      gphi *phi = gsi.phi ();
	      /* Virtual PHIs do not seed partitions.  */
	      if (virtual_operand_p (gimple_phi_result (phi)))
		continue;
	      /* Distribute stmts which have defs that are used outside of
		 the loop.  */
	      if (!stmt_has_scalar_dependences_outside_loop (loop, phi))
		continue;
	      work_list.safe_push (phi);
	    }
	  for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]);
	       !gsi_end_p (gsi);
	       gsi_next (&gsi))
	    {
	      gimple stmt = gsi_stmt (gsi);

	      /* If there is a stmt with side-effects bail out - we
		 cannot and should not distribute this loop.  */
	      if (gimple_has_side_effects (stmt))
		{
		  /* Drop any seeds already gathered for this loop.  */
		  work_list.truncate (0);
		  goto out;
		}

	      /* Distribute stmts which have defs that are used outside of
		 the loop.  */
	      if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
		;
	      /* Otherwise only distribute stores for now.  */
	      else if (!gimple_vdef (stmt))
		continue;

	      work_list.safe_push (stmt);
	    }
	}
out:
      free (bbs);

      int nb_generated_loops = 0;
      int nb_generated_calls = 0;
      location_t loc = find_loop_location (loop);
      if (work_list.length () > 0)
	{
	  /* Compute control dependences lazily, the first time a loop
	     actually has seed statements, and reuse them for all
	     remaining loops.  */
	  if (!cd)
	    {
	      calculate_dominance_info (CDI_DOMINATORS);
	      calculate_dominance_info (CDI_POST_DOMINATORS);
	      cd = new control_dependences (create_edge_list ());
	      free_dominance_info (CDI_POST_DOMINATORS);
	    }
	  nb_generated_loops = distribute_loop (loop, work_list, cd,
						&nb_generated_calls);
	}

      if (nb_generated_loops + nb_generated_calls > 0)
	{
	  changed = true;
	  dump_printf_loc (MSG_OPTIMIZED_LOCATIONS,
			   loc, "Loop %d distributed: split to %d loops "
			   "and %d library calls.\n",
			   num, nb_generated_loops, nb_generated_calls);
	}
      else if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Loop %d is the same.\n", num);
    }

  if (cd)
    delete cd;

  if (changed)
    {
      /* Renaming virtual operands and rewriting into loop-closed SSA
	 keeps the IL consistent after code generation.  */
      mark_virtual_operands_for_renaming (fun);
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
    }

#ifdef ENABLE_CHECKING
  verify_loop_structure ();
#endif

  return 0;
}
1848
1849 } // anon namespace
1850
1851 gimple_opt_pass *
1852 make_pass_loop_distribution (gcc::context *ctxt)
1853 {
1854 return new pass_loop_distribution (ctxt);
1855 }