/* Loop distribution.
   Copyright (C) 2006-2015 Free Software Foundation, Inc.
   Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
   and Sebastian Pop <sebastian.pop@amd.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass performs loop distribution: for example, the loop

   |DO I = 2, N
   |    A(I) = B(I) + C
   |    D(I) = A(I-1)*E
   |ENDDO

   is transformed to

   |DOALL I = 2, N
   |   A(I) = B(I) + C
   |ENDDO
   |
   |DOALL I = 2, N
   |   D(I) = A(I-1)*E
   |ENDDO

   This pass uses an RDG, Reduced Dependence Graph built on top of the
   data dependence relations.  The RDG is then topologically sorted to
   obtain a map of information producers/consumers based on which it
   generates the new loops.  */
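
/* An illustrative sketch (not part of the implementation): in the
   example above, each statement gets an RDG vertex, and the write to
   A(I) together with the read of A(I-1) forms a loop-carried
   dependence of distance 1.  Because that dependence always points
   from an earlier A(I) definition to a later D(I) use, all iterations
   of the first statement may run before all iterations of the second,
   which is exactly the distribution shown.  */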

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "alias.h"
#include "symtab.h"
#include "options.h"
#include "tree.h"
#include "fold-const.h"
#include "predict.h"
#include "tm.h"
#include "hard-reg-set.h"
#include "function.h"
#include "dominance.h"
#include "cfg.h"
#include "cfganal.h"
#include "basic-block.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-expr.h"
#include "gimple.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "stor-layout.h"
#include "gimple-ssa.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop.h"
#include "tree-into-ssa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-chrec.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "tree-pass.h"
#include "gimple-pretty-print.h"
#include "tree-vectorizer.h"


/* A Reduced Dependence Graph (RDG) vertex representing a statement.  */
typedef struct rdg_vertex
{
  /* The statement represented by this vertex.  */
  gimple stmt;

  /* Vector of data-references in this statement.  */
  vec<data_reference_p> datarefs;

  /* True when the statement contains a write to memory.  */
  bool has_mem_write;

  /* True when the statement contains a read from memory.  */
  bool has_mem_reads;
} *rdg_vertex_p;

#define RDGV_STMT(V)     ((struct rdg_vertex *) ((V)->data))->stmt
#define RDGV_DATAREFS(V) ((struct rdg_vertex *) ((V)->data))->datarefs
#define RDGV_HAS_MEM_WRITE(V) ((struct rdg_vertex *) ((V)->data))->has_mem_write
#define RDGV_HAS_MEM_READS(V) ((struct rdg_vertex *) ((V)->data))->has_mem_reads
#define RDG_STMT(RDG, I) RDGV_STMT (&(RDG->vertices[I]))
#define RDG_DATAREFS(RDG, I) RDGV_DATAREFS (&(RDG->vertices[I]))
#define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
#define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I]))

/* Data dependence type.  */

enum rdg_dep_type
{
  /* Read After Write (RAW).  */
  flow_dd = 'f',

  /* Control dependence (execute conditional on).  */
  control_dd = 'c'
};
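
/* For instance (a hypothetical GIMPLE fragment, for illustration only):

   |  x_1 = a[i_2];      <- v0
   |  if (x_1 > 0)       <- v1: flow edge v0 -> v1 (RAW on x_1)
   |    b[i_2] = x_1;    <- v2: flow edge v0 -> v2, plus a control
   |                        edge v1 -> v2 because v2 executes only
   |                        when the condition in v1 holds.  */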

/* Dependence information attached to an edge of the RDG.  */

typedef struct rdg_edge
{
  /* Type of the dependence.  */
  enum rdg_dep_type type;
} *rdg_edge_p;

#define RDGE_TYPE(E) ((struct rdg_edge *) ((E)->data))->type

/* Dump vertex I in RDG to FILE.  */

static void
dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
{
  struct vertex *v = &(rdg->vertices[i]);
  struct graph_edge *e;

  fprintf (file, "(vertex %d: (%s%s) (in:", i,
	   RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
	   RDG_MEM_READS_STMT (rdg, i) ? "r" : "");

  if (v->pred)
    for (e = v->pred; e; e = e->pred_next)
      fprintf (file, " %d", e->src);

  fprintf (file, ") (out:");

  if (v->succ)
    for (e = v->succ; e; e = e->succ_next)
      fprintf (file, " %d", e->dest);

  fprintf (file, ")\n");
  print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
  fprintf (file, ")\n");
}

/* Call dump_rdg_vertex on stderr.  */

DEBUG_FUNCTION void
debug_rdg_vertex (struct graph *rdg, int i)
{
  dump_rdg_vertex (stderr, rdg, i);
}

/* Dump the reduced dependence graph RDG to FILE.  */

static void
dump_rdg (FILE *file, struct graph *rdg)
{
  fprintf (file, "(rdg\n");
  for (int i = 0; i < rdg->n_vertices; i++)
    dump_rdg_vertex (file, rdg, i);
  fprintf (file, ")\n");
}

/* Call dump_rdg on stderr.  */

DEBUG_FUNCTION void
debug_rdg (struct graph *rdg)
{
  dump_rdg (stderr, rdg);
}

static void
dot_rdg_1 (FILE *file, struct graph *rdg)
{
  int i;
  pretty_printer buffer;
  pp_needs_newline (&buffer) = false;
  buffer.buffer->stream = file;

  fprintf (file, "digraph RDG {\n");

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      fprintf (file, "%d [label=\"[%d] ", i, i);
      pp_gimple_stmt_1 (&buffer, RDGV_STMT (v), 0, TDF_SLIM);
      pp_flush (&buffer);
      fprintf (file, "\"]\n");

      /* Highlight reads from memory.  */
      if (RDG_MEM_READS_STMT (rdg, i))
	fprintf (file, "%d [style=filled, fillcolor=green]\n", i);

      /* Highlight stores to memory.  */
      if (RDG_MEM_WRITE_STMT (rdg, i))
	fprintf (file, "%d [style=filled, fillcolor=red]\n", i);

      if (v->succ)
	for (e = v->succ; e; e = e->succ_next)
	  switch (RDGE_TYPE (e))
	    {
	    case flow_dd:
	      /* These are the most common dependences: don't print these.  */
	      fprintf (file, "%d -> %d \n", i, e->dest);
	      break;

	    case control_dd:
	      fprintf (file, "%d -> %d [label=control] \n", i, e->dest);
	      break;

	    default:
	      gcc_unreachable ();
	    }
    }

  fprintf (file, "}\n\n");
}

/* Display the Reduced Dependence Graph using dotty.  */

DEBUG_FUNCTION void
dot_rdg (struct graph *rdg)
{
  /* When debugging, you may want to enable the following code.  */
#ifdef HAVE_POPEN
  FILE *file = popen ("dot -Tx11", "w");
  if (!file)
    return;
  dot_rdg_1 (file, rdg);
  fflush (file);
  close (fileno (file));
  pclose (file);
#else
  dot_rdg_1 (stderr, rdg);
#endif
}

/* Returns the index of STMT in RDG.  */

static int
rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple stmt)
{
  int index = gimple_uid (stmt);
  gcc_checking_assert (index == -1 || RDG_STMT (rdg, index) == stmt);
  return index;
}

/* Creates dependence edges in RDG for all the uses of DEF.  IDEF is
   the index of DEF in RDG.  */

static void
create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
{
  use_operand_p imm_use_p;
  imm_use_iterator iterator;

  FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
    {
      struct graph_edge *e;
      int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));

      if (use < 0)
	continue;

      e = add_edge (rdg, idef, use);
      e->data = XNEW (struct rdg_edge);
      RDGE_TYPE (e) = flow_dd;
    }
}

/* Creates an edge for the control dependences of BB to the vertex V.  */

static void
create_edge_for_control_dependence (struct graph *rdg, basic_block bb,
				    int v, control_dependences *cd)
{
  bitmap_iterator bi;
  unsigned edge_n;
  EXECUTE_IF_SET_IN_BITMAP (cd->get_edges_dependent_on (bb->index),
			    0, edge_n, bi)
    {
      basic_block cond_bb = cd->get_edge (edge_n)->src;
      gimple stmt = last_stmt (cond_bb);
      if (stmt && is_ctrl_stmt (stmt))
	{
	  struct graph_edge *e;
	  int c = rdg_vertex_for_stmt (rdg, stmt);
	  if (c < 0)
	    continue;

	  e = add_edge (rdg, c, v);
	  e->data = XNEW (struct rdg_edge);
	  RDGE_TYPE (e) = control_dd;
	}
    }
}

/* Creates the flow dependence edges of the reduced dependence graph RDG.  */

static void
create_rdg_flow_edges (struct graph *rdg)
{
  int i;
  def_operand_p def_p;
  ssa_op_iter iter;

  for (i = 0; i < rdg->n_vertices; i++)
    FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
			      iter, SSA_OP_DEF)
      create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
}

/* Creates the control dependence edges of the reduced dependence
   graph RDG.  */

static void
create_rdg_cd_edges (struct graph *rdg, control_dependences *cd)
{
  int i;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      gimple stmt = RDG_STMT (rdg, i);
      if (gimple_code (stmt) == GIMPLE_PHI)
	{
	  edge_iterator ei;
	  edge e;
	  FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds)
	    create_edge_for_control_dependence (rdg, e->src, i, cd);
	}
      else
	create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd);
    }
}

/* Build the vertices of the reduced dependence graph RDG.  Return false
   if that failed.  */

static bool
create_rdg_vertices (struct graph *rdg, vec<gimple> stmts, loop_p loop,
		     vec<data_reference_p> *datarefs)
{
  int i;
  gimple stmt;

  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      struct vertex *v = &(rdg->vertices[i]);

      /* Record statement to vertex mapping.  */
      gimple_set_uid (stmt, i);

      v->data = XNEW (struct rdg_vertex);
      RDGV_STMT (v) = stmt;
      RDGV_DATAREFS (v).create (0);
      RDGV_HAS_MEM_WRITE (v) = false;
      RDGV_HAS_MEM_READS (v) = false;
      if (gimple_code (stmt) == GIMPLE_PHI)
	continue;

      unsigned drp = datarefs->length ();
      if (!find_data_references_in_stmt (loop, stmt, datarefs))
	return false;
      for (unsigned j = drp; j < datarefs->length (); ++j)
	{
	  data_reference_p dr = (*datarefs)[j];
	  if (DR_IS_READ (dr))
	    RDGV_HAS_MEM_READS (v) = true;
	  else
	    RDGV_HAS_MEM_WRITE (v) = true;
	  RDGV_DATAREFS (v).safe_push (dr);
	}
    }
  return true;
}

/* Initialize STMTS with all the statements of LOOP.  The order in
   which we discover statements is important as
   generate_loops_for_partition is using the same traversal for
   identifying statements in loop copies.  */

static void
stmts_from_loop (struct loop *loop, vec<gimple> *stmts)
{
  unsigned int i;
  basic_block *bbs = get_loop_body_in_dom_order (loop);

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
	   gsi_next (&bsi))
	if (!virtual_operand_p (gimple_phi_result (bsi.phi ())))
	  stmts->safe_push (bsi.phi ());

      for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
	   gsi_next (&bsi))
	{
	  gimple stmt = gsi_stmt (bsi);
	  if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
	    stmts->safe_push (stmt);
	}
    }

  free (bbs);
}

/* Free the reduced dependence graph RDG.  */

static void
free_rdg (struct graph *rdg)
{
  int i;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      for (e = v->succ; e; e = e->succ_next)
	free (e->data);

      if (v->data)
	{
	  gimple_set_uid (RDGV_STMT (v), -1);
	  free_data_refs (RDGV_DATAREFS (v));
	  free (v->data);
	}
    }

  free_graph (rdg);
}

/* Build the Reduced Dependence Graph (RDG) with one vertex per
   statement of the loop nest LOOP_NEST, and one edge per data dependence or
   scalar dependence.  */

static struct graph *
build_rdg (vec<loop_p> loop_nest, control_dependences *cd)
{
  struct graph *rdg;
  vec<data_reference_p> datarefs;

  /* Create the RDG vertices from the stmts of the loop nest.  */
  auto_vec<gimple, 10> stmts;
  stmts_from_loop (loop_nest[0], &stmts);
  rdg = new_graph (stmts.length ());
  datarefs.create (10);
  if (!create_rdg_vertices (rdg, stmts, loop_nest[0], &datarefs))
    {
      datarefs.release ();
      free_rdg (rdg);
      return NULL;
    }
  stmts.release ();

  create_rdg_flow_edges (rdg);
  if (cd)
    create_rdg_cd_edges (rdg, cd);

  datarefs.release ();

  return rdg;
}



enum partition_kind {
    PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY
};

typedef struct partition_s
{
  bitmap stmts;
  bitmap loops;
  bool reduction_p;
  enum partition_kind kind;
  /* Data-references a kind != PKIND_NORMAL partition is about.  */
  data_reference_p main_dr;
  data_reference_p secondary_dr;
  tree niter;
  bool plus_one;
} *partition_t;


/* Allocate and initialize a partition from BITMAP.  */

static partition_t
partition_alloc (bitmap stmts, bitmap loops)
{
  partition_t partition = XCNEW (struct partition_s);
  partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
  partition->loops = loops ? loops : BITMAP_ALLOC (NULL);
  partition->reduction_p = false;
  partition->kind = PKIND_NORMAL;
  return partition;
}

/* Free PARTITION.  */

static void
partition_free (partition_t partition)
{
  BITMAP_FREE (partition->stmts);
  BITMAP_FREE (partition->loops);
  free (partition);
}

/* Returns true if the partition can be generated as a builtin.  */

static bool
partition_builtin_p (partition_t partition)
{
  return partition->kind != PKIND_NORMAL;
}

/* Returns true if the partition contains a reduction.  */

static bool
partition_reduction_p (partition_t partition)
{
  return partition->reduction_p;
}

/* Merge PARTITION into the partition DEST.  */

static void
partition_merge_into (partition_t dest, partition_t partition)
{
  dest->kind = PKIND_NORMAL;
  bitmap_ior_into (dest->stmts, partition->stmts);
  if (partition_reduction_p (partition))
    dest->reduction_p = true;
}


/* Returns true when DEF is an SSA_NAME defined in LOOP and used after
   the LOOP.  */

static bool
ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
{
  imm_use_iterator imm_iter;
  use_operand_p use_p;

  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
    {
      gimple use_stmt = USE_STMT (use_p);
      if (!is_gimple_debug (use_stmt)
	  && loop != loop_containing_stmt (use_stmt))
	return true;
    }

  return false;
}

/* Returns true when STMT defines a scalar variable used after the
   loop LOOP.  */

static bool
stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple stmt)
{
  def_operand_p def_p;
  ssa_op_iter op_iter;

  if (gimple_code (stmt) == GIMPLE_PHI)
    return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop);

  FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
    if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
      return true;

  return false;
}

/* Returns a copy of LOOP placed before LOOP.  */

static struct loop *
copy_loop_before (struct loop *loop)
{
  struct loop *res;
  edge preheader = loop_preheader_edge (loop);

  initialize_original_copy_tables ();
  res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, NULL, preheader);
  gcc_assert (res != NULL);
  free_original_copy_tables ();
  delete_update_ssa ();

  return res;
}

/* Creates an empty basic block after LOOP.  */

static void
create_bb_after_loop (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return;

  split_edge (exit);
}

/* Generate code for PARTITION from the code in LOOP.  The loop is
   copied when COPY_P is true.  All the statements not flagged in the
   PARTITION bitmap are removed from the loop or from its copy.  The
   statements are indexed in sequence inside a basic block, and the
   basic blocks of a loop are taken in dom order.  */

static void
generate_loops_for_partition (struct loop *loop, partition_t partition,
			      bool copy_p)
{
  unsigned i;
  basic_block *bbs;

  if (copy_p)
    {
      loop = copy_loop_before (loop);
      gcc_assert (loop != NULL);
      create_preheader (loop, CP_SIMPLE_PREHEADERS);
      create_bb_after_loop (loop);
    }

  /* Remove stmts not in the PARTITION bitmap.  */
  bbs = get_loop_body_in_dom_order (loop);

  if (MAY_HAVE_DEBUG_STMTS)
    for (i = 0; i < loop->num_nodes; i++)
      {
	basic_block bb = bbs[i];

	for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
	     gsi_next (&bsi))
	  {
	    gphi *phi = bsi.phi ();
	    if (!virtual_operand_p (gimple_phi_result (phi))
		&& !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
	      reset_debug_uses (phi);
	  }

	for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
	     gsi_next (&bsi))
	  {
	    gimple stmt = gsi_stmt (bsi);
	    if (gimple_code (stmt) != GIMPLE_LABEL
		&& !is_gimple_debug (stmt)
		&& !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
	      reset_debug_uses (stmt);
	  }
      }

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
	{
	  gphi *phi = bsi.phi ();
	  if (!virtual_operand_p (gimple_phi_result (phi))
	      && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
	    remove_phi_node (&bsi, true);
	  else
	    gsi_next (&bsi);
	}

      for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
	{
	  gimple stmt = gsi_stmt (bsi);
	  if (gimple_code (stmt) != GIMPLE_LABEL
	      && !is_gimple_debug (stmt)
	      && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
	    {
	      /* Choose an arbitrary path through the empty CFG part
		 that this unnecessary control stmt controls.  */
	      if (gcond *cond_stmt = dyn_cast <gcond *> (stmt))
		{
		  gimple_cond_make_false (cond_stmt);
		  update_stmt (stmt);
		}
	      else if (gimple_code (stmt) == GIMPLE_SWITCH)
		{
		  gswitch *switch_stmt = as_a <gswitch *> (stmt);
		  gimple_switch_set_index
		    (switch_stmt, CASE_LOW (gimple_switch_label (switch_stmt, 1)));
		  update_stmt (stmt);
		}
	      else
		{
		  unlink_stmt_vdef (stmt);
		  gsi_remove (&bsi, true);
		  release_defs (stmt);
		  continue;
		}
	    }
	  gsi_next (&bsi);
	}
    }

  free (bbs);
}

/* Build the size argument for a memory operation call.  */

static tree
build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter,
		    bool plus_one)
{
  tree size = fold_convert_loc (loc, sizetype, nb_iter);
  if (plus_one)
    size = size_binop (PLUS_EXPR, size, size_one_node);
  size = fold_build2_loc (loc, MULT_EXPR, sizetype, size,
			  TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
  size = fold_convert_loc (loc, size_type_node, size);
  return size;
}
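
/* A worked example with made-up numbers: for a store of a 4-byte
   element, NB_ITER latch executions and PLUS_ONE set, the size
   argument evaluates to (NB_ITER + 1) * 4, i.e. one element for every
   execution of the loop body including the iteration that leaves the
   loop.  */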

/* Build an address argument for a memory operation call.  */

static tree
build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes)
{
  tree addr_base;

  addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
  addr_base = fold_convert_loc (loc, sizetype, addr_base);

  /* Test for a negative stride, iterating over every element.  */
  if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
    {
      addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
				  fold_convert_loc (loc, sizetype, nb_bytes));
      addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
				  TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
    }

  return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base);
}
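
/* To illustrate the negative stride case with a hypothetical loop:

   |  for (i = n - 1; i >= 0; i--)
   |    a[i] = 0;

   the first accessed element a[n-1] is the highest one, so the code
   above rewinds the address by NB_BYTES and adds back the size of one
   element; the returned pointer then names the lowest byte written,
   which is the address form memset and memcpy expect.  */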

/* If VAL memory representation contains the same value in all bytes,
   return that value, otherwise return -1.
   E.g. for 0x24242424 return 0x24, for IEEE double
   747708026454360457216.0 return 0x44, etc.  */

static int
const_with_all_bytes_same (tree val)
{
  unsigned char buf[64];
  int i, len;

  if (integer_zerop (val)
      || real_zerop (val)
      || (TREE_CODE (val) == CONSTRUCTOR
	  && !TREE_CLOBBER_P (val)
	  && CONSTRUCTOR_NELTS (val) == 0))
    return 0;

  if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
    return -1;

  len = native_encode_expr (val, buf, sizeof (buf));
  if (len == 0)
    return -1;
  for (i = 1; i < len; i++)
    if (buf[i] != buf[0])
      return -1;
  return buf[0];
}

/* Generate a call to memset for PARTITION in LOOP.  */

static void
generate_memset_builtin (struct loop *loop, partition_t partition)
{
  gimple_stmt_iterator gsi;
  gimple stmt, fn_call;
  tree mem, fn, nb_bytes;
  location_t loc;
  tree val;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
				 partition->plus_one);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
				       false, GSI_CONTINUE_LINKING);
  mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
				  false, GSI_CONTINUE_LINKING);

  /* This exactly matches the pattern recognition in classify_partition.  */
  val = gimple_assign_rhs1 (stmt);
  /* Handle constants like 0x15151515 and similarly
     floating point constants etc. where all bytes are the same.  */
  int bytev = const_with_all_bytes_same (val);
  if (bytev != -1)
    val = build_int_cst (integer_type_node, bytev);
  else if (TREE_CODE (val) == INTEGER_CST)
    val = fold_convert (integer_type_node, val);
  else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
    {
      tree tem = make_ssa_name (integer_type_node);
      gimple cstmt = gimple_build_assign (tem, NOP_EXPR, val);
      gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
      val = tem;
    }

  fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
  fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "generated memset");
      if (bytev == 0)
	fprintf (dump_file, " zero\n");
      else
	fprintf (dump_file, "\n");
    }
}

/* Generate a call to memcpy for PARTITION in LOOP.  */

static void
generate_memcpy_builtin (struct loop *loop, partition_t partition)
{
  gimple_stmt_iterator gsi;
  gimple stmt, fn_call;
  tree dest, src, fn, nb_bytes;
  location_t loc;
  enum built_in_function kind;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
				 partition->plus_one);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
				       false, GSI_CONTINUE_LINKING);
  dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes);
  if (ptr_derefs_may_alias_p (dest, src))
    kind = BUILT_IN_MEMMOVE;
  else
    kind = BUILT_IN_MEMCPY;

  dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
				   false, GSI_CONTINUE_LINKING);
  src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
				  false, GSI_CONTINUE_LINKING);
  fn = build_fold_addr_expr (builtin_decl_implicit (kind));
  fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (kind == BUILT_IN_MEMCPY)
	fprintf (dump_file, "generated memcpy\n");
      else
	fprintf (dump_file, "generated memmove\n");
    }
}

/* Remove and destroy the loop LOOP.  */

static void
destroy_loop (struct loop *loop)
{
  unsigned nbbs = loop->num_nodes;
  edge exit = single_exit (loop);
  basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
  basic_block *bbs;
  unsigned i;

  bbs = get_loop_body_in_dom_order (loop);

  redirect_edge_pred (exit, src);
  exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
  exit->flags |= EDGE_FALLTHRU;
  cancel_loop_tree (loop);
  rescan_loop_exit (exit, false, true);

  for (i = 0; i < nbbs; i++)
    {
      /* We have made sure to not leave any dangling uses of SSA
	 names defined in the loop.  With the exception of virtuals.
	 Make sure we replace all uses of virtual defs that will remain
	 outside of the loop with the bare symbol as delete_basic_block
	 will release them.  */
      for (gphi_iterator gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gphi *phi = gsi.phi ();
	  if (virtual_operand_p (gimple_phi_result (phi)))
	    mark_virtual_phi_result_for_renaming (phi);
	}
      for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple stmt = gsi_stmt (gsi);
	  tree vdef = gimple_vdef (stmt);
	  if (vdef && TREE_CODE (vdef) == SSA_NAME)
	    mark_virtual_operand_for_renaming (vdef);
	}
      delete_basic_block (bbs[i]);
    }
  free (bbs);

  set_immediate_dominator (CDI_DOMINATORS, dest,
			   recompute_dominator (CDI_DOMINATORS, dest));
}

/* Generates code for PARTITION.  */

static void
generate_code_for_partition (struct loop *loop,
			     partition_t partition, bool copy_p)
{
  switch (partition->kind)
    {
    case PKIND_NORMAL:
      /* Reductions all have to be in the last partition.  */
      gcc_assert (!partition_reduction_p (partition)
		  || !copy_p);
      generate_loops_for_partition (loop, partition, copy_p);
      return;

    case PKIND_MEMSET:
      generate_memset_builtin (loop, partition);
      break;

    case PKIND_MEMCPY:
      generate_memcpy_builtin (loop, partition);
      break;

    default:
      gcc_unreachable ();
    }

  /* Common tail for partitions we turn into a call.  If this was the last
     partition for which we generate code, we have to destroy the loop.  */
  if (!copy_p)
    destroy_loop (loop);
}


/* Returns a partition with all the statements needed for computing
   the vertex V of the RDG, also including the loop exit conditions.  */

static partition_t
build_rdg_partition_for_vertex (struct graph *rdg, int v)
{
  partition_t partition = partition_alloc (NULL, NULL);
  auto_vec<int, 3> nodes;
  unsigned i;
  int x;

  graphds_dfs (rdg, &v, 1, &nodes, false, NULL);

  FOR_EACH_VEC_ELT (nodes, i, x)
    {
      bitmap_set_bit (partition->stmts, x);
      bitmap_set_bit (partition->loops,
		      loop_containing_stmt (RDG_STMT (rdg, x))->num);
    }

  return partition;
}
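
/* As a sketch of what the DFS collects: seeding at the vertex of a
   store a[i_1] = x_2 walks the RDG edges backwards, pulling in the
   statements computing x_2 and i_1 (via flow edges) and the loop exit
   condition the store depends on (via control edges).  Memory
   dependences between the resulting partitions are not represented
   here; they are checked later by pg_add_dependence_edges.  */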

/* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
   For the moment we detect memset and memcpy/memmove patterns.  */

static void
classify_partition (loop_p loop, struct graph *rdg, partition_t partition)
{
  bitmap_iterator bi;
  unsigned i;
  tree nb_iter;
  data_reference_p single_load, single_store;
  bool volatiles_p = false;
  bool plus_one = false;

  partition->kind = PKIND_NORMAL;
  partition->main_dr = NULL;
  partition->secondary_dr = NULL;
  partition->niter = NULL_TREE;
  partition->plus_one = false;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple stmt = RDG_STMT (rdg, i);

      if (gimple_has_volatile_ops (stmt))
	volatiles_p = true;

      /* If the stmt has uses outside of the loop mark it as a reduction.  */
      if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
	{
	  partition->reduction_p = true;
	  return;
	}
    }

  /* Perform general partition disqualification for builtins.  */
  if (volatiles_p
      || !flag_tree_loop_distribute_patterns)
    return;

  /* Detect memset and memcpy.  */
  single_load = NULL;
  single_store = NULL;
  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple stmt = RDG_STMT (rdg, i);
      data_reference_p dr;
      unsigned j;

      if (gimple_code (stmt) == GIMPLE_PHI)
	continue;

      /* Any scalar stmts are ok.  */
      if (!gimple_vuse (stmt))
	continue;

      /* Otherwise just regular loads/stores.  */
      if (!gimple_assign_single_p (stmt))
	return;

      /* But exactly one store and/or load.  */
      for (j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j)
	{
	  if (DR_IS_READ (dr))
	    {
	      if (single_load != NULL)
		return;
	      single_load = dr;
	    }
	  else
	    {
	      if (single_store != NULL)
		return;
	      single_store = dr;
	    }
	}
    }

  if (!single_store)
    return;

  nb_iter = number_of_latch_executions (loop);
  if (!nb_iter || nb_iter == chrec_dont_know)
    return;
  if (dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src,
		      gimple_bb (DR_STMT (single_store))))
    plus_one = true;

  if (single_store && !single_load)
    {
      gimple stmt = DR_STMT (single_store);
      tree rhs = gimple_assign_rhs1 (stmt);
      if (const_with_all_bytes_same (rhs) == -1
	  && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs))
	      || (TYPE_MODE (TREE_TYPE (rhs))
		  != TYPE_MODE (unsigned_char_type_node))))
	return;
      if (TREE_CODE (rhs) == SSA_NAME
	  && !SSA_NAME_IS_DEFAULT_DEF (rhs)
	  && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
	return;
      if (!adjacent_dr_p (single_store)
	  || !dominated_by_p (CDI_DOMINATORS,
			      loop->latch, gimple_bb (stmt)))
	return;
      partition->kind = PKIND_MEMSET;
      partition->main_dr = single_store;
      partition->niter = nb_iter;
      partition->plus_one = plus_one;
    }
  else if (single_store && single_load)
    {
      gimple store = DR_STMT (single_store);
      gimple load = DR_STMT (single_load);
      /* Direct aggregate copy or via an SSA name temporary.  */
      if (load != store
	  && gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
	return;
      if (!adjacent_dr_p (single_store)
	  || !adjacent_dr_p (single_load)
	  || !operand_equal_p (DR_STEP (single_store),
			       DR_STEP (single_load), 0)
	  || !dominated_by_p (CDI_DOMINATORS,
			      loop->latch, gimple_bb (store)))
	return;
      /* Now check that, if there is a dependence, it is of a suitable
	 form for memmove.  */
      vec<loop_p> loops = vNULL;
      ddr_p ddr;
      loops.safe_push (loop);
      ddr = initialize_data_dependence_relation (single_load, single_store,
						 loops);
      compute_affine_dependence (ddr, loop);
      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	{
	  free_dependence_relation (ddr);
	  loops.release ();
	  return;
	}
      if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
	{
	  if (DDR_NUM_DIST_VECTS (ddr) == 0)
	    {
	      free_dependence_relation (ddr);
	      loops.release ();
	      return;
	    }
	  lambda_vector dist_v;
	  FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
	    {
	      int dist = dist_v[index_in_loop_nest (loop->num,
						    DDR_LOOP_NEST (ddr))];
	      if (dist > 0 && !DDR_REVERSED_P (ddr))
		{
		  free_dependence_relation (ddr);
		  loops.release ();
		  return;
		}
	    }
	}
      free_dependence_relation (ddr);
      loops.release ();
      partition->kind = PKIND_MEMCPY;
      partition->main_dr = single_store;
      partition->secondary_dr = single_load;
      partition->niter = nb_iter;
      partition->plus_one = plus_one;
    }
}
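
/* Illustrative examples of the classification (sketches only):

   |  for (i = 0; i < n; i++)     PKIND_MEMSET, provided all bytes of
   |    a[i] = c;                 c's representation are equal or the
   |                              store is byte-sized

   |  for (i = 0; i < n; i++)     PKIND_MEMCPY when both accesses are
   |    a[i] = b[i];              adjacent with equal steps (memmove
   |                              is used if they may overlap)

   A serial propagation such as a[i] = a[i-1] fails the distance check
   above, since neither memcpy nor memmove reproduces its effect.  */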

/* For a data reference DR, return the declaration of its base
   address or NULL_TREE if the base is not determined.  */

static tree
ref_base_address (data_reference_p dr)
{
  tree base_address = DR_BASE_ADDRESS (dr);
  if (base_address
      && TREE_CODE (base_address) == ADDR_EXPR)
    return TREE_OPERAND (base_address, 0);

  return base_address;
}

/* Returns true when PARTITION1 and PARTITION2 have similar memory
   accesses in RDG.  */

static bool
similar_memory_accesses (struct graph *rdg, partition_t partition1,
			 partition_t partition2)
{
  unsigned i, j, k, l;
  bitmap_iterator bi, bj;
  data_reference_p ref1, ref2;

  /* First check whether in the intersection of the two partitions are
     any loads or stores.  Common loads are the situation that happens
     most often.  */
  EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
	|| RDG_MEM_READS_STMT (rdg, i))
      return true;

  /* Then check all data-references against each other.  */
  EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
	|| RDG_MEM_READS_STMT (rdg, i))
      EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj)
	if (RDG_MEM_WRITE_STMT (rdg, j)
	    || RDG_MEM_READS_STMT (rdg, j))
	  {
	    FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, i), k, ref1)
	      {
		tree base1 = ref_base_address (ref1);
		if (base1)
		  FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, j), l, ref2)
		    if (base1 == ref_base_address (ref2))
		      return true;
	      }
	  }

  return false;
}
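
/* For example (a sketch): a partition storing to a[i] and another one
   loading a[i] both reference the base object a, so the two are
   reported as similar; the cost model in distribute_loop then fuses
   them rather than paying for the same memory traffic in two separate
   loops.  */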

/* Aggregate several components into a useful partition that is
   registered in the PARTITIONS vector.  Partitions will be
   distributed in different loops.  */

static void
rdg_build_partitions (struct graph *rdg,
		      vec<gimple> starting_stmts,
		      vec<partition_t> *partitions)
{
  bitmap processed = BITMAP_ALLOC (NULL);
  int i;
  gimple stmt;

  FOR_EACH_VEC_ELT (starting_stmts, i, stmt)
    {
      int v = rdg_vertex_for_stmt (rdg, stmt);

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "ldist asked to generate code for vertex %d\n", v);

      /* If the vertex is already contained in another partition so
	 is the partition rooted at it.  */
      if (bitmap_bit_p (processed, v))
	continue;

      partition_t partition = build_rdg_partition_for_vertex (rdg, v);
      bitmap_ior_into (processed, partition->stmts);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "ldist useful partition:\n");
	  dump_bitmap (dump_file, partition->stmts);
	}

      partitions->safe_push (partition);
    }

  /* All vertices should have been assigned to at least one partition now,
     other than vertices belonging to dead code.  */

  BITMAP_FREE (processed);
}

/* Dump to FILE the PARTITIONS.  */

static void
dump_rdg_partitions (FILE *file, vec<partition_t> partitions)
{
  int i;
  partition_t partition;

  FOR_EACH_VEC_ELT (partitions, i, partition)
    debug_bitmap_file (file, partition->stmts);
}

/* Debug PARTITIONS.  */
extern void debug_rdg_partitions (vec<partition_t> );

DEBUG_FUNCTION void
debug_rdg_partitions (vec<partition_t> partitions)
{
  dump_rdg_partitions (stderr, partitions);
}

/* Returns the number of read and write operations in the RDG.  */

static int
number_of_rw_in_rdg (struct graph *rdg)
{
  int i, res = 0;

  for (i = 0; i < rdg->n_vertices; i++)
    {
      if (RDG_MEM_WRITE_STMT (rdg, i))
	++res;

      if (RDG_MEM_READS_STMT (rdg, i))
	++res;
    }

  return res;
}

/* Returns the number of read and write operations in a PARTITION of
   the RDG.  */

static int
number_of_rw_in_partition (struct graph *rdg, partition_t partition)
{
  int res = 0;
  unsigned i;
  bitmap_iterator ii;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii)
    {
      if (RDG_MEM_WRITE_STMT (rdg, i))
	++res;

      if (RDG_MEM_READS_STMT (rdg, i))
	++res;
    }

  return res;
}

/* Returns true when one of the PARTITIONS contains all the read or
   write operations of RDG.  */

static bool
partition_contains_all_rw (struct graph *rdg,
			   vec<partition_t> partitions)
{
  int i;
  partition_t partition;
  int nrw = number_of_rw_in_rdg (rdg);

  FOR_EACH_VEC_ELT (partitions, i, partition)
    if (nrw == number_of_rw_in_partition (rdg, partition))
      return true;

  return false;
}

/* Compute partition dependence created by the data references in DRS1
   and DRS2 and modify and return DIR according to that.  */

static int
pg_add_dependence_edges (struct graph *rdg, vec<loop_p> loops, int dir,
			 vec<data_reference_p> drs1,
			 vec<data_reference_p> drs2)
{
  data_reference_p dr1, dr2;

  /* dependence direction - 0 is no dependence, -1 is back,
     1 is forth, 2 is both (we can stop then, merging will occur).  */
  for (int ii = 0; drs1.iterate (ii, &dr1); ++ii)
    for (int jj = 0; drs2.iterate (jj, &dr2); ++jj)
      {
	data_reference_p saved_dr1 = dr1;
	int this_dir = 1;
	ddr_p ddr;
	/* Re-shuffle data-refs to be in dominator order.  */
	if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1))
	    > rdg_vertex_for_stmt (rdg, DR_STMT (dr2)))
	  {
	    std::swap (dr1, dr2);
	    this_dir = -this_dir;
	  }
	ddr = initialize_data_dependence_relation (dr1, dr2, loops);
	compute_affine_dependence (ddr, loops[0]);
	if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	  this_dir = 2;
	else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
	  {
	    if (DDR_REVERSED_P (ddr))
	      {
		std::swap (dr1, dr2);
		this_dir = -this_dir;
	      }
	    /* Known dependences can still be unordered throughout the
	       iteration space, see gcc.dg/tree-ssa/ldist-16.c.  */
	    if (DDR_NUM_DIST_VECTS (ddr) != 1)
	      this_dir = 2;
	    /* If the overlap is exact preserve stmt order.  */
	    else if (lambda_vector_zerop (DDR_DIST_VECT (ddr, 0), 1))
	      ;
	    else
	      {
		/* Else as the distance vector is lexicographic positive
		   swap the dependence direction.  */
		this_dir = -this_dir;
	      }
	  }
	else
	  this_dir = 0;
	free_dependence_relation (ddr);
	if (dir == 0)
	  dir = this_dir;
	else if (dir != this_dir)
	  return 2;
	/* Shuffle "back" dr1.  */
	dr1 = saved_dr1;
      }
  return dir;
}
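
/* A worked example of the direction encoding (a sketch): if one
   partition writes a[i] and a later partition reads a[i], the
   dependence points forward, the function returns 1, and the caller
   adds an edge that keeps the writer before the reader.  If another
   pair of references yields the opposite direction, the directions
   conflict, 2 is returned, and the caller must fuse the two
   partitions since no ordering of separate loops satisfies both.  */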

/* Compare postorder number of the partition graph vertices V1 and V2.  */

static int
pgcmp (const void *v1_, const void *v2_)
{
  const vertex *v1 = (const vertex *)v1_;
  const vertex *v2 = (const vertex *)v2_;
  return v2->post - v1->post;
}

/* Distributes the code from LOOP in such a way that producer
   statements are placed before consumer statements.  Tries to separate
   only the statements from STMTS into separate loops.
   Returns the number of distributed loops.  */

static int
distribute_loop (struct loop *loop, vec<gimple> stmts,
		 control_dependences *cd, int *nb_calls)
{
  struct graph *rdg;
  partition_t partition;
  bool any_builtin;
  int i, nbp;
  graph *pg = NULL;
  int num_sccs = 1;

  *nb_calls = 0;
  auto_vec<loop_p, 3> loop_nest;
  if (!find_loop_nest (loop, &loop_nest))
    return 0;

  rdg = build_rdg (loop_nest, cd);
  if (!rdg)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Loop %d not distributed: failed to build the RDG.\n",
		 loop->num);

      return 0;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg (dump_file, rdg);

  auto_vec<partition_t, 3> partitions;
  rdg_build_partitions (rdg, stmts, &partitions);

  any_builtin = false;
  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      classify_partition (loop, rdg, partition);
      any_builtin |= partition_builtin_p (partition);
    }

  /* If we are only distributing patterns but did not detect any,
     simply bail out.  */
  if (!flag_tree_loop_distribution
      && !any_builtin)
    {
      nbp = 0;
      goto ldist_done;
    }

  /* If we are only distributing patterns fuse all partitions that
     were not classified as builtins.  This also avoids chopping
     a loop into pieces, separated by builtin calls.  That is, we
     only want no or a single loop body remaining.  */
  partition_t into;
  if (!flag_tree_loop_distribution)
    {
      for (i = 0; partitions.iterate (i, &into); ++i)
	if (!partition_builtin_p (into))
	  break;
      for (++i; partitions.iterate (i, &partition); ++i)
	if (!partition_builtin_p (partition))
	  {
	    if (dump_file && (dump_flags & TDF_DETAILS))
	      {
		fprintf (dump_file, "fusing non-builtin partitions\n");
		dump_bitmap (dump_file, into->stmts);
		dump_bitmap (dump_file, partition->stmts);
	      }
	    partition_merge_into (into, partition);
	    partitions.unordered_remove (i);
	    partition_free (partition);
	    i--;
	  }
    }

  /* Due to limitations in the transform phase we have to fuse all
     reduction partitions into the last partition so the existing
     loop will contain all loop-closed PHI nodes.  */
  for (i = 0; partitions.iterate (i, &into); ++i)
    if (partition_reduction_p (into))
      break;
  for (i = i + 1; partitions.iterate (i, &partition); ++i)
    if (partition_reduction_p (partition))
      {
	if (dump_file && (dump_flags & TDF_DETAILS))
	  {
	    fprintf (dump_file, "fusing partitions\n");
	    dump_bitmap (dump_file, into->stmts);
	    dump_bitmap (dump_file, partition->stmts);
	    fprintf (dump_file, "because they have reductions\n");
	  }
	partition_merge_into (into, partition);
	partitions.unordered_remove (i);
	partition_free (partition);
	i--;
      }

  /* Apply our simple cost model - fuse partitions with similar
     memory accesses.  */
  for (i = 0; partitions.iterate (i, &into); ++i)
    {
      if (partition_builtin_p (into))
	continue;
      for (int j = i + 1;
	   partitions.iterate (j, &partition); ++j)
	{
	  if (!partition_builtin_p (partition)
	      && similar_memory_accesses (rdg, into, partition))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, "fusing partitions\n");
		  dump_bitmap (dump_file, into->stmts);
		  dump_bitmap (dump_file, partition->stmts);
		  fprintf (dump_file, "because they have similar "
			   "memory accesses\n");
		}
	      partition_merge_into (into, partition);
	      partitions.unordered_remove (j);
	      partition_free (partition);
	      j--;
	    }
	}
    }

  /* Build the partition dependency graph.  */
  if (partitions.length () > 1)
    {
      pg = new_graph (partitions.length ());
      struct pgdata {
	  partition_t partition;
	  vec<data_reference_p> writes;
	  vec<data_reference_p> reads;
      };
#define PGDATA(i) ((pgdata *)(pg->vertices[i].data))
      for (i = 0; partitions.iterate (i, &partition); ++i)
	{
	  vertex *v = &pg->vertices[i];
	  pgdata *data = new pgdata;
	  data_reference_p dr;
	  /* FIXME - leaks.  */
	  v->data = data;
	  bitmap_iterator bi;
	  unsigned j;
	  data->partition = partition;
	  data->reads = vNULL;
	  data->writes = vNULL;
	  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, j, bi)
	    for (int k = 0; RDG_DATAREFS (rdg, j).iterate (k, &dr); ++k)
	      if (DR_IS_READ (dr))
		data->reads.safe_push (dr);
	      else
		data->writes.safe_push (dr);
	}
      partition_t partition1, partition2;
      for (i = 0; partitions.iterate (i, &partition1); ++i)
	for (int j = i + 1; partitions.iterate (j, &partition2); ++j)
	  {
	    /* dependence direction - 0 is no dependence, -1 is back,
	       1 is forth, 2 is both (we can stop then, merging will
	       occur).  */
	    int dir = 0;
	    dir = pg_add_dependence_edges (rdg, loop_nest, dir,
					   PGDATA(i)->writes,
					   PGDATA(j)->reads);
	    if (dir != 2)
	      dir = pg_add_dependence_edges (rdg, loop_nest, dir,
					     PGDATA(i)->reads,
					     PGDATA(j)->writes);
	    if (dir != 2)
	      dir = pg_add_dependence_edges (rdg, loop_nest, dir,
					     PGDATA(i)->writes,
					     PGDATA(j)->writes);
	    if (dir == 1 || dir == 2)
	      add_edge (pg, i, j);
	    if (dir == -1 || dir == 2)
	      add_edge (pg, j, i);
	  }

      /* Add edges to the reduction partition (if any) to force it last.  */
      unsigned j;
      for (j = 0; partitions.iterate (j, &partition); ++j)
	if (partition_reduction_p (partition))
	  break;
      if (j < partitions.length ())
	{
	  for (unsigned i = 0; partitions.iterate (i, &partition); ++i)
	    if (i != j)
	      add_edge (pg, i, j);
	}

      /* Compute partitions we cannot separate and fuse them.  */
      num_sccs = graphds_scc (pg, NULL);
      for (i = 0; i < num_sccs; ++i)
	{
	  partition_t first;
	  int j;
	  for (j = 0; partitions.iterate (j, &first); ++j)
	    if (pg->vertices[j].component == i)
	      break;
	  for (j = j + 1; partitions.iterate (j, &partition); ++j)
	    if (pg->vertices[j].component == i)
	      {
		if (dump_file && (dump_flags & TDF_DETAILS))
		  {
		    fprintf (dump_file, "fusing partitions\n");
		    dump_bitmap (dump_file, first->stmts);
		    dump_bitmap (dump_file, partition->stmts);
		    fprintf (dump_file, "because they are in the same "
			     "dependence SCC\n");
		  }
		partition_merge_into (first, partition);
		partitions[j] = NULL;
		partition_free (partition);
		PGDATA (j)->partition = NULL;
	      }
	}

      /* Now order the remaining nodes in postorder.  */
      qsort (pg->vertices, pg->n_vertices, sizeof (vertex), pgcmp);
      partitions.truncate (0);
      for (i = 0; i < pg->n_vertices; ++i)
	{
	  pgdata *data = PGDATA (i);
	  if (data->partition)
	    partitions.safe_push (data->partition);
	  data->reads.release ();
	  data->writes.release ();
	  delete data;
	}
      gcc_assert (partitions.length () == (unsigned)num_sccs);
      free_graph (pg);
    }

  nbp = partitions.length ();
  if (nbp == 0
      || (nbp == 1 && !partition_builtin_p (partitions[0]))
      || (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
    {
      nbp = 0;
      goto ldist_done;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg_partitions (dump_file, partitions);

  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      if (partition_builtin_p (partition))
	(*nb_calls)++;
      generate_code_for_partition (loop, partition, i < nbp - 1);
    }

 ldist_done:

  FOR_EACH_VEC_ELT (partitions, i, partition)
    partition_free (partition);

  free_rdg (rdg);
  return nbp - *nb_calls;
}

/* Distribute all loops in the current function.  */

namespace {

const pass_data pass_data_loop_distribution =
{
  GIMPLE_PASS, /* type */
  "ldist", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_loop_distribution : public gimple_opt_pass
{
public:
  pass_loop_distribution (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_loop_distribution, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return flag_tree_loop_distribution
	|| flag_tree_loop_distribute_patterns;
    }

  virtual unsigned int execute (function *);

}; // class pass_loop_distribution

unsigned int
pass_loop_distribution::execute (function *fun)
{
  struct loop *loop;
  bool changed = false;
  basic_block bb;
  control_dependences *cd = NULL;

  FOR_ALL_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	gimple_set_uid (gsi_stmt (gsi), -1);
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	gimple_set_uid (gsi_stmt (gsi), -1);
    }

  /* We can at the moment only distribute non-nested loops, thus restrict
     walking to innermost loops.  */
  FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
    {
      auto_vec<gimple> work_list;
      basic_block *bbs;
      int num = loop->num;
      unsigned int i;

      /* If the loop doesn't have a single exit we will fail anyway,
	 so do that early.  */
      if (!single_exit (loop))
	continue;

      /* Only optimize hot loops.  */
      if (!optimize_loop_for_speed_p (loop))
	continue;

      /* Initialize the worklist with stmts we seed the partitions with.  */
      bbs = get_loop_body_in_dom_order (loop);
      for (i = 0; i < loop->num_nodes; ++i)
	{
	  for (gphi_iterator gsi = gsi_start_phis (bbs[i]);
	       !gsi_end_p (gsi);
	       gsi_next (&gsi))
	    {
	      gphi *phi = gsi.phi ();
	      if (virtual_operand_p (gimple_phi_result (phi)))
		continue;
	      /* Distribute stmts which have defs that are used outside of
		 the loop.  */
	      if (!stmt_has_scalar_dependences_outside_loop (loop, phi))
		continue;
	      work_list.safe_push (phi);
	    }
	  for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]);
	       !gsi_end_p (gsi);
	       gsi_next (&gsi))
	    {
	      gimple stmt = gsi_stmt (gsi);

	      /* If there is a stmt with side-effects bail out - we
		 cannot and should not distribute this loop.  */
	      if (gimple_has_side_effects (stmt))
		{
		  work_list.truncate (0);
		  goto out;
		}

	      /* Distribute stmts which have defs that are used outside of
		 the loop.  */
	      if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
		;
	      /* Otherwise only distribute stores for now.  */
	      else if (!gimple_vdef (stmt))
		continue;

	      work_list.safe_push (stmt);
	    }
	}
out:
      free (bbs);

      int nb_generated_loops = 0;
      int nb_generated_calls = 0;
      location_t loc = find_loop_location (loop);
      if (work_list.length () > 0)
	{
	  if (!cd)
	    {
	      calculate_dominance_info (CDI_DOMINATORS);
	      calculate_dominance_info (CDI_POST_DOMINATORS);
	      cd = new control_dependences (create_edge_list ());
	      free_dominance_info (CDI_POST_DOMINATORS);
	    }
	  nb_generated_loops = distribute_loop (loop, work_list, cd,
						&nb_generated_calls);
	}

      if (nb_generated_loops + nb_generated_calls > 0)
	{
	  changed = true;
	  dump_printf_loc (MSG_OPTIMIZED_LOCATIONS,
			   loc, "Loop %d distributed: split to %d loops "
			   "and %d library calls.\n",
			   num, nb_generated_loops, nb_generated_calls);
	}
      else if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Loop %d is the same.\n", num);
    }

  if (cd)
    delete cd;

  if (changed)
    {
      /* Cached scalar evolutions now may refer to wrong or non-existing
	 loops.  */
      scev_reset_htab ();
      mark_virtual_operands_for_renaming (fun);
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
    }

#ifdef ENABLE_CHECKING
  verify_loop_structure ();
#endif

  return 0;
}

} // anon namespace

gimple_opt_pass *
make_pass_loop_distribution (gcc::context *ctxt)
{
  return new pass_loop_distribution (ctxt);
}