/* gcc/tree-loop-distribution.c — snapshot from merge of git branch
   "gimple-classes-v2-option-3" (gcc.git).  */
1 /* Loop distribution.
2 Copyright (C) 2006-2014 Free Software Foundation, Inc.
3 Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
4 and Sebastian Pop <sebastian.pop@amd.com>.
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 3, or (at your option) any
11 later version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 /* This pass performs loop distribution: for example, the loop
23
24 |DO I = 2, N
25 | A(I) = B(I) + C
26 | D(I) = A(I-1)*E
27 |ENDDO
28
29 is transformed to
30
31 |DOALL I = 2, N
32 | A(I) = B(I) + C
33 |ENDDO
34 |
35 |DOALL I = 2, N
36 | D(I) = A(I-1)*E
37 |ENDDO
38
39 This pass uses an RDG, Reduced Dependence Graph built on top of the
40 data dependence relations. The RDG is then topologically sorted to
41 obtain a map of information producers/consumers based on which it
42 generates the new loops. */
43
44 #include "config.h"
45 #include "system.h"
46 #include "coretypes.h"
47 #include "tree.h"
48 #include "predict.h"
49 #include "vec.h"
50 #include "hashtab.h"
51 #include "hash-set.h"
52 #include "machmode.h"
53 #include "tm.h"
54 #include "hard-reg-set.h"
55 #include "input.h"
56 #include "function.h"
57 #include "dominance.h"
58 #include "cfg.h"
59 #include "cfganal.h"
60 #include "basic-block.h"
61 #include "tree-ssa-alias.h"
62 #include "internal-fn.h"
63 #include "gimple-expr.h"
64 #include "is-a.h"
65 #include "gimple.h"
66 #include "gimple-iterator.h"
67 #include "gimplify-me.h"
68 #include "stor-layout.h"
69 #include "gimple-ssa.h"
70 #include "tree-cfg.h"
71 #include "tree-phinodes.h"
72 #include "ssa-iterators.h"
73 #include "stringpool.h"
74 #include "tree-ssanames.h"
75 #include "tree-ssa-loop-manip.h"
76 #include "tree-ssa-loop.h"
77 #include "tree-into-ssa.h"
78 #include "tree-ssa.h"
79 #include "cfgloop.h"
80 #include "tree-chrec.h"
81 #include "tree-data-ref.h"
82 #include "tree-scalar-evolution.h"
83 #include "tree-pass.h"
84 #include "gimple-pretty-print.h"
85 #include "tree-vectorizer.h"
86
87
/* A Reduced Dependence Graph (RDG) vertex representing a statement.
   Hung off the generic graph vertex's DATA pointer.  */
typedef struct rdg_vertex
{
  /* The statement represented by this vertex.  */
  gimple stmt;

  /* Vector of data-references in this statement.  */
  vec<data_reference_p> datarefs;

  /* True when the statement contains a write to memory.  */
  bool has_mem_write;

  /* True when the statement contains a read from memory.  */
  bool has_mem_reads;
} *rdg_vertex_p;

/* Accessors for the rdg_vertex payload of a graph vertex V, and the
   same for vertex number I of graph RDG.  */
#define RDGV_STMT(V) ((struct rdg_vertex *) ((V)->data))->stmt
#define RDGV_DATAREFS(V) ((struct rdg_vertex *) ((V)->data))->datarefs
#define RDGV_HAS_MEM_WRITE(V) ((struct rdg_vertex *) ((V)->data))->has_mem_write
#define RDGV_HAS_MEM_READS(V) ((struct rdg_vertex *) ((V)->data))->has_mem_reads
#define RDG_STMT(RDG, I) RDGV_STMT (&(RDG->vertices[I]))
#define RDG_DATAREFS(RDG, I) RDGV_DATAREFS (&(RDG->vertices[I]))
#define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
#define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I]))
112
/* Data dependence type.  */

enum rdg_dep_type
{
  /* Read After Write (RAW).  */
  flow_dd = 'f',

  /* Control dependence (execute conditional on).  */
  control_dd = 'c'
};

/* Dependence information attached to an edge of the RDG.  */

typedef struct rdg_edge
{
  /* Type of the dependence.  */
  enum rdg_dep_type type;
} *rdg_edge_p;

/* Access the dependence type of RDG edge E.  */
#define RDGE_TYPE(E) ((struct rdg_edge *) ((E)->data))->type
133
134 /* Dump vertex I in RDG to FILE. */
135
136 static void
137 dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
138 {
139 struct vertex *v = &(rdg->vertices[i]);
140 struct graph_edge *e;
141
142 fprintf (file, "(vertex %d: (%s%s) (in:", i,
143 RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
144 RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
145
146 if (v->pred)
147 for (e = v->pred; e; e = e->pred_next)
148 fprintf (file, " %d", e->src);
149
150 fprintf (file, ") (out:");
151
152 if (v->succ)
153 for (e = v->succ; e; e = e->succ_next)
154 fprintf (file, " %d", e->dest);
155
156 fprintf (file, ")\n");
157 print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
158 fprintf (file, ")\n");
159 }
160
/* Call dump_rdg_vertex on stderr.  Convenience entry point for use
   from a debugger.  */

DEBUG_FUNCTION void
debug_rdg_vertex (struct graph *rdg, int i)
{
  dump_rdg_vertex (stderr, rdg, i);
}
168
169 /* Dump the reduced dependence graph RDG to FILE. */
170
171 static void
172 dump_rdg (FILE *file, struct graph *rdg)
173 {
174 fprintf (file, "(rdg\n");
175 for (int i = 0; i < rdg->n_vertices; i++)
176 dump_rdg_vertex (file, rdg, i);
177 fprintf (file, ")\n");
178 }
179
/* Call dump_rdg on stderr.  Convenience entry point for use from a
   debugger.  */

DEBUG_FUNCTION void
debug_rdg (struct graph *rdg)
{
  dump_rdg (stderr, rdg);
}
187
/* Dump RDG to FILE in graphviz "dot" syntax: one node per vertex
   (reads filled green, writes filled red) and one arrow per
   dependence edge.  */
static void
dot_rdg_1 (FILE *file, struct graph *rdg)
{
  int i;
  pretty_printer buffer;
  pp_needs_newline (&buffer) = false;
  /* Make the pretty-printer emit straight to FILE.  */
  buffer.buffer->stream = file;

  fprintf (file, "digraph RDG {\n");

  for (i = 0; i < rdg->n_vertices; i++)
    {
      struct vertex *v = &(rdg->vertices[i]);
      struct graph_edge *e;

      fprintf (file, "%d [label=\"[%d] ", i, i);
      pp_gimple_stmt_1 (&buffer, RDGV_STMT (v), 0, TDF_SLIM);
      pp_flush (&buffer);
      fprintf (file, "\"]\n");

      /* Highlight reads from memory.  */
      if (RDG_MEM_READS_STMT (rdg, i))
       fprintf (file, "%d [style=filled, fillcolor=green]\n", i);

      /* Highlight stores to memory.  */
      if (RDG_MEM_WRITE_STMT (rdg, i))
       fprintf (file, "%d [style=filled, fillcolor=red]\n", i);

      if (v->succ)
       for (e = v->succ; e; e = e->succ_next)
         switch (RDGE_TYPE (e))
           {
           case flow_dd:
             /* These are the most common dependences: don't add a
                label to them, just the plain arrow.  */
             fprintf (file, "%d -> %d \n", i, e->dest);
             break;

           case control_dd:
             fprintf (file, "%d -> %d [label=control] \n", i, e->dest);
             break;

           default:
             gcc_unreachable ();
           }
    }

  fprintf (file, "}\n\n");
}
236
/* Display the Reduced Dependence Graph using dotty.  */

DEBUG_FUNCTION void
dot_rdg (struct graph *rdg)
{
  /* When debugging, you may want to enable the following code.  */
#ifdef HAVE_POPEN
  FILE *file = popen ("dot -Tx11", "w");
  if (!file)
    return;
  dot_rdg_1 (file, rdg);
  fflush (file);
  /* NOTE(review): closing the underlying fd here makes the fclose
     inside pclose operate on a closed descriptor; presumably this is
     meant to signal EOF to dot before waiting for it — confirm before
     changing.  */
  close (fileno (file));
  pclose (file);
#else
  dot_rdg_1 (stderr, rdg);
#endif
}
255
/* Returns the index of STMT in RDG, or -1 when STMT is not part of
   the RDG.  The index is cached in the stmt's uid by
   create_rdg_vertices.  */

static int
rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple stmt)
{
  int index = gimple_uid (stmt);
  /* Either STMT is not in the RDG (uid -1) or the cached index must
     map back to this very statement.  */
  gcc_checking_assert (index == -1 || RDG_STMT (rdg, index) == stmt);
  return index;
}
265
266 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
267 the index of DEF in RDG. */
268
269 static void
270 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
271 {
272 use_operand_p imm_use_p;
273 imm_use_iterator iterator;
274
275 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
276 {
277 struct graph_edge *e;
278 int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
279
280 if (use < 0)
281 continue;
282
283 e = add_edge (rdg, idef, use);
284 e->data = XNEW (struct rdg_edge);
285 RDGE_TYPE (e) = flow_dd;
286 }
287 }
288
289 /* Creates an edge for the control dependences of BB to the vertex V. */
290
291 static void
292 create_edge_for_control_dependence (struct graph *rdg, basic_block bb,
293 int v, control_dependences *cd)
294 {
295 bitmap_iterator bi;
296 unsigned edge_n;
297 EXECUTE_IF_SET_IN_BITMAP (cd->get_edges_dependent_on (bb->index),
298 0, edge_n, bi)
299 {
300 basic_block cond_bb = cd->get_edge (edge_n)->src;
301 gimple stmt = last_stmt (cond_bb);
302 if (stmt && is_ctrl_stmt (stmt))
303 {
304 struct graph_edge *e;
305 int c = rdg_vertex_for_stmt (rdg, stmt);
306 if (c < 0)
307 continue;
308
309 e = add_edge (rdg, c, v);
310 e->data = XNEW (struct rdg_edge);
311 RDGE_TYPE (e) = control_dd;
312 }
313 }
314 }
315
316 /* Creates the edges of the reduced dependence graph RDG. */
317
318 static void
319 create_rdg_flow_edges (struct graph *rdg)
320 {
321 int i;
322 def_operand_p def_p;
323 ssa_op_iter iter;
324
325 for (i = 0; i < rdg->n_vertices; i++)
326 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
327 iter, SSA_OP_DEF)
328 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
329 }
330
331 /* Creates the edges of the reduced dependence graph RDG. */
332
333 static void
334 create_rdg_cd_edges (struct graph *rdg, control_dependences *cd)
335 {
336 int i;
337
338 for (i = 0; i < rdg->n_vertices; i++)
339 {
340 gimple stmt = RDG_STMT (rdg, i);
341 if (gimple_code (stmt) == GIMPLE_PHI)
342 {
343 edge_iterator ei;
344 edge e;
345 FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds)
346 create_edge_for_control_dependence (rdg, e->src, i, cd);
347 }
348 else
349 create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd);
350 }
351 }
352
/* Build the vertices of the reduced dependence graph RDG, one per
   statement in STMTS.  Data references found in the statements are
   appended to *DATAREFS and also recorded per vertex.  Return false
   if that failed (some statement's references could not be
   analyzed).  */

static bool
create_rdg_vertices (struct graph *rdg, vec<gimple> stmts, loop_p loop,
		     vec<data_reference_p> *datarefs)
{
  int i;
  gimple stmt;

  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      struct vertex *v = &(rdg->vertices[i]);

      /* Record statement to vertex mapping.  */
      gimple_set_uid (stmt, i);

      v->data = XNEW (struct rdg_vertex);
      RDGV_STMT (v) = stmt;
      RDGV_DATAREFS (v).create (0);
      RDGV_HAS_MEM_WRITE (v) = false;
      RDGV_HAS_MEM_READS (v) = false;
      /* PHI nodes carry no memory references.  */
      if (gimple_code (stmt) == GIMPLE_PHI)
	continue;

      /* Remember where this stmt's refs start in *DATAREFS so only
	 the newly discovered ones are attached to the vertex.  */
      unsigned drp = datarefs->length ();
      if (!find_data_references_in_stmt (loop, stmt, datarefs))
	return false;
      for (unsigned j = drp; j < datarefs->length (); ++j)
	{
	  data_reference_p dr = (*datarefs)[j];
	  if (DR_IS_READ (dr))
	    RDGV_HAS_MEM_READS (v) = true;
	  else
	    RDGV_HAS_MEM_WRITE (v) = true;
	  RDGV_DATAREFS (v).safe_push (dr);
	}
    }
  return true;
}
393
394 /* Initialize STMTS with all the statements of LOOP. The order in
395 which we discover statements is important as
396 generate_loops_for_partition is using the same traversal for
397 identifying statements in loop copies. */
398
399 static void
400 stmts_from_loop (struct loop *loop, vec<gimple> *stmts)
401 {
402 unsigned int i;
403 basic_block *bbs = get_loop_body_in_dom_order (loop);
404
405 for (i = 0; i < loop->num_nodes; i++)
406 {
407 basic_block bb = bbs[i];
408
409 for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
410 gsi_next (&bsi))
411 if (!virtual_operand_p (gimple_phi_result (bsi.phi ())))
412 stmts->safe_push (bsi.phi ());
413
414 for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
415 gsi_next (&bsi))
416 {
417 gimple stmt = gsi_stmt (bsi);
418 if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
419 stmts->safe_push (stmt);
420 }
421 }
422
423 free (bbs);
424 }
425
426 /* Free the reduced dependence graph RDG. */
427
428 static void
429 free_rdg (struct graph *rdg)
430 {
431 int i;
432
433 for (i = 0; i < rdg->n_vertices; i++)
434 {
435 struct vertex *v = &(rdg->vertices[i]);
436 struct graph_edge *e;
437
438 for (e = v->succ; e; e = e->succ_next)
439 free (e->data);
440
441 if (v->data)
442 {
443 gimple_set_uid (RDGV_STMT (v), -1);
444 free_data_refs (RDGV_DATAREFS (v));
445 free (v->data);
446 }
447 }
448
449 free_graph (rdg);
450 }
451
452 /* Build the Reduced Dependence Graph (RDG) with one vertex per
453 statement of the loop nest LOOP_NEST, and one edge per data dependence or
454 scalar dependence. */
455
456 static struct graph *
457 build_rdg (vec<loop_p> loop_nest, control_dependences *cd)
458 {
459 struct graph *rdg;
460 vec<data_reference_p> datarefs;
461
462 /* Create the RDG vertices from the stmts of the loop nest. */
463 auto_vec<gimple, 10> stmts;
464 stmts_from_loop (loop_nest[0], &stmts);
465 rdg = new_graph (stmts.length ());
466 datarefs.create (10);
467 if (!create_rdg_vertices (rdg, stmts, loop_nest[0], &datarefs))
468 {
469 datarefs.release ();
470 free_rdg (rdg);
471 return NULL;
472 }
473 stmts.release ();
474
475 create_rdg_flow_edges (rdg);
476 if (cd)
477 create_rdg_cd_edges (rdg, cd);
478
479 datarefs.release ();
480
481 return rdg;
482 }
483
484
485
/* The builtin kind a partition was classified as: an ordinary
   distributed loop, or a loop replaceable by a memset or a
   memcpy/memmove call.  */
enum partition_kind {
    PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY
};

/* A partition of the statements of a loop, candidate for separate
   code generation.  */
typedef struct partition_s
{
  /* Bitmap of statement uids (RDG vertex indices) in the partition.  */
  bitmap stmts;
  /* Bitmap of the numbers of the loops containing those stmts.  */
  bitmap loops;
  /* True when some statement has scalar uses after the loop; such a
     partition must stay in the original (last) loop.  */
  bool reduction_p;
  enum partition_kind kind;
  /* data-references a kind != PKIND_NORMAL partition is about.  */
  data_reference_p main_dr;
  data_reference_p secondary_dr;
  /* Number of latch executions, and whether one extra iteration's
     worth of bytes must be added to the builtin's size.  */
  tree niter;
  bool plus_one;
} *partition_t;
502
503
504 /* Allocate and initialize a partition from BITMAP. */
505
506 static partition_t
507 partition_alloc (bitmap stmts, bitmap loops)
508 {
509 partition_t partition = XCNEW (struct partition_s);
510 partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL);
511 partition->loops = loops ? loops : BITMAP_ALLOC (NULL);
512 partition->reduction_p = false;
513 partition->kind = PKIND_NORMAL;
514 return partition;
515 }
516
/* Free PARTITION, including its stmt and loop bitmaps.  */

static void
partition_free (partition_t partition)
{
  BITMAP_FREE (partition->stmts);
  BITMAP_FREE (partition->loops);
  free (partition);
}
526
/* Returns true if the partition can be generated as a builtin
   (memset/memcpy) rather than as a distributed loop.  */

static bool
partition_builtin_p (partition_t partition)
{
  return partition->kind != PKIND_NORMAL;
}
534
/* Returns true if the partition contains a reduction, i.e. a scalar
   result that is live after the loop.  */

static bool
partition_reduction_p (partition_t partition)
{
  return partition->reduction_p;
}
542
543 /* Merge PARTITION into the partition DEST. */
544
545 static void
546 partition_merge_into (partition_t dest, partition_t partition)
547 {
548 dest->kind = PKIND_NORMAL;
549 bitmap_ior_into (dest->stmts, partition->stmts);
550 if (partition_reduction_p (partition))
551 dest->reduction_p = true;
552 }
553
554
555 /* Returns true when DEF is an SSA_NAME defined in LOOP and used after
556 the LOOP. */
557
558 static bool
559 ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
560 {
561 imm_use_iterator imm_iter;
562 use_operand_p use_p;
563
564 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
565 {
566 gimple use_stmt = USE_STMT (use_p);
567 if (!is_gimple_debug (use_stmt)
568 && loop != loop_containing_stmt (use_stmt))
569 return true;
570 }
571
572 return false;
573 }
574
575 /* Returns true when STMT defines a scalar variable used after the
576 loop LOOP. */
577
578 static bool
579 stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple stmt)
580 {
581 def_operand_p def_p;
582 ssa_op_iter op_iter;
583
584 if (gimple_code (stmt) == GIMPLE_PHI)
585 return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop);
586
587 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
588 if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop))
589 return true;
590
591 return false;
592 }
593
/* Return a copy of LOOP placed before LOOP.  */

static struct loop *
copy_loop_before (struct loop *loop)
{
  struct loop *res;
  edge preheader = loop_preheader_edge (loop);

  initialize_original_copy_tables ();
  /* Duplicate the loop body onto its own preheader edge, i.e. right
     before the original loop.  */
  res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, NULL, preheader);
  gcc_assert (res != NULL);
  free_original_copy_tables ();
  delete_update_ssa ();

  return res;
}
610
611 /* Creates an empty basic block after LOOP. */
612
613 static void
614 create_bb_after_loop (struct loop *loop)
615 {
616 edge exit = single_exit (loop);
617
618 if (!exit)
619 return;
620
621 split_edge (exit);
622 }
623
/* Generate code for PARTITION from the code in LOOP.  The loop is
   copied when COPY_P is true.  All the statements not flagged in the
   PARTITION bitmap are removed from the loop or from its copy.  The
   statements are indexed in sequence inside a basic block, and the
   basic blocks of a loop are taken in dom order.  */

static void
generate_loops_for_partition (struct loop *loop, partition_t partition,
			      bool copy_p)
{
  unsigned i;
  basic_block *bbs;

  if (copy_p)
    {
      /* Leave the original loop untouched; this partition executes in
	 a fresh copy placed just before it.  */
      loop = copy_loop_before (loop);
      gcc_assert (loop != NULL);
      create_preheader (loop, CP_SIMPLE_PREHEADERS);
      create_bb_after_loop (loop);
    }

  /* Remove stmts not in the PARTITION bitmap.  */
  bbs = get_loop_body_in_dom_order (loop);

  /* First pass: reset the debug uses of everything that is about to
     go away, so debug info doesn't reference released SSA names.  */
  if (MAY_HAVE_DEBUG_STMTS)
    for (i = 0; i < loop->num_nodes; i++)
      {
	basic_block bb = bbs[i];

	for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
	     gsi_next (&bsi))
	  {
	    gphi *phi = bsi.phi ();
	    if (!virtual_operand_p (gimple_phi_result (phi))
		&& !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
	      reset_debug_uses (phi);
	  }

	for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	  {
	    gimple stmt = gsi_stmt (bsi);
	    if (gimple_code (stmt) != GIMPLE_LABEL
		&& !is_gimple_debug (stmt)
		&& !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
	      reset_debug_uses (stmt);
	  }
      }

  /* Second pass: do the actual removal.  The iterators are only
     advanced manually because gsi_remove/remove_phi_node already move
     them to the next statement.  */
  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
	{
	  gphi *phi = bsi.phi ();
	  if (!virtual_operand_p (gimple_phi_result (phi))
	      && !bitmap_bit_p (partition->stmts, gimple_uid (phi)))
	    remove_phi_node (&bsi, true);
	  else
	    gsi_next (&bsi);
	}

      for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
	{
	  gimple stmt = gsi_stmt (bsi);
	  if (gimple_code (stmt) != GIMPLE_LABEL
	      && !is_gimple_debug (stmt)
	      && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
	    {
	      /* Choose an arbitrary path through the empty CFG part
		 that this unnecessary control stmt controls.  */
	      if (gcond *cond_stmt = dyn_cast <gcond *> (stmt))
		{
		  gimple_cond_make_false (cond_stmt);
		  update_stmt (stmt);
		}
	      else if (gimple_code (stmt) == GIMPLE_SWITCH)
		{
		  gswitch *switch_stmt = as_a <gswitch *> (stmt);
		  gimple_switch_set_index
		    (switch_stmt, CASE_LOW (gimple_switch_label (switch_stmt, 1)));
		  update_stmt (stmt);
		}
	      else
		{
		  unlink_stmt_vdef (stmt);
		  gsi_remove (&bsi, true);
		  release_defs (stmt);
		  continue;
		}
	    }
	  gsi_next (&bsi);
	}
    }

  free (bbs);
}
721
722 /* Build the size argument for a memory operation call. */
723
724 static tree
725 build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter,
726 bool plus_one)
727 {
728 tree size = fold_convert_loc (loc, sizetype, nb_iter);
729 if (plus_one)
730 size = size_binop (PLUS_EXPR, size, size_one_node);
731 size = fold_build2_loc (loc, MULT_EXPR, sizetype, size,
732 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
733 size = fold_convert_loc (loc, size_type_node, size);
734 return size;
735 }
736
/* Build an address argument for a memory operation call on DR.
   NB_BYTES is the total number of bytes touched; it is only needed
   for negative strides, where the builtin must be given the lowest
   address of the touched region.  */

static tree
build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes)
{
  tree addr_base;

  addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
  addr_base = fold_convert_loc (loc, sizetype, addr_base);

  /* Test for a negative stride, iterating over every element.  */
  if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
    {
      /* Step back over the whole region and then forward one element
	 to land on the first byte accessed.  */
      addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
				  fold_convert_loc (loc, sizetype, nb_bytes));
      addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
				  TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
    }

  return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base);
}
758
759 /* If VAL memory representation contains the same value in all bytes,
760 return that value, otherwise return -1.
761 E.g. for 0x24242424 return 0x24, for IEEE double
762 747708026454360457216.0 return 0x44, etc. */
763
764 static int
765 const_with_all_bytes_same (tree val)
766 {
767 unsigned char buf[64];
768 int i, len;
769
770 if (integer_zerop (val)
771 || real_zerop (val)
772 || (TREE_CODE (val) == CONSTRUCTOR
773 && !TREE_CLOBBER_P (val)
774 && CONSTRUCTOR_NELTS (val) == 0))
775 return 0;
776
777 if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
778 return -1;
779
780 len = native_encode_expr (val, buf, sizeof (buf));
781 if (len == 0)
782 return -1;
783 for (i = 1; i < len; i++)
784 if (buf[i] != buf[0])
785 return -1;
786 return buf[0];
787 }
788
/* Generate a call to memset for PARTITION in LOOP, inserted at the
   end of the block feeding the loop's preheader.  */

static void
generate_memset_builtin (struct loop *loop, partition_t partition)
{
  gimple_stmt_iterator gsi;
  gimple stmt, fn_call;
  tree mem, fn, nb_bytes;
  location_t loc;
  tree val;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  /* Build and gimplify the size and address arguments.  */
  nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
				 partition->plus_one);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
				       false, GSI_CONTINUE_LINKING);
  mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
				  false, GSI_CONTINUE_LINKING);

  /* This exactly matches the pattern recognition in classify_partition.  */
  val = gimple_assign_rhs1 (stmt);
  /* Handle constants like 0x15151515 and similarly
     floating point constants etc. where all bytes are the same.  */
  int bytev = const_with_all_bytes_same (val);
  if (bytev != -1)
    val = build_int_cst (integer_type_node, bytev);
  else if (TREE_CODE (val) == INTEGER_CST)
    val = fold_convert (integer_type_node, val);
  else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val)))
    {
      /* memset takes an int; emit an explicit conversion of VAL.  */
      tree tem = make_ssa_name (integer_type_node, NULL);
      gimple cstmt = gimple_build_assign_with_ops (NOP_EXPR, tem, val);
      gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
      val = tem;
    }

  fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
  fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "generated memset");
      if (bytev == 0)
	fprintf (dump_file, " zero\n");
      else
	fprintf (dump_file, "\n");
    }
}
844
/* Generate a call to memcpy for PARTITION in LOOP, inserted at the
   end of the block feeding the loop's preheader.  Falls back to
   memmove when source and destination may alias.  */

static void
generate_memcpy_builtin (struct loop *loop, partition_t partition)
{
  gimple_stmt_iterator gsi;
  gimple stmt, fn_call;
  tree dest, src, fn, nb_bytes;
  location_t loc;
  enum built_in_function kind;

  stmt = DR_STMT (partition->main_dr);
  loc = gimple_location (stmt);

  /* The new statements will be placed before LOOP.  */
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

  nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
				 partition->plus_one);
  nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
				       false, GSI_CONTINUE_LINKING);
  dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
  src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes);
  /* memcpy requires non-overlapping operands; use memmove if the
     regions may alias.  */
  if (ptr_derefs_may_alias_p (dest, src))
    kind = BUILT_IN_MEMMOVE;
  else
    kind = BUILT_IN_MEMCPY;

  dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
				   false, GSI_CONTINUE_LINKING);
  src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
				  false, GSI_CONTINUE_LINKING);
  fn = build_fold_addr_expr (builtin_decl_implicit (kind));
  fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (kind == BUILT_IN_MEMCPY)
	fprintf (dump_file, "generated memcpy\n");
      else
	fprintf (dump_file, "generated memmove\n");
    }
}
889
/* Remove and destroy the loop LOOP, rewiring the CFG so control flows
   from the preheader's source straight to the exit destination.
   Assumes LOOP has a single exit.  */

static void
destroy_loop (struct loop *loop)
{
  unsigned nbbs = loop->num_nodes;
  edge exit = single_exit (loop);
  basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
  basic_block *bbs;
  unsigned i;

  bbs = get_loop_body_in_dom_order (loop);

  /* Short-circuit the loop: the exit edge now starts at the
     preheader's source block.  */
  redirect_edge_pred (exit, src);
  exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
  exit->flags |= EDGE_FALLTHRU;
  cancel_loop_tree (loop);
  rescan_loop_exit (exit, false, true);

  for (i = 0; i < nbbs; i++)
    {
      /* We have made sure to not leave any dangling uses of SSA
	 names defined in the loop.  With the exception of virtuals.
	 Make sure we replace all uses of virtual defs that will remain
	 outside of the loop with the bare symbol as delete_basic_block
	 will release them.  */
      for (gphi_iterator gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gphi *phi = gsi.phi ();
	  if (virtual_operand_p (gimple_phi_result (phi)))
	    mark_virtual_phi_result_for_renaming (phi);
	}
      for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple stmt = gsi_stmt (gsi);
	  tree vdef = gimple_vdef (stmt);
	  if (vdef && TREE_CODE (vdef) == SSA_NAME)
	    mark_virtual_operand_for_renaming (vdef);
	}
      delete_basic_block (bbs[i]);
    }
  free (bbs);

  /* Deleting the loop's blocks can change DEST's immediate
     dominator; recompute it.  */
  set_immediate_dominator (CDI_DOMINATORS, dest,
			   recompute_dominator (CDI_DOMINATORS, dest));
}
938
939 /* Generates code for PARTITION. */
940
941 static void
942 generate_code_for_partition (struct loop *loop,
943 partition_t partition, bool copy_p)
944 {
945 switch (partition->kind)
946 {
947 case PKIND_NORMAL:
948 /* Reductions all have to be in the last partition. */
949 gcc_assert (!partition_reduction_p (partition)
950 || !copy_p);
951 generate_loops_for_partition (loop, partition, copy_p);
952 return;
953
954 case PKIND_MEMSET:
955 generate_memset_builtin (loop, partition);
956 break;
957
958 case PKIND_MEMCPY:
959 generate_memcpy_builtin (loop, partition);
960 break;
961
962 default:
963 gcc_unreachable ();
964 }
965
966 /* Common tail for partitions we turn into a call. If this was the last
967 partition for which we generate code, we have to destroy the loop. */
968 if (!copy_p)
969 destroy_loop (loop);
970 }
971
972
973 /* Returns a partition with all the statements needed for computing
974 the vertex V of the RDG, also including the loop exit conditions. */
975
976 static partition_t
977 build_rdg_partition_for_vertex (struct graph *rdg, int v)
978 {
979 partition_t partition = partition_alloc (NULL, NULL);
980 auto_vec<int, 3> nodes;
981 unsigned i;
982 int x;
983
984 graphds_dfs (rdg, &v, 1, &nodes, false, NULL);
985
986 FOR_EACH_VEC_ELT (nodes, i, x)
987 {
988 bitmap_set_bit (partition->stmts, x);
989 bitmap_set_bit (partition->loops,
990 loop_containing_stmt (RDG_STMT (rdg, x))->num);
991 }
992
993 return partition;
994 }
995
/* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP:
   marks reductions, and detects single-store (memset) and
   single-store-single-load (memcpy/memmove) patterns.  */

static void
classify_partition (loop_p loop, struct graph *rdg, partition_t partition)
{
  bitmap_iterator bi;
  unsigned i;
  tree nb_iter;
  data_reference_p single_load, single_store;
  bool volatiles_p = false;
  bool plus_one = false;

  partition->kind = PKIND_NORMAL;
  partition->main_dr = NULL;
  partition->secondary_dr = NULL;
  partition->niter = NULL_TREE;
  partition->plus_one = false;

  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple stmt = RDG_STMT (rdg, i);

      if (gimple_has_volatile_ops (stmt))
	volatiles_p = true;

      /* If the stmt has uses outside of the loop mark it as reduction.  */
      if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
	{
	  partition->reduction_p = true;
	  return;
	}
    }

  /* Perform general partition disqualification for builtins.  */
  if (volatiles_p
      || !flag_tree_loop_distribute_patterns)
    return;

  /* Detect memset and memcpy.  Find the unique load and/or store the
     partition performs; more than one of either disqualifies it.  */
  single_load = NULL;
  single_store = NULL;
  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
    {
      gimple stmt = RDG_STMT (rdg, i);
      data_reference_p dr;
      unsigned j;

      if (gimple_code (stmt) == GIMPLE_PHI)
	continue;

      /* Any scalar stmts are ok.  */
      if (!gimple_vuse (stmt))
	continue;

      /* Otherwise just regular loads/stores.  */
      if (!gimple_assign_single_p (stmt))
	return;

      /* But exactly one store and/or load.  */
      for (j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j)
	{
	  if (DR_IS_READ (dr))
	    {
	      if (single_load != NULL)
		return;
	      single_load = dr;
	    }
	  else
	    {
	      if (single_store != NULL)
		return;
	      single_store = dr;
	    }
	}
    }

  if (!single_store)
    return;

  /* The builtin's size needs a known iteration count.  */
  nb_iter = number_of_latch_executions (loop);
  if (!nb_iter || nb_iter == chrec_dont_know)
    return;
  /* When the store also executes on the exit iteration, one more
     element's worth of bytes must be counted.  */
  if (dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src,
		      gimple_bb (DR_STMT (single_store))))
    plus_one = true;

  if (single_store && !single_load)
    {
      /* Store of a constant-per-byte value: memset candidate.  */
      gimple stmt = DR_STMT (single_store);
      tree rhs = gimple_assign_rhs1 (stmt);
      if (const_with_all_bytes_same (rhs) == -1
	  && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs))
	      || (TYPE_MODE (TREE_TYPE (rhs))
		  != TYPE_MODE (unsigned_char_type_node))))
	return;
      /* The stored value must be invariant in the loop.  */
      if (TREE_CODE (rhs) == SSA_NAME
	  && !SSA_NAME_IS_DEFAULT_DEF (rhs)
	  && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
	return;
      /* Require a contiguous access executed on every iteration.  */
      if (!adjacent_dr_p (single_store)
	  || !dominated_by_p (CDI_DOMINATORS,
			      loop->latch, gimple_bb (stmt)))
	return;
      partition->kind = PKIND_MEMSET;
      partition->main_dr = single_store;
      partition->niter = nb_iter;
      partition->plus_one = plus_one;
    }
  else if (single_store && single_load)
    {
      gimple store = DR_STMT (single_store);
      gimple load = DR_STMT (single_load);
      /* Direct aggregate copy or via an SSA name temporary.  */
      if (load != store
	  && gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
	return;
      /* Both accesses contiguous, same stride, executed every
	 iteration.  */
      if (!adjacent_dr_p (single_store)
	  || !adjacent_dr_p (single_load)
	  || !operand_equal_p (DR_STEP (single_store),
			       DR_STEP (single_load), 0)
	  || !dominated_by_p (CDI_DOMINATORS,
			      loop->latch, gimple_bb (store)))
	return;
      /* Now check that if there is a dependence this dependence is
         of a suitable form for memmove.  */
      vec<loop_p> loops = vNULL;
      ddr_p ddr;
      loops.safe_push (loop);
      ddr = initialize_data_dependence_relation (single_load, single_store,
						 loops);
      compute_affine_dependence (ddr, loop);
      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	{
	  free_dependence_relation (ddr);
	  loops.release ();
	  return;
	}
      if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
	{
	  /* Without distance vectors we cannot prove the copy
	     direction is safe.  */
	  if (DDR_NUM_DIST_VECTS (ddr) == 0)
	    {
	      free_dependence_relation (ddr);
	      loops.release ();
	      return;
	    }
	  lambda_vector dist_v;
	  FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
	    {
	      int dist = dist_v[index_in_loop_nest (loop->num,
						    DDR_LOOP_NEST (ddr))];
	      /* A positive, non-reversed distance means the store
		 overwrites bytes the load still needs — not
		 expressible as memmove.  */
	      if (dist > 0 && !DDR_REVERSED_P (ddr))
		{
		  free_dependence_relation (ddr);
		  loops.release ();
		  return;
		}
	    }
	}
      free_dependence_relation (ddr);
      loops.release ();
      partition->kind = PKIND_MEMCPY;
      partition->main_dr = single_store;
      partition->secondary_dr = single_load;
      partition->niter = nb_iter;
      partition->plus_one = plus_one;
    }
}
1164
1165 /* For a data reference REF, return the declaration of its base
1166 address or NULL_TREE if the base is not determined. */
1167
1168 static tree
1169 ref_base_address (data_reference_p dr)
1170 {
1171 tree base_address = DR_BASE_ADDRESS (dr);
1172 if (base_address
1173 && TREE_CODE (base_address) == ADDR_EXPR)
1174 return TREE_OPERAND (base_address, 0);
1175
1176 return base_address;
1177 }
1178
1179 /* Returns true when PARTITION1 and PARTITION2 have similar memory
1180 accesses in RDG. */
1181
static bool
similar_memory_accesses (struct graph *rdg, partition_t partition1,
			 partition_t partition2)
{
  unsigned i, j, k, l;
  bitmap_iterator bi, bj;
  data_reference_p ref1, ref2;

  /* First check whether in the intersection of the two partitions are
     any loads or stores.  Common loads are the situation that happens
     most often.  */
  EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
	|| RDG_MEM_READS_STMT (rdg, i))
      return true;

  /* Then check all data-references against each other.  This is
     quadratic in the number of memory statements: for every pair of
     memory statements, one from each partition, compare the base
     addresses of all their data references.  */
  EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi)
    if (RDG_MEM_WRITE_STMT (rdg, i)
	|| RDG_MEM_READS_STMT (rdg, i))
      EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj)
	if (RDG_MEM_WRITE_STMT (rdg, j)
	    || RDG_MEM_READS_STMT (rdg, j))
	  {
	    FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, i), k, ref1)
	      {
		/* base1 may be NULL_TREE when the base address could
		   not be determined; such refs match nothing.  */
		tree base1 = ref_base_address (ref1);
		if (base1)
		  FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, j), l, ref2)
		    if (base1 == ref_base_address (ref2))
		      return true;
	      }
	  }

  return false;
}
1218
1219 /* Aggregate several components into a useful partition that is
1220 registered in the PARTITIONS vector. Partitions will be
1221 distributed in different loops. */
1222
1223 static void
1224 rdg_build_partitions (struct graph *rdg,
1225 vec<gimple> starting_stmts,
1226 vec<partition_t> *partitions)
1227 {
1228 bitmap processed = BITMAP_ALLOC (NULL);
1229 int i;
1230 gimple stmt;
1231
1232 FOR_EACH_VEC_ELT (starting_stmts, i, stmt)
1233 {
1234 int v = rdg_vertex_for_stmt (rdg, stmt);
1235
1236 if (dump_file && (dump_flags & TDF_DETAILS))
1237 fprintf (dump_file,
1238 "ldist asked to generate code for vertex %d\n", v);
1239
1240 /* If the vertex is already contained in another partition so
1241 is the partition rooted at it. */
1242 if (bitmap_bit_p (processed, v))
1243 continue;
1244
1245 partition_t partition = build_rdg_partition_for_vertex (rdg, v);
1246 bitmap_ior_into (processed, partition->stmts);
1247
1248 if (dump_file && (dump_flags & TDF_DETAILS))
1249 {
1250 fprintf (dump_file, "ldist useful partition:\n");
1251 dump_bitmap (dump_file, partition->stmts);
1252 }
1253
1254 partitions->safe_push (partition);
1255 }
1256
1257 /* All vertices should have been assigned to at least one partition now,
1258 other than vertices belonging to dead code. */
1259
1260 BITMAP_FREE (processed);
1261 }
1262
1263 /* Dump to FILE the PARTITIONS. */
1264
1265 static void
1266 dump_rdg_partitions (FILE *file, vec<partition_t> partitions)
1267 {
1268 int i;
1269 partition_t partition;
1270
1271 FOR_EACH_VEC_ELT (partitions, i, partition)
1272 debug_bitmap_file (file, partition->stmts);
1273 }
1274
1275 /* Debug PARTITIONS. */
1276 extern void debug_rdg_partitions (vec<partition_t> );
1277
1278 DEBUG_FUNCTION void
1279 debug_rdg_partitions (vec<partition_t> partitions)
1280 {
1281 dump_rdg_partitions (stderr, partitions);
1282 }
1283
1284 /* Returns the number of read and write operations in the RDG. */
1285
1286 static int
1287 number_of_rw_in_rdg (struct graph *rdg)
1288 {
1289 int i, res = 0;
1290
1291 for (i = 0; i < rdg->n_vertices; i++)
1292 {
1293 if (RDG_MEM_WRITE_STMT (rdg, i))
1294 ++res;
1295
1296 if (RDG_MEM_READS_STMT (rdg, i))
1297 ++res;
1298 }
1299
1300 return res;
1301 }
1302
1303 /* Returns the number of read and write operations in a PARTITION of
1304 the RDG. */
1305
1306 static int
1307 number_of_rw_in_partition (struct graph *rdg, partition_t partition)
1308 {
1309 int res = 0;
1310 unsigned i;
1311 bitmap_iterator ii;
1312
1313 EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii)
1314 {
1315 if (RDG_MEM_WRITE_STMT (rdg, i))
1316 ++res;
1317
1318 if (RDG_MEM_READS_STMT (rdg, i))
1319 ++res;
1320 }
1321
1322 return res;
1323 }
1324
1325 /* Returns true when one of the PARTITIONS contains all the read or
1326 write operations of RDG. */
1327
1328 static bool
1329 partition_contains_all_rw (struct graph *rdg,
1330 vec<partition_t> partitions)
1331 {
1332 int i;
1333 partition_t partition;
1334 int nrw = number_of_rw_in_rdg (rdg);
1335
1336 FOR_EACH_VEC_ELT (partitions, i, partition)
1337 if (nrw == number_of_rw_in_partition (rdg, partition))
1338 return true;
1339
1340 return false;
1341 }
1342
1343 /* Compute partition dependence created by the data references in DRS1
1344 and DRS2 and modify and return DIR according to that. */
1345
static int
pg_add_dependence_edges (struct graph *rdg, vec<loop_p> loops, int dir,
			 vec<data_reference_p> drs1,
			 vec<data_reference_p> drs2)
{
  data_reference_p dr1, dr2;

  /* dependence direction - 0 is no dependence, -1 is back,
     1 is forth, 2 is both (we can stop then, merging will occur).  */
  for (int ii = 0; drs1.iterate (ii, &dr1); ++ii)
    for (int jj = 0; drs2.iterate (jj, &dr2); ++jj)
      {
	/* Direction for this single pair; starts "forth" and is negated
	   each time the pair is swapped below.  */
	int this_dir = 1;
	ddr_p ddr;
	/* Re-shuffle data-refs to be in dominator order.  */
	if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1))
	    > rdg_vertex_for_stmt (rdg, DR_STMT (dr2)))
	  {
	    data_reference_p tem = dr1;
	    dr1 = dr2;
	    dr2 = tem;
	    this_dir = -this_dir;
	  }
	ddr = initialize_data_dependence_relation (dr1, dr2, loops);
	compute_affine_dependence (ddr, loops[0]);
	if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	  /* Unknown dependence: must be treated as going both ways.  */
	  this_dir = 2;
	else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
	  {
	    if (DDR_REVERSED_P (ddr))
	      {
		data_reference_p tem = dr1;
		dr1 = dr2;
		dr2 = tem;
		this_dir = -this_dir;
	      }
	    /* Known dependences can still be unordered throughout the
	       iteration space, see gcc.dg/tree-ssa/ldist-16.c.  */
	    if (DDR_NUM_DIST_VECTS (ddr) != 1)
	      this_dir = 2;
	    /* If the overlap is exact preserve stmt order.  */
	    else if (lambda_vector_zerop (DDR_DIST_VECT (ddr, 0), 1))
	      ;
	    else
	      {
		/* Else as the distance vector is lexicographic positive
		   swap the dependence direction.  */
		this_dir = -this_dir;
	      }
	  }
	else
	  /* chrec_known: proven independent, no edge contribution.  */
	  this_dir = 0;
	free_dependence_relation (ddr);
	if (dir == 0)
	  dir = this_dir;
	else if (dir != this_dir)
	  /* Conflicting directions across pairs force "both"; no need
	     to examine the remaining pairs.  */
	  return 2;
      }
  return dir;
}
1406
1407 /* Compare postorder number of the partition graph vertices V1 and V2. */
1408
1409 static int
1410 pgcmp (const void *v1_, const void *v2_)
1411 {
1412 const vertex *v1 = (const vertex *)v1_;
1413 const vertex *v2 = (const vertex *)v2_;
1414 return v2->post - v1->post;
1415 }
1416
1417 /* Distributes the code from LOOP in such a way that producer
1418 statements are placed before consumer statements. Tries to separate
1419 only the statements from STMTS into separate loops.
1420 Returns the number of distributed loops. */
1421
static int
distribute_loop (struct loop *loop, vec<gimple> stmts,
		 control_dependences *cd, int *nb_calls)
{
  struct graph *rdg;
  partition_t partition;
  bool any_builtin;
  int i, nbp;
  graph *pg = NULL;
  int num_sccs = 1;

  *nb_calls = 0;
  /* Dependence analysis needs the loop nest of LOOP; give up when it
     cannot be computed.  */
  auto_vec<loop_p, 3> loop_nest;
  if (!find_loop_nest (loop, &loop_nest))
    return 0;

  rdg = build_rdg (loop_nest, cd);
  if (!rdg)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Loop %d not distributed: failed to build the RDG.\n",
		 loop->num);

      return 0;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg (dump_file, rdg);

  /* Seed one partition per starting statement.  */
  auto_vec<partition_t, 3> partitions;
  rdg_build_partitions (rdg, stmts, &partitions);

  /* Classify each partition (memset/memcpy pattern or plain code).  */
  any_builtin = false;
  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      classify_partition (loop, rdg, partition);
      any_builtin |= partition_builtin_p (partition);
    }

  /* If we are only distributing patterns but did not detect any,
     simply bail out.  */
  if (!flag_tree_loop_distribution
      && !any_builtin)
    {
      nbp = 0;
      goto ldist_done;
    }

  /* If we are only distributing patterns fuse all partitions that
     were not classified as builtins.  This also avoids chopping
     a loop into pieces, separated by builtin calls.  That is, we
     only want no or a single loop body remaining.  */
  partition_t into;
  if (!flag_tree_loop_distribution)
    {
      for (i = 0; partitions.iterate (i, &into); ++i)
	if (!partition_builtin_p (into))
	  break;
      for (++i; partitions.iterate (i, &partition); ++i)
	if (!partition_builtin_p (partition))
	  {
	    if (dump_file && (dump_flags & TDF_DETAILS))
	      {
		fprintf (dump_file, "fusing non-builtin partitions\n");
		dump_bitmap (dump_file, into->stmts);
		dump_bitmap (dump_file, partition->stmts);
	      }
	    partition_merge_into (into, partition);
	    partitions.unordered_remove (i);
	    partition_free (partition);
	    i--;
	  }
    }

  /* Due to limitations in the transform phase we have to fuse all
     reduction partitions into the last partition so the existing
     loop will contain all loop-closed PHI nodes.  */
  for (i = 0; partitions.iterate (i, &into); ++i)
    if (partition_reduction_p (into))
      break;
  for (i = i + 1; partitions.iterate (i, &partition); ++i)
    if (partition_reduction_p (partition))
      {
	if (dump_file && (dump_flags & TDF_DETAILS))
	  {
	    fprintf (dump_file, "fusing partitions\n");
	    dump_bitmap (dump_file, into->stmts);
	    dump_bitmap (dump_file, partition->stmts);
	    fprintf (dump_file, "because they have reductions\n");
	  }
	partition_merge_into (into, partition);
	partitions.unordered_remove (i);
	partition_free (partition);
	i--;
      }

  /* Apply our simple cost model - fuse partitions with similar
     memory accesses.  */
  for (i = 0; partitions.iterate (i, &into); ++i)
    {
      if (partition_builtin_p (into))
	continue;
      for (int j = i + 1;
	   partitions.iterate (j, &partition); ++j)
	{
	  if (!partition_builtin_p (partition)
	      && similar_memory_accesses (rdg, into, partition))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, "fusing partitions\n");
		  dump_bitmap (dump_file, into->stmts);
		  dump_bitmap (dump_file, partition->stmts);
		  fprintf (dump_file, "because they have similar "
			   "memory accesses\n");
		}
	      partition_merge_into (into, partition);
	      partitions.unordered_remove (j);
	      partition_free (partition);
	      j--;
	    }
	}
    }

  /* Build the partition dependency graph.  */
  if (partitions.length () > 1)
    {
      pg = new_graph (partitions.length ());
      /* Per-vertex payload: the partition plus its reads and writes,
	 collected once so edge computation below need not rescan.  */
      struct pgdata {
	  partition_t partition;
	  vec<data_reference_p> writes;
	  vec<data_reference_p> reads;
      };
#define PGDATA(i) ((pgdata *)(pg->vertices[i].data))
      for (i = 0; partitions.iterate (i, &partition); ++i)
	{
	  vertex *v = &pg->vertices[i];
	  pgdata *data = new pgdata;
	  data_reference_p dr;
	  /* FIXME - leaks.  */
	  v->data = data;
	  bitmap_iterator bi;
	  unsigned j;
	  data->partition = partition;
	  data->reads = vNULL;
	  data->writes = vNULL;
	  EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, j, bi)
	    for (int k = 0; RDG_DATAREFS (rdg, j).iterate (k, &dr); ++k)
	      if (DR_IS_READ (dr))
		data->reads.safe_push (dr);
	      else
		data->writes.safe_push (dr);
	}
      /* Add dependence edges for every ordered pair of partitions,
	 considering write-read, read-write and write-write conflicts.  */
      partition_t partition1, partition2;
      for (i = 0; partitions.iterate (i, &partition1); ++i)
	for (int j = i + 1; partitions.iterate (j, &partition2); ++j)
	  {
	    /* dependence direction - 0 is no dependence, -1 is back,
	       1 is forth, 2 is both (we can stop then, merging will occur).  */
	    int dir = 0;
	    dir = pg_add_dependence_edges (rdg, loop_nest, dir,
					   PGDATA(i)->writes,
					   PGDATA(j)->reads);
	    if (dir != 2)
	      dir = pg_add_dependence_edges (rdg, loop_nest, dir,
					     PGDATA(i)->reads,
					     PGDATA(j)->writes);
	    if (dir != 2)
	      dir = pg_add_dependence_edges (rdg, loop_nest, dir,
					     PGDATA(i)->writes,
					     PGDATA(j)->writes);
	    if (dir == 1 || dir == 2)
	      add_edge (pg, i, j);
	    if (dir == -1 || dir == 2)
	      add_edge (pg, j, i);
	  }

      /* Add edges to the reduction partition (if any) to force it last.  */
      unsigned j;
      for (j = 0; partitions.iterate (j, &partition); ++j)
	if (partition_reduction_p (partition))
	  break;
      if (j < partitions.length ())
	{
	  for (unsigned i = 0; partitions.iterate (i, &partition); ++i)
	    if (i != j)
	      add_edge (pg, i, j);
	}

      /* Compute partitions we cannot separate and fuse them.  */
      num_sccs = graphds_scc (pg, NULL);
      for (i = 0; i < num_sccs; ++i)
	{
	  partition_t first;
	  int j;
	  /* The first partition of each SCC absorbs the others; their
	     vector slots are nulled out and compacted below.  */
	  for (j = 0; partitions.iterate (j, &first); ++j)
	    if (pg->vertices[j].component == i)
	      break;
	  for (j = j + 1; partitions.iterate (j, &partition); ++j)
	    if (pg->vertices[j].component == i)
	      {
		if (dump_file && (dump_flags & TDF_DETAILS))
		  {
		    fprintf (dump_file, "fusing partitions\n");
		    dump_bitmap (dump_file, first->stmts);
		    dump_bitmap (dump_file, partition->stmts);
		    fprintf (dump_file, "because they are in the same "
			     "dependence SCC\n");
		  }
		partition_merge_into (first, partition);
		partitions[j] = NULL;
		partition_free (partition);
		PGDATA (j)->partition = NULL;
	      }
	}

      /* Now order the remaining nodes in postorder.  */
      qsort (pg->vertices, pg->n_vertices, sizeof (vertex), pgcmp);
      partitions.truncate (0);
      for (i = 0; i < pg->n_vertices; ++i)
	{
	  pgdata *data = PGDATA (i);
	  if (data->partition)
	    partitions.safe_push (data->partition);
	  data->reads.release ();
	  data->writes.release ();
	  delete data;
	}
      gcc_assert (partitions.length () == (unsigned)num_sccs);
      free_graph (pg);
    }

  /* Distribution is pointless with no partitions, with a single
     non-builtin partition, or when one partition already contains all
     reads and writes.  */
  nbp = partitions.length ();
  if (nbp == 0
      || (nbp == 1 && !partition_builtin_p (partitions[0]))
      || (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
    {
      nbp = 0;
      goto ldist_done;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_rdg_partitions (dump_file, partitions);

  /* Emit code; the third argument is false only for the last
     partition, which reuses the original loop body.  */
  FOR_EACH_VEC_ELT (partitions, i, partition)
    {
      if (partition_builtin_p (partition))
	(*nb_calls)++;
      generate_code_for_partition (loop, partition, i < nbp - 1);
    }

 ldist_done:

  FOR_EACH_VEC_ELT (partitions, i, partition)
    partition_free (partition);

  free_rdg (rdg);
  /* Loops generated, not counting the emitted library calls (those are
     reported separately through *NB_CALLS).  */
  return nbp - *nb_calls;
}
1682
1683 /* Distribute all loops in the current function. */
1684
1685 namespace {
1686
/* Pass metadata: GIMPLE pass "ldist", grouped under -fopt-info loop
   optimizations, requiring CFG and SSA form.  */
const pass_data pass_data_loop_distribution =
{
  GIMPLE_PASS, /* type */
  "ldist", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
1699
1700 class pass_loop_distribution : public gimple_opt_pass
1701 {
1702 public:
1703 pass_loop_distribution (gcc::context *ctxt)
1704 : gimple_opt_pass (pass_data_loop_distribution, ctxt)
1705 {}
1706
1707 /* opt_pass methods: */
1708 virtual bool gate (function *)
1709 {
1710 return flag_tree_loop_distribution
1711 || flag_tree_loop_distribute_patterns;
1712 }
1713
1714 virtual unsigned int execute (function *);
1715
1716 }; // class pass_loop_distribution
1717
unsigned int
pass_loop_distribution::execute (function *fun)
{
  struct loop *loop;
  bool changed = false;
  basic_block bb;
  control_dependences *cd = NULL;

  /* Invalidate all statement UIDs; the RDG construction assigns its
     own numbering later.  */
  FOR_ALL_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	gimple_set_uid (gsi_stmt (gsi), -1);
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	gimple_set_uid (gsi_stmt (gsi), -1);
    }

  /* We can at the moment only distribute non-nested loops, thus restrict
     walking to innermost loops.  */
  FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
    {
      auto_vec<gimple> work_list;
      basic_block *bbs;
      int num = loop->num;
      unsigned int i;

      /* If the loop doesn't have a single exit we will fail anyway,
	 so do that early.  */
      if (!single_exit (loop))
	continue;

      /* Only optimize hot loops.  */
      if (!optimize_loop_for_speed_p (loop))
	continue;

      /* Initialize the worklist with stmts we seed the partitions with.  */
      bbs = get_loop_body_in_dom_order (loop);
      for (i = 0; i < loop->num_nodes; ++i)
	{
	  for (gphi_iterator gsi = gsi_start_phis (bbs[i]);
	       !gsi_end_p (gsi);
	       gsi_next (&gsi))
	    {
	      gphi *phi = gsi.phi ();
	      /* Virtual PHIs carry no scalar value to distribute.  */
	      if (virtual_operand_p (gimple_phi_result (phi)))
		continue;
	      /* Distribute stmts which have defs that are used outside of
		 the loop.  */
	      if (!stmt_has_scalar_dependences_outside_loop (loop, phi))
		continue;
	      work_list.safe_push (phi);
	    }
	  for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]);
	       !gsi_end_p (gsi);
	       gsi_next (&gsi))
	    {
	      gimple stmt = gsi_stmt (gsi);

	      /* If there is a stmt with side-effects bail out - we
		 cannot and should not distribute this loop.  */
	      if (gimple_has_side_effects (stmt))
		{
		  work_list.truncate (0);
		  goto out;
		}

	      /* Distribute stmts which have defs that are used outside of
		 the loop.  */
	      if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
		;
	      /* Otherwise only distribute stores for now.  */
	      else if (!gimple_vdef (stmt))
		continue;

	      work_list.safe_push (stmt);
	    }
	}
out:
      free (bbs);

      int nb_generated_loops = 0;
      int nb_generated_calls = 0;
      location_t loc = find_loop_location (loop);
      if (work_list.length () > 0)
	{
	  /* Control dependences are computed lazily, once, and shared
	     across all loops processed by this pass invocation.  */
	  if (!cd)
	    {
	      calculate_dominance_info (CDI_DOMINATORS);
	      calculate_dominance_info (CDI_POST_DOMINATORS);
	      cd = new control_dependences (create_edge_list ());
	      free_dominance_info (CDI_POST_DOMINATORS);
	    }
	  nb_generated_loops = distribute_loop (loop, work_list, cd,
						&nb_generated_calls);
	}

      if (nb_generated_loops + nb_generated_calls > 0)
	{
	  changed = true;
	  dump_printf_loc (MSG_OPTIMIZED_LOCATIONS,
			   loc, "Loop %d distributed: split to %d loops "
			   "and %d library calls.\n",
			   num, nb_generated_loops, nb_generated_calls);
	}
      else if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Loop %d is the same.\n", num);
    }

  if (cd)
    delete cd;

  /* Distribution invalidates virtual SSA form and may break
     loop-closed SSA; schedule the necessary updates.  */
  if (changed)
    {
      mark_virtual_operands_for_renaming (fun);
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
    }

#ifdef ENABLE_CHECKING
  verify_loop_structure ();
#endif

  return 0;
}
1841
1842 } // anon namespace
1843
1844 gimple_opt_pass *
1845 make_pass_loop_distribution (gcc::context *ctxt)
1846 {
1847 return new pass_loop_distribution (ctxt);
1848 }