inclhack.def (hpux_imaginary_i): Remove spaces.
[gcc.git] / gcc / tree-parloops.c
1 /* Loop autoparallelization.
2 Copyright (C) 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr> and
4 Zdenek Dvorak <dvorakz@suse.cz>.
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "tree.h"
27 #include "rtl.h"
28 #include "tree-flow.h"
29 #include "cfgloop.h"
30 #include "ggc.h"
31 #include "tree-data-ref.h"
32 #include "diagnostic.h"
33 #include "tree-pass.h"
34 #include "tree-scalar-evolution.h"
35 #include "hashtab.h"
36 #include "langhooks.h"
37 #include "tree-vectorizer.h"
38
39 /* This pass tries to distribute iterations of loops into several threads.
40 The implementation is straightforward -- for each loop we test whether its
41 iterations are independent, and if it is the case (and some additional
42 conditions regarding profitability and correctness are satisfied), we
43 add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let omp expansion
44 machinery do its job.
45
46 The most of the complexity is in bringing the code into shape expected
47 by the omp expanders:
48 -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
49 variable and that the exit test is at the start of the loop body
50 -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
51 variables by accesses through pointers, and breaking up ssa chains
52 by storing the values incoming to the parallelized loop to a structure
53 passed to the new function as an argument (something similar is done
54 in omp gimplification, unfortunately only a small part of the code
55 can be shared).
56
57 TODO:
58 -- if there are several parallelizable loops in a function, it may be
59 possible to generate the threads just once (using synchronization to
60 ensure that cross-loop dependences are obeyed).
61 -- handling of common scalar dependence patterns (accumulation, ...)
62 -- handling of non-innermost loops */
63
64 /*
65 Reduction handling:
66 currently we use vect_is_simple_reduction() to detect reduction patterns.
67 The code transformation will be introduced by an example.
68
69
70 parloop
71 {
72 int sum=1;
73
74 for (i = 0; i < N; i++)
75 {
76 x[i] = i + 3;
77 sum+=x[i];
78 }
79 }
80
81 gimple-like code:
82 header_bb:
83
84 # sum_29 = PHI <sum_11(5), 1(3)>
85 # i_28 = PHI <i_12(5), 0(3)>
86 D.1795_8 = i_28 + 3;
87 x[i_28] = D.1795_8;
88 sum_11 = D.1795_8 + sum_29;
89 i_12 = i_28 + 1;
90 if (N_6(D) > i_12)
91 goto header_bb;
92
93
94 exit_bb:
95
96 # sum_21 = PHI <sum_11(4)>
97 printf (&"%d"[0], sum_21);
98
99
100 after reduction transformation (only relevant parts):
101
102 parloop
103 {
104
105 ....
106
107
108 # Storing the initial value given by the user. #
109
110 .paral_data_store.32.sum.27 = 1;
111
112 #pragma omp parallel num_threads(4)
113
114 #pragma omp for schedule(static)
115
116 # The neutral element corresponding to the particular
117 reduction's operation, e.g. 0 for PLUS_EXPR,
118 1 for MULT_EXPR, etc. replaces the user's initial value. #
119
120 # sum.27_29 = PHI <sum.27_11, 0>
121
122 sum.27_11 = D.1827_8 + sum.27_29;
123
124 GIMPLE_OMP_CONTINUE
125
126 # Adding this reduction phi is done at create_phi_for_local_result() #
127 # sum.27_56 = PHI <sum.27_11, 0>
128 GIMPLE_OMP_RETURN
129
130 # Creating the atomic operation is done at
131 create_call_for_reduction_1() #
132
133 #pragma omp atomic_load
134 D.1839_59 = *&.paral_data_load.33_51->reduction.23;
135 D.1840_60 = sum.27_56 + D.1839_59;
136 #pragma omp atomic_store (D.1840_60);
137
138 GIMPLE_OMP_RETURN
139
140 # collecting the result after the join of the threads is done at
141 create_loads_for_reductions().
142 The value computed by the threads is loaded from the
143 shared struct. #
144
145
146 .paral_data_load.33_52 = &.paral_data_store.32;
147 sum_37 = .paral_data_load.33_52->sum.27;
148 sum_43 = D.1795_41 + sum_37;
149
150 exit bb:
151 # sum_21 = PHI <sum_43, sum_26>
152 printf (&"%d"[0], sum_21);
153
154 ...
155
156 }
157
158 */
159
160 /* Minimal number of iterations of a loop that should be executed in each
161 thread. */
162 #define MIN_PER_THREAD 100
163
164 /* Element of the hashtable, representing a
165 reduction in the current loop. */
166 struct reduction_info
167 {
168 gimple reduc_stmt; /* reduction statement. */
169 gimple reduc_phi; /* The phi node defining the reduction. */
170 enum tree_code reduction_code;/* code for the reduction operation. */
171 gimple keep_res; /* The PHI_RESULT of this phi is the resulting value
172 of the reduction variable when existing the loop. */
173 tree initial_value; /* The initial value of the reduction var before entering the loop. */
174 tree field; /* the name of the field in the parloop data structure intended for reduction. */
175 tree init; /* reduction initialization value. */
176 gimple new_phi; /* (helper field) Newly created phi node whose result
177 will be passed to the atomic operation. Represents
178 the local result each thread computed for the reduction
179 operation. */
180 };
181
182 /* Equality and hash functions for hashtab code. */
183
184 static int
185 reduction_info_eq (const void *aa, const void *bb)
186 {
187 const struct reduction_info *a = (const struct reduction_info *) aa;
188 const struct reduction_info *b = (const struct reduction_info *) bb;
189
190 return (a->reduc_phi == b->reduc_phi);
191 }
192
193 static hashval_t
194 reduction_info_hash (const void *aa)
195 {
196 const struct reduction_info *a = (const struct reduction_info *) aa;
197
198 return htab_hash_pointer (a->reduc_phi);
199 }
200
201 static struct reduction_info *
202 reduction_phi (htab_t reduction_list, gimple phi)
203 {
204 struct reduction_info tmpred, *red;
205
206 if (htab_elements (reduction_list) == 0)
207 return NULL;
208
209 tmpred.reduc_phi = phi;
210 red = (struct reduction_info *) htab_find (reduction_list, &tmpred);
211
212 return red;
213 }
214
215 /* Element of hashtable of names to copy. */
216
217 struct name_to_copy_elt
218 {
219 unsigned version; /* The version of the name to copy. */
220 tree new_name; /* The new name used in the copy. */
221 tree field; /* The field of the structure used to pass the
222 value. */
223 };
224
225 /* Equality and hash functions for hashtab code. */
226
227 static int
228 name_to_copy_elt_eq (const void *aa, const void *bb)
229 {
230 const struct name_to_copy_elt *a = (const struct name_to_copy_elt *) aa;
231 const struct name_to_copy_elt *b = (const struct name_to_copy_elt *) bb;
232
233 return a->version == b->version;
234 }
235
236 static hashval_t
237 name_to_copy_elt_hash (const void *aa)
238 {
239 const struct name_to_copy_elt *a = (const struct name_to_copy_elt *) aa;
240
241 return (hashval_t) a->version;
242 }
243
244
245 /* Data dependency analysis. Returns true if the iterations of LOOP
246 are independent on each other (that is, if we can execute them
247 in parallel). */
248
249 static bool
250 loop_parallel_p (struct loop *loop)
251 {
252 VEC (ddr_p, heap) * dependence_relations;
253 VEC (data_reference_p, heap) *datarefs;
254 lambda_trans_matrix trans;
255 bool ret = false;
256
257 if (dump_file && (dump_flags & TDF_DETAILS))
258 fprintf (dump_file, "\nConsidering loop %d\n", loop->num);
259
260 /* Check for problems with dependences. If the loop can be reversed,
261 the iterations are independent. */
262 datarefs = VEC_alloc (data_reference_p, heap, 10);
263 dependence_relations = VEC_alloc (ddr_p, heap, 10 * 10);
264 compute_data_dependences_for_loop (loop, true, &datarefs,
265 &dependence_relations);
266 if (dump_file && (dump_flags & TDF_DETAILS))
267 dump_data_dependence_relations (dump_file, dependence_relations);
268
269 trans = lambda_trans_matrix_new (1, 1);
270 LTM_MATRIX (trans)[0][0] = -1;
271
272 if (lambda_transform_legal_p (trans, 1, dependence_relations))
273 {
274 ret = true;
275 if (dump_file && (dump_flags & TDF_DETAILS))
276 fprintf (dump_file, " SUCCESS: may be parallelized\n");
277 }
278 else if (dump_file && (dump_flags & TDF_DETAILS))
279 fprintf (dump_file,
280 " FAILED: data dependencies exist across iterations\n");
281
282 free_dependence_relations (dependence_relations);
283 free_data_refs (datarefs);
284
285 return ret;
286 }
287
288 /* Return true when LOOP contains basic blocks marked with the
289 BB_IRREDUCIBLE_LOOP flag. */
290
291 static inline bool
292 loop_has_blocks_with_irreducible_flag (struct loop *loop)
293 {
294 unsigned i;
295 basic_block *bbs = get_loop_body_in_dom_order (loop);
296 bool res = true;
297
298 for (i = 0; i < loop->num_nodes; i++)
299 if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP)
300 goto end;
301
302 res = false;
303 end:
304 free (bbs);
305 return res;
306 }
307
308 /* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
309 The assignment statement is placed on edge ENTRY. DECL_ADDRESS maps decls
310 to their addresses that can be reused. The address of OBJ is known to
311 be invariant in the whole function. */
312
313 static tree
314 take_address_of (tree obj, tree type, edge entry, htab_t decl_address)
315 {
316 int uid;
317 void **dslot;
318 struct int_tree_map ielt, *nielt;
319 tree *var_p, name, bvar, addr;
320 gimple stmt;
321 gimple_seq stmts;
322
323 /* Since the address of OBJ is invariant, the trees may be shared.
324 Avoid rewriting unrelated parts of the code. */
325 obj = unshare_expr (obj);
326 for (var_p = &obj;
327 handled_component_p (*var_p);
328 var_p = &TREE_OPERAND (*var_p, 0))
329 continue;
330 uid = DECL_UID (*var_p);
331
332 ielt.uid = uid;
333 dslot = htab_find_slot_with_hash (decl_address, &ielt, uid, INSERT);
334 if (!*dslot)
335 {
336 addr = build_addr (*var_p, current_function_decl);
337 bvar = create_tmp_var (TREE_TYPE (addr), get_name (*var_p));
338 add_referenced_var (bvar);
339 stmt = gimple_build_assign (bvar, addr);
340 name = make_ssa_name (bvar, stmt);
341 gimple_assign_set_lhs (stmt, name);
342 gsi_insert_on_edge_immediate (entry, stmt);
343
344 nielt = XNEW (struct int_tree_map);
345 nielt->uid = uid;
346 nielt->to = name;
347 *dslot = nielt;
348 }
349 else
350 name = ((struct int_tree_map *) *dslot)->to;
351
352 if (var_p != &obj)
353 {
354 *var_p = build1 (INDIRECT_REF, TREE_TYPE (*var_p), name);
355 name = force_gimple_operand (build_addr (obj, current_function_decl),
356 &stmts, true, NULL_TREE);
357 if (!gimple_seq_empty_p (stmts))
358 gsi_insert_seq_on_edge_immediate (entry, stmts);
359 }
360
361 if (TREE_TYPE (name) != type)
362 {
363 name = force_gimple_operand (fold_convert (type, name), &stmts, true,
364 NULL_TREE);
365 if (!gimple_seq_empty_p (stmts))
366 gsi_insert_seq_on_edge_immediate (entry, stmts);
367 }
368
369 return name;
370 }
371
372 /* Callback for htab_traverse. Create the initialization statement
373 for reduction described in SLOT, and place it at the preheader of
374 the loop described in DATA. */
375
376 static int
377 initialize_reductions (void **slot, void *data)
378 {
379 tree init, c;
380 tree bvar, type, arg;
381 edge e;
382
383 struct reduction_info *const reduc = (struct reduction_info *) *slot;
384 struct loop *loop = (struct loop *) data;
385
386 /* Create initialization in preheader:
387 reduction_variable = initialization value of reduction. */
388
389 /* In the phi node at the header, replace the argument coming
390 from the preheader with the reduction initialization value. */
391
392 /* Create a new variable to initialize the reduction. */
393 type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
394 bvar = create_tmp_var (type, "reduction");
395 add_referenced_var (bvar);
396
397 c = build_omp_clause (gimple_location (reduc->reduc_stmt),
398 OMP_CLAUSE_REDUCTION);
399 OMP_CLAUSE_REDUCTION_CODE (c) = reduc->reduction_code;
400 OMP_CLAUSE_DECL (c) = SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt));
401
402 init = omp_reduction_init (c, TREE_TYPE (bvar));
403 reduc->init = init;
404
405 /* Replace the argument representing the initialization value
406 with the initialization value for the reduction (neutral
407 element for the particular operation, e.g. 0 for PLUS_EXPR,
408 1 for MULT_EXPR, etc).
409 Keep the old value in a new variable "reduction_initial",
410 that will be taken in consideration after the parallel
411 computing is done. */
412
413 e = loop_preheader_edge (loop);
414 arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e);
415 /* Create new variable to hold the initial value. */
416
417 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
418 (reduc->reduc_phi, loop_preheader_edge (loop)), init);
419 reduc->initial_value = arg;
420 return 1;
421 }
422
423 struct elv_data
424 {
425 struct walk_stmt_info info;
426 edge entry;
427 htab_t decl_address;
428 bool changed;
429 };
430
431 /* Eliminates references to local variables in *TP out of the single
432 entry single exit region starting at DTA->ENTRY.
433 DECL_ADDRESS contains addresses of the references that had their
434 address taken already. If the expression is changed, CHANGED is
435 set to true. Callback for walk_tree. */
436
437 static tree
438 eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
439 {
440 struct elv_data *const dta = (struct elv_data *) data;
441 tree t = *tp, var, addr, addr_type, type, obj;
442
443 if (DECL_P (t))
444 {
445 *walk_subtrees = 0;
446
447 if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
448 return NULL_TREE;
449
450 type = TREE_TYPE (t);
451 addr_type = build_pointer_type (type);
452 addr = take_address_of (t, addr_type, dta->entry, dta->decl_address);
453 *tp = build1 (INDIRECT_REF, TREE_TYPE (*tp), addr);
454
455 dta->changed = true;
456 return NULL_TREE;
457 }
458
459 if (TREE_CODE (t) == ADDR_EXPR)
460 {
461 /* ADDR_EXPR may appear in two contexts:
462 -- as a gimple operand, when the address taken is a function invariant
463 -- as gimple rhs, when the resulting address in not a function
464 invariant
465 We do not need to do anything special in the latter case (the base of
466 the memory reference whose address is taken may be replaced in the
467 DECL_P case). The former case is more complicated, as we need to
468 ensure that the new address is still a gimple operand. Thus, it
469 is not sufficient to replace just the base of the memory reference --
470 we need to move the whole computation of the address out of the
471 loop. */
472 if (!is_gimple_val (t))
473 return NULL_TREE;
474
475 *walk_subtrees = 0;
476 obj = TREE_OPERAND (t, 0);
477 var = get_base_address (obj);
478 if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var))
479 return NULL_TREE;
480
481 addr_type = TREE_TYPE (t);
482 addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address);
483 *tp = addr;
484
485 dta->changed = true;
486 return NULL_TREE;
487 }
488
489 if (!EXPR_P (t))
490 *walk_subtrees = 0;
491
492 return NULL_TREE;
493 }
494
495 /* Moves the references to local variables in STMT out of the single
496 entry single exit region starting at ENTRY. DECL_ADDRESS contains
497 addresses of the references that had their address taken
498 already. */
499
500 static void
501 eliminate_local_variables_stmt (edge entry, gimple stmt,
502 htab_t decl_address)
503 {
504 struct elv_data dta;
505
506 memset (&dta.info, '\0', sizeof (dta.info));
507 dta.entry = entry;
508 dta.decl_address = decl_address;
509 dta.changed = false;
510
511 walk_gimple_op (stmt, eliminate_local_variables_1, &dta.info);
512
513 if (dta.changed)
514 update_stmt (stmt);
515 }
516
517 /* Eliminates the references to local variables from the single entry
518 single exit region between the ENTRY and EXIT edges.
519
520 This includes:
521 1) Taking address of a local variable -- these are moved out of the
522 region (and temporary variable is created to hold the address if
523 necessary).
524
525 2) Dereferencing a local variable -- these are replaced with indirect
526 references. */
527
528 static void
529 eliminate_local_variables (edge entry, edge exit)
530 {
531 basic_block bb;
532 VEC (basic_block, heap) *body = VEC_alloc (basic_block, heap, 3);
533 unsigned i;
534 gimple_stmt_iterator gsi;
535 htab_t decl_address = htab_create (10, int_tree_map_hash, int_tree_map_eq,
536 free);
537 basic_block entry_bb = entry->src;
538 basic_block exit_bb = exit->dest;
539
540 gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
541
542 for (i = 0; VEC_iterate (basic_block, body, i, bb); i++)
543 if (bb != entry_bb && bb != exit_bb)
544 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
545 eliminate_local_variables_stmt (entry, gsi_stmt (gsi),
546 decl_address);
547
548 htab_delete (decl_address);
549 VEC_free (basic_block, heap, body);
550 }
551
552 /* Returns true if expression EXPR is not defined between ENTRY and
553 EXIT, i.e. if all its operands are defined outside of the region. */
554
555 static bool
556 expr_invariant_in_region_p (edge entry, edge exit, tree expr)
557 {
558 basic_block entry_bb = entry->src;
559 basic_block exit_bb = exit->dest;
560 basic_block def_bb;
561
562 if (is_gimple_min_invariant (expr))
563 return true;
564
565 if (TREE_CODE (expr) == SSA_NAME)
566 {
567 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
568 if (def_bb
569 && dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb)
570 && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb))
571 return false;
572
573 return true;
574 }
575
576 return false;
577 }
578
579 /* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
580 The copies are stored to NAME_COPIES, if NAME was already duplicated,
581 its duplicate stored in NAME_COPIES is returned.
582
583 Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
584 duplicated, storing the copies in DECL_COPIES. */
585
586 static tree
587 separate_decls_in_region_name (tree name,
588 htab_t name_copies, htab_t decl_copies,
589 bool copy_name_p)
590 {
591 tree copy, var, var_copy;
592 unsigned idx, uid, nuid;
593 struct int_tree_map ielt, *nielt;
594 struct name_to_copy_elt elt, *nelt;
595 void **slot, **dslot;
596
597 if (TREE_CODE (name) != SSA_NAME)
598 return name;
599
600 idx = SSA_NAME_VERSION (name);
601 elt.version = idx;
602 slot = htab_find_slot_with_hash (name_copies, &elt, idx,
603 copy_name_p ? INSERT : NO_INSERT);
604 if (slot && *slot)
605 return ((struct name_to_copy_elt *) *slot)->new_name;
606
607 var = SSA_NAME_VAR (name);
608 uid = DECL_UID (var);
609 ielt.uid = uid;
610 dslot = htab_find_slot_with_hash (decl_copies, &ielt, uid, INSERT);
611 if (!*dslot)
612 {
613 var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
614 DECL_GIMPLE_REG_P (var_copy) = DECL_GIMPLE_REG_P (var);
615 add_referenced_var (var_copy);
616 nielt = XNEW (struct int_tree_map);
617 nielt->uid = uid;
618 nielt->to = var_copy;
619 *dslot = nielt;
620
621 /* Ensure that when we meet this decl next time, we won't duplicate
622 it again. */
623 nuid = DECL_UID (var_copy);
624 ielt.uid = nuid;
625 dslot = htab_find_slot_with_hash (decl_copies, &ielt, nuid, INSERT);
626 gcc_assert (!*dslot);
627 nielt = XNEW (struct int_tree_map);
628 nielt->uid = nuid;
629 nielt->to = var_copy;
630 *dslot = nielt;
631 }
632 else
633 var_copy = ((struct int_tree_map *) *dslot)->to;
634
635 if (copy_name_p)
636 {
637 copy = duplicate_ssa_name (name, NULL);
638 nelt = XNEW (struct name_to_copy_elt);
639 nelt->version = idx;
640 nelt->new_name = copy;
641 nelt->field = NULL_TREE;
642 *slot = nelt;
643 }
644 else
645 {
646 gcc_assert (!slot);
647 copy = name;
648 }
649
650 SSA_NAME_VAR (copy) = var_copy;
651 return copy;
652 }
653
654 /* Finds the ssa names used in STMT that are defined outside the
655 region between ENTRY and EXIT and replaces such ssa names with
656 their duplicates. The duplicates are stored to NAME_COPIES. Base
657 decls of all ssa names used in STMT (including those defined in
658 LOOP) are replaced with the new temporary variables; the
659 replacement decls are stored in DECL_COPIES. */
660
661 static void
662 separate_decls_in_region_stmt (edge entry, edge exit, gimple stmt,
663 htab_t name_copies, htab_t decl_copies)
664 {
665 use_operand_p use;
666 def_operand_p def;
667 ssa_op_iter oi;
668 tree name, copy;
669 bool copy_name_p;
670
671 mark_virtual_ops_for_renaming (stmt);
672
673 FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF)
674 {
675 name = DEF_FROM_PTR (def);
676 gcc_assert (TREE_CODE (name) == SSA_NAME);
677 copy = separate_decls_in_region_name (name, name_copies, decl_copies,
678 false);
679 gcc_assert (copy == name);
680 }
681
682 FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
683 {
684 name = USE_FROM_PTR (use);
685 if (TREE_CODE (name) != SSA_NAME)
686 continue;
687
688 copy_name_p = expr_invariant_in_region_p (entry, exit, name);
689 copy = separate_decls_in_region_name (name, name_copies, decl_copies,
690 copy_name_p);
691 SET_USE (use, copy);
692 }
693 }
694
695 /* Callback for htab_traverse. Adds a field corresponding to the reduction
696 specified in SLOT. The type is passed in DATA. */
697
698 static int
699 add_field_for_reduction (void **slot, void *data)
700 {
701
702 struct reduction_info *const red = (struct reduction_info *) *slot;
703 tree const type = (tree) data;
704 tree var = SSA_NAME_VAR (gimple_assign_lhs (red->reduc_stmt));
705 tree field = build_decl (gimple_location (red->reduc_stmt),
706 FIELD_DECL, DECL_NAME (var), TREE_TYPE (var));
707
708 insert_field_into_struct (type, field);
709
710 red->field = field;
711
712 return 1;
713 }
714
715 /* Callback for htab_traverse. Adds a field corresponding to a ssa name
716 described in SLOT. The type is passed in DATA. */
717
718 static int
719 add_field_for_name (void **slot, void *data)
720 {
721 struct name_to_copy_elt *const elt = (struct name_to_copy_elt *) *slot;
722 tree type = (tree) data;
723 tree name = ssa_name (elt->version);
724 tree var = SSA_NAME_VAR (name);
725 tree field = build_decl (DECL_SOURCE_LOCATION (var),
726 FIELD_DECL, DECL_NAME (var), TREE_TYPE (var));
727
728 insert_field_into_struct (type, field);
729 elt->field = field;
730
731 return 1;
732 }
733
734 /* Callback for htab_traverse. A local result is the intermediate result
735 computed by a single
736 thread, or the initial value in case no iteration was executed.
737 This function creates a phi node reflecting these values.
738 The phi's result will be stored in NEW_PHI field of the
739 reduction's data structure. */
740
741 static int
742 create_phi_for_local_result (void **slot, void *data)
743 {
744 struct reduction_info *const reduc = (struct reduction_info *) *slot;
745 const struct loop *const loop = (const struct loop *) data;
746 edge e;
747 gimple new_phi;
748 basic_block store_bb;
749 tree local_res;
750 source_location locus;
751
752 /* STORE_BB is the block where the phi
753 should be stored. It is the destination of the loop exit.
754 (Find the fallthru edge from GIMPLE_OMP_CONTINUE). */
755 store_bb = FALLTHRU_EDGE (loop->latch)->dest;
756
757 /* STORE_BB has two predecessors. One coming from the loop
758 (the reduction's result is computed at the loop),
759 and another coming from a block preceding the loop,
760 when no iterations
761 are executed (the initial value should be taken). */
762 if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (loop->latch))
763 e = EDGE_PRED (store_bb, 1);
764 else
765 e = EDGE_PRED (store_bb, 0);
766 local_res
767 = make_ssa_name (SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt)),
768 NULL);
769 locus = gimple_location (reduc->reduc_stmt);
770 new_phi = create_phi_node (local_res, store_bb);
771 SSA_NAME_DEF_STMT (local_res) = new_phi;
772 add_phi_arg (new_phi, reduc->init, e, locus);
773 add_phi_arg (new_phi, gimple_assign_lhs (reduc->reduc_stmt),
774 FALLTHRU_EDGE (loop->latch), locus);
775 reduc->new_phi = new_phi;
776
777 return 1;
778 }
779
780 struct clsn_data
781 {
782 tree store;
783 tree load;
784
785 basic_block store_bb;
786 basic_block load_bb;
787 };
788
789 /* Callback for htab_traverse. Create an atomic instruction for the
790 reduction described in SLOT.
791 DATA annotates the place in memory the atomic operation relates to,
792 and the basic block it needs to be generated in. */
793
794 static int
795 create_call_for_reduction_1 (void **slot, void *data)
796 {
797 struct reduction_info *const reduc = (struct reduction_info *) *slot;
798 struct clsn_data *const clsn_data = (struct clsn_data *) data;
799 gimple_stmt_iterator gsi;
800 tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
801 tree struct_type = TREE_TYPE (TREE_TYPE (clsn_data->load));
802 tree load_struct;
803 basic_block bb;
804 basic_block new_bb;
805 edge e;
806 tree t, addr, addr_type, ref, x;
807 tree tmp_load, name;
808 gimple load;
809
810 load_struct = fold_build1 (INDIRECT_REF, struct_type, clsn_data->load);
811 t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);
812 addr_type = build_pointer_type (type);
813
814 addr = build_addr (t, current_function_decl);
815
816 /* Create phi node. */
817 bb = clsn_data->load_bb;
818
819 e = split_block (bb, t);
820 new_bb = e->dest;
821
822 tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)), NULL);
823 add_referenced_var (tmp_load);
824 tmp_load = make_ssa_name (tmp_load, NULL);
825 load = gimple_build_omp_atomic_load (tmp_load, addr);
826 SSA_NAME_DEF_STMT (tmp_load) = load;
827 gsi = gsi_start_bb (new_bb);
828 gsi_insert_after (&gsi, load, GSI_NEW_STMT);
829
830 e = split_block (new_bb, load);
831 new_bb = e->dest;
832 gsi = gsi_start_bb (new_bb);
833 ref = tmp_load;
834 x = fold_build2 (reduc->reduction_code,
835 TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref,
836 PHI_RESULT (reduc->new_phi));
837
838 name = force_gimple_operand_gsi (&gsi, x, true, NULL_TREE, true,
839 GSI_CONTINUE_LINKING);
840
841 gsi_insert_after (&gsi, gimple_build_omp_atomic_store (name), GSI_NEW_STMT);
842 return 1;
843 }
844
845 /* Create the atomic operation at the join point of the threads.
846 REDUCTION_LIST describes the reductions in the LOOP.
847 LD_ST_DATA describes the shared data structure where
848 shared data is stored in and loaded from. */
849 static void
850 create_call_for_reduction (struct loop *loop, htab_t reduction_list,
851 struct clsn_data *ld_st_data)
852 {
853 htab_traverse (reduction_list, create_phi_for_local_result, loop);
854 /* Find the fallthru edge from GIMPLE_OMP_CONTINUE. */
855 ld_st_data->load_bb = FALLTHRU_EDGE (loop->latch)->dest;
856 htab_traverse (reduction_list, create_call_for_reduction_1, ld_st_data);
857 }
858
859 /* Callback for htab_traverse. Loads the final reduction value at the
860 join point of all threads, and inserts it in the right place. */
861
862 static int
863 create_loads_for_reductions (void **slot, void *data)
864 {
865 struct reduction_info *const red = (struct reduction_info *) *slot;
866 struct clsn_data *const clsn_data = (struct clsn_data *) data;
867 gimple stmt;
868 gimple_stmt_iterator gsi;
869 tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
870 tree struct_type = TREE_TYPE (TREE_TYPE (clsn_data->load));
871 tree load_struct;
872 tree name;
873 tree x;
874
875 gsi = gsi_after_labels (clsn_data->load_bb);
876 load_struct = fold_build1 (INDIRECT_REF, struct_type, clsn_data->load);
877 load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
878 NULL_TREE);
879
880 x = load_struct;
881 name = PHI_RESULT (red->keep_res);
882 stmt = gimple_build_assign (name, x);
883 SSA_NAME_DEF_STMT (name) = stmt;
884
885 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
886
887 for (gsi = gsi_start_phis (gimple_bb (red->keep_res));
888 !gsi_end_p (gsi); gsi_next (&gsi))
889 if (gsi_stmt (gsi) == red->keep_res)
890 {
891 remove_phi_node (&gsi, false);
892 return 1;
893 }
894 gcc_unreachable ();
895 }
896
897 /* Load the reduction result that was stored in LD_ST_DATA.
898 REDUCTION_LIST describes the list of reductions that the
899 loads should be generated for. */
900 static void
901 create_final_loads_for_reduction (htab_t reduction_list,
902 struct clsn_data *ld_st_data)
903 {
904 gimple_stmt_iterator gsi;
905 tree t;
906 gimple stmt;
907
908 gsi = gsi_after_labels (ld_st_data->load_bb);
909 t = build_fold_addr_expr (ld_st_data->store);
910 stmt = gimple_build_assign (ld_st_data->load, t);
911
912 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
913 SSA_NAME_DEF_STMT (ld_st_data->load) = stmt;
914
915 htab_traverse (reduction_list, create_loads_for_reductions, ld_st_data);
916
917 }
918
919 /* Callback for htab_traverse. Store the neutral value for the
920 particular reduction's operation, e.g. 0 for PLUS_EXPR,
921 1 for MULT_EXPR, etc. into the reduction field.
922 The reduction is specified in SLOT. The store information is
923 passed in DATA. */
924
925 static int
926 create_stores_for_reduction (void **slot, void *data)
927 {
928 struct reduction_info *const red = (struct reduction_info *) *slot;
929 struct clsn_data *const clsn_data = (struct clsn_data *) data;
930 tree t;
931 gimple stmt;
932 gimple_stmt_iterator gsi;
933 tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
934
935 gsi = gsi_last_bb (clsn_data->store_bb);
936 t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE);
937 stmt = gimple_build_assign (t, red->initial_value);
938 mark_virtual_ops_for_renaming (stmt);
939 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
940
941 return 1;
942 }
943
944 /* Callback for htab_traverse. Creates loads to a field of LOAD in LOAD_BB and
945 store to a field of STORE in STORE_BB for the ssa name and its duplicate
946 specified in SLOT. */
947
948 static int
949 create_loads_and_stores_for_name (void **slot, void *data)
950 {
951 struct name_to_copy_elt *const elt = (struct name_to_copy_elt *) *slot;
952 struct clsn_data *const clsn_data = (struct clsn_data *) data;
953 tree t;
954 gimple stmt;
955 gimple_stmt_iterator gsi;
956 tree type = TREE_TYPE (elt->new_name);
957 tree struct_type = TREE_TYPE (TREE_TYPE (clsn_data->load));
958 tree load_struct;
959
960 gsi = gsi_last_bb (clsn_data->store_bb);
961 t = build3 (COMPONENT_REF, type, clsn_data->store, elt->field, NULL_TREE);
962 stmt = gimple_build_assign (t, ssa_name (elt->version));
963 mark_virtual_ops_for_renaming (stmt);
964 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
965
966 gsi = gsi_last_bb (clsn_data->load_bb);
967 load_struct = fold_build1 (INDIRECT_REF, struct_type, clsn_data->load);
968 t = build3 (COMPONENT_REF, type, load_struct, elt->field, NULL_TREE);
969 stmt = gimple_build_assign (elt->new_name, t);
970 SSA_NAME_DEF_STMT (elt->new_name) = stmt;
971 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
972
973 return 1;
974 }
975
976 /* Moves all the variables used in LOOP and defined outside of it (including
977 the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
978 name) to a structure created for this purpose. The code
979
980 while (1)
981 {
982 use (a);
983 use (b);
984 }
985
986 is transformed this way:
987
988 bb0:
989 old.a = a;
990 old.b = b;
991
992 bb1:
993 a' = new->a;
994 b' = new->b;
995 while (1)
996 {
997 use (a');
998 use (b');
999 }
1000
1001 `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT. The
1002 pointer `new' is intentionally not initialized (the loop will be split to a
1003 separate function later, and `new' will be initialized from its arguments).
1004 LD_ST_DATA holds information about the shared data structure used to pass
1005 information among the threads. It is initialized here, and
1006 gen_parallel_loop will pass it to create_call_for_reduction that
1007 needs this information. REDUCTION_LIST describes the reductions
1008 in LOOP. */
1009
static void
separate_decls_in_region (edge entry, edge exit, htab_t reduction_list,
			  tree *arg_struct, tree *new_arg_struct,
			  struct clsn_data *ld_st_data)

{
  /* Split the entry edge so that BB0 (where the stores to the structure
     go) immediately dominates BB1 (where the loads go).  */
  basic_block bb1 = split_edge (entry);
  basic_block bb0 = single_pred (bb1);
  htab_t name_copies = htab_create (10, name_to_copy_elt_hash,
				    name_to_copy_elt_eq, free);
  htab_t decl_copies = htab_create (10, int_tree_map_hash, int_tree_map_eq,
				    free);
  unsigned i;
  tree type, type_name, nvar;
  gimple_stmt_iterator gsi;
  struct clsn_data clsn_data;
  VEC (basic_block, heap) *body = VEC_alloc (basic_block, heap, 3);
  basic_block bb;
  basic_block entry_bb = bb1;
  basic_block exit_bb = exit->dest;

  entry = single_succ_edge (entry_bb);
  gather_blocks_in_sese_region (entry_bb, exit_bb, &body);

  /* Walk every statement (phis and ordinary statements) in the region,
     collecting the names and decls that need copies into NAME_COPIES
     and DECL_COPIES.  */
  for (i = 0; VEC_iterate (basic_block, body, i, bb); i++)
    {
      if (bb != entry_bb && bb != exit_bb)
	{
	  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	    separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
					   name_copies, decl_copies);

	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	    separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
					   name_copies, decl_copies);
	}
    }

  VEC_free (basic_block, heap, body);

  if (htab_elements (name_copies) == 0 && reduction_list == 0)
    {
      /* It may happen that there is nothing to copy (if there are only
	 loop carried and external variables in the loop).  */
      *arg_struct = NULL;
      *new_arg_struct = NULL;
    }
  else
    {
      /* Create the type for the structure to store the ssa names to.  */
      type = lang_hooks.types.make_type (RECORD_TYPE);
      type_name = build_decl (BUILTINS_LOCATION,
			      TYPE_DECL, create_tmp_var_name (".paral_data"),
			      type);
      TYPE_NAME (type) = type_name;

      /* One field per copied name, plus fields for the reductions.  */
      htab_traverse (name_copies, add_field_for_name, type);
      if (reduction_list && htab_elements (reduction_list) > 0)
	{
	  /* Create the fields for reductions.  */
	  htab_traverse (reduction_list, add_field_for_reduction,
			 type);
	}
      layout_type (type);

      /* Create the loads and stores.  */
      *arg_struct = create_tmp_var (type, ".paral_data_store");
      add_referenced_var (*arg_struct);
      nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
      add_referenced_var (nvar);
      *new_arg_struct = make_ssa_name (nvar, NULL);

      ld_st_data->store = *arg_struct;
      ld_st_data->load = *new_arg_struct;
      ld_st_data->store_bb = bb0;
      ld_st_data->load_bb = bb1;

      htab_traverse (name_copies, create_loads_and_stores_for_name,
		     ld_st_data);

      /* Load the calculation from memory (after the join of the threads).  */

      if (reduction_list && htab_elements (reduction_list) > 0)
	{
	  htab_traverse (reduction_list, create_stores_for_reduction,
			 ld_st_data);
	  clsn_data.load = make_ssa_name (nvar, NULL);
	  clsn_data.load_bb = exit->dest;
	  clsn_data.store = ld_st_data->store;
	  create_final_loads_for_reduction (reduction_list, &clsn_data);
	}
    }

  htab_delete (decl_copies);
  htab_delete (name_copies);
}
1106
/* Bitmap containing uids of functions created by parallelization
   (see create_loop_fn / parallelized_function_p).  We cannot
   allocate it from the default obstack, as it must live across compilation
   of several functions; we make it gc allocated instead.  */

static GTY(()) bitmap parallelized_functions;
1112
1113 /* Returns true if FN was created by create_loop_fn. */
1114
1115 static bool
1116 parallelized_function_p (tree fn)
1117 {
1118 if (!parallelized_functions || !DECL_ARTIFICIAL (fn))
1119 return false;
1120
1121 return bitmap_bit_p (parallelized_functions, DECL_UID (fn));
1122 }
1123
1124 /* Creates and returns an empty function that will receive the body of
1125 a parallelized loop. */
1126
static tree
create_loop_fn (void)
{
  char buf[100];
  char *tname;
  tree decl, type, name, t;
  struct function *act_cfun = cfun;
  static unsigned loopfn_num;

  /* Build a unique assembler-private name of the form
     <current function>.$loopfn.<counter>.  */
  snprintf (buf, 100, "%s.$loopfn", current_function_name ());
  ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++);
  clean_symbol_name (tname);
  name = get_identifier (tname);
  /* The outlined function takes one pointer (the shared data structure)
     and returns nothing.  */
  type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);

  decl = build_decl (BUILTINS_LOCATION,
		     FUNCTION_DECL, name, type);
  if (!parallelized_functions)
    parallelized_functions = BITMAP_GGC_ALLOC ();
  /* Record the new function so parallelized_function_p recognizes it
     and we do not try to parallelize it again.  */
  bitmap_set_bit (parallelized_functions, DECL_UID (decl));

  TREE_STATIC (decl) = 1;
  TREE_USED (decl) = 1;
  DECL_ARTIFICIAL (decl) = 1;
  DECL_IGNORED_P (decl) = 0;
  TREE_PUBLIC (decl) = 0;
  DECL_UNINLINABLE (decl) = 1;
  DECL_EXTERNAL (decl) = 0;
  DECL_CONTEXT (decl) = NULL_TREE;
  DECL_INITIAL (decl) = make_node (BLOCK);

  /* Even a void function needs a RESULT_DECL.  */
  t = build_decl (BUILTINS_LOCATION,
		  RESULT_DECL, NULL_TREE, void_type_node);
  DECL_ARTIFICIAL (t) = 1;
  DECL_IGNORED_P (t) = 1;
  DECL_RESULT (decl) = t;

  /* The single parameter: the pointer to the shared data structure.  */
  t = build_decl (BUILTINS_LOCATION,
		  PARM_DECL, get_identifier (".paral_data_param"),
		  ptr_type_node);
  DECL_ARTIFICIAL (t) = 1;
  DECL_ARG_TYPE (t) = ptr_type_node;
  DECL_CONTEXT (t) = decl;
  TREE_USED (t) = 1;
  DECL_ARGUMENTS (decl) = t;

  allocate_struct_function (decl, false);

  /* The call to allocate_struct_function clobbers CFUN, so we need to restore
     it.  */
  set_cfun (act_cfun);

  return decl;
}
1181
1182 /* Moves the exit condition of LOOP to the beginning of its header, and
1183 duplicates the part of the last iteration that gets disabled to the
1184 exit of the loop. NIT is the number of iterations of the loop
1185 (used to initialize the variables in the duplicated part).
1186
1187 TODO: the common case is that latch of the loop is empty and immediately
1188 follows the loop exit. In this case, it would be better not to copy the
1189 body of the loop, but only move the entry of the loop directly before the
1190 exit check and increase the number of iterations of the loop by one.
1191 This may need some additional preconditioning in case NIT = ~0.
1192 REDUCTION_LIST describes the reductions in LOOP. */
1193
static void
transform_to_exit_first_loop (struct loop *loop, htab_t reduction_list, tree nit)
{
  basic_block *bbs, *nbbs, ex_bb, orig_header;
  unsigned n;
  bool ok;
  edge exit = single_dom_exit (loop), hpred;
  tree control, control_name, res, t;
  gimple phi, nphi, cond_stmt, stmt;
  gimple_stmt_iterator gsi;

  /* Split the header after its labels; the old body moves to ORIG_HEADER,
     reached via the single edge HPRED.  */
  split_block_after_labels (loop->header);
  orig_header = single_succ (loop->header);
  hpred = single_succ_edge (loop->header);

  /* The exit test compares the control variable against NIT.  */
  cond_stmt = last_stmt (exit->src);
  control = gimple_cond_lhs (cond_stmt);
  gcc_assert (gimple_cond_rhs (cond_stmt) == nit);

  /* Make sure that we have phi nodes on exit for all loop header phis
     (create_parallel_loop requires that).  */
  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      phi = gsi_stmt (gsi);
      res = PHI_RESULT (phi);
      t = make_ssa_name (SSA_NAME_VAR (res), phi);
      SET_PHI_RESULT (phi, t);

      nphi = create_phi_node (res, orig_header);
      SSA_NAME_DEF_STMT (res) = nphi;
      add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION);

      if (res == control)
	{
	  /* The control variable got a new ssa name; keep the exit
	     condition in sync.  */
	  gimple_cond_set_lhs (cond_stmt, t);
	  update_stmt (cond_stmt);
	  control = t;
	}
    }

  /* Duplicate the part of the loop body after the exit test to the
     loop exit (skipping the new header itself, hence BBS + 1).  */
  bbs = get_loop_body_in_dom_order (loop);
  for (n = 0; bbs[n] != exit->src; n++)
    continue;
  nbbs = XNEWVEC (basic_block, n);
  ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
				   bbs + 1, n, nbbs);
  gcc_assert (ok);
  free (bbs);
  ex_bb = nbbs[0];
  free (nbbs);

  /* Other than reductions, the only gimple reg that should be copied
     out of the loop is the control variable.  */

  control_name = NULL_TREE;
  for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); )
    {
      phi = gsi_stmt (gsi);
      res = PHI_RESULT (phi);
      if (!is_gimple_reg (res))
	{
	  gsi_next (&gsi);
	  continue;
	}

      /* Check if it is a part of reduction.  If it is,
	 keep the phi at the reduction's keep_res field.  The
	 PHI_RESULT of this phi is the resulting value of the reduction
	 variable when exiting the loop.  */

      exit = single_dom_exit (loop);

      if (htab_elements (reduction_list) > 0)
	{
	  struct reduction_info *red;

	  tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);

	  red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
	  if (red)
	    {
	      red->keep_res = phi;
	      gsi_next (&gsi);
	      continue;
	    }
	}
      /* Anything else copied out must be the control variable.  */
      gcc_assert (control_name == NULL_TREE
		  && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
      control_name = res;
      remove_phi_node (&gsi, false);
    }
  gcc_assert (control_name != NULL_TREE);

  /* Initialize the control variable to NIT.  */
  gsi = gsi_after_labels (ex_bb);
  nit = force_gimple_operand_gsi (&gsi,
				  fold_convert (TREE_TYPE (control_name), nit),
				  false, NULL_TREE, false, GSI_SAME_STMT);
  stmt = gimple_build_assign (control_name, nit);
  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (control_name) = stmt;
}
1296
1297 /* Create the parallel constructs for LOOP as described in gen_parallel_loop.
1298 LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
1299 NEW_DATA is the variable that should be initialized from the argument
1300 of LOOP_FN. N_THREADS is the requested number of threads. Returns the
1301 basic block containing GIMPLE_OMP_PARALLEL tree. */
1302
static basic_block
create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
		      tree new_data, unsigned n_threads)
{
  gimple_stmt_iterator gsi;
  basic_block bb, paral_bb, for_bb, ex_bb;
  tree t, param, res;
  gimple stmt, for_stmt, phi, cond_stmt;
  tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
  edge exit, nexit, guard, end, e;

  /* Prepare the GIMPLE_OMP_PARALLEL statement.  */
  bb = loop_preheader_edge (loop)->src;
  paral_bb = single_pred (bb);
  gsi = gsi_last_bb (paral_bb);

  /* Request N_THREADS threads via an OMP_CLAUSE_NUM_THREADS clause.  */
  t = build_omp_clause (BUILTINS_LOCATION, OMP_CLAUSE_NUM_THREADS);
  OMP_CLAUSE_NUM_THREADS_EXPR (t)
    = build_int_cst (integer_type_node, n_threads);
  stmt = gimple_build_omp_parallel (NULL, t, loop_fn, data);

  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  /* Initialize NEW_DATA.  */
  if (data)
    {
      gsi = gsi_after_labels (bb);

      /* NEW_DATA = (type of NEW_DATA) &DATA, via the function's
	 pointer parameter.  */
      param = make_ssa_name (DECL_ARGUMENTS (loop_fn), NULL);
      stmt = gimple_build_assign (param, build_fold_addr_expr (data));
      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
      SSA_NAME_DEF_STMT (param) = stmt;

      stmt = gimple_build_assign (new_data,
				  fold_convert (TREE_TYPE (new_data), param));
      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
      SSA_NAME_DEF_STMT (new_data) = stmt;
    }

  /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL.  */
  bb = split_loop_exit_edge (single_dom_exit (loop));
  gsi = gsi_last_bb (bb);
  gsi_insert_after (&gsi, gimple_build_omp_return (false), GSI_NEW_STMT);

  /* Extract data for GIMPLE_OMP_FOR.  */
  gcc_assert (loop->header == single_dom_exit (loop)->src);
  cond_stmt = last_stmt (loop->header);

  /* CVAR is the single control variable; the loop has already been
     canonicalized so the exit test is on it.  */
  cvar = gimple_cond_lhs (cond_stmt);
  cvar_base = SSA_NAME_VAR (cvar);
  phi = SSA_NAME_DEF_STMT (cvar);
  cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
  initvar = make_ssa_name (cvar_base, NULL);
  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
	   initvar);
  cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));

  /* The increment in the latch is taken over by GIMPLE_OMP_CONTINUE,
     so remove it.  */
  gsi = gsi_last_bb (loop->latch);
  gcc_assert (gsi_stmt (gsi) == SSA_NAME_DEF_STMT (cvar_next));
  gsi_remove (&gsi, true);

  /* Prepare cfg.  */
  for_bb = split_edge (loop_preheader_edge (loop));
  ex_bb = split_loop_exit_edge (single_dom_exit (loop));
  extract_true_false_edges_from_block (loop->header, &nexit, &exit);
  gcc_assert (exit == single_dom_exit (loop));

  guard = make_edge (for_bb, ex_bb, 0);
  single_succ_edge (loop->latch)->flags = 0;
  end = make_edge (loop->latch, ex_bb, EDGE_FALLTHRU);
  /* Give each exit phi arguments for the new GUARD and END edges,
     taken from the corresponding header phi's preheader/latch args.  */
  for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      source_location locus;
      tree def;
      phi = gsi_stmt (gsi);
      res = PHI_RESULT (phi);
      stmt = SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi, exit));

      def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
      locus = gimple_phi_arg_location_from_edge (stmt,
						 loop_preheader_edge (loop));
      add_phi_arg (phi, def, guard, locus);

      def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (loop));
      locus = gimple_phi_arg_location_from_edge (stmt, loop_latch_edge (loop));
      add_phi_arg (phi, def, end, locus);
    }
  e = redirect_edge_and_branch (exit, nexit->dest);
  PENDING_STMT (e) = NULL;

  /* Emit GIMPLE_OMP_FOR.  */
  gimple_cond_set_lhs (cond_stmt, cvar_base);
  type = TREE_TYPE (cvar);
  t = build_omp_clause (BUILTINS_LOCATION, OMP_CLAUSE_SCHEDULE);
  OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;

  for_stmt = gimple_build_omp_for (NULL, t, 1, NULL);
  gimple_omp_for_set_index (for_stmt, 0, initvar);
  gimple_omp_for_set_initial (for_stmt, 0, cvar_init);
  gimple_omp_for_set_final (for_stmt, 0, gimple_cond_rhs (cond_stmt));
  gimple_omp_for_set_cond (for_stmt, 0, gimple_cond_code (cond_stmt));
  gimple_omp_for_set_incr (for_stmt, 0, build2 (PLUS_EXPR, type,
						cvar_base,
						build_int_cst (type, 1)));

  gsi = gsi_last_bb (for_bb);
  gsi_insert_after (&gsi, for_stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (initvar) = for_stmt;

  /* Emit GIMPLE_OMP_CONTINUE.  */
  gsi = gsi_last_bb (loop->latch);
  stmt = gimple_build_omp_continue (cvar_next, cvar);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (cvar_next) = stmt;

  /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR.  */
  gsi = gsi_last_bb (ex_bb);
  gsi_insert_after (&gsi, gimple_build_omp_return (true), GSI_NEW_STMT);

  return paral_bb;
}
1424
1425 /* Generates code to execute the iterations of LOOP in N_THREADS
1426 threads in parallel.
1427
1428 NITER describes number of iterations of LOOP.
1429 REDUCTION_LIST describes the reductions existent in the LOOP. */
1430
static void
gen_parallel_loop (struct loop *loop, htab_t reduction_list,
		   unsigned n_threads, struct tree_niter_desc *niter)
{
  struct loop *nloop;
  loop_iterator li;
  tree many_iterations_cond, type, nit;
  tree arg_struct, new_arg_struct;
  gimple_seq stmts;
  basic_block parallel_head;
  edge entry, exit;
  struct clsn_data clsn_data;
  unsigned prob;

  /* From

     ---------------------------------------------------------------------
     loop
       {
	 IV = phi (INIT, IV + STEP)
	 BODY1;
	 if (COND)
	   break;
	 BODY2;
       }
     ---------------------------------------------------------------------

     with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
     we generate the following code:

     ---------------------------------------------------------------------

     if (MAY_BE_ZERO
	 || NITER < MIN_PER_THREAD * N_THREADS)
       goto original;

     BODY1;
     store all local loop-invariant variables used in body of the loop to DATA.
     GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
     load the variables from DATA.
     GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
     BODY2;
     BODY1;
     GIMPLE_OMP_CONTINUE;
     GIMPLE_OMP_RETURN         -- GIMPLE_OMP_FOR
     GIMPLE_OMP_RETURN         -- GIMPLE_OMP_PARALLEL
     goto end;

     original:
     loop
       {
	 IV = phi (INIT, IV + STEP)
	 BODY1;
	 if (COND)
	   break;
	 BODY2;
       }

     end:

   */

  /* Create two versions of the loop -- in the old one, we know that the
     number of iterations is large enough, and we will transform it into the
     loop that will be split to loop_fn, the new one will be used for the
     remaining iterations.  */

  /* Gimplify the number of iterations on the preheader edge.  */
  type = TREE_TYPE (niter->niter);
  nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
			      NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);

  /* Build the guard: NITER >= MIN_PER_THREAD * N_THREADS and the loop
     certainly runs (negation of MAY_BE_ZERO).  */
  many_iterations_cond =
    fold_build2 (GE_EXPR, boolean_type_node,
		 nit, build_int_cst (type, MIN_PER_THREAD * n_threads));
  many_iterations_cond
    = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
		   invert_truthvalue (unshare_expr (niter->may_be_zero)),
		   many_iterations_cond);
  many_iterations_cond
    = force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
  if (!is_gimple_condexpr (many_iterations_cond))
    {
      many_iterations_cond
	= force_gimple_operand (many_iterations_cond, &stmts,
				true, NULL_TREE);
      if (stmts)
	gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
    }

  initialize_original_copy_tables ();

  /* We assume that the loop usually iterates a lot.  */
  prob = 4 * REG_BR_PROB_BASE / 5;
  nloop = loop_version (loop, many_iterations_cond, NULL,
			prob, prob, REG_BR_PROB_BASE - prob, true);
  update_ssa (TODO_update_ssa);
  free_original_copy_tables ();

  /* Base all the induction variables in LOOP on a single control one.  */
  canonicalize_loop_ivs (loop, &nit);

  /* Ensure that the exit condition is the first statement in the loop.  */
  transform_to_exit_first_loop (loop, reduction_list, nit);

  /* Generate initializations for reductions.  */
  if (htab_elements (reduction_list) > 0)
    htab_traverse (reduction_list, initialize_reductions, loop);

  /* Eliminate the references to local variables from the loop.  */
  gcc_assert (single_exit (loop));
  entry = loop_preheader_edge (loop);
  exit = single_dom_exit (loop);

  eliminate_local_variables (entry, exit);
  /* In the old loop, move all variables non-local to the loop to a structure
     and back, and create separate decls for the variables used in loop.  */
  separate_decls_in_region (entry, exit, reduction_list, &arg_struct,
			    &new_arg_struct, &clsn_data);

  /* Create the parallel constructs.  */
  parallel_head = create_parallel_loop (loop, create_loop_fn (), arg_struct,
					new_arg_struct, n_threads);
  if (htab_elements (reduction_list) > 0)
    create_call_for_reduction (loop, reduction_list, &clsn_data);

  scev_reset ();

  /* Cancel the loop (it is simpler to do it here rather than to teach the
     expander to do it).  */
  cancel_loop_tree (loop);

  /* Free loop bound estimations that could contain references to
     removed statements.  */
  FOR_EACH_LOOP (li, loop, 0)
    free_numbers_of_iterations_estimates_loop (loop);

  /* Expand the parallel constructs.  We do it directly here instead of running
     a separate expand_omp pass, since it is more efficient, and less likely to
     cause troubles with further analyses not being able to deal with the
     OMP trees.  */

  omp_expand_local (parallel_head);
}
1578
1579 /* Returns true when LOOP contains vector phi nodes. */
1580
1581 static bool
1582 loop_has_vector_phi_nodes (struct loop *loop ATTRIBUTE_UNUSED)
1583 {
1584 unsigned i;
1585 basic_block *bbs = get_loop_body_in_dom_order (loop);
1586 gimple_stmt_iterator gsi;
1587 bool res = true;
1588
1589 for (i = 0; i < loop->num_nodes; i++)
1590 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
1591 if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi_stmt (gsi)))) == VECTOR_TYPE)
1592 goto end;
1593
1594 res = false;
1595 end:
1596 free (bbs);
1597 return res;
1598 }
1599
1600 /* Create a reduction_info struct, initialize it with REDUC_STMT
1601 and PHI, insert it to the REDUCTION_LIST. */
1602
static void
build_new_reduction (htab_t reduction_list, gimple reduc_stmt, gimple phi)
{
  PTR *slot;
  struct reduction_info *new_reduction;

  gcc_assert (reduc_stmt);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file,
	       "Detected reduction. reduction stmt is: \n");
      print_gimple_stmt (dump_file, reduc_stmt, 0, 0);
      fprintf (dump_file, "\n");
    }

  /* Zero-initialized; fields not set here (e.g. keep_res) are filled in
     later by the callers of reduction_phi.  */
  new_reduction = XCNEW (struct reduction_info);

  new_reduction->reduc_stmt = reduc_stmt;
  new_reduction->reduc_phi = phi;
  new_reduction->reduction_code = gimple_assign_rhs_code (reduc_stmt);
  slot = htab_find_slot (reduction_list, new_reduction, INSERT);
  *slot = new_reduction;
}
1627
1628 /* Detect all reductions in the LOOP, insert them into REDUCTION_LIST. */
1629
static void
gather_scalar_reductions (loop_p loop, htab_t reduction_list)
{
  gimple_stmt_iterator gsi;
  loop_vec_info simple_loop_info;

  /* Reuse the vectorizer's loop-form analysis to recognize reductions.  */
  vect_dump = NULL;
  simple_loop_info = vect_analyze_loop_form (loop);

  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi = gsi_stmt (gsi);
      affine_iv iv;
      tree res = PHI_RESULT (phi);
      bool double_reduc;

      if (!is_gimple_reg (res))
	continue;

      /* A header phi that is not a simple induction variable is a
	 reduction candidate -- but only if loop-form analysis
	 succeeded.  */
      if (!simple_iv (loop, loop, res, &iv, true)
	  && simple_loop_info)
	{
	  gimple reduc_stmt = vect_is_simple_reduction (simple_loop_info, phi, true, &double_reduc);
	  if (reduc_stmt)
	    build_new_reduction (reduction_list, reduc_stmt, phi);
	}
    }
  /* NOTE(review): SIMPLE_LOOP_INFO may be NULL when
     vect_analyze_loop_form fails; presumably destroy_loop_vec_info
     tolerates NULL -- confirm against tree-vectorizer.  */
  destroy_loop_vec_info (simple_loop_info, true);
}
1659
1660 /* Try to initialize NITER for code generation part. */
1661
1662 static bool
1663 try_get_loop_niter (loop_p loop, struct tree_niter_desc *niter)
1664 {
1665 edge exit = single_dom_exit (loop);
1666
1667 gcc_assert (exit);
1668
1669 /* We need to know # of iterations, and there should be no uses of values
1670 defined inside loop outside of it, unless the values are invariants of
1671 the loop. */
1672 if (!number_of_iterations_exit (loop, exit, niter, false))
1673 {
1674 if (dump_file && (dump_flags & TDF_DETAILS))
1675 fprintf (dump_file, " FAILED: number of iterations not known\n");
1676 return false;
1677 }
1678
1679 return true;
1680 }
1681
1682 /* Try to initialize REDUCTION_LIST for code generation part.
1683 REDUCTION_LIST describes the reductions. */
1684
static bool
try_create_reduction_list (loop_p loop, htab_t reduction_list)
{
  edge exit = single_dom_exit (loop);
  gimple_stmt_iterator gsi;

  gcc_assert (exit);

  gather_scalar_reductions (loop, reduction_list);


  /* Every value used after the loop must be the final value of a
     detected reduction; anything else makes the loop unparallelizable.  */
  for (gsi = gsi_start_phis (exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi = gsi_stmt (gsi);
      struct reduction_info *red;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      gimple reduc_phi;
      tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);

      if (is_gimple_reg (val))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "phi is ");
	      print_gimple_stmt (dump_file, phi, 0, 0);
	      fprintf (dump_file, "arg of phi to exit:   value ");
	      print_generic_expr (dump_file, val, 0);
	      fprintf (dump_file, " used outside loop\n");
	      fprintf (dump_file,
		       "  checking if it a part of reduction pattern:  \n");
	    }
	  if (htab_elements (reduction_list) == 0)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "  FAILED: it is not a part of reduction.\n");
	      return false;
	    }
	  /* Find the phi inside the loop that this exit value flows
	     back to; it must be a known reduction phi.  */
	  reduc_phi = NULL;
	  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val)
	    {
	      if (flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
		{
		  reduc_phi = USE_STMT (use_p);
		  break;
		}
	    }
	  red = reduction_phi (reduction_list, reduc_phi);
	  if (red == NULL)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "  FAILED: it is not a part of reduction.\n");
	      return false;
	    }
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "reduction phi is  ");
	      print_gimple_stmt (dump_file, red->reduc_phi, 0, 0);
	      fprintf (dump_file, "reduction stmt is  ");
	      print_gimple_stmt (dump_file, red->reduc_stmt, 0, 0);
	    }
	}
    }

  /* The iterations of the loop may communicate only through bivs whose
     iteration space can be distributed efficiently.  */
  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi = gsi_stmt (gsi);
      tree def = PHI_RESULT (phi);
      affine_iv iv;

      if (is_gimple_reg (def) && !simple_iv (loop, loop, def, &iv, true))
	{
	  /* Not a simple iv: only acceptable if it is a reduction.  */
	  struct reduction_info *red;

	  red = reduction_phi (reduction_list, phi);
	  if (red == NULL)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "  FAILED: scalar dependency between iterations\n");
	      return false;
	    }
	}
    }


  return true;
}
1777
1778 /* Detect parallel loops and generate parallel code using libgomp
1779 primitives. Returns true if some loop was parallelized, false
1780 otherwise. */
1781
bool
parallelize_loops (void)
{
  unsigned n_threads = flag_tree_parallelize_loops;
  bool changed = false;
  struct loop *loop;
  struct tree_niter_desc niter_desc;
  loop_iterator li;
  htab_t reduction_list;

  /* Do not parallelize loops in the functions created by parallelization.  */
  if (parallelized_function_p (cfun->decl))
    return false;

  reduction_list = htab_create (10, reduction_info_hash,
				reduction_info_eq, free);
  init_stmt_vec_info_vec ();

  FOR_EACH_LOOP (li, loop, 0)
    {
      /* The reduction table is reused across loops; clear it for each.  */
      htab_empty (reduction_list);

      /* If we use autopar in graphite pass, we use its marked dependency
	 checking results.  */
      if (flag_loop_parallelize_all && !loop->can_be_parallel)
	continue;

      /* FIXME: Only consider innermost loops with just one exit.  */
      if (loop->inner || !single_dom_exit (loop))
	continue;

      if (/* And of course, the loop must be parallelizable.  */
	  !can_duplicate_loop_p (loop)
	  || loop_has_blocks_with_irreducible_flag (loop)
	  /* FIXME: the check for vector phi nodes could be removed.  */
	  || loop_has_vector_phi_nodes (loop))
	continue;

      /* FIXME: Bypass this check as graphite doesn't update the
	 count and frequency correctly now.  */
      if (!flag_loop_parallelize_all
	  && (expected_loop_iterations (loop) <= n_threads
	      /* Do not bother with loops in cold areas.  */
	      || optimize_loop_nest_for_size_p (loop)))
	continue;

      if (!try_get_loop_niter (loop, &niter_desc))
	continue;

      if (!try_create_reduction_list (loop, reduction_list))
	continue;

      if (!flag_loop_parallelize_all && !loop_parallel_p (loop))
	continue;

      changed = true;
      gen_parallel_loop (loop, reduction_list,
			 n_threads, &niter_desc);
      /* gen_parallel_loop rewrites the CFG; verify everything still
	 holds (these are cheap only in checking builds).  */
      verify_flow_info ();
      verify_dominators (CDI_DOMINATORS);
      verify_loop_structure ();
      verify_loop_closed_ssa ();
    }

  free_stmt_vec_info_vec ();
  htab_delete (reduction_list);

  /* Parallelization will cause new function calls to be inserted through
     which local variables will escape.  Reset the points-to solutions
     for ESCAPED and CALLUSED.  */
  if (changed)
    {
      pt_solution_reset (&cfun->gimple_df->escaped);
      pt_solution_reset (&cfun->gimple_df->callused);
    }

  return changed;
}
1860
1861 #include "gt-tree-parloops.h"