* g++.dg/cpp0x/nullptr21.c: Remove printfs, make self-checking.
[gcc.git] / gcc / tree-parloops.c
1 /* Loop autoparallelization.
2 Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012
3 Free Software Foundation, Inc.
4 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
5 Zdenek Dvorak <dvorakz@suse.cz> and Razya Ladelsky <razya@il.ibm.com>.
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tree-flow.h"
27 #include "cfgloop.h"
28 #include "tree-data-ref.h"
29 #include "tree-scalar-evolution.h"
30 #include "gimple-pretty-print.h"
31 #include "tree-pass.h"
32 #include "langhooks.h"
33 #include "tree-vectorizer.h"
34
35 /* This pass tries to distribute iterations of loops into several threads.
36 The implementation is straightforward -- for each loop we test whether its
37 iterations are independent, and if it is the case (and some additional
38 conditions regarding profitability and correctness are satisfied), we
39 add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let omp expansion
40 machinery do its job.
41
   Most of the complexity is in bringing the code into shape expected
43 by the omp expanders:
44 -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
45 variable and that the exit test is at the start of the loop body
46 -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
47 variables by accesses through pointers, and breaking up ssa chains
48 by storing the values incoming to the parallelized loop to a structure
49 passed to the new function as an argument (something similar is done
50 in omp gimplification, unfortunately only a small part of the code
51 can be shared).
52
53 TODO:
54 -- if there are several parallelizable loops in a function, it may be
55 possible to generate the threads just once (using synchronization to
56 ensure that cross-loop dependences are obeyed).
57 -- handling of common reduction patterns for outer loops.
58
59 More info can also be found at http://gcc.gnu.org/wiki/AutoParInGCC */
60 /*
61 Reduction handling:
62 currently we use vect_force_simple_reduction() to detect reduction patterns.
   The code transformation is illustrated below by an example.
64
65
66 parloop
67 {
68 int sum=1;
69
70 for (i = 0; i < N; i++)
71 {
72 x[i] = i + 3;
73 sum+=x[i];
74 }
75 }
76
77 gimple-like code:
78 header_bb:
79
80 # sum_29 = PHI <sum_11(5), 1(3)>
81 # i_28 = PHI <i_12(5), 0(3)>
82 D.1795_8 = i_28 + 3;
83 x[i_28] = D.1795_8;
84 sum_11 = D.1795_8 + sum_29;
85 i_12 = i_28 + 1;
86 if (N_6(D) > i_12)
87 goto header_bb;
88
89
90 exit_bb:
91
92 # sum_21 = PHI <sum_11(4)>
93 printf (&"%d"[0], sum_21);
94
95
96 after reduction transformation (only relevant parts):
97
98 parloop
99 {
100
101 ....
102
103
104 # Storing the initial value given by the user. #
105
106 .paral_data_store.32.sum.27 = 1;
107
108 #pragma omp parallel num_threads(4)
109
110 #pragma omp for schedule(static)
111
112 # The neutral element corresponding to the particular
113 reduction's operation, e.g. 0 for PLUS_EXPR,
114 1 for MULT_EXPR, etc. replaces the user's initial value. #
115
116 # sum.27_29 = PHI <sum.27_11, 0>
117
118 sum.27_11 = D.1827_8 + sum.27_29;
119
120 GIMPLE_OMP_CONTINUE
121
122 # Adding this reduction phi is done at create_phi_for_local_result() #
123 # sum.27_56 = PHI <sum.27_11, 0>
124 GIMPLE_OMP_RETURN
125
126 # Creating the atomic operation is done at
127 create_call_for_reduction_1() #
128
129 #pragma omp atomic_load
130 D.1839_59 = *&.paral_data_load.33_51->reduction.23;
131 D.1840_60 = sum.27_56 + D.1839_59;
132 #pragma omp atomic_store (D.1840_60);
133
134 GIMPLE_OMP_RETURN
135
136 # collecting the result after the join of the threads is done at
137 create_loads_for_reductions().
138 The value computed by the threads is loaded from the
139 shared struct. #
140
141
142 .paral_data_load.33_52 = &.paral_data_store.32;
143 sum_37 = .paral_data_load.33_52->sum.27;
144 sum_43 = D.1795_41 + sum_37;
145
146 exit bb:
147 # sum_21 = PHI <sum_43, sum_26>
148 printf (&"%d"[0], sum_21);
149
150 ...
151
152 }
153
154 */
155
156 /* Minimal number of iterations of a loop that should be executed in each
157 thread. */
158 #define MIN_PER_THREAD 100
159
/* Element of the hashtable, representing a
   reduction in the current loop.  Keyed on REDUC_PHI and hashed on
   REDUC_VERSION (see reduction_info_eq / reduction_info_hash).  */
struct reduction_info
{
  gimple reduc_stmt;		/* The reduction statement.  */
  gimple reduc_phi;		/* The phi node defining the reduction.  */
  enum tree_code reduction_code;/* Code for the reduction operation.  */
  unsigned reduc_version;	/* SSA_NAME_VERSION of original reduc_phi
				   result.  */
  gimple keep_res;		/* The PHI_RESULT of this phi is the resulting value
				   of the reduction variable when exiting the loop.  */
  tree initial_value;		/* The initial value of the reduction var before entering the loop.  */
  tree field;			/* The field in the parloop data structure intended for reduction.  */
  tree init;			/* Reduction initialization value (neutral element,
				   see initialize_reductions).  */
  gimple new_phi;		/* (helper field) Newly created phi node whose result
				   will be passed to the atomic operation.  Represents
				   the local result each thread computed for the reduction
				   operation.  */
};
179
180 /* Equality and hash functions for hashtab code. */
181
182 static int
183 reduction_info_eq (const void *aa, const void *bb)
184 {
185 const struct reduction_info *a = (const struct reduction_info *) aa;
186 const struct reduction_info *b = (const struct reduction_info *) bb;
187
188 return (a->reduc_phi == b->reduc_phi);
189 }
190
191 static hashval_t
192 reduction_info_hash (const void *aa)
193 {
194 const struct reduction_info *a = (const struct reduction_info *) aa;
195
196 return a->reduc_version;
197 }
198
199 static struct reduction_info *
200 reduction_phi (htab_t reduction_list, gimple phi)
201 {
202 struct reduction_info tmpred, *red;
203
204 if (htab_elements (reduction_list) == 0 || phi == NULL)
205 return NULL;
206
207 tmpred.reduc_phi = phi;
208 tmpred.reduc_version = gimple_uid (phi);
209 red = (struct reduction_info *) htab_find (reduction_list, &tmpred);
210
211 return red;
212 }
213
/* Element of hashtable of names to copy.  Keyed and hashed on VERSION
   (see name_to_copy_elt_eq / name_to_copy_elt_hash).  */

struct name_to_copy_elt
{
  unsigned version;	/* The version of the name to copy.  */
  tree new_name;	/* The new name used in the copy.  */
  tree field;		/* The field of the structure used to pass the
			   value.  */
};
223
224 /* Equality and hash functions for hashtab code. */
225
226 static int
227 name_to_copy_elt_eq (const void *aa, const void *bb)
228 {
229 const struct name_to_copy_elt *a = (const struct name_to_copy_elt *) aa;
230 const struct name_to_copy_elt *b = (const struct name_to_copy_elt *) bb;
231
232 return a->version == b->version;
233 }
234
235 static hashval_t
236 name_to_copy_elt_hash (const void *aa)
237 {
238 const struct name_to_copy_elt *a = (const struct name_to_copy_elt *) aa;
239
240 return (hashval_t) a->version;
241 }
242
/* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
   matrix.  Rather than use floats, we simply keep a single DENOMINATOR that
   represents the denominator for every element in the matrix.  */
typedef struct lambda_trans_matrix_s
{
  lambda_matrix matrix;		/* The integer coefficients.  */
  int rowsize;			/* Number of rows.  */
  int colsize;			/* Number of columns.  */
  int denominator;		/* Shared denominator for all elements.  */
} *lambda_trans_matrix;

/* Accessors for the fields of a lambda_trans_matrix.  */
#define LTM_MATRIX(T) ((T)->matrix)
#define LTM_ROWSIZE(T) ((T)->rowsize)
#define LTM_COLSIZE(T) ((T)->colsize)
#define LTM_DENOMINATOR(T) ((T)->denominator)
257
258 /* Allocate a new transformation matrix. */
259
260 static lambda_trans_matrix
261 lambda_trans_matrix_new (int colsize, int rowsize,
262 struct obstack * lambda_obstack)
263 {
264 lambda_trans_matrix ret;
265
266 ret = (lambda_trans_matrix)
267 obstack_alloc (lambda_obstack, sizeof (struct lambda_trans_matrix_s));
268 LTM_MATRIX (ret) = lambda_matrix_new (rowsize, colsize, lambda_obstack);
269 LTM_ROWSIZE (ret) = rowsize;
270 LTM_COLSIZE (ret) = colsize;
271 LTM_DENOMINATOR (ret) = 1;
272 return ret;
273 }
274
275 /* Multiply a vector VEC by a matrix MAT.
276 MAT is an M*N matrix, and VEC is a vector with length N. The result
277 is stored in DEST which must be a vector of length M. */
278
279 static void
280 lambda_matrix_vector_mult (lambda_matrix matrix, int m, int n,
281 lambda_vector vec, lambda_vector dest)
282 {
283 int i, j;
284
285 lambda_vector_clear (dest, m);
286 for (i = 0; i < m; i++)
287 for (j = 0; j < n; j++)
288 dest[i] += matrix[i][j] * vec[j];
289 }
290
/* Return true if TRANS is a legal transformation matrix that respects
   the dependence vectors in DISTS and DIRS.  The conservative answer
   is false.

   "Wolfe proves that a unimodular transformation represented by the
   matrix T is legal when applied to a loop nest with a set of
   lexicographically non-negative distance vectors RDG if and only if
   for each vector d in RDG, (T.d >= 0) is lexicographically positive.
   i.e.: if and only if it transforms the lexicographically positive
   distance vectors to lexicographically positive vectors.  Note that
   a unimodular matrix must transform the zero vector (and only it) to
   the zero vector." S.Muchnick.  */

static bool
lambda_transform_legal_p (lambda_trans_matrix trans,
			  int nb_loops,
			  VEC (ddr_p, heap) *dependence_relations)
{
  unsigned int i, j;
  lambda_vector distres;
  struct data_dependence_relation *ddr;

  gcc_assert (LTM_COLSIZE (trans) == nb_loops
	      && LTM_ROWSIZE (trans) == nb_loops);

  /* When there are no dependences, the transformation is correct.  */
  if (VEC_length (ddr_p, dependence_relations) == 0)
    return true;

  ddr = VEC_index (ddr_p, dependence_relations, 0);
  if (ddr == NULL)
    return true;

  /* When there is an unknown relation in the dependence_relations, we
     know that it is not worth looking at this loop nest: give up.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    return false;

  distres = lambda_vector_new (nb_loops);

  /* For each distance vector in the dependence graph.  */
  FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
    {
      /* Don't care about relations for which we know that there is no
	 dependence, nor about read-read (aka. output-dependences):
	 these data accesses can happen in any order.  */
      if (DDR_ARE_DEPENDENT (ddr) == chrec_known
	  || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
	continue;

      /* Conservatively answer: "this transformation is not valid".  */
      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	return false;

      /* If the dependence could not be captured by a distance vector,
	 conservatively answer that the transform is not valid.  */
      if (DDR_NUM_DIST_VECTS (ddr) == 0)
	return false;

      /* Compute trans.dist_vect and require the result to stay
	 lexicographically positive.  */
      for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
	{
	  lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
				     DDR_DIST_VECT (ddr, j), distres);

	  if (!lambda_vector_lexico_pos (distres, nb_loops))
	    return false;
	}
    }
  return true;
}
362
/* Data dependency analysis.  Returns true if the iterations of LOOP
   are independent on each other (that is, if we can execute them
   in parallel).  Scratch memory is allocated on PARLOOP_OBSTACK.  */

static bool
loop_parallel_p (struct loop *loop, struct obstack * parloop_obstack)
{
  VEC (loop_p, heap) *loop_nest;
  VEC (ddr_p, heap) *dependence_relations;
  VEC (data_reference_p, heap) *datarefs;
  lambda_trans_matrix trans;
  bool ret = false;

  if (dump_file && (dump_flags & TDF_DETAILS))
  {
    fprintf (dump_file, "Considering loop %d\n", loop->num);
    if (!loop->inner)
      fprintf (dump_file, "loop is innermost\n");
    else
      fprintf (dump_file, "loop NOT innermost\n");
   }

  /* Check for problems with dependences.  If the loop can be reversed,
     the iterations are independent.  */
  datarefs = VEC_alloc (data_reference_p, heap, 10);
  dependence_relations = VEC_alloc (ddr_p, heap, 10 * 10);
  loop_nest = VEC_alloc (loop_p, heap, 3);
  if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
					   &dependence_relations))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  FAILED: cannot analyze data dependencies\n");
      ret = false;
      goto end;
    }
  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_data_dependence_relations (dump_file, dependence_relations);

  /* The 1x1 matrix [-1] represents reversing the single loop; if the
     reversal is legal w.r.t. all dependences, iterations are
     independent.  */
  trans = lambda_trans_matrix_new (1, 1, parloop_obstack);
  LTM_MATRIX (trans)[0][0] = -1;

  if (lambda_transform_legal_p (trans, 1, dependence_relations))
    {
      ret = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  SUCCESS: may be parallelized\n");
    }
  else if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file,
	     "  FAILED: data dependencies exist across iterations\n");

 end:
  /* Single cleanup point for the vectors allocated above.  */
  VEC_free (loop_p, heap, loop_nest);
  free_dependence_relations (dependence_relations);
  free_data_refs (datarefs);

  return ret;
}
421
422 /* Return true when LOOP contains basic blocks marked with the
423 BB_IRREDUCIBLE_LOOP flag. */
424
425 static inline bool
426 loop_has_blocks_with_irreducible_flag (struct loop *loop)
427 {
428 unsigned i;
429 basic_block *bbs = get_loop_body_in_dom_order (loop);
430 bool res = true;
431
432 for (i = 0; i < loop->num_nodes; i++)
433 if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP)
434 goto end;
435
436 res = false;
437 end:
438 free (bbs);
439 return res;
440 }
441
/* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
   The assignment statement is placed on edge ENTRY.  DECL_ADDRESS maps decls
   to their addresses that can be reused.  The address of OBJ is known to
   be invariant in the whole function.  Other needed statements are placed
   right before GSI.  If GSI is NULL, no new statements may be emitted;
   in that case NULL is returned when an address would have had to be
   materialized.  */

static tree
take_address_of (tree obj, tree type, edge entry, htab_t decl_address,
		 gimple_stmt_iterator *gsi)
{
  int uid;
  void **dslot;
  struct int_tree_map ielt, *nielt;
  tree *var_p, name, bvar, addr;
  gimple stmt;
  gimple_seq stmts;

  /* Since the address of OBJ is invariant, the trees may be shared.
     Avoid rewriting unrelated parts of the code.  */
  obj = unshare_expr (obj);
  /* Walk down to the base object underneath any component refs.  */
  for (var_p = &obj;
       handled_component_p (*var_p);
       var_p = &TREE_OPERAND (*var_p, 0))
    continue;

  /* Canonicalize the access to base on a MEM_REF.  */
  if (DECL_P (*var_p))
    *var_p = build_simple_mem_ref (build_fold_addr_expr (*var_p));

  /* Assign a canonical SSA name to the address of the base decl used
     in the address and share it for all accesses and addresses based
     on it.  */
  uid = DECL_UID (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
  ielt.uid = uid;
  dslot = htab_find_slot_with_hash (decl_address, &ielt, uid, INSERT);
  if (!*dslot)
    {
      /* First time this base decl is seen.  Without an insertion point
	 we cannot materialize the address; signal failure instead.  */
      if (gsi == NULL)
	return NULL;
      addr = TREE_OPERAND (*var_p, 0);
      bvar = create_tmp_var (TREE_TYPE (addr),
			     get_name (TREE_OPERAND
				         (TREE_OPERAND (*var_p, 0), 0)));
      add_referenced_var (bvar);
      stmt = gimple_build_assign (bvar, addr);
      name = make_ssa_name (bvar, stmt);
      gimple_assign_set_lhs (stmt, name);
      /* The address is function-invariant, so computing it once on the
	 region entry edge suffices for all uses.  */
      gsi_insert_on_edge_immediate (entry, stmt);

      nielt = XNEW (struct int_tree_map);
      nielt->uid = uid;
      nielt->to = name;
      *dslot = nielt;
    }
  else
    name = ((struct int_tree_map *) *dslot)->to;

  /* Express the address in terms of the canonical SSA name.  */
  TREE_OPERAND (*var_p, 0) = name;
  if (gsi == NULL)
    return build_fold_addr_expr_with_type (obj, type);

  name = force_gimple_operand (build_addr (obj, current_function_decl),
			       &stmts, true, NULL_TREE);
  if (!gimple_seq_empty_p (stmts))
    gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);

  /* Convert to the requested pointer type if necessary.  */
  if (!useless_type_conversion_p (type, TREE_TYPE (name)))
    {
      name = force_gimple_operand (fold_convert (type, name), &stmts, true,
				   NULL_TREE);
      if (!gimple_seq_empty_p (stmts))
	gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
    }

  return name;
}
519
/* Callback for htab_traverse.  Create the initialization statement
   for reduction described in SLOT, and place it at the preheader of
   the loop described in DATA.  */

static int
initialize_reductions (void **slot, void *data)
{
  tree init, c;
  tree bvar, type, arg;
  edge e;

  struct reduction_info *const reduc = (struct reduction_info *) *slot;
  struct loop *loop = (struct loop *) data;

  /* Create initialization in preheader:
     reduction_variable = initialization value of reduction.  */

  /* In the phi node at the header, replace the argument coming
     from the preheader with the reduction initialization value.  */

  /* Create a new variable to initialize the reduction.  */
  type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
  bvar = create_tmp_var (type, "reduction");
  add_referenced_var (bvar);

  /* Build a temporary OMP_CLAUSE_REDUCTION only to reuse
     omp_reduction_init, which knows the neutral element of each
     reduction code.  */
  c = build_omp_clause (gimple_location (reduc->reduc_stmt),
			OMP_CLAUSE_REDUCTION);
  OMP_CLAUSE_REDUCTION_CODE (c) = reduc->reduction_code;
  OMP_CLAUSE_DECL (c) = SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt));

  init = omp_reduction_init (c, TREE_TYPE (bvar));
  reduc->init = init;

  /* Replace the argument representing the initialization value
     with the initialization value for the reduction (neutral
     element for the particular operation, e.g. 0 for PLUS_EXPR,
     1 for MULT_EXPR, etc).
     Keep the old value in a new variable "reduction_initial",
     that will be taken in consideration after the parallel
     computing is done.  */

  e = loop_preheader_edge (loop);
  arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e);
  /* Create new variable to hold the initial value.  */

  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
	   (reduc->reduc_phi, loop_preheader_edge (loop)), init);
  reduc->initial_value = arg;
  /* Nonzero return value keeps htab_traverse going.  */
  return 1;
}
570
/* Data passed through walk_tree/walk_gimple_op to
   eliminate_local_variables_1.  */

struct elv_data
{
  struct walk_stmt_info info;	/* Must be first: the callers pass &info and
				   the callback casts it back to elv_data.  */
  edge entry;			/* Entry edge of the region being rewritten.  */
  htab_t decl_address;		/* Map from decl uid to canonical address SSA
				   name (see take_address_of).  */
  gimple_stmt_iterator *gsi;	/* Insertion point for new statements, or NULL
				   when processing debug binds.  */
  bool changed;			/* Set when the walked expression was
				   rewritten.  */
  bool reset;			/* Set when a debug bind value must be dropped
				   (gsi == NULL and no cached address).  */
};
580
/* Eliminates references to local variables in *TP out of the single
   entry single exit region starting at DTA->ENTRY.
   DECL_ADDRESS contains addresses of the references that had their
   address taken already.  If the expression is changed, CHANGED is
   set to true.  Callback for walk_tree.  */

static tree
eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
{
  struct elv_data *const dta = (struct elv_data *) data;
  tree t = *tp, var, addr, addr_type, type, obj;

  if (DECL_P (t))
    {
      *walk_subtrees = 0;

      /* Only local (automatic) variables need rewriting.  */
      if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
	return NULL_TREE;

      /* Replace the direct reference by a dereference of the (cached)
	 address of the variable.  */
      type = TREE_TYPE (t);
      addr_type = build_pointer_type (type);
      addr = take_address_of (t, addr_type, dta->entry, dta->decl_address,
			      dta->gsi);
      if (dta->gsi == NULL && addr == NULL_TREE)
	{
	  /* No insertion point (debug bind) and no cached address:
	     ask the caller to drop the debug value instead.  */
	  dta->reset = true;
	  return NULL_TREE;
	}

      *tp = build_simple_mem_ref (addr);

      dta->changed = true;
      return NULL_TREE;
    }

  if (TREE_CODE (t) == ADDR_EXPR)
    {
      /* ADDR_EXPR may appear in two contexts:
	 -- as a gimple operand, when the address taken is a function invariant
	 -- as gimple rhs, when the resulting address in not a function
	    invariant
	 We do not need to do anything special in the latter case (the base of
	 the memory reference whose address is taken may be replaced in the
	 DECL_P case).  The former case is more complicated, as we need to
	 ensure that the new address is still a gimple operand.  Thus, it
	 is not sufficient to replace just the base of the memory reference --
	 we need to move the whole computation of the address out of the
	 loop.  */
      if (!is_gimple_val (t))
	return NULL_TREE;

      *walk_subtrees = 0;
      obj = TREE_OPERAND (t, 0);
      var = get_base_address (obj);
      if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var))
	return NULL_TREE;

      addr_type = TREE_TYPE (t);
      addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address,
			      dta->gsi);
      if (dta->gsi == NULL && addr == NULL_TREE)
	{
	  dta->reset = true;
	  return NULL_TREE;
	}
      *tp = addr;

      dta->changed = true;
      return NULL_TREE;
    }

  /* Non-expression nodes cannot contain variable references.  */
  if (!EXPR_P (t))
    *walk_subtrees = 0;

  return NULL_TREE;
}
657
658 /* Moves the references to local variables in STMT at *GSI out of the single
659 entry single exit region starting at ENTRY. DECL_ADDRESS contains
660 addresses of the references that had their address taken
661 already. */
662
663 static void
664 eliminate_local_variables_stmt (edge entry, gimple_stmt_iterator *gsi,
665 htab_t decl_address)
666 {
667 struct elv_data dta;
668 gimple stmt = gsi_stmt (*gsi);
669
670 memset (&dta.info, '\0', sizeof (dta.info));
671 dta.entry = entry;
672 dta.decl_address = decl_address;
673 dta.changed = false;
674 dta.reset = false;
675
676 if (gimple_debug_bind_p (stmt))
677 {
678 dta.gsi = NULL;
679 walk_tree (gimple_debug_bind_get_value_ptr (stmt),
680 eliminate_local_variables_1, &dta.info, NULL);
681 if (dta.reset)
682 {
683 gimple_debug_bind_reset_value (stmt);
684 dta.changed = true;
685 }
686 }
687 else
688 {
689 dta.gsi = gsi;
690 walk_gimple_op (stmt, eliminate_local_variables_1, &dta.info);
691 }
692
693 if (dta.changed)
694 update_stmt (stmt);
695 }
696
/* Eliminates the references to local variables from the single entry
   single exit region between the ENTRY and EXIT edges.

   This includes:
   1) Taking address of a local variable -- these are moved out of the
   region (and temporary variable is created to hold the address if
   necessary).

   2) Dereferencing a local variable -- these are replaced with indirect
   references.  */

static void
eliminate_local_variables (edge entry, edge exit)
{
  basic_block bb;
  VEC (basic_block, heap) *body = VEC_alloc (basic_block, heap, 3);
  unsigned i;
  gimple_stmt_iterator gsi;
  bool has_debug_stmt = false;
  htab_t decl_address = htab_create (10, int_tree_map_hash, int_tree_map_eq,
				     free);
  basic_block entry_bb = entry->src;
  basic_block exit_bb = exit->dest;

  gather_blocks_in_sese_region (entry_bb, exit_bb, &body);

  /* First pass: rewrite the non-debug statements, filling DECL_ADDRESS
     with canonical addresses; debug binds are only noted here and
     handled in a second pass so they can reuse the cached addresses
     without forcing new code to be inserted.  */
  FOR_EACH_VEC_ELT (basic_block, body, i, bb)
    if (bb != entry_bb && bb != exit_bb)
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	if (is_gimple_debug (gsi_stmt (gsi)))
	  {
	    if (gimple_debug_bind_p (gsi_stmt (gsi)))
	      has_debug_stmt = true;
	  }
	else
	  eliminate_local_variables_stmt (entry, &gsi, decl_address);

  /* Second pass: rewrite the debug binds.  */
  if (has_debug_stmt)
    FOR_EACH_VEC_ELT (basic_block, body, i, bb)
      if (bb != entry_bb && bb != exit_bb)
	for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	  if (gimple_debug_bind_p (gsi_stmt (gsi)))
	    eliminate_local_variables_stmt (entry, &gsi, decl_address);

  htab_delete (decl_address);
  VEC_free (basic_block, heap, body);
}
744
745 /* Returns true if expression EXPR is not defined between ENTRY and
746 EXIT, i.e. if all its operands are defined outside of the region. */
747
748 static bool
749 expr_invariant_in_region_p (edge entry, edge exit, tree expr)
750 {
751 basic_block entry_bb = entry->src;
752 basic_block exit_bb = exit->dest;
753 basic_block def_bb;
754
755 if (is_gimple_min_invariant (expr))
756 return true;
757
758 if (TREE_CODE (expr) == SSA_NAME)
759 {
760 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
761 if (def_bb
762 && dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb)
763 && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb))
764 return false;
765
766 return true;
767 }
768
769 return false;
770 }
771
/* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
   The copies are stored to NAME_COPIES, if NAME was already duplicated,
   its duplicate stored in NAME_COPIES is returned.

   Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
   duplicated, storing the copies in DECL_COPIES.  */

static tree
separate_decls_in_region_name (tree name,
			       htab_t name_copies, htab_t decl_copies,
			       bool copy_name_p)
{
  tree copy, var, var_copy;
  unsigned idx, uid, nuid;
  struct int_tree_map ielt, *nielt;
  struct name_to_copy_elt elt, *nelt;
  void **slot, **dslot;

  if (TREE_CODE (name) != SSA_NAME)
    return name;

  idx = SSA_NAME_VERSION (name);
  elt.version = idx;
  slot = htab_find_slot_with_hash (name_copies, &elt, idx,
				   copy_name_p ? INSERT : NO_INSERT);
  if (slot && *slot)
    /* NAME was already duplicated; reuse its copy.  */
    return ((struct name_to_copy_elt *) *slot)->new_name;

  /* Duplicate the base decl (once per decl, cached in DECL_COPIES).  */
  var = SSA_NAME_VAR (name);
  uid = DECL_UID (var);
  ielt.uid = uid;
  dslot = htab_find_slot_with_hash (decl_copies, &ielt, uid, INSERT);
  if (!*dslot)
    {
      var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
      DECL_GIMPLE_REG_P (var_copy) = DECL_GIMPLE_REG_P (var);
      add_referenced_var (var_copy);
      nielt = XNEW (struct int_tree_map);
      nielt->uid = uid;
      nielt->to = var_copy;
      *dslot = nielt;

      /* Ensure that when we meet this decl next time, we won't duplicate
	 it again.  */
      nuid = DECL_UID (var_copy);
      ielt.uid = nuid;
      dslot = htab_find_slot_with_hash (decl_copies, &ielt, nuid, INSERT);
      gcc_assert (!*dslot);
      nielt = XNEW (struct int_tree_map);
      nielt->uid = nuid;
      nielt->to = var_copy;
      *dslot = nielt;
    }
  else
    var_copy = ((struct int_tree_map *) *dslot)->to;

  if (copy_name_p)
    {
      copy = duplicate_ssa_name (name, NULL);
      nelt = XNEW (struct name_to_copy_elt);
      nelt->version = idx;
      nelt->new_name = copy;
      nelt->field = NULL_TREE;
      *slot = nelt;
    }
  else
    {
      /* NO_INSERT lookup must have found nothing, otherwise we would
	 have returned early above.  */
      gcc_assert (!slot);
      copy = name;
    }

  /* Rebase the (possibly copied) name onto the duplicated decl.  */
  SSA_NAME_VAR (copy) = var_copy;
  return copy;
}
846
847 /* Finds the ssa names used in STMT that are defined outside the
848 region between ENTRY and EXIT and replaces such ssa names with
849 their duplicates. The duplicates are stored to NAME_COPIES. Base
850 decls of all ssa names used in STMT (including those defined in
851 LOOP) are replaced with the new temporary variables; the
852 replacement decls are stored in DECL_COPIES. */
853
854 static void
855 separate_decls_in_region_stmt (edge entry, edge exit, gimple stmt,
856 htab_t name_copies, htab_t decl_copies)
857 {
858 use_operand_p use;
859 def_operand_p def;
860 ssa_op_iter oi;
861 tree name, copy;
862 bool copy_name_p;
863
864 mark_virtual_ops_for_renaming (stmt);
865
866 FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF)
867 {
868 name = DEF_FROM_PTR (def);
869 gcc_assert (TREE_CODE (name) == SSA_NAME);
870 copy = separate_decls_in_region_name (name, name_copies, decl_copies,
871 false);
872 gcc_assert (copy == name);
873 }
874
875 FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
876 {
877 name = USE_FROM_PTR (use);
878 if (TREE_CODE (name) != SSA_NAME)
879 continue;
880
881 copy_name_p = expr_invariant_in_region_p (entry, exit, name);
882 copy = separate_decls_in_region_name (name, name_copies, decl_copies,
883 copy_name_p);
884 SET_USE (use, copy);
885 }
886 }
887
/* Rewrites the debug statement STMT so that it refers to the duplicated
   decls (DECL_COPIES) and duplicated ssa names (NAME_COPIES) created by
   separate_decls_in_region_stmt.  Returns true when the statement's
   variable was not duplicated in the region and the statement therefore
   cannot be rewritten; NOTE(review): presumably the caller then discards
   the statement -- the call site is not visible in this chunk, confirm
   there.  Returns false when the statement was (possibly) updated.  */

static bool
separate_decls_in_region_debug (gimple stmt, htab_t name_copies,
				htab_t decl_copies)
{
  use_operand_p use;
  ssa_op_iter oi;
  tree var, name;
  struct int_tree_map ielt;
  struct name_to_copy_elt elt;
  void **slot, **dslot;

  /* Only debug binds and debug source binds carry a variable.  */
  if (gimple_debug_bind_p (stmt))
    var = gimple_debug_bind_get_var (stmt);
  else if (gimple_debug_source_bind_p (stmt))
    var = gimple_debug_source_bind_get_var (stmt);
  else
    return true;
  if (TREE_CODE (var) == DEBUG_EXPR_DECL || TREE_CODE (var) == LABEL_DECL)
    return true;
  gcc_assert (DECL_P (var) && SSA_VAR_P (var));
  ielt.uid = DECL_UID (var);
  dslot = htab_find_slot_with_hash (decl_copies, &ielt, ielt.uid, NO_INSERT);
  if (!dslot)
    return true;
  /* Point the statement at the duplicated decl.  */
  if (gimple_debug_bind_p (stmt))
    gimple_debug_bind_set_var (stmt, ((struct int_tree_map *) *dslot)->to);
  else if (gimple_debug_source_bind_p (stmt))
    gimple_debug_source_bind_set_var (stmt, ((struct int_tree_map *) *dslot)->to);

  FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
    {
      name = USE_FROM_PTR (use);
      if (TREE_CODE (name) != SSA_NAME)
	continue;

      elt.version = SSA_NAME_VERSION (name);
      slot = htab_find_slot_with_hash (name_copies, &elt, elt.version, NO_INSERT);
      if (!slot)
	{
	  /* A used name has no duplicate: the debug value cannot be
	     expressed in the new function, so drop it.  */
	  gimple_debug_bind_reset_value (stmt);
	  update_stmt (stmt);
	  break;
	}

      SET_USE (use, ((struct name_to_copy_elt *) *slot)->new_name);
    }

  return false;
}
944
945 /* Callback for htab_traverse. Adds a field corresponding to the reduction
946 specified in SLOT. The type is passed in DATA. */
947
948 static int
949 add_field_for_reduction (void **slot, void *data)
950 {
951
952 struct reduction_info *const red = (struct reduction_info *) *slot;
953 tree const type = (tree) data;
954 tree var = SSA_NAME_VAR (gimple_assign_lhs (red->reduc_stmt));
955 tree field = build_decl (gimple_location (red->reduc_stmt),
956 FIELD_DECL, DECL_NAME (var), TREE_TYPE (var));
957
958 insert_field_into_struct (type, field);
959
960 red->field = field;
961
962 return 1;
963 }
964
965 /* Callback for htab_traverse. Adds a field corresponding to a ssa name
966 described in SLOT. The type is passed in DATA. */
967
968 static int
969 add_field_for_name (void **slot, void *data)
970 {
971 struct name_to_copy_elt *const elt = (struct name_to_copy_elt *) *slot;
972 tree type = (tree) data;
973 tree name = ssa_name (elt->version);
974 tree var = SSA_NAME_VAR (name);
975 tree field = build_decl (DECL_SOURCE_LOCATION (var),
976 FIELD_DECL, DECL_NAME (var), TREE_TYPE (var));
977
978 insert_field_into_struct (type, field);
979 elt->field = field;
980
981 return 1;
982 }
983
/* Callback for htab_traverse.  A local result is the intermediate result
   computed by a single
   thread, or the initial value in case no iteration was executed.
   This function creates a phi node reflecting these values.
   The phi's result will be stored in NEW_PHI field of the
   reduction's data structure.  */

static int
create_phi_for_local_result (void **slot, void *data)
{
  struct reduction_info *const reduc = (struct reduction_info *) *slot;
  const struct loop *const loop = (const struct loop *) data;
  edge e;
  gimple new_phi;
  basic_block store_bb;
  tree local_res;
  source_location locus;

  /* STORE_BB is the block where the phi
     should be stored.  It is the destination of the loop exit.
     (Find the fallthru edge from GIMPLE_OMP_CONTINUE).  */
  store_bb = FALLTHRU_EDGE (loop->latch)->dest;

  /* STORE_BB has two predecessors.  One coming from the loop
     (the reduction's result is computed at the loop),
     and another coming from a block preceding the loop,
     when no iterations
     are executed (the initial value should be taken).  */
  if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (loop->latch))
    e = EDGE_PRED (store_bb, 1);
  else
    e = EDGE_PRED (store_bb, 0);
  local_res
    = make_ssa_name (SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt)),
		     NULL);
  locus = gimple_location (reduc->reduc_stmt);
  new_phi = create_phi_node (local_res, store_bb);
  SSA_NAME_DEF_STMT (local_res) = new_phi;
  /* On the bypass edge the neutral initial value flows in; on the loop
     exit edge the value computed by the thread flows in.  */
  add_phi_arg (new_phi, reduc->init, e, locus);
  add_phi_arg (new_phi, gimple_assign_lhs (reduc->reduc_stmt),
	       FALLTHRU_EDGE (loop->latch), locus);
  reduc->new_phi = new_phi;

  /* Nonzero return value keeps htab_traverse going.  */
  return 1;
}
1029
/* Describes the shared structure used to communicate data between the
   threads: where it is stored and loaded, and in which blocks the
   stores and loads are to be generated.  */

struct clsn_data
{
  tree store;    /* The structure the data is stored to.  */
  tree load;     /* Pointer through which the data is loaded back.  */

  basic_block store_bb;  /* Block in which the stores are inserted.  */
  basic_block load_bb;   /* Block in which the loads are inserted.  */
};
1038
1039 /* Callback for htab_traverse. Create an atomic instruction for the
1040 reduction described in SLOT.
1041 DATA annotates the place in memory the atomic operation relates to,
1042 and the basic block it needs to be generated in. */
1043
static int
create_call_for_reduction_1 (void **slot, void *data)
{
  struct reduction_info *const reduc = (struct reduction_info *) *slot;
  struct clsn_data *const clsn_data = (struct clsn_data *) data;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
  tree load_struct;
  basic_block bb;
  basic_block new_bb;
  edge e;
  tree t, addr, ref, x;
  tree tmp_load, name;
  gimple load;

  /* Build the address of the field of the shared structure that holds
     this reduction's accumulated value.  */
  load_struct = build_simple_mem_ref (clsn_data->load);
  t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);

  addr = build_addr (t, current_function_decl);

  /* Create phi node.  */
  bb = clsn_data->load_bb;

  /* NOTE(review): split_block's second argument is normally the gimple
     statement after which to split; T is a tree here -- verify this is
     intentional.  */
  e = split_block (bb, t);
  new_bb = e->dest;

  /* Atomically load the current value of the shared field.  */
  tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)), NULL);
  add_referenced_var (tmp_load);
  tmp_load = make_ssa_name (tmp_load, NULL);
  load = gimple_build_omp_atomic_load (tmp_load, addr);
  SSA_NAME_DEF_STMT (tmp_load) = load;
  gsi = gsi_start_bb (new_bb);
  gsi_insert_after (&gsi, load, GSI_NEW_STMT);

  e = split_block (new_bb, load);
  new_bb = e->dest;
  gsi = gsi_start_bb (new_bb);
  ref = tmp_load;
  /* Combine the loaded value with this thread's partial result using
     the reduction's operation.  */
  x = fold_build2 (reduc->reduction_code,
                   TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref,
                   PHI_RESULT (reduc->new_phi));

  name = force_gimple_operand_gsi (&gsi, x, true, NULL_TREE, true,
                                   GSI_CONTINUE_LINKING);

  /* Atomically store the combined value back.  */
  gsi_insert_after (&gsi, gimple_build_omp_atomic_store (name), GSI_NEW_STMT);
  return 1;
}
1092
1093 /* Create the atomic operation at the join point of the threads.
1094 REDUCTION_LIST describes the reductions in the LOOP.
1095 LD_ST_DATA describes the shared data structure where
1096 shared data is stored in and loaded from. */
static void
create_call_for_reduction (struct loop *loop, htab_t reduction_list,
                           struct clsn_data *ld_st_data)
{
  /* First give each reduction a phi merging the per-thread partial
     result with the initial value (fills each reduction's NEW_PHI).  */
  htab_traverse (reduction_list, create_phi_for_local_result, loop);
  /* Find the fallthru edge from GIMPLE_OMP_CONTINUE.  */
  ld_st_data->load_bb = FALLTHRU_EDGE (loop->latch)->dest;
  /* Then emit the atomic read-modify-write sequence per reduction.  */
  htab_traverse (reduction_list, create_call_for_reduction_1, ld_st_data);
}
1106
1107 /* Callback for htab_traverse. Loads the final reduction value at the
1108 join point of all threads, and inserts it in the right place. */
1109
static int
create_loads_for_reductions (void **slot, void *data)
{
  struct reduction_info *const red = (struct reduction_info *) *slot;
  struct clsn_data *const clsn_data = (struct clsn_data *) data;
  gimple stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
  tree load_struct;
  tree name;
  tree x;

  /* Load the final value of the reduction from its field in the shared
     structure.  */
  gsi = gsi_after_labels (clsn_data->load_bb);
  load_struct = build_simple_mem_ref (clsn_data->load);
  load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
                        NULL_TREE);

  /* Redefine the SSA name previously produced by the kept exit phi as
     the loaded value.  */
  x = load_struct;
  name = PHI_RESULT (red->keep_res);
  stmt = gimple_build_assign (name, x);
  SSA_NAME_DEF_STMT (name) = stmt;

  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  /* The phi is now superseded by the load; find and remove it.  */
  for (gsi = gsi_start_phis (gimple_bb (red->keep_res));
       !gsi_end_p (gsi); gsi_next (&gsi))
    if (gsi_stmt (gsi) == red->keep_res)
      {
        remove_phi_node (&gsi, false);
        return 1;
      }
  gcc_unreachable ();
}
1143
1144 /* Load the reduction result that was stored in LD_ST_DATA.
1145 REDUCTION_LIST describes the list of reductions that the
1146 loads should be generated for. */
static void
create_final_loads_for_reduction (htab_t reduction_list,
                                  struct clsn_data *ld_st_data)
{
  gimple_stmt_iterator gsi;
  tree t;
  gimple stmt;

  /* Initialize the load pointer with the address of the shared store,
     before any of the per-reduction loads.  */
  gsi = gsi_after_labels (ld_st_data->load_bb);
  t = build_fold_addr_expr (ld_st_data->store);
  stmt = gimple_build_assign (ld_st_data->load, t);

  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (ld_st_data->load) = stmt;

  /* Emit one load per reduction.  */
  htab_traverse (reduction_list, create_loads_for_reductions, ld_st_data);

}
1165
1166 /* Callback for htab_traverse. Store the neutral value for the
1167 particular reduction's operation, e.g. 0 for PLUS_EXPR,
1168 1 for MULT_EXPR, etc. into the reduction field.
1169 The reduction is specified in SLOT. The store information is
1170 passed in DATA. */
1171
1172 static int
1173 create_stores_for_reduction (void **slot, void *data)
1174 {
1175 struct reduction_info *const red = (struct reduction_info *) *slot;
1176 struct clsn_data *const clsn_data = (struct clsn_data *) data;
1177 tree t;
1178 gimple stmt;
1179 gimple_stmt_iterator gsi;
1180 tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
1181
1182 gsi = gsi_last_bb (clsn_data->store_bb);
1183 t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE);
1184 stmt = gimple_build_assign (t, red->initial_value);
1185 mark_virtual_ops_for_renaming (stmt);
1186 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1187
1188 return 1;
1189 }
1190
1191 /* Callback for htab_traverse. Creates loads to a field of LOAD in LOAD_BB and
1192 store to a field of STORE in STORE_BB for the ssa name and its duplicate
1193 specified in SLOT. */
1194
static int
create_loads_and_stores_for_name (void **slot, void *data)
{
  struct name_to_copy_elt *const elt = (struct name_to_copy_elt *) *slot;
  struct clsn_data *const clsn_data = (struct clsn_data *) data;
  tree t;
  gimple stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (elt->new_name);
  tree load_struct;

  /* In STORE_BB: store the original SSA name's value into its field of
     the shared structure.  */
  gsi = gsi_last_bb (clsn_data->store_bb);
  t = build3 (COMPONENT_REF, type, clsn_data->store, elt->field, NULL_TREE);
  stmt = gimple_build_assign (t, ssa_name (elt->version));
  mark_virtual_ops_for_renaming (stmt);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  /* In LOAD_BB: load the field back into the duplicate name used inside
     the outlined region.  */
  gsi = gsi_last_bb (clsn_data->load_bb);
  load_struct = build_simple_mem_ref (clsn_data->load);
  t = build3 (COMPONENT_REF, type, load_struct, elt->field, NULL_TREE);
  stmt = gimple_build_assign (elt->new_name, t);
  SSA_NAME_DEF_STMT (elt->new_name) = stmt;
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  return 1;
}
1221
1222 /* Moves all the variables used in LOOP and defined outside of it (including
1223 the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
1224 name) to a structure created for this purpose. The code
1225
1226 while (1)
1227 {
1228 use (a);
1229 use (b);
1230 }
1231
1232 is transformed this way:
1233
1234 bb0:
1235 old.a = a;
1236 old.b = b;
1237
1238 bb1:
1239 a' = new->a;
1240 b' = new->b;
1241 while (1)
1242 {
1243 use (a');
1244 use (b');
1245 }
1246
1247 `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT. The
1248 pointer `new' is intentionally not initialized (the loop will be split to a
1249 separate function later, and `new' will be initialized from its arguments).
1250 LD_ST_DATA holds information about the shared data structure used to pass
1251 information among the threads. It is initialized here, and
1252 gen_parallel_loop will pass it to create_call_for_reduction that
1253 needs this information. REDUCTION_LIST describes the reductions
1254 in LOOP. */
1255
static void
separate_decls_in_region (edge entry, edge exit, htab_t reduction_list,
                          tree *arg_struct, tree *new_arg_struct,
                          struct clsn_data *ld_st_data)

{
  basic_block bb1 = split_edge (entry);
  basic_block bb0 = single_pred (bb1);
  htab_t name_copies = htab_create (10, name_to_copy_elt_hash,
                                    name_to_copy_elt_eq, free);
  htab_t decl_copies = htab_create (10, int_tree_map_hash, int_tree_map_eq,
                                    free);
  unsigned i;
  tree type, type_name, nvar;
  gimple_stmt_iterator gsi;
  struct clsn_data clsn_data;
  VEC (basic_block, heap) *body = VEC_alloc (basic_block, heap, 3);
  basic_block bb;
  basic_block entry_bb = bb1;
  basic_block exit_bb = exit->dest;
  bool has_debug_stmt = false;

  entry = single_succ_edge (entry_bb);
  gather_blocks_in_sese_region (entry_bb, exit_bb, &body);

  /* First pass: collect the names and decls used in the region (the
     entry and exit blocks themselves are outside it).  Debug stmts are
     only noted here and processed in a second pass below.  */
  FOR_EACH_VEC_ELT (basic_block, body, i, bb)
    {
      if (bb != entry_bb && bb != exit_bb)
        {
          for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
            separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
                                           name_copies, decl_copies);

          for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
            {
              gimple stmt = gsi_stmt (gsi);

              if (is_gimple_debug (stmt))
                has_debug_stmt = true;
              else
                separate_decls_in_region_stmt (entry, exit, stmt,
                                               name_copies, decl_copies);
            }
        }
    }

  /* Now process debug bind stmts.  We must not create decls while
     processing debug stmts, so we defer their processing so as to
     make sure we will have debug info for as many variables as
     possible (all of those that were dealt with in the loop above),
     and discard those for which we know there's nothing we can
     do.  */
  if (has_debug_stmt)
    FOR_EACH_VEC_ELT (basic_block, body, i, bb)
      if (bb != entry_bb && bb != exit_bb)
        {
          for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
            {
              gimple stmt = gsi_stmt (gsi);

              if (is_gimple_debug (stmt))
                {
                  if (separate_decls_in_region_debug (stmt, name_copies,
                                                      decl_copies))
                    {
                      /* No usable replacement -- drop the debug stmt.  */
                      gsi_remove (&gsi, true);
                      continue;
                    }
                }

              gsi_next (&gsi);
            }
        }

  VEC_free (basic_block, heap, body);

  if (htab_elements (name_copies) == 0 && htab_elements (reduction_list) == 0)
    {
      /* It may happen that there is nothing to copy (if there are only
         loop carried and external variables in the loop).  */
      *arg_struct = NULL;
      *new_arg_struct = NULL;
    }
  else
    {
      /* Create the type for the structure to store the ssa names to.  */
      type = lang_hooks.types.make_type (RECORD_TYPE);
      type_name = build_decl (UNKNOWN_LOCATION,
                              TYPE_DECL, create_tmp_var_name (".paral_data"),
                              type);
      TYPE_NAME (type) = type_name;

      htab_traverse (name_copies, add_field_for_name, type);
      if (reduction_list && htab_elements (reduction_list) > 0)
        {
          /* Create the fields for reductions.  */
          htab_traverse (reduction_list, add_field_for_reduction,
                         type);
        }
      layout_type (type);

      /* Create the loads and stores.  */
      *arg_struct = create_tmp_var (type, ".paral_data_store");
      add_referenced_var (*arg_struct);
      nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
      add_referenced_var (nvar);
      *new_arg_struct = make_ssa_name (nvar, NULL);

      ld_st_data->store = *arg_struct;
      ld_st_data->load = *new_arg_struct;
      ld_st_data->store_bb = bb0;
      ld_st_data->load_bb = bb1;

      htab_traverse (name_copies, create_loads_and_stores_for_name,
                     ld_st_data);

      /* Load the calculation from memory (after the join of the threads).  */

      if (reduction_list && htab_elements (reduction_list) > 0)
        {
          htab_traverse (reduction_list, create_stores_for_reduction,
                        ld_st_data);
          clsn_data.load = make_ssa_name (nvar, NULL);
          clsn_data.load_bb = exit->dest;
          clsn_data.store = ld_st_data->store;
          create_final_loads_for_reduction (reduction_list, &clsn_data);
        }
    }

  htab_delete (decl_copies);
  htab_delete (name_copies);
}
1388
/* Bitmap containing uids of functions created by parallelization.  We cannot
   allocate it from the default obstack, as it must live across compilation
   of several functions; we make it gc allocated instead.  Bits are set in
   create_loop_fn and tested in parallelized_function_p.  */

static GTY(()) bitmap parallelized_functions;
1394
1395 /* Returns true if FN was created by create_loop_fn. */
1396
1397 bool
1398 parallelized_function_p (tree fn)
1399 {
1400 if (!parallelized_functions || !DECL_ARTIFICIAL (fn))
1401 return false;
1402
1403 return bitmap_bit_p (parallelized_functions, DECL_UID (fn));
1404 }
1405
1406 /* Creates and returns an empty function that will receive the body of
1407 a parallelized loop. */
1408
static tree
create_loop_fn (location_t loc)
{
  char buf[100];
  char *tname;
  tree decl, type, name, t;
  struct function *act_cfun = cfun;
  static unsigned loopfn_num;

  /* Build a unique private assembler name based on the enclosing
     function's name.  */
  snprintf (buf, 100, "%s.$loopfn", current_function_name ());
  ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++);
  clean_symbol_name (tname);
  name = get_identifier (tname);
  /* The outlined function takes a single pointer argument (the shared
     data structure) and returns nothing.  */
  type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);

  decl = build_decl (loc, FUNCTION_DECL, name, type);
  if (!parallelized_functions)
    parallelized_functions = BITMAP_GGC_ALLOC ();
  /* Record the function so parallelized_function_p recognizes it.  */
  bitmap_set_bit (parallelized_functions, DECL_UID (decl));

  TREE_STATIC (decl) = 1;
  TREE_USED (decl) = 1;
  DECL_ARTIFICIAL (decl) = 1;
  DECL_IGNORED_P (decl) = 0;
  TREE_PUBLIC (decl) = 0;
  DECL_UNINLINABLE (decl) = 1;
  DECL_EXTERNAL (decl) = 0;
  DECL_CONTEXT (decl) = NULL_TREE;
  DECL_INITIAL (decl) = make_node (BLOCK);

  /* The (void) result decl.  */
  t = build_decl (loc, RESULT_DECL, NULL_TREE, void_type_node);
  DECL_ARTIFICIAL (t) = 1;
  DECL_IGNORED_P (t) = 1;
  DECL_RESULT (decl) = t;

  /* The single parameter through which the shared data is passed.  */
  t = build_decl (loc, PARM_DECL, get_identifier (".paral_data_param"),
                  ptr_type_node);
  DECL_ARTIFICIAL (t) = 1;
  DECL_ARG_TYPE (t) = ptr_type_node;
  DECL_CONTEXT (t) = decl;
  TREE_USED (t) = 1;
  DECL_ARGUMENTS (decl) = t;

  allocate_struct_function (decl, false);

  /* The call to allocate_struct_function clobbers CFUN, so we need to restore
     it.  */
  set_cfun (act_cfun);

  return decl;
}
1460
1461 /* Moves the exit condition of LOOP to the beginning of its header, and
1462 duplicates the part of the last iteration that gets disabled to the
1463 exit of the loop. NIT is the number of iterations of the loop
1464 (used to initialize the variables in the duplicated part).
1465
1466 TODO: the common case is that latch of the loop is empty and immediately
1467 follows the loop exit. In this case, it would be better not to copy the
1468 body of the loop, but only move the entry of the loop directly before the
1469 exit check and increase the number of iterations of the loop by one.
1470 This may need some additional preconditioning in case NIT = ~0.
1471 REDUCTION_LIST describes the reductions in LOOP. */
1472
static void
transform_to_exit_first_loop (struct loop *loop, htab_t reduction_list, tree nit)
{
  basic_block *bbs, *nbbs, ex_bb, orig_header;
  unsigned n;
  bool ok;
  edge exit = single_dom_exit (loop), hpred;
  tree control, control_name, res, t;
  gimple phi, nphi, cond_stmt, stmt, cond_nit;
  gimple_stmt_iterator gsi;
  tree nit_1;

  split_block_after_labels (loop->header);
  orig_header = single_succ (loop->header);
  hpred = single_succ_edge (loop->header);

  cond_stmt = last_stmt (exit->src);
  control = gimple_cond_lhs (cond_stmt);
  gcc_assert (gimple_cond_rhs (cond_stmt) == nit);

  /* Make sure that we have phi nodes on exit for all loop header phis
     (create_parallel_loop requires that).  */
  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      phi = gsi_stmt (gsi);
      res = PHI_RESULT (phi);
      /* Give the header phi a fresh name and re-create the old result
         as a phi in the original header block.  */
      t = make_ssa_name (SSA_NAME_VAR (res), phi);
      SET_PHI_RESULT (phi, t);
      nphi = create_phi_node (res, orig_header);
      SSA_NAME_DEF_STMT (res) = nphi;
      add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION);

      if (res == control)
        {
          /* Keep the exit condition testing the renamed control IV.  */
          gimple_cond_set_lhs (cond_stmt, t);
          update_stmt (cond_stmt);
          control = t;
        }
    }

  bbs = get_loop_body_in_dom_order (loop);

  /* Count the blocks dominating the exit test.  */
  for (n = 0; bbs[n] != exit->src; n++)
   continue;
  nbbs = XNEWVEC (basic_block, n);
  /* Duplicate the tail of the last iteration past the exit.  */
  ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
                                   bbs + 1, n, nbbs);
  gcc_assert (ok);
  free (bbs);
  ex_bb = nbbs[0];
  free (nbbs);

  /* Other than reductions, the only gimple reg that should be copied
     out of the loop is the control variable.  */
  exit = single_dom_exit (loop);
  control_name = NULL_TREE;
  for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); )
    {
      phi = gsi_stmt (gsi);
      res = PHI_RESULT (phi);
      if (!is_gimple_reg (res))
        {
          gsi_next (&gsi);
          continue;
        }

      /* Check if it is a part of reduction.  If it is,
         keep the phi at the reduction's keep_res field.  The
         PHI_RESULT of this phi is the resulting value of the reduction
         variable when exiting the loop.  */

      if (htab_elements (reduction_list) > 0)
        {
          struct reduction_info *red;

          tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
          red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
          if (red)
            {
              red->keep_res = phi;
              gsi_next (&gsi);
              continue;
            }
        }
      /* Anything else must be the control variable's exit phi.  */
      gcc_assert (control_name == NULL_TREE
                  && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
      control_name = res;
      remove_phi_node (&gsi, false);
    }
  gcc_assert (control_name != NULL_TREE);

  /* Initialize the control variable to number of iterations
     according to the rhs of the exit condition.  */
  gsi = gsi_after_labels (ex_bb);
  cond_nit = last_stmt (exit->src);
  nit_1 =  gimple_cond_rhs (cond_nit);
  nit_1 = force_gimple_operand_gsi (&gsi,
                                  fold_convert (TREE_TYPE (control_name), nit_1),
                                  false, NULL_TREE, false, GSI_SAME_STMT);
  stmt = gimple_build_assign (control_name, nit_1);
  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (control_name) = stmt;
}
1576
1577 /* Create the parallel constructs for LOOP as described in gen_parallel_loop.
1578 LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
1579 NEW_DATA is the variable that should be initialized from the argument
1580 of LOOP_FN. N_THREADS is the requested number of threads. Returns the
1581 basic block containing GIMPLE_OMP_PARALLEL tree. */
1582
static basic_block
create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
                      tree new_data, unsigned n_threads, location_t loc)
{
  gimple_stmt_iterator gsi;
  basic_block bb, paral_bb, for_bb, ex_bb;
  tree t, param;
  gimple stmt, for_stmt, phi, cond_stmt;
  tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
  edge exit, nexit, guard, end, e;

  /* Prepare the GIMPLE_OMP_PARALLEL statement.  */
  bb = loop_preheader_edge (loop)->src;
  paral_bb = single_pred (bb);
  gsi = gsi_last_bb (paral_bb);

  t = build_omp_clause (loc, OMP_CLAUSE_NUM_THREADS);
  OMP_CLAUSE_NUM_THREADS_EXPR (t)
    = build_int_cst (integer_type_node, n_threads);
  stmt = gimple_build_omp_parallel (NULL, t, loop_fn, data);
  gimple_set_location (stmt, loc);

  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  /* Initialize NEW_DATA.  */
  if (data)
    {
      gsi = gsi_after_labels (bb);

      /* param = &data; new_data = (type) param;  */
      param = make_ssa_name (DECL_ARGUMENTS (loop_fn), NULL);
      stmt = gimple_build_assign (param, build_fold_addr_expr (data));
      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
      SSA_NAME_DEF_STMT (param) = stmt;

      stmt = gimple_build_assign (new_data,
                                  fold_convert (TREE_TYPE (new_data), param));
      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
      SSA_NAME_DEF_STMT (new_data) = stmt;
    }

  /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL.  */
  bb = split_loop_exit_edge (single_dom_exit (loop));
  gsi = gsi_last_bb (bb);
  stmt = gimple_build_omp_return (false);
  gimple_set_location (stmt, loc);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  /* Extract data for GIMPLE_OMP_FOR.  */
  gcc_assert (loop->header == single_dom_exit (loop)->src);
  cond_stmt = last_stmt (loop->header);

  cvar = gimple_cond_lhs (cond_stmt);
  cvar_base = SSA_NAME_VAR (cvar);
  phi = SSA_NAME_DEF_STMT (cvar);
  cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
  /* INITVAR becomes the value set by the OMP_FOR; make the header phi
     take it on the preheader edge.  */
  initvar = make_ssa_name (cvar_base, NULL);
  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
           initvar);
  cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));

  /* The increment of the control variable moves into GIMPLE_OMP_FOR /
     GIMPLE_OMP_CONTINUE; remove the explicit statement.  */
  gsi = gsi_last_nondebug_bb (loop->latch);
  gcc_assert (gsi_stmt (gsi) == SSA_NAME_DEF_STMT (cvar_next));
  gsi_remove (&gsi, true);

  /* Prepare cfg.  */
  for_bb = split_edge (loop_preheader_edge (loop));
  ex_bb = split_loop_exit_edge (single_dom_exit (loop));
  extract_true_false_edges_from_block (loop->header, &nexit, &exit);
  gcc_assert (exit == single_dom_exit (loop));

  guard = make_edge (for_bb, ex_bb, 0);
  single_succ_edge (loop->latch)->flags = 0;
  end = make_edge (loop->latch, ex_bb, EDGE_FALLTHRU);
  /* Complete the exit phis for the two new incoming edges, taking the
     values from the corresponding header phi's arguments.  */
  for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      source_location locus;
      tree def;
      phi = gsi_stmt (gsi);
      stmt = SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi, exit));

      def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
      locus = gimple_phi_arg_location_from_edge (stmt,
                                                 loop_preheader_edge (loop));
      add_phi_arg (phi, def, guard, locus);

      def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (loop));
      locus = gimple_phi_arg_location_from_edge (stmt, loop_latch_edge (loop));
      add_phi_arg (phi, def, end, locus);
    }
  e = redirect_edge_and_branch (exit, nexit->dest);
  PENDING_STMT (e) = NULL;

  /* Emit GIMPLE_OMP_FOR.  */
  gimple_cond_set_lhs (cond_stmt, cvar_base);
  type = TREE_TYPE (cvar);
  t = build_omp_clause (loc, OMP_CLAUSE_SCHEDULE);
  OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;

  for_stmt = gimple_build_omp_for (NULL, t, 1, NULL);
  gimple_set_location (for_stmt, loc);
  gimple_omp_for_set_index (for_stmt, 0, initvar);
  gimple_omp_for_set_initial (for_stmt, 0, cvar_init);
  gimple_omp_for_set_final (for_stmt, 0, gimple_cond_rhs (cond_stmt));
  gimple_omp_for_set_cond (for_stmt, 0, gimple_cond_code (cond_stmt));
  gimple_omp_for_set_incr (for_stmt, 0, build2 (PLUS_EXPR, type,
                                                cvar_base,
                                                build_int_cst (type, 1)));

  gsi = gsi_last_bb (for_bb);
  gsi_insert_after (&gsi, for_stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (initvar) = for_stmt;

  /* Emit GIMPLE_OMP_CONTINUE.  */
  gsi = gsi_last_bb (loop->latch);
  stmt = gimple_build_omp_continue (cvar_next, cvar);
  gimple_set_location (stmt, loc);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (cvar_next) = stmt;

  /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR.  */
  gsi = gsi_last_bb (ex_bb);
  stmt = gimple_build_omp_return (true);
  gimple_set_location (stmt, loc);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  /* After the above dom info is hosed.  Re-compute it.  */
  free_dominance_info (CDI_DOMINATORS);
  calculate_dominance_info (CDI_DOMINATORS);

  return paral_bb;
}
1714
1715 /* Generates code to execute the iterations of LOOP in N_THREADS
1716 threads in parallel.
1717
1718 NITER describes number of iterations of LOOP.
1719 REDUCTION_LIST describes the reductions existent in the LOOP. */
1720
static void
gen_parallel_loop (struct loop *loop, htab_t reduction_list,
                   unsigned n_threads, struct tree_niter_desc *niter)
{
  loop_iterator li;
  tree many_iterations_cond, type, nit;
  tree arg_struct, new_arg_struct;
  gimple_seq stmts;
  basic_block parallel_head;
  edge entry, exit;
  struct clsn_data clsn_data;
  unsigned prob;
  location_t loc;
  gimple cond_stmt;
  unsigned int m_p_thread=2;

  /* From

     ---------------------------------------------------------------------
     loop
       {
         IV = phi (INIT, IV + STEP)
         BODY1;
         if (COND)
           break;
         BODY2;
       }
     ---------------------------------------------------------------------

     with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
     we generate the following code:

     ---------------------------------------------------------------------

     if (MAY_BE_ZERO
     || NITER < MIN_PER_THREAD * N_THREADS)
     goto original;

     BODY1;
     store all local loop-invariant variables used in body of the loop to DATA.
     GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
     load the variables from DATA.
     GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
     BODY2;
     BODY1;
     GIMPLE_OMP_CONTINUE;
     GIMPLE_OMP_RETURN         -- GIMPLE_OMP_FOR
     GIMPLE_OMP_RETURN         -- GIMPLE_OMP_PARALLEL
     goto end;

     original:
     loop
       {
         IV = phi (INIT, IV + STEP)
         BODY1;
         if (COND)
           break;
         BODY2;
       }

     end:

   */

  /* Create two versions of the loop -- in the old one, we know that the
     number of iterations is large enough, and we will transform it into the
     loop that will be split to loop_fn, the new one will be used for the
     remaining iterations.  */

  /* We should compute a better number-of-iterations value for outer loops.
     That is, if we have

    for (i = 0; i < n; ++i)
      for (j = 0; j < m; ++j)
        ...

    we should compute nit = n * m, not nit = n.
    Also may_be_zero handling would need to be adjusted.  */

  type = TREE_TYPE (niter->niter);
  nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
                              NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);

  /* For an outer loop a lower per-thread threshold suffices.  */
  if (loop->inner)
    m_p_thread=2;
  else
    m_p_thread=MIN_PER_THREAD;

  /* Guard: only parallelize when NIT >= M_P_THREAD * N_THREADS and the
     number of iterations is known to be nonzero.  */
  many_iterations_cond =
    fold_build2 (GE_EXPR, boolean_type_node,
                 nit, build_int_cst (type, m_p_thread * n_threads));

  many_iterations_cond
    = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
                   invert_truthvalue (unshare_expr (niter->may_be_zero)),
                   many_iterations_cond);
  many_iterations_cond
    = force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
  if (!is_gimple_condexpr (many_iterations_cond))
    {
      many_iterations_cond
        = force_gimple_operand (many_iterations_cond, &stmts,
                                true, NULL_TREE);
      if (stmts)
        gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
    }

  initialize_original_copy_tables ();

  /* We assume that the loop usually iterates a lot.  */
  prob = 4 * REG_BR_PROB_BASE / 5;
  loop_version (loop, many_iterations_cond, NULL,
                prob, prob, REG_BR_PROB_BASE - prob, true);
  update_ssa (TODO_update_ssa);
  free_original_copy_tables ();

  /* Base all the induction variables in LOOP on a single control one.  */
  canonicalize_loop_ivs (loop, &nit, true);

  /* Ensure that the exit condition is the first statement in the loop.  */
  transform_to_exit_first_loop (loop, reduction_list, nit);

  /* Generate initializations for reductions.  */
  if (htab_elements (reduction_list) > 0)
    htab_traverse (reduction_list, initialize_reductions, loop);

  /* Eliminate the references to local variables from the loop.  */
  gcc_assert (single_exit (loop));
  entry = loop_preheader_edge (loop);
  exit = single_dom_exit (loop);

  eliminate_local_variables (entry, exit);
  /* In the old loop, move all variables non-local to the loop to a structure
     and back, and create separate decls for the variables used in loop.  */
  separate_decls_in_region (entry, exit, reduction_list, &arg_struct,
                            &new_arg_struct, &clsn_data);

  /* Create the parallel constructs.  */
  loc = UNKNOWN_LOCATION;
  cond_stmt = last_stmt (loop->header);
  if (cond_stmt)
    loc = gimple_location (cond_stmt);
  parallel_head = create_parallel_loop (loop, create_loop_fn (loc), arg_struct,
                                        new_arg_struct, n_threads, loc);
  if (htab_elements (reduction_list) > 0)
    create_call_for_reduction (loop, reduction_list, &clsn_data);

  scev_reset ();

  /* Cancel the loop (it is simpler to do it here rather than to teach the
     expander to do it).  */
  cancel_loop_tree (loop);

  /* Free loop bound estimations that could contain references to
     removed statements.  */
  FOR_EACH_LOOP (li, loop, 0)
    free_numbers_of_iterations_estimates_loop (loop);

  /* Expand the parallel constructs.  We do it directly here instead of running
     a separate expand_omp pass, since it is more efficient, and less likely to
     cause troubles with further analyses not being able to deal with the
     OMP trees.  */

  omp_expand_local (parallel_head);
}
1890
1891 /* Returns true when LOOP contains vector phi nodes. */
1892
1893 static bool
1894 loop_has_vector_phi_nodes (struct loop *loop ATTRIBUTE_UNUSED)
1895 {
1896 unsigned i;
1897 basic_block *bbs = get_loop_body_in_dom_order (loop);
1898 gimple_stmt_iterator gsi;
1899 bool res = true;
1900
1901 for (i = 0; i < loop->num_nodes; i++)
1902 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
1903 if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi_stmt (gsi)))) == VECTOR_TYPE)
1904 goto end;
1905
1906 res = false;
1907 end:
1908 free (bbs);
1909 return res;
1910 }
1911
1912 /* Create a reduction_info struct, initialize it with REDUC_STMT
1913 and PHI, insert it to the REDUCTION_LIST. */
1914
1915 static void
1916 build_new_reduction (htab_t reduction_list, gimple reduc_stmt, gimple phi)
1917 {
1918 PTR *slot;
1919 struct reduction_info *new_reduction;
1920
1921 gcc_assert (reduc_stmt);
1922
1923 if (dump_file && (dump_flags & TDF_DETAILS))
1924 {
1925 fprintf (dump_file,
1926 "Detected reduction. reduction stmt is: \n");
1927 print_gimple_stmt (dump_file, reduc_stmt, 0, 0);
1928 fprintf (dump_file, "\n");
1929 }
1930
1931 new_reduction = XCNEW (struct reduction_info);
1932
1933 new_reduction->reduc_stmt = reduc_stmt;
1934 new_reduction->reduc_phi = phi;
1935 new_reduction->reduc_version = SSA_NAME_VERSION (gimple_phi_result (phi));
1936 new_reduction->reduction_code = gimple_assign_rhs_code (reduc_stmt);
1937 slot = htab_find_slot (reduction_list, new_reduction, INSERT);
1938 *slot = new_reduction;
1939 }
1940
1941 /* Callback for htab_traverse. Sets gimple_uid of reduc_phi stmts. */
1942
1943 static int
1944 set_reduc_phi_uids (void **slot, void *data ATTRIBUTE_UNUSED)
1945 {
1946 struct reduction_info *const red = (struct reduction_info *) *slot;
1947 gimple_set_uid (red->reduc_phi, red->reduc_version);
1948 return 1;
1949 }
1950
1951 /* Detect all reductions in the LOOP, insert them into REDUCTION_LIST. */
1952
1953 static void
1954 gather_scalar_reductions (loop_p loop, htab_t reduction_list)
1955 {
1956 gimple_stmt_iterator gsi;
1957 loop_vec_info simple_loop_info;
1958
1959 vect_dump = NULL;
1960 simple_loop_info = vect_analyze_loop_form (loop);
1961
1962 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
1963 {
1964 gimple phi = gsi_stmt (gsi);
1965 affine_iv iv;
1966 tree res = PHI_RESULT (phi);
1967 bool double_reduc;
1968
1969 if (!is_gimple_reg (res))
1970 continue;
1971
1972 if (!simple_iv (loop, loop, res, &iv, true)
1973 && simple_loop_info)
1974 {
1975 gimple reduc_stmt = vect_force_simple_reduction (simple_loop_info,
1976 phi, true,
1977 &double_reduc);
1978 if (reduc_stmt && !double_reduc)
1979 build_new_reduction (reduction_list, reduc_stmt, phi);
1980 }
1981 }
1982 destroy_loop_vec_info (simple_loop_info, true);
1983
1984 /* As gimple_uid is used by the vectorizer in between vect_analyze_loop_form
1985 and destroy_loop_vec_info, we can set gimple_uid of reduc_phi stmts
1986 only now. */
1987 htab_traverse (reduction_list, set_reduc_phi_uids, NULL);
1988 }
1989
1990 /* Try to initialize NITER for code generation part. */
1991
1992 static bool
1993 try_get_loop_niter (loop_p loop, struct tree_niter_desc *niter)
1994 {
1995 edge exit = single_dom_exit (loop);
1996
1997 gcc_assert (exit);
1998
1999 /* We need to know # of iterations, and there should be no uses of values
2000 defined inside loop outside of it, unless the values are invariants of
2001 the loop. */
2002 if (!number_of_iterations_exit (loop, exit, niter, false))
2003 {
2004 if (dump_file && (dump_flags & TDF_DETAILS))
2005 fprintf (dump_file, " FAILED: number of iterations not known\n");
2006 return false;
2007 }
2008
2009 return true;
2010 }
2011
/* Try to initialize REDUCTION_LIST for code generation part.
   REDUCTION_LIST describes the reductions.  Returns false (rejecting the
   loop for parallelization) when a value defined in the loop is used past
   the exit without being part of a recognized reduction, or when a header
   phi is neither a simple induction variable nor a known reduction.  */

static bool
try_create_reduction_list (loop_p loop, htab_t reduction_list)
{
  edge exit = single_dom_exit (loop);
  gimple_stmt_iterator gsi;

  gcc_assert (exit);

  /* Populate REDUCTION_LIST with the reductions detected in LOOP.  */
  gather_scalar_reductions (loop, reduction_list);


  /* Every value live past the exit must flow through a reduction;
     otherwise the iterations cannot be distributed across threads.  */
  for (gsi = gsi_start_phis (exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi = gsi_stmt (gsi);
      struct reduction_info *red;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      gimple reduc_phi;
      tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);

      /* Only SSA register values need checking; virtual operands are
	 handled elsewhere.  */
      if (is_gimple_reg (val))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "phi is ");
	      print_gimple_stmt (dump_file, phi, 0, 0);
	      fprintf (dump_file, "arg of phi to exit:   value ");
	      print_generic_expr (dump_file, val, 0);
	      fprintf (dump_file, " used outside loop\n");
	      fprintf (dump_file,
		       "  checking if it a part of reduction pattern:  \n");
	    }
	  /* No reductions at all were found, so this live-out value
	     cannot possibly be one.  */
	  if (htab_elements (reduction_list) == 0)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "  FAILED: it is not a part of reduction.\n");
	      return false;
	    }
	  reduc_phi = NULL;
	  /* Find the in-loop (non-debug) use of VAL; for a reduction this
	     is the loop-header phi that carries the accumulator.  */
	  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val)
	    {
	      if (!gimple_debug_bind_p (USE_STMT (use_p))
		  && flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
		{
		  reduc_phi = USE_STMT (use_p);
		  break;
		}
	    }
	  /* Looked up by the uid stamped in set_reduc_phi_uids; NULL means
	     VAL is not the result of any recognized reduction.  */
	  red = reduction_phi (reduction_list, reduc_phi);
	  if (red == NULL)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "  FAILED: it is not a part of reduction.\n");
	      return false;
	    }
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "reduction phi is  ");
	      print_gimple_stmt (dump_file, red->reduc_phi, 0, 0);
	      fprintf (dump_file, "reduction stmt is  ");
	      print_gimple_stmt (dump_file, red->reduc_stmt, 0, 0);
	    }
	}
    }

  /* The iterations of the loop may communicate only through bivs whose
     iteration space can be distributed efficiently.  */
  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi = gsi_stmt (gsi);
      tree def = PHI_RESULT (phi);
      affine_iv iv;

      /* Each header phi must be either a simple induction variable or a
	 reduction; anything else is a loop-carried scalar dependency.  */
      if (is_gimple_reg (def) && !simple_iv (loop, loop, def, &iv, true))
	{
	  struct reduction_info *red;

	  red = reduction_phi (reduction_list, phi);
	  if (red == NULL)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "  FAILED: scalar dependency between iterations\n");
	      return false;
	    }
	}
    }


  return true;
}
2108
/* Detect parallel loops and generate parallel code using libgomp
   primitives.  Returns true if some loop was parallelized, false
   otherwise.  The number of threads comes from -ftree-parallelize-loops=N;
   each candidate loop must pass a series of legality and profitability
   checks before gen_parallel_loop rewrites it.  */

bool
parallelize_loops (void)
{
  unsigned n_threads = flag_tree_parallelize_loops;
  bool changed = false;
  struct loop *loop;
  struct tree_niter_desc niter_desc;
  loop_iterator li;
  htab_t reduction_list;
  struct obstack parloop_obstack;
  HOST_WIDE_INT estimated;
  LOC loop_loc;

  /* Do not parallelize loops in the functions created by parallelization.  */
  if (parallelized_function_p (cfun->decl))
    return false;
  /* Nonlocal labels would make the outlined loop body unsafe to split out.  */
  if (cfun->has_nonlocal_label)
    return false;

  gcc_obstack_init (&parloop_obstack);
  /* Hash table keyed by reduction phi SSA version; owns its entries
     (free passed as the delete function).  */
  reduction_list = htab_create (10, reduction_info_hash,
				     reduction_info_eq, free);
  init_stmt_vec_info_vec ();

  FOR_EACH_LOOP (li, loop, 0)
    {
      /* The reduction table is reused across loops; clear previous entries.  */
      htab_empty (reduction_list);
      if (dump_file && (dump_flags & TDF_DETAILS))
      {
        fprintf (dump_file, "Trying loop %d as candidate\n",loop->num);
	if (loop->inner)
	  fprintf (dump_file, "loop %d is not innermost\n",loop->num);
	else
	  fprintf (dump_file, "loop %d is innermost\n",loop->num);
      }

      /* If we use autopar in graphite pass, we use its marked dependency
      checking results.  */
      if (flag_loop_parallelize_all && !loop->can_be_parallel)
      {
        if (dump_file && (dump_flags & TDF_DETAILS))
	  fprintf (dump_file, "loop is not parallel according to graphite\n");
	continue;
      }

      /* The code generation scheme requires a single exit that dominates
	 the latch.  */
      if (!single_dom_exit (loop))
      {

        if (dump_file && (dump_flags & TDF_DETAILS))
	  fprintf (dump_file, "loop is !single_dom_exit\n");

	continue;
      }

      if (/* And of course, the loop must be parallelizable.  */
	  !can_duplicate_loop_p (loop)
	  || loop_has_blocks_with_irreducible_flag (loop)
	  || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP)
	  /* FIXME: the check for vector phi nodes could be removed.  */
	  || loop_has_vector_phi_nodes (loop))
	continue;

      /* Profitability: skip loops whose total statement executions are too
	 small to amortize thread startup (MIN_PER_THREAD per thread).  */
      estimated = estimated_stmt_executions_int (loop);
      if (estimated == -1)
	estimated = max_stmt_executions_int (loop);
      /* FIXME: Bypass this check as graphite doesn't update the
	 count and frequency correctly now.  */
      if (!flag_loop_parallelize_all
	  && ((estimated != -1
	       && estimated <= (HOST_WIDE_INT) n_threads * MIN_PER_THREAD)
	      /* Do not bother with loops in cold areas.  */
	      || optimize_loop_nest_for_size_p (loop)))
	continue;

      /* The iteration count must be computable for the OMP_FOR expansion.  */
      if (!try_get_loop_niter (loop, &niter_desc))
	continue;

      /* All loop-carried/live-out scalars must be recognized reductions.  */
      if (!try_create_reduction_list (loop, reduction_list))
	continue;

      /* Under graphite (-floop-parallelize-all) the dependence analysis was
	 already done; otherwise run the data-dependence test now.  */
      if (!flag_loop_parallelize_all
	  && !loop_parallel_p (loop, &parloop_obstack))
	continue;

      changed = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
      {
	if (loop->inner)
	  fprintf (dump_file, "parallelizing outer loop %d\n",loop->header->index);
	else
	  fprintf (dump_file, "parallelizing inner loop %d\n",loop->header->index);
	loop_loc = find_loop_location (loop);
	if (loop_loc != UNKNOWN_LOC)
	  fprintf (dump_file, "\nloop at %s:%d: ",
		   LOC_FILE (loop_loc), LOC_LINE (loop_loc));
      }
      gen_parallel_loop (loop, reduction_list,
			 n_threads, &niter_desc);
#ifdef ENABLE_CHECKING
      /* The transformation heavily rewrites the CFG; verify invariants in
	 checking builds.  */
      verify_flow_info ();
      verify_loop_structure ();
      verify_loop_closed_ssa (true);
#endif
    }

  free_stmt_vec_info_vec ();
  htab_delete (reduction_list);
  obstack_free (&parloop_obstack, NULL);

  /* Parallelization will cause new function calls to be inserted through
     which local variables will escape.  Reset the points-to solution
     for ESCAPED.  */
  if (changed)
    pt_solution_reset (&cfun->gimple_df->escaped);

  return changed;
}
2230
2231 #include "gt-tree-parloops.h"