re PR middle-end/35196 (lastprivate broken for static non-ordered loops)
1 /* Lowering pass for OpenMP directives. Converts OpenMP directives
2 into explicit calls to the runtime library (libgomp) and data
3 marshalling to implement data sharing and copying clauses.
4 Contributed by Diego Novillo <dnovillo@redhat.com>
5
6 Copyright (C) 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
7
8 This file is part of GCC.
9
10 GCC is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free
12 Software Foundation; either version 3, or (at your option) any later
13 version.
14
15 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
23
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "tree.h"
29 #include "rtl.h"
30 #include "tree-gimple.h"
31 #include "tree-inline.h"
32 #include "langhooks.h"
33 #include "diagnostic.h"
34 #include "tree-flow.h"
35 #include "timevar.h"
36 #include "flags.h"
37 #include "function.h"
38 #include "expr.h"
39 #include "toplev.h"
40 #include "tree-pass.h"
41 #include "ggc.h"
42 #include "except.h"
43 #include "splay-tree.h"
44 #include "optabs.h"
45 #include "cfgloop.h"
46
47 /* Lowering of OpenMP parallel and workshare constructs proceeds in two
48 phases. The first phase scans the function looking for OMP statements
49 and then for variables that must be replaced to satisfy data sharing
50 clauses. The second phase expands code for the constructs, as well as
51 re-gimplifying things when variables have been replaced with complex
52 expressions.
53
54 Final code generation is done by pass_expand_omp. The flowgraph is
55 scanned for parallel regions which are then moved to a new
56 function, to be invoked by the thread library. */
57
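/* As a rough illustration only (names and calls here are schematic, not
   literal output of this pass), a directive such as

	#pragma omp parallel shared (n)
	  body;

   has its body moved into an artificial child function taking a single
   ".omp_data_i" argument, and the directive itself becomes, roughly,

	.omp_data_o.n = n;
	__builtin_GOMP_parallel_start (foo.omp_fn.0, &.omp_data_o, 0);
	foo.omp_fn.0 (&.omp_data_o);
	__builtin_GOMP_parallel_end ();

   where 0 requests the default number of threads and the GOMP_* entry
   points live in libgomp.  */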
58 /* Context structure. Used to store information about each parallel
59 directive in the code. */
60
61 typedef struct omp_context
62 {
63 /* This field must be at the beginning, as we do "inheritance": Some
64 callback functions for tree-inline.c (e.g., omp_copy_decl)
65 receive a copy_body_data pointer that is up-casted to an
66 omp_context pointer. */
67 copy_body_data cb;
68
69 /* The tree of contexts corresponding to the encountered constructs. */
70 struct omp_context *outer;
71 tree stmt;
72
73 /* Map variables to fields in a structure that allows communication
74 between sending and receiving threads. */
75 splay_tree field_map;
76 tree record_type;
77 tree sender_decl;
78 tree receiver_decl;
79
80 /* A chain of variables to add to the top-level block surrounding the
81 construct. In the case of a parallel, this is in the child function. */
82 tree block_vars;
83
84 /* What to do with variables with implicitly determined sharing
85 attributes. */
86 enum omp_clause_default_kind default_kind;
87
88 /* Nesting depth of this context. Used to beautify error messages re
89 invalid gotos. The outermost ctx is depth 1, with depth 0 being
90 reserved for the main body of the function. */
91 int depth;
92
93 /* True if this parallel directive is nested within another. */
94 bool is_nested;
95 } omp_context;
96
97
98 /* A structure describing the main elements of a parallel loop. */
99
100 struct omp_for_data
101 {
102 tree v, n1, n2, step, chunk_size, for_stmt;
103 enum tree_code cond_code;
104 tree pre;
105 bool have_nowait, have_ordered;
106 enum omp_clause_schedule_kind sched_kind;
107 };
108
109
110 static splay_tree all_contexts;
111 static int parallel_nesting_level;
112 struct omp_region *root_omp_region;
113
114 static void scan_omp (tree *, omp_context *);
115 static void lower_omp (tree *, omp_context *);
116 static tree lookup_decl_in_outer_ctx (tree, omp_context *);
117 static tree maybe_lookup_decl_in_outer_ctx (tree, omp_context *);
118
119 /* Find an OpenMP clause of type KIND within CLAUSES. */
120
121 tree
122 find_omp_clause (tree clauses, enum tree_code kind)
123 {
124 for (; clauses ; clauses = OMP_CLAUSE_CHAIN (clauses))
125 if (OMP_CLAUSE_CODE (clauses) == kind)
126 return clauses;
127
128 return NULL_TREE;
129 }
130
131 /* Return true if CTX is for an omp parallel. */
132
133 static inline bool
134 is_parallel_ctx (omp_context *ctx)
135 {
136 return TREE_CODE (ctx->stmt) == OMP_PARALLEL;
137 }
138
139
140 /* Return true if REGION is a combined parallel+workshare region. */
141
142 static inline bool
143 is_combined_parallel (struct omp_region *region)
144 {
145 return region->is_combined_parallel;
146 }
147
148
149 /* Extract the header elements of parallel loop FOR_STMT and store
150 them into *FD. */
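/* For instance (schematically), a source loop

	for (i = a; i <= b; i++)

   is recorded as V = i, N1 = a, N2 = b + 1, COND_CODE = LT_EXPR and
   STEP = 1, while "i >= b; i--" yields N2 = b - 1, GT_EXPR and STEP = -1,
   so the expanders only ever have to handle the LT/GT forms.  */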
151
152 static void
153 extract_omp_for_data (tree for_stmt, struct omp_for_data *fd)
154 {
155 tree t, var;
156
157 fd->for_stmt = for_stmt;
158 fd->pre = NULL;
159
160 t = OMP_FOR_INIT (for_stmt);
161 gcc_assert (TREE_CODE (t) == GIMPLE_MODIFY_STMT);
162 fd->v = GIMPLE_STMT_OPERAND (t, 0);
163 gcc_assert (SSA_VAR_P (fd->v));
164 gcc_assert (TREE_CODE (TREE_TYPE (fd->v)) == INTEGER_TYPE);
165 var = TREE_CODE (fd->v) == SSA_NAME ? SSA_NAME_VAR (fd->v) : fd->v;
166 fd->n1 = GIMPLE_STMT_OPERAND (t, 1);
167
168 t = OMP_FOR_COND (for_stmt);
169 fd->cond_code = TREE_CODE (t);
170 gcc_assert (TREE_OPERAND (t, 0) == var);
171 fd->n2 = TREE_OPERAND (t, 1);
172 switch (fd->cond_code)
173 {
174 case LT_EXPR:
175 case GT_EXPR:
176 break;
177 case LE_EXPR:
178 fd->n2 = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->n2), fd->n2,
179 build_int_cst (TREE_TYPE (fd->n2), 1));
180 fd->cond_code = LT_EXPR;
181 break;
182 case GE_EXPR:
183 fd->n2 = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->n2), fd->n2,
184 build_int_cst (TREE_TYPE (fd->n2), 1));
185 fd->cond_code = GT_EXPR;
186 break;
187 default:
188 gcc_unreachable ();
189 }
190
191 t = OMP_FOR_INCR (fd->for_stmt);
192 gcc_assert (TREE_CODE (t) == GIMPLE_MODIFY_STMT);
193 gcc_assert (GIMPLE_STMT_OPERAND (t, 0) == var);
194 t = GIMPLE_STMT_OPERAND (t, 1);
195 gcc_assert (TREE_OPERAND (t, 0) == var);
196 switch (TREE_CODE (t))
197 {
198 case PLUS_EXPR:
199 fd->step = TREE_OPERAND (t, 1);
200 break;
201 case MINUS_EXPR:
202 fd->step = TREE_OPERAND (t, 1);
203 fd->step = fold_build1 (NEGATE_EXPR, TREE_TYPE (fd->step), fd->step);
204 break;
205 default:
206 gcc_unreachable ();
207 }
208
209 fd->have_nowait = fd->have_ordered = false;
210 fd->sched_kind = OMP_CLAUSE_SCHEDULE_STATIC;
211 fd->chunk_size = NULL_TREE;
212
213 for (t = OMP_FOR_CLAUSES (for_stmt); t ; t = OMP_CLAUSE_CHAIN (t))
214 switch (OMP_CLAUSE_CODE (t))
215 {
216 case OMP_CLAUSE_NOWAIT:
217 fd->have_nowait = true;
218 break;
219 case OMP_CLAUSE_ORDERED:
220 fd->have_ordered = true;
221 break;
222 case OMP_CLAUSE_SCHEDULE:
223 fd->sched_kind = OMP_CLAUSE_SCHEDULE_KIND (t);
224 fd->chunk_size = OMP_CLAUSE_SCHEDULE_CHUNK_EXPR (t);
225 break;
226 default:
227 break;
228 }
229
230 if (fd->sched_kind == OMP_CLAUSE_SCHEDULE_RUNTIME)
231 gcc_assert (fd->chunk_size == NULL);
232 else if (fd->chunk_size == NULL)
233 {
234 /* We only need to compute a default chunk size for ordered
235 static loops and dynamic loops. */
236 if (fd->sched_kind != OMP_CLAUSE_SCHEDULE_STATIC || fd->have_ordered)
237 fd->chunk_size = (fd->sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
238 ? integer_zero_node : integer_one_node;
239 }
240 }
241
242
243 /* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
244 is the immediate dominator of PAR_ENTRY_BB, return true if there
245 are no data dependencies that would prevent expanding the parallel
246 directive at PAR_ENTRY_BB as a combined parallel+workshare region.
247
248 When expanding a combined parallel+workshare region, the call to
249 the child function may need additional arguments in the case of
250 OMP_FOR regions. In some cases, these arguments are computed out
251 of variables passed in from the parent to the child via 'struct
252 .omp_data_s'. For instance:
253
254 #pragma omp parallel for schedule (guided, i * 4)
255 for (j ...)
256
257 Is lowered into:
258
259 # BLOCK 2 (PAR_ENTRY_BB)
260 .omp_data_o.i = i;
261 #pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)]
262
263 # BLOCK 3 (WS_ENTRY_BB)
264 .omp_data_i = &.omp_data_o;
265 D.1667 = .omp_data_i->i;
266 D.1598 = D.1667 * 4;
267 #pragma omp for schedule (guided, D.1598)
268
269 When we outline the parallel region, the call to the child function
270 'bar.omp_fn.0' will need the value D.1598 in its argument list, but
271 that value is computed *after* the call site. So, in principle we
272 cannot do the transformation.
273
274 To see whether the code in WS_ENTRY_BB blocks the combined
275 parallel+workshare call, we collect all the variables used in the
276 OMP_FOR header and check whether they appear on the LHS of any
277 statement in WS_ENTRY_BB. If so, then we cannot emit the combined
278 call.
279
280 FIXME. If we had the SSA form built at this point, we could merely
281 hoist the code in block 3 into block 2 and be done with it. But at
282 this point we don't have dataflow information and though we could
283 hack something up here, it is really not worth the aggravation. */
284
285 static bool
286 workshare_safe_to_combine_p (basic_block par_entry_bb, basic_block ws_entry_bb)
287 {
288 struct omp_for_data fd;
289 tree par_stmt, ws_stmt;
290
291 par_stmt = last_stmt (par_entry_bb);
292 ws_stmt = last_stmt (ws_entry_bb);
293
294 if (TREE_CODE (ws_stmt) == OMP_SECTIONS)
295 return true;
296
297 gcc_assert (TREE_CODE (ws_stmt) == OMP_FOR);
298
299 extract_omp_for_data (ws_stmt, &fd);
300
301 /* FIXME. We give up too easily here. If any of these arguments
302 are not constants, they will likely involve variables that have
303 been mapped into fields of .omp_data_s for sharing with the child
304 function. With appropriate data flow, it would be possible to
305 see through this. */
306 if (!is_gimple_min_invariant (fd.n1)
307 || !is_gimple_min_invariant (fd.n2)
308 || !is_gimple_min_invariant (fd.step)
309 || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
310 return false;
311
312 return true;
313 }
314
315
316 /* Collect additional arguments needed to emit a combined
317 parallel+workshare call. WS_STMT is the workshare directive being
318 expanded. */
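/* A sketch with illustrative values: for
   "#pragma omp parallel for schedule (dynamic, 4)" running i from 0 to
   n - 1, the TREE_LIST built below reads, front to back,

	0, n, 1, 4		(n1, n2, step, chunk)

   i.e. the extra trailing arguments the expanders pass to the combined
   GOMP_parallel_loop_*_start entry points.  */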
319
320 static tree
321 get_ws_args_for (tree ws_stmt)
322 {
323 tree t;
324
325 if (TREE_CODE (ws_stmt) == OMP_FOR)
326 {
327 struct omp_for_data fd;
328 tree ws_args;
329
330 extract_omp_for_data (ws_stmt, &fd);
331
332 ws_args = NULL_TREE;
333 if (fd.chunk_size)
334 {
335 t = fold_convert (long_integer_type_node, fd.chunk_size);
336 ws_args = tree_cons (NULL, t, ws_args);
337 }
338
339 t = fold_convert (long_integer_type_node, fd.step);
340 ws_args = tree_cons (NULL, t, ws_args);
341
342 t = fold_convert (long_integer_type_node, fd.n2);
343 ws_args = tree_cons (NULL, t, ws_args);
344
345 t = fold_convert (long_integer_type_node, fd.n1);
346 ws_args = tree_cons (NULL, t, ws_args);
347
348 return ws_args;
349 }
350 else if (TREE_CODE (ws_stmt) == OMP_SECTIONS)
351 {
352 /* Number of sections is equal to the number of edges from the
353 OMP_SECTIONS_SWITCH statement, except for the one to the exit
354 of the sections region. */
355 basic_block bb = single_succ (bb_for_stmt (ws_stmt));
356 t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
357 t = tree_cons (NULL, t, NULL);
358 return t;
359 }
360
361 gcc_unreachable ();
362 }
363
364
365 /* Discover whether REGION is a combined parallel+workshare region. */
366
367 static void
368 determine_parallel_type (struct omp_region *region)
369 {
370 basic_block par_entry_bb, par_exit_bb;
371 basic_block ws_entry_bb, ws_exit_bb;
372
373 if (region == NULL || region->inner == NULL
374 || region->exit == NULL || region->inner->exit == NULL
375 || region->inner->cont == NULL)
376 return;
377
378 /* We only support parallel+for and parallel+sections. */
379 if (region->type != OMP_PARALLEL
380 || (region->inner->type != OMP_FOR
381 && region->inner->type != OMP_SECTIONS))
382 return;
383
384 /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
385 WS_EXIT_BB -> PAR_EXIT_BB. */
386 par_entry_bb = region->entry;
387 par_exit_bb = region->exit;
388 ws_entry_bb = region->inner->entry;
389 ws_exit_bb = region->inner->exit;
390
391 if (single_succ (par_entry_bb) == ws_entry_bb
392 && single_succ (ws_exit_bb) == par_exit_bb
393 && workshare_safe_to_combine_p (par_entry_bb, ws_entry_bb)
394 && (OMP_PARALLEL_COMBINED (last_stmt (par_entry_bb))
395 || (last_and_only_stmt (ws_entry_bb)
396 && last_and_only_stmt (par_exit_bb))))
397 {
398 tree ws_stmt = last_stmt (ws_entry_bb);
399
400 if (region->inner->type == OMP_FOR)
401 {
402 /* If this is a combined parallel loop, we need to determine
403 whether or not to use the combined library calls. There
404 are two cases where we do not apply the transformation:
405 static loops and any kind of ordered loop. In the first
406 case, we already open code the loop so there is no need
407 to do anything else. In the latter case, the combined
408 parallel loop call would still need extra synchronization
409 to implement ordered semantics, so there would not be any
410 gain in using the combined call. */
411 tree clauses = OMP_FOR_CLAUSES (ws_stmt);
412 tree c = find_omp_clause (clauses, OMP_CLAUSE_SCHEDULE);
413 if (c == NULL
414 || OMP_CLAUSE_SCHEDULE_KIND (c) == OMP_CLAUSE_SCHEDULE_STATIC
415 || find_omp_clause (clauses, OMP_CLAUSE_ORDERED))
416 {
417 region->is_combined_parallel = false;
418 region->inner->is_combined_parallel = false;
419 return;
420 }
421 }
422
423 region->is_combined_parallel = true;
424 region->inner->is_combined_parallel = true;
425 region->ws_args = get_ws_args_for (ws_stmt);
426 }
427 }
428
429
430 /* Return true if EXPR is variable sized. */
431
432 static inline bool
433 is_variable_sized (const_tree expr)
434 {
435 return !TREE_CONSTANT (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
436 }
437
438 /* Return true if DECL is a reference type. */
439
440 static inline bool
441 is_reference (tree decl)
442 {
443 return lang_hooks.decls.omp_privatize_by_reference (decl);
444 }
445
446 /* Look up variables in the decl or field splay trees.  The "maybe" form
447 allows the variable not to have been entered yet; otherwise we assert
448 that the variable must have been entered.  */
449
450 static inline tree
451 lookup_decl (tree var, omp_context *ctx)
452 {
453 tree *n;
454 n = (tree *) pointer_map_contains (ctx->cb.decl_map, var);
455 return *n;
456 }
457
458 static inline tree
459 maybe_lookup_decl (tree var, omp_context *ctx)
460 {
461 tree *n;
462 n = (tree *) pointer_map_contains (ctx->cb.decl_map, var);
463 return n ? *n : NULL_TREE;
464 }
465
466 static inline tree
467 lookup_field (tree var, omp_context *ctx)
468 {
469 splay_tree_node n;
470 n = splay_tree_lookup (ctx->field_map, (splay_tree_key) var);
471 return (tree) n->value;
472 }
473
474 static inline tree
475 maybe_lookup_field (tree var, omp_context *ctx)
476 {
477 splay_tree_node n;
478 n = splay_tree_lookup (ctx->field_map, (splay_tree_key) var);
479 return n ? (tree) n->value : NULL_TREE;
480 }
481
482 /* Return true if DECL should be copied by pointer. SHARED_P is true
483 if DECL is to be shared. */
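/* For example (schematic): a local scalar listed in "shared (x)" whose
   address is never taken can use copy-in/copy-out,

	.omp_data_o.x = x;   ... parallel region ...   x = .omp_data_o.x;

   whereas an aggregate or a variable whose address is taken has its
   address stored instead,

	.omp_data_o.y = &y;

   and is accessed through that pointer in the child function.  */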
484
485 static bool
486 use_pointer_for_field (const_tree decl, bool shared_p)
487 {
488 if (AGGREGATE_TYPE_P (TREE_TYPE (decl)))
489 return true;
490
491 /* We can only use copy-in/copy-out semantics for shared variables
492 when we know the value is not accessible from an outer scope. */
493 if (shared_p)
494 {
495 /* ??? Trivially accessible from anywhere. But why would we even
496 be passing an address in this case? Should we simply assert
497 this to be false, or should we have a cleanup pass that removes
498 these from the list of mappings? */
499 if (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
500 return true;
501
502 /* For variables with DECL_HAS_VALUE_EXPR_P set, we cannot tell
503 without analyzing the expression whether or not its location
504 is accessible to anyone else. In the case of nested parallel
505 regions it certainly may be. */
506 if (TREE_CODE (decl) != RESULT_DECL && DECL_HAS_VALUE_EXPR_P (decl))
507 return true;
508
509 /* Do not use copy-in/copy-out for variables that have their
510 address taken. */
511 if (TREE_ADDRESSABLE (decl))
512 return true;
513 }
514
515 return false;
516 }
517
518 /* Create a new VAR_DECL and copy information from VAR to it. */
519
520 tree
521 copy_var_decl (tree var, tree name, tree type)
522 {
523 tree copy = build_decl (VAR_DECL, name, type);
524
525 TREE_ADDRESSABLE (copy) = TREE_ADDRESSABLE (var);
526 TREE_THIS_VOLATILE (copy) = TREE_THIS_VOLATILE (var);
527 DECL_GIMPLE_REG_P (copy) = DECL_GIMPLE_REG_P (var);
528 DECL_NO_TBAA_P (copy) = DECL_NO_TBAA_P (var);
529 DECL_ARTIFICIAL (copy) = DECL_ARTIFICIAL (var);
530 DECL_IGNORED_P (copy) = DECL_IGNORED_P (var);
531 DECL_CONTEXT (copy) = DECL_CONTEXT (var);
532 DECL_SOURCE_LOCATION (copy) = DECL_SOURCE_LOCATION (var);
533 TREE_USED (copy) = 1;
534 DECL_SEEN_IN_BIND_EXPR_P (copy) = 1;
535
536 return copy;
537 }
538
539 /* Construct a new automatic decl similar to VAR. */
540
541 static tree
542 omp_copy_decl_2 (tree var, tree name, tree type, omp_context *ctx)
543 {
544 tree copy = copy_var_decl (var, name, type);
545
546 DECL_CONTEXT (copy) = current_function_decl;
547 TREE_CHAIN (copy) = ctx->block_vars;
548 ctx->block_vars = copy;
549
550 return copy;
551 }
552
553 static tree
554 omp_copy_decl_1 (tree var, omp_context *ctx)
555 {
556 return omp_copy_decl_2 (var, DECL_NAME (var), TREE_TYPE (var), ctx);
557 }
558
559 /* Build tree nodes to access the field for VAR on the receiver side. */
560
561 static tree
562 build_receiver_ref (tree var, bool by_ref, omp_context *ctx)
563 {
564 tree x, field = lookup_field (var, ctx);
565
566 /* If the receiver record type was remapped in the child function,
567 remap the field into the new record type. */
568 x = maybe_lookup_field (field, ctx);
569 if (x != NULL)
570 field = x;
571
572 x = build_fold_indirect_ref (ctx->receiver_decl);
573 x = build3 (COMPONENT_REF, TREE_TYPE (field), x, field, NULL);
574 if (by_ref)
575 x = build_fold_indirect_ref (x);
576
577 return x;
578 }
579
580 /* Build tree nodes to access VAR in the scope outer to CTX. In the case
581 of a parallel, this is a component reference; for workshare constructs
582 this is some variable. */
583
584 static tree
585 build_outer_var_ref (tree var, omp_context *ctx)
586 {
587 tree x;
588
589 if (is_global_var (maybe_lookup_decl_in_outer_ctx (var, ctx)))
590 x = var;
591 else if (is_variable_sized (var))
592 {
593 x = TREE_OPERAND (DECL_VALUE_EXPR (var), 0);
594 x = build_outer_var_ref (x, ctx);
595 x = build_fold_indirect_ref (x);
596 }
597 else if (is_parallel_ctx (ctx))
598 {
599 bool by_ref = use_pointer_for_field (var, false);
600 x = build_receiver_ref (var, by_ref, ctx);
601 }
602 else if (ctx->outer)
603 x = lookup_decl (var, ctx->outer);
604 else if (is_reference (var))
605 /* This can happen with orphaned constructs.  If var is a reference, it
606 is possible it is shared and as such valid.  */
607 x = var;
608 else
609 gcc_unreachable ();
610
611 if (is_reference (var))
612 x = build_fold_indirect_ref (x);
613
614 return x;
615 }
616
617 /* Build tree nodes to access the field for VAR on the sender side. */
618
619 static tree
620 build_sender_ref (tree var, omp_context *ctx)
621 {
622 tree field = lookup_field (var, ctx);
623 return build3 (COMPONENT_REF, TREE_TYPE (field),
624 ctx->sender_decl, field, NULL);
625 }
626
627 /* Add a new field for VAR inside the structure CTX->SENDER_DECL. */
628
629 static void
630 install_var_field (tree var, bool by_ref, omp_context *ctx)
631 {
632 tree field, type;
633
634 gcc_assert (!splay_tree_lookup (ctx->field_map, (splay_tree_key) var));
635
636 type = TREE_TYPE (var);
637 if (by_ref)
638 type = build_pointer_type (type);
639
640 field = build_decl (FIELD_DECL, DECL_NAME (var), type);
641
642 /* Remember what variable this field was created for. This does have a
643 side effect of making dwarf2out ignore this member, so for helpful
644 debugging we clear it later in delete_omp_context. */
645 DECL_ABSTRACT_ORIGIN (field) = var;
646
647 insert_field_into_struct (ctx->record_type, field);
648
649 splay_tree_insert (ctx->field_map, (splay_tree_key) var,
650 (splay_tree_value) field);
651 }
652
653 static tree
654 install_var_local (tree var, omp_context *ctx)
655 {
656 tree new_var = omp_copy_decl_1 (var, ctx);
657 insert_decl_map (&ctx->cb, var, new_var);
658 return new_var;
659 }
660
661 /* Adjust the replacement for DECL in CTX for the new context. This means
662 copying the DECL_VALUE_EXPR, and fixing up the type. */
663
664 static void
665 fixup_remapped_decl (tree decl, omp_context *ctx, bool private_debug)
666 {
667 tree new_decl, size;
668
669 new_decl = lookup_decl (decl, ctx);
670
671 TREE_TYPE (new_decl) = remap_type (TREE_TYPE (decl), &ctx->cb);
672
673 if ((!TREE_CONSTANT (DECL_SIZE (new_decl)) || private_debug)
674 && DECL_HAS_VALUE_EXPR_P (decl))
675 {
676 tree ve = DECL_VALUE_EXPR (decl);
677 walk_tree (&ve, copy_body_r, &ctx->cb, NULL);
678 SET_DECL_VALUE_EXPR (new_decl, ve);
679 DECL_HAS_VALUE_EXPR_P (new_decl) = 1;
680 }
681
682 if (!TREE_CONSTANT (DECL_SIZE (new_decl)))
683 {
684 size = remap_decl (DECL_SIZE (decl), &ctx->cb);
685 if (size == error_mark_node)
686 size = TYPE_SIZE (TREE_TYPE (new_decl));
687 DECL_SIZE (new_decl) = size;
688
689 size = remap_decl (DECL_SIZE_UNIT (decl), &ctx->cb);
690 if (size == error_mark_node)
691 size = TYPE_SIZE_UNIT (TREE_TYPE (new_decl));
692 DECL_SIZE_UNIT (new_decl) = size;
693 }
694 }
695
696 /* The callback for remap_decl. Search all containing contexts for a
697 mapping of the variable; this avoids having to duplicate the splay
698 tree ahead of time. We know a mapping doesn't already exist in the
699 given context. Create new mappings to implement default semantics. */
700
701 static tree
702 omp_copy_decl (tree var, copy_body_data *cb)
703 {
704 omp_context *ctx = (omp_context *) cb;
705 tree new_var;
706
707 if (TREE_CODE (var) == LABEL_DECL)
708 {
709 new_var = create_artificial_label ();
710 DECL_CONTEXT (new_var) = current_function_decl;
711 insert_decl_map (&ctx->cb, var, new_var);
712 return new_var;
713 }
714
715 while (!is_parallel_ctx (ctx))
716 {
717 ctx = ctx->outer;
718 if (ctx == NULL)
719 return var;
720 new_var = maybe_lookup_decl (var, ctx);
721 if (new_var)
722 return new_var;
723 }
724
725 if (is_global_var (var) || decl_function_context (var) != ctx->cb.src_fn)
726 return var;
727
728 return error_mark_node;
729 }
730
731
734 /* Debugging dumps for parallel regions. */
735 void dump_omp_region (FILE *, struct omp_region *, int);
736 void debug_omp_region (struct omp_region *);
737 void debug_all_omp_regions (void);
738
739 /* Dump the parallel region tree rooted at REGION. */
740
741 void
742 dump_omp_region (FILE *file, struct omp_region *region, int indent)
743 {
744 fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
745 tree_code_name[region->type]);
746
747 if (region->inner)
748 dump_omp_region (file, region->inner, indent + 4);
749
750 if (region->cont)
751 {
752 fprintf (file, "%*sbb %d: OMP_CONTINUE\n", indent, "",
753 region->cont->index);
754 }
755
756 if (region->exit)
757 fprintf (file, "%*sbb %d: OMP_RETURN\n", indent, "",
758 region->exit->index);
759 else
760 fprintf (file, "%*s[no exit marker]\n", indent, "");
761
762 if (region->next)
763 dump_omp_region (file, region->next, indent);
764 }
765
766 void
767 debug_omp_region (struct omp_region *region)
768 {
769 dump_omp_region (stderr, region, 0);
770 }
771
772 void
773 debug_all_omp_regions (void)
774 {
775 dump_omp_region (stderr, root_omp_region, 0);
776 }
777
778
779 /* Create a new parallel region starting at BB inside region PARENT. */
780
781 struct omp_region *
782 new_omp_region (basic_block bb, enum tree_code type, struct omp_region *parent)
783 {
784 struct omp_region *region = xcalloc (1, sizeof (*region));
785
786 region->outer = parent;
787 region->entry = bb;
788 region->type = type;
789
790 if (parent)
791 {
792 /* This is a nested region. Add it to the list of inner
793 regions in PARENT. */
794 region->next = parent->inner;
795 parent->inner = region;
796 }
797 else
798 {
799 /* This is a toplevel region. Add it to the list of toplevel
800 regions in ROOT_OMP_REGION. */
801 region->next = root_omp_region;
802 root_omp_region = region;
803 }
804
805 return region;
806 }
807
808 /* Release the memory associated with the region tree rooted at REGION. */
809
810 static void
811 free_omp_region_1 (struct omp_region *region)
812 {
813 struct omp_region *i, *n;
814
815 for (i = region->inner; i ; i = n)
816 {
817 n = i->next;
818 free_omp_region_1 (i);
819 }
820
821 free (region);
822 }
823
824 /* Release the memory for the entire omp region tree. */
825
826 void
827 free_omp_regions (void)
828 {
829 struct omp_region *r, *n;
830 for (r = root_omp_region; r ; r = n)
831 {
832 n = r->next;
833 free_omp_region_1 (r);
834 }
835 root_omp_region = NULL;
836 }
837
838
839 /* Create a new context, with OUTER_CTX being the surrounding context. */
840
841 static omp_context *
842 new_omp_context (tree stmt, omp_context *outer_ctx)
843 {
844 omp_context *ctx = XCNEW (omp_context);
845
846 splay_tree_insert (all_contexts, (splay_tree_key) stmt,
847 (splay_tree_value) ctx);
848 ctx->stmt = stmt;
849
850 if (outer_ctx)
851 {
852 ctx->outer = outer_ctx;
853 ctx->cb = outer_ctx->cb;
854 ctx->cb.block = NULL;
855 ctx->depth = outer_ctx->depth + 1;
856 }
857 else
858 {
859 ctx->cb.src_fn = current_function_decl;
860 ctx->cb.dst_fn = current_function_decl;
861 ctx->cb.src_node = cgraph_node (current_function_decl);
862 ctx->cb.dst_node = ctx->cb.src_node;
863 ctx->cb.src_cfun = cfun;
864 ctx->cb.copy_decl = omp_copy_decl;
865 ctx->cb.eh_region = -1;
866 ctx->cb.transform_call_graph_edges = CB_CGE_MOVE;
867 ctx->depth = 1;
868 }
869
870 ctx->cb.decl_map = pointer_map_create ();
871
872 return ctx;
873 }
874
875 /* Destroy an omp_context data structure.  Called through the splay tree
876 value delete callback.  */
877
878 static void
879 delete_omp_context (splay_tree_value value)
880 {
881 omp_context *ctx = (omp_context *) value;
882
883 pointer_map_destroy (ctx->cb.decl_map);
884
885 if (ctx->field_map)
886 splay_tree_delete (ctx->field_map);
887
888 /* We hijacked DECL_ABSTRACT_ORIGIN earlier. We need to clear it before
889 it produces corrupt debug information. */
890 if (ctx->record_type)
891 {
892 tree t;
893 for (t = TYPE_FIELDS (ctx->record_type); t ; t = TREE_CHAIN (t))
894 DECL_ABSTRACT_ORIGIN (t) = NULL;
895 }
896
897 XDELETE (ctx);
898 }
899
900 /* Fix up RECEIVER_DECL with a type that has been remapped to the child
901 context. */
902
903 static void
904 fixup_child_record_type (omp_context *ctx)
905 {
906 tree f, type = ctx->record_type;
907
908 /* ??? It isn't sufficient to just call remap_type here, because
909 variably_modified_type_p doesn't work the way we expect for
910 record types. Testing each field for whether it needs remapping
911 and creating a new record by hand works, however. */
912 for (f = TYPE_FIELDS (type); f ; f = TREE_CHAIN (f))
913 if (variably_modified_type_p (TREE_TYPE (f), ctx->cb.src_fn))
914 break;
915 if (f)
916 {
917 tree name, new_fields = NULL;
918
919 type = lang_hooks.types.make_type (RECORD_TYPE);
920 name = DECL_NAME (TYPE_NAME (ctx->record_type));
921 name = build_decl (TYPE_DECL, name, type);
922 TYPE_NAME (type) = name;
923
924 for (f = TYPE_FIELDS (ctx->record_type); f ; f = TREE_CHAIN (f))
925 {
926 tree new_f = copy_node (f);
927 DECL_CONTEXT (new_f) = type;
928 TREE_TYPE (new_f) = remap_type (TREE_TYPE (f), &ctx->cb);
929 TREE_CHAIN (new_f) = new_fields;
930 new_fields = new_f;
931
932 /* Arrange to be able to look up the receiver field
933 given the sender field. */
934 splay_tree_insert (ctx->field_map, (splay_tree_key) f,
935 (splay_tree_value) new_f);
936 }
937 TYPE_FIELDS (type) = nreverse (new_fields);
938 layout_type (type);
939 }
940
941 TREE_TYPE (ctx->receiver_decl) = build_pointer_type (type);
942 }
943
944 /* Instantiate decls as necessary in CTX to satisfy the data sharing
945 specified by CLAUSES. */
946
947 static void
948 scan_sharing_clauses (tree clauses, omp_context *ctx)
949 {
950 tree c, decl;
951 bool scan_array_reductions = false;
952
953 for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
954 {
955 bool by_ref;
956
957 switch (OMP_CLAUSE_CODE (c))
958 {
959 case OMP_CLAUSE_PRIVATE:
960 decl = OMP_CLAUSE_DECL (c);
961 if (!is_variable_sized (decl))
962 install_var_local (decl, ctx);
963 break;
964
965 case OMP_CLAUSE_SHARED:
966 gcc_assert (is_parallel_ctx (ctx));
967 decl = OMP_CLAUSE_DECL (c);
968 gcc_assert (!is_variable_sized (decl));
969 by_ref = use_pointer_for_field (decl, true);
970 /* Global variables don't need to be copied;
971 the receiver side will use them directly. */
972 if (is_global_var (maybe_lookup_decl_in_outer_ctx (decl, ctx)))
973 break;
974 if (! TREE_READONLY (decl)
975 || TREE_ADDRESSABLE (decl)
976 || by_ref
977 || is_reference (decl))
978 {
979 install_var_field (decl, by_ref, ctx);
980 install_var_local (decl, ctx);
981 break;
982 }
983 /* We don't need to copy const scalar vars back. */
984 OMP_CLAUSE_SET_CODE (c, OMP_CLAUSE_FIRSTPRIVATE);
985 goto do_private;
986
987 case OMP_CLAUSE_LASTPRIVATE:
988 /* Let the corresponding firstprivate clause create
989 the variable. */
990 if (OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE (c))
991 break;
992 /* FALLTHRU */
993
994 case OMP_CLAUSE_FIRSTPRIVATE:
995 case OMP_CLAUSE_REDUCTION:
996 decl = OMP_CLAUSE_DECL (c);
997 do_private:
998 if (is_variable_sized (decl))
999 break;
1000 else if (is_parallel_ctx (ctx)
1001 && ! is_global_var (maybe_lookup_decl_in_outer_ctx (decl,
1002 ctx)))
1003 {
1004 by_ref = use_pointer_for_field (decl, false);
1005 install_var_field (decl, by_ref, ctx);
1006 }
1007 install_var_local (decl, ctx);
1008 break;
1009
1010 case OMP_CLAUSE_COPYPRIVATE:
1011 if (ctx->outer)
1012 scan_omp (&OMP_CLAUSE_DECL (c), ctx->outer);
1013 /* FALLTHRU */
1014
1015 case OMP_CLAUSE_COPYIN:
1016 decl = OMP_CLAUSE_DECL (c);
1017 by_ref = use_pointer_for_field (decl, false);
1018 install_var_field (decl, by_ref, ctx);
1019 break;
1020
1021 case OMP_CLAUSE_DEFAULT:
1022 ctx->default_kind = OMP_CLAUSE_DEFAULT_KIND (c);
1023 break;
1024
1025 case OMP_CLAUSE_IF:
1026 case OMP_CLAUSE_NUM_THREADS:
1027 case OMP_CLAUSE_SCHEDULE:
1028 if (ctx->outer)
1029 scan_omp (&OMP_CLAUSE_OPERAND (c, 0), ctx->outer);
1030 break;
1031
1032 case OMP_CLAUSE_NOWAIT:
1033 case OMP_CLAUSE_ORDERED:
1034 break;
1035
1036 default:
1037 gcc_unreachable ();
1038 }
1039 }
1040
1041 for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
1042 {
1043 switch (OMP_CLAUSE_CODE (c))
1044 {
1045 case OMP_CLAUSE_LASTPRIVATE:
1046 /* Let the corresponding firstprivate clause create
1047 the variable. */
1048 if (OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE (c))
1049 break;
1050 /* FALLTHRU */
1051
1052 case OMP_CLAUSE_PRIVATE:
1053 case OMP_CLAUSE_FIRSTPRIVATE:
1054 case OMP_CLAUSE_REDUCTION:
1055 decl = OMP_CLAUSE_DECL (c);
1056 if (is_variable_sized (decl))
1057 install_var_local (decl, ctx);
1058 fixup_remapped_decl (decl, ctx,
1059 OMP_CLAUSE_CODE (c) == OMP_CLAUSE_PRIVATE
1060 && OMP_CLAUSE_PRIVATE_DEBUG (c));
1061 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
1062 && OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
1063 scan_array_reductions = true;
1064 break;
1065
1066 case OMP_CLAUSE_SHARED:
1067 decl = OMP_CLAUSE_DECL (c);
1068 if (! is_global_var (maybe_lookup_decl_in_outer_ctx (decl, ctx)))
1069 fixup_remapped_decl (decl, ctx, false);
1070 break;
1071
1072 case OMP_CLAUSE_COPYPRIVATE:
1073 case OMP_CLAUSE_COPYIN:
1074 case OMP_CLAUSE_DEFAULT:
1075 case OMP_CLAUSE_IF:
1076 case OMP_CLAUSE_NUM_THREADS:
1077 case OMP_CLAUSE_SCHEDULE:
1078 case OMP_CLAUSE_NOWAIT:
1079 case OMP_CLAUSE_ORDERED:
1080 break;
1081
1082 default:
1083 gcc_unreachable ();
1084 }
1085 }
1086
1087 if (scan_array_reductions)
1088 for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
1089 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
1090 && OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
1091 {
1092 scan_omp (&OMP_CLAUSE_REDUCTION_INIT (c), ctx);
1093 scan_omp (&OMP_CLAUSE_REDUCTION_MERGE (c), ctx);
1094 }
1095 }
1096
1097 /* Create a new name for the omp child function.  Returns an identifier.  */
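/* E.g. inside a function "bar" the result is an identifier along the
   lines of "bar.omp_fn.0" (cf. the example dump earlier in this file),
   with the '.' becoming '$' or '_' on targets that cannot use dots in
   labels.  */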
1098
1099 static GTY(()) unsigned int tmp_ompfn_id_num;
1100
1101 static tree
1102 create_omp_child_function_name (void)
1103 {
1104 tree name = DECL_ASSEMBLER_NAME (current_function_decl);
1105 size_t len = IDENTIFIER_LENGTH (name);
1106 char *tmp_name, *prefix;
1107
1108 prefix = alloca (len + sizeof ("_omp_fn"));
1109 memcpy (prefix, IDENTIFIER_POINTER (name), len);
1110 strcpy (prefix + len, "_omp_fn");
1111 #ifndef NO_DOT_IN_LABEL
1112 prefix[len] = '.';
1113 #elif !defined NO_DOLLAR_IN_LABEL
1114 prefix[len] = '$';
1115 #endif
1116 ASM_FORMAT_PRIVATE_NAME (tmp_name, prefix, tmp_ompfn_id_num++);
1117 return get_identifier (tmp_name);
1118 }
1119
1120 /* Build a decl for the omp child function.  It will not contain a body
1121 yet, just the bare decl.  */
1122
1123 static void
1124 create_omp_child_function (omp_context *ctx)
1125 {
1126 tree decl, type, name, t;
1127
1128 name = create_omp_child_function_name ();
1129 type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
1130
1131 decl = build_decl (FUNCTION_DECL, name, type);
1132 decl = lang_hooks.decls.pushdecl (decl);
1133
1134 ctx->cb.dst_fn = decl;
1135
1136 TREE_STATIC (decl) = 1;
1137 TREE_USED (decl) = 1;
1138 DECL_ARTIFICIAL (decl) = 1;
1139 DECL_IGNORED_P (decl) = 0;
1140 TREE_PUBLIC (decl) = 0;
1141 DECL_UNINLINABLE (decl) = 1;
1142 DECL_EXTERNAL (decl) = 0;
1143 DECL_CONTEXT (decl) = NULL_TREE;
1144 DECL_INITIAL (decl) = make_node (BLOCK);
1145
1146 t = build_decl (RESULT_DECL, NULL_TREE, void_type_node);
1147 DECL_ARTIFICIAL (t) = 1;
1148 DECL_IGNORED_P (t) = 1;
1149 DECL_RESULT (decl) = t;
1150
1151 t = build_decl (PARM_DECL, get_identifier (".omp_data_i"), ptr_type_node);
1152 DECL_ARTIFICIAL (t) = 1;
1153 DECL_ARG_TYPE (t) = ptr_type_node;
1154 DECL_CONTEXT (t) = current_function_decl;
1155 TREE_USED (t) = 1;
1156 DECL_ARGUMENTS (decl) = t;
1157 ctx->receiver_decl = t;
1158
1159 /* Allocate memory for the function structure. The call to
1160 allocate_struct_function clobbers CFUN, so we need to restore
1161 it afterward. */
1162 push_struct_function (decl);
1163 DECL_SOURCE_LOCATION (decl) = EXPR_LOCATION (ctx->stmt);
1164 cfun->function_end_locus = EXPR_LOCATION (ctx->stmt);
1165 pop_cfun ();
1166 }
1167
1168
1169 /* Scan an OpenMP parallel directive. */
1170
1171 static void
1172 scan_omp_parallel (tree *stmt_p, omp_context *outer_ctx)
1173 {
1174 omp_context *ctx;
1175 tree name;
1176
1177 /* Ignore parallel directives with empty bodies, unless there
1178 are copyin clauses. */
1179 if (optimize > 0
1180 && empty_body_p (OMP_PARALLEL_BODY (*stmt_p))
1181 && find_omp_clause (OMP_CLAUSES (*stmt_p), OMP_CLAUSE_COPYIN) == NULL)
1182 {
1183 *stmt_p = build_empty_stmt ();
1184 return;
1185 }
1186
1187 ctx = new_omp_context (*stmt_p, outer_ctx);
1188 if (parallel_nesting_level > 1)
1189 ctx->is_nested = true;
1190 ctx->field_map = splay_tree_new (splay_tree_compare_pointers, 0, 0);
1191 ctx->default_kind = OMP_CLAUSE_DEFAULT_SHARED;
1192 ctx->record_type = lang_hooks.types.make_type (RECORD_TYPE);
1193 name = create_tmp_var_name (".omp_data_s");
1194 name = build_decl (TYPE_DECL, name, ctx->record_type);
1195 TYPE_NAME (ctx->record_type) = name;
1196 create_omp_child_function (ctx);
1197 OMP_PARALLEL_FN (*stmt_p) = ctx->cb.dst_fn;
1198
1199 scan_sharing_clauses (OMP_PARALLEL_CLAUSES (*stmt_p), ctx);
1200 scan_omp (&OMP_PARALLEL_BODY (*stmt_p), ctx);
1201
1202 if (TYPE_FIELDS (ctx->record_type) == NULL)
1203 ctx->record_type = ctx->receiver_decl = NULL;
1204 else
1205 {
1206 layout_type (ctx->record_type);
1207 fixup_child_record_type (ctx);
1208 }
1209 }
1210
1211
1212 /* Scan an OpenMP loop directive. */
1213
1214 static void
1215 scan_omp_for (tree *stmt_p, omp_context *outer_ctx)
1216 {
1217 omp_context *ctx;
1218 tree stmt;
1219
1220 stmt = *stmt_p;
1221 ctx = new_omp_context (stmt, outer_ctx);
1222
1223 scan_sharing_clauses (OMP_FOR_CLAUSES (stmt), ctx);
1224
1225 scan_omp (&OMP_FOR_PRE_BODY (stmt), ctx);
1226 scan_omp (&OMP_FOR_INIT (stmt), ctx);
1227 scan_omp (&OMP_FOR_COND (stmt), ctx);
1228 scan_omp (&OMP_FOR_INCR (stmt), ctx);
1229 scan_omp (&OMP_FOR_BODY (stmt), ctx);
1230 }
1231
1232 /* Scan an OpenMP sections directive. */
1233
1234 static void
1235 scan_omp_sections (tree *stmt_p, omp_context *outer_ctx)
1236 {
1237 tree stmt;
1238 omp_context *ctx;
1239
1240 stmt = *stmt_p;
1241 ctx = new_omp_context (stmt, outer_ctx);
1242 scan_sharing_clauses (OMP_SECTIONS_CLAUSES (stmt), ctx);
1243 scan_omp (&OMP_SECTIONS_BODY (stmt), ctx);
1244 }
1245
1246 /* Scan an OpenMP single directive. */
1247
1248 static void
1249 scan_omp_single (tree *stmt_p, omp_context *outer_ctx)
1250 {
1251 tree stmt = *stmt_p;
1252 omp_context *ctx;
1253 tree name;
1254
1255 ctx = new_omp_context (stmt, outer_ctx);
1256 ctx->field_map = splay_tree_new (splay_tree_compare_pointers, 0, 0);
1257 ctx->record_type = lang_hooks.types.make_type (RECORD_TYPE);
1258 name = create_tmp_var_name (".omp_copy_s");
1259 name = build_decl (TYPE_DECL, name, ctx->record_type);
1260 TYPE_NAME (ctx->record_type) = name;
1261
1262 scan_sharing_clauses (OMP_SINGLE_CLAUSES (stmt), ctx);
1263 scan_omp (&OMP_SINGLE_BODY (stmt), ctx);
1264
1265 if (TYPE_FIELDS (ctx->record_type) == NULL)
1266 ctx->record_type = NULL;
1267 else
1268 layout_type (ctx->record_type);
1269 }
1270
1271
1272 /* Check OpenMP nesting restrictions. */
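/* For instance, the checks below warn (but do not reject) code such as

	#pragma omp single
	  {
	#pragma omp for		-- work-sharing nested in work-sharing
	    for (...) ...
	  }

   unless a parallel region intervenes between the two directives.  */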
1273 static void
1274 check_omp_nesting_restrictions (tree t, omp_context *ctx)
1275 {
1276 switch (TREE_CODE (t))
1277 {
1278 case OMP_FOR:
1279 case OMP_SECTIONS:
1280 case OMP_SINGLE:
1281 for (; ctx != NULL; ctx = ctx->outer)
1282 switch (TREE_CODE (ctx->stmt))
1283 {
1284 case OMP_FOR:
1285 case OMP_SECTIONS:
1286 case OMP_SINGLE:
1287 case OMP_ORDERED:
1288 case OMP_MASTER:
1289 warning (0, "work-sharing region may not be closely nested inside "
1290 "of work-sharing, critical, ordered or master region");
1291 return;
1292 case OMP_PARALLEL:
1293 return;
1294 default:
1295 break;
1296 }
1297 break;
1298 case OMP_MASTER:
1299 for (; ctx != NULL; ctx = ctx->outer)
1300 switch (TREE_CODE (ctx->stmt))
1301 {
1302 case OMP_FOR:
1303 case OMP_SECTIONS:
1304 case OMP_SINGLE:
1305 warning (0, "master region may not be closely nested inside "
1306 "of work-sharing region");
1307 return;
1308 case OMP_PARALLEL:
1309 return;
1310 default:
1311 break;
1312 }
1313 break;
1314 case OMP_ORDERED:
1315 for (; ctx != NULL; ctx = ctx->outer)
1316 switch (TREE_CODE (ctx->stmt))
1317 {
1318 case OMP_CRITICAL:
1319 warning (0, "ordered region may not be closely nested inside "
1320 "of critical region");
1321 return;
1322 case OMP_FOR:
1323 if (find_omp_clause (OMP_CLAUSES (ctx->stmt),
1324 OMP_CLAUSE_ORDERED) == NULL)
1325 warning (0, "ordered region must be closely nested inside "
1326 "a loop region with an ordered clause");
1327 return;
1328 case OMP_PARALLEL:
1329 return;
1330 default:
1331 break;
1332 }
1333 break;
1334 case OMP_CRITICAL:
1335 for (; ctx != NULL; ctx = ctx->outer)
1336 if (TREE_CODE (ctx->stmt) == OMP_CRITICAL
1337 && OMP_CRITICAL_NAME (t) == OMP_CRITICAL_NAME (ctx->stmt))
1338 {
1339 warning (0, "critical region may not be nested inside a critical "
1340 "region with the same name");
1341 return;
1342 }
1343 break;
1344 default:
1345 break;
1346 }
1347 }
1348
1349
1350 /* Callback for walk_stmts used to scan for OpenMP directives at TP. */
1351
1352 static tree
1353 scan_omp_1 (tree *tp, int *walk_subtrees, void *data)
1354 {
1355 struct walk_stmt_info *wi = data;
1356 omp_context *ctx = wi->info;
1357 tree t = *tp;
1358
1359 if (EXPR_HAS_LOCATION (t))
1360 input_location = EXPR_LOCATION (t);
1361
1362 /* Check the OpenMP nesting restrictions. */
1363 if (OMP_DIRECTIVE_P (t) && ctx != NULL)
1364 check_omp_nesting_restrictions (t, ctx);
1365
1366 *walk_subtrees = 0;
1367 switch (TREE_CODE (t))
1368 {
1369 case OMP_PARALLEL:
1370 parallel_nesting_level++;
1371 scan_omp_parallel (tp, ctx);
1372 parallel_nesting_level--;
1373 break;
1374
1375 case OMP_FOR:
1376 scan_omp_for (tp, ctx);
1377 break;
1378
1379 case OMP_SECTIONS:
1380 scan_omp_sections (tp, ctx);
1381 break;
1382
1383 case OMP_SINGLE:
1384 scan_omp_single (tp, ctx);
1385 break;
1386
1387 case OMP_SECTION:
1388 case OMP_MASTER:
1389 case OMP_ORDERED:
1390 case OMP_CRITICAL:
1391 ctx = new_omp_context (*tp, ctx);
1392 scan_omp (&OMP_BODY (*tp), ctx);
1393 break;
1394
1395 case BIND_EXPR:
1396 {
1397 tree var;
1398 *walk_subtrees = 1;
1399
1400 for (var = BIND_EXPR_VARS (t); var ; var = TREE_CHAIN (var))
1401 insert_decl_map (&ctx->cb, var, var);
1402 }
1403 break;
1404
1405 case VAR_DECL:
1406 case PARM_DECL:
1407 case LABEL_DECL:
1408 case RESULT_DECL:
1409 if (ctx)
1410 *tp = remap_decl (t, &ctx->cb);
1411 break;
1412
1413 default:
1414 if (ctx && TYPE_P (t))
1415 *tp = remap_type (t, &ctx->cb);
1416 else if (!DECL_P (t))
1417 *walk_subtrees = 1;
1418 break;
1419 }
1420
1421 return NULL_TREE;
1422 }
1423
1424
1425 /* Scan all the statements starting at STMT_P. CTX contains context
1426 information about the OpenMP directives and clauses found during
1427 the scan. */
1428
1429 static void
1430 scan_omp (tree *stmt_p, omp_context *ctx)
1431 {
1432 location_t saved_location;
1433 struct walk_stmt_info wi;
1434
1435 memset (&wi, 0, sizeof (wi));
1436 wi.callback = scan_omp_1;
1437 wi.info = ctx;
1438 wi.want_bind_expr = (ctx != NULL);
1439 wi.want_locations = true;
1440
1441 saved_location = input_location;
1442 walk_stmts (&wi, stmt_p);
1443 input_location = saved_location;
1444 }
1445 \f
1446 /* Re-gimplification and code generation routines. */
1447
1448 /* Build a call to GOMP_barrier. */
1449
1450 static tree
1451 build_omp_barrier (void)
1452 {
1453 return build_call_expr (built_in_decls[BUILT_IN_GOMP_BARRIER], 0);
1454 }
1455
1456 /* If a context was created for STMT when it was scanned, return it. */
1457
1458 static omp_context *
1459 maybe_lookup_ctx (tree stmt)
1460 {
1461 splay_tree_node n;
1462 n = splay_tree_lookup (all_contexts, (splay_tree_key) stmt);
1463 return n ? (omp_context *) n->value : NULL;
1464 }
1465
1466
1467 /* Find the mapping for DECL in CTX or the immediately enclosing
1468 context that has a mapping for DECL.
1469
1470 If CTX is a nested parallel directive, we may have to use the decl
1471 mappings created in CTX's parent context. Suppose that we have the
1472 following parallel nesting (variable UIDs shown for clarity):
1473
1474 iD.1562 = 0;
1475 #omp parallel shared(iD.1562) -> outer parallel
1476 iD.1562 = iD.1562 + 1;
1477
1478 #omp parallel shared (iD.1562) -> inner parallel
1479 iD.1562 = iD.1562 - 1;
1480
1481 Each parallel structure will create a distinct .omp_data_s structure
1482 for copying iD.1562 in/out of the directive:
1483
1484 outer parallel .omp_data_s.1.i -> iD.1562
1485 inner parallel .omp_data_s.2.i -> iD.1562
1486
1487 A shared variable mapping will produce a copy-out operation before
1488 the parallel directive and a copy-in operation after it. So, in
1489 this case we would have:
1490
1491 iD.1562 = 0;
1492 .omp_data_o.1.i = iD.1562;
1493 #omp parallel shared(iD.1562) -> outer parallel
1494 .omp_data_i.1 = &.omp_data_o.1
1495 .omp_data_i.1->i = .omp_data_i.1->i + 1;
1496
1497 .omp_data_o.2.i = iD.1562; -> **
1498 #omp parallel shared(iD.1562) -> inner parallel
1499 .omp_data_i.2 = &.omp_data_o.2
1500 .omp_data_i.2->i = .omp_data_i.2->i - 1;
1501
1502
1503 ** This is a problem. The symbol iD.1562 cannot be referenced
1504 inside the body of the outer parallel region. But since we are
1505 emitting this copy operation while expanding the inner parallel
1506 directive, we need to access the CTX structure of the outer
1507 parallel directive to get the correct mapping:
1508
1509 .omp_data_o.2.i = .omp_data_i.1->i
1510
1511 Since there may be other workshare or parallel directives enclosing
1512 the parallel directive, it may be necessary to walk up the context
1513 parent chain. This is not a problem in general because nested
1514 parallelism happens only rarely. */
1515
1516 static tree
1517 lookup_decl_in_outer_ctx (tree decl, omp_context *ctx)
1518 {
1519 tree t;
1520 omp_context *up;
1521
1522 for (up = ctx->outer, t = NULL; up && t == NULL; up = up->outer)
1523 t = maybe_lookup_decl (decl, up);
1524
1525 gcc_assert (!ctx->is_nested || t || is_global_var (decl));
1526
1527 return t ? t : decl;
1528 }
1529
1530
1531 /* Similar to lookup_decl_in_outer_ctx, but return DECL if not found
1532 in outer contexts. */
1533
1534 static tree
1535 maybe_lookup_decl_in_outer_ctx (tree decl, omp_context *ctx)
1536 {
1537 tree t = NULL;
1538 omp_context *up;
1539
1540 for (up = ctx->outer, t = NULL; up && t == NULL; up = up->outer)
1541 t = maybe_lookup_decl (decl, up);
1542
1543 return t ? t : decl;
1544 }
1545
1546
1547 /* Construct the initialization value for reduction CLAUSE. */
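/* Schematically, the identity values chosen below are

	+  -  |  ^  ||  !=	->  0
	*  &&  ==		->  1
	&			->  ~0 (all bits set)
	max			->  minimum of the type (or -Inf)
	min			->  maximum of the type (or +Inf)

   so that folding a thread's partial result into the initial value is a
   no-op.  */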
1548
1549 tree
1550 omp_reduction_init (tree clause, tree type)
1551 {
1552 switch (OMP_CLAUSE_REDUCTION_CODE (clause))
1553 {
1554 case PLUS_EXPR:
1555 case MINUS_EXPR:
1556 case BIT_IOR_EXPR:
1557 case BIT_XOR_EXPR:
1558 case TRUTH_OR_EXPR:
1559 case TRUTH_ORIF_EXPR:
1560 case TRUTH_XOR_EXPR:
1561 case NE_EXPR:
1562 return fold_convert (type, integer_zero_node);
1563
1564 case MULT_EXPR:
1565 case TRUTH_AND_EXPR:
1566 case TRUTH_ANDIF_EXPR:
1567 case EQ_EXPR:
1568 return fold_convert (type, integer_one_node);
1569
1570 case BIT_AND_EXPR:
1571 return fold_convert (type, integer_minus_one_node);
1572
1573 case MAX_EXPR:
1574 if (SCALAR_FLOAT_TYPE_P (type))
1575 {
1576 REAL_VALUE_TYPE max, min;
1577 if (HONOR_INFINITIES (TYPE_MODE (type)))
1578 {
1579 real_inf (&max);
1580 real_arithmetic (&min, NEGATE_EXPR, &max, NULL);
1581 }
1582 else
1583 real_maxval (&min, 1, TYPE_MODE (type));
1584 return build_real (type, min);
1585 }
1586 else
1587 {
1588 gcc_assert (INTEGRAL_TYPE_P (type));
1589 return TYPE_MIN_VALUE (type);
1590 }
1591
1592 case MIN_EXPR:
1593 if (SCALAR_FLOAT_TYPE_P (type))
1594 {
1595 REAL_VALUE_TYPE max;
1596 if (HONOR_INFINITIES (TYPE_MODE (type)))
1597 real_inf (&max);
1598 else
1599 real_maxval (&max, 0, TYPE_MODE (type));
1600 return build_real (type, max);
1601 }
1602 else
1603 {
1604 gcc_assert (INTEGRAL_TYPE_P (type));
1605 return TYPE_MAX_VALUE (type);
1606 }
1607
1608 default:
1609 gcc_unreachable ();
1610 }
1611 }
1612
1613 /* Generate code to implement the input clauses, FIRSTPRIVATE and COPYIN,
1614 from the receiver (aka child) side and initializers for REFERENCE_TYPE
1615 private variables. Initialization statements go in ILIST, while calls
1616 to destructors go in DLIST. */
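/* E.g. for "firstprivate (x)" on a parallel, ILIST receives roughly

	x' = .omp_data_i->x;	(x' being the privatized copy; C++ may use
				 the copy constructor instead)

   while "copyin (t)" assignments are collected in copyin_seq and only
   executed by the non-master threads, to avoid self-copies.  */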
1617
1618 static void
1619 lower_rec_input_clauses (tree clauses, tree *ilist, tree *dlist,
1620 omp_context *ctx)
1621 {
1622 tree_stmt_iterator diter;
1623 tree c, dtor, copyin_seq, x, ptr;
1624 bool copyin_by_ref = false;
1625 bool lastprivate_firstprivate = false;
1626 int pass;
1627
1628 *dlist = alloc_stmt_list ();
1629 diter = tsi_start (*dlist);
1630 copyin_seq = NULL;
1631
1632 /* Do all the fixed sized types in the first pass, and the variable sized
1633 types in the second pass. This makes sure that the scalar arguments to
1634 the variable sized types are processed before we use them in the
1635 variable sized operations. */
1636 for (pass = 0; pass < 2; ++pass)
1637 {
1638 for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c))
1639 {
1640 enum omp_clause_code c_kind = OMP_CLAUSE_CODE (c);
1641 tree var, new_var;
1642 bool by_ref;
1643
1644 switch (c_kind)
1645 {
1646 case OMP_CLAUSE_PRIVATE:
1647 if (OMP_CLAUSE_PRIVATE_DEBUG (c))
1648 continue;
1649 break;
1650 case OMP_CLAUSE_SHARED:
1651 if (maybe_lookup_decl (OMP_CLAUSE_DECL (c), ctx) == NULL)
1652 {
1653 gcc_assert (is_global_var (OMP_CLAUSE_DECL (c)));
1654 continue;
1655 }
1656 case OMP_CLAUSE_FIRSTPRIVATE:
1657 case OMP_CLAUSE_COPYIN:
1658 case OMP_CLAUSE_REDUCTION:
1659 break;
1660 case OMP_CLAUSE_LASTPRIVATE:
1661 if (OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE (c))
1662 {
1663 lastprivate_firstprivate = true;
1664 if (pass != 0)
1665 continue;
1666 }
1667 break;
1668 default:
1669 continue;
1670 }
1671
1672 new_var = var = OMP_CLAUSE_DECL (c);
1673 if (c_kind != OMP_CLAUSE_COPYIN)
1674 new_var = lookup_decl (var, ctx);
1675
1676 if (c_kind == OMP_CLAUSE_SHARED || c_kind == OMP_CLAUSE_COPYIN)
1677 {
1678 if (pass != 0)
1679 continue;
1680 }
1681 else if (is_variable_sized (var))
1682 {
1683 /* For variable sized types, we need to allocate the
1684 actual storage here. Call alloca and store the
1685 result in the pointer decl that we created elsewhere. */
1686 if (pass == 0)
1687 continue;
1688
1689 ptr = DECL_VALUE_EXPR (new_var);
1690 gcc_assert (TREE_CODE (ptr) == INDIRECT_REF);
1691 ptr = TREE_OPERAND (ptr, 0);
1692 gcc_assert (DECL_P (ptr));
1693
1694 x = TYPE_SIZE_UNIT (TREE_TYPE (new_var));
1695 x = build_call_expr (built_in_decls[BUILT_IN_ALLOCA], 1, x);
1696 x = fold_convert (TREE_TYPE (ptr), x);
1697 x = build_gimple_modify_stmt (ptr, x);
1698 gimplify_and_add (x, ilist);
1699 }
1700 else if (is_reference (var))
1701 {
1702 /* For references that are being privatized for Fortran,
1703 allocate new backing storage for the new pointer
1704 variable. This allows us to avoid changing all the
1705 code that expects a pointer to something that expects
1706 a direct variable. Note that this doesn't apply to
1707 C++, since reference types are disallowed in data
1708 sharing clauses there, except for NRV optimized
1709 return values. */
1710 if (pass == 0)
1711 continue;
1712
1713 x = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (new_var)));
1714 if (TREE_CONSTANT (x))
1715 {
1716 const char *name = NULL;
1717 if (DECL_NAME (var))
1718 name = IDENTIFIER_POINTER (DECL_NAME (new_var));
1719
1720 x = create_tmp_var_raw (TREE_TYPE (TREE_TYPE (new_var)),
1721 name);
1722 gimple_add_tmp_var (x);
1723 x = build_fold_addr_expr_with_type (x, TREE_TYPE (new_var));
1724 }
1725 else
1726 {
1727 x = build_call_expr (built_in_decls[BUILT_IN_ALLOCA], 1, x);
1728 x = fold_convert (TREE_TYPE (new_var), x);
1729 }
1730
1731 x = build_gimple_modify_stmt (new_var, x);
1732 gimplify_and_add (x, ilist);
1733
1734 new_var = build_fold_indirect_ref (new_var);
1735 }
1736 else if (c_kind == OMP_CLAUSE_REDUCTION
1737 && OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
1738 {
1739 if (pass == 0)
1740 continue;
1741 }
1742 else if (pass != 0)
1743 continue;
1744
1745 switch (OMP_CLAUSE_CODE (c))
1746 {
1747 case OMP_CLAUSE_SHARED:
1748 /* Shared global vars are just accessed directly. */
1749 if (is_global_var (new_var))
1750 break;
1751 /* Set up the DECL_VALUE_EXPR for shared variables now. This
1752 needs to be delayed until after fixup_child_record_type so
1753 that we get the correct type during the dereference. */
1754 by_ref = use_pointer_for_field (var, true);
1755 x = build_receiver_ref (var, by_ref, ctx);
1756 SET_DECL_VALUE_EXPR (new_var, x);
1757 DECL_HAS_VALUE_EXPR_P (new_var) = 1;
1758
1759 /* ??? If VAR is not passed by reference, and the variable
1760 hasn't been initialized yet, then we'll get a warning for
1761 the store into the omp_data_s structure. Ideally, we'd be
1762 able to notice this and not store anything at all, but
1763 we're generating code too early. Suppress the warning. */
1764 if (!by_ref)
1765 TREE_NO_WARNING (var) = 1;
1766 break;
1767
1768 case OMP_CLAUSE_LASTPRIVATE:
1769 if (OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE (c))
1770 break;
1771 /* FALLTHRU */
1772
1773 case OMP_CLAUSE_PRIVATE:
1774 x = lang_hooks.decls.omp_clause_default_ctor (c, new_var);
1775 if (x)
1776 gimplify_and_add (x, ilist);
1777 /* FALLTHRU */
1778
1779 do_dtor:
1780 x = lang_hooks.decls.omp_clause_dtor (c, new_var);
1781 if (x)
1782 {
1783 dtor = x;
1784 gimplify_stmt (&dtor);
1785 tsi_link_before (&diter, dtor, TSI_SAME_STMT);
1786 }
1787 break;
1788
1789 case OMP_CLAUSE_FIRSTPRIVATE:
1790 x = build_outer_var_ref (var, ctx);
1791 x = lang_hooks.decls.omp_clause_copy_ctor (c, new_var, x);
1792 gimplify_and_add (x, ilist);
1793 goto do_dtor;
1794 break;
1795
1796 case OMP_CLAUSE_COPYIN:
1797 by_ref = use_pointer_for_field (var, false);
1798 x = build_receiver_ref (var, by_ref, ctx);
1799 x = lang_hooks.decls.omp_clause_assign_op (c, new_var, x);
1800 append_to_statement_list (x, &copyin_seq);
1801 copyin_by_ref |= by_ref;
1802 break;
1803
1804 case OMP_CLAUSE_REDUCTION:
1805 if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
1806 {
1807 gimplify_and_add (OMP_CLAUSE_REDUCTION_INIT (c), ilist);
1808 OMP_CLAUSE_REDUCTION_INIT (c) = NULL;
1809 }
1810 else
1811 {
1812 x = omp_reduction_init (c, TREE_TYPE (new_var));
1813 gcc_assert (TREE_CODE (TREE_TYPE (new_var)) != ARRAY_TYPE);
1814 x = build_gimple_modify_stmt (new_var, x);
1815 gimplify_and_add (x, ilist);
1816 }
1817 break;
1818
1819 default:
1820 gcc_unreachable ();
1821 }
1822 }
1823 }
1824
1825 /* The copyin sequence is not to be executed by the main thread, since
1826 that would result in self-copies. Perhaps not visible to scalars,
1827 but it certainly is to C++ operator=. */
1828 if (copyin_seq)
1829 {
1830 x = build_call_expr (built_in_decls[BUILT_IN_OMP_GET_THREAD_NUM], 0);
1831 x = build2 (NE_EXPR, boolean_type_node, x,
1832 build_int_cst (TREE_TYPE (x), 0));
1833 x = build3 (COND_EXPR, void_type_node, x, copyin_seq, NULL);
1834 gimplify_and_add (x, ilist);
1835 }
1836
1837 /* If any copyin variable is passed by reference, we must ensure the
1838 master thread doesn't modify it before it is copied over in all
1839 threads. Similarly for variables in both firstprivate and
1840 lastprivate clauses we need to ensure the lastprivate copying
1841 happens after firstprivate copying in all threads. */
1842 if (copyin_by_ref || lastprivate_firstprivate)
1843 gimplify_and_add (build_omp_barrier (), ilist);
1844 }
1845
1846
1847 /* Generate code to implement the LASTPRIVATE clauses. This is used for
1848 both parallel and workshare constructs. PREDICATE may be NULL if it's
1849 always true. */
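/* Schematically, for "lastprivate (x)" this appends

	if (PREDICATE)		-- e.g. "this was the last iteration"
	  x = x';		-- copy the privatized value back out

   where x' again denotes the privatized copy; with a NULL PREDICATE the
   copy-back is emitted unconditionally.  */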
1850
1851 static void
1852 lower_lastprivate_clauses (tree clauses, tree predicate, tree *stmt_list,
1853 omp_context *ctx)
1854 {
1855 tree sub_list, x, c;
1856
1857 /* Early exit if there are no lastprivate clauses. */
1858 clauses = find_omp_clause (clauses, OMP_CLAUSE_LASTPRIVATE);
1859 if (clauses == NULL)
1860 {
1861 /* If this was a workshare construct, see if it had been combined
1862 with its parallel. In that case, look for the clauses on the
1863 parallel statement itself. */
1864 if (is_parallel_ctx (ctx))
1865 return;
1866
1867 ctx = ctx->outer;
1868 if (ctx == NULL || !is_parallel_ctx (ctx))
1869 return;
1870
1871 clauses = find_omp_clause (OMP_PARALLEL_CLAUSES (ctx->stmt),
1872 OMP_CLAUSE_LASTPRIVATE);
1873 if (clauses == NULL)
1874 return;
1875 }
1876
1877 sub_list = alloc_stmt_list ();
1878
1879 for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c))
1880 {
1881 tree var, new_var;
1882
1883 if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_LASTPRIVATE)
1884 continue;
1885
1886 var = OMP_CLAUSE_DECL (c);
1887 new_var = lookup_decl (var, ctx);
1888
1889 x = build_outer_var_ref (var, ctx);
1890 if (is_reference (var))
1891 new_var = build_fold_indirect_ref (new_var);
1892 x = lang_hooks.decls.omp_clause_assign_op (c, x, new_var);
1893 append_to_statement_list (x, &sub_list);
1894 }
1895
1896 if (predicate)
1897 x = build3 (COND_EXPR, void_type_node, predicate, sub_list, NULL);
1898 else
1899 x = sub_list;
1900
1901 gimplify_and_add (x, stmt_list);
1902 }
1903
1904
1905 /* Generate code to implement the REDUCTION clauses. */
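/* Roughly, a single scalar "reduction (+:s)" becomes an atomic update

	#pragma omp atomic
	  s = s + s';

   whereas several reduction clauses (or an array reduction) have their
   merge statements wrapped in a GOMP_atomic_start ()/GOMP_atomic_end ()
   pair, as built below; s' stands for the thread-local partial result.  */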
1906
1907 static void
1908 lower_reduction_clauses (tree clauses, tree *stmt_list, omp_context *ctx)
1909 {
1910 tree sub_list = NULL, x, c;
1911 int count = 0;
1912
1913 /* First see if there is exactly one reduction clause. Use OMP_ATOMIC
1914 update in that case, otherwise use a lock. */
1915 for (c = clauses; c && count < 2; c = OMP_CLAUSE_CHAIN (c))
1916 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION)
1917 {
1918 if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
1919 {
1920 /* Never use OMP_ATOMIC for array reductions. */
1921 count = -1;
1922 break;
1923 }
1924 count++;
1925 }
1926
1927 if (count == 0)
1928 return;
1929
1930 for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c))
1931 {
1932 tree var, ref, new_var;
1933 enum tree_code code;
1934
1935 if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_REDUCTION)
1936 continue;
1937
1938 var = OMP_CLAUSE_DECL (c);
1939 new_var = lookup_decl (var, ctx);
1940 if (is_reference (var))
1941 new_var = build_fold_indirect_ref (new_var);
1942 ref = build_outer_var_ref (var, ctx);
1943 code = OMP_CLAUSE_REDUCTION_CODE (c);
1944
1945 /* reduction(-:var) sums up the partial results, so it acts
1946 identically to reduction(+:var). */
1947 if (code == MINUS_EXPR)
1948 code = PLUS_EXPR;
1949
1950 if (count == 1)
1951 {
1952 tree addr = build_fold_addr_expr (ref);
1953
1954 addr = save_expr (addr);
1955 ref = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (addr)), addr);
1956 x = fold_build2 (code, TREE_TYPE (ref), ref, new_var);
1957 x = build2 (OMP_ATOMIC, void_type_node, addr, x);
1958 gimplify_and_add (x, stmt_list);
1959 return;
1960 }
1961
1962 if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
1963 {
1964 tree placeholder = OMP_CLAUSE_REDUCTION_PLACEHOLDER (c);
1965
1966 if (is_reference (var))
1967 ref = build_fold_addr_expr (ref);
1968 SET_DECL_VALUE_EXPR (placeholder, ref);
1969 DECL_HAS_VALUE_EXPR_P (placeholder) = 1;
1970 gimplify_and_add (OMP_CLAUSE_REDUCTION_MERGE (c), &sub_list);
1971 OMP_CLAUSE_REDUCTION_MERGE (c) = NULL;
1972 OMP_CLAUSE_REDUCTION_PLACEHOLDER (c) = NULL;
1973 }
1974 else
1975 {
1976 x = build2 (code, TREE_TYPE (ref), ref, new_var);
1977 ref = build_outer_var_ref (var, ctx);
1978 x = build_gimple_modify_stmt (ref, x);
1979 append_to_statement_list (x, &sub_list);
1980 }
1981 }
1982
1983 x = build_call_expr (built_in_decls[BUILT_IN_GOMP_ATOMIC_START], 0);
1984 gimplify_and_add (x, stmt_list);
1985
1986 gimplify_and_add (sub_list, stmt_list);
1987
1988 x = build_call_expr (built_in_decls[BUILT_IN_GOMP_ATOMIC_END], 0);
1989 gimplify_and_add (x, stmt_list);
1990 }
1991
1992
1993 /* Generate code to implement the COPYPRIVATE clauses. */
1994
1995 static void
1996 lower_copyprivate_clauses (tree clauses, tree *slist, tree *rlist,
1997 omp_context *ctx)
1998 {
1999 tree c;
2000
2001 for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c))
2002 {
2003 tree var, ref, x;
2004 bool by_ref;
2005
2006 if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_COPYPRIVATE)
2007 continue;
2008
2009 var = OMP_CLAUSE_DECL (c);
2010 by_ref = use_pointer_for_field (var, false);
2011
2012 ref = build_sender_ref (var, ctx);
2013 x = lookup_decl_in_outer_ctx (var, ctx);
2014 x = by_ref ? build_fold_addr_expr (x) : x;
2015 x = build_gimple_modify_stmt (ref, x);
2016 gimplify_and_add (x, slist);
2017
2018 ref = build_receiver_ref (var, by_ref, ctx);
2019 if (is_reference (var))
2020 {
2021 ref = build_fold_indirect_ref (ref);
2022 var = build_fold_indirect_ref (var);
2023 }
2024 x = lang_hooks.decls.omp_clause_assign_op (c, var, ref);
2025 gimplify_and_add (x, rlist);
2026 }
2027 }
2028
2029
2030 /* Generate code to implement the FIRSTPRIVATE, COPYIN, LASTPRIVATE
2031 and REDUCTION clauses from the sender (aka parent) side. */
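/* A hedged sketch of what the sender side looks like (the field names
   are illustrative; the real fields live in CTX->record_type): for

       #pragma omp parallel firstprivate (i) reduction (+:s)

   the parent thread fills the marshalling record before the fork and
   reads the result back after the join, roughly

       .omp_data_o.i = i;        <-- "do_in" for the firstprivate
       .omp_data_o.s = s;        <-- "do_in" for the reduction
       ... fork, child runs, join ...
       s = .omp_data_o.s;        <-- "do_out" for the reduction  */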
2032
2033 static void
2034 lower_send_clauses (tree clauses, tree *ilist, tree *olist, omp_context *ctx)
2035 {
2036 tree c;
2037
2038 for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c))
2039 {
2040 tree val, ref, x, var;
2041 bool by_ref, do_in = false, do_out = false;
2042
2043 switch (OMP_CLAUSE_CODE (c))
2044 {
2045 case OMP_CLAUSE_FIRSTPRIVATE:
2046 case OMP_CLAUSE_COPYIN:
2047 case OMP_CLAUSE_LASTPRIVATE:
2048 case OMP_CLAUSE_REDUCTION:
2049 break;
2050 default:
2051 continue;
2052 }
2053
2054 val = OMP_CLAUSE_DECL (c);
2055 var = lookup_decl_in_outer_ctx (val, ctx);
2056
2057 if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_COPYIN
2058 && is_global_var (var))
2059 continue;
2060 if (is_variable_sized (val))
2061 continue;
2062 by_ref = use_pointer_for_field (val, false);
2063
2064 switch (OMP_CLAUSE_CODE (c))
2065 {
2066 case OMP_CLAUSE_FIRSTPRIVATE:
2067 case OMP_CLAUSE_COPYIN:
2068 do_in = true;
2069 break;
2070
2071 case OMP_CLAUSE_LASTPRIVATE:
2072 if (by_ref || is_reference (val))
2073 {
2074 if (OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE (c))
2075 continue;
2076 do_in = true;
2077 }
2078 else
2079 do_out = true;
2080 break;
2081
2082 case OMP_CLAUSE_REDUCTION:
2083 do_in = true;
2084 do_out = !(by_ref || is_reference (val));
2085 break;
2086
2087 default:
2088 gcc_unreachable ();
2089 }
2090
2091 if (do_in)
2092 {
2093 ref = build_sender_ref (val, ctx);
2094 x = by_ref ? build_fold_addr_expr (var) : var;
2095 x = build_gimple_modify_stmt (ref, x);
2096 gimplify_and_add (x, ilist);
2097 }
2098
2099 if (do_out)
2100 {
2101 ref = build_sender_ref (val, ctx);
2102 x = build_gimple_modify_stmt (var, ref);
2103 gimplify_and_add (x, olist);
2104 }
2105 }
2106 }
2107
2108 /* Generate code to implement SHARED from the sender (aka parent) side.
2109 This is trickier, since OMP_PARALLEL_CLAUSES doesn't list things that
2110 got automatically shared. */
2111
2112 static void
2113 lower_send_shared_vars (tree *ilist, tree *olist, omp_context *ctx)
2114 {
2115 tree var, ovar, nvar, f, x;
2116
2117 if (ctx->record_type == NULL)
2118 return;
2119
2120 for (f = TYPE_FIELDS (ctx->record_type); f ; f = TREE_CHAIN (f))
2121 {
2122 ovar = DECL_ABSTRACT_ORIGIN (f);
2123 nvar = maybe_lookup_decl (ovar, ctx);
2124 if (!nvar || !DECL_HAS_VALUE_EXPR_P (nvar))
2125 continue;
2126
2127 /* If CTX is a nested parallel directive, find the immediately
2128 enclosing parallel or workshare construct that contains a
2129 mapping for OVAR. */
2130 var = lookup_decl_in_outer_ctx (ovar, ctx);
2131
2132 if (use_pointer_for_field (ovar, true))
2133 {
2134 x = build_sender_ref (ovar, ctx);
2135 var = build_fold_addr_expr (var);
2136 x = build_gimple_modify_stmt (x, var);
2137 gimplify_and_add (x, ilist);
2138 }
2139 else
2140 {
2141 x = build_sender_ref (ovar, ctx);
2142 x = build_gimple_modify_stmt (x, var);
2143 gimplify_and_add (x, ilist);
2144
2145 x = build_sender_ref (ovar, ctx);
2146 x = build_gimple_modify_stmt (var, x);
2147 gimplify_and_add (x, olist);
2148 }
2149 }
2150 }
2151
2152 /* Build the function calls to GOMP_parallel_start etc to actually
2153 generate the parallel operation. REGION is the parallel region
2154 being expanded. BB is the block into which to insert the code.
2155 WS_ARGS will be set if this is a call to a combined
2156 parallel+workshare construct; it contains the list of additional
2157 arguments needed by the workshare construct. */
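/* Roughly speaking, for the non-combined case with a data record the
   sequence inserted at BB is expected to look like

       GOMP_parallel_start (child_fn, &.omp_data_o, num_threads);
       child_fn (&.omp_data_o);
       GOMP_parallel_end ();

   (a sketch only; in the combined parallel+workshare case the start
   call is one of the GOMP_parallel_loop_*_start or
   GOMP_parallel_sections_start entry points and also receives
   WS_ARGS).  */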
2158
2159 static void
2160 expand_parallel_call (struct omp_region *region, basic_block bb,
2161 tree entry_stmt, tree ws_args)
2162 {
2163 tree t, t1, t2, val, cond, c, clauses;
2164 block_stmt_iterator si;
2165 int start_ix;
2166
2167 clauses = OMP_PARALLEL_CLAUSES (entry_stmt);
2168
2169 /* Determine what flavor of GOMP_parallel_start we will be
2170 emitting. */
2171 start_ix = BUILT_IN_GOMP_PARALLEL_START;
2172 if (is_combined_parallel (region))
2173 {
2174 switch (region->inner->type)
2175 {
2176 case OMP_FOR:
2177 start_ix = BUILT_IN_GOMP_PARALLEL_LOOP_STATIC_START
2178 + region->inner->sched_kind;
2179 break;
2180 case OMP_SECTIONS:
2181 start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS_START;
2182 break;
2183 default:
2184 gcc_unreachable ();
2185 }
2186 }
2187
2188 /* By default, the value of NUM_THREADS is zero (selected at run time)
2189 and there is no conditional. */
2190 cond = NULL_TREE;
2191 val = build_int_cst (unsigned_type_node, 0);
2192
2193 c = find_omp_clause (clauses, OMP_CLAUSE_IF);
2194 if (c)
2195 cond = OMP_CLAUSE_IF_EXPR (c);
2196
2197 c = find_omp_clause (clauses, OMP_CLAUSE_NUM_THREADS);
2198 if (c)
2199 val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
2200
2201 /* Ensure 'val' is of the correct type. */
2202 val = fold_convert (unsigned_type_node, val);
2203
2204 /* If we found the clause 'if (cond)', build either
2205 (cond == 0), yielding 1 (serial) when COND is false, or (cond ? val : 1u). */
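  /* For example (assumed inputs, shown only to illustrate the two
     shapes): with "#pragma omp parallel if (p)" and no NUM_THREADS
     clause, VAL becomes a comparison of P against zero, so a false
     condition passes 1 (run with a single thread) and a true condition
     passes 0 (let the runtime choose); with "if (p) num_threads (4)"
     the blocks built below compute VAL = p ? 4 : 1.  */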
2206 if (cond)
2207 {
2208 block_stmt_iterator si;
2209
2210 cond = gimple_boolify (cond);
2211
2212 if (integer_zerop (val))
2213 val = fold_build2 (EQ_EXPR, unsigned_type_node, cond,
2214 build_int_cst (TREE_TYPE (cond), 0));
2215 else
2216 {
2217 basic_block cond_bb, then_bb, else_bb;
2218 edge e, e_then, e_else;
2219 tree t, tmp_then, tmp_else, tmp_join, tmp_var;
2220
2221 tmp_var = create_tmp_var (TREE_TYPE (val), NULL);
2222 if (gimple_in_ssa_p (cfun))
2223 {
2224 tmp_then = make_ssa_name (tmp_var, NULL_TREE);
2225 tmp_else = make_ssa_name (tmp_var, NULL_TREE);
2226 tmp_join = make_ssa_name (tmp_var, NULL_TREE);
2227 }
2228 else
2229 {
2230 tmp_then = tmp_var;
2231 tmp_else = tmp_var;
2232 tmp_join = tmp_var;
2233 }
2234
2235 e = split_block (bb, NULL);
2236 cond_bb = e->src;
2237 bb = e->dest;
2238 remove_edge (e);
2239
2240 then_bb = create_empty_bb (cond_bb);
2241 else_bb = create_empty_bb (then_bb);
2242 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
2243 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
2244
2245 t = build3 (COND_EXPR, void_type_node,
2246 cond, NULL_TREE, NULL_TREE);
2247
2248 si = bsi_start (cond_bb);
2249 bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);
2250
2251 si = bsi_start (then_bb);
2252 t = build_gimple_modify_stmt (tmp_then, val);
2253 if (gimple_in_ssa_p (cfun))
2254 SSA_NAME_DEF_STMT (tmp_then) = t;
2255 bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);
2256
2257 si = bsi_start (else_bb);
2258 t = build_gimple_modify_stmt (tmp_else,
2259 build_int_cst (unsigned_type_node, 1));
2260 if (gimple_in_ssa_p (cfun))
2261 SSA_NAME_DEF_STMT (tmp_else) = t;
2262 bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);
2263
2264 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
2265 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
2266 e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
2267 e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);
2268
2269 if (gimple_in_ssa_p (cfun))
2270 {
2271 tree phi = create_phi_node (tmp_join, bb);
2272 SSA_NAME_DEF_STMT (tmp_join) = phi;
2273 add_phi_arg (phi, tmp_then, e_then);
2274 add_phi_arg (phi, tmp_else, e_else);
2275 }
2276
2277 val = tmp_join;
2278 }
2279
2280 si = bsi_start (bb);
2281 val = force_gimple_operand_bsi (&si, val, true, NULL_TREE,
2282 false, BSI_CONTINUE_LINKING);
2283 }
2284
2285 si = bsi_last (bb);
2286 t = OMP_PARALLEL_DATA_ARG (entry_stmt);
2287 if (t == NULL)
2288 t1 = null_pointer_node;
2289 else
2290 t1 = build_fold_addr_expr (t);
2291 t2 = build_fold_addr_expr (OMP_PARALLEL_FN (entry_stmt));
2292
2293 if (ws_args)
2294 {
2295 tree args = tree_cons (NULL, t2,
2296 tree_cons (NULL, t1,
2297 tree_cons (NULL, val, ws_args)));
2298 t = build_function_call_expr (built_in_decls[start_ix], args);
2299 }
2300 else
2301 t = build_call_expr (built_in_decls[start_ix], 3, t2, t1, val);
2302
2303 force_gimple_operand_bsi (&si, t, true, NULL_TREE,
2304 false, BSI_CONTINUE_LINKING);
2305
2306 t = OMP_PARALLEL_DATA_ARG (entry_stmt);
2307 if (t == NULL)
2308 t = null_pointer_node;
2309 else
2310 t = build_fold_addr_expr (t);
2311 t = build_call_expr (OMP_PARALLEL_FN (entry_stmt), 1, t);
2312 force_gimple_operand_bsi (&si, t, true, NULL_TREE,
2313 false, BSI_CONTINUE_LINKING);
2314
2315 t = build_call_expr (built_in_decls[BUILT_IN_GOMP_PARALLEL_END], 0);
2316 force_gimple_operand_bsi (&si, t, true, NULL_TREE,
2317 false, BSI_CONTINUE_LINKING);
2318 }
2319
2320
2321 /* If exceptions are enabled, wrap *STMT_P in a MUST_NOT_THROW catch
2322 handler. This prevents programs from violating the structured
2323 block semantics with throws. */
2324
2325 static void
2326 maybe_catch_exception (tree *stmt_p)
2327 {
2328 tree f, t;
2329
2330 if (!flag_exceptions)
2331 return;
2332
2333 if (lang_protect_cleanup_actions)
2334 t = lang_protect_cleanup_actions ();
2335 else
2336 t = build_call_expr (built_in_decls[BUILT_IN_TRAP], 0);
2337 f = build2 (EH_FILTER_EXPR, void_type_node, NULL, NULL);
2338 EH_FILTER_MUST_NOT_THROW (f) = 1;
2339 gimplify_and_add (t, &EH_FILTER_FAILURE (f));
2340
2341 t = build2 (TRY_CATCH_EXPR, void_type_node, *stmt_p, NULL);
2342 append_to_statement_list (f, &TREE_OPERAND (t, 1));
2343
2344 *stmt_p = NULL;
2345 append_to_statement_list (t, stmt_p);
2346 }
2347
2348 /* Chain all the DECLs in LIST by their TREE_CHAIN fields. */
2349
2350 static tree
2351 list2chain (tree list)
2352 {
2353 tree t;
2354
2355 for (t = list; t; t = TREE_CHAIN (t))
2356 {
2357 tree var = TREE_VALUE (t);
2358 if (TREE_CHAIN (t))
2359 TREE_CHAIN (var) = TREE_VALUE (TREE_CHAIN (t));
2360 else
2361 TREE_CHAIN (var) = NULL_TREE;
2362 }
2363
2364 return list ? TREE_VALUE (list) : NULL_TREE;
2365 }
2366
2367
2368 /* Remove barriers in REGION->EXIT's block. Note that this is only
2369 valid for OMP_PARALLEL regions. Since the end of a parallel region
2370 is an implicit barrier, any barrier that a workshare inside the
2371 OMP_PARALLEL left at the end of the OMP_PARALLEL region can now be
2372 removed. */
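/* For instance (an assumed input, for illustration only):

       #pragma omp parallel
       {
         #pragma omp for
         for (...) ...
       }

   The worksharing loop would normally end with its own barrier, but
   because the end of the parallel region is already a barrier, the
   loop's OMP_RETURN can be marked OMP_RETURN_NOWAIT and the redundant
   barrier dropped.  */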
2373
2374 static void
2375 remove_exit_barrier (struct omp_region *region)
2376 {
2377 block_stmt_iterator si;
2378 basic_block exit_bb;
2379 edge_iterator ei;
2380 edge e;
2381 tree t;
2382
2383 exit_bb = region->exit;
2384
2385 /* If the parallel region doesn't return, we don't have REGION->EXIT
2386 block at all. */
2387 if (! exit_bb)
2388 return;
2389
2390 /* The last insn in the block will be the parallel's OMP_RETURN. The
2391 workshare's OMP_RETURN will be in a preceding block. The kinds of
2392 statements that can appear in between are extremely limited -- no
2393 memory operations at all. Here, we allow nothing at all, so the
2394 only thing we allow to precede this OMP_RETURN is a label. */
2395 si = bsi_last (exit_bb);
2396 gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_RETURN);
2397 bsi_prev (&si);
2398 if (!bsi_end_p (si) && TREE_CODE (bsi_stmt (si)) != LABEL_EXPR)
2399 return;
2400
2401 FOR_EACH_EDGE (e, ei, exit_bb->preds)
2402 {
2403 si = bsi_last (e->src);
2404 if (bsi_end_p (si))
2405 continue;
2406 t = bsi_stmt (si);
2407 if (TREE_CODE (t) == OMP_RETURN)
2408 OMP_RETURN_NOWAIT (t) = 1;
2409 }
2410 }
2411
2412 static void
2413 remove_exit_barriers (struct omp_region *region)
2414 {
2415 if (region->type == OMP_PARALLEL)
2416 remove_exit_barrier (region);
2417
2418 if (region->inner)
2419 {
2420 region = region->inner;
2421 remove_exit_barriers (region);
2422 while (region->next)
2423 {
2424 region = region->next;
2425 remove_exit_barriers (region);
2426 }
2427 }
2428 }
2429
2430 /* Optimize omp_get_thread_num () and omp_get_num_threads ()
2431 calls. These can't be declared as const functions, but
2432 within one parallel body they are constant, so they can be
2433 transformed there into __builtin_omp_get_{thread_num,num_threads} ()
2434 which are declared const. */
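/* A sketch of the effect (assuming the usual libgomp declarations are
   in scope): inside an outlined parallel body a call such as

       tid = omp_get_thread_num ();

   is redirected to the const builtin

       tid = __builtin_omp_get_thread_num ();

   so that later passes can CSE repeated queries within the body.  */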
2435
2436 static void
2437 optimize_omp_library_calls (void)
2438 {
2439 basic_block bb;
2440 block_stmt_iterator bsi;
2441 tree thr_num_id
2442 = DECL_ASSEMBLER_NAME (built_in_decls [BUILT_IN_OMP_GET_THREAD_NUM]);
2443 tree num_thr_id
2444 = DECL_ASSEMBLER_NAME (built_in_decls [BUILT_IN_OMP_GET_NUM_THREADS]);
2445
2446 FOR_EACH_BB (bb)
2447 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
2448 {
2449 tree stmt = bsi_stmt (bsi);
2450 tree call = get_call_expr_in (stmt);
2451 tree decl;
2452
2453 if (call
2454 && (decl = get_callee_fndecl (call))
2455 && DECL_EXTERNAL (decl)
2456 && TREE_PUBLIC (decl)
2457 && DECL_INITIAL (decl) == NULL)
2458 {
2459 tree built_in;
2460
2461 if (DECL_NAME (decl) == thr_num_id)
2462 built_in = built_in_decls [BUILT_IN_OMP_GET_THREAD_NUM];
2463 else if (DECL_NAME (decl) == num_thr_id)
2464 built_in = built_in_decls [BUILT_IN_OMP_GET_NUM_THREADS];
2465 else
2466 continue;
2467
2468 if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
2469 || call_expr_nargs (call) != 0)
2470 continue;
2471
2472 if (flag_exceptions && !TREE_NOTHROW (decl))
2473 continue;
2474
2475 if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
2476 || TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (decl)))
2477 != TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (built_in))))
2478 continue;
2479
2480 CALL_EXPR_FN (call) = build_fold_addr_expr (built_in);
2481 }
2482 }
2483 }
2484
2485 /* Expand the OpenMP parallel directive starting at REGION. */
2486
2487 static void
2488 expand_omp_parallel (struct omp_region *region)
2489 {
2490 basic_block entry_bb, exit_bb, new_bb;
2491 struct function *child_cfun;
2492 tree child_fn, block, t, ws_args;
2493 block_stmt_iterator si;
2494 tree entry_stmt;
2495 edge e;
2496
2497 entry_stmt = last_stmt (region->entry);
2498 child_fn = OMP_PARALLEL_FN (entry_stmt);
2499 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
2500 /* If this function has already been instrumented, make sure
2501 the child function isn't instrumented again. */
2502 child_cfun->after_tree_profile = cfun->after_tree_profile;
2503
2504 entry_bb = region->entry;
2505 exit_bb = region->exit;
2506
2507 if (is_combined_parallel (region))
2508 ws_args = region->ws_args;
2509 else
2510 ws_args = NULL_TREE;
2511
2512 if (child_cfun->cfg)
2513 {
2514 /* Due to inlining, it may happen that we have already outlined
2515 the region, in which case all we need to do is make the
2516 sub-graph unreachable and emit the parallel call. */
2517 edge entry_succ_e, exit_succ_e;
2518 block_stmt_iterator si;
2519
2520 entry_succ_e = single_succ_edge (entry_bb);
2521
2522 si = bsi_last (entry_bb);
2523 gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_PARALLEL);
2524 bsi_remove (&si, true);
2525
2526 new_bb = entry_bb;
2527 if (exit_bb)
2528 {
2529 exit_succ_e = single_succ_edge (exit_bb);
2530 make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
2531 }
2532 remove_edge_and_dominated_blocks (entry_succ_e);
2533 }
2534 else
2535 {
2536 /* If the parallel region needs data sent from the parent
2537 function, then the very first statement (except possible
2538 tree profile counter updates) of the parallel body
2539 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
2540 &.OMP_DATA_O is passed as an argument to the child function,
2541 we need to replace it with the argument as seen by the child
2542 function.
2543
2544 In most cases, this will end up being the identity assignment
2545 .OMP_DATA_I = .OMP_DATA_I. However, if the parallel body had
2546 a function call that has been inlined, the original PARM_DECL
2547 .OMP_DATA_I may have been converted into a different local
2548 variable. In that case, we need to keep the assignment. */
2549 if (OMP_PARALLEL_DATA_ARG (entry_stmt))
2550 {
2551 basic_block entry_succ_bb = single_succ (entry_bb);
2552 block_stmt_iterator si;
2553 tree parcopy_stmt = NULL_TREE, arg, narg;
2554
2555 for (si = bsi_start (entry_succ_bb); ; bsi_next (&si))
2556 {
2557 tree stmt, arg;
2558
2559 gcc_assert (!bsi_end_p (si));
2560 stmt = bsi_stmt (si);
2561 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
2562 continue;
2563
2564 arg = GIMPLE_STMT_OPERAND (stmt, 1);
2565 STRIP_NOPS (arg);
2566 if (TREE_CODE (arg) == ADDR_EXPR
2567 && TREE_OPERAND (arg, 0)
2568 == OMP_PARALLEL_DATA_ARG (entry_stmt))
2569 {
2570 parcopy_stmt = stmt;
2571 break;
2572 }
2573 }
2574
2575 gcc_assert (parcopy_stmt != NULL_TREE);
2576 arg = DECL_ARGUMENTS (child_fn);
2577
2578 if (!gimple_in_ssa_p (cfun))
2579 {
2580 if (GIMPLE_STMT_OPERAND (parcopy_stmt, 0) == arg)
2581 bsi_remove (&si, true);
2582 else
2583 GIMPLE_STMT_OPERAND (parcopy_stmt, 1) = arg;
2584 }
2585 else
2586 {
2587 /* If we are in SSA form, we must load the value from the default
2588 definition of the argument. That definition should not exist yet,
2589 since the argument is never used uninitialized. */
2590 gcc_assert (gimple_default_def (cfun, arg) == NULL);
2591 narg = make_ssa_name (arg, build_empty_stmt ());
2592 set_default_def (arg, narg);
2593 GIMPLE_STMT_OPERAND (parcopy_stmt, 1) = narg;
2594 update_stmt (parcopy_stmt);
2595 }
2596 }
2597
2598 /* Declare local variables needed in CHILD_CFUN. */
2599 block = DECL_INITIAL (child_fn);
2600 BLOCK_VARS (block) = list2chain (child_cfun->unexpanded_var_list);
2601 DECL_SAVED_TREE (child_fn) = bb_stmt_list (single_succ (entry_bb));
2602
2603 /* Reset DECL_CONTEXT on function arguments. */
2604 for (t = DECL_ARGUMENTS (child_fn); t; t = TREE_CHAIN (t))
2605 DECL_CONTEXT (t) = child_fn;
2606
2607 /* Split ENTRY_BB at OMP_PARALLEL so that it can be moved to the
2608 child function. */
2609 si = bsi_last (entry_bb);
2610 t = bsi_stmt (si);
2611 gcc_assert (t && TREE_CODE (t) == OMP_PARALLEL);
2612 bsi_remove (&si, true);
2613 e = split_block (entry_bb, t);
2614 entry_bb = e->dest;
2615 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
2616
2617 /* Convert OMP_RETURN into a RETURN_EXPR. */
2618 if (exit_bb)
2619 {
2620 si = bsi_last (exit_bb);
2621 gcc_assert (!bsi_end_p (si)
2622 && TREE_CODE (bsi_stmt (si)) == OMP_RETURN);
2623 t = build1 (RETURN_EXPR, void_type_node, NULL);
2624 bsi_insert_after (&si, t, BSI_SAME_STMT);
2625 bsi_remove (&si, true);
2626 }
2627
2628 /* Move the parallel region into CHILD_CFUN. */
2629
2630 if (gimple_in_ssa_p (cfun))
2631 {
2632 push_cfun (child_cfun);
2633 init_tree_ssa ();
2634 init_ssa_operands ();
2635 cfun->gimple_df->in_ssa_p = true;
2636 pop_cfun ();
2637 }
2638 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb);
2639 if (exit_bb)
2640 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
2641
2642 /* Inform the callgraph about the new function. */
2643 DECL_STRUCT_FUNCTION (child_fn)->curr_properties
2644 = cfun->curr_properties;
2645 cgraph_add_new_function (child_fn, true);
2646
2647 /* Fix the callgraph edges for child_cfun. Those for cfun will be
2648 fixed in a following pass. */
2649 push_cfun (child_cfun);
2650 if (optimize)
2651 optimize_omp_library_calls ();
2652 rebuild_cgraph_edges ();
2653
2654 /* Some EH regions might become dead, see PR34608. If
2655 pass_cleanup_cfg isn't the first pass to happen with the
2656 new child, these dead EH edges might cause problems.
2657 Clean them up now. */
2658 if (flag_exceptions)
2659 {
2660 basic_block bb;
2661 tree save_current = current_function_decl;
2662 bool changed = false;
2663
2664 current_function_decl = child_fn;
2665 FOR_EACH_BB (bb)
2666 changed |= tree_purge_dead_eh_edges (bb);
2667 if (changed)
2668 cleanup_tree_cfg ();
2669 current_function_decl = save_current;
2670 }
2671 pop_cfun ();
2672 }
2673
2674 /* Emit a library call to launch the children threads. */
2675 expand_parallel_call (region, new_bb, entry_stmt, ws_args);
2676 update_ssa (TODO_update_ssa_only_virtuals);
2677 }
2678
2679
2680 /* A subroutine of expand_omp_for. Generate code for a parallel
2681 loop with any schedule. Given parameters:
2682
2683 for (V = N1; V cond N2; V += STEP) BODY;
2684
2685 where COND is "<" or ">", we generate pseudocode
2686
2687 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
2688 if (more) goto L0; else goto L3;
2689 L0:
2690 V = istart0;
2691 iend = iend0;
2692 L1:
2693 BODY;
2694 V += STEP;
2695 if (V cond iend) goto L1; else goto L2;
2696 L2:
2697 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2698 L3:
2699
2700 If this is a combined omp parallel loop, instead of the call to
2701 GOMP_loop_foo_start, we call GOMP_loop_foo_next. */
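/* As a concrete (but hedged) instance, for schedule(dynamic, CHUNK) the
   start and next functions above are expected to be
   GOMP_loop_dynamic_start and GOMP_loop_dynamic_next; expand_omp_for
   picks them by offsetting from BUILT_IN_GOMP_LOOP_STATIC_START with
   the schedule kind.  */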
2702
2703 static void
2704 expand_omp_for_generic (struct omp_region *region,
2705 struct omp_for_data *fd,
2706 enum built_in_function start_fn,
2707 enum built_in_function next_fn)
2708 {
2709 tree type, istart0, iend0, iend, phi;
2710 tree t, vmain, vback;
2711 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb;
2712 basic_block l2_bb = NULL, l3_bb = NULL;
2713 block_stmt_iterator si;
2714 bool in_combined_parallel = is_combined_parallel (region);
2715 bool broken_loop = region->cont == NULL;
2716 edge e, ne;
2717
2718 gcc_assert (!broken_loop || !in_combined_parallel);
2719
2720 type = TREE_TYPE (fd->v);
2721
2722 istart0 = create_tmp_var (long_integer_type_node, ".istart0");
2723 iend0 = create_tmp_var (long_integer_type_node, ".iend0");
2724 TREE_ADDRESSABLE (istart0) = 1;
2725 TREE_ADDRESSABLE (iend0) = 1;
2726 if (gimple_in_ssa_p (cfun))
2727 {
2728 add_referenced_var (istart0);
2729 add_referenced_var (iend0);
2730 }
2731
2732 entry_bb = region->entry;
2733 cont_bb = region->cont;
2734 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
2735 gcc_assert (broken_loop
2736 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
2737 l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
2738 l1_bb = single_succ (l0_bb);
2739 if (!broken_loop)
2740 {
2741 l2_bb = create_empty_bb (cont_bb);
2742 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb);
2743 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
2744 }
2745 else
2746 l2_bb = NULL;
2747 l3_bb = BRANCH_EDGE (entry_bb)->dest;
2748 exit_bb = region->exit;
2749
2750 si = bsi_last (entry_bb);
2751 gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_FOR);
2752 if (in_combined_parallel)
2753 {
2754 /* In a combined parallel loop, emit a call to
2755 GOMP_loop_foo_next. */
2756 t = build_call_expr (built_in_decls[next_fn], 2,
2757 build_fold_addr_expr (istart0),
2758 build_fold_addr_expr (iend0));
2759 }
2760 else
2761 {
2762 tree t0, t1, t2, t3, t4;
2763 /* If this is not a combined parallel loop, emit a call to
2764 GOMP_loop_foo_start in ENTRY_BB. */
2765 t4 = build_fold_addr_expr (iend0);
2766 t3 = build_fold_addr_expr (istart0);
2767 t2 = fold_convert (long_integer_type_node, fd->step);
2768 t1 = fold_convert (long_integer_type_node, fd->n2);
2769 t0 = fold_convert (long_integer_type_node, fd->n1);
2770 if (fd->chunk_size)
2771 {
2772 t = fold_convert (long_integer_type_node, fd->chunk_size);
2773 t = build_call_expr (built_in_decls[start_fn], 6,
2774 t0, t1, t2, t, t3, t4);
2775 }
2776 else
2777 t = build_call_expr (built_in_decls[start_fn], 5,
2778 t0, t1, t2, t3, t4);
2779 }
2780 t = force_gimple_operand_bsi (&si, t, true, NULL_TREE,
2781 true, BSI_SAME_STMT);
2782 t = build3 (COND_EXPR, void_type_node, t, NULL_TREE, NULL_TREE);
2783 bsi_insert_after (&si, t, BSI_SAME_STMT);
2784
2785 /* Remove the OMP_FOR statement. */
2786 bsi_remove (&si, true);
2787
2788 /* Iteration setup for sequential loop goes in L0_BB. */
2789 si = bsi_start (l0_bb);
2790 t = fold_convert (type, istart0);
2791 t = force_gimple_operand_bsi (&si, t, false, NULL_TREE,
2792 false, BSI_CONTINUE_LINKING);
2793 t = build_gimple_modify_stmt (fd->v, t);
2794 bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);
2795 if (gimple_in_ssa_p (cfun))
2796 SSA_NAME_DEF_STMT (fd->v) = t;
2797
2798 t = fold_convert (type, iend0);
2799 iend = force_gimple_operand_bsi (&si, t, true, NULL_TREE,
2800 false, BSI_CONTINUE_LINKING);
2801
2802 if (!broken_loop)
2803 {
2804 /* Code to control the increment and predicate for the sequential
2805 loop goes in the CONT_BB. */
2806 si = bsi_last (cont_bb);
2807 t = bsi_stmt (si);
2808 gcc_assert (TREE_CODE (t) == OMP_CONTINUE);
2809 vmain = TREE_OPERAND (t, 1);
2810 vback = TREE_OPERAND (t, 0);
2811
2812 t = fold_build2 (PLUS_EXPR, type, vmain, fd->step);
2813 t = force_gimple_operand_bsi (&si, t, false, NULL_TREE,
2814 true, BSI_SAME_STMT);
2815 t = build_gimple_modify_stmt (vback, t);
2816 bsi_insert_before (&si, t, BSI_SAME_STMT);
2817 if (gimple_in_ssa_p (cfun))
2818 SSA_NAME_DEF_STMT (vback) = t;
2819
2820 t = build2 (fd->cond_code, boolean_type_node, vback, iend);
2821 t = build3 (COND_EXPR, void_type_node, t, NULL_TREE, NULL_TREE);
2822 bsi_insert_before (&si, t, BSI_SAME_STMT);
2823
2824 /* Remove OMP_CONTINUE. */
2825 bsi_remove (&si, true);
2826
2827 /* Emit code to get the next parallel iteration in L2_BB. */
2828 si = bsi_start (l2_bb);
2829
2830 t = build_call_expr (built_in_decls[next_fn], 2,
2831 build_fold_addr_expr (istart0),
2832 build_fold_addr_expr (iend0));
2833 t = force_gimple_operand_bsi (&si, t, true, NULL_TREE,
2834 false, BSI_CONTINUE_LINKING);
2835 t = build3 (COND_EXPR, void_type_node, t, NULL_TREE, NULL_TREE);
2836 bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);
2837 }
2838
2839 /* Add the loop cleanup function. */
2840 si = bsi_last (exit_bb);
2841 if (OMP_RETURN_NOWAIT (bsi_stmt (si)))
2842 t = built_in_decls[BUILT_IN_GOMP_LOOP_END_NOWAIT];
2843 else
2844 t = built_in_decls[BUILT_IN_GOMP_LOOP_END];
2845 t = build_call_expr (t, 0);
2846 bsi_insert_after (&si, t, BSI_SAME_STMT);
2847 bsi_remove (&si, true);
2848
2849 /* Connect the new blocks. */
2850 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
2851 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
2852
2853 if (!broken_loop)
2854 {
2855 e = find_edge (cont_bb, l3_bb);
2856 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
2857
2858 for (phi = phi_nodes (l3_bb); phi; phi = PHI_CHAIN (phi))
2859 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
2860 PHI_ARG_DEF_FROM_EDGE (phi, e));
2861 remove_edge (e);
2862
2863 find_edge (cont_bb, l1_bb)->flags = EDGE_TRUE_VALUE;
2864 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
2865 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
2866
2867 set_immediate_dominator (CDI_DOMINATORS, l2_bb,
2868 recompute_dominator (CDI_DOMINATORS, l2_bb));
2869 set_immediate_dominator (CDI_DOMINATORS, l3_bb,
2870 recompute_dominator (CDI_DOMINATORS, l3_bb));
2871 set_immediate_dominator (CDI_DOMINATORS, l0_bb,
2872 recompute_dominator (CDI_DOMINATORS, l0_bb));
2873 set_immediate_dominator (CDI_DOMINATORS, l1_bb,
2874 recompute_dominator (CDI_DOMINATORS, l1_bb));
2875 }
2876 }
2877
2878
2879 /* A subroutine of expand_omp_for. Generate code for a parallel
2880 loop with static schedule and no specified chunk size. Given
2881 parameters:
2882
2883 for (V = N1; V cond N2; V += STEP) BODY;
2884
2885 where COND is "<" or ">", we generate pseudocode
2886
2887 if (cond is <)
2888 adj = STEP - 1;
2889 else
2890 adj = STEP + 1;
2891 n = (adj + N2 - N1) / STEP;
2892 q = n / nthreads;
2893 q += (q * nthreads != n);
2894 s0 = q * threadid;
2895 e0 = min(s0 + q, n);
2896 V = s0 * STEP + N1;
2897 if (s0 >= e0) goto L2; else goto L0;
2898 L0:
2899 e = e0 * STEP + N1;
2900 L1:
2901 BODY;
2902 V += STEP;
2903 if (V cond e) goto L1;
2904 L2:
2905 */
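/* A worked example of the partitioning above (illustrative numbers
   only): with N1 = 0, N2 = 10, STEP = 1 and nthreads = 4 we get
   n = 10 and q = 10/4 rounded up to 3, so thread 0 runs iterations
   [0,3), thread 1 [3,6), thread 2 [6,9) and thread 3 [9,10); a thread
   for which s0 >= e0 skips the loop body entirely.  */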
2906
2907 static void
2908 expand_omp_for_static_nochunk (struct omp_region *region,
2909 struct omp_for_data *fd)
2910 {
2911 tree n, q, s0, e0, e, t, nthreads, threadid;
2912 tree type, vmain, vback;
2913 basic_block entry_bb, exit_bb, seq_start_bb, body_bb, cont_bb;
2914 basic_block fin_bb;
2915 block_stmt_iterator si;
2916
2917 type = TREE_TYPE (fd->v);
2918
2919 entry_bb = region->entry;
2920 cont_bb = region->cont;
2921 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
2922 gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
2923 seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
2924 body_bb = single_succ (seq_start_bb);
2925 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
2926 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
2927 fin_bb = FALLTHRU_EDGE (cont_bb)->dest;
2928 exit_bb = region->exit;
2929
2930 /* Iteration space partitioning goes in ENTRY_BB. */
2931 si = bsi_last (entry_bb);
2932 gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_FOR);
2933
2934 t = build_call_expr (built_in_decls[BUILT_IN_OMP_GET_NUM_THREADS], 0);
2935 t = fold_convert (type, t);
2936 nthreads = force_gimple_operand_bsi (&si, t, true, NULL_TREE,
2937 true, BSI_SAME_STMT);
2938
2939 t = build_call_expr (built_in_decls[BUILT_IN_OMP_GET_THREAD_NUM], 0);
2940 t = fold_convert (type, t);
2941 threadid = force_gimple_operand_bsi (&si, t, true, NULL_TREE,
2942 true, BSI_SAME_STMT);
2943
2944 fd->n1 = force_gimple_operand_bsi (&si,
2945 fold_convert (type, fd->n1),
2946 true, NULL_TREE,
2947 true, BSI_SAME_STMT);
2948
2949 fd->n2 = force_gimple_operand_bsi (&si,
2950 fold_convert (type, fd->n2),
2951 true, NULL_TREE,
2952 true, BSI_SAME_STMT);
2953
2954 fd->step = force_gimple_operand_bsi (&si,
2955 fold_convert (type, fd->step),
2956 true, NULL_TREE,
2957 true, BSI_SAME_STMT);
2958
2959 t = build_int_cst (type, (fd->cond_code == LT_EXPR ? -1 : 1));
2960 t = fold_build2 (PLUS_EXPR, type, fd->step, t);
2961 t = fold_build2 (PLUS_EXPR, type, t, fd->n2);
2962 t = fold_build2 (MINUS_EXPR, type, t, fd->n1);
2963 t = fold_build2 (TRUNC_DIV_EXPR, type, t, fd->step);
2964 t = fold_convert (type, t);
2965 n = force_gimple_operand_bsi (&si, t, true, NULL_TREE, true, BSI_SAME_STMT);
2966
2967 t = fold_build2 (TRUNC_DIV_EXPR, type, n, nthreads);
2968 q = force_gimple_operand_bsi (&si, t, true, NULL_TREE, true, BSI_SAME_STMT);
2969
2970 t = fold_build2 (MULT_EXPR, type, q, nthreads);
2971 t = fold_build2 (NE_EXPR, type, t, n);
2972 t = fold_build2 (PLUS_EXPR, type, q, t);
2973 q = force_gimple_operand_bsi (&si, t, true, NULL_TREE, true, BSI_SAME_STMT);
2974
2975 t = build2 (MULT_EXPR, type, q, threadid);
2976 s0 = force_gimple_operand_bsi (&si, t, true, NULL_TREE, true, BSI_SAME_STMT);
2977
2978 t = fold_build2 (PLUS_EXPR, type, s0, q);
2979 t = fold_build2 (MIN_EXPR, type, t, n);
2980 e0 = force_gimple_operand_bsi (&si, t, true, NULL_TREE, true, BSI_SAME_STMT);
2981
2982 t = build2 (GE_EXPR, boolean_type_node, s0, e0);
2983 t = build3 (COND_EXPR, void_type_node, t, NULL_TREE, NULL_TREE);
2984 bsi_insert_before (&si, t, BSI_SAME_STMT);
2985
2986 /* Remove the OMP_FOR statement. */
2987 bsi_remove (&si, true);
2988
2989 /* Setup code for sequential iteration goes in SEQ_START_BB. */
2990 si = bsi_start (seq_start_bb);
2991
2992 t = fold_convert (type, s0);
2993 t = fold_build2 (MULT_EXPR, type, t, fd->step);
2994 t = fold_build2 (PLUS_EXPR, type, t, fd->n1);
2995 t = force_gimple_operand_bsi (&si, t, false, NULL_TREE,
2996 false, BSI_CONTINUE_LINKING);
2997 t = build_gimple_modify_stmt (fd->v, t);
2998 bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);
2999 if (gimple_in_ssa_p (cfun))
3000 SSA_NAME_DEF_STMT (fd->v) = t;
3001
3002 t = fold_convert (type, e0);
3003 t = fold_build2 (MULT_EXPR, type, t, fd->step);
3004 t = fold_build2 (PLUS_EXPR, type, t, fd->n1);
3005 e = force_gimple_operand_bsi (&si, t, true, NULL_TREE,
3006 false, BSI_CONTINUE_LINKING);
3007
3008 /* The code controlling the sequential loop replaces the OMP_CONTINUE. */
3009 si = bsi_last (cont_bb);
3010 t = bsi_stmt (si);
3011 gcc_assert (TREE_CODE (t) == OMP_CONTINUE);
3012 vmain = TREE_OPERAND (t, 1);
3013 vback = TREE_OPERAND (t, 0);
3014
3015 t = fold_build2 (PLUS_EXPR, type, vmain, fd->step);
3016 t = force_gimple_operand_bsi (&si, t, false, NULL_TREE,
3017 true, BSI_SAME_STMT);
3018 t = build_gimple_modify_stmt (vback, t);
3019 bsi_insert_before (&si, t, BSI_SAME_STMT);
3020 if (gimple_in_ssa_p (cfun))
3021 SSA_NAME_DEF_STMT (vback) = t;
3022
3023 t = build2 (fd->cond_code, boolean_type_node, vback, e);
3024 t = build3 (COND_EXPR, void_type_node, t, NULL_TREE, NULL_TREE);
3025 bsi_insert_before (&si, t, BSI_SAME_STMT);
3026
3027 /* Remove the OMP_CONTINUE statement. */
3028 bsi_remove (&si, true);
3029
3030 /* Replace the OMP_RETURN with a barrier, or nothing. */
3031 si = bsi_last (exit_bb);
3032 if (!OMP_RETURN_NOWAIT (bsi_stmt (si)))
3033 force_gimple_operand_bsi (&si, build_omp_barrier (), false, NULL_TREE,
3034 false, BSI_SAME_STMT);
3035 bsi_remove (&si, true);
3036
3037 /* Connect all the blocks. */
3038 find_edge (entry_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
3039 find_edge (entry_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
3040
3041 find_edge (cont_bb, body_bb)->flags = EDGE_TRUE_VALUE;
3042 find_edge (cont_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
3043
3044 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, entry_bb);
3045 set_immediate_dominator (CDI_DOMINATORS, body_bb,
3046 recompute_dominator (CDI_DOMINATORS, body_bb));
3047 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
3048 recompute_dominator (CDI_DOMINATORS, fin_bb));
3049 }
3050
3051
3052 /* A subroutine of expand_omp_for. Generate code for a parallel
3053 loop with static schedule and a specified chunk size. Given
3054 parameters:
3055
3056 for (V = N1; V cond N2; V += STEP) BODY;
3057
3058 where COND is "<" or ">", we generate pseudocode
3059
3060 if (cond is <)
3061 adj = STEP - 1;
3062 else
3063 adj = STEP + 1;
3064 n = (adj + N2 - N1) / STEP;
3065 trip = 0;
3066 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is
3067 here so that V is defined
3068 if the loop is not entered
3069 L0:
3070 s0 = (trip * nthreads + threadid) * CHUNK;
3071 e0 = min(s0 + CHUNK, n);
3072 if (s0 < n) goto L1; else goto L4;
3073 L1:
3074 V = s0 * STEP + N1;
3075 e = e0 * STEP + N1;
3076 L2:
3077 BODY;
3078 V += STEP;
3079 if (V cond e) goto L2; else goto L3;
3080 L3:
3081 trip += 1;
3082 goto L0;
3083 L4:
3084 */
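/* A worked example of the chunked partitioning (illustrative numbers
   only): with n = 7, nthreads = 2 and CHUNK = 2, thread 0 computes
   s0 = (trip*2 + 0)*2 = 0, 4, 8, ... and so runs iterations [0,2) and
   [4,6), while thread 1 computes s0 = 2, 6, 10, ... and runs [2,4)
   and [6,7); each thread leaves once s0 >= n.  */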
3085
3086 static void
3087 expand_omp_for_static_chunk (struct omp_region *region, struct omp_for_data *fd)
3088 {
3089 tree n, s0, e0, e, t, phi, nphi, args;
3090 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
3091 tree type, cont, v_main, v_back, v_extra;
3092 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
3093 basic_block trip_update_bb, cont_bb, fin_bb;
3094 block_stmt_iterator si;
3095 edge se, re, ene;
3096
3097 type = TREE_TYPE (fd->v);
3098
3099 entry_bb = region->entry;
3100 se = split_block (entry_bb, last_stmt (entry_bb));
3101 entry_bb = se->src;
3102 iter_part_bb = se->dest;
3103 cont_bb = region->cont;
3104 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
3105 gcc_assert (BRANCH_EDGE (iter_part_bb)->dest
3106 == FALLTHRU_EDGE (cont_bb)->dest);
3107 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
3108 body_bb = single_succ (seq_start_bb);
3109 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
3110 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3111 fin_bb = FALLTHRU_EDGE (cont_bb)->dest;
3112 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
3113 exit_bb = region->exit;
3114
3115 /* Trip and adjustment setup goes in ENTRY_BB. */
3116 si = bsi_last (entry_bb);
3117 gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_FOR);
3118
3119 t = build_call_expr (built_in_decls[BUILT_IN_OMP_GET_NUM_THREADS], 0);
3120 t = fold_convert (type, t);
3121 nthreads = force_gimple_operand_bsi (&si, t, true, NULL_TREE,
3122 true, BSI_SAME_STMT);
3123
3124 t = build_call_expr (built_in_decls[BUILT_IN_OMP_GET_THREAD_NUM], 0);
3125 t = fold_convert (type, t);
3126 threadid = force_gimple_operand_bsi (&si, t, true, NULL_TREE,
3127 true, BSI_SAME_STMT);
3128
3129 fd->n1 = force_gimple_operand_bsi (&si, fold_convert (type, fd->n1),
3130 true, NULL_TREE,
3131 true, BSI_SAME_STMT);
3132 fd->n2 = force_gimple_operand_bsi (&si, fold_convert (type, fd->n2),
3133 true, NULL_TREE,
3134 true, BSI_SAME_STMT);
3135 fd->step = force_gimple_operand_bsi (&si, fold_convert (type, fd->step),
3136 true, NULL_TREE,
3137 true, BSI_SAME_STMT);
3138 fd->chunk_size
3139 = force_gimple_operand_bsi (&si, fold_convert (type,
3140 fd->chunk_size),
3141 true, NULL_TREE,
3142 true, BSI_SAME_STMT);
3143
3144 t = build_int_cst (type, (fd->cond_code == LT_EXPR ? -1 : 1));
3145 t = fold_build2 (PLUS_EXPR, type, fd->step, t);
3146 t = fold_build2 (PLUS_EXPR, type, t, fd->n2);
3147 t = fold_build2 (MINUS_EXPR, type, t, fd->n1);
3148 t = fold_build2 (TRUNC_DIV_EXPR, type, t, fd->step);
3149 t = fold_convert (type, t);
3150 n = force_gimple_operand_bsi (&si, t, true, NULL_TREE,
3151 true, BSI_SAME_STMT);
3152
3153 trip_var = create_tmp_var (type, ".trip");
3154 if (gimple_in_ssa_p (cfun))
3155 {
3156 add_referenced_var (trip_var);
3157 trip_init = make_ssa_name (trip_var, NULL_TREE);
3158 trip_main = make_ssa_name (trip_var, NULL_TREE);
3159 trip_back = make_ssa_name (trip_var, NULL_TREE);
3160 }
3161 else
3162 {
3163 trip_init = trip_var;
3164 trip_main = trip_var;
3165 trip_back = trip_var;
3166 }
3167
3168 t = build_gimple_modify_stmt (trip_init, build_int_cst (type, 0));
3169 bsi_insert_before (&si, t, BSI_SAME_STMT);
3170 if (gimple_in_ssa_p (cfun))
3171 SSA_NAME_DEF_STMT (trip_init) = t;
3172
3173 t = fold_build2 (MULT_EXPR, type, threadid, fd->chunk_size);
3174 t = fold_build2 (MULT_EXPR, type, t, fd->step);
3175 t = fold_build2 (PLUS_EXPR, type, t, fd->n1);
3176 v_extra = force_gimple_operand_bsi (&si, t, true, NULL_TREE,
3177 true, BSI_SAME_STMT);
3178
3179 /* Remove the OMP_FOR. */
3180 bsi_remove (&si, true);
3181
3182 /* Iteration space partitioning goes in ITER_PART_BB. */
3183 si = bsi_last (iter_part_bb);
3184
3185 t = fold_build2 (MULT_EXPR, type, trip_main, nthreads);
3186 t = fold_build2 (PLUS_EXPR, type, t, threadid);
3187 t = fold_build2 (MULT_EXPR, type, t, fd->chunk_size);
3188 s0 = force_gimple_operand_bsi (&si, t, true, NULL_TREE,
3189 false, BSI_CONTINUE_LINKING);
3190
3191 t = fold_build2 (PLUS_EXPR, type, s0, fd->chunk_size);
3192 t = fold_build2 (MIN_EXPR, type, t, n);
3193 e0 = force_gimple_operand_bsi (&si, t, true, NULL_TREE,
3194 false, BSI_CONTINUE_LINKING);
3195
3196 t = build2 (LT_EXPR, boolean_type_node, s0, n);
3197 t = build3 (COND_EXPR, void_type_node, t, NULL_TREE, NULL_TREE);
3198 bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);
3199
3200 /* Setup code for sequential iteration goes in SEQ_START_BB. */
3201 si = bsi_start (seq_start_bb);
3202
3203 t = fold_convert (type, s0);
3204 t = fold_build2 (MULT_EXPR, type, t, fd->step);
3205 t = fold_build2 (PLUS_EXPR, type, t, fd->n1);
3206 t = force_gimple_operand_bsi (&si, t, false, NULL_TREE,
3207 false, BSI_CONTINUE_LINKING);
3208 t = build_gimple_modify_stmt (fd->v, t);
3209 bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);
3210 if (gimple_in_ssa_p (cfun))
3211 SSA_NAME_DEF_STMT (fd->v) = t;
3212
3213 t = fold_convert (type, e0);
3214 t = fold_build2 (MULT_EXPR, type, t, fd->step);
3215 t = fold_build2 (PLUS_EXPR, type, t, fd->n1);
3216 e = force_gimple_operand_bsi (&si, t, true, NULL_TREE,
3217 false, BSI_CONTINUE_LINKING);
3218
3219 /* The code controlling the sequential loop goes in CONT_BB,
3220 replacing the OMP_CONTINUE. */
3221 si = bsi_last (cont_bb);
3222 cont = bsi_stmt (si);
3223 gcc_assert (TREE_CODE (cont) == OMP_CONTINUE);
3224 v_main = TREE_OPERAND (cont, 1);
3225 v_back = TREE_OPERAND (cont, 0);
3226
3227 t = build2 (PLUS_EXPR, type, v_main, fd->step);
3228 t = build_gimple_modify_stmt (v_back, t);
3229 bsi_insert_before (&si, t, BSI_SAME_STMT);
3230 if (gimple_in_ssa_p (cfun))
3231 SSA_NAME_DEF_STMT (v_back) = t;
3232
3233 t = build2 (fd->cond_code, boolean_type_node, v_back, e);
3234 t = build3 (COND_EXPR, void_type_node, t, NULL_TREE, NULL_TREE);
3235 bsi_insert_before (&si, t, BSI_SAME_STMT);
3236
3237 /* Remove OMP_CONTINUE. */
3238 bsi_remove (&si, true);
3239
3240 /* Trip update code goes into TRIP_UPDATE_BB. */
3241 si = bsi_start (trip_update_bb);
3242
3243 t = build_int_cst (type, 1);
3244 t = build2 (PLUS_EXPR, type, trip_main, t);
3245 t = build_gimple_modify_stmt (trip_back, t);
3246 bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);
3247 if (gimple_in_ssa_p (cfun))
3248 SSA_NAME_DEF_STMT (trip_back) = t;
3249
3250 /* Replace the OMP_RETURN with a barrier, or nothing. */
3251 si = bsi_last (exit_bb);
3252 if (!OMP_RETURN_NOWAIT (bsi_stmt (si)))
3253 force_gimple_operand_bsi (&si, build_omp_barrier (), false, NULL_TREE,
3254 false, BSI_SAME_STMT);
3255 bsi_remove (&si, true);
3256
3257 /* Connect the new blocks. */
3258 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
3259 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
3260
3261 find_edge (cont_bb, body_bb)->flags = EDGE_TRUE_VALUE;
3262 find_edge (cont_bb, trip_update_bb)->flags = EDGE_FALSE_VALUE;
3263
3264 redirect_edge_and_branch (single_succ_edge (trip_update_bb), iter_part_bb);
3265
3266 if (gimple_in_ssa_p (cfun))
3267 {
3268 /* When we redirect the edge from trip_update_bb to iter_part_bb, we
3269 remove arguments of the phi nodes in fin_bb. We need to create
3270 appropriate phi nodes in iter_part_bb instead. */
3271 se = single_pred_edge (fin_bb);
3272 re = single_succ_edge (trip_update_bb);
3273 ene = single_succ_edge (entry_bb);
3274
3275 args = PENDING_STMT (re);
3276 PENDING_STMT (re) = NULL_TREE;
3277 for (phi = phi_nodes (fin_bb);
3278 phi && args;
3279 phi = PHI_CHAIN (phi), args = TREE_CHAIN (args))
3280 {
3281 t = PHI_RESULT (phi);
3282 gcc_assert (t == TREE_PURPOSE (args));
3283 nphi = create_phi_node (t, iter_part_bb);
3284 SSA_NAME_DEF_STMT (t) = nphi;
3285
3286 t = PHI_ARG_DEF_FROM_EDGE (phi, se);
3287 /* A special case -- fd->v is not yet computed in iter_part_bb, so we
3288 need to use v_extra instead. */
3289 if (t == fd->v)
3290 t = v_extra;
3291 add_phi_arg (nphi, t, ene);
3292 add_phi_arg (nphi, TREE_VALUE (args), re);
3293 }
3294 gcc_assert (!phi && !args);
3295 while ((phi = phi_nodes (fin_bb)) != NULL_TREE)
3296 remove_phi_node (phi, NULL_TREE, false);
3297
3298 /* Make phi node for trip. */
3299 phi = create_phi_node (trip_main, iter_part_bb);
3300 SSA_NAME_DEF_STMT (trip_main) = phi;
3301 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb));
3302 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb));
3303 }
3304
3305 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
3306 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
3307 recompute_dominator (CDI_DOMINATORS, iter_part_bb));
3308 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
3309 recompute_dominator (CDI_DOMINATORS, fin_bb));
3310 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
3311 recompute_dominator (CDI_DOMINATORS, seq_start_bb));
3312 set_immediate_dominator (CDI_DOMINATORS, body_bb,
3313 recompute_dominator (CDI_DOMINATORS, body_bb));
3314 }
3315
3316
3317 /* Expand the OpenMP loop defined by REGION. */
3318
3319 static void
3320 expand_omp_for (struct omp_region *region)
3321 {
3322 struct omp_for_data fd;
3323
3324 extract_omp_for_data (last_stmt (region->entry), &fd);
3325 region->sched_kind = fd.sched_kind;
3326
3327 gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
3328 BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
3329 FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
3330 if (region->cont)
3331 {
3332 gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
3333 BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
3334 FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
3335 }
3336
3337 if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
3338 && !fd.have_ordered
3339 && region->cont != NULL)
3340 {
3341 if (fd.chunk_size == NULL)
3342 expand_omp_for_static_nochunk (region, &fd);
3343 else
3344 expand_omp_for_static_chunk (region, &fd);
3345 }
3346 else
3347 {
3348 int fn_index = fd.sched_kind + fd.have_ordered * 4;
3349 int start_ix = BUILT_IN_GOMP_LOOP_STATIC_START + fn_index;
3350 int next_ix = BUILT_IN_GOMP_LOOP_STATIC_NEXT + fn_index;
3351 expand_omp_for_generic (region, &fd, start_ix, next_ix);
3352 }
3353
3354 update_ssa (TODO_update_ssa_only_virtuals);
3355 }
3356
3357
3358 /* Expand code for an OpenMP sections directive. In pseudo code, we generate
3359
3360 v = GOMP_sections_start (n);
3361 L0:
3362 switch (v)
3363 {
3364 case 0:
3365 goto L2;
3366 case 1:
3367 section 1;
3368 goto L1;
3369 case 2:
3370 ...
3371 case n:
3372 ...
3373 default:
3374 abort ();
3375 }
3376 L1:
3377 v = GOMP_sections_next ();
3378 goto L0;
3379 L2:
3380 reduction;
3381
3382 If this is a combined parallel sections construct, replace the call to
3383 GOMP_sections_start with a call to GOMP_sections_next. */
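/* For example (an assumed input, sketched rather than verbatim): a
   directive containing two sections becomes roughly

       v = GOMP_sections_start (2);
       L0: switch (v)
             {
             case 0: goto L2;
             case 1: <section 1 body>; goto L1;
             case 2: <section 2 body>; goto L1;
             default: __builtin_trap ();
             }
       L1: v = GOMP_sections_next (); goto L0;
       L2: GOMP_sections_end ();  */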
3384
3385 static void
3386 expand_omp_sections (struct omp_region *region)
3387 {
3388 tree label_vec, l1, l2, t, u, sections_stmt, vin, vmain, vnext, cont;
3389 unsigned i, casei, len;
3390 basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
3391 block_stmt_iterator si;
3392 struct omp_region *inner;
3393 bool exit_reachable = region->cont != NULL;
3394
3395 gcc_assert (exit_reachable == (region->exit != NULL));
3396 entry_bb = region->entry;
3397 l0_bb = single_succ (entry_bb);
3398 l1_bb = region->cont;
3399 l2_bb = region->exit;
3400 if (exit_reachable)
3401 {
3402 gcc_assert (single_pred (l2_bb) == l0_bb);
3403 default_bb = create_empty_bb (l1_bb->prev_bb);
3404 l1 = tree_block_label (l1_bb);
3405 l2 = tree_block_label (l2_bb);
3406 }
3407 else
3408 {
3409 default_bb = create_empty_bb (l0_bb);
3410 l1 = NULL_TREE;
3411 l2 = tree_block_label (default_bb);
3412 }
3413
3414 /* We will build a switch() with enough cases for all the
3415 OMP_SECTION regions, a '0' case to handle the case of no more work,
3416 and a default case to abort if something goes wrong. */
3417 len = EDGE_COUNT (l0_bb->succs);
3418 label_vec = make_tree_vec (len + 1);
3419
3420 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
3421 OMP_SECTIONS statement. */
3422 si = bsi_last (entry_bb);
3423 sections_stmt = bsi_stmt (si);
3424 gcc_assert (TREE_CODE (sections_stmt) == OMP_SECTIONS);
3425 vin = OMP_SECTIONS_CONTROL (sections_stmt);
3426 if (!is_combined_parallel (region))
3427 {
3428 /* If we are not inside a combined parallel+sections region,
3429 call GOMP_sections_start. */
3430 t = build_int_cst (unsigned_type_node,
3431 exit_reachable ? len - 1 : len);
3432 u = built_in_decls[BUILT_IN_GOMP_SECTIONS_START];
3433 t = build_call_expr (u, 1, t);
3434 }
3435 else
3436 {
3437 /* Otherwise, call GOMP_sections_next. */
3438 u = built_in_decls[BUILT_IN_GOMP_SECTIONS_NEXT];
3439 t = build_call_expr (u, 0);
3440 }
3441 t = build_gimple_modify_stmt (vin, t);
3442 bsi_insert_after (&si, t, BSI_SAME_STMT);
3443 if (gimple_in_ssa_p (cfun))
3444 SSA_NAME_DEF_STMT (vin) = t;
3445 bsi_remove (&si, true);
3446
3447 /* The switch() statement replacing OMP_SECTIONS_SWITCH goes in L0_BB. */
3448 si = bsi_last (l0_bb);
3449 gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_SECTIONS_SWITCH);
3450 if (exit_reachable)
3451 {
3452 cont = last_stmt (l1_bb);
3453 gcc_assert (TREE_CODE (cont) == OMP_CONTINUE);
3454 vmain = TREE_OPERAND (cont, 1);
3455 vnext = TREE_OPERAND (cont, 0);
3456 }
3457 else
3458 {
3459 vmain = vin;
3460 vnext = NULL_TREE;
3461 }
3462
3463 t = build3 (SWITCH_EXPR, void_type_node, vmain, NULL, label_vec);
3464 bsi_insert_after (&si, t, BSI_SAME_STMT);
3465 bsi_remove (&si, true);
3466
3467 i = 0;
3468 if (exit_reachable)
3469 {
3470 t = build3 (CASE_LABEL_EXPR, void_type_node,
3471 build_int_cst (unsigned_type_node, 0), NULL, l2);
3472 TREE_VEC_ELT (label_vec, 0) = t;
3473 i++;
3474 }
3475
3476 /* Convert each OMP_SECTION into a CASE_LABEL_EXPR. */
3477 for (inner = region->inner, casei = 1;
3478 inner;
3479 inner = inner->next, i++, casei++)
3480 {
3481 basic_block s_entry_bb, s_exit_bb;
3482
3483 s_entry_bb = inner->entry;
3484 s_exit_bb = inner->exit;
3485
3486 t = tree_block_label (s_entry_bb);
3487 u = build_int_cst (unsigned_type_node, casei);
3488 u = build3 (CASE_LABEL_EXPR, void_type_node, u, NULL, t);
3489 TREE_VEC_ELT (label_vec, i) = u;
3490
3491 si = bsi_last (s_entry_bb);
3492 gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_SECTION);
3493 gcc_assert (i < len || OMP_SECTION_LAST (bsi_stmt (si)));
3494 bsi_remove (&si, true);
3495 single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
3496
3497 if (s_exit_bb == NULL)
3498 continue;
3499
3500 si = bsi_last (s_exit_bb);
3501 gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_RETURN);
3502 bsi_remove (&si, true);
3503
3504 single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
3505 }
3506
3507 /* Error handling code goes in DEFAULT_BB. */
3508 t = tree_block_label (default_bb);
3509 u = build3 (CASE_LABEL_EXPR, void_type_node, NULL, NULL, t);
3510 TREE_VEC_ELT (label_vec, len) = u;
3511 make_edge (l0_bb, default_bb, 0);
3512
3513 si = bsi_start (default_bb);
3514 t = build_call_expr (built_in_decls[BUILT_IN_TRAP], 0);
3515 bsi_insert_after (&si, t, BSI_CONTINUE_LINKING);
3516
3517 if (exit_reachable)
3518 {
3519 /* Code to get the next section goes in L1_BB. */
3520 si = bsi_last (l1_bb);
3521 gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_CONTINUE);
3522
3523 t = build_call_expr (built_in_decls[BUILT_IN_GOMP_SECTIONS_NEXT], 0);
3524 t = build_gimple_modify_stmt (vnext, t);
3525 bsi_insert_after (&si, t, BSI_SAME_STMT);
3526 if (gimple_in_ssa_p (cfun))
3527 SSA_NAME_DEF_STMT (vnext) = t;
3528 bsi_remove (&si, true);
3529
3530 single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
3531
3532 /* Cleanup function replaces OMP_RETURN in EXIT_BB. */
3533 si = bsi_last (l2_bb);
3534 if (OMP_RETURN_NOWAIT (bsi_stmt (si)))
3535 t = built_in_decls[BUILT_IN_GOMP_SECTIONS_END_NOWAIT];
3536 else
3537 t = built_in_decls[BUILT_IN_GOMP_SECTIONS_END];
3538 t = build_call_expr (t, 0);
3539 bsi_insert_after (&si, t, BSI_SAME_STMT);
3540 bsi_remove (&si, true);
3541 }
3542
3543 set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
3544 }
3545
3546
3547 /* Expand code for an OpenMP single directive. We've already expanded
3548 much of the code; here we simply place the GOMP_barrier call. */
3549
3550 static void
3551 expand_omp_single (struct omp_region *region)
3552 {
3553 basic_block entry_bb, exit_bb;
3554 block_stmt_iterator si;
3555 bool need_barrier = false;
3556
3557 entry_bb = region->entry;
3558 exit_bb = region->exit;
3559
3560 si = bsi_last (entry_bb);
3561 /* The terminal barrier at the end of a GOMP_single_copy sequence cannot
3562 be removed. We need to ensure that the thread that entered the single
3563 does not exit before the data is copied out by the other threads. */
3564 if (find_omp_clause (OMP_SINGLE_CLAUSES (bsi_stmt (si)),
3565 OMP_CLAUSE_COPYPRIVATE))
3566 need_barrier = true;
3567 gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_SINGLE);
3568 bsi_remove (&si, true);
3569 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
3570
3571 si = bsi_last (exit_bb);
3572 if (!OMP_RETURN_NOWAIT (bsi_stmt (si)) || need_barrier)
3573 force_gimple_operand_bsi (&si, build_omp_barrier (), false, NULL_TREE,
3574 false, BSI_SAME_STMT);
3575 bsi_remove (&si, true);
3576 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
3577 }
3578
3579
3580 /* Generic expansion for OpenMP synchronization directives: master,
3581 ordered and critical. All we need to do here is remove the entry
3582 and exit markers for REGION. */
3583
3584 static void
3585 expand_omp_synch (struct omp_region *region)
3586 {
3587 basic_block entry_bb, exit_bb;
3588 block_stmt_iterator si;
3589
3590 entry_bb = region->entry;
3591 exit_bb = region->exit;
3592
3593 si = bsi_last (entry_bb);
3594 gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_SINGLE
3595 || TREE_CODE (bsi_stmt (si)) == OMP_MASTER
3596 || TREE_CODE (bsi_stmt (si)) == OMP_ORDERED
3597 || TREE_CODE (bsi_stmt (si)) == OMP_CRITICAL);
3598 bsi_remove (&si, true);
3599 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
3600
3601 if (exit_bb)
3602 {
3603 si = bsi_last (exit_bb);
3604 gcc_assert (TREE_CODE (bsi_stmt (si)) == OMP_RETURN);
3605 bsi_remove (&si, true);
3606 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
3607 }
3608 }
3609
3610 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
3611 operation as a __sync_fetch_and_op builtin. INDEX is log2 of the
3612 size of the data type, and thus usable to find the index of the builtin
3613 decl. Returns false if the expression is not of the proper form. */
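/* Illustrative example (assuming a 4-byte int and a target that
   provides the corresponding sync optab): for

       #pragma omp atomic
       x += 5;

   the load/store pair described below collapses into roughly

       __sync_fetch_and_add_4 (&x, 5);

   with the OMP_ATOMIC_LOAD and OMP_ATOMIC_STORE statements removed.  */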
3614
3615 static bool
3616 expand_omp_atomic_fetch_op (basic_block load_bb,
3617 tree addr, tree loaded_val,
3618 tree stored_val, int index)
3619 {
3620 enum built_in_function base;
3621 tree decl, itype, call;
3622 enum insn_code *optab;
3623 tree rhs;
3624 basic_block store_bb = single_succ (load_bb);
3625 block_stmt_iterator bsi;
3626 tree stmt;
3627
3628 /* We expect to find the following sequences:
3629
3630 load_bb:
3631 OMP_ATOMIC_LOAD (tmp, mem)
3632
3633 store_bb:
3634 val = tmp OP something; (or: something OP tmp)
3635 OMP_ATOMIC_STORE (val)
3636
3637 ???FIXME: Allow a more flexible sequence.
3638 Perhaps use data flow to pick the statements.
3639
3640 */
3641
3642 bsi = bsi_after_labels (store_bb);
3643 stmt = bsi_stmt (bsi);
3644 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
3645 return false;
3646 bsi_next (&bsi);
3647 if (TREE_CODE (bsi_stmt (bsi)) != OMP_ATOMIC_STORE)
3648 return false;
3649
3650 if (!operand_equal_p (GIMPLE_STMT_OPERAND (stmt, 0), stored_val, 0))
3651 return false;
3652
3653 rhs = GIMPLE_STMT_OPERAND (stmt, 1);
3654
3655 /* Check for one of the supported fetch-op operations. */
3656 switch (TREE_CODE (rhs))
3657 {
3658 case PLUS_EXPR:
3659 case POINTER_PLUS_EXPR:
3660 base = BUILT_IN_FETCH_AND_ADD_N;
3661 optab = sync_add_optab;
3662 break;
3663 case MINUS_EXPR:
3664 base = BUILT_IN_FETCH_AND_SUB_N;
3665 optab = sync_sub_optab;
3666 break;
3667 case BIT_AND_EXPR:
3668 base = BUILT_IN_FETCH_AND_AND_N;
3669 optab = sync_and_optab;
3670 break;
3671 case BIT_IOR_EXPR:
3672 base = BUILT_IN_FETCH_AND_OR_N;
3673 optab = sync_ior_optab;
3674 break;
3675 case BIT_XOR_EXPR:
3676 base = BUILT_IN_FETCH_AND_XOR_N;
3677 optab = sync_xor_optab;
3678 break;
3679 default:
3680 return false;
3681 }
3682 /* Make sure the expression is of the proper form. */
3683 if (operand_equal_p (TREE_OPERAND (rhs, 0), loaded_val, 0))
3684 rhs = TREE_OPERAND (rhs, 1);
3685 else if (commutative_tree_code (TREE_CODE (rhs))
3686 && operand_equal_p (TREE_OPERAND (rhs, 1), loaded_val, 0))
3687 rhs = TREE_OPERAND (rhs, 0);
3688 else
3689 return false;
3690
3691 decl = built_in_decls[base + index + 1];
3692 itype = TREE_TYPE (TREE_TYPE (decl));
3693
3694 if (optab[TYPE_MODE (itype)] == CODE_FOR_nothing)
3695 return false;
3696
3697 bsi = bsi_last (load_bb);
3698 gcc_assert (TREE_CODE (bsi_stmt (bsi)) == OMP_ATOMIC_LOAD);
3699 call = build_call_expr (decl, 2, addr, fold_convert (itype, rhs));
3700 force_gimple_operand_bsi (&bsi, call, true, NULL_TREE, true, BSI_SAME_STMT);
3701 bsi_remove (&bsi, true);
3702
3703 bsi = bsi_last (store_bb);
3704 gcc_assert (TREE_CODE (bsi_stmt (bsi)) == OMP_ATOMIC_STORE);
3705 bsi_remove (&bsi, true);
3706 bsi = bsi_last (store_bb);
3707 bsi_remove (&bsi, true);
3708
3709 if (gimple_in_ssa_p (cfun))
3710 update_ssa (TODO_update_ssa_no_phi);
3711
3712 return true;
3713 }
3714
3715 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
3716
3717 oldval = *addr;
3718 repeat:
3719 newval = rhs; // with oldval replacing *addr in rhs
3720 actual = __sync_val_compare_and_swap (addr, oldval, newval);
3721 if (actual != oldval)
3722 { oldval = actual; goto repeat; }
3723
3724 INDEX is log2 of the size of the data type, and thus usable to find the
3725 index of the builtin decl. */
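/* Informal sketch of the result for a 4-byte float F and
   "#pragma omp atomic  f *= 2.0f" (names here are ad hoc, not what the
   expander emits verbatim):

     expected = VIEW_CONVERT_EXPR<int> (f);
     do {
       newval   = VIEW_CONVERT_EXPR<int> (f * 2.0f);
       actual   = __sync_val_compare_and_swap_4 (iaddr, expected, newval);
       done     = (actual == expected);
       expected = actual;
       f        = VIEW_CONVERT_EXPR<float> (actual);
     } while (!done);

   Comparing the integer views keeps the loop from spinning on NaNs
   or -0.0, as noted in the body below.  */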
3726
3727 static bool
3728 expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
3729 tree addr, tree loaded_val, tree stored_val,
3730 int index)
3731 {
3732 tree loadedi, storedi, initial, new_stored, new_storedi, old_vali;
3733 tree type, itype, cmpxchg, iaddr;
3734 block_stmt_iterator bsi;
3735 basic_block loop_header = single_succ (load_bb);
3736 tree phi, x;
3737 edge e;
3738
3739 cmpxchg = built_in_decls[BUILT_IN_VAL_COMPARE_AND_SWAP_N + index + 1];
3740 type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr)));
3741 itype = TREE_TYPE (TREE_TYPE (cmpxchg));
3742
3743 if (sync_compare_and_swap[TYPE_MODE (itype)] == CODE_FOR_nothing)
3744 return false;
3745
3746 /* Load the initial value, replacing the OMP_ATOMIC_LOAD. */
3747 bsi = bsi_last (load_bb);
3748 gcc_assert (TREE_CODE (bsi_stmt (bsi)) == OMP_ATOMIC_LOAD);
3749 initial = force_gimple_operand_bsi (&bsi, build_fold_indirect_ref (addr),
3750 true, NULL_TREE, true, BSI_SAME_STMT);
3751 /* Move the value to the LOADED_VAL temporary. */
3752 if (gimple_in_ssa_p (cfun))
3753 {
3754 gcc_assert (phi_nodes (loop_header) == NULL_TREE);
3755 phi = create_phi_node (loaded_val, loop_header);
3756 SSA_NAME_DEF_STMT (loaded_val) = phi;
3757 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
3758 initial);
3759 }
3760 else
3761 bsi_insert_before (&bsi,
3762 build_gimple_modify_stmt (loaded_val, initial),
3763 BSI_SAME_STMT);
3764 bsi_remove (&bsi, true);
3765
3766 bsi = bsi_last (store_bb);
3767 gcc_assert (TREE_CODE (bsi_stmt (bsi)) == OMP_ATOMIC_STORE);
3768
3769 /* For floating-point values, we'll need to view-convert them to integers
3770 so that we can perform the atomic compare and swap. Simplify the
3771 following code by always setting up the "i"ntegral variables. */
3772 if (INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
3773 {
3774 loadedi = loaded_val;
3775 storedi = stored_val;
3776 iaddr = addr;
3777 }
3778 else
3779 {
3780 loadedi = force_gimple_operand_bsi (&bsi,
3781 build1 (VIEW_CONVERT_EXPR, itype,
3782 loaded_val), true,
3783 NULL_TREE, true, BSI_SAME_STMT);
3784 storedi =
3785 force_gimple_operand_bsi (&bsi,
3786 build1 (VIEW_CONVERT_EXPR, itype,
3787 stored_val), true, NULL_TREE, true,
3788 BSI_SAME_STMT);
3789 iaddr = fold_convert (build_pointer_type (itype), addr);
3790 }
3791
3792 /* Build the compare&swap statement. */
3793 new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
3794 new_storedi = force_gimple_operand_bsi (&bsi,
3795 fold_convert (itype, new_storedi),
3796 true, NULL_TREE,
3797 true, BSI_SAME_STMT);
3798 if (storedi == stored_val)
3799 new_stored = new_storedi;
3800 else
3801 new_stored = force_gimple_operand_bsi (&bsi,
3802 build1 (VIEW_CONVERT_EXPR, type,
3803 new_storedi), true,
3804 NULL_TREE, true, BSI_SAME_STMT);
3805
3806 if (gimple_in_ssa_p (cfun))
3807 old_vali = loadedi;
3808 else
3809 {
3810 old_vali = create_tmp_var (itype, NULL);
3811 x = build_gimple_modify_stmt (old_vali, loadedi);
3812 bsi_insert_before (&bsi, x, BSI_SAME_STMT);
3813
3814 x = build_gimple_modify_stmt (loaded_val, new_stored);
3815 bsi_insert_before (&bsi, x, BSI_SAME_STMT);
3816 }
3817
3818 /* Note that we always perform the comparison as an integer, even for
3819 floating point. This allows the atomic operation to properly
3820 succeed even with NaNs and -0.0. */
3821 x = build3 (COND_EXPR, void_type_node,
3822 build2 (NE_EXPR, boolean_type_node,
3823 new_storedi, old_vali), NULL_TREE, NULL_TREE);
3824 bsi_insert_before (&bsi, x, BSI_SAME_STMT);
3825
3826 /* Update cfg. */
3827 e = single_succ_edge (store_bb);
3828 e->flags &= ~EDGE_FALLTHRU;
3829 e->flags |= EDGE_FALSE_VALUE;
3830
3831 e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
3832
3833 /* Copy the new value to loaded_val (we already did that before the condition
3834 if we are not in SSA). */
3835 if (gimple_in_ssa_p (cfun))
3836 {
3837 phi = phi_nodes (loop_header);
3838 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_stored);
3839 }
3840
3841 /* Remove OMP_ATOMIC_STORE. */
3842 bsi_remove (&bsi, true);
3843
3844 if (gimple_in_ssa_p (cfun))
3845 update_ssa (TODO_update_ssa_no_phi);
3846
3847 return true;
3848 }
3849
3850 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
3851
3852 GOMP_atomic_start ();
3853 *addr = rhs;
3854 GOMP_atomic_end ();
3855
3856 The result is not globally atomic, but works so long as all parallel
3857 references are within #pragma omp atomic directives. According to
3858 responses received from omp@openmp.org, this appears to be within
3859 spec, which makes sense, since that is how several other compilers
3860 handle this situation as well.
3861 LOADED_VAL and ADDR are the operands of OMP_ATOMIC_LOAD we're expanding.
3862 STORED_VAL is the operand of the matching OMP_ATOMIC_STORE.
3863
3864 We replace
3865 OMP_ATOMIC_LOAD (loaded_val, addr) with
3866 loaded_val = *addr;
3867
3868 and replace
3869 OMP_ATOMIC_STORE (stored_val) with
3870 *addr = stored_val;
3871 */
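/* A hypothetical case where this fallback is needed: a type whose size
   is not a supported power of two (e.g. a 12-byte long double), so no
   __sync builtin exists.  "#pragma omp atomic  ld += 1.0L" then
   becomes, roughly,

     GOMP_atomic_start ();
     loaded_val = *addr;
     *addr = loaded_val + 1.0L;
     GOMP_atomic_end ();

   serializing every such update through one global mutex in libgomp.  */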
3872
3873 static bool
3874 expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
3875 tree addr, tree loaded_val, tree stored_val)
3876 {
3877 block_stmt_iterator bsi;
3878 tree t;
3879
3880 bsi = bsi_last (load_bb);
3881 gcc_assert (TREE_CODE (bsi_stmt (bsi)) == OMP_ATOMIC_LOAD);
3882
3883 t = built_in_decls[BUILT_IN_GOMP_ATOMIC_START];
3884 t = build_function_call_expr (t, 0);
3885 force_gimple_operand_bsi (&bsi, t, true, NULL_TREE, true, BSI_SAME_STMT);
3886
3887 t = build_gimple_modify_stmt (loaded_val, build_fold_indirect_ref (addr));
3888 if (gimple_in_ssa_p (cfun))
3889 SSA_NAME_DEF_STMT (loaded_val) = t;
3890 bsi_insert_before (&bsi, t, BSI_SAME_STMT);
3891 bsi_remove (&bsi, true);
3892
3893 bsi = bsi_last (store_bb);
3894 gcc_assert (TREE_CODE (bsi_stmt (bsi)) == OMP_ATOMIC_STORE);
3895
3896 t = build_gimple_modify_stmt (build_fold_indirect_ref (unshare_expr (addr)),
3897 stored_val);
3898 bsi_insert_before (&bsi, t, BSI_SAME_STMT);
3899
3900 t = built_in_decls[BUILT_IN_GOMP_ATOMIC_END];
3901 t = build_function_call_expr (t, 0);
3902 force_gimple_operand_bsi (&bsi, t, true, NULL_TREE, true, BSI_SAME_STMT);
3903 bsi_remove (&bsi, true);
3904
3905 if (gimple_in_ssa_p (cfun))
3906 update_ssa (TODO_update_ssa_no_phi);
3907 return true;
3908 }
3909
3910 /* Expand an OMP_ATOMIC statement. We first try to expand it
3911 using expand_omp_atomic_fetch_op. If that fails, we try
3912 expand_omp_atomic_pipeline, and if that fails too, the
3913 ultimate fallback is wrapping the operation in a mutex
3914 (expand_omp_atomic_mutex). REGION is the atomic region built
3915 by build_omp_regions_1 (). */
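/* Worked example of the size check below: for a 4-byte int,
   TYPE_SIZE_UNIT is 4 and index = exact_log2 (4) = 2, so the callees
   pick the 4-byte builtin via base + index + 1 (e.g.
   __sync_fetch_and_add_4).  A 12-byte type (exact_log2 returns -1) or
   a 32-byte type (index == 5 > 4) goes straight to the mutex
   fallback.  */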
3916
3917 static void
3918 expand_omp_atomic (struct omp_region *region)
3919 {
3920 basic_block load_bb = region->entry, store_bb = region->exit;
3921 tree load = last_stmt (load_bb), store = last_stmt (store_bb);
3922 tree loaded_val = TREE_OPERAND (load, 0);
3923 tree addr = TREE_OPERAND (load, 1);
3924 tree stored_val = TREE_OPERAND (store, 0);
3925 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr)));
3926 HOST_WIDE_INT index;
3927
3928 /* Make sure the type is one of the supported sizes. */
3929 index = tree_low_cst (TYPE_SIZE_UNIT (type), 1);
3930 index = exact_log2 (index);
3931 if (index >= 0 && index <= 4)
3932 {
3933 unsigned int align = TYPE_ALIGN_UNIT (type);
3934
3935 /* __sync builtins require strict data alignment. */
3936 if (exact_log2 (align) >= index)
3937 {
3938 /* When possible, use specialized atomic update functions. */
3939 if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
3940 && store_bb == single_succ (load_bb))
3941 {
3942 if (expand_omp_atomic_fetch_op (load_bb, addr,
3943 loaded_val, stored_val, index))
3944 return;
3945 }
3946
3947 /* If we don't have specialized __sync builtins, try to implement
3948 it as a compare-and-swap loop. */
3949 if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
3950 loaded_val, stored_val, index))
3951 return;
3952 }
3953 }
3954
3955 /* The ultimate fallback is wrapping the operation in a mutex. */
3956 expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
3957 }
3958
3959
3960 /* Expand the parallel region tree rooted at REGION. Expansion
3961 proceeds in depth-first order. Innermost regions are expanded
3962 first. This way, parallel regions that require a new function to
3963 be created (e.g., OMP_PARALLEL) can be expanded without having any
3964 internal dependencies in their body. */
3965
3966 static void
3967 expand_omp (struct omp_region *region)
3968 {
3969 while (region)
3970 {
3971 /* First, determine whether this is a combined parallel+workshare
3972 region. */
3973 if (region->type == OMP_PARALLEL)
3974 determine_parallel_type (region);
3975
3976 if (region->inner)
3977 expand_omp (region->inner);
3978
3979 switch (region->type)
3980 {
3981 case OMP_PARALLEL:
3982 expand_omp_parallel (region);
3983 break;
3984
3985 case OMP_FOR:
3986 expand_omp_for (region);
3987 break;
3988
3989 case OMP_SECTIONS:
3990 expand_omp_sections (region);
3991 break;
3992
3993 case OMP_SECTION:
3994 /* Individual omp sections are handled together with their
3995 parent OMP_SECTIONS region. */
3996 break;
3997
3998 case OMP_SINGLE:
3999 expand_omp_single (region);
4000 break;
4001
4002 case OMP_MASTER:
4003 case OMP_ORDERED:
4004 case OMP_CRITICAL:
4005 expand_omp_synch (region);
4006 break;
4007
4008 case OMP_ATOMIC_LOAD:
4009 expand_omp_atomic (region);
4010 break;
4011
4012
4013 default:
4014 gcc_unreachable ();
4015 }
4016
4017 region = region->next;
4018 }
4019 }
4020
4021
4022 /* Helper for build_omp_regions. Scan the dominator tree starting at
4023 block BB. PARENT is the region that contains BB. If SINGLE_TREE is
4024 true, the function ends once a single tree is built (otherwise, the
4025 whole forest of OMP constructs may be built). */
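/* For example (illustrative only), a body such as

     #pragma omp parallel
       {
         #pragma omp for
         for (...) ...
       }

   produces an OMP_PARALLEL region whose ->inner points to an OMP_FOR
   region; each region's ->entry is the block holding the directive,
   ->exit is the block holding the matching OMP_RETURN, and for the
   loop ->cont is the block holding OMP_CONTINUE.  */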
4026
4027 static void
4028 build_omp_regions_1 (basic_block bb, struct omp_region *parent,
4029 bool single_tree)
4030 {
4031 block_stmt_iterator si;
4032 tree stmt;
4033 basic_block son;
4034
4035 si = bsi_last (bb);
4036 if (!bsi_end_p (si) && OMP_DIRECTIVE_P (bsi_stmt (si)))
4037 {
4038 struct omp_region *region;
4039 enum tree_code code;
4040
4041 stmt = bsi_stmt (si);
4042 code = TREE_CODE (stmt);
4043 if (code == OMP_RETURN)
4044 {
4045 /* STMT is the return point out of region PARENT. Mark it
4046 as the exit point and make PARENT the immediately
4047 enclosing region. */
4048 gcc_assert (parent);
4049 region = parent;
4050 region->exit = bb;
4051 parent = parent->outer;
4052 }
4053 else if (code == OMP_ATOMIC_STORE)
4054 {
4055 /* OMP_ATOMIC_STORE is analogous to OMP_RETURN, but matches with
4056 OMP_ATOMIC_LOAD. */
4057 gcc_assert (parent);
4058 gcc_assert (parent->type == OMP_ATOMIC_LOAD);
4059 region = parent;
4060 region->exit = bb;
4061 parent = parent->outer;
4062 }
4063
4064 else if (code == OMP_CONTINUE)
4065 {
4066 gcc_assert (parent);
4067 parent->cont = bb;
4068 }
4069 else if (code == OMP_SECTIONS_SWITCH)
4070 {
4071 /* OMP_SECTIONS_SWITCH is part of OMP_SECTIONS, and we do nothing for
4072 it. */ ;
4073 }
4074 else
4075 {
4076 /* Otherwise, this directive becomes the parent for a new
4077 region. */
4078 region = new_omp_region (bb, code, parent);
4079 parent = region;
4080 }
4081 }
4082
4083 if (single_tree && !parent)
4084 return;
4085
4086 for (son = first_dom_son (CDI_DOMINATORS, bb);
4087 son;
4088 son = next_dom_son (CDI_DOMINATORS, son))
4089 build_omp_regions_1 (son, parent, single_tree);
4090 }
4091
4092 /* Builds the tree of OMP regions rooted at ROOT, storing it in
4093 root_omp_region. */
4094
4095 static void
4096 build_omp_regions_root (basic_block root)
4097 {
4098 gcc_assert (root_omp_region == NULL);
4099 build_omp_regions_1 (root, NULL, true);
4100 gcc_assert (root_omp_region != NULL);
4101 }
4102
4103 /* Expands the OMP construct (and its subconstructs) starting in HEAD. */
4104
4105 void
4106 omp_expand_local (basic_block head)
4107 {
4108 build_omp_regions_root (head);
4109 if (dump_file && (dump_flags & TDF_DETAILS))
4110 {
4111 fprintf (dump_file, "\nOMP region tree\n\n");
4112 dump_omp_region (dump_file, root_omp_region, 0);
4113 fprintf (dump_file, "\n");
4114 }
4115
4116 remove_exit_barriers (root_omp_region);
4117 expand_omp (root_omp_region);
4118
4119 free_omp_regions ();
4120 }
4121
4122 /* Scan the CFG and build a tree of OMP regions. Store the root of
4123 the tree in root_omp_region. */
4124
4125 static void
4126 build_omp_regions (void)
4127 {
4128 gcc_assert (root_omp_region == NULL);
4129 calculate_dominance_info (CDI_DOMINATORS);
4130 build_omp_regions_1 (ENTRY_BLOCK_PTR, NULL, false);
4131 }
4132
4133
4134 /* Main entry point for expanding OMP-GIMPLE into runtime calls. */
4135
4136 static unsigned int
4137 execute_expand_omp (void)
4138 {
4139 build_omp_regions ();
4140
4141 if (!root_omp_region)
4142 return 0;
4143
4144 if (dump_file)
4145 {
4146 fprintf (dump_file, "\nOMP region tree\n\n");
4147 dump_omp_region (dump_file, root_omp_region, 0);
4148 fprintf (dump_file, "\n");
4149 }
4150
4151 remove_exit_barriers (root_omp_region);
4152
4153 expand_omp (root_omp_region);
4154
4155 cleanup_tree_cfg ();
4156
4157 free_omp_regions ();
4158
4159 return 0;
4160 }
4161
4162 /* OMP expansion in SSA form. For testing purposes only. */
4163
4164 static bool
4165 gate_expand_omp_ssa (void)
4166 {
4167 return flag_openmp_ssa && flag_openmp != 0 && errorcount == 0;
4168 }
4169
4170 struct tree_opt_pass pass_expand_omp_ssa =
4171 {
4172 "ompexpssa", /* name */
4173 gate_expand_omp_ssa, /* gate */
4174 execute_expand_omp, /* execute */
4175 NULL, /* sub */
4176 NULL, /* next */
4177 0, /* static_pass_number */
4178 0, /* tv_id */
4179 PROP_gimple_any, /* properties_required */
4180 PROP_gimple_lomp, /* properties_provided */
4181 0, /* properties_destroyed */
4182 0, /* todo_flags_start */
4183 TODO_dump_func, /* todo_flags_finish */
4184 0 /* letter */
4185 };
4186
4187 /* OMP expansion -- the default pass, run before creation of SSA form. */
4188
4189 static bool
4190 gate_expand_omp (void)
4191 {
4192 return ((!flag_openmp_ssa || !optimize)
4193 && flag_openmp != 0 && errorcount == 0);
4194 }
4195
4196 struct tree_opt_pass pass_expand_omp =
4197 {
4198 "ompexp", /* name */
4199 gate_expand_omp, /* gate */
4200 execute_expand_omp, /* execute */
4201 NULL, /* sub */
4202 NULL, /* next */
4203 0, /* static_pass_number */
4204 0, /* tv_id */
4205 PROP_gimple_any, /* properties_required */
4206 PROP_gimple_lomp, /* properties_provided */
4207 0, /* properties_destroyed */
4208 0, /* todo_flags_start */
4209 TODO_dump_func, /* todo_flags_finish */
4210 0 /* letter */
4211 };
4212 \f
4213 /* Routines to lower OpenMP directives into OMP-GIMPLE. */
4214
4215 /* Lower the OpenMP sections directive in *STMT_P. */
4216
4217 static void
4218 lower_omp_sections (tree *stmt_p, omp_context *ctx)
4219 {
4220 tree new_stmt, stmt, body, bind, block, ilist, olist, new_body, control;
4221 tree t, dlist;
4222 tree_stmt_iterator tsi;
4223 unsigned i, len;
4224
4225 stmt = *stmt_p;
4226
4227 push_gimplify_context ();
4228
4229 dlist = NULL;
4230 ilist = NULL;
4231 lower_rec_input_clauses (OMP_SECTIONS_CLAUSES (stmt), &ilist, &dlist, ctx);
4232
4233 tsi = tsi_start (OMP_SECTIONS_BODY (stmt));
4234 for (len = 0; !tsi_end_p (tsi); len++, tsi_next (&tsi))
4235 continue;
4236
4237 tsi = tsi_start (OMP_SECTIONS_BODY (stmt));
4238 body = alloc_stmt_list ();
4239 for (i = 0; i < len; i++, tsi_next (&tsi))
4240 {
4241 omp_context *sctx;
4242 tree sec_start, sec_end;
4243
4244 sec_start = tsi_stmt (tsi);
4245 sctx = maybe_lookup_ctx (sec_start);
4246 gcc_assert (sctx);
4247
4248 append_to_statement_list (sec_start, &body);
4249
4250 lower_omp (&OMP_SECTION_BODY (sec_start), sctx);
4251 append_to_statement_list (OMP_SECTION_BODY (sec_start), &body);
4252 OMP_SECTION_BODY (sec_start) = NULL;
4253
4254 if (i == len - 1)
4255 {
4256 tree l = alloc_stmt_list ();
4257 lower_lastprivate_clauses (OMP_SECTIONS_CLAUSES (stmt), NULL,
4258 &l, ctx);
4259 append_to_statement_list (l, &body);
4260 OMP_SECTION_LAST (sec_start) = 1;
4261 }
4262
4263 sec_end = make_node (OMP_RETURN);
4264 append_to_statement_list (sec_end, &body);
4265 }
4266
4267 block = make_node (BLOCK);
4268 bind = build3 (BIND_EXPR, void_type_node, NULL, body, block);
4269
4270 olist = NULL_TREE;
4271 lower_reduction_clauses (OMP_SECTIONS_CLAUSES (stmt), &olist, ctx);
4272
4273 pop_gimplify_context (NULL_TREE);
4274 record_vars_into (ctx->block_vars, ctx->cb.dst_fn);
4275
4276 new_stmt = build3 (BIND_EXPR, void_type_node, NULL, NULL, NULL);
4277 TREE_SIDE_EFFECTS (new_stmt) = 1;
4278
4279 new_body = alloc_stmt_list ();
4280 append_to_statement_list (ilist, &new_body);
4281 append_to_statement_list (stmt, &new_body);
4282 append_to_statement_list (make_node (OMP_SECTIONS_SWITCH), &new_body);
4283 append_to_statement_list (bind, &new_body);
4284
4285 control = create_tmp_var (unsigned_type_node, ".section");
4286 t = build2 (OMP_CONTINUE, void_type_node, control, control);
4287 OMP_SECTIONS_CONTROL (stmt) = control;
4288 append_to_statement_list (t, &new_body);
4289
4290 append_to_statement_list (olist, &new_body);
4291 append_to_statement_list (dlist, &new_body);
4292
4293 maybe_catch_exception (&new_body);
4294
4295 t = make_node (OMP_RETURN);
4296 OMP_RETURN_NOWAIT (t) = !!find_omp_clause (OMP_SECTIONS_CLAUSES (stmt),
4297 OMP_CLAUSE_NOWAIT);
4298 append_to_statement_list (t, &new_body);
4299
4300 BIND_EXPR_BODY (new_stmt) = new_body;
4301 OMP_SECTIONS_BODY (stmt) = NULL;
4302
4303 *stmt_p = new_stmt;
4304 }
4305
4306
4307 /* A subroutine of lower_omp_single. Expand the simple form of
4308 an OMP_SINGLE, without a copyprivate clause:
4309
4310 if (GOMP_single_start ())
4311 BODY;
4312 [ GOMP_barrier (); ] -> unless 'nowait' is present.
4313
4314 FIXME. It may be better to delay expanding the logic of this until
4315 pass_expand_omp. The expanded logic may make the job more difficult
4316 for a synchronization analysis pass. */
4317
4318 static void
4319 lower_omp_single_simple (tree single_stmt, tree *pre_p)
4320 {
4321 tree t;
4322
4323 t = build_call_expr (built_in_decls[BUILT_IN_GOMP_SINGLE_START], 0);
4324 t = build3 (COND_EXPR, void_type_node, t,
4325 OMP_SINGLE_BODY (single_stmt), NULL);
4326 gimplify_and_add (t, pre_p);
4327 }
4328
4329
4330 /* A subroutine of lower_omp_single. Expand an OMP_SINGLE
4331 with a copyprivate clause:
4332
4333 #pragma omp single copyprivate (a, b, c)
4334
4335 Create a new structure to hold copies of 'a', 'b' and 'c' and emit:
4336
4337 {
4338 if ((copyout_p = GOMP_single_copy_start ()) == NULL)
4339 {
4340 BODY;
4341 copyout.a = a;
4342 copyout.b = b;
4343 copyout.c = c;
4344 GOMP_single_copy_end (&copyout);
4345 }
4346 else
4347 {
4348 a = copyout_p->a;
4349 b = copyout_p->b;
4350 c = copyout_p->c;
4351 }
4352 GOMP_barrier ();
4353 }
4354
4355 FIXME. It may be better to delay expanding the logic of this until
4356 pass_expand_omp. The expanded logic may make the job more difficult
4357 for a synchronization analysis pass. */
4358
4359 static void
4360 lower_omp_single_copy (tree single_stmt, tree *pre_p, omp_context *ctx)
4361 {
4362 tree ptr_type, t, l0, l1, l2, copyin_seq;
4363
4364 ctx->sender_decl = create_tmp_var (ctx->record_type, ".omp_copy_o");
4365
4366 ptr_type = build_pointer_type (ctx->record_type);
4367 ctx->receiver_decl = create_tmp_var (ptr_type, ".omp_copy_i");
4368
4369 l0 = create_artificial_label ();
4370 l1 = create_artificial_label ();
4371 l2 = create_artificial_label ();
4372
4373 t = build_call_expr (built_in_decls[BUILT_IN_GOMP_SINGLE_COPY_START], 0);
4374 t = fold_convert (ptr_type, t);
4375 t = build_gimple_modify_stmt (ctx->receiver_decl, t);
4376 gimplify_and_add (t, pre_p);
4377
4378 t = build2 (EQ_EXPR, boolean_type_node, ctx->receiver_decl,
4379 build_int_cst (ptr_type, 0));
4380 t = build3 (COND_EXPR, void_type_node, t,
4381 build_and_jump (&l0), build_and_jump (&l1));
4382 gimplify_and_add (t, pre_p);
4383
4384 t = build1 (LABEL_EXPR, void_type_node, l0);
4385 gimplify_and_add (t, pre_p);
4386
4387 append_to_statement_list (OMP_SINGLE_BODY (single_stmt), pre_p);
4388
4389 copyin_seq = NULL;
4390 lower_copyprivate_clauses (OMP_SINGLE_CLAUSES (single_stmt), pre_p,
4391 &copyin_seq, ctx);
4392
4393 t = build_fold_addr_expr (ctx->sender_decl);
4394 t = build_call_expr (built_in_decls[BUILT_IN_GOMP_SINGLE_COPY_END], 1, t);
4395 gimplify_and_add (t, pre_p);
4396
4397 t = build_and_jump (&l2);
4398 gimplify_and_add (t, pre_p);
4399
4400 t = build1 (LABEL_EXPR, void_type_node, l1);
4401 gimplify_and_add (t, pre_p);
4402
4403 append_to_statement_list (copyin_seq, pre_p);
4404
4405 t = build1 (LABEL_EXPR, void_type_node, l2);
4406 gimplify_and_add (t, pre_p);
4407 }
4408
4409
4410 /* Lower code for an OpenMP single directive. */
4411
4412 static void
4413 lower_omp_single (tree *stmt_p, omp_context *ctx)
4414 {
4415 tree t, bind, block, single_stmt = *stmt_p, dlist;
4416
4417 push_gimplify_context ();
4418
4419 block = make_node (BLOCK);
4420 *stmt_p = bind = build3 (BIND_EXPR, void_type_node, NULL, NULL, block);
4421 TREE_SIDE_EFFECTS (bind) = 1;
4422
4423 lower_rec_input_clauses (OMP_SINGLE_CLAUSES (single_stmt),
4424 &BIND_EXPR_BODY (bind), &dlist, ctx);
4425 lower_omp (&OMP_SINGLE_BODY (single_stmt), ctx);
4426
4427 append_to_statement_list (single_stmt, &BIND_EXPR_BODY (bind));
4428
4429 if (ctx->record_type)
4430 lower_omp_single_copy (single_stmt, &BIND_EXPR_BODY (bind), ctx);
4431 else
4432 lower_omp_single_simple (single_stmt, &BIND_EXPR_BODY (bind));
4433
4434 OMP_SINGLE_BODY (single_stmt) = NULL;
4435
4436 append_to_statement_list (dlist, &BIND_EXPR_BODY (bind));
4437
4438 maybe_catch_exception (&BIND_EXPR_BODY (bind));
4439
4440 t = make_node (OMP_RETURN);
4441 OMP_RETURN_NOWAIT (t) = !!find_omp_clause (OMP_SINGLE_CLAUSES (single_stmt),
4442 OMP_CLAUSE_NOWAIT);
4443 append_to_statement_list (t, &BIND_EXPR_BODY (bind));
4444
4445 pop_gimplify_context (bind);
4446
4447 BIND_EXPR_VARS (bind) = chainon (BIND_EXPR_VARS (bind), ctx->block_vars);
4448 BLOCK_VARS (block) = BIND_EXPR_VARS (bind);
4449 }
4450
4451
4452 /* Lower code for an OpenMP master directive. */
4453
4454 static void
4455 lower_omp_master (tree *stmt_p, omp_context *ctx)
4456 {
4457 tree bind, block, stmt = *stmt_p, lab = NULL, x;
4458
4459 push_gimplify_context ();
4460
4461 block = make_node (BLOCK);
4462 *stmt_p = bind = build3 (BIND_EXPR, void_type_node, NULL, NULL, block);
4463 TREE_SIDE_EFFECTS (bind) = 1;
4464
4465 append_to_statement_list (stmt, &BIND_EXPR_BODY (bind));
4466
4467 x = build_call_expr (built_in_decls[BUILT_IN_OMP_GET_THREAD_NUM], 0);
4468 x = build2 (EQ_EXPR, boolean_type_node, x, integer_zero_node);
4469 x = build3 (COND_EXPR, void_type_node, x, NULL, build_and_jump (&lab));
4470 gimplify_and_add (x, &BIND_EXPR_BODY (bind));
4471
4472 lower_omp (&OMP_MASTER_BODY (stmt), ctx);
4473 maybe_catch_exception (&OMP_MASTER_BODY (stmt));
4474 append_to_statement_list (OMP_MASTER_BODY (stmt), &BIND_EXPR_BODY (bind));
4475 OMP_MASTER_BODY (stmt) = NULL;
4476
4477 x = build1 (LABEL_EXPR, void_type_node, lab);
4478 gimplify_and_add (x, &BIND_EXPR_BODY (bind));
4479
4480 x = make_node (OMP_RETURN);
4481 OMP_RETURN_NOWAIT (x) = 1;
4482 append_to_statement_list (x, &BIND_EXPR_BODY (bind));
4483
4484 pop_gimplify_context (bind);
4485
4486 BIND_EXPR_VARS (bind) = chainon (BIND_EXPR_VARS (bind), ctx->block_vars);
4487 BLOCK_VARS (block) = BIND_EXPR_VARS (bind);
4488 }
4489
4490
4491 /* Lower code for an OpenMP ordered directive. */
4492
4493 static void
4494 lower_omp_ordered (tree *stmt_p, omp_context *ctx)
4495 {
4496 tree bind, block, stmt = *stmt_p, x;
4497
4498 push_gimplify_context ();
4499
4500 block = make_node (BLOCK);
4501 *stmt_p = bind = build3 (BIND_EXPR, void_type_node, NULL, NULL, block);
4502 TREE_SIDE_EFFECTS (bind) = 1;
4503
4504 append_to_statement_list (stmt, &BIND_EXPR_BODY (bind));
4505
4506 x = build_call_expr (built_in_decls[BUILT_IN_GOMP_ORDERED_START], 0);
4507 gimplify_and_add (x, &BIND_EXPR_BODY (bind));
4508
4509 lower_omp (&OMP_ORDERED_BODY (stmt), ctx);
4510 maybe_catch_exception (&OMP_ORDERED_BODY (stmt));
4511 append_to_statement_list (OMP_ORDERED_BODY (stmt), &BIND_EXPR_BODY (bind));
4512 OMP_ORDERED_BODY (stmt) = NULL;
4513
4514 x = build_call_expr (built_in_decls[BUILT_IN_GOMP_ORDERED_END], 0);
4515 gimplify_and_add (x, &BIND_EXPR_BODY (bind));
4516
4517 x = make_node (OMP_RETURN);
4518 OMP_RETURN_NOWAIT (x) = 1;
4519 append_to_statement_list (x, &BIND_EXPR_BODY (bind));
4520
4521 pop_gimplify_context (bind);
4522
4523 BIND_EXPR_VARS (bind) = chainon (BIND_EXPR_VARS (bind), ctx->block_vars);
4524 BLOCK_VARS (block) = BIND_EXPR_VARS (bind);
4525 }
4526
4527
4528 /* Gimplify an OMP_CRITICAL statement. This is a relatively simple
4529 substitution of a couple of function calls. But in the NAMED case,
4530 it requires that languages coordinate a symbol name. It is therefore
4531 best put here in common code. */
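/* For instance, "#pragma omp critical (lock1)" is lowered to calls
   bracketing the body:

     GOMP_critical_name_start (&.gomp_critical_user_lock1);
     ... body ...
     GOMP_critical_name_end (&.gomp_critical_user_lock1);

   where .gomp_critical_user_lock1 is the common, public variable
   created below, so that every translation unit using the same name
   shares one mutex.  */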
4532
4533 static GTY((param1_is (tree), param2_is (tree)))
4534 splay_tree critical_name_mutexes;
4535
4536 static void
4537 lower_omp_critical (tree *stmt_p, omp_context *ctx)
4538 {
4539 tree bind, block, stmt = *stmt_p;
4540 tree t, lock, unlock, name;
4541
4542 name = OMP_CRITICAL_NAME (stmt);
4543 if (name)
4544 {
4545 tree decl;
4546 splay_tree_node n;
4547
4548 if (!critical_name_mutexes)
4549 critical_name_mutexes
4550 = splay_tree_new_ggc (splay_tree_compare_pointers);
4551
4552 n = splay_tree_lookup (critical_name_mutexes, (splay_tree_key) name);
4553 if (n == NULL)
4554 {
4555 char *new_str;
4556
4557 decl = create_tmp_var_raw (ptr_type_node, NULL);
4558
4559 new_str = ACONCAT ((".gomp_critical_user_",
4560 IDENTIFIER_POINTER (name), NULL));
4561 DECL_NAME (decl) = get_identifier (new_str);
4562 TREE_PUBLIC (decl) = 1;
4563 TREE_STATIC (decl) = 1;
4564 DECL_COMMON (decl) = 1;
4565 DECL_ARTIFICIAL (decl) = 1;
4566 DECL_IGNORED_P (decl) = 1;
4567 varpool_finalize_decl (decl);
4568
4569 splay_tree_insert (critical_name_mutexes, (splay_tree_key) name,
4570 (splay_tree_value) decl);
4571 }
4572 else
4573 decl = (tree) n->value;
4574
4575 lock = built_in_decls[BUILT_IN_GOMP_CRITICAL_NAME_START];
4576 lock = build_call_expr (lock, 1, build_fold_addr_expr (decl));
4577
4578 unlock = built_in_decls[BUILT_IN_GOMP_CRITICAL_NAME_END];
4579 unlock = build_call_expr (unlock, 1, build_fold_addr_expr (decl));
4580 }
4581 else
4582 {
4583 lock = built_in_decls[BUILT_IN_GOMP_CRITICAL_START];
4584 lock = build_call_expr (lock, 0);
4585
4586 unlock = built_in_decls[BUILT_IN_GOMP_CRITICAL_END];
4587 unlock = build_call_expr (unlock, 0);
4588 }
4589
4590 push_gimplify_context ();
4591
4592 block = make_node (BLOCK);
4593 *stmt_p = bind = build3 (BIND_EXPR, void_type_node, NULL, NULL, block);
4594 TREE_SIDE_EFFECTS (bind) = 1;
4595
4596 append_to_statement_list (stmt, &BIND_EXPR_BODY (bind));
4597
4598 gimplify_and_add (lock, &BIND_EXPR_BODY (bind));
4599
4600 lower_omp (&OMP_CRITICAL_BODY (stmt), ctx);
4601 maybe_catch_exception (&OMP_CRITICAL_BODY (stmt));
4602 append_to_statement_list (OMP_CRITICAL_BODY (stmt), &BIND_EXPR_BODY (bind));
4603 OMP_CRITICAL_BODY (stmt) = NULL;
4604
4605 gimplify_and_add (unlock, &BIND_EXPR_BODY (bind));
4606
4607 t = make_node (OMP_RETURN);
4608 OMP_RETURN_NOWAIT (t) = 1;
4609 append_to_statement_list (t, &BIND_EXPR_BODY (bind));
4610
4611 pop_gimplify_context (bind);
4612 BIND_EXPR_VARS (bind) = chainon (BIND_EXPR_VARS (bind), ctx->block_vars);
4613 BLOCK_VARS (block) = BIND_EXPR_VARS (bind);
4614 }
4615
4616
4617 /* A subroutine of lower_omp_for. Generate code to emit the predicate
4618 for a lastprivate clause. Given a loop control predicate of (V
4619 cond N2), we gate the clause on (!(V cond N2)). The lowered form
4620 is appended to *DLIST, iterator initialization is appended to
4621 *BODY_P. */
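/* A small worked example of the predicate: for

     #pragma omp for lastprivate (x)
     for (i = 0; i < n; i++) ...

   the condition is (i < n) with step 1, so the copy-out of X is
   guarded by (i == n) -- the strict-equality form chosen below --
   and I is pre-initialized so that a thread that executes no
   iterations does not satisfy the guard by accident.  */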
4622
4623 static void
4624 lower_omp_for_lastprivate (struct omp_for_data *fd, tree *body_p,
4625 tree *dlist, struct omp_context *ctx)
4626 {
4627 tree clauses, cond, stmts, vinit, t;
4628 enum tree_code cond_code;
4629
4630 cond_code = fd->cond_code;
4631 cond_code = cond_code == LT_EXPR ? GE_EXPR : LE_EXPR;
4632
4633 /* When possible, use a strict equality expression. This can let VRP
4634 type optimizations deduce the value and remove a copy. */
4635 if (host_integerp (fd->step, 0))
4636 {
4637 HOST_WIDE_INT step = TREE_INT_CST_LOW (fd->step);
4638 if (step == 1 || step == -1)
4639 cond_code = EQ_EXPR;
4640 }
4641
4642 cond = build2 (cond_code, boolean_type_node, fd->v, fd->n2);
4643
4644 clauses = OMP_FOR_CLAUSES (fd->for_stmt);
4645 stmts = NULL;
4646 lower_lastprivate_clauses (clauses, cond, &stmts, ctx);
4647 if (stmts != NULL)
4648 {
4649 append_to_statement_list (stmts, dlist);
4650
4651 /* Optimize: v = 0; is usually cheaper than v = some_other_constant. */
4652 vinit = fd->n1;
4653 if (cond_code == EQ_EXPR
4654 && host_integerp (fd->n2, 0)
4655 && ! integer_zerop (fd->n2))
4656 vinit = build_int_cst (TREE_TYPE (fd->v), 0);
4657
4658 /* Initialize the iterator variable, so that threads that don't execute
4659 any iterations don't execute the lastprivate clauses by accident. */
4660 t = build_gimple_modify_stmt (fd->v, vinit);
4661 gimplify_and_add (t, body_p);
4662 }
4663 }
4664
4665
4666 /* Lower code for an OpenMP loop directive. */
4667
4668 static void
4669 lower_omp_for (tree *stmt_p, omp_context *ctx)
4670 {
4671 tree t, stmt, ilist, dlist, new_stmt, *body_p, *rhs_p;
4672 struct omp_for_data fd;
4673
4674 stmt = *stmt_p;
4675
4676 push_gimplify_context ();
4677
4678 lower_omp (&OMP_FOR_PRE_BODY (stmt), ctx);
4679 lower_omp (&OMP_FOR_BODY (stmt), ctx);
4680
4681 /* Move declarations of temporaries out of the loop body before we
4682 make it go away. */
4683 if (TREE_CODE (OMP_FOR_BODY (stmt)) == BIND_EXPR)
4684 record_vars_into (BIND_EXPR_VARS (OMP_FOR_BODY (stmt)), ctx->cb.dst_fn);
4685
4686 new_stmt = build3 (BIND_EXPR, void_type_node, NULL, NULL, NULL);
4687 TREE_SIDE_EFFECTS (new_stmt) = 1;
4688 body_p = &BIND_EXPR_BODY (new_stmt);
4689
4690 /* The pre-body and input clauses go before the lowered OMP_FOR. */
4691 ilist = NULL;
4692 dlist = NULL;
4693 append_to_statement_list (OMP_FOR_PRE_BODY (stmt), body_p);
4694 lower_rec_input_clauses (OMP_FOR_CLAUSES (stmt), body_p, &dlist, ctx);
4695
4696 /* Lower the header expressions. At this point, we can assume that
4697 the header is of the form:
4698
4699 #pragma omp for (V = VAL1; V {<|>|<=|>=} VAL2; V = V [+-] VAL3)
4700
4701 We just need to make sure that VAL1, VAL2 and VAL3 are lowered
4702 using the .omp_data_s mapping, if needed. */
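/* Informal example: in

     #pragma omp for
     for (i = 0; i < a->len; i += s.step) ...

   "a->len" and "s.step" are not gimple invariants, so the statements
   below replace them with temporaries evaluated once before the
   OMP_FOR itself.  */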
4703 rhs_p = &GIMPLE_STMT_OPERAND (OMP_FOR_INIT (stmt), 1);
4704 if (!is_gimple_min_invariant (*rhs_p))
4705 *rhs_p = get_formal_tmp_var (*rhs_p, body_p);
4706
4707 rhs_p = &TREE_OPERAND (OMP_FOR_COND (stmt), 1);
4708 if (!is_gimple_min_invariant (*rhs_p))
4709 *rhs_p = get_formal_tmp_var (*rhs_p, body_p);
4710
4711 rhs_p = &TREE_OPERAND (GIMPLE_STMT_OPERAND (OMP_FOR_INCR (stmt), 1), 1);
4712 if (!is_gimple_min_invariant (*rhs_p))
4713 *rhs_p = get_formal_tmp_var (*rhs_p, body_p);
4714
4715 /* Once lowered, extract the bounds and clauses. */
4716 extract_omp_for_data (stmt, &fd);
4717
4718 lower_omp_for_lastprivate (&fd, body_p, &dlist, ctx);
4719
4720 append_to_statement_list (stmt, body_p);
4721
4722 append_to_statement_list (OMP_FOR_BODY (stmt), body_p);
4723
4724 t = build2 (OMP_CONTINUE, void_type_node, fd.v, fd.v);
4725 append_to_statement_list (t, body_p);
4726
4727 /* After the loop, add exit clauses. */
4728 lower_reduction_clauses (OMP_FOR_CLAUSES (stmt), body_p, ctx);
4729 append_to_statement_list (dlist, body_p);
4730
4731 maybe_catch_exception (body_p);
4732
4733 /* Region exit marker goes at the end of the loop body. */
4734 t = make_node (OMP_RETURN);
4735 OMP_RETURN_NOWAIT (t) = fd.have_nowait;
4736 append_to_statement_list (t, body_p);
4737
4738 pop_gimplify_context (NULL_TREE);
4739 record_vars_into (ctx->block_vars, ctx->cb.dst_fn);
4740
4741 OMP_FOR_BODY (stmt) = NULL_TREE;
4742 OMP_FOR_PRE_BODY (stmt) = NULL_TREE;
4743 *stmt_p = new_stmt;
4744 }
4745
4746 /* Callback for walk_stmts. Check if *TP only contains OMP_FOR
4747 or OMP_SECTIONS. */
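/* Put differently (informal note): a parallel whose body is exactly
   one OMP_FOR or OMP_SECTIONS and nothing else leaves *INFO at 1;
   lower_omp_parallel then sets OMP_PARALLEL_COMBINED, which later
   allows the expander to use libgomp's combined parallel+workshare
   entry points.  */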
4748
4749 static tree
4750 check_combined_parallel (tree *tp, int *walk_subtrees, void *data)
4751 {
4752 struct walk_stmt_info *wi = data;
4753 int *info = wi->info;
4754
4755 *walk_subtrees = 0;
4756 switch (TREE_CODE (*tp))
4757 {
4758 case OMP_FOR:
4759 case OMP_SECTIONS:
4760 *info = *info == 0 ? 1 : -1;
4761 break;
4762 default:
4763 *info = -1;
4764 break;
4765 }
4766 return NULL;
4767 }
4768
4769 /* Lower the OpenMP parallel directive in *STMT_P. CTX holds context
4770 information for the directive. */
4771
4772 static void
4773 lower_omp_parallel (tree *stmt_p, omp_context *ctx)
4774 {
4775 tree clauses, par_bind, par_body, new_body, bind;
4776 tree olist, ilist, par_olist, par_ilist;
4777 tree stmt, child_fn, t;
4778
4779 stmt = *stmt_p;
4780
4781 clauses = OMP_PARALLEL_CLAUSES (stmt);
4782 par_bind = OMP_PARALLEL_BODY (stmt);
4783 par_body = BIND_EXPR_BODY (par_bind);
4784 child_fn = ctx->cb.dst_fn;
4785 if (!OMP_PARALLEL_COMBINED (stmt))
4786 {
4787 struct walk_stmt_info wi;
4788 int ws_num = 0;
4789
4790 memset (&wi, 0, sizeof (wi));
4791 wi.callback = check_combined_parallel;
4792 wi.info = &ws_num;
4793 wi.val_only = true;
4794 walk_stmts (&wi, &par_bind);
4795 if (ws_num == 1)
4796 OMP_PARALLEL_COMBINED (stmt) = 1;
4797 }
4798
4799 push_gimplify_context ();
4800
4801 par_olist = NULL_TREE;
4802 par_ilist = NULL_TREE;
4803 lower_rec_input_clauses (clauses, &par_ilist, &par_olist, ctx);
4804 lower_omp (&par_body, ctx);
4805 lower_reduction_clauses (clauses, &par_olist, ctx);
4806
4807 /* Declare all the variables created by mapping and the variables
4808 declared in the scope of the parallel body. */
4809 record_vars_into (ctx->block_vars, child_fn);
4810 record_vars_into (BIND_EXPR_VARS (par_bind), child_fn);
4811
4812 if (ctx->record_type)
4813 {
4814 ctx->sender_decl = create_tmp_var (ctx->record_type, ".omp_data_o");
4815 OMP_PARALLEL_DATA_ARG (stmt) = ctx->sender_decl;
4816 }
4817
4818 olist = NULL_TREE;
4819 ilist = NULL_TREE;
4820 lower_send_clauses (clauses, &ilist, &olist, ctx);
4821 lower_send_shared_vars (&ilist, &olist, ctx);
4822
4823 /* Once all the expansions are done, sequence all the different
4824 fragments inside OMP_PARALLEL_BODY. */
4825 bind = build3 (BIND_EXPR, void_type_node, NULL, NULL, NULL);
4826 append_to_statement_list (ilist, &BIND_EXPR_BODY (bind));
4827
4828 new_body = alloc_stmt_list ();
4829
4830 if (ctx->record_type)
4831 {
4832 t = build_fold_addr_expr (ctx->sender_decl);
4833 /* fixup_child_record_type might have changed receiver_decl's type. */
4834 t = fold_convert (TREE_TYPE (ctx->receiver_decl), t);
4835 t = build_gimple_modify_stmt (ctx->receiver_decl, t);
4836 append_to_statement_list (t, &new_body);
4837 }
4838
4839 append_to_statement_list (par_ilist, &new_body);
4840 append_to_statement_list (par_body, &new_body);
4841 append_to_statement_list (par_olist, &new_body);
4842 maybe_catch_exception (&new_body);
4843 t = make_node (OMP_RETURN);
4844 append_to_statement_list (t, &new_body);
4845 OMP_PARALLEL_BODY (stmt) = new_body;
4846
4847 append_to_statement_list (stmt, &BIND_EXPR_BODY (bind));
4848 append_to_statement_list (olist, &BIND_EXPR_BODY (bind));
4849
4850 *stmt_p = bind;
4851
4852 pop_gimplify_context (NULL_TREE);
4853 }
4854
4855
4856 /* Pass *TP back through the gimplifier within the context determined by WI.
4857 This handles replacement of DECL_VALUE_EXPR, as well as adjusting the
4858 flags on ADDR_EXPR. */
4859
4860 static void
4861 lower_regimplify (tree *tp, struct walk_stmt_info *wi)
4862 {
4863 enum gimplify_status gs;
4864 tree pre = NULL;
4865
4866 if (wi->is_lhs)
4867 gs = gimplify_expr (tp, &pre, NULL, is_gimple_lvalue, fb_lvalue);
4868 else if (wi->val_only)
4869 gs = gimplify_expr (tp, &pre, NULL, is_gimple_val, fb_rvalue);
4870 else
4871 gs = gimplify_expr (tp, &pre, NULL, is_gimple_formal_tmp_var, fb_rvalue);
4872 gcc_assert (gs == GS_ALL_DONE);
4873
4874 if (pre)
4875 tsi_link_before (&wi->tsi, pre, TSI_SAME_STMT);
4876 }
4877
4878 /* Copy EXP into a temporary. Insert the initialization statement before TSI. */
4879
4880 static tree
4881 init_tmp_var (tree exp, tree_stmt_iterator *tsi)
4882 {
4883 tree t, stmt;
4884
4885 t = create_tmp_var (TREE_TYPE (exp), NULL);
4886 DECL_GIMPLE_REG_P (t) = 1;
4887 stmt = build_gimple_modify_stmt (t, exp);
4888 SET_EXPR_LOCUS (stmt, EXPR_LOCUS (tsi_stmt (*tsi)));
4889 tsi_link_before (tsi, stmt, TSI_SAME_STMT);
4890
4891 return t;
4892 }
4893
4894 /* Similarly, but copy from the temporary and insert the statement
4895 after the iterator. */
4896
4897 static tree
4898 save_tmp_var (tree exp, tree_stmt_iterator *tsi)
4899 {
4900 tree t, stmt;
4901
4902 t = create_tmp_var (TREE_TYPE (exp), NULL);
4903 DECL_GIMPLE_REG_P (t) = 1;
4904 stmt = build_gimple_modify_stmt (exp, t);
4905 SET_EXPR_LOCUS (stmt, EXPR_LOCUS (tsi_stmt (*tsi)));
4906 tsi_link_after (tsi, stmt, TSI_SAME_STMT);
4907
4908 return t;
4909 }
4910
4911 /* Callback for walk_stmts. Lower the OpenMP directive pointed to by TP. */
4912
4913 static tree
4914 lower_omp_1 (tree *tp, int *walk_subtrees, void *data)
4915 {
4916 struct walk_stmt_info *wi = data;
4917 omp_context *ctx = wi->info;
4918 tree t = *tp;
4919
4920 /* If we have issued syntax errors, avoid doing any heavy lifting.
4921 Just replace the OpenMP directives with a NOP to avoid
4922 confusing RTL expansion. */
4923 if (errorcount && OMP_DIRECTIVE_P (*tp))
4924 {
4925 *tp = build_empty_stmt ();
4926 return NULL_TREE;
4927 }
4928
4929 *walk_subtrees = 0;
4930 switch (TREE_CODE (*tp))
4931 {
4932 case OMP_PARALLEL:
4933 ctx = maybe_lookup_ctx (t);
4934 lower_omp_parallel (tp, ctx);
4935 break;
4936
4937 case OMP_FOR:
4938 ctx = maybe_lookup_ctx (t);
4939 gcc_assert (ctx);
4940 lower_omp_for (tp, ctx);
4941 break;
4942
4943 case OMP_SECTIONS:
4944 ctx = maybe_lookup_ctx (t);
4945 gcc_assert (ctx);
4946 lower_omp_sections (tp, ctx);
4947 break;
4948
4949 case OMP_SINGLE:
4950 ctx = maybe_lookup_ctx (t);
4951 gcc_assert (ctx);
4952 lower_omp_single (tp, ctx);
4953 break;
4954
4955 case OMP_MASTER:
4956 ctx = maybe_lookup_ctx (t);
4957 gcc_assert (ctx);
4958 lower_omp_master (tp, ctx);
4959 break;
4960
4961 case OMP_ORDERED:
4962 ctx = maybe_lookup_ctx (t);
4963 gcc_assert (ctx);
4964 lower_omp_ordered (tp, ctx);
4965 break;
4966
4967 case OMP_CRITICAL:
4968 ctx = maybe_lookup_ctx (t);
4969 gcc_assert (ctx);
4970 lower_omp_critical (tp, ctx);
4971 break;
4972
4973 case VAR_DECL:
4974 if (ctx && DECL_HAS_VALUE_EXPR_P (t))
4975 {
4976 lower_regimplify (&t, wi);
4977 if (wi->val_only)
4978 {
4979 if (wi->is_lhs)
4980 t = save_tmp_var (t, &wi->tsi);
4981 else
4982 t = init_tmp_var (t, &wi->tsi);
4983 }
4984 *tp = t;
4985 }
4986 break;
4987
4988 case ADDR_EXPR:
4989 if (ctx)
4990 lower_regimplify (tp, wi);
4991 break;
4992
4993 case ARRAY_REF:
4994 case ARRAY_RANGE_REF:
4995 case REALPART_EXPR:
4996 case IMAGPART_EXPR:
4997 case COMPONENT_REF:
4998 case VIEW_CONVERT_EXPR:
4999 if (ctx)
5000 lower_regimplify (tp, wi);
5001 break;
5002
5003 case INDIRECT_REF:
5004 if (ctx)
5005 {
5006 wi->is_lhs = false;
5007 wi->val_only = true;
5008 lower_regimplify (&TREE_OPERAND (t, 0), wi);
5009 }
5010 break;
5011
5012 default:
5013 if (!TYPE_P (t) && !DECL_P (t))
5014 *walk_subtrees = 1;
5015 break;
5016 }
5017
5018 return NULL_TREE;
5019 }
5020
5021 static void
5022 lower_omp (tree *stmt_p, omp_context *ctx)
5023 {
5024 struct walk_stmt_info wi;
5025
5026 memset (&wi, 0, sizeof (wi));
5027 wi.callback = lower_omp_1;
5028 wi.info = ctx;
5029 wi.val_only = true;
5030 wi.want_locations = true;
5031
5032 walk_stmts (&wi, stmt_p);
5033 }
5034 \f
5035 /* Main entry point. */
5036
5037 static unsigned int
5038 execute_lower_omp (void)
5039 {
5040 all_contexts = splay_tree_new (splay_tree_compare_pointers, 0,
5041 delete_omp_context);
5042
5043 scan_omp (&DECL_SAVED_TREE (current_function_decl), NULL);
5044 gcc_assert (parallel_nesting_level == 0);
5045
5046 if (all_contexts->root)
5047 lower_omp (&DECL_SAVED_TREE (current_function_decl), NULL);
5048
5049 if (all_contexts)
5050 {
5051 splay_tree_delete (all_contexts);
5052 all_contexts = NULL;
5053 }
5054 return 0;
5055 }
5056
5057 static bool
5058 gate_lower_omp (void)
5059 {
5060 return flag_openmp != 0;
5061 }
5062
5063 struct tree_opt_pass pass_lower_omp =
5064 {
5065 "omplower", /* name */
5066 gate_lower_omp, /* gate */
5067 execute_lower_omp, /* execute */
5068 NULL, /* sub */
5069 NULL, /* next */
5070 0, /* static_pass_number */
5071 0, /* tv_id */
5072 PROP_gimple_any, /* properties_required */
5073 PROP_gimple_lomp, /* properties_provided */
5074 0, /* properties_destroyed */
5075 0, /* todo_flags_start */
5076 TODO_dump_func, /* todo_flags_finish */
5077 0 /* letter */
5078 };
5079 \f
5080 /* The following is a utility to diagnose OpenMP structured block violations.
5081 It is not part of the "omplower" pass, as that's invoked too late. It
5082 should be invoked by the respective front ends after gimplification. */
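/* For example, the two passes below reject code such as

     #pragma omp parallel
       {
        l1: ...;
       }
     goto l1;

   with "invalid entry to OpenMP structured block", and, symmetrically,
   a goto or return that leaves a structured block is reported as an
   invalid exit.  */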
5083
5084 static splay_tree all_labels;
5085
5086 /* Check for mismatched contexts and generate an error if needed. Return
5087 true if an error is detected. */
5088
5089 static bool
5090 diagnose_sb_0 (tree *stmt_p, tree branch_ctx, tree label_ctx)
5091 {
5092 bool exit_p = true;
5093
5094 if ((label_ctx ? TREE_VALUE (label_ctx) : NULL) == branch_ctx)
5095 return false;
5096
5097 /* Try to avoid confusing the user by producing an error message
5098 with the correct "exit" or "enter" verbiage. We prefer "exit"
5099 unless we can show that LABEL_CTX is nested within BRANCH_CTX. */
5100 if (branch_ctx == NULL)
5101 exit_p = false;
5102 else
5103 {
5104 while (label_ctx)
5105 {
5106 if (TREE_VALUE (label_ctx) == branch_ctx)
5107 {
5108 exit_p = false;
5109 break;
5110 }
5111 label_ctx = TREE_CHAIN (label_ctx);
5112 }
5113 }
5114
5115 if (exit_p)
5116 error ("invalid exit from OpenMP structured block");
5117 else
5118 error ("invalid entry to OpenMP structured block");
5119
5120 *stmt_p = build_empty_stmt ();
5121 return true;
5122 }
5123
5124 /* Pass 1: Create a minimal tree of OpenMP structured blocks, and record
5125 where in the tree each label is found. */
5126
5127 static tree
5128 diagnose_sb_1 (tree *tp, int *walk_subtrees, void *data)
5129 {
5130 struct walk_stmt_info *wi = data;
5131 tree context = (tree) wi->info;
5132 tree inner_context;
5133 tree t = *tp;
5134
5135 *walk_subtrees = 0;
5136 switch (TREE_CODE (t))
5137 {
5138 case OMP_PARALLEL:
5139 case OMP_SECTIONS:
5140 case OMP_SINGLE:
5141 walk_tree (&OMP_CLAUSES (t), diagnose_sb_1, wi, NULL);
5142 /* FALLTHRU */
5143 case OMP_SECTION:
5144 case OMP_MASTER:
5145 case OMP_ORDERED:
5146 case OMP_CRITICAL:
5147 /* The minimal context here is just a tree of statements. */
5148 inner_context = tree_cons (NULL, t, context);
5149 wi->info = inner_context;
5150 walk_stmts (wi, &OMP_BODY (t));
5151 wi->info = context;
5152 break;
5153
5154 case OMP_FOR:
5155 walk_tree (&OMP_FOR_CLAUSES (t), diagnose_sb_1, wi, NULL);
5156 inner_context = tree_cons (NULL, t, context);
5157 wi->info = inner_context;
5158 walk_tree (&OMP_FOR_INIT (t), diagnose_sb_1, wi, NULL);
5159 walk_tree (&OMP_FOR_COND (t), diagnose_sb_1, wi, NULL);
5160 walk_tree (&OMP_FOR_INCR (t), diagnose_sb_1, wi, NULL);
5161 walk_stmts (wi, &OMP_FOR_PRE_BODY (t));
5162 walk_stmts (wi, &OMP_FOR_BODY (t));
5163 wi->info = context;
5164 break;
5165
5166 case LABEL_EXPR:
5167 splay_tree_insert (all_labels, (splay_tree_key) LABEL_EXPR_LABEL (t),
5168 (splay_tree_value) context);
5169 break;
5170
5171 default:
5172 break;
5173 }
5174
5175 return NULL_TREE;
5176 }
5177
5178 /* Pass 2: Check each branch and see if its context differs from the
5179 destination label's context. */
5180
5181 static tree
5182 diagnose_sb_2 (tree *tp, int *walk_subtrees, void *data)
5183 {
5184 struct walk_stmt_info *wi = data;
5185 tree context = (tree) wi->info;
5186 splay_tree_node n;
5187 tree t = *tp;
5188
5189 *walk_subtrees = 0;
5190 switch (TREE_CODE (t))
5191 {
5192 case OMP_PARALLEL:
5193 case OMP_SECTIONS:
5194 case OMP_SINGLE:
5195 walk_tree (&OMP_CLAUSES (t), diagnose_sb_2, wi, NULL);
5196 /* FALLTHRU */
5197 case OMP_SECTION:
5198 case OMP_MASTER:
5199 case OMP_ORDERED:
5200 case OMP_CRITICAL:
5201 wi->info = t;
5202 walk_stmts (wi, &OMP_BODY (t));
5203 wi->info = context;
5204 break;
5205
5206 case OMP_FOR:
5207 walk_tree (&OMP_FOR_CLAUSES (t), diagnose_sb_2, wi, NULL);
5208 wi->info = t;
5209 walk_tree (&OMP_FOR_INIT (t), diagnose_sb_2, wi, NULL);
5210 walk_tree (&OMP_FOR_COND (t), diagnose_sb_2, wi, NULL);
5211 walk_tree (&OMP_FOR_INCR (t), diagnose_sb_2, wi, NULL);
5212 walk_stmts (wi, &OMP_FOR_PRE_BODY (t));
5213 walk_stmts (wi, &OMP_FOR_BODY (t));
5214 wi->info = context;
5215 break;
5216
5217 case GOTO_EXPR:
5218 {
5219 tree lab = GOTO_DESTINATION (t);
5220 if (TREE_CODE (lab) != LABEL_DECL)
5221 break;
5222
5223 n = splay_tree_lookup (all_labels, (splay_tree_key) lab);
5224 diagnose_sb_0 (tp, context, n ? (tree) n->value : NULL_TREE);
5225 }
5226 break;
5227
5228 case SWITCH_EXPR:
5229 {
5230 tree vec = SWITCH_LABELS (t);
5231 int i, len = TREE_VEC_LENGTH (vec);
5232 for (i = 0; i < len; ++i)
5233 {
5234 tree lab = CASE_LABEL (TREE_VEC_ELT (vec, i));
5235 n = splay_tree_lookup (all_labels, (splay_tree_key) lab);
5236 if (diagnose_sb_0 (tp, context, (tree) n->value))
5237 break;
5238 }
5239 }
5240 break;
5241
5242 case RETURN_EXPR:
5243 diagnose_sb_0 (tp, context, NULL_TREE);
5244 break;
5245
5246 default:
5247 break;
5248 }
5249
5250 return NULL_TREE;
5251 }
5252
5253 void
5254 diagnose_omp_structured_block_errors (tree fndecl)
5255 {
5256 tree save_current = current_function_decl;
5257 struct walk_stmt_info wi;
5258
5259 current_function_decl = fndecl;
5260
5261 all_labels = splay_tree_new (splay_tree_compare_pointers, 0, 0);
5262
5263 memset (&wi, 0, sizeof (wi));
5264 wi.callback = diagnose_sb_1;
5265 walk_stmts (&wi, &DECL_SAVED_TREE (fndecl));
5266
5267 memset (&wi, 0, sizeof (wi));
5268 wi.callback = diagnose_sb_2;
5269 wi.want_locations = true;
5270 wi.want_return_expr = true;
5271 walk_stmts (&wi, &DECL_SAVED_TREE (fndecl));
5272
5273 splay_tree_delete (all_labels);
5274 all_labels = NULL;
5275
5276 current_function_decl = save_current;
5277 }
5278
5279 #include "gt-omp-low.h"